diff options
Diffstat (limited to 'xml/mxml-file.c')
-rw-r--r-- | xml/mxml-file.c | 3080 |
1 files changed, 3080 insertions, 0 deletions
diff --git a/xml/mxml-file.c b/xml/mxml-file.c new file mode 100644 index 0000000..c41ec96 --- /dev/null +++ b/xml/mxml-file.c @@ -0,0 +1,3080 @@ +/* + * "$Id: mxml-file.c 438 2011-03-24 05:47:51Z mike $" + * + * File loading code for Mini-XML, a small XML-like file parsing library. + * + * Copyright 2003-2011 by Michael R Sweet. + * + * These coded instructions, statements, and computer programs are the + * property of Michael R Sweet and are protected by Federal copyright + * law. Distribution and use rights are outlined in the file "COPYING" + * which should have been included with this file. If this file is + * missing or damaged, see the license at: + * + * http://www.minixml.org/ + * + * Contents: + * + * mxmlLoadFd() - Load a file descriptor into an XML node tree. + * mxmlLoadFile() - Load a file into an XML node tree. + * mxmlLoadString() - Load a string into an XML node tree. + * mxmlSaveAllocString() - Save an XML tree to an allocated string. + * mxmlSaveFd() - Save an XML tree to a file descriptor. + * mxmlSaveFile() - Save an XML tree to a file. + * mxmlSaveString() - Save an XML node tree to a string. + * mxmlSAXLoadFd() - Load a file descriptor into an XML node tree + * using a SAX callback. + * mxmlSAXLoadFile() - Load a file into an XML node tree + * using a SAX callback. + * mxmlSAXLoadString() - Load a string into an XML node tree + * using a SAX callback. + * mxmlSetCustomHandlers() - Set the handling functions for custom data. + * mxmlSetErrorCallback() - Set the error message callback. + * mxmlSetWrapMargin() - Set the wrap margin when saving XML data. + * mxml_add_char() - Add a character to a buffer, expanding as needed. + * mxml_fd_getc() - Read a character from a file descriptor. + * mxml_fd_putc() - Write a character to a file descriptor. + * mxml_fd_read() - Read a buffer of data from a file descriptor. + * mxml_fd_write() - Write a buffer of data to a file descriptor. + * mxml_file_getc() - Get a character from a file. + * mxml_file_putc() - Write a character to a file. + * mxml_get_entity() - Get the character corresponding to an entity... + * mxml_load_data() - Load data into an XML node tree. + * mxml_parse_element() - Parse an element for any attributes... + * mxml_string_getc() - Get a character from a string. + * mxml_string_putc() - Write a character to a string. + * mxml_write_name() - Write a name string. + * mxml_write_node() - Save an XML node to a file. + * mxml_write_string() - Write a string, escaping & and < as needed. + * mxml_write_ws() - Do whitespace callback... + */ + +/* + * Include necessary headers... + */ + +#ifndef WIN32 +# include <unistd.h> +#endif /* !WIN32 */ +#include "mxml-private.h" + + +/* + * Character encoding... + */ + +#define ENCODE_UTF8 0 /* UTF-8 */ +#define ENCODE_UTF16BE 1 /* UTF-16 Big-Endian */ +#define ENCODE_UTF16LE 2 /* UTF-16 Little-Endian */ + + +/* + * Macro to test for a bad XML character... + */ + +#define mxml_bad_char(ch) ((ch) < ' ' && (ch) != '\n' && (ch) != '\r' && (ch) != '\t') + + +/* + * Types and structures... + */ + +typedef int (*_mxml_getc_cb_t)(void *, int *); +typedef int (*_mxml_putc_cb_t)(int, void *); + +typedef struct _mxml_fdbuf_s /**** File descriptor buffer ****/ +{ + int fd; /* File descriptor */ + unsigned char *current, /* Current position in buffer */ + *end, /* End of buffer */ + buffer[8192]; /* Character buffer */ +} _mxml_fdbuf_t; + + +/* + * Local functions... + */ + +static int mxml_add_char(int ch, char **ptr, char **buffer, + int *bufsize); +static int mxml_fd_getc(void *p, int *encoding); +static int mxml_fd_putc(int ch, void *p); +static int mxml_fd_read(_mxml_fdbuf_t *buf); +static int mxml_fd_write(_mxml_fdbuf_t *buf); +static int mxml_file_getc(void *p, int *encoding); +static int mxml_file_putc(int ch, void *p); +static int mxml_get_entity(mxml_node_t *parent, void *p, + int *encoding, + _mxml_getc_cb_t getc_cb); +static int mxml_isspace(int ch) + { + return (ch == ' ' || ch == '\t' || ch == '\r' || + ch == '\n'); + } +static mxml_node_t *mxml_load_data(mxml_node_t *top, void *p, + mxml_load_cb_t cb, + _mxml_getc_cb_t getc_cb, + mxml_sax_cb_t sax_cb, void *sax_data); +static int mxml_parse_element(mxml_node_t *node, void *p, + int *encoding, + _mxml_getc_cb_t getc_cb); +static int mxml_string_getc(void *p, int *encoding); +static int mxml_string_putc(int ch, void *p); +static int mxml_write_name(const char *s, void *p, + _mxml_putc_cb_t putc_cb); +static int mxml_write_node(mxml_node_t *node, void *p, + mxml_save_cb_t cb, int col, + _mxml_putc_cb_t putc_cb, + _mxml_global_t *global); +static int mxml_write_string(const char *s, void *p, + _mxml_putc_cb_t putc_cb); +static int mxml_write_ws(mxml_node_t *node, void *p, + mxml_save_cb_t cb, int ws, + int col, _mxml_putc_cb_t putc_cb); + + +/* + * 'mxmlLoadFd()' - Load a file descriptor into an XML node tree. + * + * The nodes in the specified file are added to the specified top node. + * If no top node is provided, the XML file MUST be well-formed with a + * single parent node like <?xml> for the entire file. The callback + * function returns the value type that should be used for child nodes. + * If MXML_NO_CALLBACK is specified then all child nodes will be either + * MXML_ELEMENT or MXML_TEXT nodes. + * + * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK, + * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading + * child nodes of the specified type. + */ + +mxml_node_t * /* O - First node or NULL if the file could not be read. */ +mxmlLoadFd(mxml_node_t *top, /* I - Top node */ + int fd, /* I - File descriptor to read from */ + mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */ +{ + _mxml_fdbuf_t buf; /* File descriptor buffer */ + + + /* + * Initialize the file descriptor buffer... + */ + + buf.fd = fd; + buf.current = buf.buffer; + buf.end = buf.buffer; + + /* + * Read the XML data... + */ + + return (mxml_load_data(top, &buf, cb, mxml_fd_getc, MXML_NO_CALLBACK, NULL)); +} + + +/* + * 'mxmlLoadFile()' - Load a file into an XML node tree. + * + * The nodes in the specified file are added to the specified top node. + * If no top node is provided, the XML file MUST be well-formed with a + * single parent node like <?xml> for the entire file. The callback + * function returns the value type that should be used for child nodes. + * If MXML_NO_CALLBACK is specified then all child nodes will be either + * MXML_ELEMENT or MXML_TEXT nodes. + * + * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK, + * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading + * child nodes of the specified type. + */ + +mxml_node_t * /* O - First node or NULL if the file could not be read. */ +mxmlLoadFile(mxml_node_t *top, /* I - Top node */ + FILE *fp, /* I - File to read from */ + mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */ +{ + /* + * Read the XML data... + */ + + return (mxml_load_data(top, fp, cb, mxml_file_getc, MXML_NO_CALLBACK, NULL)); +} + + +/* + * 'mxmlLoadString()' - Load a string into an XML node tree. + * + * The nodes in the specified string are added to the specified top node. + * If no top node is provided, the XML string MUST be well-formed with a + * single parent node like <?xml> for the entire string. The callback + * function returns the value type that should be used for child nodes. + * If MXML_NO_CALLBACK is specified then all child nodes will be either + * MXML_ELEMENT or MXML_TEXT nodes. + * + * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK, + * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading + * child nodes of the specified type. + */ + +mxml_node_t * /* O - First node or NULL if the string has errors. */ +mxmlLoadString(mxml_node_t *top, /* I - Top node */ + const char *s, /* I - String to load */ + mxml_load_cb_t cb) /* I - Callback function or MXML_NO_CALLBACK */ +{ + /* + * Read the XML data... + */ + + return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, MXML_NO_CALLBACK, + NULL)); +} + + +/* + * 'mxmlSaveAllocString()' - Save an XML tree to an allocated string. + * + * This function returns a pointer to a string containing the textual + * representation of the XML node tree. The string should be freed + * using the free() function when you are done with it. NULL is returned + * if the node would produce an empty string or if the string cannot be + * allocated. + * + * The callback argument specifies a function that returns a whitespace + * string or NULL before and after each element. If MXML_NO_CALLBACK + * is specified, whitespace will only be added before MXML_TEXT nodes + * with leading whitespace and before attribute names inside opening + * element tags. + */ + +char * /* O - Allocated string or NULL */ +mxmlSaveAllocString( + mxml_node_t *node, /* I - Node to write */ + mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */ +{ + int bytes; /* Required bytes */ + char buffer[8192]; /* Temporary buffer */ + char *s; /* Allocated string */ + + + /* + * Write the node to the temporary buffer... + */ + + bytes = mxmlSaveString(node, buffer, sizeof(buffer), cb); + + if (bytes <= 0) + return (NULL); + + if (bytes < (int)(sizeof(buffer) - 1)) + { + /* + * Node fit inside the buffer, so just duplicate that string and + * return... + */ + + return (strdup(buffer)); + } + + /* + * Allocate a buffer of the required size and save the node to the + * new buffer... + */ + + if ((s = malloc(bytes + 1)) == NULL) + return (NULL); + + mxmlSaveString(node, s, bytes + 1, cb); + + /* + * Return the allocated string... + */ + + return (s); +} + + +/* + * 'mxmlSaveFd()' - Save an XML tree to a file descriptor. + * + * The callback argument specifies a function that returns a whitespace + * string or NULL before and after each element. If MXML_NO_CALLBACK + * is specified, whitespace will only be added before MXML_TEXT nodes + * with leading whitespace and before attribute names inside opening + * element tags. + */ + +int /* O - 0 on success, -1 on error. */ +mxmlSaveFd(mxml_node_t *node, /* I - Node to write */ + int fd, /* I - File descriptor to write to */ + mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */ +{ + int col; /* Final column */ + _mxml_fdbuf_t buf; /* File descriptor buffer */ + _mxml_global_t *global = _mxml_global(); + /* Global data */ + + + /* + * Initialize the file descriptor buffer... + */ + + buf.fd = fd; + buf.current = buf.buffer; + buf.end = buf.buffer + sizeof(buf.buffer); + + /* + * Write the node... + */ + + if ((col = mxml_write_node(node, &buf, cb, 0, mxml_fd_putc, global)) < 0) + return (-1); + + if (col > 0) + if (mxml_fd_putc('\n', &buf) < 0) + return (-1); + + /* + * Flush and return... + */ + + return (mxml_fd_write(&buf)); +} + + +/* + * 'mxmlSaveFile()' - Save an XML tree to a file. + * + * The callback argument specifies a function that returns a whitespace + * string or NULL before and after each element. If MXML_NO_CALLBACK + * is specified, whitespace will only be added before MXML_TEXT nodes + * with leading whitespace and before attribute names inside opening + * element tags. + */ + +int /* O - 0 on success, -1 on error. */ +mxmlSaveFile(mxml_node_t *node, /* I - Node to write */ + FILE *fp, /* I - File to write to */ + mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */ +{ + int col; /* Final column */ + _mxml_global_t *global = _mxml_global(); + /* Global data */ + + + /* + * Write the node... + */ + + if ((col = mxml_write_node(node, fp, cb, 0, mxml_file_putc, global)) < 0) + return (-1); + + if (col > 0) + if (putc('\n', fp) < 0) + return (-1); + + /* + * Return 0 (success)... + */ + + return (0); +} + + +/* + * 'mxmlSaveString()' - Save an XML node tree to a string. + * + * This function returns the total number of bytes that would be + * required for the string but only copies (bufsize - 1) characters + * into the specified buffer. + * + * The callback argument specifies a function that returns a whitespace + * string or NULL before and after each element. If MXML_NO_CALLBACK + * is specified, whitespace will only be added before MXML_TEXT nodes + * with leading whitespace and before attribute names inside opening + * element tags. + */ + +int /* O - Size of string */ +mxmlSaveString(mxml_node_t *node, /* I - Node to write */ + char *buffer, /* I - String buffer */ + int bufsize, /* I - Size of string buffer */ + mxml_save_cb_t cb) /* I - Whitespace callback or MXML_NO_CALLBACK */ +{ + int col; /* Final column */ + char *ptr[2]; /* Pointers for putc_cb */ + _mxml_global_t *global = _mxml_global(); + /* Global data */ + + + /* + * Write the node... + */ + + ptr[0] = buffer; + ptr[1] = buffer + bufsize; + + if ((col = mxml_write_node(node, ptr, cb, 0, mxml_string_putc, global)) < 0) + return (-1); + + if (col > 0) + mxml_string_putc('\n', ptr); + + /* + * Nul-terminate the buffer... + */ + + if (ptr[0] >= ptr[1]) + buffer[bufsize - 1] = '\0'; + else + ptr[0][0] = '\0'; + + /* + * Return the number of characters... + */ + + return (ptr[0] - buffer); +} + + +/* + * 'mxmlSAXLoadFd()' - Load a file descriptor into an XML node tree + * using a SAX callback. + * + * The nodes in the specified file are added to the specified top node. + * If no top node is provided, the XML file MUST be well-formed with a + * single parent node like <?xml> for the entire file. The callback + * function returns the value type that should be used for child nodes. + * If MXML_NO_CALLBACK is specified then all child nodes will be either + * MXML_ELEMENT or MXML_TEXT nodes. + * + * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK, + * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading + * child nodes of the specified type. + * + * The SAX callback must call mxmlRetain() for any nodes that need to + * be kept for later use. Otherwise, nodes are deleted when the parent + * node is closed or after each data, comment, CDATA, or directive node. + * + * @since Mini-XML 2.3@ + */ + +mxml_node_t * /* O - First node or NULL if the file could not be read. */ +mxmlSAXLoadFd(mxml_node_t *top, /* I - Top node */ + int fd, /* I - File descriptor to read from */ + mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */ + mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */ + void *sax_data) /* I - SAX user data */ +{ + _mxml_fdbuf_t buf; /* File descriptor buffer */ + + + /* + * Initialize the file descriptor buffer... + */ + + buf.fd = fd; + buf.current = buf.buffer; + buf.end = buf.buffer; + + /* + * Read the XML data... + */ + + return (mxml_load_data(top, &buf, cb, mxml_fd_getc, sax_cb, sax_data)); +} + + +/* + * 'mxmlSAXLoadFile()' - Load a file into an XML node tree + * using a SAX callback. + * + * The nodes in the specified file are added to the specified top node. + * If no top node is provided, the XML file MUST be well-formed with a + * single parent node like <?xml> for the entire file. The callback + * function returns the value type that should be used for child nodes. + * If MXML_NO_CALLBACK is specified then all child nodes will be either + * MXML_ELEMENT or MXML_TEXT nodes. + * + * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK, + * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading + * child nodes of the specified type. + * + * The SAX callback must call mxmlRetain() for any nodes that need to + * be kept for later use. Otherwise, nodes are deleted when the parent + * node is closed or after each data, comment, CDATA, or directive node. + * + * @since Mini-XML 2.3@ + */ + +mxml_node_t * /* O - First node or NULL if the file could not be read. */ +mxmlSAXLoadFile( + mxml_node_t *top, /* I - Top node */ + FILE *fp, /* I - File to read from */ + mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */ + mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */ + void *sax_data) /* I - SAX user data */ +{ + /* + * Read the XML data... + */ + + return (mxml_load_data(top, fp, cb, mxml_file_getc, sax_cb, sax_data)); +} + + +/* + * 'mxmlSAXLoadString()' - Load a string into an XML node tree + * using a SAX callback. + * + * The nodes in the specified string are added to the specified top node. + * If no top node is provided, the XML string MUST be well-formed with a + * single parent node like <?xml> for the entire string. The callback + * function returns the value type that should be used for child nodes. + * If MXML_NO_CALLBACK is specified then all child nodes will be either + * MXML_ELEMENT or MXML_TEXT nodes. + * + * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK, + * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading + * child nodes of the specified type. + * + * The SAX callback must call mxmlRetain() for any nodes that need to + * be kept for later use. Otherwise, nodes are deleted when the parent + * node is closed or after each data, comment, CDATA, or directive node. + * + * @since Mini-XML 2.3@ + */ + +mxml_node_t * /* O - First node or NULL if the string has errors. */ +mxmlSAXLoadString( + mxml_node_t *top, /* I - Top node */ + const char *s, /* I - String to load */ + mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */ + mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */ + void *sax_data) /* I - SAX user data */ +{ + /* + * Read the XML data... + */ + + return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, sax_cb, sax_data)); +} + + +/* + * 'mxmlSetCustomHandlers()' - Set the handling functions for custom data. + * + * The load function accepts a node pointer and a data string and must + * return 0 on success and non-zero on error. + * + * The save function accepts a node pointer and must return a malloc'd + * string on success and NULL on error. + * + */ + +void +mxmlSetCustomHandlers( + mxml_custom_load_cb_t load, /* I - Load function */ + mxml_custom_save_cb_t save) /* I - Save function */ +{ + _mxml_global_t *global = _mxml_global(); + /* Global data */ + + + global->custom_load_cb = load; + global->custom_save_cb = save; +} + + +/* + * 'mxmlSetErrorCallback()' - Set the error message callback. + */ + +void +mxmlSetErrorCallback(mxml_error_cb_t cb)/* I - Error callback function */ +{ + _mxml_global_t *global = _mxml_global(); + /* Global data */ + + + global->error_cb = cb; +} + + +/* + * 'mxmlSetWrapMargin()' - Set the wrap margin when saving XML data. + * + * Wrapping is disabled when "column" is 0. + * + * @since Mini-XML 2.3@ + */ + +void +mxmlSetWrapMargin(int column) /* I - Column for wrapping, 0 to disable wrapping */ +{ + _mxml_global_t *global = _mxml_global(); + /* Global data */ + + + global->wrap = column; +} + + +/* + * 'mxml_add_char()' - Add a character to a buffer, expanding as needed. + */ + +static int /* O - 0 on success, -1 on error */ +mxml_add_char(int ch, /* I - Character to add */ + char **bufptr, /* IO - Current position in buffer */ + char **buffer, /* IO - Current buffer */ + int *bufsize) /* IO - Current buffer size */ +{ + char *newbuffer; /* New buffer value */ + + + if (*bufptr >= (*buffer + *bufsize - 4)) + { + /* + * Increase the size of the buffer... + */ + + if (*bufsize < 1024) + (*bufsize) *= 2; + else + (*bufsize) += 1024; + + if ((newbuffer = realloc(*buffer, *bufsize)) == NULL) + { + free(*buffer); + + mxml_error("Unable to expand string buffer to %d bytes!", *bufsize); + + return (-1); + } + + *bufptr = newbuffer + (*bufptr - *buffer); + *buffer = newbuffer; + } + + if (ch < 0x80) + { + /* + * Single byte ASCII... + */ + + *(*bufptr)++ = ch; + } + else if (ch < 0x800) + { + /* + * Two-byte UTF-8... + */ + + *(*bufptr)++ = 0xc0 | (ch >> 6); + *(*bufptr)++ = 0x80 | (ch & 0x3f); + } + else if (ch < 0x10000) + { + /* + * Three-byte UTF-8... + */ + + *(*bufptr)++ = 0xe0 | (ch >> 12); + *(*bufptr)++ = 0x80 | ((ch >> 6) & 0x3f); + *(*bufptr)++ = 0x80 | (ch & 0x3f); + } + else + { + /* + * Four-byte UTF-8... + */ + + *(*bufptr)++ = 0xf0 | (ch >> 18); + *(*bufptr)++ = 0x80 | ((ch >> 12) & 0x3f); + *(*bufptr)++ = 0x80 | ((ch >> 6) & 0x3f); + *(*bufptr)++ = 0x80 | (ch & 0x3f); + } + + return (0); +} + + +/* + * 'mxml_fd_getc()' - Read a character from a file descriptor. + */ + +static int /* O - Character or EOF */ +mxml_fd_getc(void *p, /* I - File descriptor buffer */ + int *encoding) /* IO - Encoding */ +{ + _mxml_fdbuf_t *buf; /* File descriptor buffer */ + int ch, /* Current character */ + temp; /* Temporary character */ + + + /* + * Grab the next character in the buffer... + */ + + buf = (_mxml_fdbuf_t *)p; + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + ch = *(buf->current)++; + + switch (*encoding) + { + case ENCODE_UTF8 : + /* + * Got a UTF-8 character; convert UTF-8 to Unicode and return... + */ + + if (!(ch & 0x80)) + { +#if DEBUG > 1 + printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + + return (ch); + } + else if (ch == 0xfe) + { + /* + * UTF-16 big-endian BOM? + */ + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + ch = *(buf->current)++; + + if (ch != 0xff) + return (EOF); + + *encoding = ENCODE_UTF16BE; + + return (mxml_fd_getc(p, encoding)); + } + else if (ch == 0xff) + { + /* + * UTF-16 little-endian BOM? + */ + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + ch = *(buf->current)++; + + if (ch != 0xfe) + return (EOF); + + *encoding = ENCODE_UTF16LE; + + return (mxml_fd_getc(p, encoding)); + } + else if ((ch & 0xe0) == 0xc0) + { + /* + * Two-byte value... + */ + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + if ((temp & 0xc0) != 0x80) + return (EOF); + + ch = ((ch & 0x1f) << 6) | (temp & 0x3f); + + if (ch < 0x80) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + } + else if ((ch & 0xf0) == 0xe0) + { + /* + * Three-byte value... + */ + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + if ((temp & 0xc0) != 0x80) + return (EOF); + + ch = ((ch & 0x0f) << 6) | (temp & 0x3f); + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + if ((temp & 0xc0) != 0x80) + return (EOF); + + ch = (ch << 6) | (temp & 0x3f); + + if (ch < 0x800) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + + /* + * Ignore (strip) Byte Order Mark (BOM)... + */ + + if (ch == 0xfeff) + return (mxml_fd_getc(p, encoding)); + } + else if ((ch & 0xf8) == 0xf0) + { + /* + * Four-byte value... + */ + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + if ((temp & 0xc0) != 0x80) + return (EOF); + + ch = ((ch & 0x07) << 6) | (temp & 0x3f); + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + if ((temp & 0xc0) != 0x80) + return (EOF); + + ch = (ch << 6) | (temp & 0x3f); + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + if ((temp & 0xc0) != 0x80) + return (EOF); + + ch = (ch << 6) | (temp & 0x3f); + + if (ch < 0x10000) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + } + else + return (EOF); + break; + + case ENCODE_UTF16BE : + /* + * Read UTF-16 big-endian char... + */ + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + ch = (ch << 8) | temp; + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + else if (ch >= 0xd800 && ch <= 0xdbff) + { + /* + * Multi-word UTF-16 char... + */ + + int lch; + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + lch = *(buf->current)++; + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + lch = (lch << 8) | temp; + + if (lch < 0xdc00 || lch >= 0xdfff) + return (EOF); + + ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; + } + break; + + case ENCODE_UTF16LE : + /* + * Read UTF-16 little-endian char... + */ + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + ch |= (temp << 8); + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + else if (ch >= 0xd800 && ch <= 0xdbff) + { + /* + * Multi-word UTF-16 char... + */ + + int lch; + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + lch = *(buf->current)++; + + if (buf->current >= buf->end) + if (mxml_fd_read(buf) < 0) + return (EOF); + + temp = *(buf->current)++; + + lch |= (temp << 8); + + if (lch < 0xdc00 || lch >= 0xdfff) + return (EOF); + + ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; + } + break; + } + +#if DEBUG > 1 + printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + return (ch); +} + + +/* + * 'mxml_fd_putc()' - Write a character to a file descriptor. + */ + +static int /* O - 0 on success, -1 on error */ +mxml_fd_putc(int ch, /* I - Character */ + void *p) /* I - File descriptor buffer */ +{ + _mxml_fdbuf_t *buf; /* File descriptor buffer */ + + + /* + * Flush the write buffer as needed... + */ + + buf = (_mxml_fdbuf_t *)p; + + if (buf->current >= buf->end) + if (mxml_fd_write(buf) < 0) + return (-1); + + *(buf->current)++ = ch; + + /* + * Return successfully... + */ + + return (0); +} + + +/* + * 'mxml_fd_read()' - Read a buffer of data from a file descriptor. + */ + +static int /* O - 0 on success, -1 on error */ +mxml_fd_read(_mxml_fdbuf_t *buf) /* I - File descriptor buffer */ +{ + int bytes; /* Bytes read... */ + + + /* + * Range check input... + */ + + if (!buf) + return (-1); + + /* + * Read from the file descriptor... + */ + + while ((bytes = read(buf->fd, buf->buffer, sizeof(buf->buffer))) < 0) +#ifdef EINTR + if (errno != EAGAIN && errno != EINTR) +#else + if (errno != EAGAIN) +#endif /* EINTR */ + return (-1); + + if (bytes == 0) + return (-1); + + /* + * Update the pointers and return success... + */ + + buf->current = buf->buffer; + buf->end = buf->buffer + bytes; + + return (0); +} + + +/* + * 'mxml_fd_write()' - Write a buffer of data to a file descriptor. + */ + +static int /* O - 0 on success, -1 on error */ +mxml_fd_write(_mxml_fdbuf_t *buf) /* I - File descriptor buffer */ +{ + int bytes; /* Bytes written */ + unsigned char *ptr; /* Pointer into buffer */ + + + /* + * Range check... + */ + + if (!buf) + return (-1); + + /* + * Return 0 if there is nothing to write... + */ + + if (buf->current == buf->buffer) + return (0); + + /* + * Loop until we have written everything... + */ + + for (ptr = buf->buffer; ptr < buf->current; ptr += bytes) + if ((bytes = write(buf->fd, ptr, buf->current - ptr)) < 0) + return (-1); + + /* + * All done, reset pointers and return success... + */ + + buf->current = buf->buffer; + + return (0); +} + + +/* + * 'mxml_file_getc()' - Get a character from a file. + */ + +static int /* O - Character or EOF */ +mxml_file_getc(void *p, /* I - Pointer to file */ + int *encoding) /* IO - Encoding */ +{ + int ch, /* Character from file */ + temp; /* Temporary character */ + FILE *fp; /* Pointer to file */ + + + /* + * Read a character from the file and see if it is EOF or ASCII... + */ + + fp = (FILE *)p; + ch = getc(fp); + + if (ch == EOF) + return (EOF); + + switch (*encoding) + { + case ENCODE_UTF8 : + /* + * Got a UTF-8 character; convert UTF-8 to Unicode and return... + */ + + if (!(ch & 0x80)) + { + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + +#if DEBUG > 1 + printf("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + return (ch); + } + else if (ch == 0xfe) + { + /* + * UTF-16 big-endian BOM? + */ + + ch = getc(fp); + if (ch != 0xff) + return (EOF); + + *encoding = ENCODE_UTF16BE; + + return (mxml_file_getc(p, encoding)); + } + else if (ch == 0xff) + { + /* + * UTF-16 little-endian BOM? + */ + + ch = getc(fp); + if (ch != 0xfe) + return (EOF); + + *encoding = ENCODE_UTF16LE; + + return (mxml_file_getc(p, encoding)); + } + else if ((ch & 0xe0) == 0xc0) + { + /* + * Two-byte value... + */ + + if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) + return (EOF); + + ch = ((ch & 0x1f) << 6) | (temp & 0x3f); + + if (ch < 0x80) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + } + else if ((ch & 0xf0) == 0xe0) + { + /* + * Three-byte value... + */ + + if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) + return (EOF); + + ch = ((ch & 0x0f) << 6) | (temp & 0x3f); + + if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) + return (EOF); + + ch = (ch << 6) | (temp & 0x3f); + + if (ch < 0x800) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + + /* + * Ignore (strip) Byte Order Mark (BOM)... + */ + + if (ch == 0xfeff) + return (mxml_file_getc(p, encoding)); + } + else if ((ch & 0xf8) == 0xf0) + { + /* + * Four-byte value... + */ + + if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) + return (EOF); + + ch = ((ch & 0x07) << 6) | (temp & 0x3f); + + if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) + return (EOF); + + ch = (ch << 6) | (temp & 0x3f); + + if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) + return (EOF); + + ch = (ch << 6) | (temp & 0x3f); + + if (ch < 0x10000) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + } + else + return (EOF); + break; + + case ENCODE_UTF16BE : + /* + * Read UTF-16 big-endian char... + */ + + ch = (ch << 8) | getc(fp); + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + else if (ch >= 0xd800 && ch <= 0xdbff) + { + /* + * Multi-word UTF-16 char... + */ + + int lch = (getc(fp) << 8) | getc(fp); + + if (lch < 0xdc00 || lch >= 0xdfff) + return (EOF); + + ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; + } + break; + + case ENCODE_UTF16LE : + /* + * Read UTF-16 little-endian char... + */ + + ch |= (getc(fp) << 8); + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + else if (ch >= 0xd800 && ch <= 0xdbff) + { + /* + * Multi-word UTF-16 char... + */ + + int lch = getc(fp) | (getc(fp) << 8); + + if (lch < 0xdc00 || lch >= 0xdfff) + return (EOF); + + ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; + } + break; + } + +#if DEBUG > 1 + printf("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + return (ch); +} + + +/* + * 'mxml_file_putc()' - Write a character to a file. + */ + +static int /* O - 0 on success, -1 on failure */ +mxml_file_putc(int ch, /* I - Character to write */ + void *p) /* I - Pointer to file */ +{ + return (putc(ch, (FILE *)p) == EOF ? -1 : 0); +} + + +/* + * 'mxml_get_entity()' - Get the character corresponding to an entity... + */ + +static int /* O - Character value or EOF on error */ +mxml_get_entity(mxml_node_t *parent, /* I - Parent node */ + void *p, /* I - Pointer to source */ + int *encoding, /* IO - Character encoding */ + int (*getc_cb)(void *, int *)) + /* I - Get character function */ +{ + int ch; /* Current character */ + char entity[64], /* Entity string */ + *entptr; /* Pointer into entity */ + + + entptr = entity; + + while ((ch = (*getc_cb)(p, encoding)) != EOF) + if (ch > 126 || (!isalnum(ch) && ch != '#')) + break; + else if (entptr < (entity + sizeof(entity) - 1)) + *entptr++ = ch; + else + { + mxml_error("Entity name too long under parent <%s>!", + parent ? parent->value.element.name : "null"); + break; + } + + *entptr = '\0'; + + if (ch != ';') + { + mxml_error("Character entity \"%s\" not terminated under parent <%s>!", + entity, parent ? parent->value.element.name : "null"); + return (EOF); + } + + if (entity[0] == '#') + { + if (entity[1] == 'x') + ch = strtol(entity + 2, NULL, 16); + else + ch = strtol(entity + 1, NULL, 10); + } + else if ((ch = mxmlEntityGetValue(entity)) < 0) + mxml_error("Entity name \"%s;\" not supported under parent <%s>!", + entity, parent ? parent->value.element.name : "null"); + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x under parent <%s> not allowed by XML standard!", + ch, parent ? parent->value.element.name : "null"); + return (EOF); + } + + return (ch); +} + + +/* + * 'mxml_load_data()' - Load data into an XML node tree. + */ + +static mxml_node_t * /* O - First node or NULL if the file could not be read. */ +mxml_load_data( + mxml_node_t *top, /* I - Top node */ + void *p, /* I - Pointer to data */ + mxml_load_cb_t cb, /* I - Callback function or MXML_NO_CALLBACK */ + _mxml_getc_cb_t getc_cb, /* I - Read function */ + mxml_sax_cb_t sax_cb, /* I - SAX callback or MXML_NO_CALLBACK */ + void *sax_data) /* I - SAX user data */ +{ + mxml_node_t *node, /* Current node */ + *first, /* First node added */ + *parent; /* Current parent node */ + int ch, /* Character from file */ + whitespace; /* Non-zero if whitespace seen */ + char *buffer, /* String buffer */ + *bufptr; /* Pointer into buffer */ + int bufsize; /* Size of buffer */ + mxml_type_t type; /* Current node type */ + int encoding; /* Character encoding */ + _mxml_global_t *global = _mxml_global(); + /* Global data */ + static const char * const types[] = /* Type strings... */ + { + "MXML_ELEMENT", /* XML element with attributes */ + "MXML_INTEGER", /* Integer value */ + "MXML_OPAQUE", /* Opaque string */ + "MXML_REAL", /* Real value */ + "MXML_TEXT", /* Text fragment */ + "MXML_CUSTOM" /* Custom data */ + }; + + + /* + * Read elements and other nodes from the file... + */ + + if ((buffer = malloc(64)) == NULL) + { + mxml_error("Unable to allocate string buffer!"); + return (NULL); + } + + bufsize = 64; + bufptr = buffer; + parent = top; + first = NULL; + whitespace = 0; + encoding = ENCODE_UTF8; + + if (cb && parent) + type = (*cb)(parent); + else + type = MXML_TEXT; + + while ((ch = (*getc_cb)(p, &encoding)) != EOF) + { + if ((ch == '<' || + (mxml_isspace(ch) && type != MXML_OPAQUE && type != MXML_CUSTOM)) && + bufptr > buffer) + { + /* + * Add a new value node... + */ + + *bufptr = '\0'; + + switch (type) + { + case MXML_INTEGER : + node = mxmlNewInteger(parent, strtol(buffer, &bufptr, 0)); + break; + + case MXML_OPAQUE : + node = mxmlNewOpaque(parent, buffer); + break; + + case MXML_REAL : + node = mxmlNewReal(parent, strtod(buffer, &bufptr)); + break; + + case MXML_TEXT : + node = mxmlNewText(parent, whitespace, buffer); + break; + + case MXML_CUSTOM : + if (global->custom_load_cb) + { + /* + * Use the callback to fill in the custom data... + */ + + node = mxmlNewCustom(parent, NULL, NULL); + + if ((*global->custom_load_cb)(node, buffer)) + { + mxml_error("Bad custom value '%s' in parent <%s>!", + buffer, parent ? parent->value.element.name : "null"); + mxmlDelete(node); + node = NULL; + } + break; + } + + default : /* Ignore... */ + node = NULL; + break; + } + + if (*bufptr) + { + /* + * Bad integer/real number value... + */ + + mxml_error("Bad %s value '%s' in parent <%s>!", + type == MXML_INTEGER ? "integer" : "real", buffer, + parent ? parent->value.element.name : "null"); + break; + } + + bufptr = buffer; + whitespace = mxml_isspace(ch) && type == MXML_TEXT; + + if (!node && type != MXML_IGNORE) + { + /* + * Print error and return... + */ + + mxml_error("Unable to add value node of type %s to parent <%s>!", + types[type], parent ? parent->value.element.name : "null"); + goto error; + } + + if (sax_cb) + { + (*sax_cb)(node, MXML_SAX_DATA, sax_data); + + if (!mxmlRelease(node)) + node = NULL; + } + + if (!first && node) + first = node; + } + else if (mxml_isspace(ch) && type == MXML_TEXT) + whitespace = 1; + + /* + * Add lone whitespace node if we have an element and existing + * whitespace... + */ + + if (ch == '<' && whitespace && type == MXML_TEXT) + { + if (parent) + { + node = mxmlNewText(parent, whitespace, ""); + + if (sax_cb) + { + (*sax_cb)(node, MXML_SAX_DATA, sax_data); + + if (!mxmlRelease(node)) + node = NULL; + } + + if (!first && node) + first = node; + } + + whitespace = 0; + } + + if (ch == '<') + { + /* + * Start of open/close tag... + */ + + bufptr = buffer; + + while ((ch = (*getc_cb)(p, &encoding)) != EOF) + if (mxml_isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer)) + break; + else if (ch == '<') + { + mxml_error("Bare < in element!"); + goto error; + } + else if (ch == '&') + { + if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF) + goto error; + + if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) + goto error; + } + else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) + goto error; + else if (((bufptr - buffer) == 1 && buffer[0] == '?') || + ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3)) || + ((bufptr - buffer) == 8 && !strncmp(buffer, "![CDATA[", 8))) + break; + + *bufptr = '\0'; + + if (!strcmp(buffer, "!--")) + { + /* + * Gather rest of comment... + */ + + while ((ch = (*getc_cb)(p, &encoding)) != EOF) + { + if (ch == '>' && bufptr > (buffer + 4) && + bufptr[-3] != '-' && bufptr[-2] == '-' && bufptr[-1] == '-') + break; + else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) + goto error; + } + + /* + * Error out if we didn't get the whole comment... + */ + + if (ch != '>') + { + /* + * Print error and return... + */ + + mxml_error("Early EOF in comment node!"); + goto error; + } + + + /* + * Otherwise add this as an element under the current parent... + */ + + *bufptr = '\0'; + + if (!parent && first) + { + /* + * There can only be one root element! + */ + + mxml_error("<%s> cannot be a second root node after <%s>", + buffer, first->value.element.name); + goto error; + } + + if ((node = mxmlNewElement(parent, buffer)) == NULL) + { + /* + * Just print error for now... + */ + + mxml_error("Unable to add comment node to parent <%s>!", + parent ? parent->value.element.name : "null"); + break; + } + + if (sax_cb) + { + (*sax_cb)(node, MXML_SAX_COMMENT, sax_data); + + if (!mxmlRelease(node)) + node = NULL; + } + + if (node && !first) + first = node; + } + else if (!strcmp(buffer, "![CDATA[")) + { + /* + * Gather CDATA section... + */ + + while ((ch = (*getc_cb)(p, &encoding)) != EOF) + { + if (ch == '>' && !strncmp(bufptr - 2, "]]", 2)) + break; + else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) + goto error; + } + + /* + * Error out if we didn't get the whole comment... + */ + + if (ch != '>') + { + /* + * Print error and return... + */ + + mxml_error("Early EOF in CDATA node!"); + goto error; + } + + + /* + * Otherwise add this as an element under the current parent... + */ + + *bufptr = '\0'; + + if (!parent && first) + { + /* + * There can only be one root element! + */ + + mxml_error("<%s> cannot be a second root node after <%s>", + buffer, first->value.element.name); + goto error; + } + + if ((node = mxmlNewElement(parent, buffer)) == NULL) + { + /* + * Print error and return... + */ + + mxml_error("Unable to add CDATA node to parent <%s>!", + parent ? parent->value.element.name : "null"); + goto error; + } + + if (sax_cb) + { + (*sax_cb)(node, MXML_SAX_CDATA, sax_data); + + if (!mxmlRelease(node)) + node = NULL; + } + + if (node && !first) + first = node; + } + else if (buffer[0] == '?') + { + /* + * Gather rest of processing instruction... + */ + + while ((ch = (*getc_cb)(p, &encoding)) != EOF) + { + if (ch == '>' && bufptr > buffer && bufptr[-1] == '?') + break; + else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) + goto error; + } + + /* + * Error out if we didn't get the whole processing instruction... + */ + + if (ch != '>') + { + /* + * Print error and return... + */ + + mxml_error("Early EOF in processing instruction node!"); + goto error; + } + + /* + * Otherwise add this as an element under the current parent... + */ + + *bufptr = '\0'; + + if (!parent && first) + { + /* + * There can only be one root element! + */ + + mxml_error("<%s> cannot be a second root node after <%s>", + buffer, first->value.element.name); + goto error; + } + + if ((node = mxmlNewElement(parent, buffer)) == NULL) + { + /* + * Print error and return... + */ + + mxml_error("Unable to add processing instruction node to parent <%s>!", + parent ? parent->value.element.name : "null"); + goto error; + } + + if (sax_cb) + { + (*sax_cb)(node, MXML_SAX_DIRECTIVE, sax_data); + + if (!mxmlRelease(node)) + node = NULL; + } + + if (node) + { + if (!first) + first = node; + + if (!parent) + { + parent = node; + + if (cb) + type = (*cb)(parent); + } + } + } + else if (buffer[0] == '!') + { + /* + * Gather rest of declaration... + */ + + do + { + if (ch == '>') + break; + else + { + if (ch == '&') + if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF) + goto error; + + if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) + goto error; + } + } + while ((ch = (*getc_cb)(p, &encoding)) != EOF); + + /* + * Error out if we didn't get the whole declaration... + */ + + if (ch != '>') + { + /* + * Print error and return... + */ + + mxml_error("Early EOF in declaration node!"); + goto error; + } + + /* + * Otherwise add this as an element under the current parent... + */ + + *bufptr = '\0'; + + if (!parent && first) + { + /* + * There can only be one root element! + */ + + mxml_error("<%s> cannot be a second root node after <%s>", + buffer, first->value.element.name); + goto error; + } + + if ((node = mxmlNewElement(parent, buffer)) == NULL) + { + /* + * Print error and return... + */ + + mxml_error("Unable to add declaration node to parent <%s>!", + parent ? parent->value.element.name : "null"); + goto error; + } + + if (sax_cb) + { + (*sax_cb)(node, MXML_SAX_DIRECTIVE, sax_data); + + if (!mxmlRelease(node)) + node = NULL; + } + + if (node) + { + if (!first) + first = node; + + if (!parent) + { + parent = node; + + if (cb) + type = (*cb)(parent); + } + } + } + else if (buffer[0] == '/') + { + /* + * Handle close tag... + */ + + if (!parent || strcmp(buffer + 1, parent->value.element.name)) + { + /* + * Close tag doesn't match tree; print an error for now... + */ + + mxml_error("Mismatched close tag <%s> under parent <%s>!", + buffer, parent ? parent->value.element.name : "(null)"); + goto error; + } + + /* + * Keep reading until we see >... + */ + + while (ch != '>' && ch != EOF) + ch = (*getc_cb)(p, &encoding); + + node = parent; + parent = parent->parent; + + if (sax_cb) + { + (*sax_cb)(node, MXML_SAX_ELEMENT_CLOSE, sax_data); + + if (!mxmlRelease(node) && first == node) + first = NULL; + } + + /* + * Ascend into the parent and set the value type as needed... + */ + + if (cb && parent) + type = (*cb)(parent); + } + else + { + /* + * Handle open tag... + */ + + if (!parent && first) + { + /* + * There can only be one root element! + */ + + mxml_error("<%s> cannot be a second root node after <%s>", + buffer, first->value.element.name); + goto error; + } + + if ((node = mxmlNewElement(parent, buffer)) == NULL) + { + /* + * Just print error for now... + */ + + mxml_error("Unable to add element node to parent <%s>!", + parent ? parent->value.element.name : "null"); + goto error; + } + + if (mxml_isspace(ch)) + { + if ((ch = mxml_parse_element(node, p, &encoding, getc_cb)) == EOF) + goto error; + } + else if (ch == '/') + { + if ((ch = (*getc_cb)(p, &encoding)) != '>') + { + mxml_error("Expected > but got '%c' instead for element <%s/>!", + ch, buffer); + mxmlDelete(node); + goto error; + } + + ch = '/'; + } + + if (sax_cb) + (*sax_cb)(node, MXML_SAX_ELEMENT_OPEN, sax_data); + + if (!first) + first = node; + + if (ch == EOF) + break; + + if (ch != '/') + { + /* + * Descend into this node, setting the value type as needed... + */ + + parent = node; + + if (cb && parent) + type = (*cb)(parent); + } + else if (sax_cb) + { + (*sax_cb)(node, MXML_SAX_ELEMENT_CLOSE, sax_data); + + if (!mxmlRelease(node) && first == node) + first = NULL; + } + } + + bufptr = buffer; + } + else if (ch == '&') + { + /* + * Add character entity to current buffer... + */ + + if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF) + goto error; + + if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) + goto error; + } + else if (type == MXML_OPAQUE || type == MXML_CUSTOM || !mxml_isspace(ch)) + { + /* + * Add character to current buffer... + */ + + if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) + goto error; + } + } + + /* + * Free the string buffer - we don't need it anymore... + */ + + free(buffer); + + /* + * Find the top element and return it... + */ + + if (parent) + { + node = parent; + + while (parent->parent != top && parent->parent) + parent = parent->parent; + + if (node != parent) + { + mxml_error("Missing close tag </%s> under parent <%s>!", + node->value.element.name, + node->parent ? node->parent->value.element.name : "(null)"); + + mxmlDelete(first); + + return (NULL); + } + } + + if (parent) + return (parent); + else + return (first); + + /* + * Common error return... + */ + +error: + + mxmlDelete(first); + + free(buffer); + + return (NULL); +} + + +/* + * 'mxml_parse_element()' - Parse an element for any attributes... + */ + +static int /* O - Terminating character */ +mxml_parse_element( + mxml_node_t *node, /* I - Element node */ + void *p, /* I - Data to read from */ + int *encoding, /* IO - Encoding */ + _mxml_getc_cb_t getc_cb) /* I - Data callback */ +{ + int ch, /* Current character in file */ + quote; /* Quoting character */ + char *name, /* Attribute name */ + *value, /* Attribute value */ + *ptr; /* Pointer into name/value */ + int namesize, /* Size of name string */ + valsize; /* Size of value string */ + + + /* + * Initialize the name and value buffers... + */ + + if ((name = malloc(64)) == NULL) + { + mxml_error("Unable to allocate memory for name!"); + return (EOF); + } + + namesize = 64; + + if ((value = malloc(64)) == NULL) + { + free(name); + mxml_error("Unable to allocate memory for value!"); + return (EOF); + } + + valsize = 64; + + /* + * Loop until we hit a >, /, ?, or EOF... + */ + + while ((ch = (*getc_cb)(p, encoding)) != EOF) + { +#if DEBUG > 1 + fprintf(stderr, "parse_element: ch='%c'\n", ch); +#endif /* DEBUG > 1 */ + + /* + * Skip leading whitespace... + */ + + if (mxml_isspace(ch)) + continue; + + /* + * Stop at /, ?, or >... + */ + + if (ch == '/' || ch == '?') + { + /* + * Grab the > character and print an error if it isn't there... + */ + + quote = (*getc_cb)(p, encoding); + + if (quote != '>') + { + mxml_error("Expected '>' after '%c' for element %s, but got '%c'!", + ch, node->value.element.name, quote); + goto error; + } + + break; + } + else if (ch == '<') + { + mxml_error("Bare < in element %s!", node->value.element.name); + goto error; + } + else if (ch == '>') + break; + + /* + * Read the attribute name... + */ + + name[0] = ch; + ptr = name + 1; + + if (ch == '\"' || ch == '\'') + { + /* + * Name is in quotes, so get a quoted string... + */ + + quote = ch; + + while ((ch = (*getc_cb)(p, encoding)) != EOF) + { + if (ch == '&') + if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF) + goto error; + + if (mxml_add_char(ch, &ptr, &name, &namesize)) + goto error; + + if (ch == quote) + break; + } + } + else + { + /* + * Grab an normal, non-quoted name... + */ + + while ((ch = (*getc_cb)(p, encoding)) != EOF) + if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>' || + ch == '?') + break; + else + { + if (ch == '&') + if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF) + goto error; + + if (mxml_add_char(ch, &ptr, &name, &namesize)) + goto error; + } + } + + *ptr = '\0'; + + if (mxmlElementGetAttr(node, name)) + goto error; + + while (ch != EOF && mxml_isspace(ch)) + ch = (*getc_cb)(p, encoding); + + if (ch == '=') + { + /* + * Read the attribute value... + */ + + while ((ch = (*getc_cb)(p, encoding)) != EOF && mxml_isspace(ch)); + + if (ch == EOF) + { + mxml_error("Missing value for attribute '%s' in element %s!", + name, node->value.element.name); + goto error; + } + + if (ch == '\'' || ch == '\"') + { + /* + * Read quoted value... + */ + + quote = ch; + ptr = value; + + while ((ch = (*getc_cb)(p, encoding)) != EOF) + if (ch == quote) + break; + else + { + if (ch == '&') + if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF) + goto error; + + if (mxml_add_char(ch, &ptr, &value, &valsize)) + goto error; + } + + *ptr = '\0'; + } + else + { + /* + * Read unquoted value... + */ + + value[0] = ch; + ptr = value + 1; + + while ((ch = (*getc_cb)(p, encoding)) != EOF) + if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>') + break; + else + { + if (ch == '&') + if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF) + goto error; + + if (mxml_add_char(ch, &ptr, &value, &valsize)) + goto error; + } + + *ptr = '\0'; + } + + /* + * Set the attribute with the given string value... + */ + + mxmlElementSetAttr(node, name, value); + } + else + { + mxml_error("Missing value for attribute '%s' in element %s!", + name, node->value.element.name); + goto error; + } + + /* + * Check the end character... + */ + + if (ch == '/' || ch == '?') + { + /* + * Grab the > character and print an error if it isn't there... + */ + + quote = (*getc_cb)(p, encoding); + + if (quote != '>') + { + mxml_error("Expected '>' after '%c' for element %s, but got '%c'!", + ch, node->value.element.name, quote); + ch = EOF; + } + + break; + } + else if (ch == '>') + break; + } + + /* + * Free the name and value buffers and return... + */ + + free(name); + free(value); + + return (ch); + + /* + * Common error return point... + */ + +error: + + free(name); + free(value); + + return (EOF); +} + + +/* + * 'mxml_string_getc()' - Get a character from a string. + */ + +static int /* O - Character or EOF */ +mxml_string_getc(void *p, /* I - Pointer to file */ + int *encoding) /* IO - Encoding */ +{ + int ch; /* Character */ + const char **s; /* Pointer to string pointer */ + + + s = (const char **)p; + + if ((ch = (*s)[0] & 255) != 0 || *encoding == ENCODE_UTF16LE) + { + /* + * Got character; convert UTF-8 to integer and return... + */ + + (*s)++; + + switch (*encoding) + { + case ENCODE_UTF8 : + if (!(ch & 0x80)) + { +#if DEBUG > 1 + printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + + return (ch); + } + else if (ch == 0xfe) + { + /* + * UTF-16 big-endian BOM? + */ + + if (((*s)[0] & 255) != 0xff) + return (EOF); + + *encoding = ENCODE_UTF16BE; + (*s)++; + + return (mxml_string_getc(p, encoding)); + } + else if (ch == 0xff) + { + /* + * UTF-16 little-endian BOM? + */ + + if (((*s)[0] & 255) != 0xfe) + return (EOF); + + *encoding = ENCODE_UTF16LE; + (*s)++; + + return (mxml_string_getc(p, encoding)); + } + else if ((ch & 0xe0) == 0xc0) + { + /* + * Two-byte value... + */ + + if (((*s)[0] & 0xc0) != 0x80) + return (EOF); + + ch = ((ch & 0x1f) << 6) | ((*s)[0] & 0x3f); + + (*s)++; + + if (ch < 0x80) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + +#if DEBUG > 1 + printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + return (ch); + } + else if ((ch & 0xf0) == 0xe0) + { + /* + * Three-byte value... + */ + + if (((*s)[0] & 0xc0) != 0x80 || + ((*s)[1] & 0xc0) != 0x80) + return (EOF); + + ch = ((((ch & 0x0f) << 6) | ((*s)[0] & 0x3f)) << 6) | ((*s)[1] & 0x3f); + + (*s) += 2; + + if (ch < 0x800) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + + /* + * Ignore (strip) Byte Order Mark (BOM)... + */ + + if (ch == 0xfeff) + return (mxml_string_getc(p, encoding)); + +#if DEBUG > 1 + printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + return (ch); + } + else if ((ch & 0xf8) == 0xf0) + { + /* + * Four-byte value... + */ + + if (((*s)[0] & 0xc0) != 0x80 || + ((*s)[1] & 0xc0) != 0x80 || + ((*s)[2] & 0xc0) != 0x80) + return (EOF); + + ch = ((((((ch & 0x07) << 6) | ((*s)[0] & 0x3f)) << 6) | + ((*s)[1] & 0x3f)) << 6) | ((*s)[2] & 0x3f); + + (*s) += 3; + + if (ch < 0x10000) + { + mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch); + return (EOF); + } + +#if DEBUG > 1 + printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + return (ch); + } + else + return (EOF); + + case ENCODE_UTF16BE : + /* + * Read UTF-16 big-endian char... + */ + + ch = (ch << 8) | ((*s)[0] & 255); + (*s) ++; + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + else if (ch >= 0xd800 && ch <= 0xdbff) + { + /* + * Multi-word UTF-16 char... + */ + + int lch; /* Lower word */ + + + if (!(*s)[0]) + return (EOF); + + lch = (((*s)[0] & 255) << 8) | ((*s)[1] & 255); + (*s) += 2; + + if (lch < 0xdc00 || lch >= 0xdfff) + return (EOF); + + ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; + } + +#if DEBUG > 1 + printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + return (ch); + + case ENCODE_UTF16LE : + /* + * Read UTF-16 little-endian char... + */ + + ch = ch | (((*s)[0] & 255) << 8); + + if (!ch) + { + (*s) --; + return (EOF); + } + + (*s) ++; + + if (mxml_bad_char(ch)) + { + mxml_error("Bad control character 0x%02x not allowed by XML standard!", + ch); + return (EOF); + } + else if (ch >= 0xd800 && ch <= 0xdbff) + { + /* + * Multi-word UTF-16 char... + */ + + int lch; /* Lower word */ + + + if (!(*s)[1]) + return (EOF); + + lch = (((*s)[1] & 255) << 8) | ((*s)[0] & 255); + (*s) += 2; + + if (lch < 0xdc00 || lch >= 0xdfff) + return (EOF); + + ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; + } + +#if DEBUG > 1 + printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); +#endif /* DEBUG > 1 */ + + return (ch); + } + } + + return (EOF); +} + + +/* + * 'mxml_string_putc()' - Write a character to a string. + */ + +static int /* O - 0 on success, -1 on failure */ +mxml_string_putc(int ch, /* I - Character to write */ + void *p) /* I - Pointer to string pointers */ +{ + char **pp; /* Pointer to string pointers */ + + + pp = (char **)p; + + if (pp[0] < pp[1]) + pp[0][0] = ch; + + pp[0] ++; + + return (0); +} + + +/* + * 'mxml_write_name()' - Write a name string. + */ + +static int /* O - 0 on success, -1 on failure */ +mxml_write_name(const char *s, /* I - Name to write */ + void *p, /* I - Write pointer */ + int (*putc_cb)(int, void *)) + /* I - Write callback */ +{ + char quote; /* Quote character */ + const char *name; /* Entity name */ + + + if (*s == '\"' || *s == '\'') + { + /* + * Write a quoted name string... + */ + + if ((*putc_cb)(*s, p) < 0) + return (-1); + + quote = *s++; + + while (*s && *s != quote) + { + if ((name = mxmlEntityGetName(*s)) != NULL) + { + if ((*putc_cb)('&', p) < 0) + return (-1); + + while (*name) + { + if ((*putc_cb)(*name, p) < 0) + return (-1); + + name ++; + } + + if ((*putc_cb)(';', p) < 0) + return (-1); + } + else if ((*putc_cb)(*s, p) < 0) + return (-1); + + s ++; + } + + /* + * Write the end quote... + */ + + if ((*putc_cb)(quote, p) < 0) + return (-1); + } + else + { + /* + * Write a non-quoted name string... + */ + + while (*s) + { + if ((*putc_cb)(*s, p) < 0) + return (-1); + + s ++; + } + } + + return (0); +} + + +/* + * 'mxml_write_node()' - Save an XML node to a file. + */ + +static int /* O - Column or -1 on error */ +mxml_write_node(mxml_node_t *node, /* I - Node to write */ + void *p, /* I - File to write to */ + mxml_save_cb_t cb, /* I - Whitespace callback */ + int col, /* I - Current column */ + _mxml_putc_cb_t putc_cb,/* I - Output callback */ + _mxml_global_t *global)/* I - Global data */ +{ + int i, /* Looping var */ + width; /* Width of attr + value */ + mxml_attr_t *attr; /* Current attribute */ + char s[255]; /* Temporary string */ + + + /* + * Print the node value... + */ + + switch (node->type) + { + case MXML_ELEMENT : + col = mxml_write_ws(node, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb); + + if ((*putc_cb)('<', p) < 0) + return (-1); + if (node->value.element.name[0] == '?' || + !strncmp(node->value.element.name, "!--", 3) || + !strncmp(node->value.element.name, "![CDATA[", 8)) + { + /* + * Comments, CDATA, and processing instructions do not + * use character entities. + */ + + const char *ptr; /* Pointer into name */ + + + for (ptr = node->value.element.name; *ptr; ptr ++) + if ((*putc_cb)(*ptr, p) < 0) + return (-1); + } + else if (mxml_write_name(node->value.element.name, p, putc_cb) < 0) + return (-1); + + col += strlen(node->value.element.name) + 1; + + for (i = node->value.element.num_attrs, attr = node->value.element.attrs; + i > 0; + i --, attr ++) + { + width = strlen(attr->name); + + if (attr->value) + width += strlen(attr->value) + 3; + + if (global->wrap > 0 && (col + width) > global->wrap) + { + if ((*putc_cb)('\n', p) < 0) + return (-1); + + col = 0; + } + else + { + if ((*putc_cb)(' ', p) < 0) + return (-1); + + col ++; + } + + if (mxml_write_name(attr->name, p, putc_cb) < 0) + return (-1); + + if (attr->value) + { + if ((*putc_cb)('=', p) < 0) + return (-1); + if ((*putc_cb)('\"', p) < 0) + return (-1); + if (mxml_write_string(attr->value, p, putc_cb) < 0) + return (-1); + if ((*putc_cb)('\"', p) < 0) + return (-1); + } + + col += width; + } + + if (node->child) + { + /* + * Write children... + */ + + mxml_node_t *child; /* Current child */ + + + if ((*putc_cb)('>', p) < 0) + return (-1); + else + col ++; + + col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); + + for (child = node->child; child; child = child->next) + { + if ((col = mxml_write_node(child, p, cb, col, putc_cb, global)) < 0) + return (-1); + } + + /* + * The ? and ! elements are special-cases and have no end tags... + */ + + if (node->value.element.name[0] != '!' && + node->value.element.name[0] != '?') + { + col = mxml_write_ws(node, p, cb, MXML_WS_BEFORE_CLOSE, col, putc_cb); + + if ((*putc_cb)('<', p) < 0) + return (-1); + if ((*putc_cb)('/', p) < 0) + return (-1); + if (mxml_write_string(node->value.element.name, p, putc_cb) < 0) + return (-1); + if ((*putc_cb)('>', p) < 0) + return (-1); + + col += strlen(node->value.element.name) + 3; + + col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_CLOSE, col, putc_cb); + } + } + else if (node->value.element.name[0] == '!' || + node->value.element.name[0] == '?') + { + /* + * The ? and ! elements are special-cases... + */ + + if ((*putc_cb)('>', p) < 0) + return (-1); + else + col ++; + + col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); + } + else + { + if ((*putc_cb)(' ', p) < 0) + return (-1); + if ((*putc_cb)('/', p) < 0) + return (-1); + if ((*putc_cb)('>', p) < 0) + return (-1); + + col += 3; + + col = mxml_write_ws(node, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); + } + break; + + case MXML_INTEGER : + if (node->prev) + { + if (global->wrap > 0 && col > global->wrap) + { + if ((*putc_cb)('\n', p) < 0) + return (-1); + + col = 0; + } + else if ((*putc_cb)(' ', p) < 0) + return (-1); + else + col ++; + } + + sprintf(s, "%d", node->value.integer); + if (mxml_write_string(s, p, putc_cb) < 0) + return (-1); + + col += strlen(s); + break; + + case MXML_OPAQUE : + if (mxml_write_string(node->value.opaque, p, putc_cb) < 0) + return (-1); + + col += strlen(node->value.opaque); + break; + + case MXML_REAL : + if (node->prev) + { + if (global->wrap > 0 && col > global->wrap) + { + if ((*putc_cb)('\n', p) < 0) + return (-1); + + col = 0; + } + else if ((*putc_cb)(' ', p) < 0) + return (-1); + else + col ++; + } + + sprintf(s, "%f", node->value.real); + if (mxml_write_string(s, p, putc_cb) < 0) + return (-1); + + col += strlen(s); + break; + + case MXML_TEXT : + if (node->value.text.whitespace && col > 0) + { + if (global->wrap > 0 && col > global->wrap) + { + if ((*putc_cb)('\n', p) < 0) + return (-1); + + col = 0; + } + else if ((*putc_cb)(' ', p) < 0) + return (-1); + else + col ++; + } + + if (mxml_write_string(node->value.text.string, p, putc_cb) < 0) + return (-1); + + col += strlen(node->value.text.string); + break; + + case MXML_CUSTOM : + if (global->custom_save_cb) + { + char *data; /* Custom data string */ + const char *newline; /* Last newline in string */ + + + if ((data = (*global->custom_save_cb)(node)) == NULL) + return (-1); + + if (mxml_write_string(data, p, putc_cb) < 0) + return (-1); + + if ((newline = strrchr(data, '\n')) == NULL) + col += strlen(data); + else + col = strlen(newline); + + free(data); + break; + } + + default : /* Should never happen */ + return (-1); + } + + return (col); +} + + +/* + * 'mxml_write_string()' - Write a string, escaping & and < as needed. + */ + +static int /* O - 0 on success, -1 on failure */ +mxml_write_string( + const char *s, /* I - String to write */ + void *p, /* I - Write pointer */ + _mxml_putc_cb_t putc_cb) /* I - Write callback */ +{ + const char *name; /* Entity name, if any */ + + + while (*s) + { + if ((name = mxmlEntityGetName(*s)) != NULL) + { + if ((*putc_cb)('&', p) < 0) + return (-1); + + while (*name) + { + if ((*putc_cb)(*name, p) < 0) + return (-1); + name ++; + } + + if ((*putc_cb)(';', p) < 0) + return (-1); + } + else if ((*putc_cb)(*s, p) < 0) + return (-1); + + s ++; + } + + return (0); +} + + +/* + * 'mxml_write_ws()' - Do whitespace callback... + */ + +static int /* O - New column */ +mxml_write_ws(mxml_node_t *node, /* I - Current node */ + void *p, /* I - Write pointer */ + mxml_save_cb_t cb, /* I - Callback function */ + int ws, /* I - Where value */ + int col, /* I - Current column */ + _mxml_putc_cb_t putc_cb) /* I - Write callback */ +{ + const char *s; /* Whitespace string */ + + + if (cb && (s = (*cb)(node, ws)) != NULL) + { + while (*s) + { + if ((*putc_cb)(*s, p) < 0) + return (-1); + else if (*s == '\n') + col = 0; + else if (*s == '\t') + { + col += MXML_TAB; + col = col - (col % MXML_TAB); + } + else + col ++; + + s ++; + } + } + + return (col); +} + + +/* + * End of "$Id: mxml-file.c 438 2011-03-24 05:47:51Z mike $". + */ |