From e603c180e7d70f64cf50302ecf6d8937172ed121 Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Sun, 3 Apr 2011 03:44:12 +0200 Subject: xps: Remove dependency on expat. --- xps/xpsxml.c | 588 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 318 insertions(+), 270 deletions(-) (limited to 'xps/xpsxml.c') diff --git a/xps/xpsxml.c b/xps/xpsxml.c index c6f41123..397b68ec 100644 --- a/xps/xpsxml.c +++ b/xps/xpsxml.c @@ -1,339 +1,387 @@ -/* Simple XML document object model on top of Expat. */ - #include "fitz.h" #include "muxps.h" -#include - -#define XMLBUFLEN 4096 - -#define NS_XPS "http://schemas.microsoft.com/xps/2005/06" -#define NS_MC "http://schemas.openxmlformats.org/markup-compatibility/2006" - -typedef struct xps_parser_s xps_parser; +struct attribute +{ + char name[32]; + char *value; + struct attribute *next; +}; -struct xps_parser_s +struct element { - xps_context *ctx; - xps_item *root; - xps_item *head; - char *error; - int compat; - char *base; /* base of relative URIs */ + char name[32]; + struct attribute *atts; + struct element *up, *down, *next; }; -struct xps_item_s +struct parser { - char *name; - char **atts; - xps_item *up; - xps_item *down; - xps_item *next; + struct element *head; }; -static char * -skip_namespace(char *s) +static inline void indent(int n) { - char *p = strchr(s, ' '); - if (p) - return p + 1; - return s; + while (n--) putchar(' '); } -static void -on_open_tag(void *zp, char *ns_name, char **atts) +void xml_print_element(struct element *item, int level) { - xps_parser *parser = zp; - xps_item *item; - xps_item *tail; - int namelen; - int attslen; - int textlen; - char *name, *p; - int i; - - if (parser->error) - return; - - /* check namespace */ - - name = NULL; - - p = strstr(ns_name, NS_XPS); - if (p == ns_name) - { - name = strchr(ns_name, ' ') + 1; + while (item) { + struct attribute *att; + indent(level); + printf("<%s", item->name); + for (att = item->atts; att; att = att->next) + printf(" %s=\"%s\"", att->name, att->value); + if (item->down) { + printf(">\n"); + xml_print_element(item->down, level + 1); + indent(level); + printf("\n", item->name); + } + else { + printf("/>\n"); + } + item = item->next; } +} - p = strstr(ns_name, NS_MC); - if (p == ns_name) - { - name = strchr(ns_name, ' ') + 1; - parser->compat = 1; - } +struct element *xml_next(struct element *item) +{ + return item->next; +} - if (!name) - { - fz_warn("unknown namespace: %s", ns_name); - name = ns_name; - } +struct element *xml_down(struct element *item) +{ + return item->down; +} - /* count size to alloc */ +char *xml_tag(struct element *item) +{ + return item->name; +} - namelen = strlen(name) + 1; /* zero terminated */ - attslen = sizeof(char*); /* with space for sentinel */ - textlen = 0; - for (i = 0; atts[i]; i++) - { - attslen += sizeof(char*); - if ((i & 1) == 0) - textlen += strlen(skip_namespace(atts[i])) + 1; - else - textlen += strlen(atts[i]) + 1; - } +char *xml_att(struct element *item, const char *name) +{ + struct attribute *att; + for (att = item->atts; att; att = att->next) + if (!strcmp(att->name, name)) + return att->value; + return NULL; +} - item = fz_malloc(sizeof(xps_item) + attslen + namelen + textlen); - if (!item) - { - parser->error = "out of memory"; +static void xml_free_attribute(struct attribute *att) +{ + while (att) { + struct attribute *next = att->next; + if (att->value) + fz_free(att->value); + fz_free(att); + att = next; } +} - /* copy strings to new memory */ - - item->atts = (char**) (((char*)item) + sizeof(xps_item)); - item->name = ((char*)item) + sizeof(xps_item) + attslen; - p = ((char*)item) + sizeof(xps_item) + attslen + namelen; +void xml_free_element(struct element *item) +{ + while (item) { + struct element *next = item->next; + if (item->atts) + xml_free_attribute(item->atts); + if (item->down) + xml_free_element(item->down); + fz_free(item); + item = next; + } +} - strcpy(item->name, name); - for (i = 0; atts[i]; i++) - { - item->atts[i] = p; - if ((i & 1) == 0) - strcpy(item->atts[i], skip_namespace(atts[i])); +static int xml_parse_entity(int *c, char *a) +{ + char *b; + if (a[1] == '#') { + if (a[2] == 'x') + *c = strtol(a + 3, &b, 16); else - strcpy(item->atts[i], atts[i]); - p += strlen(p) + 1; + *c = strtol(a + 2, &b, 10); + if (*b == ';') + return b - a; } - - item->atts[i] = 0; - - /* link item into tree */ - - item->up = parser->head; - item->down = NULL; - item->next = NULL; - - if (!parser->head) - { - parser->root = item; - parser->head = item; - return; + else if (a[1] == 'l' && a[2] == 't' && a[3] == ';') { + *c = '<'; + return 4; } - - if (!parser->head->down) - { - parser->head->down = item; - parser->head = item; - return; + else if (a[1] == 'g' && a[2] == 't' && a[3] == ';') { + *c = '>'; + return 4; } - - tail = parser->head->down; - while (tail->next) - tail = tail->next; - tail->next = item; - parser->head = item; + else if (a[1] == 'a' && a[2] == 'm' && a[3] == 'p' && a[4] == ';') { + *c = '&'; + return 5; + } + else if (a[1] == 'a' && a[2] == 'p' && a[3] == 'o' && a[4] == 's' && a[5] == ';') { + *c = '\''; + return 6; + } + else if (a[1] == 'q' && a[2] == 'u' && a[3] == 'o' && a[4] == 't' && a[5] == ';') { + *c = '"'; + return 6; + } + *c = *a++; + return 1; } -static void -on_close_tag(void *zp, char *name) +static void xml_emit_open_tag(struct parser *parser, char *a, char *b) { - xps_parser *parser = zp; + struct element *head, *tail; - if (parser->error) - return; + head = fz_malloc(sizeof(struct element)); + if (b - a > sizeof(head->name)) + b = a + sizeof(head->name); + memcpy(head->name, a, b - a); + head->name[b - a] = 0; - if (parser->head) - parser->head = parser->head->up; -} + head->atts = NULL; + head->up = parser->head; + head->down = NULL; + head->next = NULL; -static inline int -is_xml_space(int c) -{ - return c == ' ' || c == '\t' || c == '\r' || c == '\n'; -} - -static void -on_text(void *zp, char *buf, int len) -{ - xps_parser *parser = zp; - char *atts[3]; - int i; - - if (parser->error) - return; - - for (i = 0; i < len; i++) - { - if (!is_xml_space(buf[i])) - { - char *tmp = fz_malloc(len + 1); - if (!tmp) - { - parser->error = "out of memory"; - return; - } - - atts[0] = ""; - atts[1] = tmp; - atts[2] = NULL; - - memcpy(tmp, buf, len); - tmp[len] = 0; - on_open_tag(zp, "", atts); - on_close_tag(zp, ""); - fz_free(tmp); - return; - } + if (!parser->head->down) { + parser->head->down = head; + } + else { + tail = parser->head->down; + while (tail->next) + tail = tail->next; + tail->next = head; } + + parser->head = head; } -static xps_item * -xps_process_compatibility(xps_context *ctx, xps_item *root) +static void xml_emit_att_name(struct parser *parser, char *a, char *b) { - fz_warn("XPS document uses markup compatibility tags"); - return root; + struct element *head = parser->head; + struct attribute *att; + + att = fz_malloc(sizeof(struct attribute)); + if (b - a > sizeof(att->name)) + b = a + sizeof(att->name); + memcpy(att->name, a, b - a); + att->name[b - a] = 0; + att->value = NULL; + att->next = head->atts; + head->atts = att; } -xps_item * -xps_parse_xml(xps_context *ctx, byte *buf, int len) +static void xml_emit_att_value(struct parser *parser, char *a, char *b) { - xps_parser parser; - XML_Parser xp; - int code; - - parser.ctx = ctx; - parser.root = NULL; - parser.head = NULL; - parser.error = NULL; - parser.compat = 0; - - xp = XML_ParserCreateNS(NULL, ' '); - if (!xp) - { - fz_throw("xml error: cannot create expat parser"); - return NULL; - } - - XML_SetUserData(xp, &parser); - XML_SetParamEntityParsing(xp, XML_PARAM_ENTITY_PARSING_NEVER); - XML_SetStartElementHandler(xp, (XML_StartElementHandler)on_open_tag); - XML_SetEndElementHandler(xp, (XML_EndElementHandler)on_close_tag); - XML_SetCharacterDataHandler(xp, (XML_CharacterDataHandler)on_text); - - code = XML_Parse(xp, (char*)buf, len, 1); - if (code == 0) - { - if (parser.root) - xps_free_item(ctx, parser.root); - XML_ParserFree(xp); - fz_throw("xml error: %s", XML_ErrorString(XML_GetErrorCode(xp))); - return NULL; + struct element *head = parser->head; + struct attribute *att = head->atts; + char *s; + int c; + + /* entities are all longer than UTFmax so runetochar is safe */ + s = att->value = fz_malloc(b - a + 1); + while (a < b) { + if (*a == '&') { + a += xml_parse_entity(&c, a); + s += runetochar(s, &c); + } + else { + *s++ = *a++; + } } - - XML_ParserFree(xp); - - if (parser.compat) - xps_process_compatibility(ctx, parser.root); - - return parser.root; + *s = 0; } -xps_item * -xps_next(xps_item *item) +static void xml_emit_close_tag(struct parser *parser) { - return item->next; + if (parser->head->up) + parser->head = parser->head->up; } -xps_item * -xps_down(xps_item *item) +static inline int isname(int c) { - return item->down; + return c == '.' || c == '-' || c == '_' || c == ':' || + (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z'); } -char * -xps_tag(xps_item *item) +static inline int iswhite(int c) { - return item->name; + return c == ' ' || c == '\r' || c == '\n' || c == '\t'; } -char * -xps_att(xps_item *item, const char *att) +static char *xml_parse_document_imp(struct parser *x, char *p) { - int i; - for (i = 0; item->atts[i]; i += 2) - if (!strcmp(item->atts[i], att)) - return item->atts[i + 1]; + char *mark; + int quote; + +parse_text: + mark = p; + while (*p && *p != '<') ++p; + if (*p == '<') { ++p; goto parse_element; } return NULL; -} -void -xps_free_item(xps_context *ctx, xps_item *item) -{ - xps_item *next; - while (item) - { - next = item->next; - if (item->down) - xps_free_item(ctx, item->down); - fz_free(item); - item = next; +parse_element: + if (*p == '/') { ++p; goto parse_closing_element; } + if (*p == '!') { ++p; goto parse_comment; } + if (*p == '?') { ++p; goto parse_processing_instruction; } + while (iswhite(*p)) ++p; + if (isname(*p)) + goto parse_element_name; + return "syntax error in element"; + +parse_comment: + if (*p == '[') goto parse_cdata; + if (*p++ != '-') return "syntax error in comment (') { + p += 3; + goto parse_text; + } + ++p; + } + return "end of data in comment"; + +parse_cdata: + if (p[1] != 'C' || p[2] != 'D' || p[3] != 'A' || p[4] != 'T' || p[5] != 'A' || p[6] != '[') + return "syntax error in CDATA section"; + p += 7; + mark = p; + while (*p) { + if (p[0] == ']' && p[1] == ']' && p[2] == '>') { + p += 3; + goto parse_text; + } + ++p; + } + return "end of data in CDATA section"; + +parse_processing_instruction: + while (*p) { + if (p[0] == '?' && p[1] == '>') { + p += 2; + goto parse_text; + } + ++p; + } + return "end of data in processing instruction"; + +parse_closing_element: + while (iswhite(*p)) ++p; + mark = p; + while (isname(*p)) ++p; + while (iswhite(*p)) ++p; + if (*p != '>') + return "syntax error in closing element"; + xml_emit_close_tag(x); + ++p; + goto parse_text; + +parse_element_name: + mark = p; + while (isname(*p)) ++p; + xml_emit_open_tag(x, mark, p); + if (*p == '>') { ++p; goto parse_text; } + if (p[0] == '/' && p[1] == '>') { + xml_emit_close_tag(x); + p += 2; + goto parse_text; + } + if (iswhite(*p)) + goto parse_attributes; + return "syntax error after element name"; + +parse_attributes: + while (iswhite(*p)) ++p; + if (isname(*p)) + goto parse_attribute_name; + if (*p == '>') { ++p; goto parse_text; } + if (p[0] == '/' && p[1] == '>') { + xml_emit_close_tag(x); + p += 2; + goto parse_text; } + return "syntax error in attributes"; + +parse_attribute_name: + mark = p; + while (isname(*p)) ++p; + xml_emit_att_name(x, mark, p); + while (iswhite(*p)) ++p; + if (*p == '=') { ++p; goto parse_attribute_value; } + return "syntax error after attribute name"; + +parse_attribute_value: + while (iswhite(*p)) ++p; + quote = *p++; + if (quote != '"' && quote != '\'') + return "missing quote character"; + mark = p; + while (*p && *p != quote) ++p; + if (*p == quote) { + xml_emit_att_value(x, mark, p++); + goto parse_attributes; + } + return "end of data in attribute value"; } -static void indent(int n) +static char *convert_to_utf8(unsigned char *s, int n) { - while (n--) - printf(" "); + unsigned char *e = s + n; + char *dst, *d; + int c; + + if (s[0] == 0xFE && s[1] == 0xFF) { + dst = d = fz_malloc(n * 2); + while (s + 1 < e) { + c = s[0] << 8 | s[1]; + d += runetochar(d, &c); + s += 2; + } + *d = 0; + return dst; + } + + if (s[0] == 0xFF && s[1] == 0xFE) { + dst = d = fz_malloc(n * 2); + while (s + 1 < e) { + c = s[0] | s[1] << 8; + d += runetochar(d, &c); + s += 2; + } + *d = 0; + return dst; + } + + return (char*)s; } -static void -xps_debug_item_imp(xps_item *item, int level, int loop) +struct element * +xml_parse_document(unsigned char *s, int n) { - int i; + struct parser parser; + struct element root; + char *p, *error; - while (item) - { - indent(level); + /* s is already null-terminated (see xps_new_part) */ - if (strlen(item->name) == 0) - printf("%s\n", item->atts[1]); - else - { - printf("<%s", item->name); - - for (i = 0; item->atts[i]; i += 2) - printf(" %s=\"%s\"", item->atts[i], item->atts[i+1]); - - if (item->down) - { - printf(">\n"); - xps_debug_item_imp(item->down, level + 1, 1); - indent(level); - printf("\n", item->name); - } - else - printf(" />\n"); - } + memset(&root, 0, sizeof(root)); + parser.head = &root; - item = item->next; + p = convert_to_utf8(s, n); - if (!loop) - return; + error = xml_parse_document_imp(&parser, p); + if (error) { + fz_throw(error); + return NULL; } -} -void -xps_debug_item(xps_item *item, int level) -{ - xps_debug_item_imp(item, level, 0); + if (p != (char*)s) + fz_free(p); + + return root.down; } -- cgit v1.2.3