From ddf0dbf478b2998dbab95d15b3b5b5ba3b59652a Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Sat, 24 Nov 2012 14:28:29 +0100 Subject: xps: Save text content in XML nodes. New accessor xml_text() will return NULL or text content of a node. Tag names for text nodes is the empty string "". Fix bug 692191. --- xps/xps_xml.c | 91 ++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 69 insertions(+), 22 deletions(-) (limited to 'xps/xps_xml.c') diff --git a/xps/xps_xml.c b/xps/xps_xml.c index 8daee00b..e89a24d2 100644 --- a/xps/xps_xml.c +++ b/xps/xps_xml.c @@ -10,6 +10,7 @@ struct attribute struct element { char name[40]; + char *text; struct attribute *atts; struct element *up, *down, *next; }; @@ -28,21 +29,25 @@ static inline void indent(int n) void xml_print_element(struct element *item, int level) { while (item) { - struct attribute *att; - indent(level); - printf("<%s", item->name); - for (att = item->atts; att; att = att->next) - printf(" %s=\"%s\"", att->name, att->value); - if (item->down) { - printf(">\n"); - xml_print_element(item->down, level + 1); + if (item->text) { + printf("%s\n", item->text); + } else { + struct attribute *att; indent(level); - printf("\n", item->name); + printf("<%s", item->name); + for (att = item->atts; att; att = att->next) + printf(" %s=\"%s\"", att->name, att->value); + if (item->down) { + printf(">\n"); + xml_print_element(item->down, level + 1); + indent(level); + printf("\n", item->name); + } + else { + printf("/>\n"); + } + item = item->next; } - else { - printf("/>\n"); - } - item = item->next; } } @@ -56,6 +61,11 @@ struct element *xml_down(struct element *item) return item->down; } +char *xml_text(struct element *item) +{ + return item->text; +} + char *xml_tag(struct element *item) { return item->name; @@ -85,6 +95,8 @@ void xml_free_element(fz_context *ctx, struct element *item) { while (item) { struct element *next = item->next; + if (item->text) + fz_free(ctx, item->text); if (item->atts) xml_free_attribute(ctx, item->atts); if (item->down) @@ -135,6 +147,19 @@ static int xml_parse_entity(int *c, char *a) return 1; } +static inline int isname(int c) +{ + return c == '.' || c == '-' || c == '_' || c == ':' || + (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z'); +} + +static inline int iswhite(int c) +{ + return c == ' ' || c == '\r' || c == '\n' || c == '\t'; +} + static void xml_emit_open_tag(struct parser *parser, char *a, char *b) { struct element *head, *tail; @@ -146,6 +171,7 @@ static void xml_emit_open_tag(struct parser *parser, char *a, char *b) head->name[b - a] = 0; head->atts = NULL; + head->text = NULL; head->up = parser->head; head->down = NULL; head->next = NULL; @@ -205,17 +231,37 @@ static void xml_emit_close_tag(struct parser *parser) parser->head = parser->head->up; } -static inline int isname(int c) +static void xml_emit_text(struct parser *parser, char *a, char *b) { - return c == '.' || c == '-' || c == '_' || c == ':' || - (c >= '0' && c <= '9') || - (c >= 'A' && c <= 'Z') || - (c >= 'a' && c <= 'z'); -} + static char *empty = ""; + struct element *head; + char *s; + int c; -static inline int iswhite(int c) -{ - return c == ' ' || c == '\r' || c == '\n' || c == '\t'; + /* Skip all-whitespace text nodes */ + for (s = a; s < b; s++) + if (!iswhite(*s)) + break; + if (s == b) + return; + + xml_emit_open_tag(parser, empty, empty); + head = parser->head; + + /* entities are all longer than UTFmax so runetochar is safe */ + s = head->text = fz_malloc(parser->ctx, b - a + 1); + while (a < b) { + if (*a == '&') { + a += xml_parse_entity(&c, a); + s += fz_runetochar(s, c); + } + else { + *s++ = *a++; + } + } + *s = 0; + + xml_emit_close_tag(parser); } static char *xml_parse_document_imp(struct parser *x, char *p) @@ -226,6 +272,7 @@ static char *xml_parse_document_imp(struct parser *x, char *p) parse_text: mark = p; while (*p && *p != '<') ++p; + xml_emit_text(x, mark, p); if (*p == '<') { ++p; goto parse_element; } return NULL; -- cgit v1.2.3