summary | refs | log | tree | commit | diff
path: root/xps/xps_xml.c
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2012-11-24 14:28:29 +0100
committer: Robin Watts <robin.watts@artifex.com> 2012-11-26 12:49:48 +0000
commit: ddf0dbf478b2998dbab95d15b3b5b5ba3b59652a (patch)
tree: e76efb25b00df285f777899eeafedf5c968ac315 /xps/xps_xml.c
parent: 36085815dafbb3653a51b526c528aa15bb6fb6e1 (diff)
download: mupdf-ddf0dbf478b2998dbab95d15b3b5b5ba3b59652a.tar.xz
xps: Save text content in XML nodes.
The new accessor xml_text() returns the text content of a node, or NULL if the node is not a text node. The tag name of a text node is the empty string "". Fixes bug 692191.
Diffstat (limited to 'xps/xps_xml.c')
-rw-r--r-- xps/xps_xml.c 91
1 files changed, 69 insertions, 22 deletions
diff --git a/xps/xps_xml.c b/xps/xps_xml.c
index 8daee00b..e89a24d2 100644
--- a/xps/xps_xml.c
+++ b/xps/xps_xml.c
@@ -10,6 +10,7 @@ struct attribute
struct element
{
char name[40];
+ char *text;
struct attribute *atts;
struct element *up, *down, *next;
};
@@ -28,21 +29,25 @@ static inline void indent(int n)
void xml_print_element(struct element *item, int level)
{
while (item) {
- struct attribute *att;
- indent(level);
- printf("<%s", item->name);
- for (att = item->atts; att; att = att->next)
- printf(" %s=\"%s\"", att->name, att->value);
- if (item->down) {
- printf(">\n");
- xml_print_element(item->down, level + 1);
+ if (item->text) {
+ printf("%s\n", item->text);
+ } else {
+ struct attribute *att;
indent(level);
- printf("</%s>\n", item->name);
+ printf("<%s", item->name);
+ for (att = item->atts; att; att = att->next)
+ printf(" %s=\"%s\"", att->name, att->value);
+ if (item->down) {
+ printf(">\n");
+ xml_print_element(item->down, level + 1);
+ indent(level);
+ printf("</%s>\n", item->name);
+ }
+ else {
+ printf("/>\n");
+ }
+ item = item->next;
}
- else {
- printf("/>\n");
- }
- item = item->next;
}
}
@@ -56,6 +61,11 @@ struct element *xml_down(struct element *item)
return item->down;
}
+char *xml_text(struct element *item)
+{
+ return item->text;
+}
+
char *xml_tag(struct element *item)
{
return item->name;
@@ -85,6 +95,8 @@ void xml_free_element(fz_context *ctx, struct element *item)
{
while (item) {
struct element *next = item->next;
+ if (item->text)
+ fz_free(ctx, item->text);
if (item->atts)
xml_free_attribute(ctx, item->atts);
if (item->down)
@@ -135,6 +147,19 @@ static int xml_parse_entity(int *c, char *a)
return 1;
}
+static inline int isname(int c)
+{
+ return c == '.' || c == '-' || c == '_' || c == ':' ||
+ (c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z');
+}
+
+static inline int iswhite(int c)
+{
+ return c == ' ' || c == '\r' || c == '\n' || c == '\t';
+}
+
static void xml_emit_open_tag(struct parser *parser, char *a, char *b)
{
struct element *head, *tail;
@@ -146,6 +171,7 @@ static void xml_emit_open_tag(struct parser *parser, char *a, char *b)
head->name[b - a] = 0;
head->atts = NULL;
+ head->text = NULL;
head->up = parser->head;
head->down = NULL;
head->next = NULL;
@@ -205,17 +231,37 @@ static void xml_emit_close_tag(struct parser *parser)
parser->head = parser->head->up;
}
-static inline int isname(int c)
+static void xml_emit_text(struct parser *parser, char *a, char *b)
{
- return c == '.' || c == '-' || c == '_' || c == ':' ||
- (c >= '0' && c <= '9') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= 'a' && c <= 'z');
-}
+ static char *empty = "";
+ struct element *head;
+ char *s;
+ int c;
-static inline int iswhite(int c)
-{
- return c == ' ' || c == '\r' || c == '\n' || c == '\t';
+ /* Skip all-whitespace text nodes */
+ for (s = a; s < b; s++)
+ if (!iswhite(*s))
+ break;
+ if (s == b)
+ return;
+
+ xml_emit_open_tag(parser, empty, empty);
+ head = parser->head;
+
+ /* entities are all longer than UTFmax so runetochar is safe */
+ s = head->text = fz_malloc(parser->ctx, b - a + 1);
+ while (a < b) {
+ if (*a == '&') {
+ a += xml_parse_entity(&c, a);
+ s += fz_runetochar(s, c);
+ }
+ else {
+ *s++ = *a++;
+ }
+ }
+ *s = 0;
+
+ xml_emit_close_tag(parser);
}
static char *xml_parse_document_imp(struct parser *x, char *p)
@@ -226,6 +272,7 @@ static char *xml_parse_document_imp(struct parser *x, char *p)
parse_text:
mark = p;
while (*p && *p != '<') ++p;
+ xml_emit_text(x, mark, p);
if (*p == '<') { ++p; goto parse_element; }
return NULL;