summary | refs | log | tree | commit | diff
path: root/source/fitz/xml.c
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com>, 2016-02-23 21:52:00 +0100
committer: Tor Andersson <tor.andersson@artifex.com>, 2016-02-24 13:25:27 +0100
commit: 67c252d77a1665164c3b3d217d556f19ef879a99 (patch)
tree: 9df45de0ad5e886004876fab49c88a60a13426f4 /source/fitz/xml.c
parent: ca4af51ba401595acc76cecc8d6c0cc2c4cf6711 (diff)
download: mupdf-67c252d77a1665164c3b3d217d556f19ef879a99.tar.xz
xml: Implement SGML line break rules.
Always ignore a line break immediately after an opening tag and before a closing tag.
Diffstat (limited to 'source/fitz/xml.c')
-rw-r--r-- source/fitz/xml.c | 66
1 file changed, 50 insertions, 16 deletions
diff --git a/source/fitz/xml.c b/source/fitz/xml.c
index f301ad71..85763802 100644
--- a/source/fitz/xml.c
+++ b/source/fitz/xml.c
@@ -85,16 +85,43 @@ struct fz_xml_s
fz_xml *up, *down, *tail, *prev, *next;
};
-static inline void indent(int n)
+static void indent(int n)
{
- while (n--) putchar(' ');
+ while (n--) {
+ putchar(' ');
+ putchar(' ');
+ }
}
void fz_debug_xml(fz_xml *item, int level)
{
if (item->text)
{
- printf("%s\n", item->text);
+ char *s = item->text;
+ int c;
+ indent(level);
+ putchar('"');
+ while ((c = *s++)) {
+ switch (c) {
+ default:
+ if (c < 32 || c > 127) {
+ putchar('\\');
+ putchar('0' + ((c >> 6) & 7));
+ putchar('0' + ((c >> 3) & 7));
+ putchar('0' + ((c) & 7));
+ } else {
+ putchar(c);
+ }
+ break;
+ case '\\': putchar('\\'); putchar('\\'); break;
+ case '\b': putchar('\\'); putchar('b'); break;
+ case '\f': putchar('\\'); putchar('f'); break;
+ case '\n': putchar('\\'); putchar('n'); break;
+ case '\r': putchar('\\'); putchar('r'); break;
+ case '\t': putchar('\\'); putchar('t'); break;
+ }
+ }
+ putchar('\n');
}
else
{
@@ -102,21 +129,16 @@ void fz_debug_xml(fz_xml *item, int level)
struct attribute *att;
indent(level);
- printf("<%s", item->name);
+ printf("(%s\n", item->name);
for (att = item->atts; att; att = att->next)
- printf(" %s=\"%s\"", att->name, att->value);
- if (item->down)
{
- printf(">\n");
- for (child = item->down; child; child = child->next)
- fz_debug_xml(child, level + 1);
indent(level);
- printf("</%s>\n", item->name);
- }
- else
- {
- printf("/>\n");
+ printf("=%s %s\n", att->name, att->value);
}
+ for (child = item->down; child; child = child->next)
+ fz_debug_xml(child, level + 1);
+ indent(level);
+ printf(")%s\n", item->name);
}
}
@@ -432,7 +454,15 @@ static char *xml_parse_document_imp(fz_context *ctx, struct parser *parser, char
parse_text:
mark = p;
while (*p && *p != '<') ++p;
- if (mark != p) xml_emit_text(ctx, parser, mark, p);
+ if (mark < p) {
+ /* must skip linebreak immediately before an end tag */
+ if (p[1] == '/' && p[-1] == '\n') {
+ if (mark < p-1)
+ xml_emit_text(ctx, parser, mark, p-1);
+ } else {
+ xml_emit_text(ctx, parser, mark, p);
+ }
+ }
if (*p == '<') { ++p; goto parse_element; }
return NULL;
@@ -503,7 +533,11 @@ parse_element_name:
mark = p;
while (isname(*p)) ++p;
xml_emit_open_tag(ctx, parser, mark, p);
- if (*p == '>') { ++p; goto parse_text; }
+ if (*p == '>') {
+ ++p;
+ if (*p == '\n') ++p; /* must skip linebreak immediately after an opening tag */
+ goto parse_text;
+ }
if (p[0] == '/' && p[1] == '>') {
xml_emit_close_tag(ctx, parser);
p += 2;