From 955573c9792c6992398072f83a1ae0de1a29dcd9 Mon Sep 17 00:00:00 2001
From: Tor Andersson
Date: Tue, 13 Nov 2018 23:03:25 +0100
Subject: Bug 697414: Allow more encodings in XML parser.

Support some common FB2 encodings as well as UTF-8 and UTF-16.
---
 source/fitz/xml.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/source/fitz/xml.c b/source/fitz/xml.c
index b7c89cf4..6b117479 100644
--- a/source/fitz/xml.c
+++ b/source/fitz/xml.c
@@ -581,8 +581,50 @@ parse_attribute_value:
 	return "end of data in attribute value";
 }
 
-static char *convert_to_utf8(fz_context *doc, const unsigned char *s, size_t n, int *dofree)
+static int startswith(const char *a, const char *b)
 {
+	return !fz_strncasecmp(a, b, strlen(b));
+}
+
+static const unsigned short *find_xml_encoding(char *s)
+{
+	const unsigned short *table = NULL;
+	char *end, *xml, *enc;
+
+	end = strchr(s, '>');
+	if (end)
+	{
+		*end = 0;
+		xml = strstr(s, "<?xml");
+		if (xml)
+		{
+			enc = strstr(xml, "encoding=");
+			if (enc)
+			{
+				enc += 10;
+				if (startswith(enc, "iso-8859-1") || startswith(enc, "latin1"))
+					table = fz_unicode_from_iso8859_1;
+				else if (startswith(enc, "iso-8859-7") || startswith(enc, "greek"))
+					table = fz_unicode_from_iso8859_7;
+				else if (startswith(enc, "koi8"))
+					table = fz_unicode_from_koi8u;
+				else if (startswith(enc, "windows-1250"))
+					table = fz_unicode_from_windows_1250;
+				else if (startswith(enc, "windows-1251"))
+					table = fz_unicode_from_windows_1251;
+				else if (startswith(enc, "windows-1252"))
+					table = fz_unicode_from_windows_1252;
+			}
+		}
+		*end = '>';
+	}
+
+	return table;
+}
+
+static char *convert_to_utf8(fz_context *doc, unsigned char *s, size_t n, int *dofree)
+{
+	const unsigned short *table;
 	const unsigned char *e = s + n;
 	char *dst, *d;
 	int c;
@@ -613,6 +655,18 @@ static char *convert_to_utf8(fz_context *doc, const unsigned char *s, size_t n,
 		return dst;
 	}
 
+	table = find_xml_encoding((char*)s);
+	if (table) {
+		dst = d = fz_malloc(doc, n * FZ_UTFMAX);
+		while (*s) {
+			c = table[*s++];
+			d += fz_runetochar(d, c);
+		}
+		*d = 0;
+		*dofree = 1;
+		return dst;
+	}
+
 	*dofree = 0;
 
 	if (s[0] == 0xEF && s[1] == 0xBB && s[2] == 0xBF)
-- 
cgit v1.2.3