From 8219259c39f19f45c2f094a6ba7930f940b1074a Mon Sep 17 00:00:00 2001
From: Tor Andersson
Date: Tue, 5 May 2015 18:18:36 +0200
Subject: epub: Decode URL escapes in epub paths.

---
 source/fitz/string.c      | 39 +++++++++++++++++++++++++++++++++++++++
 source/html/epub-doc.c    |  2 +-
 source/html/html-layout.c |  2 ++
 3 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'source')

diff --git a/source/fitz/string.c b/source/fitz/string.c
index 224eeff7..820a4426 100644
--- a/source/fitz/string.c
+++ b/source/fitz/string.c
@@ -84,6 +84,44 @@ fz_dirname(char *dir, const char *path, int n)
 	dir[i+1] = 0;
 }
 
+static int ishex(int a)
+{
+	return (a >= 'A' && a <= 'F') ||
+		(a >= 'a' && a <= 'f') ||
+		(a >= '0' && a <= '9');
+}
+
+static int tohex(int c)
+{
+	if (c >= '0' && c <= '9') return c - '0';
+	if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
+	if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
+	return 0;
+}
+
+char *
+fz_urldecode(char *url)
+{
+	char *s = url;
+	char *p = url;
+	while (*s)
+	{
+		int c = (unsigned char) *s++;
+		if (c == '%' && ishex(s[0]) && ishex(s[1]))
+		{
+			int a = tohex(*s++);
+			int b = tohex(*s++);
+			*p++ = a << 4 | b;
+		}
+		else
+		{
+			*p++ = c;
+		}
+	}
+	*p = 0;
+	return url;
+}
+
 #define SEP(x) ((x)=='/' || (x) == 0)
 
 char *
@@ -139,6 +177,7 @@ fz_cleanname(char *name)
 	*q = '\0';
 	return name;
 }
+
 enum
 {
 	UTFmax = 4, /* maximum bytes per rune */
diff --git a/source/html/epub-doc.c b/source/html/epub-doc.c
index 2ea9f298..e77770ac 100644
--- a/source/html/epub-doc.c
+++ b/source/html/epub-doc.c
@@ -164,7 +164,7 @@ path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *
 	fz_strlcpy(path, base_uri, n);
 	fz_strlcat(path, "/", n);
 	fz_strlcat(path, rel_path, n);
-	return fz_cleanname(path);
+	return fz_cleanname(fz_urldecode(path));
 }
 
 static epub_chapter *
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index a64fccc4..4926dc51 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -129,6 +129,7 @@ static void generate_image(fz_context *ctx, fz_archive *zip, const char *base_ur
 	fz_strlcpy(path, base_uri, sizeof path);
 	fz_strlcat(path, "/", sizeof path);
 	fz_strlcat(path, src, sizeof path);
+	fz_urldecode(path);
 	fz_cleanname(path);
 
 	fz_try(ctx)
@@ -1024,6 +1025,7 @@ html_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css_rul
 	fz_strlcpy(path, base_uri, sizeof path);
 	fz_strlcat(path, "/", sizeof path);
 	fz_strlcat(path, href, sizeof path);
+	fz_urldecode(path);
 	fz_cleanname(path);
 
 	buf = fz_read_archive_entry(ctx, zip, path);
-- 
cgit v1.2.3