summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2015-05-05 18:18:36 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2015-05-05 20:02:20 +0200
commit: 8219259c39f19f45c2f094a6ba7930f940b1074a (patch)
tree: d6b654c99a91f119c66bd805d5ab28c0807d922f
parent: 3d81ce6e34e6216b4ec67deee50aab60097a52b0 (diff)
download: mupdf-8219259c39f19f45c2f094a6ba7930f940b1074a.tar.xz
epub: Decode URL escapes in epub paths.
-rw-r--r-- include/mupdf/fitz/string.h | 5
-rw-r--r-- source/fitz/string.c | 39
-rw-r--r-- source/html/epub-doc.c | 2
-rw-r--r-- source/html/html-layout.c | 2
4 files changed, 47 insertions, 1 deletions
diff --git a/include/mupdf/fitz/string.h b/include/mupdf/fitz/string.h
index 4de882a6..010dc46a 100644
--- a/include/mupdf/fitz/string.h
+++ b/include/mupdf/fitz/string.h
@@ -58,6 +58,11 @@ int fz_strlcat(char *dst, const char *src, int n);
void fz_dirname(char *dir, const char *path, int dirsize);
/*
+ fz_urldecode: decode URL percent-escapes (%XX) in place and return url.
+*/
+char *fz_urldecode(char *url);
+
+/*
fz_cleanname: rewrite path to the shortest string that names the same path.
Eliminates multiple and trailing slashes, interprets "." and "..".
diff --git a/source/fitz/string.c b/source/fitz/string.c
index 224eeff7..820a4426 100644
--- a/source/fitz/string.c
+++ b/source/fitz/string.c
@@ -84,6 +84,44 @@ fz_dirname(char *dir, const char *path, int n)
dir[i+1] = 0;
}
+static int ishex(int a) /* Returns nonzero when a is an ASCII hexadecimal digit (0-9, a-f, A-F), otherwise 0. */
+{
+ return (a >= 'A' && a <= 'F') ||
+ (a >= 'a' && a <= 'f') ||
+ (a >= '0' && a <= '9');
+}
+
+static int tohex(int c) /* Maps an ASCII hexadecimal digit to its numeric value, 0 through 15. */
+{
+ if (c >= '0' && c <= '9') return c - '0';
+ if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
+ if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
+ return 0; /* c is not a hex digit: 0 is returned rather than an error indication */
+}
+
+char * /* returns the url argument itself */
+fz_urldecode(char *url) /* Decodes %XX escapes in the NUL-terminated string url, overwriting it in place. */
+{
+ char *s = url; /* read cursor */
+ char *p = url; /* write cursor; advances one byte per step while s advances one or three, so it never passes s */
+ while (*s)
+ {
+ int c = (unsigned char) *s++;
+ if (c == '%' && ishex(s[0]) && ishex(s[1])) /* s[1] is only examined when s[0] is a hex digit, so the terminator is never read past */
+ {
+ int a = tohex(*s++); /* high nibble */
+ int b = tohex(*s++); /* low nibble */
+ *p++ = a << 4 | b; /* NOTE: "%00" stores a NUL byte here, so the result reads as a shorter C string */
+ }
+ else
+ {
+ *p++ = c; /* every other byte, including a '%' not followed by two hex digits, is copied unchanged */
+ }
+ }
+ *p = 0; /* terminate the (possibly shorter) decoded string */
+ return url;
+}
+
#define SEP(x) ((x)=='/' || (x) == 0)
char *
@@ -139,6 +177,7 @@ fz_cleanname(char *name)
*q = '\0';
return name;
}
+
enum
{
UTFmax = 4, /* maximum bytes per rune */
diff --git a/source/html/epub-doc.c b/source/html/epub-doc.c
index 2ea9f298..e77770ac 100644
--- a/source/html/epub-doc.c
+++ b/source/html/epub-doc.c
@@ -164,7 +164,7 @@ path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *
fz_strlcpy(path, base_uri, n);
fz_strlcat(path, "/", n);
fz_strlcat(path, rel_path, n);
- return fz_cleanname(path);
+ return fz_cleanname(fz_urldecode(path));
}
static epub_chapter *
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index a64fccc4..4926dc51 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -129,6 +129,7 @@ static void generate_image(fz_context *ctx, fz_archive *zip, const char *base_ur
fz_strlcpy(path, base_uri, sizeof path);
fz_strlcat(path, "/", sizeof path);
fz_strlcat(path, src, sizeof path);
+ fz_urldecode(path);
fz_cleanname(path);
fz_try(ctx)
@@ -1024,6 +1025,7 @@ html_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css_rul
fz_strlcpy(path, base_uri, sizeof path);
fz_strlcat(path, "/", sizeof path);
fz_strlcat(path, href, sizeof path);
+ fz_urldecode(path);
fz_cleanname(path);
buf = fz_read_archive_entry(ctx, zip, path);