summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tor Andersson <tor@ghostscript.com>, 2010-12-04 21:17:43 +0000
committer: Tor Andersson <tor@ghostscript.com>, 2010-12-04 21:17:43 +0000
commit: cf4e4a89c9614198fc0e38dc2f740ca1c05ab867 (patch)
tree: 81461dcf04b38f06b797ab0f612071e4139bcfa7
parent: c8c6acfa08a280e6eabd89eb7a2fedd95b4d2f48 (diff)
download: mupdf-cf4e4a89c9614198fc0e38dc2f740ca1c05ab867.tar.xz
Add option to ascii hex encode binary streams in pdfclean.
-rw-r--r-- apps/pdfclean.c 136
-rw-r--r-- fitz/fitz.h 1
-rw-r--r-- fitz/obj_array.c 23
-rw-r--r-- mupdf/mupdf.h 1
-rw-r--r-- mupdf/pdf_image.c 10
5 files changed, 155 insertions, 16 deletions
diff --git a/apps/pdfclean.c b/apps/pdfclean.c
index 87d6e9c3..c4d7b879 100644
--- a/apps/pdfclean.c
+++ b/apps/pdfclean.c
@@ -21,6 +21,7 @@ static int *renumbermap = NULL;
static int dogarbage = 0;
static int doexpand = 0;
+static int doascii = 0;
static pdf_xref *xref = NULL;
@@ -41,6 +42,7 @@ static void usage(void)
"\t-gg\tin addition to -g compact xref table\n"
"\t-ggg\tin addition to -gg merge duplicate objects\n"
"\t-d\tdecompress streams\n"
+ "\t-a\tascii hex encode binary streams\n"
"\tpages\tcomma separated list of ranges\n");
exit(1);
}
@@ -385,15 +387,118 @@ static void preloadobjstms(void)
* Save streams and objects to the output
*/
+/*
+ * Classify one byte value: returns 1 when it should count as binary
+ * data, 0 when it counts as text.
+ *
+ * Newline, carriage return and tab are always text; every other value
+ * below 32 or above 127 is binary.
+ */
+static inline int isbinary(int c)
+{
+	switch (c)
+	{
+	case '\n':
+	case '\r':
+	case '\t':
+		/* common whitespace controls are allowed in text streams */
+		return 0;
+	default:
+		return c < 32 || c > 127;
+	}
+}
+
+/*
+ * Scan the first buf->len bytes of buf->data and report whether any of
+ * them is classified as binary by isbinary().
+ *
+ * Returns 1 as soon as a binary byte is found, 0 if none is found
+ * (including when the buffer is empty).
+ */
+static int isbinarystream(fz_buffer *buf)
+{
+	int pos = 0;
+
+	while (pos < buf->len)
+	{
+		if (isbinary(buf->data[pos]))
+			return 1;
+		pos++;
+	}
+
+	return 0;
+}
+
+/*
+ * Hex-encode n bytes starting at p into a buffer obtained from
+ * fz_newbuffer().
+ *
+ * Each input byte becomes two lowercase hex digits; a newline is
+ * emitted after every 32 input bytes, and the output is terminated by
+ * '>' (the ASCIIHexDecode end-of-data marker) followed by a newline.
+ *
+ * The requested capacity, n * 2 + n / 32 + 2, is exactly the number of
+ * bytes written below: 2 per input byte, one newline per full group of
+ * 32, and the 2-byte trailer.
+ *
+ * NOTE(review): writing starts at buf->len, so this relies on
+ * fz_newbuffer() handing back an empty buffer (len == 0) -- confirm.
+ */
+static fz_buffer *hexbuf(unsigned char *p, int n)
+{
+	static const char digits[16] = "0123456789abcdef";
+	fz_buffer *buf = fz_newbuffer(n * 2 + (n / 32) + 2);
+	int i;
+
+	for (i = 0; i < n; i++)
+	{
+		unsigned char byte = p[i];
+
+		buf->data[buf->len++] = digits[byte >> 4];
+		buf->data[buf->len++] = digits[byte & 15];
+
+		/* break the line after every 32nd encoded byte */
+		if ((i + 1) % 32 == 0)
+			buf->data[buf->len++] = '\n';
+	}
+
+	buf->data[buf->len++] = '>';
+	buf->data[buf->len++] = '\n';
+
+	return buf;
+}
+
+/*
+ * Make ASCIIHexDecode the first filter of a stream dictionary.
+ *
+ * dict is modified in place: its "Filter" entry is rewritten so that
+ * the name ASCIIHexDecode comes first, and an existing "DecodeParms"
+ * entry is adjusted by putting a null object in front of it, which
+ * keeps the existing parameters at the same index as the filter they
+ * were paired with.
+ *
+ * Three shapes of the existing "Filter" entry are handled:
+ *   - a single name: replaced by a new two-element array;
+ *   - an array: ASCIIHexDecode is inserted at index 0 of that array;
+ *   - anything else (including missing): replaced by the bare name.
+ */
+static void addhexfilter(fz_obj *dict)
+{
+ fz_obj *f, *dp, *newf, *newdp;
+ fz_obj *ahx, *nullobj;
+
+ ahx = fz_newname("ASCIIHexDecode");
+ nullobj = fz_newnull();
+ /* newf/newdp stay nil unless a replacement array is built below */
+ newf = newdp = nil;
+
+ f = fz_dictgets(dict, "Filter");
+ dp = fz_dictgets(dict, "DecodeParms");
+
+ if (fz_isname(f))
+ {
+ /* single filter name: build [ ASCIIHexDecode, <old filter> ] */
+ newf = fz_newarray(2);
+ fz_arraypush(newf, ahx);
+ fz_arraypush(newf, f);
+ f = newf;
+ if (fz_isdict(dp))
+ {
+ /* single parms dict: build [ null, <old parms> ] to match */
+ newdp = fz_newarray(2);
+ fz_arraypush(newdp, nullobj);
+ fz_arraypush(newdp, dp);
+ dp = newdp;
+ }
+ }
+ else if (fz_isarray(f))
+ {
+ /* existing filter array is modified in place, not copied */
+ fz_arrayinsert(f, ahx);
+ if (fz_isarray(dp))
+ fz_arrayinsert(dp, nullobj);
+ }
+ else
+ /* no usable Filter entry: ASCIIHexDecode becomes the only filter */
+ f = ahx;
+
+ fz_dictputs(dict, "Filter", f);
+ /* DecodeParms is only written back when the lookup returned one */
+ if (dp)
+ fz_dictputs(dict, "DecodeParms", dp);
+
+ /* release the references created in this function */
+ fz_dropobj(ahx);
+ fz_dropobj(nullobj);
+ if (newf)
+ fz_dropobj(newf);
+ if (newdp)
+ fz_dropobj(newdp);
+}
+
static void copystream(fz_obj *obj, int num, int gen)
{
fz_error error;
- fz_buffer *buf;
+ fz_buffer *buf, *tmp;
+ fz_obj *newlen;
error = pdf_loadrawstream(&buf, xref, num, gen);
if (error)
die(error);
+ if (doascii && isbinarystream(buf))
+ {
+ tmp = hexbuf(buf->data, buf->len);
+ fz_dropbuffer(buf);
+ buf = tmp;
+
+ addhexfilter(obj);
+
+ newlen = fz_newint(buf->len);
+ fz_dictputs(obj, "Length", newlen);
+ fz_dropobj(newlen);
+ }
+
fprintf(out, "%d %d obj\n", num, gen);
fz_fprintobj(out, obj, !doexpand);
fprintf(out, "stream\n");
@@ -406,29 +511,35 @@ static void copystream(fz_obj *obj, int num, int gen)
static void expandstream(fz_obj *obj, int num, int gen)
{
fz_error error;
- fz_buffer *buf;
- fz_obj *newdict, *newlen;
+ fz_buffer *buf, *tmp;
+ fz_obj *newlen;
error = pdf_loadstream(&buf, xref, num, gen);
if (error)
die(error);
- newdict = fz_copydict(obj);
- fz_dictdels(newdict, "Filter");
- fz_dictdels(newdict, "DecodeParms");
+ fz_dictdels(obj, "Filter");
+ fz_dictdels(obj, "DecodeParms");
+
+ if (doascii && isbinarystream(buf))
+ {
+ tmp = hexbuf(buf->data, buf->len);
+ fz_dropbuffer(buf);
+ buf = tmp;
+
+ addhexfilter(obj);
+ }
newlen = fz_newint(buf->len);
- fz_dictputs(newdict, "Length", newlen);
+ fz_dictputs(obj, "Length", newlen);
fz_dropobj(newlen);
fprintf(out, "%d %d obj\n", num, gen);
- fz_fprintobj(out, newdict, !doexpand);
+ fz_fprintobj(out, obj, !doexpand);
fprintf(out, "stream\n");
fwrite(buf->data, 1, buf->len, out);
fprintf(out, "endstream\nendobj\n\n");
- fz_dropobj(newdict);
-
fz_dropbuffer(buf);
}
@@ -468,7 +579,7 @@ static void writeobject(int num, int gen)
}
else
{
- if (doexpand)
+ if (doexpand && !pdf_isjpximage(obj))
expandstream(obj, num, gen);
else
copystream(obj, num, gen);
@@ -572,13 +683,14 @@ int main(int argc, char **argv)
int c, num;
int subset;
- while ((c = fz_getopt(argc, argv, "gdp:")) != -1)
+ while ((c = fz_getopt(argc, argv, "adgp:")) != -1)
{
switch (c)
{
case 'p': password = fz_optarg; break;
case 'g': dogarbage ++; break;
case 'd': doexpand ++; break;
+ case 'a': doascii ++; break;
default: usage(); break;
}
}
diff --git a/fitz/fitz.h b/fitz/fitz.h
index a2803899..99751b3e 100644
--- a/fitz/fitz.h
+++ b/fitz/fitz.h
@@ -424,6 +424,7 @@ int fz_arraylen(fz_obj *array);
fz_obj *fz_arrayget(fz_obj *array, int i);
void fz_arrayput(fz_obj *array, int i, fz_obj *obj);
void fz_arraypush(fz_obj *array, fz_obj *obj);
+void fz_arrayinsert(fz_obj *array, fz_obj *obj);
int fz_dictlen(fz_obj *dict);
fz_obj *fz_dictgetkey(fz_obj *dict, int idx);
diff --git a/fitz/obj_array.c b/fitz/obj_array.c
index d7d2d2e7..75f4a6b5 100644
--- a/fitz/obj_array.c
+++ b/fitz/obj_array.c
@@ -104,6 +104,29 @@ fz_arraypush(fz_obj *obj, fz_obj *item)
}
+/*
+ * Insert item at the front (index 0) of an array object.
+ *
+ * obj is resolved through fz_resolveindirect() first. If the result is
+ * not an array, a warning is emitted and nothing is changed. Otherwise
+ * the existing entries are shifted up by one slot and a new reference
+ * to item (taken with fz_keepobj) is stored at index 0.
+ *
+ * The backing store grows by a factor of 3/2 when full. Because that
+ * factor rounds down to no growth at all for a capacity of 0 or 1, the
+ * new capacity is clamped to at least len + 1 so the shift below can
+ * never write past the end of the allocation.
+ */
void
+fz_arrayinsert(fz_obj *obj, fz_obj *item)
+{
+	obj = fz_resolveindirect(obj);
+
+	if (!fz_isarray(obj))
+		fz_warn("assert: not an array (%s)", fz_objkindstr(obj));
+	else
+	{
+		if (obj->u.a.len + 1 > obj->u.a.cap)
+		{
+			int i;
+			int newcap = (obj->u.a.cap * 3) / 2;
+
+			/* (cap * 3) / 2 == cap when cap is 0 or 1: force real growth */
+			if (newcap < obj->u.a.len + 1)
+				newcap = obj->u.a.len + 1;
+
+			obj->u.a.items = fz_realloc(obj->u.a.items, sizeof (fz_obj*) * newcap);
+			obj->u.a.cap = newcap;
+			/* clear the newly added tail slots */
+			for (i = obj->u.a.len; i < obj->u.a.cap; i++)
+				obj->u.a.items[i] = nil;
+		}
+		/* regions overlap, so memmove rather than memcpy */
+		memmove(obj->u.a.items + 1, obj->u.a.items, obj->u.a.len * sizeof(fz_obj*));
+		obj->u.a.items[0] = fz_keepobj(item);
+		obj->u.a.len++;
+	}
+}
+}
+
+void
fz_freearray(fz_obj *obj)
{
int i;
diff --git a/mupdf/mupdf.h b/mupdf/mupdf.h
index 0fc0ec75..419a06db 100644
--- a/mupdf/mupdf.h
+++ b/mupdf/mupdf.h
@@ -260,6 +260,7 @@ void pdf_dropxobject(pdf_xobject *xobj);
fz_error pdf_loadinlineimage(fz_pixmap **imgp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict, fz_stream *file);
fz_error pdf_loadimage(fz_pixmap **imgp, pdf_xref *xref, fz_obj *rdb, fz_obj *obj);
+int pdf_isjpximage(fz_obj *dict);
/*
* CMap
diff --git a/mupdf/pdf_image.c b/mupdf/pdf_image.c
index 6021da09..9eb1925e 100644
--- a/mupdf/pdf_image.c
+++ b/mupdf/pdf_image.c
@@ -251,10 +251,13 @@ pdf_loadinlineimage(fz_pixmap **pixp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict,
return fz_okay;
}
-static int
-pdf_isjpximage(fz_obj *filter)
+int
+pdf_isjpximage(fz_obj *dict)
{
+ fz_obj *filter;
int i;
+
+ filter = fz_dictgets(dict, "Filter");
if (!strcmp(fz_toname(filter), "JPXDecode"))
return 1;
for (i = 0; i < fz_arraylen(filter); i++)
@@ -342,8 +345,7 @@ pdf_loadimage(fz_pixmap **pixp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict)
pdf_logimage("load image (%d 0 R) {\n", fz_tonum(dict));
/* special case for JPEG2000 images */
- obj = fz_dictgets(dict, "Filter");
- if (pdf_isjpximage(obj))
+ if (pdf_isjpximage(dict))
{
error = pdf_loadjpximage(pixp, xref, rdb, dict);
if (error)