summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tor Andersson <tor@ghostscript.com> 2010-12-04 22:54:08 +0000
committer Tor Andersson <tor@ghostscript.com> 2010-12-04 22:54:08 +0000
commit d62f4a4cb145b5da1ae38bea340fbc9e903f6612 (patch)
tree 1cf4ba00c798d0d7520d8870f3656020526ad282
parent 2f54f3ec4fb3c8c77f7c71032d6663081359296a (diff)
download mupdf-d62f4a4cb145b5da1ae38bea340fbc9e903f6612.tar.xz
Support repairing PDF documents with compressed object streams.
-rw-r--r-- apps/pdfclean.c 2
-rw-r--r-- mupdf/mupdf.h 3
-rw-r--r-- mupdf/pdf_repair.c 87
-rw-r--r-- mupdf/pdf_xref.c 93
4 files changed, 115 insertions, 70 deletions
diff --git a/apps/pdfclean.c b/apps/pdfclean.c
index c4d7b879..0cbeb77e 100644
--- a/apps/pdfclean.c
+++ b/apps/pdfclean.c
@@ -241,7 +241,7 @@ static void renumberobjs(void)
/* Create new table for the reordered, compacted xref */
oldxref = xref->table;
- xref->table = fz_malloc(xref->cap * sizeof (pdf_xrefentry));
+ xref->table = fz_malloc(xref->len * sizeof (pdf_xrefentry));
xref->table[0] = oldxref[0];
/* Move used objects into the new compacted xref */
diff --git a/mupdf/mupdf.h b/mupdf/mupdf.h
index 419a06db..52fe415b 100644
--- a/mupdf/mupdf.h
+++ b/mupdf/mupdf.h
@@ -128,7 +128,6 @@ struct pdf_xref_s
fz_obj *trailer;
int len;
- int cap;
pdf_xrefentry *table;
int pagelen;
@@ -168,7 +167,9 @@ void pdf_freexref(pdf_xref *);
/* private */
fz_error pdf_repairxref(pdf_xref *xref, char *buf, int bufsize);
+fz_error pdf_repairobjstms(pdf_xref *xref);
void pdf_debugxref(pdf_xref *);
+void pdf_resizexref(pdf_xref *xref, int newcap);
/*
* Resource store
diff --git a/mupdf/pdf_repair.c b/mupdf/pdf_repair.c
index 9f30a690..3b062e79 100644
--- a/mupdf/pdf_repair.c
+++ b/mupdf/pdf_repair.c
@@ -110,6 +110,59 @@ atobjend:
return fz_okay;
}
+/*
+ * Rebuild the xref entries for the objects listed in one object stream.
+ *
+ * Loads object (num gen R) and reads its "N" entry as the number of
+ * integer pairs to expect, then opens the object's stream and lexes that
+ * many pairs from it. The first integer of each pair is taken as an object
+ * number; its xref entry is overwritten with type 'o', ofs set to the
+ * stream's own object number, gen set to the pair's index within the
+ * stream, stmofs 0 and obj nil. The second integer of each pair is
+ * consumed but not used (presumably the object's offset inside the
+ * stream -- confirm against the PDF object stream layout).
+ *
+ * The xref table is grown with pdf_resizexref() when an object number
+ * lies beyond its current length.
+ *
+ * Returns fz_okay on success. Returns an error if the stream object cannot
+ * be loaded or opened, if lexing fails, if a token is not an integer, or
+ * if an object number is negative. Entries already rewritten before the
+ * failure are left in place.
+ */
+static fz_error
+pdf_repairobjstm(pdf_xref *xref, int num, int gen)
+{
+	fz_error error;
+	fz_obj *obj;
+	fz_stream *stm;
+	pdf_token_e tok;
+	int i, len, objnum, count;
+	char buf[256];
+
+	error = pdf_loadobject(&obj, xref, num, gen);
+	if (error)
+		return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen);
+
+	count = fz_toint(fz_dictgets(obj, "N"));
+
+	fz_dropobj(obj);
+
+	error = pdf_openstream(&stm, xref, num, gen);
+	if (error)
+		return fz_rethrow(error, "cannot open object stream object (%d %d R)", num, gen);
+
+	for (i = 0; i < count; i++)
+	{
+		/* first integer of the pair: the object number */
+		error = pdf_lex(&tok, stm, buf, sizeof buf, &len);
+		if (error)
+			goto lexfailed;
+		if (tok != PDF_TINT)
+			goto corrupt;
+
+		/*
+		 * The number comes straight from the file. A negative value
+		 * would pass the "beyond the table" test below and then index
+		 * in front of the table, so reject it here.
+		 */
+		objnum = atoi(buf);
+		if (objnum < 0)
+			goto corrupt;
+
+		if (objnum >= xref->len)
+			pdf_resizexref(xref, objnum + 1);
+
+		xref->table[objnum].ofs = num;
+		xref->table[objnum].gen = i;
+		xref->table[objnum].stmofs = 0;
+		xref->table[objnum].obj = nil;
+		xref->table[objnum].type = 'o';
+
+		/* second integer of the pair: read to stay in step, value unused */
+		error = pdf_lex(&tok, stm, buf, sizeof buf, &len);
+		if (error)
+			goto lexfailed;
+		if (tok != PDF_TINT)
+			goto corrupt;
+	}
+
+	fz_close(stm);
+	return fz_okay;
+
+lexfailed:
+	/* the lexer reported an error: pass it on with context */
+	fz_close(stm);
+	return fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
+
+corrupt:
+	/* lexing succeeded but the content is wrong: there is no earlier error to rethrow */
+	fz_close(stm);
+	return fz_throw("corrupt object stream (%d %d R)", num, gen);
+}
+
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
@@ -268,9 +321,7 @@ pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
fz_dropobj(id);
}
- xref->len = maxnum + 1;
- xref->cap = xref->len;
- xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry));
+ pdf_resizexref(xref, maxnum + 1);
xref->table[0].type = 'f';
xref->table[0].ofs = 0;
@@ -278,15 +329,6 @@ pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
xref->table[0].stmofs = 0;
xref->table[0].obj = nil;
- for (i = 1; i < xref->len; i++)
- {
- xref->table[i].type = 'f';
- xref->table[i].ofs = 0;
- xref->table[i].gen = 0;
- xref->table[i].stmofs = 0;
- xref->table[i].obj = nil;
- }
-
for (i = 0; i < listlen; i++)
{
xref->table[list[i].num].type = 'n';
@@ -314,6 +356,7 @@ pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
fz_dropobj(dict);
}
+
}
next = 0;
@@ -335,3 +378,23 @@ cleanup:
fz_free(list);
return error; /* already rethrown */
}
+
+/*
+ * Walk the xref table and repair the entries of objects held inside
+ * object streams.
+ *
+ * Every entry with a non-zero stmofs is loaded as (i 0 R); when its "Type"
+ * is the name "ObjStm", pdf_repairobjstm() is run on it.
+ *
+ * This is best effort: an object that cannot be loaded, or an object
+ * stream that cannot be repaired, produces a warning and is skipped.
+ * Always returns fz_okay.
+ */
+fz_error
+pdf_repairobjstms(pdf_xref *xref)
+{
+	fz_error error;
+	fz_obj *dict;
+	int i;
+
+	/* xref->len is re-read on every pass because pdf_repairobjstm may grow the table */
+	for (i = 0; i < xref->len; i++)
+	{
+		if (!xref->table[i].stmofs)
+			continue;
+
+		error = pdf_loadobject(&dict, xref, i, 0);
+		if (error)
+		{
+			/* dict may not have been set on failure, so it must not be read or dropped */
+			fz_warn("ignoring broken object (%d 0 R)", i);
+			continue;
+		}
+
+		if (!strcmp(fz_toname(fz_dictgets(dict, "Type")), "ObjStm"))
+		{
+			error = pdf_repairobjstm(xref, i, 0);
+			if (error)
+				fz_warn("cannot repair object stream (%d 0 R)", i);
+		}
+
+		fz_dropobj(dict);
+	}
+
+	return fz_okay;
+}
diff --git a/mupdf/pdf_xref.c b/mupdf/pdf_xref.c
index 6bf1ae55..aece3cb1 100644
--- a/mupdf/pdf_xref.c
+++ b/mupdf/pdf_xref.c
@@ -175,6 +175,23 @@ pdf_readtrailer(pdf_xref *xref, char *buf, int cap)
* xref tables
*/
+/*
+ * Change the number of entries in the xref table to newlen.
+ *
+ * The table is reallocated to hold newlen entries. Every entry from the
+ * previous length up to newlen is cleared (type 0, ofs 0, gen 0, stmofs 0,
+ * obj nil), and xref->len is set to newlen.
+ */
+void
+pdf_resizexref(pdf_xref *xref, int newlen)
+{
+	pdf_xrefentry *entry;
+	int oldlen = xref->len;
+	int k;
+
+	xref->table = fz_realloc(xref->table, newlen * sizeof(pdf_xrefentry));
+	xref->len = newlen;
+
+	/* clear only the entries that did not exist before the resize */
+	for (k = oldlen; k < newlen; k++)
+	{
+		entry = &xref->table[k];
+		entry->type = 0;
+		entry->ofs = 0;
+		entry->gen = 0;
+		entry->stmofs = 0;
+		entry->obj = nil;
+	}
+}
+
static fz_error
pdf_readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
@@ -211,24 +228,10 @@ pdf_readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
}
/* broken pdfs where size in trailer undershoots entries in xref sections */
- if (ofs + len > xref->cap)
+ if (ofs + len > xref->len)
{
fz_warn("broken xref section, proceeding anyway.");
- xref->cap = ofs + len;
- xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry));
- }
-
- if ((ofs + len) > xref->len)
- {
- for (i = xref->len; i < (ofs + len); i++)
- {
- xref->table[i].ofs = 0;
- xref->table[i].gen = 0;
- xref->table[i].stmofs = 0;
- xref->table[i].obj = nil;
- xref->table[i].type = 0;
- }
- xref->len = ofs + len;
+ pdf_resizexref(xref, ofs + len);
}
for (i = ofs; i < ofs + len; i++)
@@ -318,7 +321,6 @@ pdf_readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
int num, gen, stmofs;
int size, w0, w1, w2;
int t;
- int i;
pdf_logxref("load new xref format\n");
@@ -334,38 +336,15 @@ pdf_readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
}
size = fz_toint(obj);
- if (size >= xref->cap)
- {
- xref->cap = size + 1; /* for hack to allow broken pdf generators with off-by-one errors */
- xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry));
- }
-
if (size > xref->len)
{
- for (i = xref->len; i < xref->cap; i++)
- {
- xref->table[i].ofs = 0;
- xref->table[i].gen = 0;
- xref->table[i].stmofs = 0;
- xref->table[i].obj = nil;
- xref->table[i].type = 0;
- }
- xref->len = size;
+ pdf_resizexref(xref, size);
}
if (num < 0 || num >= xref->len)
{
- if (num == xref->len && num < xref->cap)
- {
- /* allow broken pdf files that have off-by-one errors in the xref */
- fz_warn("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1);
- xref->len ++;
- }
- else
- {
- fz_dropobj(trailer);
- return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1);
- }
+ fz_dropobj(trailer);
+ return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1);
}
pdf_logxref("\tnum=%d gen=%d size=%d\n", num, gen, size);
@@ -523,17 +502,7 @@ pdf_loadxref(pdf_xref *xref, char *buf, int bufsize)
pdf_logxref("\tsize %d at %#x\n", fz_toint(size), xref->startxref);
- xref->len = fz_toint(size);
- xref->cap = xref->len + 1; /* for hack to allow broken pdf generators with off-by-one errors */
- xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry));
- for (i = 0; i < xref->cap; i++)
- {
- xref->table[i].ofs = 0;
- xref->table[i].gen = 0;
- xref->table[i].stmofs = 0;
- xref->table[i].obj = nil;
- xref->table[i].type = 0;
- }
+ pdf_resizexref(xref, fz_toint(size));
error = pdf_readxrefsections(xref, xref->startxref, buf, bufsize);
if (error)
@@ -547,7 +516,7 @@ pdf_loadxref(pdf_xref *xref, char *buf, int bufsize)
for (i = 0; i < xref->len; i++)
if (xref->table[i].type == 'n')
if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->filesize)
- return fz_throw("object offset out of range: %d", xref->table[i].ofs);
+ return fz_throw("object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i);
return fz_okay;
}
@@ -565,6 +534,8 @@ pdf_openxrefwithstream(pdf_xref **xrefp, fz_stream *file, char *password)
fz_obj *encrypt;
fz_obj *id;
+ int repaired = 0;
+
xref = fz_malloc(sizeof(pdf_xref));
memset(xref, 0, sizeof(pdf_xref));
@@ -582,7 +553,6 @@ pdf_openxrefwithstream(pdf_xref **xrefp, fz_stream *file, char *password)
fz_free(xref->table);
xref->table = NULL;
xref->len = 0;
- xref->cap = 0;
}
error = pdf_repairxref(xref, xref->scratch, sizeof xref->scratch);
if (error)
@@ -590,6 +560,7 @@ pdf_openxrefwithstream(pdf_xref **xrefp, fz_stream *file, char *password)
pdf_freexref(xref);
return fz_rethrow(error, "cannot repair document");
}
+ repaired = 1;
}
encrypt = fz_dictgets(xref->trailer, "Encrypt");
@@ -618,6 +589,16 @@ pdf_openxrefwithstream(pdf_xref **xrefp, fz_stream *file, char *password)
}
}
+ if (repaired)
+ {
+ error = pdf_repairobjstms(xref);
+ if (error)
+ {
+ pdf_freexref(xref);
+ return fz_rethrow(error, "cannot repair document");
+ }
+ }
+
*xrefp = xref;
return fz_okay;
}