summary refs log tree commit diff
diff options
context:
space:
mode:
author Sebastian Rasmussen <sebras@hotmail.com> 2009-07-09 02:03:19 +0200
committer Sebastian Rasmussen <sebras@hotmail.com> 2009-07-09 02:03:19 +0200
commit 1ef9dff0e7ad7112025efe273ae855f6d2ad5f36 (patch)
tree 08240b59b6ab17fc48ef16f1c5160ed447285498
parent 60a1411f016d2fecce12abf715a10d7e7faf886f (diff)
download mupdf-1ef9dff0e7ad7112025efe273ae855f6d2ad5f36.tar.xz
Parse page tree on-demand instead of parsing the entire tree after reading xref.
-rw-r--r-- apps/common/pdfapp.c 52
-rw-r--r-- apps/common/pdftool.c 16
-rw-r--r-- apps/mozilla/moz_main.c 46
-rw-r--r-- apps/pdfapp.h 2
-rw-r--r-- apps/pdfdraw.c 11
-rw-r--r-- apps/pdfinfo.c 17
-rw-r--r-- apps/pdftool.h 3
-rw-r--r-- apps/unix/x11pdf.c 3
-rw-r--r-- mupdf/mupdf.h 19
-rw-r--r-- mupdf/pdf_open.c 7
-rw-r--r-- mupdf/pdf_pagetree.c 379
11 files changed, 330 insertions, 225 deletions
diff --git a/apps/common/pdfapp.c b/apps/common/pdfapp.c
index 7b52b744..f4cea83f 100644
--- a/apps/common/pdfapp.c
+++ b/apps/common/pdfapp.c
@@ -114,14 +114,6 @@ void pdfapp_open(pdfapp_t *app, char *filename)
}
/*
- * Load page tree
- */
-
- error = pdf_loadpagetree(&app->pages, app->xref);
- if (error)
- pdfapp_error(app, error);
-
- /*
* Load meta information
* TODO: move this into mupdf library
*/
@@ -167,6 +159,7 @@ void pdfapp_open(pdfapp_t *app, char *filename)
* Start at first page
*/
+ app->pagecount = app->xref->pagecount;
app->shrinkwrap = 1;
if (app->pageno < 1)
app->pageno = 1;
@@ -183,10 +176,6 @@ void pdfapp_open(pdfapp_t *app, char *filename)
void pdfapp_close(pdfapp_t *app)
{
- if (app->pages)
- pdf_droppagetree(app->pages);
- app->pages = nil;
-
if (app->page)
pdf_droppage(app->page);
app->page = nil;
@@ -257,14 +246,16 @@ static void pdfapp_showpage(pdfapp_t *app, int loadpage, int drawpage)
pdf_droppage(app->page);
app->page = nil;
- obj = pdf_getpageobject(app->pages, app->pageno - 1);
+ error = pdf_getpageobject(app->xref, app->pageno, &obj);
+ if (error)
+ pdfapp_error(app, error);
error = pdf_loadpage(&app->page, app->xref, obj);
if (error)
pdfapp_error(app, error);
sprintf(buf, "%s - %d/%d", app->doctitle,
- app->pageno, pdf_getpagecount(app->pages));
+ app->pageno, app->xref->pagecount);
wintitle(app, buf);
}
@@ -321,24 +312,21 @@ static void pdfapp_gotouri(pdfapp_t *app, fz_obj *uri)
static void pdfapp_gotopage(pdfapp_t *app, fz_obj *obj)
{
- int oid = fz_tonum(obj);
- int i;
+ fz_error error;
+ int page;
- for (i = 0; i < pdf_getpagecount(app->pages); i++)
+ error = pdf_findpageobject(app->xref, obj, &page);
+ if (error)
+ pdfapp_error(app, error);
+
+ if (app->histlen + 1 == 256)
{
- if (fz_tonum(app->pages->pobj[i]) == oid)
- {
- if (app->histlen + 1 == 256)
- {
- memmove(app->hist, app->hist + 1, sizeof(int) * 255);
- app->histlen --;
- }
- app->hist[app->histlen++] = app->pageno;
- app->pageno = i + 1;
- pdfapp_showpage(app, 1, 1);
- return;
- }
+ memmove(app->hist, app->hist + 1, sizeof(int) * 255);
+ app->histlen --;
}
+ app->hist[app->histlen++] = app->pageno;
+ app->pageno = page;
+ pdfapp_showpage(app, 1, 1);
}
void pdfapp_onresize(pdfapp_t *app, int w, int h)
@@ -453,7 +441,7 @@ void pdfapp_onkey(pdfapp_t *app, int c)
break;
case 'G':
- app->pageno = pdf_getpagecount(app->pages);
+ app->pageno = app->xref->pagecount;
break;
case 'm':
@@ -494,8 +482,8 @@ void pdfapp_onkey(pdfapp_t *app, int c)
if (app->pageno < 1)
app->pageno = 1;
- if (app->pageno > pdf_getpagecount(app->pages))
- app->pageno = pdf_getpagecount(app->pages);
+ if (app->pageno > app->xref->pagecount)
+ app->pageno = app->xref->pagecount;
if (app->pageno != oldpage)
{
diff --git a/apps/common/pdftool.c b/apps/common/pdftool.c
index 3133a9d8..a607ba3d 100644
--- a/apps/common/pdftool.c
+++ b/apps/common/pdftool.c
@@ -2,7 +2,6 @@
char *basename = nil;
pdf_xref *xref = nil;
-pdf_pagetree *pagetree = nil;
static void (*cleanup)(void) = nil;
void closexref(void);
@@ -75,12 +74,6 @@ void closexref(void)
if (cleanup)
cleanup();
- if (pagetree)
- {
- pdf_droppagetree(pagetree);
- pagetree = nil;
- }
-
if (xref)
{
pdf_closexref(xref);
@@ -90,12 +83,3 @@ void closexref(void)
basename = nil;
}
-void loadpagetree(void)
-{
- fz_error error;
-
- error = pdf_loadpagetree(&pagetree, xref);
- if (error)
- die(error);
-}
-
diff --git a/apps/mozilla/moz_main.c b/apps/mozilla/moz_main.c
index b9cf0079..52deb38f 100644
--- a/apps/mozilla/moz_main.c
+++ b/apps/mozilla/moz_main.c
@@ -117,15 +117,7 @@ void pdfmoz_open(pdfmoz_t *moz, char *filename)
pdfmoz_warn(moz, "Invalid password.");
}
- /*
- * Load page tree
- */
-
- error = pdf_loadpagetree(&pages, moz->xref);
- if (error)
- pdfmoz_error(moz, error);
-
- moz->pagecount = pdf_getpagecount(pages);
+ moz->pagecount = moz->xref->pagecount;
moz->pages = fz_malloc(sizeof(page_t) * moz->pagecount);
for (i = 0; i < moz->pagecount; i++)
@@ -288,28 +280,32 @@ void pdfmoz_gotouri(pdfmoz_t *moz, fz_obj *uri)
int pdfmoz_getpagenum(pdfmoz_t *moz, fz_obj *obj)
{
- int oid = fz_tonum(obj);
- int i;
- for (i = 0; i < moz->pagecount; i++)
- if (fz_tonum(moz->pages[i].obj) == oid)
- return i;
- return 0;
+ fz_error error;
+ int page;
+ int i, y = 0;
+
+ error = pdf_findpageobject(moz->xref, obj, &page);
+ if (error)
+ pdfmoz_error(moz, error);
+
+ return page - 1; /* pdf_findpageobject counts pages from 1; the old lookup returned a 0-based index */
}
void pdfmoz_gotopage(pdfmoz_t *moz, fz_obj *obj)
{
- int oid = fz_tonum(obj);
+ fz_error error;
+ int page;
int i, y = 0;
- for (i = 0; i < moz->pagecount; i++)
- {
- if (fz_tonum(moz->pages[i].obj) == oid)
- {
- SetScrollPos(moz->hwnd, SB_VERT, y, TRUE);
- InvalidateRect(moz->hwnd, NULL, FALSE);
- return;
- }
+
+ error = pdf_findpageobject(moz->xref, obj, &page);
+ if (error)
+ pdfmoz_error(moz, error);
+
+ for (i = 0; i < page - 1; i++) /* page is 1-based: sum only the pages before the target */
y += moz->pages[i].px;
- }
+
+ SetScrollPos(moz->hwnd, SB_VERT, y, TRUE);
+ InvalidateRect(moz->hwnd, NULL, FALSE);
}
void pdfmoz_onmouse(pdfmoz_t *moz, int x, int y, int click)
diff --git a/apps/pdfapp.h b/apps/pdfapp.h
index 0acccc2b..3b7852be 100644
--- a/apps/pdfapp.h
+++ b/apps/pdfapp.h
@@ -26,8 +26,8 @@ struct pdfapp_s
char *doctitle;
pdf_xref *xref;
pdf_outline *outline;
- pdf_pagetree *pages;
fz_renderer *rast;
+ int pagecount;
/* current view params */
float zoom;
diff --git a/apps/pdfdraw.c b/apps/pdfdraw.c
index ee4b4a05..14e85781 100644
--- a/apps/pdfdraw.c
+++ b/apps/pdfdraw.c
@@ -93,7 +93,9 @@ static void drawloadpage(int pagenum, struct benchmark *loadtimes)
gettime(&start);
}
- pageobj = pdf_getpageobject(pagetree, pagenum - 1);
+ error = pdf_getpageobject(xref, pagenum, &pageobj);
+ if (error)
+ die(error);
error = pdf_loadpage(&drawpage, xref, pageobj);
if (error)
die(error);
@@ -324,7 +326,7 @@ static void drawpages(char *pagelist)
if (strlen(dash) > 1)
epage = atoi(dash + 1);
else
- epage = pdf_getpagecount(pagetree);
+ epage = xref->pagecount;
}
if (spage > epage)
@@ -332,8 +334,8 @@ static void drawpages(char *pagelist)
if (spage < 1)
spage = 1;
- if (epage > pdf_getpagecount(pagetree))
- epage = pdf_getpagecount(pagetree);
+ if (epage > xref->pagecount)
+ epage = xref->pagecount;
printf("Drawing pages %d-%d...\n", spage, epage);
for (page = spage; page <= epage; page++)
@@ -412,7 +414,6 @@ int main(int argc, char **argv)
die(error);
openxref(argv[fz_optind], password, 0);
- loadpagetree();
state = NO_PAGES_DRAWN;
}
else
diff --git a/apps/pdfinfo.c b/apps/pdfinfo.c
index d09a45ca..176e26e6 100644
--- a/apps/pdfinfo.c
+++ b/apps/pdfinfo.c
@@ -658,7 +658,9 @@ gatherinfo(int show, int page)
fz_obj *shade;
fz_obj *pattern;
- pageobj = pdf_getpageobject(pagetree, page - 1);
+ error = pdf_getpageobject(xref, page, &pageobj);
+ if (error)
+ die(error);
if (!pageobj)
die(fz_throw("cannot retrieve info from page %d", page));
@@ -740,7 +742,7 @@ printglobalinfo(void)
fz_debugobj(cryptinfo->u.crypt.obj);
}
- printf("\nPages: %d\n\n", pdf_getpagecount(pagetree));
+ printf("\nPages: %d\n\n", xref->pagecount);
}
static void
@@ -977,7 +979,7 @@ showinfo(char *filename, int show, char *pagelist)
if (strlen(dash) > 1)
epage = atoi(dash + 1);
else
- epage = pdf_getpagecount(pagetree);
+ epage = xref->pagecount;
}
if (spage > epage)
@@ -985,10 +987,10 @@ showinfo(char *filename, int show, char *pagelist)
if (spage < 1)
spage = 1;
- if (epage > pdf_getpagecount(pagetree))
- epage = pdf_getpagecount(pagetree);
- if (spage > pdf_getpagecount(pagetree))
- spage = pdf_getpagecount(pagetree);
+ if (epage > xref->pagecount)
+ epage = xref->pagecount;
+ if (spage > xref->pagecount)
+ spage = xref->pagecount;
if (allpages)
printf("Retrieving info from pages %d-%d...\n", spage, epage);
@@ -1058,7 +1060,6 @@ int main(int argc, char **argv)
closexref();
filename = argv[fz_optind];
openxref(filename, password, 0);
- loadpagetree();
gatherglobalinfo();
state = NO_INFO_GATHERED;
}
diff --git a/apps/pdftool.h b/apps/pdftool.h
index 44e9e361..6eb75770 100644
--- a/apps/pdftool.h
+++ b/apps/pdftool.h
@@ -3,7 +3,7 @@
extern char *basename;
extern pdf_xref *xref;
-extern pdf_pagetree *pagetree;
+extern int pages;
void die(fz_error error);
void setcleanup(void (*cleanup)(void));
@@ -11,4 +11,3 @@ void setcleanup(void (*cleanup)(void));
void openxref(char *filename, char *password, int dieonbadpass);
void closexref(void);
-void loadpagetree(void);
diff --git a/apps/unix/x11pdf.c b/apps/unix/x11pdf.c
index e3bc4b72..3ae1d930 100644
--- a/apps/unix/x11pdf.c
+++ b/apps/unix/x11pdf.c
@@ -320,8 +320,7 @@ static void windrawpageno(pdfapp_t *app)
{
char s[100];
- int ret = snprintf(s, 100, "Page %d/%d", gapp.pageno,
- pdf_getpagecount(gapp.pages));
+ int ret = snprintf(s, 100, "Page %d/%d", gapp.pageno, gapp.pagecount);
if (ret >= 0)
{
isshowingpage = 1;
diff --git a/mupdf/mupdf.h b/mupdf/mupdf.h
index 0b7e062f..3e7cebda 100644
--- a/mupdf/mupdf.h
+++ b/mupdf/mupdf.h
@@ -120,8 +120,8 @@ struct pdf_xref_s
pdf_xrefentry *table;
struct pdf_store_s *store;
- struct pdf_pagetree_s *pages;
struct pdf_outline_s *outlines;
+ int pagecount;
};
struct pdf_xrefentry_s
@@ -550,19 +550,10 @@ fz_error pdf_loadannots(pdf_comment **, pdf_link **, pdf_xref *, fz_obj *annots)
* Page tree, pages and related objects
*/
-typedef struct pdf_pagetree_s pdf_pagetree;
typedef struct pdf_page_s pdf_page;
typedef struct pdf_textline_s pdf_textline;
typedef struct pdf_textchar_s pdf_textchar;
-struct pdf_pagetree_s
-{
- int cap;
- int count;
- int cursor;
- fz_obj **pobj;
-};
-
struct pdf_page_s
{
fz_rect mediabox;
@@ -587,11 +578,9 @@ struct pdf_textline_s
};
/* pagetree.c */
-fz_error pdf_loadpagetree(pdf_pagetree **pp, pdf_xref *xref);
-int pdf_getpagecount(pdf_pagetree *pages);
-fz_obj *pdf_getpageobject(pdf_pagetree *pages, int p);
-void pdf_debugpagetree(pdf_pagetree *pages);
-void pdf_droppagetree(pdf_pagetree *pages);
+fz_error pdf_getpagecount(pdf_xref *xref, int *pagesp);
+fz_error pdf_getpageobject(pdf_xref *xref, int p, fz_obj **pagep);
+fz_error pdf_findpageobject(pdf_xref *xref, fz_obj *pageobj, int *page);
/* page.c */
fz_error pdf_loadpage(pdf_page **pagep, pdf_xref *xref, fz_obj *ref);
diff --git a/mupdf/pdf_open.c b/mupdf/pdf_open.c
index c3e96224..da5f138b 100644
--- a/mupdf/pdf_open.c
+++ b/mupdf/pdf_open.c
@@ -720,6 +720,13 @@ pdf_loadxref(pdf_xref *xref, char *filename)
goto cleanup;
}
+ error = pdf_getpagecount(xref, &xref->pagecount);
+ if (error)
+ {
+ error = fz_rethrow(error, "cannot determine page count");
+ goto cleanup;
+ }
+
return fz_okay;
cleanup:
diff --git a/mupdf/pdf_pagetree.c b/mupdf/pdf_pagetree.c
index b18e2ae7..0521ea6c 100644
--- a/mupdf/pdf_pagetree.c
+++ b/mupdf/pdf_pagetree.c
@@ -10,26 +10,28 @@ struct stuff
};
static fz_error
-loadpagetree(pdf_xref *xref, pdf_pagetree *pages,
- struct stuff inherit, fz_obj *obj, int *pagenum)
+getpagecount(pdf_xref *xref, fz_obj *node, int *pagesp)
{
fz_error error;
fz_obj *type;
fz_obj *kids;
- fz_obj *kobj;
- fz_obj *inh;
+ fz_obj *count;
char *typestr;
+ int pages = 0;
int i;
- if (fz_isnull(obj))
+ if (fz_isnull(node))
return fz_throw("pagetree node is missing");
- type = fz_dictgets(obj, "Type");
+ type = fz_dictgets(node, "Type");
+ kids = fz_dictgets(node, "Kids");
+ count = fz_dictgets(node, "Count");
+
if (!type)
{
- fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(obj), fz_togen(obj));
+ fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node));
- kids = fz_dictgets(obj, "Kids");
+ kids = fz_dictgets(node, "Kids");
if (kids)
{
fz_warn("guessing it may be a pagetree node, continuing...");
@@ -44,87 +46,193 @@ loadpagetree(pdf_xref *xref, pdf_pagetree *pages,
else
typestr = fz_toname(type);
- if (strcmp(typestr, "Page") == 0)
+ if (!strcmp(typestr, "Page"))
+ (*pagesp)++;
+
+ else if (!strcmp(typestr, "Pages"))
{
- pdf_logpage("page %d (%d %d R)\n", *pagenum, fz_tonum(obj), fz_togen(obj));
+ if (!fz_isarray(kids))
+ return fz_throw("page tree contains no pages");
- (*pagenum)++;
+ pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node));
- if (inherit.resources && !fz_dictgets(obj, "Resources"))
+ for (i = 0; i < fz_arraylen(kids); i++)
{
- pdf_logpage("inherit resources (%d)\n", pages->cursor);
- error = fz_dictputs(obj, "Resources", inherit.resources);
- if (error) return fz_rethrow(error, "cannot inherit page tree resources");
+ fz_obj *obj = fz_arrayget(kids, i);
+
+ /* prevent infinite recursion possible in maliciously crafted PDFs */
+ if (obj == node)
+ return fz_throw("corrupted pdf file");
+
+ error = getpagecount(xref, obj, &pages);
+ if (error)
+ return fz_rethrow(error, "cannot load pagesubtree (%d %d R)", fz_tonum(obj), fz_togen(obj));
}
- if (inherit.mediabox && !fz_dictgets(obj, "MediaBox"))
+ if (pages != fz_toint(count))
{
- pdf_logpage("inherit mediabox (%d)\n", pages->cursor);
- error = fz_dictputs(obj, "MediaBox", inherit.mediabox);
- if (error) return fz_rethrow(error, "cannot inherit page tree mediabox");
+ fz_warn("page tree node contains incorrect number of page, continuing...");
+
+ error = fz_newint(&count, pages);
+ if (!error)
+ {
+ error = fz_dictputs(node, "Count", count);
+ fz_dropobj(count);
+ }
+ if (error)
+ return fz_rethrow(error, "cannot correct wrong page count");
}
- if (inherit.cropbox && !fz_dictgets(obj, "CropBox"))
+ pdf_logpage("%d pages\n", pages);
+
+ (*pagesp) += pages;
+
+ pdf_logpage("}\n");
+ }
+
+ return fz_okay;
+}
+
+fz_error
+pdf_getpagecount(pdf_xref *xref, int *pagesp)
+{
+ fz_error error;
+ fz_obj *ref;
+ fz_obj *catalog;
+ fz_obj *pages;
+
+ ref = fz_dictgets(xref->trailer, "Root");
+ catalog = fz_resolveindirect(ref);
+
+ pages = fz_dictgets(catalog, "Pages");
+ pdf_logpage("determining page count (%d %d R) {\n", fz_tonum(pages), fz_togen(pages));
+
+ *pagesp = 0;
+ error = getpagecount(xref, pages, pagesp);
+ if (error)
+ return fz_rethrow(error, "cannot determine page count");
+
+ pdf_logpage("}\n");
+
+ return fz_okay;
+}
+
+static fz_error
+getpageobject(pdf_xref *xref, struct stuff inherit, fz_obj *node, int *pagesp, int pageno, fz_obj **pagep)
+{
+ fz_error error;
+ char *typestr;
+ fz_obj *type;
+ fz_obj *kids;
+ fz_obj *count;
+ fz_obj *inh;
+ int i;
+
+ if (fz_isnull(node))
+ return fz_throw("pagetree node is missing");
+
+ type = fz_dictgets(node, "Type");
+ kids = fz_dictgets(node, "Kids");
+ count = fz_dictgets(node, "Count");
+
+ if (!type)
+ {
+ fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node));
+
+ kids = fz_dictgets(node, "Kids");
+ if (kids)
{
- pdf_logpage("inherit cropbox (%d)\n", pages->cursor);
- error = fz_dictputs(obj, "CropBox", inherit.cropbox);
- if (error) return fz_rethrow(error, "cannot inherit page tree cropbox");
+ fz_warn("guessing it may be a pagetree node, continuing...");
+ typestr = "Pages";
}
-
- if (inherit.rotate && !fz_dictgets(obj, "Rotate"))
+ else
{
- pdf_logpage("inherit rotate (%d)\n", pages->cursor);
- error = fz_dictputs(obj, "Rotate", inherit.rotate);
- if (error) return fz_rethrow(error, "cannot inherit page tree rotate");
+ fz_warn("guessing it may be a page, continuing...");
+ typestr = "Page";
}
+ }
+ else
+ typestr = fz_toname(type);
- if (pages->cursor >= pages->cap)
+ if (!strcmp(typestr, "Page"))
+ {
+ (*pagesp)++;
+ if (*pagesp == pageno)
{
- fz_warn("initial page tree size too small, enlarging");
+ pdf_logpage("page %d (%d %d R)\n", *pagesp, fz_tonum(node), fz_togen(node));
- pages->cap += 10;
- pages->count = pages->cursor + 1;
- pages->pobj = fz_realloc(pages->pobj, sizeof(fz_obj*) * pages->cap);
- if (!pages->pobj)
- return fz_throw("error allocating enlarged page tree");
- }
+ if (inherit.resources && !fz_dictgets(node, "Resources"))
+ {
+ pdf_logpage("inherited resources\n");
+ error = fz_dictputs(node, "Resources", inherit.resources);
+ if (error)
+ return fz_rethrow(error, "cannot inherit page tree resources");
+ }
+
+ if (inherit.mediabox && !fz_dictgets(node, "MediaBox"))
+ {
+ pdf_logpage("inherit mediabox\n");
+ error = fz_dictputs(node, "MediaBox", inherit.mediabox);
+ if (error)
+ return fz_rethrow(error, "cannot inherit page tree mediabox");
+ }
- pages->pobj[pages->cursor] = fz_keepobj(obj);
- pages->cursor ++;
+ if (inherit.cropbox && !fz_dictgets(node, "CropBox"))
+ {
+ pdf_logpage("inherit cropbox\n");
+ error = fz_dictputs(node, "CropBox", inherit.cropbox);
+ if (error)
+ return fz_rethrow(error, "cannot inherit page tree cropbox");
+ }
+
+ if (inherit.rotate && !fz_dictgets(node, "Rotate"))
+ {
+ pdf_logpage("inherit rotate\n");
+ error = fz_dictputs(node, "Rotate", inherit.rotate);
+ if (error)
+ return fz_rethrow(error, "cannot inherit page tree rotate");
+ }
+
+ *pagep = node;
+ }
}
- else if (strcmp(typestr, "Pages") == 0)
+ else if (!strcmp(typestr, "Pages"))
{
- inh = fz_dictgets(obj, "Resources");
+ if (!fz_isarray(kids))
+ return fz_throw("page tree contains no pages");
+
+ if (*pagesp + fz_toint(count) < pageno)
+ {
+ (*pagesp) += fz_toint(count);
+ return fz_okay;
+ }
+
+ inh = fz_dictgets(node, "Resources");
if (inh) inherit.resources = inh;
- inh = fz_dictgets(obj, "MediaBox");
+ inh = fz_dictgets(node, "MediaBox");
if (inh) inherit.mediabox = inh;
- inh = fz_dictgets(obj, "CropBox");
+ inh = fz_dictgets(node, "CropBox");
if (inh) inherit.cropbox = inh;
- inh = fz_dictgets(obj, "Rotate");
+ inh = fz_dictgets(node, "Rotate");
if (inh) inherit.rotate = inh;
- kids = fz_dictgets(obj, "Kids");
- if (!fz_isarray(kids))
- return fz_throw("page tree contains no pages");
-
- pdf_logpage("subtree %d pages (%d %d R) {\n", fz_arraylen(kids), fz_tonum(obj), fz_togen(obj));
+ pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node));
- for (i = 0; i < fz_arraylen(kids); i++)
+ for (i = 0; !(*pagep) && i < fz_arraylen(kids); i++)
{
- kobj = fz_arrayget(kids, i);
- if (kobj == obj)
- {
- /* prevent infinite recursion possible in maliciously crafted PDFs */
+ fz_obj *obj = fz_arrayget(kids, i);
+
+ /* prevent infinite recursion possible in maliciously crafted PDFs */
+ if (obj == node)
return fz_throw("corrupted pdf file");
- }
- error = loadpagetree(xref, pages, inherit, kobj, pagenum);
+ error = getpageobject(xref, inherit, obj, pagesp, pageno, pagep);
if (error)
- return fz_rethrow(error, "cannot load pagesubtree (%d %d R)", fz_tonum(kobj), fz_togen(kobj));
+ return fz_rethrow(error, "cannot load pagesubtree (%d %d R)", fz_tonum(obj), fz_togen(obj));
}
pdf_logpage("}\n");
@@ -133,101 +241,134 @@ loadpagetree(pdf_xref *xref, pdf_pagetree *pages,
return fz_okay;
}
-void
-pdf_debugpagetree(pdf_pagetree *pages)
-{
- int i;
- printf("<<\n /Type /Pages\n /Count %d\n /Kids [\n", pages->count);
- for (i = 0; i < pages->count; i++) {
- printf(" %% page %d\n", i + 1);
- printf(" %d %d R\n", fz_tonum(pages->pobj[i]), fz_togen(pages->pobj[i]));
- }
- printf(" ]\n>>\n");
-}
-
fz_error
-pdf_loadpagetree(pdf_pagetree **pp, pdf_xref *xref)
+pdf_getpageobject(pdf_xref *xref, int pageno, fz_obj **pagep)
{
fz_error error;
struct stuff inherit;
- pdf_pagetree *p = nil;
- fz_obj *catalog = nil;
- fz_obj *pages = nil;
- fz_obj *trailer;
fz_obj *ref;
+ fz_obj *catalog;
+ fz_obj *pages;
int count;
- int pagenum = 1;
inherit.resources = nil;
inherit.mediabox = nil;
inherit.cropbox = nil;
inherit.rotate = nil;
- trailer = xref->trailer;
-
- ref = fz_dictgets(trailer, "Root");
+ ref = fz_dictgets(xref->trailer, "Root");
catalog = fz_resolveindirect(ref);
pages = fz_dictgets(catalog, "Pages");
- count = fz_toint(fz_dictgets(pages, "Count"));
+ pdf_logpage("get page %d (%d %d R) {\n", pageno, fz_tonum(pages), fz_togen(pages));
- p = fz_malloc(sizeof(pdf_pagetree));
- if (!p) { error = fz_rethrow(-1, "out of memory: page tree struct"); goto cleanup; }
+ *pagep = nil;
+ count = 0;
+ error = getpageobject(xref, inherit, pages, &count, pageno, pagep);
+ if (error)
+ return fz_rethrow(error, "cannot find page %d", pageno);
- pdf_logpage("load pagetree %d pages (%d %d R) ptr=%p {\n", count, fz_tonum(pages), fz_togen(pages), p);
+ pdf_logpage("}\n");
+
+ return fz_okay;
+}
- p->pobj = nil;
- p->cap = count;
- p->count = count;
- p->cursor = 0;
+static fz_error
+findpageobject(pdf_xref *xref, fz_obj *node, fz_obj *page, int *pagenop, int *foundp)
+{
+ fz_error error;
+ char *typestr;
+ fz_obj *type;
+ fz_obj *kids;
+ int i;
- p->pobj = fz_malloc(sizeof(fz_obj*) * p->cap);
- if (!p->pobj) { error = fz_rethrow(-1, "out of memory: page tree object array"); goto cleanup; }
+ if (fz_isnull(node))
+ return fz_throw("pagetree node is missing");
- error = loadpagetree(xref, p, inherit, pages, &pagenum);
- if (error) { error = fz_rethrow(error, "cannot load pagetree (%d %d R)", fz_tonum(pages), fz_togen(pages)); goto cleanup; }
+ type = fz_dictgets(node, "Type");
+ kids = fz_dictgets(node, "Kids");
- pdf_logpage("}\n", count);
+ if (!type)
+ {
+ fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node));
- *pp = p;
- return fz_okay;
+ kids = fz_dictgets(node, "Kids");
+ if (kids)
+ {
+ fz_warn("guessing it may be a pagetree node, continuing...");
+ typestr = "Pages";
+ }
+ else
+ {
+ fz_warn("guessing it may be a page, continuing...");
+ typestr = "Page";
+ }
+ }
+ else
+ typestr = fz_toname(type);
-cleanup:
- if (p)
+ if (!strcmp(typestr, "Page"))
{
- fz_free(p->pobj);
- fz_free(p);
+ (*pagenop)++;
+ if (fz_tonum(node) == fz_tonum(page))
+ {
+ pdf_logpage("page %d (%d %d R)\n", *pagenop, fz_tonum(node), fz_togen(node));
+ *foundp = 1;
+ }
}
- return error; /* already rethrown */
-}
-int
-pdf_getpagecount(pdf_pagetree *pages)
-{
- return pages->count;
-}
+ else if (!strcmp(typestr, "Pages"))
+ {
+ if (!fz_isarray(kids))
+ return fz_throw("page tree contains no pages");
-fz_obj *
-pdf_getpageobject(pdf_pagetree *pages, int p)
-{
- if (p < 0 || p >= pages->count)
- return nil;
- return pages->pobj[p];
+ pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node));
+
+ for (i = 0; !(*foundp) && i < fz_arraylen(kids); i++)
+ {
+ fz_obj *obj = fz_arrayget(kids, i);
+
+ /* prevent infinite recursion possible in maliciously crafted PDFs */
+ if (obj == node)
+ return fz_throw("corrupted pdf file");
+
+ error = findpageobject(xref, obj, page, pagenop, foundp);
+ if (error)
+ return fz_rethrow(error, "cannot load pagesubtree (%d %d R)", fz_tonum(obj), fz_togen(obj));
+ }
+
+ pdf_logpage("}\n");
+ }
+
+ return fz_okay;
}
-void
-pdf_droppagetree(pdf_pagetree *pages)
+fz_error
+pdf_findpageobject(pdf_xref *xref, fz_obj *page, int *pagenop)
{
- int i;
+ fz_error error;
+ fz_obj *ref;
+ fz_obj *catalog;
+ fz_obj *pages;
+ int found;
- pdf_logpage("drop pagetree %p\n", pages);
+ ref = fz_dictgets(xref->trailer, "Root");
+ catalog = fz_resolveindirect(ref);
- for (i = 0; i < pages->count; i++) {
- if (pages->pobj[i])
- fz_dropobj(pages->pobj[i]);
- }
+ pages = fz_dictgets(catalog, "Pages");
+ pdf_logpage("find page object (%d %d R) (%d %d R) {\n", fz_tonum(page), fz_togen(page), fz_tonum(pages), fz_togen(pages));
- fz_free(pages->pobj);
- fz_free(pages);
+ *pagenop = 0;
+ found = 0;
+ error = findpageobject(xref, pages, page, pagenop, &found);
+ if (error)
+ return fz_rethrow(error, "cannot find page object (%d %d R)", fz_tonum(page), fz_togen(page));
+
+ pdf_logpage("}\n");
+
+ if (!found)
+ return fz_throw("cannot find page object (%d %d R)", fz_tonum(page), fz_togen(page));
+
+ return fz_okay;
}