diff options
author | Sebastian Rasmussen <sebras@hotmail.com> | 2009-07-09 02:03:19 +0200 |
---|---|---|
committer | Sebastian Rasmussen <sebras@hotmail.com> | 2009-07-09 02:03:19 +0200 |
commit | 1ef9dff0e7ad7112025efe273ae855f6d2ad5f36 (patch) | |
tree | 08240b59b6ab17fc48ef16f1c5160ed447285498 | |
parent | 60a1411f016d2fecce12abf715a10d7e7faf886f (diff) | |
download | mupdf-1ef9dff0e7ad7112025efe273ae855f6d2ad5f36.tar.xz |
Parse page tree on-demand instead of parsing the entire tree after reading xref.
-rw-r--r-- | apps/common/pdfapp.c | 52 | ||||
-rw-r--r-- | apps/common/pdftool.c | 16 | ||||
-rw-r--r-- | apps/mozilla/moz_main.c | 46 | ||||
-rw-r--r-- | apps/pdfapp.h | 2 | ||||
-rw-r--r-- | apps/pdfdraw.c | 11 | ||||
-rw-r--r-- | apps/pdfinfo.c | 17 | ||||
-rw-r--r-- | apps/pdftool.h | 3 | ||||
-rw-r--r-- | apps/unix/x11pdf.c | 3 | ||||
-rw-r--r-- | mupdf/mupdf.h | 19 | ||||
-rw-r--r-- | mupdf/pdf_open.c | 7 | ||||
-rw-r--r-- | mupdf/pdf_pagetree.c | 379 |
11 files changed, 330 insertions, 225 deletions
diff --git a/apps/common/pdfapp.c b/apps/common/pdfapp.c index 7b52b744..f4cea83f 100644 --- a/apps/common/pdfapp.c +++ b/apps/common/pdfapp.c @@ -114,14 +114,6 @@ void pdfapp_open(pdfapp_t *app, char *filename) } /* - * Load page tree - */ - - error = pdf_loadpagetree(&app->pages, app->xref); - if (error) - pdfapp_error(app, error); - - /* * Load meta information * TODO: move this into mupdf library */ @@ -167,6 +159,7 @@ void pdfapp_open(pdfapp_t *app, char *filename) * Start at first page */ + app->pagecount = app->xref->pagecount; app->shrinkwrap = 1; if (app->pageno < 1) app->pageno = 1; @@ -183,10 +176,6 @@ void pdfapp_open(pdfapp_t *app, char *filename) void pdfapp_close(pdfapp_t *app) { - if (app->pages) - pdf_droppagetree(app->pages); - app->pages = nil; - if (app->page) pdf_droppage(app->page); app->page = nil; @@ -257,14 +246,16 @@ static void pdfapp_showpage(pdfapp_t *app, int loadpage, int drawpage) pdf_droppage(app->page); app->page = nil; - obj = pdf_getpageobject(app->pages, app->pageno - 1); + error = pdf_getpageobject(app->xref, app->pageno, &obj); + if (error) + pdfapp_error(app, error); error = pdf_loadpage(&app->page, app->xref, obj); if (error) pdfapp_error(app, error); sprintf(buf, "%s - %d/%d", app->doctitle, - app->pageno, pdf_getpagecount(app->pages)); + app->pageno, app->xref->pagecount); wintitle(app, buf); } @@ -321,24 +312,21 @@ static void pdfapp_gotouri(pdfapp_t *app, fz_obj *uri) static void pdfapp_gotopage(pdfapp_t *app, fz_obj *obj) { - int oid = fz_tonum(obj); - int i; + fz_error error; + int page; - for (i = 0; i < pdf_getpagecount(app->pages); i++) + error = pdf_findpageobject(app->xref, obj, &page); + if (error) + pdfapp_error(app, error); + + if (app->histlen + 1 == 256) { - if (fz_tonum(app->pages->pobj[i]) == oid) - { - if (app->histlen + 1 == 256) - { - memmove(app->hist, app->hist + 1, sizeof(int) * 255); - app->histlen --; - } - app->hist[app->histlen++] = app->pageno; - app->pageno = i + 1; - pdfapp_showpage(app, 1, 1); - return; - } + memmove(app->hist, app->hist + 1, sizeof(int) * 255); + app->histlen --; } + app->hist[app->histlen++] = app->pageno; + app->pageno = page; + pdfapp_showpage(app, 1, 1); } void pdfapp_onresize(pdfapp_t *app, int w, int h) @@ -453,7 +441,7 @@ void pdfapp_onkey(pdfapp_t *app, int c) break; case 'G': - app->pageno = pdf_getpagecount(app->pages); + app->pageno = app->xref->pagecount; break; case 'm': @@ -494,8 +482,8 @@ void pdfapp_onkey(pdfapp_t *app, int c) if (app->pageno < 1) app->pageno = 1; - if (app->pageno > pdf_getpagecount(app->pages)) - app->pageno = pdf_getpagecount(app->pages); + if (app->pageno > app->xref->pagecount) + app->pageno = app->xref->pagecount; if (app->pageno != oldpage) { diff --git a/apps/common/pdftool.c b/apps/common/pdftool.c index 3133a9d8..a607ba3d 100644 --- a/apps/common/pdftool.c +++ b/apps/common/pdftool.c @@ -2,7 +2,6 @@ char *basename = nil; pdf_xref *xref = nil; -pdf_pagetree *pagetree = nil; static void (*cleanup)(void) = nil; void closexref(void); @@ -75,12 +74,6 @@ void closexref(void) if (cleanup) cleanup(); - if (pagetree) - { - pdf_droppagetree(pagetree); - pagetree = nil; - } - if (xref) { pdf_closexref(xref); @@ -90,12 +83,3 @@ void closexref(void) basename = nil; } -void loadpagetree(void) -{ - fz_error error; - - error = pdf_loadpagetree(&pagetree, xref); - if (error) - die(error); -} - diff --git a/apps/mozilla/moz_main.c b/apps/mozilla/moz_main.c index b9cf0079..52deb38f 100644 --- a/apps/mozilla/moz_main.c +++ b/apps/mozilla/moz_main.c @@ -117,15 +117,7 @@ void pdfmoz_open(pdfmoz_t *moz, char *filename) pdfmoz_warn(moz, "Invalid password."); } - /* - * Load page tree - */ - - error = pdf_loadpagetree(&pages, moz->xref); - if (error) - pdfmoz_error(moz, error); - - moz->pagecount = pdf_getpagecount(pages); + moz->pagecount = moz->xrex->pagecount; moz->pages = fz_malloc(sizeof(page_t) * moz->pagecount); for (i = 0; i < moz->pagecount; i++) @@ -288,28 +280,32 @@ void pdfmoz_gotouri(pdfmoz_t *moz, fz_obj *uri) int pdfmoz_getpagenum(pdfmoz_t *moz, fz_obj *obj) { - int oid = fz_tonum(obj); - int i; - for (i = 0; i < moz->pagecount; i++) - if (fz_tonum(moz->pages[i].obj) == oid) - return i; - return 0; + fz_error error; + int page; + int i, y = 0; + + error = pdf_findpageobject(moz->xref, obj, &page); + if (error) + pdfmoz_error(moz, error); + + return page; } void pdfmoz_gotopage(pdfmoz_t *moz, fz_obj *obj) { - int oid = fz_tonum(obj); + fz_error error; + int page; int i, y = 0; - for (i = 0; i < moz->pagecount; i++) - { - if (fz_tonum(moz->pages[i].obj) == oid) - { - SetScrollPos(moz->hwnd, SB_VERT, y, TRUE); - InvalidateRect(moz->hwnd, NULL, FALSE); - return; - } + + error = pdf_findpageobject(moz->xref, obj, &page); + if (error) + pdfmoz_error(moz, error); + + for (i = 0; i < page; i++) y += moz->pages[i].px; - } + + SetScrollPos(moz->hwnd, SB_VERT, y, TRUE); + InvalidateRect(moz->hwnd, NULL, FALSE); } void pdfmoz_onmouse(pdfmoz_t *moz, int x, int y, int click) diff --git a/apps/pdfapp.h b/apps/pdfapp.h index 0acccc2b..3b7852be 100644 --- a/apps/pdfapp.h +++ b/apps/pdfapp.h @@ -26,8 +26,8 @@ struct pdfapp_s char *doctitle; pdf_xref *xref; pdf_outline *outline; - pdf_pagetree *pages; fz_renderer *rast; + int pagecount; /* current view params */ float zoom; diff --git a/apps/pdfdraw.c b/apps/pdfdraw.c index ee4b4a05..14e85781 100644 --- a/apps/pdfdraw.c +++ b/apps/pdfdraw.c @@ -93,7 +93,9 @@ static void drawloadpage(int pagenum, struct benchmark *loadtimes) gettime(&start); } - pageobj = pdf_getpageobject(pagetree, pagenum - 1); + error = pdf_getpageobject(xref, pagenum, &pageobj); + if (error) + die(error); error = pdf_loadpage(&drawpage, xref, pageobj); if (error) die(error); @@ -324,7 +326,7 @@ static void drawpages(char *pagelist) if (strlen(dash) > 1) epage = atoi(dash + 1); else - epage = pdf_getpagecount(pagetree); + epage = xref->pagecount; } if (spage > epage) @@ -332,8 +334,8 @@ static void drawpages(char *pagelist) if (spage < 1) spage = 1; - if (epage > pdf_getpagecount(pagetree)) - epage = pdf_getpagecount(pagetree); + if (epage > xref->pagecount) + epage = xref->pagecount; printf("Drawing pages %d-%d...\n", spage, epage); for (page = spage; page <= epage; page++) @@ -412,7 +414,6 @@ int main(int argc, char **argv) die(error); openxref(argv[fz_optind], password, 0); - loadpagetree(); state = NO_PAGES_DRAWN; } else diff --git a/apps/pdfinfo.c b/apps/pdfinfo.c index d09a45ca..176e26e6 100644 --- a/apps/pdfinfo.c +++ b/apps/pdfinfo.c @@ -658,7 +658,9 @@ gatherinfo(int show, int page) fz_obj *shade; fz_obj *pattern; - pageobj = pdf_getpageobject(pagetree, page - 1); + error = pdf_getpageobject(xref, page, &pageobj); + if (error) + die(error); if (!pageobj) die(fz_throw("cannot retrieve info from page %d", page)); @@ -740,7 +742,7 @@ printglobalinfo(void) fz_debugobj(cryptinfo->u.crypt.obj); } - printf("\nPages: %d\n\n", pdf_getpagecount(pagetree)); + printf("\nPages: %d\n\n", xref->pagecount); } static void @@ -977,7 +979,7 @@ showinfo(char *filename, int show, char *pagelist) if (strlen(dash) > 1) epage = atoi(dash + 1); else - epage = pdf_getpagecount(pagetree); + epage = xref->pagecount; } if (spage > epage) @@ -985,10 +987,10 @@ showinfo(char *filename, int show, char *pagelist) if (spage < 1) spage = 1; - if (epage > pdf_getpagecount(pagetree)) - epage = pdf_getpagecount(pagetree); - if (spage > pdf_getpagecount(pagetree)) - spage = pdf_getpagecount(pagetree); + if (epage > xref->pagecount) + epage = xref->pagecount; + if (spage > xref->pagecount) + spage = xref->pagecount; if (allpages) printf("Retrieving info from pages %d-%d...\n", spage, epage); @@ -1058,7 +1060,6 @@ int main(int argc, char **argv) closexref(); filename = argv[fz_optind]; openxref(filename, password, 0); - loadpagetree(); gatherglobalinfo(); state = NO_INFO_GATHERED; } diff --git a/apps/pdftool.h b/apps/pdftool.h index 44e9e361..6eb75770 100644 --- a/apps/pdftool.h +++ b/apps/pdftool.h @@ -3,7 +3,7 @@ extern char *basename; extern pdf_xref *xref; -extern pdf_pagetree *pagetree; +extern int pages; void die(fz_error error); void setcleanup(void (*cleanup)(void)); @@ -11,4 +11,3 @@ void setcleanup(void (*cleanup)(void)); void openxref(char *filename, char *password, int dieonbadpass); void closexref(void); -void loadpagetree(void); diff --git a/apps/unix/x11pdf.c b/apps/unix/x11pdf.c index e3bc4b72..3ae1d930 100644 --- a/apps/unix/x11pdf.c +++ b/apps/unix/x11pdf.c @@ -320,8 +320,7 @@ static void windrawpageno(pdfapp_t *app) { char s[100]; - int ret = snprintf(s, 100, "Page %d/%d", gapp.pageno, - pdf_getpagecount(gapp.pages)); + int ret = snprintf(s, 100, "Page %d/%d", gapp.pageno, gapp.pagecount); if (ret >= 0) { isshowingpage = 1; diff --git a/mupdf/mupdf.h b/mupdf/mupdf.h index 0b7e062f..3e7cebda 100644 --- a/mupdf/mupdf.h +++ b/mupdf/mupdf.h @@ -120,8 +120,8 @@ struct pdf_xref_s pdf_xrefentry *table; struct pdf_store_s *store; - struct pdf_pagetree_s *pages; struct pdf_outline_s *outlines; + int pagecount; }; struct pdf_xrefentry_s @@ -550,19 +550,10 @@ fz_error pdf_loadannots(pdf_comment **, pdf_link **, pdf_xref *, fz_obj *annots) * Page tree, pages and related objects */ -typedef struct pdf_pagetree_s pdf_pagetree; typedef struct pdf_page_s pdf_page; typedef struct pdf_textline_s pdf_textline; typedef struct pdf_textchar_s pdf_textchar; -struct pdf_pagetree_s -{ - int cap; - int count; - int cursor; - fz_obj **pobj; -}; - struct pdf_page_s { fz_rect mediabox; @@ -587,11 +578,9 @@ struct pdf_textline_s }; /* pagetree.c */ -fz_error pdf_loadpagetree(pdf_pagetree **pp, pdf_xref *xref); -int pdf_getpagecount(pdf_pagetree *pages); -fz_obj *pdf_getpageobject(pdf_pagetree *pages, int p); -void pdf_debugpagetree(pdf_pagetree *pages); -void pdf_droppagetree(pdf_pagetree *pages); +fz_error pdf_getpagecount(pdf_xref *xref, int *pagesp); +fz_error pdf_getpageobject(pdf_xref *xref, int p, fz_obj **pagep); +fz_error pdf_findpageobject(pdf_xref *xref, fz_obj *pageobj, int *page); /* page.c */ fz_error pdf_loadpage(pdf_page **pagep, pdf_xref *xref, fz_obj *ref); diff --git a/mupdf/pdf_open.c b/mupdf/pdf_open.c index c3e96224..da5f138b 100644 --- a/mupdf/pdf_open.c +++ b/mupdf/pdf_open.c @@ -720,6 +720,13 @@ pdf_loadxref(pdf_xref *xref, char *filename) goto cleanup; } + error = pdf_getpagecount(xref, &xref->pagecount); + if (error) + { + error = fz_rethrow(error, "cannot determine page count"); + goto cleanup; + } + return fz_okay; cleanup: diff --git a/mupdf/pdf_pagetree.c b/mupdf/pdf_pagetree.c index b18e2ae7..0521ea6c 100644 --- a/mupdf/pdf_pagetree.c +++ b/mupdf/pdf_pagetree.c @@ -10,26 +10,28 @@ struct stuff }; static fz_error -loadpagetree(pdf_xref *xref, pdf_pagetree *pages, - struct stuff inherit, fz_obj *obj, int *pagenum) +getpagecount(pdf_xref *xref, fz_obj *node, int *pagesp) { fz_error error; fz_obj *type; fz_obj *kids; - fz_obj *kobj; - fz_obj *inh; + fz_obj *count; char *typestr; + int pages = 0; int i; - if (fz_isnull(obj)) + if (fz_isnull(node)) return fz_throw("pagetree node is missing"); - type = fz_dictgets(obj, "Type"); + type = fz_dictgets(node, "Type"); + kids = fz_dictgets(node, "Kids"); + count = fz_dictgets(node, "Count"); + if (!type) { - fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(obj), fz_togen(obj)); + fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node)); - kids = fz_dictgets(obj, "Kids"); + kids = fz_dictgets(node, "Kids"); if (kids) { fz_warn("guessing it may be a pagetree node, continuing..."); @@ -44,87 +46,193 @@ loadpagetree(pdf_xref *xref, pdf_pagetree *pages, else typestr = fz_toname(type); - if (strcmp(typestr, "Page") == 0) + if (!strcmp(typestr, "Page")) + (*pagesp)++; + + else if (!strcmp(typestr, "Pages")) { - pdf_logpage("page %d (%d %d R)\n", *pagenum, fz_tonum(obj), fz_togen(obj)); + if (!fz_isarray(kids)) + return fz_throw("page tree contains no pages"); - (*pagenum)++; + pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node)); - if (inherit.resources && !fz_dictgets(obj, "Resources")) + for (i = 0; i < fz_arraylen(kids); i++) { - pdf_logpage("inherit resources (%d)\n", pages->cursor); - error = fz_dictputs(obj, "Resources", inherit.resources); - if (error) return fz_rethrow(error, "cannot inherit page tree resources"); + fz_obj *obj = fz_arrayget(kids, i); + + /* prevent infinite recursion possible in maliciously crafted PDFs */ + if (obj == node) + return fz_throw("corrupted pdf file"); + + error = getpagecount(xref, obj, &pages); + if (error) + return fz_rethrow(error, "cannot load pagesubtree (%d %d R)", fz_tonum(obj), fz_togen(obj)); } - if (inherit.mediabox && !fz_dictgets(obj, "MediaBox")) + if (pages != fz_toint(count)) { - pdf_logpage("inherit mediabox (%d)\n", pages->cursor); - error = fz_dictputs(obj, "MediaBox", inherit.mediabox); - if (error) return fz_rethrow(error, "cannot inherit page tree mediabox"); + fz_warn("page tree node contains incorrect number of page, continuing..."); + + error = fz_newint(&count, pages); + if (!error) + { + error = fz_dictputs(node, "Count", count); + fz_dropobj(count); + } + if (error) + return fz_rethrow(error, "cannot correct wrong page count"); } - if (inherit.cropbox && !fz_dictgets(obj, "CropBox")) + pdf_logpage("%d pages\n", pages); + + (*pagesp) += pages; + + pdf_logpage("}\n"); + } + + return fz_okay; +} + +fz_error +pdf_getpagecount(pdf_xref *xref, int *pagesp) +{ + fz_error error; + fz_obj *ref; + fz_obj *catalog; + fz_obj *pages; + + ref = fz_dictgets(xref->trailer, "Root"); + catalog = fz_resolveindirect(ref); + + pages = fz_dictgets(catalog, "Pages"); + pdf_logpage("determining page count (%d %d R) {\n", fz_tonum(pages), fz_togen(pages)); + + *pagesp = 0; + error = getpagecount(xref, pages, pagesp); + if (error) + return fz_rethrow(error, "cannot determine page count"); + + pdf_logpage("}\n"); + + return fz_okay; +} + +static fz_error +getpageobject(pdf_xref *xref, struct stuff inherit, fz_obj *node, int *pagesp, int pageno, fz_obj **pagep) +{ + fz_error error; + char *typestr; + fz_obj *type; + fz_obj *kids; + fz_obj *count; + fz_obj *inh; + int i; + + if (fz_isnull(node)) + return fz_throw("pagetree node is missing"); + + type = fz_dictgets(node, "Type"); + kids = fz_dictgets(node, "Kids"); + count = fz_dictgets(node, "Count"); + + if (!type) + { + fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node)); + + kids = fz_dictgets(node, "Kids"); + if (kids) { - pdf_logpage("inherit cropbox (%d)\n", pages->cursor); - error = fz_dictputs(obj, "CropBox", inherit.cropbox); - if (error) return fz_rethrow(error, "cannot inherit page tree cropbox"); + fz_warn("guessing it may be a pagetree node, continuing..."); + typestr = "Pages"; } - - if (inherit.rotate && !fz_dictgets(obj, "Rotate")) + else { - pdf_logpage("inherit rotate (%d)\n", pages->cursor); - error = fz_dictputs(obj, "Rotate", inherit.rotate); - if (error) return fz_rethrow(error, "cannot inherit page tree rotate"); + fz_warn("guessing it may be a page, continuing..."); + typestr = "Page"; } + } + else + typestr = fz_toname(type); - if (pages->cursor >= pages->cap) + if (!strcmp(typestr, "Page")) + { + (*pagesp)++; + if (*pagesp == pageno) { - fz_warn("initial page tree size too small, enlarging"); + pdf_logpage("page %d (%d %d R)\n", *pagesp, fz_tonum(node), fz_togen(node)); - pages->cap += 10; - pages->count = pages->cursor + 1; - pages->pobj = fz_realloc(pages->pobj, sizeof(fz_obj*) * pages->cap); - if (!pages->pobj) - return fz_throw("error allocating enlarged page tree"); - } + if (inherit.resources && !fz_dictgets(node, "Resources")) + { + pdf_logpage("inherited resources\n"); + error = fz_dictputs(node, "Resources", inherit.resources); + if (error) + return fz_rethrow(error, "cannot inherit page tree resources"); + } + + if (inherit.mediabox && !fz_dictgets(node, "MediaBox")) + { + pdf_logpage("inherit mediabox\n"); + error = fz_dictputs(node, "MediaBox", inherit.mediabox); + if (error) + return fz_rethrow(error, "cannot inherit page tree mediabox"); + } - pages->pobj[pages->cursor] = fz_keepobj(obj); - pages->cursor ++; + if (inherit.cropbox && !fz_dictgets(node, "CropBox")) + { + pdf_logpage("inherit cropbox\n"); + error = fz_dictputs(node, "CropBox", inherit.cropbox); + if (error) + return fz_rethrow(error, "cannot inherit page tree cropbox"); + } + + if (inherit.rotate && !fz_dictgets(node, "Rotate")) + { + pdf_logpage("inherit rotate\n"); + error = fz_dictputs(node, "Rotate", inherit.rotate); + if (error) + return fz_rethrow(error, "cannot inherit page tree rotate"); + } + + *pagep = node; + } } - else if (strcmp(typestr, "Pages") == 0) + else if (!strcmp(typestr, "Pages")) { - inh = fz_dictgets(obj, "Resources"); + if (!fz_isarray(kids)) + return fz_throw("page tree contains no pages"); + + if (*pagesp + fz_toint(count) < pageno) + { + (*pagesp) += fz_toint(count); + return fz_okay; + } + + inh = fz_dictgets(node, "Resources"); if (inh) inherit.resources = inh; - inh = fz_dictgets(obj, "MediaBox"); + inh = fz_dictgets(node, "MediaBox"); if (inh) inherit.mediabox = inh; - inh = fz_dictgets(obj, "CropBox"); + inh = fz_dictgets(node, "CropBox"); if (inh) inherit.cropbox = inh; - inh = fz_dictgets(obj, "Rotate"); + inh = fz_dictgets(node, "Rotate"); if (inh) inherit.rotate = inh; - kids = fz_dictgets(obj, "Kids"); - if (!fz_isarray(kids)) - return fz_throw("page tree contains no pages"); - - pdf_logpage("subtree %d pages (%d %d R) {\n", fz_arraylen(kids), fz_tonum(obj), fz_togen(obj)); + pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node)); - for (i = 0; i < fz_arraylen(kids); i++) + for (i = 0; !(*pagep) && i < fz_arraylen(kids); i++) { - kobj = fz_arrayget(kids, i); - if (kobj == obj) - { - /* prevent infinite recursion possible in maliciously crafted PDFs */ + fz_obj *obj = fz_arrayget(kids, i); + + /* prevent infinite recursion possible in maliciously crafted PDFs */ + if (obj == node) return fz_throw("corrupted pdf file"); - } - error = loadpagetree(xref, pages, inherit, kobj, pagenum); + error = getpageobject(xref, inherit, obj, pagesp, pageno, pagep); if (error) - return fz_rethrow(error, "cannot load pagesubtree (%d %d R)", fz_tonum(kobj), fz_togen(kobj)); + return fz_rethrow(error, "cannot load pagesubtree (%d %d R)", fz_tonum(obj), fz_togen(obj)); } pdf_logpage("}\n"); @@ -133,101 +241,134 @@ loadpagetree(pdf_xref *xref, pdf_pagetree *pages, return fz_okay; } -void -pdf_debugpagetree(pdf_pagetree *pages) -{ - int i; - printf("<<\n /Type /Pages\n /Count %d\n /Kids [\n", pages->count); - for (i = 0; i < pages->count; i++) { - printf(" %% page %d\n", i + 1); - printf(" %d %d R\n", fz_tonum(pages->pobj[i]), fz_togen(pages->pobj[i])); - } - printf(" ]\n>>\n"); -} - fz_error -pdf_loadpagetree(pdf_pagetree **pp, pdf_xref *xref) +pdf_getpageobject(pdf_xref *xref, int pageno, fz_obj **pagep) { fz_error error; struct stuff inherit; - pdf_pagetree *p = nil; - fz_obj *catalog = nil; - fz_obj *pages = nil; - fz_obj *trailer; fz_obj *ref; + fz_obj *catalog; + fz_obj *pages; int count; - int pagenum = 1; inherit.resources = nil; inherit.mediabox = nil; inherit.cropbox = nil; inherit.rotate = nil; - trailer = xref->trailer; - - ref = fz_dictgets(trailer, "Root"); + ref = fz_dictgets(xref->trailer, "Root"); catalog = fz_resolveindirect(ref); pages = fz_dictgets(catalog, "Pages"); - count = fz_toint(fz_dictgets(pages, "Count")); + pdf_logpage("get page %d (%d %d R) {\n", pageno, fz_tonum(pages), fz_togen(pages)); - p = fz_malloc(sizeof(pdf_pagetree)); - if (!p) { error = fz_rethrow(-1, "out of memory: page tree struct"); goto cleanup; } + *pagep = nil; + count = 0; + error = getpageobject(xref, inherit, pages, &count, pageno, pagep); + if (error) + return fz_rethrow(error, "cannot find page %d", pageno); - pdf_logpage("load pagetree %d pages (%d %d R) ptr=%p {\n", count, fz_tonum(pages), fz_togen(pages), p); + pdf_logpage("}\n"); + + return fz_okay; +} - p->pobj = nil; - p->cap = count; - p->count = count; - p->cursor = 0; +static fz_error +findpageobject(pdf_xref *xref, fz_obj *node, fz_obj *page, int *pagenop, int *foundp) +{ + fz_error error; + char *typestr; + fz_obj *type; + fz_obj *kids; + int i; - p->pobj = fz_malloc(sizeof(fz_obj*) * p->cap); - if (!p->pobj) { error = fz_rethrow(-1, "out of memory: page tree object array"); goto cleanup; } + if (fz_isnull(node)) + return fz_throw("pagetree node is missing"); - error = loadpagetree(xref, p, inherit, pages, &pagenum); - if (error) { error = fz_rethrow(error, "cannot load pagetree (%d %d R)", fz_tonum(pages), fz_togen(pages)); goto cleanup; } + type = fz_dictgets(node, "Type"); + kids = fz_dictgets(node, "Kids"); - pdf_logpage("}\n", count); + if (!type) + { + fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node)); - *pp = p; - return fz_okay; + kids = fz_dictgets(node, "Kids"); + if (kids) + { + fz_warn("guessing it may be a pagetree node, continuing..."); + typestr = "Pages"; + } + else + { + fz_warn("guessing it may be a page, continuing..."); + typestr = "Page"; + } + } + else + typestr = fz_toname(type); -cleanup: - if (p) + if (!strcmp(typestr, "Page")) { - fz_free(p->pobj); - fz_free(p); + (*pagenop)++; + if (fz_tonum(node) == fz_tonum(page)) + { + pdf_logpage("page %d (%d %d R)\n", *pagenop, fz_tonum(node), fz_togen(node)); + *foundp = 1; + } } - return error; /* already rethrown */ -} -int -pdf_getpagecount(pdf_pagetree *pages) -{ - return pages->count; -} + else if (!strcmp(typestr, "Pages")) + { + if (!fz_isarray(kids)) + return fz_throw("page tree contains no pages"); -fz_obj * -pdf_getpageobject(pdf_pagetree *pages, int p) -{ - if (p < 0 || p >= pages->count) - return nil; - return pages->pobj[p]; + pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node)); + + for (i = 0; !(*foundp) && i < fz_arraylen(kids); i++) + { + fz_obj *obj = fz_arrayget(kids, i); + + /* prevent infinite recursion possible in maliciously crafted PDFs */ + if (obj == node) + return fz_throw("corrupted pdf file"); + + error = findpageobject(xref, obj, page, pagenop, foundp); + if (error) + return fz_rethrow(error, "cannot load pagesubtree (%d %d R)", fz_tonum(obj), fz_togen(obj)); + } + + pdf_logpage("}\n"); + } + + return fz_okay; } -void -pdf_droppagetree(pdf_pagetree *pages) +fz_error +pdf_findpageobject(pdf_xref *xref, fz_obj *page, int *pagenop) { - int i; + fz_error error; + fz_obj *ref; + fz_obj *catalog; + fz_obj *pages; + int found; - pdf_logpage("drop pagetree %p\n", pages); + ref = fz_dictgets(xref->trailer, "Root"); + catalog = fz_resolveindirect(ref); - for (i = 0; i < pages->count; i++) { - if (pages->pobj[i]) - fz_dropobj(pages->pobj[i]); - } + pages = fz_dictgets(catalog, "Pages"); + pdf_logpage("find page object (%d %d R) (%d %d R) {\n", fz_tonum(page), fz_togen(page), fz_tonum(pages), fz_togen(pages)); - fz_free(pages->pobj); - fz_free(pages); + *pagenop = 0; + found = 0; + error = findpageobject(xref, pages, page, pagenop, &found); + if (error) + return fz_rethrow(error, "cannot find page object (%d %d R)", fz_tonum(page), fz_togen(page)); + + pdf_logpage("}\n"); + + if (!found) + return fz_throw("cannot find page object (%d %d R)", fz_tonum(page), fz_togen(page)); + + return fz_okay; } |