summary refs log tree commit diff
diff options
context:
space:
mode:
author Tor Andersson <tor@ghostscript.com> 2010-06-21 16:23:32 +0200
committer Tor Andersson <tor@ghostscript.com> 2010-06-21 16:23:32 +0200
commit 17d2a8b03562410e644d55c0452c63c142d9b333 (patch)
tree c19113375ac1bdd9c659cda40c1497e013990111
parent 1f36f832e475ea995e978b1d12998dc3071d8410 (diff)
download mupdf-17d2a8b03562410e644d55c0452c63c142d9b333.tar.xz
Preload page tree into an array to avoid unnecessary linear searching when looking for a given page.
-rw-r--r-- apps/pdfapp.c 5
-rw-r--r-- apps/pdfclean.c 2
-rw-r--r-- apps/pdfdraw.c 4
-rw-r--r-- apps/pdfextract.c 2
-rw-r--r-- apps/pdfinfo.c 2
-rw-r--r-- apps/pdfshow.c 2
-rw-r--r-- apps/pdftool.c 11
-rw-r--r-- apps/pdftool.h 2
-rw-r--r-- mupdf/mupdf.h 6
-rw-r--r-- mupdf/pdf_pagetree.c 400
10 files changed, 114 insertions, 322 deletions
diff --git a/apps/pdfapp.c b/apps/pdfapp.c
index 75ec8368..c955d1a0 100644
--- a/apps/pdfapp.c
+++ b/apps/pdfapp.c
@@ -89,6 +89,7 @@ void pdfapp_invert(pdfapp_t *app, fz_bbox rect)
void pdfapp_open(pdfapp_t *app, char *filename, int fd)
{
+ fz_error error;
fz_obj *obj;
fz_obj *info;
char *password = "";
@@ -147,6 +148,10 @@ void pdfapp_open(pdfapp_t *app, char *filename, int fd)
* Start at first page
*/
+ error = pdf_loadpagetree(app->xref);
+ if (error)
+ pdfapp_error(app, fz_rethrow(error, "cannot load page tree"));
+
app->pagecount = pdf_getpagecount(app->xref);
app->shrinkwrap = 1;
diff --git a/apps/pdfclean.c b/apps/pdfclean.c
index a6cf402c..021eb2bc 100644
--- a/apps/pdfclean.c
+++ b/apps/pdfclean.c
@@ -295,7 +295,7 @@ int main(int argc, char **argv)
if (argc - fz_optind > 0)
subset = 1;
- openxref(infile, password, 0);
+ openxref(infile, password, 0, 0);
out = fopen(outfile, "wb");
if (!out)
diff --git a/apps/pdfdraw.c b/apps/pdfdraw.c
index 345d8c73..46925838 100644
--- a/apps/pdfdraw.c
+++ b/apps/pdfdraw.c
@@ -292,7 +292,7 @@ static void drawtxt(int pagenum, struct benchmark *loadtimes)
fz_freedevice(dev);
printf("[Page %d]\n", pagenum);
- fz_debugtextspan(text);
+ fz_debugtextspanxml(text);
printf("\n");
fz_freetextspan(text);
@@ -446,7 +446,7 @@ int main(int argc, char **argv)
drawcache = fz_newglyphcache();
- openxref(argv[fz_optind], password, 0);
+ openxref(argv[fz_optind], password, 0, 1);
state = NO_PAGES_DRAWN;
}
else
diff --git a/apps/pdfextract.c b/apps/pdfextract.c
index 25b00e8e..297fdbe5 100644
--- a/apps/pdfextract.c
+++ b/apps/pdfextract.c
@@ -230,7 +230,7 @@ int main(int argc, char **argv)
if (fz_optind == argc)
showusage();
- openxref(argv[fz_optind++], password, 0);
+ openxref(argv[fz_optind++], password, 0, 0);
if (fz_optind == argc)
for (o = 0; o < xref->len; o++)
diff --git a/apps/pdfinfo.c b/apps/pdfinfo.c
index ceeb8ded..7702f25b 100644
--- a/apps/pdfinfo.c
+++ b/apps/pdfinfo.c
@@ -1021,7 +1021,7 @@ int main(int argc, char **argv)
closexref();
filename = argv[fz_optind];
printf("%s:\n", filename);
- openxref(filename, password, 0);
+ openxref(filename, password, 0, 1);
gatherglobalinfo();
state = NO_INFO_GATHERED;
}
diff --git a/apps/pdfshow.c b/apps/pdfshow.c
index 85b78d29..ba2d51f6 100644
--- a/apps/pdfshow.c
+++ b/apps/pdfshow.c
@@ -141,7 +141,7 @@ int main(int argc, char **argv)
if (fz_optind == argc)
showusage();
- openxref(argv[fz_optind++], password, 0);
+ openxref(argv[fz_optind++], password, 0, 0);
if (fz_optind == argc)
showtrailer();
diff --git a/apps/pdftool.c b/apps/pdftool.c
index 42f712dd..5724e022 100644
--- a/apps/pdftool.c
+++ b/apps/pdftool.c
@@ -21,9 +21,10 @@ void setcleanup(void (*func)(void))
cleanup = func;
}
-void openxref(char *filename, char *password, int dieonbadpass)
+void openxref(char *filename, char *password, int dieonbadpass, int loadpages)
{
fz_stream *file;
+ fz_error error;
int okay;
int fd;
@@ -52,7 +53,13 @@ void openxref(char *filename, char *password, int dieonbadpass)
die(fz_throw("invalid password"));
}
- pagecount = pdf_getpagecount(xref);
+ if (loadpages)
+ {
+ error = pdf_loadpagetree(xref);
+ if (error)
+ die(fz_rethrow(error, "cannot load page tree"));
+ pagecount = pdf_getpagecount(xref);
+ }
}
void flushxref(void)
diff --git a/apps/pdftool.h b/apps/pdftool.h
index 2bc39657..1560c225 100644
--- a/apps/pdftool.h
+++ b/apps/pdftool.h
@@ -8,7 +8,7 @@ extern int pagecount;
void die(fz_error error);
void setcleanup(void (*cleanup)(void));
-void openxref(char *filename, char *password, int dieonbadpass);
+void openxref(char *filename, char *password, int dieonbadpass, int loadpages);
void flushxref(void);
void closexref(void);
diff --git a/mupdf/mupdf.h b/mupdf/mupdf.h
index a0b3155f..73610990 100644
--- a/mupdf/mupdf.h
+++ b/mupdf/mupdf.h
@@ -129,6 +129,11 @@ struct pdf_xref_s
int cap;
pdf_xrefentry *table;
+ int pagelen;
+ int pagecap;
+ fz_obj **pageobjs;
+ fz_obj **pagerefs;
+
struct pdf_store_s *store;
struct pdf_outline_s *outlines;
@@ -538,6 +543,7 @@ struct pdf_page_s
};
/* pagetree.c */
+fz_error pdf_loadpagetree(pdf_xref *xref);
int pdf_getpagecount(pdf_xref *xref);
fz_obj * pdf_getpageobject(pdf_xref *xref, int p);
int pdf_findpageobject(pdf_xref *xref, fz_obj *pageobj);
diff --git a/mupdf/pdf_pagetree.c b/mupdf/pdf_pagetree.c
index d9339712..086dec7e 100644
--- a/mupdf/pdf_pagetree.c
+++ b/mupdf/pdf_pagetree.c
@@ -1,7 +1,7 @@
#include "fitz.h"
#include "mupdf.h"
-struct stuff
+struct info
{
fz_obj *resources;
fz_obj *mediabox;
@@ -9,343 +9,117 @@ struct stuff
fz_obj *rotate;
};
-static void
-pdf_getpagecountimp(pdf_xref *xref, fz_obj *node, int *pagesp)
-{
- fz_obj *type;
- fz_obj *kids;
- fz_obj *count;
- char *typestr;
- int pages = 0;
- int i;
-
- if (!fz_isdict(node))
- {
- fz_warn("pagetree node is missing, igoring missing pages...");
- return;
- }
-
- type = fz_dictgets(node, "Type");
- kids = fz_dictgets(node, "Kids");
- count = fz_dictgets(node, "Count");
-
- if (fz_isname(type))
- typestr = fz_toname(type);
- else
- {
- fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node));
-
- kids = fz_dictgets(node, "Kids");
- if (kids)
- {
- fz_warn("guessing it may be a pagetree node, continuing...");
- typestr = "Pages";
- }
- else
- {
- fz_warn("guessing it may be a page, continuing...");
- typestr = "Page";
- }
- }
-
- if (!strcmp(typestr, "Page"))
- (*pagesp)++;
-
- else if (!strcmp(typestr, "Pages"))
- {
- if (!fz_isarray(kids))
- fz_warn("page tree node contains no pages");
-
- pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node));
-
- for (i = 0; i < fz_arraylen(kids); i++)
- {
- fz_obj *obj = fz_arrayget(kids, i);
-
- /* prevent infinite recursion possible in maliciously crafted PDFs */
- if (obj == node)
- {
- fz_warn("cyclic page tree");
- return;
- }
-
- pdf_getpagecountimp(xref, obj, &pages);
- }
-
- if (pages != fz_toint(count))
- {
- fz_warn("page tree node contains incorrect number of pages, continuing...");
- count = fz_newint(pages);
- fz_dictputs(node, "Count", count);
- fz_dropobj(count);
- }
-
- pdf_logpage("%d pages\n", pages);
-
- (*pagesp) += pages;
-
- pdf_logpage("}\n");
- }
-}
-
int
pdf_getpagecount(pdf_xref *xref)
{
- fz_obj *catalog;
- fz_obj *pages;
- int count;
-
- catalog = fz_dictgets(xref->trailer, "Root");
-
- pages = fz_dictgets(catalog, "Pages");
- pdf_logpage("determining page count (%d %d R) {\n", fz_tonum(pages), fz_togen(pages));
-
- count = 0;
- pdf_getpagecountimp(xref, pages, &count);
-
- pdf_logpage("}\n");
-
- return count;
+ return xref->pagelen;
}
-static void
-pdf_getpageobjectimp(pdf_xref *xref, struct stuff inherit, fz_obj *node, int *pagesp, int pageno, fz_obj **pagep)
+fz_obj *
+pdf_getpageobject(pdf_xref *xref, int number)
{
- char *typestr;
- fz_obj *type;
- fz_obj *kids;
- fz_obj *count;
- fz_obj *inh;
- int i;
-
- if (!fz_isdict(node))
- {
- fz_warn("pagetree node is missing, ignoring missing pages...");
- *pagep = nil;
- return;
- }
-
- type = fz_dictgets(node, "Type");
- kids = fz_dictgets(node, "Kids");
- count = fz_dictgets(node, "Count");
-
- if (fz_isname(type))
- typestr = fz_toname(type);
- else
- {
- fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node));
-
- kids = fz_dictgets(node, "Kids");
- if (kids)
- {
- fz_warn("guessing it may be a pagetree node, continuing...");
- typestr = "Pages";
- }
- else
- {
- fz_warn("guessing it may be a page, continuing...");
- typestr = "Page";
- }
- }
-
- if (!strcmp(typestr, "Page"))
- {
- (*pagesp)++;
- if (*pagesp == pageno)
- {
- pdf_logpage("page %d (%d %d R)\n", *pagesp, fz_tonum(node), fz_togen(node));
-
- if (inherit.resources && !fz_dictgets(node, "Resources"))
- {
- pdf_logpage("inherited resources\n");
- fz_dictputs(node, "Resources", inherit.resources);
- }
-
- if (inherit.mediabox && !fz_dictgets(node, "MediaBox"))
- {
- pdf_logpage("inherit mediabox\n");
- fz_dictputs(node, "MediaBox", inherit.mediabox);
- }
-
- if (inherit.cropbox && !fz_dictgets(node, "CropBox"))
- {
- pdf_logpage("inherit cropbox\n");
- fz_dictputs(node, "CropBox", inherit.cropbox);
- }
-
- if (inherit.rotate && !fz_dictgets(node, "Rotate"))
- {
- pdf_logpage("inherit rotate\n");
- fz_dictputs(node, "Rotate", inherit.rotate);
- }
-
- *pagep = node;
- }
- }
-
- else if (!strcmp(typestr, "Pages"))
- {
- if (!fz_isarray(kids))
- fz_warn("page tree node contains no pages");
-
- if (*pagesp + fz_toint(count) < pageno)
- {
- (*pagesp) += fz_toint(count);
- return;
- }
-
- inh = fz_dictgets(node, "Resources");
- if (inh) inherit.resources = inh;
-
- inh = fz_dictgets(node, "MediaBox");
- if (inh) inherit.mediabox = inh;
-
- inh = fz_dictgets(node, "CropBox");
- if (inh) inherit.cropbox = inh;
-
- inh = fz_dictgets(node, "Rotate");
- if (inh) inherit.rotate = inh;
-
- pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node));
-
- for (i = 0; !(*pagep) && i < fz_arraylen(kids); i++)
- {
- fz_obj *obj = fz_arrayget(kids, i);
-
- /* prevent infinite recursion possible in maliciously crafted PDFs */
- if (obj == node)
- {
- fz_warn("cyclic page tree");
- return;
- }
-
- pdf_getpageobjectimp(xref, inherit, obj, pagesp, pageno, pagep);
- }
-
- pdf_logpage("}\n");
- }
+ if (number > 0 && number <= xref->pagelen)
+ return xref->pageobjs[number - 1];
+ return nil;
}
-fz_obj *
-pdf_getpageobject(pdf_xref *xref, int pageno)
+int
+pdf_findpageobject(pdf_xref *xref, fz_obj *page)
{
- struct stuff inherit;
- fz_obj *catalog;
- fz_obj *pages;
- fz_obj *page;
- int count;
-
- inherit.resources = nil;
- inherit.mediabox = nil;
- inherit.cropbox = nil;
- inherit.rotate = nil;
-
- catalog = fz_dictgets(xref->trailer, "Root");
-
- pages = fz_dictgets(catalog, "Pages");
- pdf_logpage("get page %d (%d %d R) {\n", pageno, fz_tonum(pages), fz_togen(pages));
-
- page = nil;
- count = 0;
- pdf_getpageobjectimp(xref, inherit, pages, &count, pageno, &page);
- if (!page)
- fz_warn("cannot find page %d", pageno);
-
- pdf_logpage("}\n");
-
- return page;
+ int num = fz_tonum(page);
+ int gen = fz_togen(page);
+ int i;
+ for (i = 0; i < xref->pagelen; i++)
+ if (num == fz_tonum(xref->pagerefs[i]) && gen == fz_togen(xref->pagerefs[i]))
+ return i + 1;
+ return 0;
}
-static void
-pdf_findpageobjectimp(pdf_xref *xref, fz_obj *node, fz_obj *page, int *pagenop, int *foundp)
+void
+pdf_loadpagetreenode(pdf_xref *xref, fz_obj *node, struct info info)
{
- char *typestr;
- fz_obj *type;
+ fz_obj *dict;
fz_obj *kids;
- int i;
-
- if (!fz_isdict(node))
- return;
+ fz_obj *count;
+ fz_obj *obj;
+ int i, n;
- type = fz_dictgets(node, "Type");
kids = fz_dictgets(node, "Kids");
+ count = fz_dictgets(node, "Count");
- if (fz_isname(type))
- typestr = fz_toname(type);
- else
- {
- fz_warn("pagetree node (%d %d R) lacks required type", fz_tonum(node), fz_togen(node));
-
- kids = fz_dictgets(node, "Kids");
- if (kids)
- {
- fz_warn("guessing it may be a pagetree node, continuing...");
- typestr = "Pages";
- }
- else
- {
- fz_warn("guessing it may be a page, continuing...");
- typestr = "Page";
- }
- }
-
- if (!strcmp(typestr, "Page"))
+ if (fz_isarray(kids) && fz_isint(count))
{
- (*pagenop)++;
- if (fz_tonum(node) == fz_tonum(page))
+ obj = fz_dictgets(node, "Resources");
+ if (obj)
+ info.resources = obj;
+ obj = fz_dictgets(node, "MediaBox");
+ if (obj)
+ info.mediabox = obj;
+ obj = fz_dictgets(node, "CropBox");
+ if (obj)
+ info.cropbox = obj;
+ obj = fz_dictgets(node, "Rotate");
+ if (obj)
+ info.rotate = obj;
+
+ n = fz_arraylen(kids);
+ for (i = 0; i < n; i++)
{
- pdf_logpage("page %d (%d %d R)\n", *pagenop, fz_tonum(node), fz_togen(node));
- *foundp = 1;
+ obj = fz_arrayget(kids, i);
+ pdf_loadpagetreenode(xref, obj, info);
}
}
-
- else if (!strcmp(typestr, "Pages"))
+ else
{
- if (!fz_isarray(kids))
- fz_warn("page tree node contains no pages");
-
- pdf_logpage("subtree (%d %d R) {\n", fz_tonum(node), fz_togen(node));
-
- for (i = 0; !(*foundp) && i < fz_arraylen(kids); i++)
+ dict = fz_resolveindirect(node);
+
+ if (info.resources && !fz_dictgets(dict, "Resources"))
+ fz_dictputs(dict, "Resources", info.resources);
+ if (info.mediabox && !fz_dictgets(dict, "MediaBox"))
+ fz_dictputs(dict, "MediaBox", info.mediabox);
+ if (info.cropbox && !fz_dictgets(dict, "CropBox"))
+ fz_dictputs(dict, "CropBox", info.cropbox);
+ if (info.rotate && !fz_dictgets(dict, "Rotate"))
+ fz_dictputs(dict, "Rotate", info.rotate);
+
+ if (xref->pagelen == xref->pagecap)
{
- fz_obj *obj = fz_arrayget(kids, i);
-
- /* prevent infinite recursion possible in maliciously crafted PDFs */
- if (obj == node)
- {
- fz_warn("cyclic page tree");
- return;
- }
-
- pdf_findpageobjectimp(xref, obj, page, pagenop, foundp);
+ fz_warn("found more pages than expected");
+ xref->pagecap ++;
+ xref->pagerefs = fz_realloc(xref->pagerefs, sizeof(fz_obj*) * xref->pagecap);
+ xref->pageobjs = fz_realloc(xref->pageobjs, sizeof(fz_obj*) * xref->pagecap);
}
- pdf_logpage("}\n");
+ xref->pagerefs[xref->pagelen] = fz_keepobj(node);
+ xref->pageobjs[xref->pagelen] = fz_keepobj(dict);
+ xref->pagelen ++;
}
}
-int
-pdf_findpageobject(pdf_xref *xref, fz_obj *page)
+fz_error
+pdf_loadpagetree(pdf_xref *xref)
{
- fz_obj *catalog;
- fz_obj *pages;
- int pageno;
- int found;
-
- catalog = fz_dictgets(xref->trailer, "Root");
-
- pages = fz_dictgets(catalog, "Pages");
- pdf_logpage("find page object (%d %d R) (%d %d R) {\n", fz_tonum(page), fz_togen(page), fz_tonum(pages), fz_togen(pages));
-
- pageno = 0;
- found = 0;
- pdf_findpageobjectimp(xref, pages, page, &pageno, &found);
-
- pdf_logpage("}\n");
-
- if (!found)
- fz_warn("cannot find page object (%d %d R)", fz_tonum(page), fz_togen(page));
-
- return pageno;
-}
-
+ struct info info;
+ fz_obj *catalog = fz_dictgets(xref->trailer, "Root");
+ fz_obj *pages = fz_dictgets(catalog, "Pages");
+ fz_obj *count = fz_dictgets(pages, "Count");
+
+ if (!fz_isdict(pages))
+ return fz_throw("missing page tree");
+ if (!fz_isint(count))
+ return fz_throw("missing page count");
+
+ xref->pagecap = fz_toint(count);
+ xref->pagelen = 0;
+ xref->pagerefs = fz_malloc(sizeof(fz_obj*) * xref->pagecap);
+ xref->pageobjs = fz_malloc(sizeof(fz_obj*) * xref->pagecap);
+
+ info.resources = nil;
+ info.mediabox = nil;
+ info.cropbox = nil;
+ info.rotate = nil;
+
+ pdf_loadpagetreenode(xref, pages, info);
+
+ return fz_okay;
+}
\ No newline at end of file