summaryrefslogtreecommitdiff
path: root/source/pdf
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2015-01-05 17:34:29 +0000
committerRobin Watts <robin.watts@artifex.com>2015-01-06 17:16:03 +0000
commit90c560641d9b459a658029eefc4cbb02fdbca0b5 (patch)
tree036e64aef09d403c0d2f5d699a9c858eebe556f9 /source/pdf
parente460246943441078ff28dd413a19ee6f186c3764 (diff)
downloadmupdf-90c560641d9b459a658029eefc4cbb02fdbca0b5.tar.xz
Add xref_index to speed searching of sparse xrefs.
Add a new index that quickly maps object number to the first xref in which an object appears. This appears to get us the speed back that we lost when moving to sparse xrefs.
Diffstat (limited to 'source/pdf')
-rw-r--r--source/pdf/pdf-repair.c3
-rw-r--r--source/pdf/pdf-xref.c82
2 files changed, 76 insertions, 9 deletions
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c
index 7e35c5fb..3bd3850a 100644
--- a/source/pdf/pdf-repair.c
+++ b/source/pdf/pdf-repair.c
@@ -235,7 +235,7 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen)
}
void
-pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
+pdf_repair_xref(pdf_document *doc)
{
pdf_obj *dict, *obj = NULL;
pdf_obj *length;
@@ -258,6 +258,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
int next;
int i, n, c;
fz_context *ctx = doc->ctx;
+ pdf_lexbuf *buf = &doc->lexbuf.base;
fz_var(encrypt);
fz_var(id);
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 0444085f..c99f8350 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -57,6 +57,19 @@ static void pdf_free_xref_sections(pdf_document *doc)
doc->num_xref_sections = 0;
}
+static void
+extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen)
+{
+ int i;
+
+ doc->xref_index = fz_resize_array(ctx, doc->xref_index, newlen, sizeof(int));
+ for (i = doc->max_xref_len; i < newlen; i++)
+ {
+ doc->xref_index[i] = 0;
+ }
+ doc->max_xref_len = newlen;
+}
+
/* This is only ever called when we already have an incremental
* xref. This means there will only be 1 subsec, and it will be
* a complete subsec. */
@@ -84,7 +97,7 @@ static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen)
xref->num_objects = newlen;
sub->len = newlen;
if (doc->max_xref_len < newlen)
- doc->max_xref_len = newlen;
+ extend_xref_index(ctx, doc, newlen);
}
static void pdf_populate_next_xref_level(pdf_document *doc)
@@ -174,7 +187,7 @@ ensure_solid_xref(pdf_document *doc, int num, int which)
xref->num_objects = num;
xref->subsec = new_sub;
if (doc->max_xref_len < num)
- doc->max_xref_len = num;
+ extend_xref_index(ctx, doc, num);
}
/* Used while reading the individual xref sections from a file */
@@ -225,8 +238,13 @@ pdf_xref_entry *pdf_get_xref_entry(pdf_document *doc, int i)
if (i < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "Negative object number requested");
+ if (i <= doc->max_xref_len)
+ j = doc->xref_index[i];
+ else
+ j = 0;
+
/* Find the first xref section where the entry is defined. */
- for (j = 0; j < doc->num_xref_sections; j++)
+ for (; j < doc->num_xref_sections; j++)
{
xref = &doc->xref_sections[j];
@@ -241,13 +259,17 @@ pdf_xref_entry *pdf_get_xref_entry(pdf_document *doc, int i)
entry = &sub->table[i - sub->start];
if (entry->type)
+ {
+ doc->xref_index[i] = j;
return entry;
+ }
}
}
}
/* Didn't find the entry in any section. Return the entry from
* the final section. */
+ doc->xref_index[i] = 0;
if (i < xref->num_objects)
{
xref = &doc->xref_sections[0];
@@ -284,6 +306,7 @@ static void ensure_incremental_xref(pdf_document *doc)
pdf_xref_entry *new_table = fz_calloc(ctx, xref->num_objects, sizeof(pdf_xref_entry));
pdf_xref_subsec *sub;
pdf_obj *trailer = NULL;
+ int i;
fz_var(trailer);
fz_try(ctx)
@@ -311,6 +334,12 @@ static void ensure_incremental_xref(pdf_document *doc)
pdf_drop_obj(trailer);
fz_rethrow(ctx);
}
+
+ /* Update the xref_index */
+ for (i = 0; i < doc->max_xref_len; i++)
+ {
+ doc->xref_index[i]++;
+ }
}
}
@@ -331,6 +360,7 @@ static pdf_xref_entry *pdf_get_incremental_xref_entry(pdf_document *doc, int i)
sub = xref->subsec;
assert(sub != NULL && sub->next == NULL);
assert(i >= sub->start && i < sub->start + sub->len);
+ doc->xref_index[i] = 0;
return &sub->table[i - sub->start];
}
@@ -365,7 +395,7 @@ void pdf_xref_ensure_incremental_object(pdf_document *doc, int num)
ensure_incremental_xref(doc);
/* Search for the section that contains this object */
- for (i = 0; i < doc->num_xref_sections; i++)
+ for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
{
pdf_xref *xref = &doc->xref_sections[i];
@@ -386,6 +416,7 @@ void pdf_xref_ensure_incremental_object(pdf_document *doc, int num)
return;
/* Move the object to the incremental section */
+ doc->xref_index[num] = 0;
old_entry = &sub->table[num - sub->start];
new_entry = pdf_get_incremental_xref_entry(doc, num);
*new_entry = *old_entry;
@@ -396,13 +427,14 @@ void pdf_xref_ensure_incremental_object(pdf_document *doc, int num)
void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n)
{
fz_context *ctx = doc->ctx;
- pdf_xref *xref;
+ pdf_xref *xref = NULL;
pdf_xref_subsec *sub;
pdf_obj *trailer = pdf_keep_obj(pdf_trailer(doc));
fz_var(xref);
fz_try(ctx)
{
+ doc->xref_index = fz_calloc(ctx, n, sizeof(int));
xref = fz_malloc_struct(ctx, pdf_xref);
sub = fz_malloc_struct(ctx, pdf_xref_subsec);
@@ -420,6 +452,8 @@ void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n)
doc->xref_sections = xref;
doc->num_xref_sections = 1;
doc->max_xref_len = n;
+
+ memset(doc->xref_index, 0, sizeof(int)*doc->max_xref_len);
}
fz_catch(ctx)
{
@@ -619,7 +653,7 @@ pdf_xref_find_subsection(pdf_document *doc, int ofs, int len)
}
xref->num_objects = new_max;
if (doc->max_xref_len < new_max)
- doc->max_xref_len = new_max;
+ extend_xref_index(ctx, doc, new_max);
}
else
{
@@ -988,6 +1022,32 @@ pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_pre
}
}
+static void
+pdf_prime_xref_index(pdf_document *doc)
+{
+ int i, j;
+ int *idx = doc->xref_index;
+
+ for (i = doc->num_xref_sections-1; i >= 0; i--)
+ {
+ pdf_xref *xref = &doc->xref_sections[i];
+ pdf_xref_subsec *subsec = xref->subsec;
+ while (subsec != NULL)
+ {
+ int start = subsec->start;
+ int end = subsec->start + subsec->len;
+ for (j = start; j < end; j++)
+ {
+ char t = subsec->table[j-start].type;
+ if (t != 0 && t != 'f')
+ idx[j] = i;
+ }
+
+ subsec = subsec->next;
+ }
+ }
+}
+
/*
* load xref tables from pdf
*
@@ -1009,6 +1069,8 @@ pdf_load_xref(pdf_document *doc, pdf_lexbuf *buf)
if (pdf_xref_len(doc) == 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty");
+ pdf_prime_xref_index(doc);
+
entry = pdf_get_xref_entry(doc, 0);
/* broken pdfs where first object is missing */
if (!entry->type)
@@ -1308,7 +1370,10 @@ pdf_init_document(pdf_document *doc)
int hasroot, hasinfo;
if (repaired)
- pdf_repair_xref(doc, &doc->lexbuf.base);
+ {
+ pdf_repair_xref(doc);
+ pdf_prime_xref_index(doc);
+ }
encrypt = pdf_dict_gets(pdf_trailer(doc), "Encrypt");
id = pdf_dict_gets(pdf_trailer(doc), "ID");
@@ -1888,7 +1953,8 @@ object_updated:
{
fz_try(ctx)
{
- pdf_repair_xref(doc, &doc->lexbuf.base);
+ pdf_repair_xref(doc);
+ pdf_prime_xref_index(doc);
}
fz_catch(ctx)
{