summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Robin Watts <robin.watts@artifex.com> 2014-12-29 19:07:48 +0000
committer: Robin Watts <robin.watts@artifex.com> 2014-12-29 19:10:46 +0000
commit: 319c9d2315ad4fb13abe423571311ddb63202e2c (patch)
tree: 803e5a6cd31e500add8ec0bb8236277267a731a9
parent: 7209f8811b61007f999afcb78d4440a805d853f4 (diff)
download: mupdf-319c9d2315ad4fb13abe423571311ddb63202e2c.tar.xz
Performance optimisation with pdf_cache_object/pdf_get_xref_entry
The recent change to holding pdf xrefs in a sparse format has resulted in a significant decrease in speed (x10). Malc points out that some of this (2x) can be recovered simply by making pdf_cache_object return the xref entry in which it found the object. This saves us from having to call pdf_get_xref_entry again immediately afterwards. I am still thinking about ways to get the remaining time back.
-rw-r--r-- include/mupdf/pdf/xref.h 2
-rw-r--r-- source/pdf/pdf-stream.c 11
-rw-r--r-- source/pdf/pdf-xref.c 19
3 files changed, 13 insertions, 19 deletions
diff --git a/include/mupdf/pdf/xref.h b/include/mupdf/pdf/xref.h
index 70826314..ac8525ac 100644
--- a/include/mupdf/pdf/xref.h
+++ b/include/mupdf/pdf/xref.h
@@ -66,7 +66,7 @@ struct pdf_xref_s
pdf_obj *pre_repair_trailer;
};
-void pdf_cache_object(pdf_document *doc, int num, int gen);
+pdf_xref_entry *pdf_cache_object(pdf_document *doc, int num, int gen);
int pdf_count_objects(pdf_document *doc);
pdf_obj *pdf_resolve_indirect(pdf_obj *ref);
diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c
index eb6b616e..f859719b 100644
--- a/source/pdf/pdf-stream.c
+++ b/source/pdf/pdf-stream.c
@@ -11,9 +11,8 @@ pdf_is_stream(pdf_document *doc, int num, int gen)
if (num <= 0 || num >= pdf_xref_len(doc))
return 0;
- pdf_cache_object(doc, num, gen);
+ entry = pdf_cache_object(doc, num, gen);
- entry = pdf_get_xref_entry(doc, num);
return entry->stm_ofs != 0 || entry->stm_buf;
}
@@ -408,9 +407,7 @@ pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num
if (num <= 0 || num >= pdf_xref_len(doc))
fz_throw(doc->ctx, FZ_ERROR_GENERIC, "object id out of range (%d %d R)", num, gen);
- pdf_cache_object(doc, num, gen);
-
- x = pdf_get_xref_entry(doc, num);
+ x = pdf_cache_object(doc, num, gen);
if (x->stm_ofs == 0)
fz_throw(doc->ctx, FZ_ERROR_GENERIC, "object is not a stream");
@@ -425,9 +422,7 @@ pdf_open_image_stream(pdf_document *doc, int num, int gen, int orig_num, int ori
if (num <= 0 || num >= pdf_xref_len(doc))
fz_throw(doc->ctx, FZ_ERROR_GENERIC, "object id out of range (%d %d R)", num, gen);
- pdf_cache_object(doc, num, gen);
-
- x = pdf_get_xref_entry(doc, num);
+ x = pdf_cache_object(doc, num, gen);
if (x->stm_ofs == 0 && x->stm_buf == NULL)
fz_throw(doc->ctx, FZ_ERROR_GENERIC, "object is not a stream");
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index d72ae55c..b7ac1e20 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -1833,7 +1833,7 @@ read_hinted_object(pdf_document *doc, int num)
return 1;
}
-void
+pdf_xref_entry *
pdf_cache_object(pdf_document *doc, int num, int gen)
{
pdf_xref_entry *x;
@@ -1851,8 +1851,8 @@ object_updated:
x = pdf_get_xref_entry(doc, num);
- if (x->obj)
- return;
+ if (x->obj != NULL)
+ return x;
if (x->type == 'f')
{
@@ -1930,6 +1930,7 @@ object_updated:
}
pdf_set_obj_parent(x->obj, num);
+ return x;
}
pdf_obj *
@@ -1940,16 +1941,14 @@ pdf_load_object(pdf_document *doc, int num, int gen)
fz_try(ctx)
{
- pdf_cache_object(doc, num, gen);
+ entry = pdf_cache_object(doc, num, gen);
}
fz_catch(ctx)
{
fz_rethrow_message(ctx, "cannot load object (%d %d R) into cache", num, gen);
}
- entry = pdf_get_xref_entry(doc, num);
-
- assert(entry->obj);
+ assert(entry->obj != NULL);
return pdf_keep_obj(entry->obj);
}
@@ -1986,7 +1985,7 @@ pdf_resolve_indirect(pdf_obj *ref)
fz_try(ctx)
{
- pdf_cache_object(doc, num, gen);
+ entry = pdf_cache_object(doc, num, gen);
}
fz_catch(ctx)
{
@@ -1994,8 +1993,8 @@ pdf_resolve_indirect(pdf_obj *ref)
fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen);
return NULL;
}
- entry = pdf_get_xref_entry(doc, num);
- if (!entry->obj)
+
+ if (entry->obj == NULL)
return NULL;
ref = entry->obj;
}