From 319c9d2315ad4fb13abe423571311ddb63202e2c Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Mon, 29 Dec 2014 19:07:48 +0000 Subject: Performance optimisation with pdf_cache_object/pdf_get_xref_entry The recent change to holding pdf xrefs in a sparse format has resulted in a significant decrease in speed (x10). Malc points out that some of this (2x) can be recovered simply by making pdf_cache_object return the entry which it found the object in. This saves us having to immediately call pdf_get_xref_entry again afterwards. I am still thinking about ways to try and get the remaining time back. --- source/pdf/pdf-stream.c | 11 +++-------- source/pdf/pdf-xref.c | 19 +++++++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'source') diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c index eb6b616e..f859719b 100644 --- a/source/pdf/pdf-stream.c +++ b/source/pdf/pdf-stream.c @@ -11,9 +11,8 @@ pdf_is_stream(pdf_document *doc, int num, int gen) if (num <= 0 || num >= pdf_xref_len(doc)) return 0; - pdf_cache_object(doc, num, gen); + entry = pdf_cache_object(doc, num, gen); - entry = pdf_get_xref_entry(doc, num); return entry->stm_ofs != 0 || entry->stm_buf; } @@ -408,9 +407,7 @@ pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num if (num <= 0 || num >= pdf_xref_len(doc)) fz_throw(doc->ctx, FZ_ERROR_GENERIC, "object id out of range (%d %d R)", num, gen); - pdf_cache_object(doc, num, gen); - - x = pdf_get_xref_entry(doc, num); + x = pdf_cache_object(doc, num, gen); if (x->stm_ofs == 0) fz_throw(doc->ctx, FZ_ERROR_GENERIC, "object is not a stream"); @@ -425,9 +422,7 @@ pdf_open_image_stream(pdf_document *doc, int num, int gen, int orig_num, int ori if (num <= 0 || num >= pdf_xref_len(doc)) fz_throw(doc->ctx, FZ_ERROR_GENERIC, "object id out of range (%d %d R)", num, gen); - pdf_cache_object(doc, num, gen); - - x = pdf_get_xref_entry(doc, num); + x = pdf_cache_object(doc, num, gen); if (x->stm_ofs == 0 && x->stm_buf == NULL) fz_throw(doc->ctx, FZ_ERROR_GENERIC, "object is not a stream"); diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index d72ae55c..b7ac1e20 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -1833,7 +1833,7 @@ read_hinted_object(pdf_document *doc, int num) return 1; } -void +pdf_xref_entry * pdf_cache_object(pdf_document *doc, int num, int gen) { pdf_xref_entry *x; @@ -1851,8 +1851,8 @@ object_updated: x = pdf_get_xref_entry(doc, num); - if (x->obj) - return; + if (x->obj != NULL) + return x; if (x->type == 'f') { @@ -1930,6 +1930,7 @@ object_updated: } pdf_set_obj_parent(x->obj, num); + return x; } pdf_obj * @@ -1940,16 +1941,14 @@ pdf_load_object(pdf_document *doc, int num, int gen) fz_try(ctx) { - pdf_cache_object(doc, num, gen); + entry = pdf_cache_object(doc, num, gen); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load object (%d %d R) into cache", num, gen); } - entry = pdf_get_xref_entry(doc, num); - - assert(entry->obj); + assert(entry->obj != NULL); return pdf_keep_obj(entry->obj); } @@ -1986,7 +1985,7 @@ pdf_resolve_indirect(pdf_obj *ref) fz_try(ctx) { - pdf_cache_object(doc, num, gen); + entry = pdf_cache_object(doc, num, gen); } fz_catch(ctx) { @@ -1994,8 +1993,8 @@ pdf_resolve_indirect(pdf_obj *ref) fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen); return NULL; } - entry = pdf_get_xref_entry(doc, num); - if (!entry->obj) + + if (entry->obj == NULL) return NULL; ref = entry->obj; } -- cgit v1.2.3