summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2016-07-01 17:33:30 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2016-07-06 15:45:16 +0200
commit: 5983a7de30e84af04dab3c7835c04dba493391fa (patch)
tree: 67381a4cb938d86b88c116739bcc54a0bcc9de23
parent: 255776a778b519183d6935ff0cb4b766644fa830 (diff)
download: mupdf-5983a7de30e84af04dab3c7835c04dba493391fa.tar.xz
Fix garbage collection and page grafting for indirect reference chains.
The mark & sweep pass of garbage collection, and the resolving of indirect objects when grafting objects, were both following the full chain of indirect references. In the unusual case where a numbered object is itself only an indirect reference to another object, this intermediate numbered object would be missed both when marking for garbage collection and when copying objects for grafting.

Add a function to resolve only one step for these two uses: pdf_resolve_indirect now follows a single indirect reference, and the new pdf_resolve_indirect_chain follows the whole chain for callers that need the final object.

The following is an example of a file that would break during garbage collection if we follow full indirect reference chains:

%PDF-1.3
1 0 obj <</Type/Catalog /Foo[2 0 R 3 0 R]>> endobj
2 0 obj 4 0 R endobj
3 0 obj 5 0 R endobj
4 0 obj <</Length 1>> stream
A
endstream endobj
5 0 obj <</Length 1>> stream
B
endstream endobj
-rw-r--r-- include/mupdf/pdf/xref.h 1
-rw-r--r-- source/pdf/pdf-object.c 2
-rw-r--r-- source/pdf/pdf-page.c 2
-rw-r--r-- source/pdf/pdf-write.c 9
-rw-r--r-- source/pdf/pdf-xref.c 41
-rw-r--r-- source/tools/murun.c 2
6 files changed, 30 insertions, 27 deletions
diff --git a/include/mupdf/pdf/xref.h b/include/mupdf/pdf/xref.h
index 2c8ab858..b85d7334 100644
--- a/include/mupdf/pdf/xref.h
+++ b/include/mupdf/pdf/xref.h
@@ -77,6 +77,7 @@ pdf_xref_entry *pdf_cache_object(fz_context *ctx, pdf_document *doc, int num);
int pdf_count_objects(fz_context *ctx, pdf_document *doc);
pdf_obj *pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref);
+pdf_obj *pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref);
pdf_obj *pdf_load_object(fz_context *ctx, pdf_document *doc, int num);
fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_document *doc, int num);
diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c
index 5d990210..b189a125 100644
--- a/source/pdf/pdf-object.c
+++ b/source/pdf/pdf-object.c
@@ -216,7 +216,7 @@ int pdf_is_indirect(fz_context *ctx, pdf_obj *obj)
#define RESOLVE(obj) \
if (obj >= PDF_OBJ__LIMIT && obj->kind == PDF_INDIRECT) \
- obj = pdf_resolve_indirect(ctx, obj); \
+ obj = pdf_resolve_indirect_chain(ctx, obj); \
int pdf_is_null(fz_context *ctx, pdf_obj *obj)
{
diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c
index 8ba02562..430c033b 100644
--- a/source/pdf/pdf-page.c
+++ b/source/pdf/pdf-page.c
@@ -506,7 +506,7 @@ pdf_load_page(fz_context *ctx, pdf_document *doc, int number)
}
else
pageref = pdf_lookup_page_obj(ctx, doc, number);
- pageobj = pdf_resolve_indirect(ctx, pageref);
+ pageobj = pdf_resolve_indirect_chain(ctx, pageref);
page = pdf_new_page(ctx, doc);
page->me = pdf_keep_obj(ctx, pageobj);
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index aa801b3e..0a7a0f2f 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -589,7 +589,7 @@ static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pd
DEBUGGING_MARKING(depth++);
- if (pdf_is_indirect(ctx, obj))
+ while (pdf_is_indirect(ctx, obj))
{
int duff;
DEBUGGING_MARKING(indent(); printf("Marking object %d\n", pdf_to_num(ctx, obj)));
@@ -648,6 +648,8 @@ static void removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_st
if (num == other || !opts->use_list[num] || !opts->use_list[other])
continue;
+ /* TODO: resolve indirect references to see if we can omit them */
+
/*
* Comparing stream objects data contents would take too long.
*
@@ -673,9 +675,6 @@ static void removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_st
a = pdf_get_xref_entry(ctx, doc, num)->obj;
b = pdf_get_xref_entry(ctx, doc, other)->obj;
- a = pdf_resolve_indirect(ctx, a);
- b = pdf_resolve_indirect(ctx, b);
-
if (pdf_objcmp(ctx, a, b))
continue;
@@ -1439,7 +1438,7 @@ linearize(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
#endif
/* Find the split point */
- for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++);
+ for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++) {}
opts->start = i;
/* Roll the reordering into the renumber_map */
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 54dbc4b6..19b1f4f7 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -2097,25 +2097,14 @@ pdf_load_object(fz_context *ctx, pdf_document *doc, int num)
pdf_obj *
pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
{
- int sanity = 10;
- int num;
- pdf_xref_entry *entry;
-
- while (pdf_is_indirect(ctx, ref))
+ if (pdf_is_indirect(ctx, ref))
{
- pdf_document *doc;
-
- if (--sanity == 0)
- {
- fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", num);
- return NULL;
- }
+ pdf_document *doc = pdf_get_indirect_document(ctx, ref);
+ int num = pdf_to_num(ctx, ref);
+ pdf_xref_entry *entry;
- doc = pdf_get_indirect_document(ctx, ref);
if (!doc)
return NULL;
- num = pdf_to_num(ctx, ref);
-
if (num <= 0)
{
fz_warn(ctx, "invalid indirect reference (%d 0 R)", num);
@@ -2123,9 +2112,7 @@ pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
}
fz_try(ctx)
- {
entry = pdf_cache_object(ctx, doc, num);
- }
fz_catch(ctx)
{
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
@@ -2133,10 +2120,26 @@ pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
return NULL;
}
- if (entry->obj == NULL)
- return NULL;
ref = entry->obj;
}
+ return ref;
+}
+
+pdf_obj *
+pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref)
+{
+ int sanity = 10;
+
+ while (pdf_is_indirect(ctx, ref))
+ {
+ if (--sanity == 0)
+ {
+ fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", pdf_to_num(ctx, ref));
+ return NULL;
+ }
+
+ ref = pdf_resolve_indirect(ctx, ref);
+ }
return ref;
}
diff --git a/source/tools/murun.c b/source/tools/murun.c
index df0cbdc2..c3b01cda 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -2986,7 +2986,7 @@ static void ffi_PDFObject_forEach(js_State *J)
int i, n;
fz_try(ctx)
- obj = pdf_resolve_indirect(ctx, obj);
+ obj = pdf_resolve_indirect_chain(ctx, obj);
fz_catch(ctx)
rethrow(J);