summary | refs | log | tree | commit | diff
path: root/pdf
diff options
context:
space:
mode:
author Robin Watts <robin.watts@artifex.com> 2012-04-21 01:06:39 +0100
committer Robin Watts <robin.watts@artifex.com> 2012-04-21 13:41:19 +0100
commit 3f891013ab59b617f6c8c2a486446d3ed7d0e340 (patch)
tree 953e36809ff1fde6e19af7a535d4e486903e46de /pdf
parent a5e78404887363b73bf7a41b54340ac3875682cd (diff)
download mupdf-3f891013ab59b617f6c8c2a486446d3ed7d0e340.tar.xz
Bug 692996: Eliminate recursion to avoid exception stack overflows.
Avoid recursion in pdf_load_page_tree_node. Avoid recursion (most of the time) in pdf_read_xref_sections.
Diffstat (limited to 'pdf')
-rw-r--r-- pdf/pdf_page.c 137
-rw-r--r-- pdf/pdf_xref.c 35
2 files changed, 111 insertions, 61 deletions
diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c
index bbc335bb..a5a25989 100644
--- a/pdf/pdf_page.c
+++ b/pdf/pdf_page.c
@@ -27,75 +27,114 @@ put_marker_bool(fz_context *ctx, pdf_obj *rdb, char *marker, int val)
pdf_drop_obj(tmp);
}
+typedef struct pdf_page_load_s pdf_page_load;
+
+struct pdf_page_load_s
+{
+ int max;
+ int pos;
+ pdf_obj *node;
+ pdf_obj *kids;
+ struct info info;
+};
+
static void
pdf_load_page_tree_node(pdf_document *xref, pdf_obj *node, struct info info)
{
pdf_obj *dict, *kids, *count;
pdf_obj *obj;
- int i, n;
fz_context *ctx = xref->ctx;
-
- /* prevent infinite recursion */
- if (!node || pdf_dict_mark(node))
- return;
+ pdf_page_load *stack = NULL;
+ int stacklen = -1;
+ int stackmax = 0;
fz_try(ctx)
{
- kids = pdf_dict_gets(node, "Kids");
- count = pdf_dict_gets(node, "Count");
-
- if (pdf_is_array(kids) && pdf_is_int(count))
+ do
{
- obj = pdf_dict_gets(node, "Resources");
- if (obj)
- info.resources = obj;
- obj = pdf_dict_gets(node, "MediaBox");
- if (obj)
- info.mediabox = obj;
- obj = pdf_dict_gets(node, "CropBox");
- if (obj)
- info.cropbox = obj;
- obj = pdf_dict_gets(node, "Rotate");
- if (obj)
- info.rotate = obj;
-
- n = pdf_array_len(kids);
- for (i = 0; i < n; i++)
+ if (!node || pdf_dict_mark(node))
{
- obj = pdf_array_get(kids, i);
- pdf_load_page_tree_node(xref, obj, info);
+ /* NULL node, or we've been here before.
+ * Nothing to do. */
}
- }
- else if ((dict = pdf_to_dict(node)) != NULL)
- {
- if (info.resources && !pdf_dict_gets(dict, "Resources"))
- pdf_dict_puts(dict, "Resources", info.resources);
- if (info.mediabox && !pdf_dict_gets(dict, "MediaBox"))
- pdf_dict_puts(dict, "MediaBox", info.mediabox);
- if (info.cropbox && !pdf_dict_gets(dict, "CropBox"))
- pdf_dict_puts(dict, "CropBox", info.cropbox);
- if (info.rotate && !pdf_dict_gets(dict, "Rotate"))
- pdf_dict_puts(dict, "Rotate", info.rotate);
-
- if (xref->page_len == xref->page_cap)
+ else
{
- fz_warn(ctx, "found more pages than expected");
- xref->page_refs = fz_resize_array(ctx, xref->page_refs, xref->page_cap+1, sizeof(pdf_obj*));
- xref->page_objs = fz_resize_array(ctx, xref->page_objs, xref->page_cap+1, sizeof(pdf_obj*));
- xref->page_cap ++;
+ kids = pdf_dict_gets(node, "Kids");
+ count = pdf_dict_gets(node, "Count");
+ if (pdf_is_array(kids) && pdf_is_int(count))
+ {
+ /* Push this onto the stack */
+ obj = pdf_dict_gets(node, "Resources");
+ if (obj)
+ info.resources = obj;
+ obj = pdf_dict_gets(node, "MediaBox");
+ if (obj)
+ info.mediabox = obj;
+ obj = pdf_dict_gets(node, "CropBox");
+ if (obj)
+ info.cropbox = obj;
+ obj = pdf_dict_gets(node, "Rotate");
+ if (obj)
+ info.rotate = obj;
+ stacklen++;
+ if (stacklen == stackmax)
+ {
+ stack = fz_resize_array(ctx, stack, stackmax ? stackmax*2 : 10, sizeof(*stack));
+ stackmax = stackmax ? stackmax*2 : 10;
+ }
+ stack[stacklen].kids = kids;
+ stack[stacklen].node = node;
+ stack[stacklen].pos = -1;
+ stack[stacklen].max = pdf_array_len(kids);
+ stack[stacklen].info = info;
+ }
+ else if ((dict = pdf_to_dict(node)) != NULL)
+ {
+ if (info.resources && !pdf_dict_gets(dict, "Resources"))
+ pdf_dict_puts(dict, "Resources", info.resources);
+ if (info.mediabox && !pdf_dict_gets(dict, "MediaBox"))
+ pdf_dict_puts(dict, "MediaBox", info.mediabox);
+ if (info.cropbox && !pdf_dict_gets(dict, "CropBox"))
+ pdf_dict_puts(dict, "CropBox", info.cropbox);
+ if (info.rotate && !pdf_dict_gets(dict, "Rotate"))
+ pdf_dict_puts(dict, "Rotate", info.rotate);
+
+ if (xref->page_len == xref->page_cap)
+ {
+ fz_warn(ctx, "found more pages than expected");
+ xref->page_refs = fz_resize_array(ctx, xref->page_refs, xref->page_cap+1, sizeof(pdf_obj*));
+ xref->page_objs = fz_resize_array(ctx, xref->page_objs, xref->page_cap+1, sizeof(pdf_obj*));
+ xref->page_cap ++;
+ }
+
+ xref->page_refs[xref->page_len] = pdf_keep_obj(node);
+ xref->page_objs[xref->page_len] = pdf_keep_obj(dict);
+ xref->page_len ++;
+ pdf_dict_unmark(node);
+ }
}
-
- xref->page_refs[xref->page_len] = pdf_keep_obj(node);
- xref->page_objs[xref->page_len] = pdf_keep_obj(dict);
- xref->page_len ++;
+ /* Get the next node */
+ while (++stack[stacklen].pos == stack[stacklen].max)
+ {
+ pdf_dict_unmark(stack[stacklen].node);
+ stacklen--;
+ if (stacklen < 0) /* No more to pop! */
+ break;
+ node = stack[stacklen].node;
+ info = stack[stacklen].info;
+ pdf_dict_unmark(node); /* Unmark it, cos we're about to mark it again */
+ }
+ if (stacklen >= 0)
+ node = pdf_array_get(stack[stacklen].kids, stack[stacklen].pos);
}
+ while (stacklen >= 0);
}
fz_catch(ctx)
{
- pdf_dict_unmark(node);
+ while (stacklen >= 0)
+ pdf_dict_unmark(stack[stacklen--].node);
fz_rethrow(ctx);
}
- pdf_dict_unmark(node);
}
static void
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c
index 11cf4bf0..70cbd9de 100644
--- a/pdf/pdf_xref.c
+++ b/pdf/pdf_xref.c
@@ -414,26 +414,37 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf)
pdf_obj *prev = NULL;
fz_context *ctx = xref->ctx;
+ fz_var(trailer);
+ fz_var(xrefstm);
+ fz_var(prev);
+
fz_try(ctx)
{
- trailer = pdf_read_xref(xref, ofs, buf);
-
- /* FIXME: do we overwrite free entries properly? */
- xrefstm = pdf_dict_gets(trailer, "XRefStm");
- if (xrefstm)
- pdf_read_xref_sections(xref, pdf_to_int(xrefstm), buf);
-
- prev = pdf_dict_gets(trailer, "Prev");
- if (prev)
- pdf_read_xref_sections(xref, pdf_to_int(prev), buf);
+ do
+ {
+ trailer = pdf_read_xref(xref, ofs, buf);
+
+ /* FIXME: do we overwrite free entries properly? */
+ xrefstm = pdf_dict_gets(trailer, "XRefStm");
+ prev = pdf_dict_gets(trailer, "Prev");
+ /* We only recurse if we have both xrefstm and prev.
+ * Hopefully this happens infrequently. */
+ if (xrefstm && prev)
+ pdf_read_xref_sections(xref, pdf_to_int(xrefstm), buf);
+ if (prev)
+ ofs = pdf_to_int(prev);
+ else if (xrefstm)
+ ofs = pdf_to_int(xrefstm);
+ pdf_drop_obj(trailer);
+ trailer = NULL;
+ }
+ while (prev || xrefstm);
}
fz_catch(ctx)
{
pdf_drop_obj(trailer);
fz_throw(ctx, "cannot read xref at offset %d", ofs);
}
-
- pdf_drop_obj(trailer);
}
/*