diff options
author | Robin Watts <robin.watts@artifex.com> | 2012-04-21 01:06:39 +0100 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2012-04-21 13:41:19 +0100 |
commit | 3f891013ab59b617f6c8c2a486446d3ed7d0e340 (patch) | |
tree | 953e36809ff1fde6e19af7a535d4e486903e46de /pdf | |
parent | a5e78404887363b73bf7a41b54340ac3875682cd (diff) | |
download | mupdf-3f891013ab59b617f6c8c2a486446d3ed7d0e340.tar.xz |
Bug 692996: Eliminate recursion to avoid exception stack overflows.
Avoid recursion in pdf_load_page_tree_node.
Avoid recursion (most of the time) in pdf_read_xref_sections.
Diffstat (limited to 'pdf')
-rw-r--r-- | pdf/pdf_page.c | 137 | ||||
-rw-r--r-- | pdf/pdf_xref.c | 35 |
2 files changed, 111 insertions, 61 deletions
diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c index bbc335bb..a5a25989 100644 --- a/pdf/pdf_page.c +++ b/pdf/pdf_page.c @@ -27,75 +27,114 @@ put_marker_bool(fz_context *ctx, pdf_obj *rdb, char *marker, int val) pdf_drop_obj(tmp); } +typedef struct pdf_page_load_s pdf_page_load; + +struct pdf_page_load_s +{ + int max; + int pos; + pdf_obj *node; + pdf_obj *kids; + struct info info; +}; + static void pdf_load_page_tree_node(pdf_document *xref, pdf_obj *node, struct info info) { pdf_obj *dict, *kids, *count; pdf_obj *obj; - int i, n; fz_context *ctx = xref->ctx; - - /* prevent infinite recursion */ - if (!node || pdf_dict_mark(node)) - return; + pdf_page_load *stack = NULL; + int stacklen = -1; + int stackmax = 0; fz_try(ctx) { - kids = pdf_dict_gets(node, "Kids"); - count = pdf_dict_gets(node, "Count"); - - if (pdf_is_array(kids) && pdf_is_int(count)) + do { - obj = pdf_dict_gets(node, "Resources"); - if (obj) - info.resources = obj; - obj = pdf_dict_gets(node, "MediaBox"); - if (obj) - info.mediabox = obj; - obj = pdf_dict_gets(node, "CropBox"); - if (obj) - info.cropbox = obj; - obj = pdf_dict_gets(node, "Rotate"); - if (obj) - info.rotate = obj; - - n = pdf_array_len(kids); - for (i = 0; i < n; i++) + if (!node || pdf_dict_mark(node)) { - obj = pdf_array_get(kids, i); - pdf_load_page_tree_node(xref, obj, info); + /* NULL node, or we've been here before. + * Nothing to do. 
*/ } - } - else if ((dict = pdf_to_dict(node)) != NULL) - { - if (info.resources && !pdf_dict_gets(dict, "Resources")) - pdf_dict_puts(dict, "Resources", info.resources); - if (info.mediabox && !pdf_dict_gets(dict, "MediaBox")) - pdf_dict_puts(dict, "MediaBox", info.mediabox); - if (info.cropbox && !pdf_dict_gets(dict, "CropBox")) - pdf_dict_puts(dict, "CropBox", info.cropbox); - if (info.rotate && !pdf_dict_gets(dict, "Rotate")) - pdf_dict_puts(dict, "Rotate", info.rotate); - - if (xref->page_len == xref->page_cap) + else { - fz_warn(ctx, "found more pages than expected"); - xref->page_refs = fz_resize_array(ctx, xref->page_refs, xref->page_cap+1, sizeof(pdf_obj*)); - xref->page_objs = fz_resize_array(ctx, xref->page_objs, xref->page_cap+1, sizeof(pdf_obj*)); - xref->page_cap ++; + kids = pdf_dict_gets(node, "Kids"); + count = pdf_dict_gets(node, "Count"); + if (pdf_is_array(kids) && pdf_is_int(count)) + { + /* Push this onto the stack */ + obj = pdf_dict_gets(node, "Resources"); + if (obj) + info.resources = obj; + obj = pdf_dict_gets(node, "MediaBox"); + if (obj) + info.mediabox = obj; + obj = pdf_dict_gets(node, "CropBox"); + if (obj) + info.cropbox = obj; + obj = pdf_dict_gets(node, "Rotate"); + if (obj) + info.rotate = obj; + stacklen++; + if (stacklen == stackmax) + { + stack = fz_resize_array(ctx, stack, stackmax ? stackmax*2 : 10, sizeof(*stack)); + stackmax = stackmax ? 
stackmax*2 : 10; + } + stack[stacklen].kids = kids; + stack[stacklen].node = node; + stack[stacklen].pos = -1; + stack[stacklen].max = pdf_array_len(kids); + stack[stacklen].info = info; + } + else if ((dict = pdf_to_dict(node)) != NULL) + { + if (info.resources && !pdf_dict_gets(dict, "Resources")) + pdf_dict_puts(dict, "Resources", info.resources); + if (info.mediabox && !pdf_dict_gets(dict, "MediaBox")) + pdf_dict_puts(dict, "MediaBox", info.mediabox); + if (info.cropbox && !pdf_dict_gets(dict, "CropBox")) + pdf_dict_puts(dict, "CropBox", info.cropbox); + if (info.rotate && !pdf_dict_gets(dict, "Rotate")) + pdf_dict_puts(dict, "Rotate", info.rotate); + + if (xref->page_len == xref->page_cap) + { + fz_warn(ctx, "found more pages than expected"); + xref->page_refs = fz_resize_array(ctx, xref->page_refs, xref->page_cap+1, sizeof(pdf_obj*)); + xref->page_objs = fz_resize_array(ctx, xref->page_objs, xref->page_cap+1, sizeof(pdf_obj*)); + xref->page_cap ++; + } + + xref->page_refs[xref->page_len] = pdf_keep_obj(node); + xref->page_objs[xref->page_len] = pdf_keep_obj(dict); + xref->page_len ++; + pdf_dict_unmark(node); + } } - - xref->page_refs[xref->page_len] = pdf_keep_obj(node); - xref->page_objs[xref->page_len] = pdf_keep_obj(dict); - xref->page_len ++; + /* Get the next node */ + while (++stack[stacklen].pos == stack[stacklen].max) + { + pdf_dict_unmark(stack[stacklen].node); + stacklen--; + if (stacklen < 0) /* No more to pop! 
*/ + break; + node = stack[stacklen].node; + info = stack[stacklen].info; + pdf_dict_unmark(node); /* Unmark it, cos we're about to mark it again */ + } + if (stacklen >= 0) + node = pdf_array_get(stack[stacklen].kids, stack[stacklen].pos); } + while (stacklen >= 0); } fz_catch(ctx) { - pdf_dict_unmark(node); + while (stacklen >= 0) + pdf_dict_unmark(stack[stacklen--].node); fz_rethrow(ctx); } - pdf_dict_unmark(node); } static void diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c index 11cf4bf0..70cbd9de 100644 --- a/pdf/pdf_xref.c +++ b/pdf/pdf_xref.c @@ -414,26 +414,37 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf) pdf_obj *prev = NULL; fz_context *ctx = xref->ctx; + fz_var(trailer); + fz_var(xrefstm); + fz_var(prev); + fz_try(ctx) { - trailer = pdf_read_xref(xref, ofs, buf); - - /* FIXME: do we overwrite free entries properly? */ - xrefstm = pdf_dict_gets(trailer, "XRefStm"); - if (xrefstm) - pdf_read_xref_sections(xref, pdf_to_int(xrefstm), buf); - - prev = pdf_dict_gets(trailer, "Prev"); - if (prev) - pdf_read_xref_sections(xref, pdf_to_int(prev), buf); + do + { + trailer = pdf_read_xref(xref, ofs, buf); + + /* FIXME: do we overwrite free entries properly? */ + xrefstm = pdf_dict_gets(trailer, "XRefStm"); + prev = pdf_dict_gets(trailer, "Prev"); + /* We only recurse if we have both xrefstm and prev. + * Hopefully this happens infrequently. */ + if (xrefstm && prev) + pdf_read_xref_sections(xref, pdf_to_int(xrefstm), buf); + if (prev) + ofs = pdf_to_int(prev); + else if (xrefstm) + ofs = pdf_to_int(xrefstm); + pdf_drop_obj(trailer); + trailer = NULL; + } + while (prev || xrefstm); } fz_catch(ctx) { pdf_drop_obj(trailer); fz_throw(ctx, "cannot read xref at offset %d", ofs); } - - pdf_drop_obj(trailer); } /* |