From 636652daee46a9cf9836746135e3f9678db796ec Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Mon, 7 May 2012 11:30:05 +0100 Subject: Switch to reading content streams on the fly during interpretation. Previously, before interpreting a page's content stream we would load it entirely into a buffer. Then we would interpret that buffer. This has a cost in memory use. Here, we update the code to read from a stream on the fly. This has required changes in various different parts of the code. Firstly, we have removed all use of the FILE lock - as stream reads can now safely be interrupted by resource (or object) reads from elsewhere in the file, the file lock becomes a very hard thing to maintain, and doesn't actually benefit us at all. The choices were to either use a recursive lock, or to remove it entirely; I opted for the latter. The file lock enum value remains as a placeholder for future use in extendable data streams. Secondly, we add a new 'concat' filter that concatenates a series of streams together into one, optionally putting whitespace between each stream (as the PDF parser requires this). Finally, we change page/xobject/pattern content streams to work on the fly, but we leave type3 glyphs using buffers (as presumably these will be run repeatedly). 
--- fitz/filt_basic.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fitz/fitz-internal.h | 5 ++- fitz/res_font.c | 6 ++-- fitz/stm_open.c | 15 -------- pdf/base_object.c | 5 ++- pdf/mupdf-internal.h | 7 ++-- pdf/pdf_interpret.c | 91 +++++++++++++++++++++++++++++++++--------------- pdf/pdf_outline.c | 2 +- pdf/pdf_page.c | 70 ++----------------------------------- pdf/pdf_pattern.c | 6 ++-- pdf/pdf_repair.c | 15 ++------ pdf/pdf_stream.c | 52 +++++++++++++++++++++++++--- pdf/pdf_type3.c | 5 ++- pdf/pdf_xobject.c | 7 ++-- pdf/pdf_xref.c | 62 +++++---------------------------- scripts/cmapdump.c | 1 - xps/xps_zip.c | 30 ++++++---------- 17 files changed, 251 insertions(+), 226 deletions(-) diff --git a/fitz/filt_basic.c b/fitz/filt_basic.c index ac6a5903..7d504f29 100644 --- a/fitz/filt_basic.c +++ b/fitz/filt_basic.c @@ -62,6 +62,104 @@ fz_open_null(fz_stream *chain, int len, int offset) return fz_new_stream(ctx, state, read_null, close_null); } +/* Concat filter concatenates several streams into one */ + +struct concat_filter +{ + int max; + int count; + int current; + int pad; /* 1 if we should add whitespace padding between streams */ + int ws; /* 1 if we should send a whitespace padding byte next */ + fz_stream *chain[1]; +}; + +static int +read_concat(fz_stream *stm, unsigned char *buf, int len) +{ + struct concat_filter *state = (struct concat_filter *)stm->state; + int n; + int read = 0; + + if (len <= 0) + return 0; + + while (state->current != state->count && len > 0) + { + /* If we need to send a whitespace char, do that */ + if (state->ws) + { + *buf++ = 32; + read++; + len--; + state->ws = 0; + continue; + } + /* Otherwise, read as much data as will fit in the buffer */ + n = fz_read(state->chain[state->current], buf, len); + read += n; + buf += n; + len -= n; + /* If we didn't read any, then we must have hit the end of + * our buffer space. Move to the next stream, and remember to + * pad. 
*/ + if (n == 0) + { + fz_close(state->chain[state->current]); + state->current++; + state->ws = state->pad; + } + } + + return read; +} + +static void +close_concat(fz_context *ctx, void *state_) +{ + struct concat_filter *state = (struct concat_filter *)state_; + int i; + + for (i = state->current; i < state->count; i++) + { + fz_close(state->chain[i]); + } + fz_free(ctx, state); +} + +fz_stream * +fz_open_concat(fz_context *ctx, int len, int pad) +{ + struct concat_filter *state; + + fz_try(ctx) + { + state = fz_calloc(ctx, 1, sizeof(struct concat_filter) + (len-1)*sizeof(fz_stream *)); + state->max = len; + state->count = 0; + state->current = 0; + state->pad = pad; + state->ws = 0; /* We never send padding byte at the start */ + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + return fz_new_stream(ctx, state, read_concat, close_concat); +} + +void +fz_concat_push(fz_stream *concat, fz_stream *chain) +{ + struct concat_filter *state = (struct concat_filter *)concat->state; + + if (state->count == state->max) + fz_throw(concat->ctx, "Concat filter size exceeded"); + + state->chain[state->count++] = chain; +} + /* ASCII Hex Decode */ typedef struct fz_ahxd_s fz_ahxd; diff --git a/fitz/fitz-internal.h b/fitz/fitz-internal.h index 197d0279..b3f7a73c 100644 --- a/fitz/fitz-internal.h +++ b/fitz/fitz-internal.h @@ -419,7 +419,6 @@ struct fz_stream_s int pos; int avail; int bits; - int locked; unsigned char *bp, *rp, *wp, *ep; void *state; int (*read)(fz_stream *stm, unsigned char *buf, int len); @@ -428,8 +427,6 @@ struct fz_stream_s unsigned char buf[4096]; }; -void fz_lock_stream(fz_stream *stm); - fz_stream *fz_new_stream(fz_context *ctx, void*, int(*)(fz_stream*, unsigned char*, int), void(*)(fz_context *, void *)); fz_stream *fz_keep_stream(fz_stream *stm); void fz_fill_buffer(fz_stream *stm); @@ -521,6 +518,8 @@ static inline int fz_is_eof_bits(fz_stream *stm) fz_stream *fz_open_copy(fz_stream *chain); fz_stream *fz_open_null(fz_stream *chain, int len, int 
offset); +fz_stream *fz_open_concat(fz_context *ctx, int max, int pad); +void fz_concat_push(fz_stream *concat, fz_stream *chain); /* Ownership of chain is passed in */ fz_stream *fz_open_arc4(fz_stream *chain, unsigned char *key, unsigned keylen); fz_stream *fz_open_aesd(fz_stream *chain, unsigned char *key, unsigned keylen); fz_stream *fz_open_a85d(fz_stream *chain); diff --git a/fitz/res_font.c b/fitz/res_font.c index c279c75b..966cbc6e 100644 --- a/fitz/res_font.c +++ b/fitz/res_font.c @@ -692,7 +692,7 @@ static fz_rect fz_bound_t3_glyph(fz_context *ctx, fz_font *font, int gid, fz_matrix trm) { fz_matrix ctm; - fz_buffer *contents; + void *contents; fz_rect bounds; fz_bbox bbox; fz_device *dev; @@ -726,7 +726,7 @@ fz_pixmap * fz_render_t3_glyph(fz_context *ctx, fz_font *font, int gid, fz_matrix trm, fz_colorspace *model) { fz_matrix ctm; - fz_buffer *contents; + void *contents; fz_bbox bbox; fz_device *dev; fz_pixmap *glyph; @@ -786,7 +786,7 @@ void fz_render_t3_glyph_direct(fz_context *ctx, fz_device *dev, fz_font *font, int gid, fz_matrix trm, void *gstate) { fz_matrix ctm; - fz_buffer *contents; + void *contents; if (gid < 0 || gid > 255) return; diff --git a/fitz/stm_open.c b/fitz/stm_open.c index ced32d80..be069fb9 100644 --- a/fitz/stm_open.c +++ b/fitz/stm_open.c @@ -24,7 +24,6 @@ fz_new_stream(fz_context *ctx, void *state, stm->bits = 0; stm->avail = 0; - stm->locked = 0; stm->bp = stm->buf; stm->rp = stm->bp; @@ -40,16 +39,6 @@ fz_new_stream(fz_context *ctx, void *state, return stm; } -void -fz_lock_stream(fz_stream *stm) -{ - if (stm) - { - fz_lock(stm->ctx, FZ_LOCK_FILE); - stm->locked = 1; - } -} - fz_stream * fz_keep_stream(fz_stream *stm) { @@ -67,8 +56,6 @@ fz_close(fz_stream *stm) { if (stm->close) stm->close(stm->ctx, stm->state); - if (stm->locked) - fz_unlock(stm->ctx, FZ_LOCK_FILE); fz_free(stm->ctx, stm); } } @@ -78,7 +65,6 @@ fz_close(fz_stream *stm) static int read_file(fz_stream *stm, unsigned char *buf, int len) { int n = 
read(*(int*)stm->state, buf, len); - fz_assert_lock_held(stm->ctx, FZ_LOCK_FILE); if (n < 0) fz_throw(stm->ctx, "read error: %s", strerror(errno)); return n; @@ -87,7 +73,6 @@ static int read_file(fz_stream *stm, unsigned char *buf, int len) static void seek_file(fz_stream *stm, int offset, int whence) { int n = lseek(*(int*)stm->state, offset, whence); - fz_assert_lock_held(stm->ctx, FZ_LOCK_FILE); if (n < 0) fz_throw(stm->ctx, "cannot lseek: %s", strerror(errno)); stm->pos = n; diff --git a/pdf/base_object.c b/pdf/base_object.c index 97cb97cc..2a5e6d78 100644 --- a/pdf/base_object.c +++ b/pdf/base_object.c @@ -145,8 +145,8 @@ pdf_new_indirect(fz_context *ctx, int num, int gen, void *xref) pdf_obj * pdf_keep_obj(pdf_obj *obj) { - assert(obj); - obj->refs ++; + if (obj) + obj->refs ++; return obj; } @@ -159,7 +159,6 @@ int pdf_is_indirect(pdf_obj *obj) do { \ if (obj && obj->kind == PDF_INDIRECT) \ {\ - fz_assert_lock_not_held(obj->ctx, FZ_LOCK_FILE); \ obj = pdf_resolve_indirect(obj); \ } \ } while (0) diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h index 3e4a4729..3396bb8a 100644 --- a/pdf/mupdf-internal.h +++ b/pdf/mupdf-internal.h @@ -198,6 +198,7 @@ fz_buffer *pdf_load_image_stream(pdf_document *doc, int num, int gen, pdf_image_ fz_stream *pdf_open_image_stream(pdf_document *doc, int num, int gen, pdf_image_params *params); fz_stream *pdf_open_stream_with_offset(pdf_document *doc, int num, int gen, pdf_obj *dict, int stm_ofs); fz_stream *pdf_open_image_decomp_stream(fz_context *ctx, fz_buffer *, pdf_image_params *params, int *factor); +fz_stream *pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj); void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf); void pdf_repair_obj_stms(pdf_document *doc); @@ -257,7 +258,7 @@ struct pdf_pattern_s fz_matrix matrix; fz_rect bbox; pdf_obj *resources; - fz_buffer *contents; + pdf_obj *contents; }; pdf_pattern *pdf_load_pattern(pdf_document *doc, pdf_obj *obj); @@ -280,7 +281,7 @@ struct pdf_xobject_s int 
transparency; fz_colorspace *colorspace; pdf_obj *resources; - fz_buffer *contents; + pdf_obj *contents; pdf_obj *me; }; @@ -513,7 +514,7 @@ struct pdf_page_s int rotate; int transparency; pdf_obj *resources; - fz_buffer *contents; + pdf_obj *contents; fz_link *links; pdf_annot *annots; }; diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c index a11b8c30..8851be6e 100644 --- a/pdf/pdf_interpret.c +++ b/pdf/pdf_interpret.c @@ -104,7 +104,7 @@ struct pdf_csi_s fz_cookie *cookie; }; -static void pdf_run_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents); +static void pdf_run_contents_object(pdf_csi *csi, pdf_obj *rdb, pdf_obj *contents); static void pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, fz_matrix transform); static void pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what); @@ -1291,7 +1291,7 @@ pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what) gstate->ctm = ptm; csi->top_ctm = gstate->ctm; pdf_gsave(csi); - pdf_run_buffer(csi, pat->resources, pat->contents); + pdf_run_contents_object(csi, pat->resources, pat->contents); /* RJW: "cannot render pattern tile" */ pdf_grestore(csi); while (oldtop < csi->gtop) @@ -1310,7 +1310,7 @@ pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what) pdf_gsave(csi); fz_try(ctx) { - pdf_run_buffer(csi, pat->resources, pat->contents); + pdf_run_contents_object(csi, pat->resources, pat->contents); } fz_catch(ctx) { @@ -1407,7 +1407,7 @@ pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, fz_matrix t if (xobj->resources) resources = xobj->resources; - pdf_run_buffer(csi, resources, xobj->contents); + pdf_run_contents_object(csi, resources, xobj->contents); /* RJW: "cannot interpret XObject stream" */ } fz_always(ctx) @@ -2528,7 +2528,6 @@ pdf_run_keyword(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, char *buf) fz_warn(ctx, "unknown keyword: '%s'", buf); break; } - fz_assert_lock_not_held(ctx, FZ_LOCK_FILE); } static void @@ 
-2662,44 +2661,78 @@ pdf_run_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, pdf_lexbuf *buf) */ static void -pdf_run_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents) +pdf_run_contents_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file) { fz_context *ctx = csi->dev->ctx; pdf_lexbuf_large *buf; - fz_stream * file = NULL; int save_in_text; fz_var(buf); - fz_var(file); + + if (file == NULL) + return; + + buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */ + buf->base.size = PDF_LEXBUF_LARGE; + save_in_text = csi->in_text; + csi->in_text = 0; + fz_try(ctx) + { + pdf_run_stream(csi, rdb, file, &buf->base); + } + fz_catch(ctx) + { + fz_warn(ctx, "Content stream parsing error - rendering truncated"); + } + csi->in_text = save_in_text; + fz_free(ctx, buf); +} + +static void +pdf_run_contents_object(pdf_csi *csi, pdf_obj *rdb, pdf_obj *contents) +{ + fz_context *ctx = csi->dev->ctx; + fz_stream *file = NULL; if (contents == NULL) return; + file = pdf_open_contents_stream(csi->xref, contents); fz_try(ctx) { - buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */ - buf->base.size = PDF_LEXBUF_LARGE; - file = fz_open_buffer(ctx, contents); - save_in_text = csi->in_text; - csi->in_text = 0; - fz_try(ctx) - { - pdf_run_stream(csi, rdb, file, &buf->base); - } - fz_catch(ctx) - { - fz_warn(ctx, "Content stream parsing error - rendering truncated"); - } - csi->in_text = save_in_text; + pdf_run_contents_stream(csi, rdb, file); } fz_always(ctx) { fz_close(file); - fz_free(ctx, buf); } fz_catch(ctx) { - fz_throw(ctx, "cannot parse context stream"); + fz_rethrow(ctx); + } +} + +static void +pdf_run_contents_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents) +{ + fz_context *ctx = csi->dev->ctx; + fz_stream *file = NULL; + + if (contents == NULL) + return; + + file = fz_open_buffer(ctx, contents); + fz_try(ctx) + { + pdf_run_contents_stream(csi, rdb, file); + } + fz_always(ctx) + { + fz_close(file); + } + 
fz_catch(ctx) + { + fz_rethrow(ctx); } } @@ -2719,14 +2752,16 @@ pdf_run_page_with_usage(pdf_document *xref, pdf_page *page, fz_device *dev, fz_m csi = pdf_new_csi(xref, dev, ctm, event, cookie, NULL); fz_try(ctx) { - pdf_run_buffer(csi, page->resources, page->contents); + pdf_run_contents_object(csi, page->resources, page->contents); } - fz_catch(ctx) + fz_always(ctx) { pdf_free_csi(csi); + } + fz_catch(ctx) + { fz_throw(ctx, "cannot parse page content stream"); } - pdf_free_csi(csi); if (cookie && cookie->progress_max != -1) { @@ -2792,7 +2827,7 @@ pdf_run_glyph(pdf_document *xref, pdf_obj *resources, fz_buffer *contents, fz_de fz_try(ctx) { - pdf_run_buffer(csi, resources, contents); + pdf_run_contents_buffer(csi, resources, contents); } fz_catch(ctx) { diff --git a/pdf/pdf_outline.c b/pdf/pdf_outline.c index d4bea75a..e88d613d 100644 --- a/pdf/pdf_outline.c +++ b/pdf/pdf_outline.c @@ -65,7 +65,7 @@ pdf_load_outline(pdf_document *xref) obj = pdf_dict_gets(root, "Outlines"); first = pdf_dict_gets(obj, "First"); if (first) - return pdf_load_outline_imp(xref, first); + return pdf_load_outline_imp(xref, first); return NULL; } diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c index f5fbc0b0..42e830da 100644 --- a/pdf/pdf_page.c +++ b/pdf/pdf_page.c @@ -281,72 +281,6 @@ found: return useBM; } -/* we need to combine all sub-streams into one for the content stream interpreter */ - -static fz_buffer * -pdf_load_page_contents_array(pdf_document *xref, pdf_obj *list) -{ - fz_buffer *big; - fz_buffer *one; - int i, n; - fz_context *ctx = xref->ctx; - - big = fz_new_buffer(ctx, 32 * 1024); - - n = pdf_array_len(list); - fz_var(i); /* Workaround Mac compiler bug */ - for (i = 0; i < n; i++) - { - pdf_obj *stm = pdf_array_get(list, i); - fz_try(ctx) - { - one = pdf_load_stream(xref, pdf_to_num(stm), pdf_to_gen(stm)); - } - fz_catch(ctx) - { - fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); - continue; - } - - if (big->len + one->len + 1 > big->cap) - 
fz_resize_buffer(ctx, big, big->len + one->len + 1); - memcpy(big->data + big->len, one->data, one->len); - big->data[big->len + one->len] = ' '; - big->len += one->len + 1; - - fz_drop_buffer(ctx, one); - } - - if (n > 0 && big->len == 0) - { - fz_drop_buffer(ctx, big); - fz_throw(ctx, "cannot load content stream"); - } - fz_trim_buffer(ctx, big); - - return big; -} - -static fz_buffer * -pdf_load_page_contents(pdf_document *xref, pdf_obj *obj) -{ - fz_context *ctx = xref->ctx; - - if (pdf_is_array(obj)) - { - return pdf_load_page_contents_array(xref, obj); - /* RJW: "cannot load content stream array" */ - } - else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))) - { - return pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); - /* RJW: "cannot load content stream (%d 0 R)", pdf_to_num(obj) */ - } - - fz_warn(ctx, "page contents missing, leaving page blank"); - return fz_new_buffer(ctx, 0); -} - pdf_page * pdf_load_page(pdf_document *xref, int number) { @@ -424,7 +358,7 @@ pdf_load_page(pdf_document *xref, int number) obj = pdf_dict_gets(pageobj, "Contents"); fz_try(ctx) { - page->contents = pdf_load_page_contents(xref, obj); + page->contents = pdf_keep_obj(obj); if (pdf_resources_use_blending(ctx, page->resources)) page->transparency = 1; @@ -464,7 +398,7 @@ pdf_free_page(pdf_document *xref, pdf_page *page) if (page->resources) pdf_drop_obj(page->resources); if (page->contents) - fz_drop_buffer(xref->ctx, page->contents); + pdf_drop_obj(page->contents); if (page->links) fz_drop_link(xref->ctx, page->links); if (page->annots) diff --git a/pdf/pdf_pattern.c b/pdf/pdf_pattern.c index 14175670..af96c2d5 100644 --- a/pdf/pdf_pattern.c +++ b/pdf/pdf_pattern.c @@ -21,7 +21,7 @@ pdf_free_pattern_imp(fz_context *ctx, fz_storable *pat_) if (pat->resources) pdf_drop_obj(pat->resources); if (pat->contents) - fz_drop_buffer(ctx, pat->contents); + pdf_drop_obj(pat->contents); fz_free(ctx, pat); } @@ -30,7 +30,7 @@ pdf_pattern_size(pdf_pattern *pat) { if (pat == 
NULL) return 0; - return sizeof(*pat) + (pat->contents ? pat->contents->cap : 0); + return sizeof(*pat); } pdf_pattern * @@ -72,7 +72,7 @@ pdf_load_pattern(pdf_document *xref, pdf_obj *dict) fz_try(ctx) { - pat->contents = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); + pat->contents = pdf_keep_obj(dict); } fz_catch(ctx) { diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c index a51b9631..27846855 100644 --- a/pdf/pdf_repair.c +++ b/pdf/pdf_repair.c @@ -195,6 +195,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) } } +/* Entered with file locked, remains locked throughout. */ void pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) { @@ -389,19 +390,7 @@ pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) /* corrected stream length */ if (list[i].stm_len >= 0) { - fz_unlock(ctx, FZ_LOCK_FILE); - fz_try(ctx) - { - dict = pdf_load_object(xref, list[i].num, list[i].gen); - } - fz_always(ctx) - { - fz_lock(ctx, FZ_LOCK_FILE); - } - fz_catch(ctx) - { - fz_rethrow(ctx); - } + dict = pdf_load_object(xref, list[i].num, list[i].gen); /* RJW: "cannot load stream object (%d %d R)", list[i].num, list[i].gen */ length = pdf_new_int(ctx, list[i].stm_len); diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c index 5338d81c..3086fbc9 100644 --- a/pdf/pdf_stream.c +++ b/pdf/pdf_stream.c @@ -273,7 +273,6 @@ pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, else if (pdf_array_len(filters) > 0) chain = build_filter_chain(chain, xref, filters, params, num, gen, imparams); - fz_lock_stream(chain); return chain; } @@ -309,7 +308,6 @@ fz_stream * pdf_open_raw_stream(pdf_document *xref, int num, int gen) { pdf_xref_entry *x; - fz_stream *stm; fz_var(x); @@ -324,9 +322,7 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen) if (x->stm_ofs == 0) fz_throw(xref->ctx, "object is not a stream"); - stm = pdf_open_raw_filter(xref->file, xref, x->obj, num, gen, x->stm_ofs); - fz_lock_stream(stm); - return stm; + return 
pdf_open_raw_filter(xref->file, xref, x->obj, num, gen, x->stm_ofs); } /* @@ -506,3 +502,49 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa return buf; } + +static fz_stream * +pdf_open_object_array(pdf_document *xref, pdf_obj *list) +{ + int i, n; + fz_context *ctx = xref->ctx; + fz_stream *stm; + + n = pdf_array_len(list); + stm = fz_open_concat(ctx, n, 1); + + fz_var(i); /* Workaround Mac compiler bug */ + for (i = 0; i < n; i++) + { + pdf_obj *obj = pdf_array_get(list, i); + fz_try(ctx) + { + fz_concat_push(stm, pdf_open_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))); + } + fz_catch(ctx) + { + fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); + continue; + } + } + + return stm; +} + +fz_stream * +pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj) +{ + fz_context *ctx = xref->ctx; + + if (pdf_is_array(obj)) + { + return pdf_open_object_array(xref, obj); + } + else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))) + { + return pdf_open_image_stream(xref, pdf_to_num(obj), pdf_to_gen(obj), NULL); + } + + fz_warn(ctx, "pdf object stream missing (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); + return NULL; +} diff --git a/pdf/pdf_type3.c b/pdf/pdf_type3.c index 6603fc8b..4df46ce3 100644 --- a/pdf/pdf_type3.c +++ b/pdf/pdf_type3.c @@ -2,10 +2,9 @@ #include "mupdf-internal.h" static void -pdf_run_glyph_func(void *doc, void *rdb_, fz_buffer *contents, fz_device *dev, fz_matrix ctm, void *gstate) +pdf_run_glyph_func(void *doc, void *rdb, fz_buffer *contents, fz_device *dev, fz_matrix ctm, void *gstate) { - pdf_obj *rdb = (pdf_obj *)rdb_; - pdf_run_glyph(doc, rdb, contents, dev, ctm, gstate); + pdf_run_glyph(doc, (pdf_obj *)rdb, contents, dev, ctm, gstate); } static void diff --git a/pdf/pdf_xobject.c b/pdf/pdf_xobject.c index ffa86184..1de0b20e 100644 --- a/pdf/pdf_xobject.c +++ b/pdf/pdf_xobject.c @@ -23,7 +23,8 @@ pdf_free_xobject_imp(fz_context *ctx, fz_storable *xobj_) if (xobj->resources) 
pdf_drop_obj(xobj->resources); if (xobj->contents) - fz_drop_buffer(ctx, xobj->contents); + //fz_drop_buffer(ctx, xobj->contents); + pdf_drop_obj(xobj->contents); pdf_drop_obj(xobj->me); fz_free(ctx, xobj); } @@ -33,7 +34,7 @@ pdf_xobject_size(pdf_xobject *xobj) { if (xobj == NULL) return 0; - return sizeof(*xobj) + (xobj->colorspace ? xobj->colorspace->size : 0) + (xobj->contents ? xobj->contents->len : 0); + return sizeof(*xobj) + (xobj->colorspace ? xobj->colorspace->size : 0); } pdf_xobject * @@ -98,7 +99,7 @@ pdf_load_xobject(pdf_document *xref, pdf_obj *dict) fz_try(ctx) { - form->contents = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); + form->contents = pdf_keep_obj(dict); } fz_catch(ctx) { diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c index fb8adc7a..3af2901c 100644 --- a/pdf/pdf_xref.c +++ b/pdf/pdf_xref.c @@ -305,8 +305,7 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in } } -/* Entered with file locked. Drops the lock in the middle, but then picks - * it up again before exiting. */ +/* Entered with file locked, remains locked throughout. */ static pdf_obj * pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) { @@ -333,7 +332,6 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) fz_try(ctx) { - fz_unlock(ctx, FZ_LOCK_FILE); obj = pdf_dict_gets(trailer, "Size"); if (!obj) fz_throw(ctx, "xref stream missing Size entry (%d %d R)", num, gen); @@ -383,7 +381,6 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) pdf_drop_obj(index); fz_rethrow(ctx); } - fz_lock(ctx, FZ_LOCK_FILE); return trailer; } @@ -436,22 +433,9 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf) { trailer = pdf_read_xref(xref, ofs, buf); - /* Unlock file in case XRefStm or Prev are indirect. */ - fz_unlock(ctx, FZ_LOCK_FILE); - fz_try(ctx) - { - /* FIXME: do we overwrite free entries properly? 
*/ - xrefstmofs = pdf_to_int(pdf_dict_gets(trailer, "XRefStm")); - prevofs = pdf_to_int(pdf_dict_gets(trailer, "Prev")); - } - fz_always(ctx) - { - fz_lock(ctx, FZ_LOCK_FILE); - } - fz_catch(ctx) - { - fz_rethrow(ctx); - } + /* FIXME: do we overwrite free entries properly? */ + xrefstmofs = pdf_to_int(pdf_dict_gets(trailer, "XRefStm")); + prevofs = pdf_to_int(pdf_dict_gets(trailer, "Prev")); /* We only recurse if we have both xrefstm and prev. * Hopefully this happens infrequently. */ @@ -476,7 +460,7 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf) /* * load xref tables from pdf * - * File locked on entry and exit; lock may be dropped in the middle. + * File locked on entry, throughout and on exit. */ static void @@ -492,22 +476,9 @@ pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) pdf_read_trailer(xref, buf); - /* Unlock (and relock) in case Size is indirect. */ - fz_unlock(ctx, FZ_LOCK_FILE); - fz_try(ctx) - { - size = pdf_to_int(pdf_dict_gets(xref->trailer, "Size")); - if (!size) - fz_throw(ctx, "trailer missing Size entry"); - } - fz_always(ctx) - { - fz_lock(ctx, FZ_LOCK_FILE); - } - fz_catch(ctx) - { - fz_rethrow(ctx); - } + size = pdf_to_int(pdf_dict_gets(xref->trailer, "Size")); + if (!size) + fz_throw(ctx, "trailer missing Size entry"); pdf_resize_xref(xref, size); @@ -712,12 +683,10 @@ pdf_open_document_with_stream(fz_stream *file) pdf_obj *obj; pdf_obj *nobj = NULL; int i, repaired = 0; - int locked; fz_context *ctx = file->ctx; fz_var(dict); fz_var(nobj); - fz_var(locked); xref = fz_malloc_struct(ctx, pdf_document); pdf_init_document(xref); @@ -726,9 +695,6 @@ pdf_open_document_with_stream(fz_stream *file) xref->file = fz_keep_stream(file); xref->ctx = ctx; - fz_lock(ctx, FZ_LOCK_FILE); - locked = 1; - fz_try(ctx) { pdf_load_xref(xref, &xref->lexbuf.base); @@ -757,9 +723,6 @@ pdf_open_document_with_stream(fz_stream *file) if (repaired) pdf_repair_xref(xref, &xref->lexbuf.base); - fz_unlock(ctx, FZ_LOCK_FILE); - locked = 0; - 
encrypt = pdf_dict_gets(xref->trailer, "Encrypt"); id = pdf_dict_gets(xref->trailer, "ID"); if (pdf_is_dict(encrypt)) @@ -818,11 +781,6 @@ pdf_open_document_with_stream(fz_stream *file) } } } - fz_always(ctx) - { - if (locked) - fz_unlock(ctx, FZ_LOCK_FILE); - } fz_catch(ctx) { pdf_drop_obj(dict); @@ -1023,7 +981,6 @@ pdf_cache_object(pdf_document *xref, int num, int gen) } else if (x->type == 'n') { - fz_lock(ctx, FZ_LOCK_FILE); fz_seek(xref->file, x->ofs, 0); fz_try(ctx) @@ -1033,7 +990,6 @@ pdf_cache_object(pdf_document *xref, int num, int gen) } fz_catch(ctx) { - fz_unlock(ctx, FZ_LOCK_FILE); fz_throw(ctx, "cannot parse object (%d %d R)", num, gen); } @@ -1041,13 +997,11 @@ pdf_cache_object(pdf_document *xref, int num, int gen) { pdf_drop_obj(x->obj); x->obj = NULL; - fz_unlock(ctx, FZ_LOCK_FILE); fz_throw(ctx, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen); } if (xref->crypt) pdf_crypt_obj(ctx, xref->crypt, x->obj, num, gen); - fz_unlock(ctx, FZ_LOCK_FILE); } else if (x->type == 'o') { diff --git a/scripts/cmapdump.c b/scripts/cmapdump.c index 57cfe54b..53247339 100644 --- a/scripts/cmapdump.c +++ b/scripts/cmapdump.c @@ -85,7 +85,6 @@ main(int argc, char **argv) clean(name); fi = fz_open_file(ctx, argv[i]); - fz_lock_stream(fi); cmap = pdf_load_cmap(ctx, fi); fz_close(fi); diff --git a/xps/xps_zip.c b/xps/xps_zip.c index ff43abf2..bb6857ee 100644 --- a/xps/xps_zip.c +++ b/xps/xps_zip.c @@ -108,13 +108,11 @@ xps_read_zip_entry(xps_document *doc, xps_entry *ent, unsigned char *outbuf) int code; fz_context *ctx = doc->ctx; - fz_lock(ctx, FZ_LOCK_FILE); fz_seek(doc->file, ent->offset, 0); sig = getlong(doc->file); if (sig != ZIP_LOCAL_FILE_SIG) { - fz_unlock(ctx, FZ_LOCK_FILE); fz_throw(doc->ctx, "wrong zip local file signature (0x%x)", sig); } @@ -137,7 +135,7 @@ xps_read_zip_entry(xps_document *doc, xps_entry *ent, unsigned char *outbuf) } else if (method == 8) { - inbuf = fz_malloc(doc->ctx, ent->csize); + inbuf = fz_malloc(ctx, 
ent->csize); fz_read(doc->file, inbuf, ent->csize); @@ -153,34 +151,29 @@ xps_read_zip_entry(xps_document *doc, xps_entry *ent, unsigned char *outbuf) code = inflateInit2(&stream, -15); if (code != Z_OK) { - fz_unlock(ctx, FZ_LOCK_FILE); - fz_free(doc->ctx, inbuf); - fz_throw(doc->ctx, "zlib inflateInit2 error: %s", stream.msg); + fz_free(ctx, inbuf); + fz_throw(ctx, "zlib inflateInit2 error: %s", stream.msg); } code = inflate(&stream, Z_FINISH); if (code != Z_STREAM_END) { inflateEnd(&stream); - fz_unlock(ctx, FZ_LOCK_FILE); - fz_free(doc->ctx, inbuf); - fz_throw(doc->ctx, "zlib inflate error: %s", stream.msg); + fz_free(ctx, inbuf); + fz_throw(ctx, "zlib inflate error: %s", stream.msg); } code = inflateEnd(&stream); if (code != Z_OK) { - fz_unlock(ctx, FZ_LOCK_FILE); - fz_free(doc->ctx, inbuf); - fz_throw(doc->ctx, "zlib inflateEnd error: %s", stream.msg); + fz_free(ctx, inbuf); + fz_throw(ctx, "zlib inflateEnd error: %s", stream.msg); } - fz_free(doc->ctx, inbuf); + fz_free(ctx, inbuf); } else { - fz_unlock(ctx, FZ_LOCK_FILE); - fz_throw(doc->ctx, "unknown compression method (%d)", method); + fz_throw(ctx, "unknown compression method (%d)", method); } - fz_unlock(ctx, FZ_LOCK_FILE); } /* @@ -306,7 +299,6 @@ xps_find_and_read_zip_dir(xps_document *doc) int i, n; fz_context *ctx = doc->ctx; - fz_lock(ctx, FZ_LOCK_FILE); fz_seek(doc->file, 0, SEEK_END); file_size = fz_tell(doc->file); @@ -322,7 +314,6 @@ xps_find_and_read_zip_dir(xps_document *doc) if (!memcmp(buf + i, "PK\5\6", 4)) { xps_read_zip_dir(doc, file_size - back + i); - fz_unlock(ctx, FZ_LOCK_FILE); return; } } @@ -330,8 +321,7 @@ xps_find_and_read_zip_dir(xps_document *doc) back += sizeof buf - 4; } - fz_unlock(ctx, FZ_LOCK_FILE); - fz_throw(doc->ctx, "cannot find end of central directory"); + fz_throw(ctx, "cannot find end of central directory"); } /* -- cgit v1.2.3