diff options
author | Paul Gardiner <paul@glidos.net> | 2012-06-01 09:38:51 +0100 |
---|---|---|
committer | Paul Gardiner <paul@glidos.net> | 2012-06-01 09:38:51 +0100 |
commit | 2aea3684b5571f469f4ad29f409b61fb9da35e2a (patch) | |
tree | 7b2c80c1e49e18549a2210101579235a915b4ea2 /pdf | |
parent | 741fe4a4d7bbd6ee914504be7e85356a741601c4 (diff) | |
parent | d9982f81e105d5e1084283225ed72b1943ca3693 (diff) | |
download | mupdf-2aea3684b5571f469f4ad29f409b61fb9da35e2a.tar.xz |
Merge branch 'master' into forms
Conflicts:
fitz/doc_document.c
fitz/fitz-internal.h
fitz/fitz.h
fitz/stm_buffer.c
pdf/mupdf-internal.h
pdf/pdf_object.c
pdf/pdf_xobject.c
pdf/pdf_xref.c
win32/mupdf.sln
Diffstat (limited to 'pdf')
-rw-r--r-- | pdf/mupdf-internal.h | 24 | ||||
-rw-r--r-- | pdf/mupdf.h | 42 | ||||
-rw-r--r-- | pdf/pdf_cmap.c | 2 | ||||
-rw-r--r-- | pdf/pdf_font.c | 2 | ||||
-rw-r--r-- | pdf/pdf_form.c | 22 | ||||
-rw-r--r-- | pdf/pdf_image.c | 13 | ||||
-rw-r--r-- | pdf/pdf_interpret.c | 91 | ||||
-rw-r--r-- | pdf/pdf_js_none.c | 4 | ||||
-rw-r--r-- | pdf/pdf_object.c (renamed from pdf/base_object.c) | 9 | ||||
-rw-r--r-- | pdf/pdf_page.c | 72 | ||||
-rw-r--r-- | pdf/pdf_parse.c | 8 | ||||
-rw-r--r-- | pdf/pdf_pattern.c | 6 | ||||
-rw-r--r-- | pdf/pdf_repair.c | 15 | ||||
-rw-r--r-- | pdf/pdf_stream.c | 126 | ||||
-rw-r--r-- | pdf/pdf_type3.c | 5 | ||||
-rw-r--r-- | pdf/pdf_write.c | 690 | ||||
-rw-r--r-- | pdf/pdf_xobject.c | 18 | ||||
-rw-r--r-- | pdf/pdf_xref.c | 329 | ||||
-rw-r--r-- | pdf/pdf_xref_aux.c | 31 |
19 files changed, 1183 insertions, 326 deletions
diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h index b50ba2fe..b1117726 100644 --- a/pdf/mupdf-internal.h +++ b/pdf/mupdf-internal.h @@ -146,11 +146,12 @@ typedef struct pdf_xref_entry_s pdf_xref_entry; struct pdf_xref_entry_s { + char type; /* 0=unset (f)ree i(n)use (o)bjstm */ int ofs; /* file offset / objstm object number */ int gen; /* generation / objstm index */ int stm_ofs; /* on-disk stream */ + fz_buffer *stm_buf; /* in-memory stream (for updated objects) */ pdf_obj *obj; /* stored/cached object */ - int type; /* 0=unset (f)ree i(n)use (o)bjstm */ }; typedef struct pdf_crypt_s pdf_crypt; @@ -217,19 +218,25 @@ struct pdf_document_s pdf_js *js; }; +pdf_document *pdf_open_document_no_run(fz_context *ctx, const char *filename); +pdf_document *pdf_open_document_no_run_with_stream(fz_stream *file); + void pdf_cache_object(pdf_document *doc, int num, int gen); fz_stream *pdf_open_inline_stream(pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, pdf_image_params *params); -fz_buffer *pdf_load_image_stream(pdf_document *doc, int num, int gen, pdf_image_params *params); -fz_stream *pdf_open_image_stream(pdf_document *doc, int num, int gen, pdf_image_params *params); +fz_buffer *pdf_load_image_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params); +fz_stream *pdf_open_image_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params); fz_stream *pdf_open_stream_with_offset(pdf_document *doc, int num, int gen, pdf_obj *dict, int stm_ofs); fz_stream *pdf_open_image_decomp_stream(fz_context *ctx, fz_buffer *, pdf_image_params *params, int *factor); +fz_stream *pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj); +fz_buffer *pdf_load_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen); +fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen); +fz_stream *pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen); void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf); void pdf_repair_obj_stms(pdf_document *doc); void pdf_print_xref(pdf_document *); void pdf_resize_xref(pdf_document *doc, int newcap); -pdf_obj *pdf_new_stream_indirection(pdf_document *doc, pdf_obj *obj); /* * Encryption @@ -239,6 +246,7 @@ pdf_crypt *pdf_new_crypt(fz_context *ctx, pdf_obj *enc, pdf_obj *id); void pdf_free_crypt(fz_context *ctx, pdf_crypt *crypt); void pdf_crypt_obj(fz_context *ctx, pdf_crypt *crypt, pdf_obj *obj, int num, int gen); +void pdf_crypt_buffer(fz_context *ctx, pdf_crypt *crypt, fz_buffer *buf, int num, int gen); fz_stream *pdf_open_crypt(fz_stream *chain, pdf_crypt *crypt, int num, int gen); fz_stream *pdf_open_crypt_with_filter(fz_stream *chain, pdf_crypt *crypt, char *name, int num, int gen); @@ -284,7 +292,7 @@ struct pdf_pattern_s fz_matrix matrix; fz_rect bbox; pdf_obj *resources; - fz_buffer *contents; + pdf_obj *contents; }; pdf_pattern *pdf_load_pattern(pdf_document *doc, pdf_obj *obj); @@ -307,7 +315,7 @@ struct pdf_xobject_s int transparency; fz_colorspace *colorspace; pdf_obj *resources; - fz_buffer *contents; + pdf_obj *contents; pdf_obj *me; }; @@ -315,7 +323,7 @@ pdf_xobject *pdf_load_xobject(pdf_document *doc, pdf_obj *obj); pdf_obj *pdf_new_xobject(pdf_document *doc, fz_rect *bbox, fz_matrix *mat); pdf_xobject *pdf_keep_xobject(fz_context *ctx, pdf_xobject *xobj); void pdf_drop_xobject(fz_context *ctx, pdf_xobject *xobj); -void pdf_xobject_set_contents(fz_context *ctx, pdf_xobject *from, fz_buffer *buffer); +void pdf_xobject_set_contents(pdf_document *xref, pdf_xobject *from, fz_buffer *buffer); void pdf_update_appearance(pdf_document *doc, pdf_obj *obj); @@ -553,7 +561,7 @@ struct pdf_page_s int rotate; int transparency; pdf_obj *resources; - fz_buffer *contents; + pdf_obj *contents; fz_link *links; pdf_annot *annots; }; diff --git a/pdf/mupdf.h b/pdf/mupdf.h index 2d27f4af..2d93b793 100644 --- a/pdf/mupdf.h +++ b/pdf/mupdf.h @@ -20,7 +20,6 @@ pdf_obj *pdf_new_real(fz_context *ctx, float f); pdf_obj *fz_new_name(fz_context *ctx, char *str); pdf_obj *pdf_new_string(fz_context *ctx, char *str, int len); pdf_obj *pdf_new_indirect(fz_context *ctx, int num, int gen, void *doc); - pdf_obj *pdf_new_array(fz_context *ctx, int initialcap); pdf_obj *pdf_new_dict(fz_context *ctx, int initialcap); pdf_obj *pdf_new_rect(fz_context *ctx, fz_rect *rect); @@ -97,7 +96,6 @@ fz_matrix pdf_to_matrix(fz_context *ctx, pdf_obj *array); int pdf_count_objects(pdf_document *doc); pdf_obj *pdf_resolve_indirect(pdf_obj *ref); pdf_obj *pdf_load_object(pdf_document *doc, int num, int gen); -void pdf_update_object(pdf_document *doc, int num, int gen, pdf_obj *newobj); fz_buffer *pdf_load_raw_stream(pdf_document *doc, int num, int gen); fz_buffer *pdf_load_stream(pdf_document *doc, int num, int gen); @@ -109,6 +107,41 @@ fz_image *pdf_load_image(pdf_document *doc, pdf_obj *obj); fz_outline *pdf_load_outline(pdf_document *doc); /* + pdf_create_object: Allocate a slot in the xref table and return a fresh unused object number. +*/ +int pdf_create_object(pdf_document *xref); + +/* + pdf_delete_object: Remove object from xref table, marking the slot as free. +*/ +void pdf_delete_object(pdf_document *xref, int num); + +/* + pdf_update_object: Replace object in xref table with the passed in object. +*/ +void pdf_update_object(pdf_document *xref, int num, pdf_obj *obj); + +/* + pdf_get_stream: Return the contents for object in xref table +*/ +fz_buffer *pdf_get_stream(pdf_document *xref, int num); + +/* + pdf_update_stream: Replace stream contents for object in xref table with the passed in buffer. + + The buffer contents must match the /Filter setting. + If storing uncompressed data, make sure to delete the /Filter key from + the stream dictionary. If storing deflated data, make sure to set the + /Filter value to /FlateDecode. +*/ +void pdf_update_stream(pdf_document *xref, int num, fz_buffer *buf); + +/* + pdf_write_document: Write out the document to a file with all changes finalised. +*/ +void pdf_write_document(pdf_document *doc, char *filename, fz_write_options *opts); + +/* pdf_open_document: Open a PDF document. Open a PDF document by reading its cross reference table, so @@ -218,4 +251,9 @@ void pdf_run_page(pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix c void pdf_run_page_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix ctm, char *event, fz_cookie *cookie); +/* + Metadata interface. +*/ +int pdf_meta(pdf_document *doc, int key, void *ptr, int size); + #endif diff --git a/pdf/pdf_cmap.c b/pdf/pdf_cmap.c index 71066986..1f1117fe 100644 --- a/pdf/pdf_cmap.c +++ b/pdf/pdf_cmap.c @@ -189,7 +189,7 @@ add_table(fz_context *ctx, pdf_cmap *cmap, int value) } if (cmap->tlen + 1 > cmap->tcap) { - int new_cap = cmap->tcap > 1 ? (cmap->tcap * 3) / 2 : 256; + int new_cap = cmap->tcap > 1 ? (cmap->tcap * 3) / 2 : 256; cmap->table = fz_resize_array(ctx, cmap->table, new_cap, sizeof(unsigned short)); cmap->tcap = new_cap; } diff --git a/pdf/pdf_font.c b/pdf/pdf_font.c index ab60af72..9e4e60ea 100644 --- a/pdf/pdf_font.c +++ b/pdf/pdf_font.c @@ -148,7 +148,7 @@ static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid) { int gid = ft_cid_to_gid(fontdesc, cid); int fterr; - + fterr = FT_Load_Glyph(fontdesc->font->ft_face, gid, FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM); if (fterr) diff --git a/pdf/pdf_form.c b/pdf/pdf_form.c index 92eda83f..a517c339 100644 --- a/pdf/pdf_form.c +++ b/pdf/pdf_form.c @@ -71,6 +71,11 @@ static const char *fmt_ET = "ET\n"; static const char *fmt_Q = "Q\n"; static const char *fmt_EMC = "EMC\n"; +static fz_buffer *form_contents(pdf_document *doc, pdf_xobject *form) +{ + return pdf_get_stream(doc, pdf_to_num(form->contents)); +} + static void account_for_rot(fz_rect *rect, fz_matrix *mat, int rot) { float width = rect->x1; @@ -659,8 +664,9 @@ fz_buffer *create_text_appearance(pdf_document *doc, fz_rect *bbox, fz_matrix *o return fzbuf; } -static void update_marked_content(fz_context *ctx, pdf_xobject *form, fz_buffer *fzbuf) +static void update_marked_content(pdf_document *doc, pdf_xobject *form, fz_buffer *fzbuf) { + fz_context *ctx = doc->ctx; int tok; pdf_lexbuf lbuf; fz_stream *str_outer = NULL; @@ -681,7 +687,7 @@ static void update_marked_content(fz_context *ctx, pdf_xobject *form, fz_buffer int first = 1; newbuf = fz_new_buffer(ctx, 0); - len = fz_buffer_storage(ctx, form->contents, &buf); + len = fz_buffer_storage(ctx, form_contents(doc, form), &buf); str_outer = fz_open_memory(ctx, buf, len); len = fz_buffer_storage(ctx, fzbuf, &buf); str_inner = fz_open_memory(ctx, buf, len); @@ -733,7 +739,7 @@ static void update_marked_content(fz_context *ctx, pdf_xobject *form, fz_buffer } /* Use newbuf in place of the existing appearance stream */ - pdf_xobject_set_contents(ctx, form, newbuf); + pdf_xobject_set_contents(doc, form, newbuf); } fz_always(ctx) { @@ -756,7 +762,7 @@ int get_matrix(pdf_document *doc, pdf_xobject *form, int q, fz_matrix *mt) pdf_lexbuf lbuf; fz_stream *str; - bufsize = fz_buffer_storage(ctx, form->contents, &buf); + bufsize = fz_buffer_storage(ctx, form_contents(doc, form), &buf); str = fz_open_memory(ctx, buf, bufsize); memset(lbuf.scratch, 0, sizeof(lbuf.scratch)); @@ -807,7 +813,7 @@ int get_matrix(pdf_document *doc, pdf_xobject *form, int q, fz_matrix *mt) if (q != Q_Left) { /* Offset the matrix to refer to the alignment position */ - fz_rect bbox = measure_text(doc, form->resources, form->contents); + fz_rect bbox = measure_text(doc, form->resources, form_contents(doc, form)); mt->e += q == Q_Right ? (bbox.x1 - bbox.x0) : (bbox.x1 - bbox.x0) / 2; } @@ -859,7 +865,7 @@ static void update_text_appearance(pdf_document *doc, pdf_obj *obj, char *text) has_tm = get_matrix(doc, form, q, &tm); fzbuf = create_text_appearance(doc, &form->bbox, has_tm ? &tm : NULL, q, dr, pdf_to_str_buf(da), text); - update_marked_content(ctx, form, fzbuf); + update_marked_content(doc, form, fzbuf); } } } @@ -921,7 +927,7 @@ static void synthesize_text_widget(pdf_document *doc, pdf_obj *obj) form = pdf_load_xobject(doc, formobj); fzbuf = fz_new_buffer(ctx, 0); fz_buffer_printf(ctx, fzbuf, "/Tx BMC EMC"); - pdf_xobject_set_contents(ctx, form, fzbuf); + pdf_xobject_set_contents(doc, form, fzbuf); ap = pdf_new_dict(ctx, 1); pdf_dict_puts(ap, "N", formobj); @@ -1145,7 +1151,7 @@ static void update_pushbutton_widget(pdf_document *doc, pdf_obj *obj) fzbuf_print_text(ctx, fzbuf, &clip, da, 0, &mat, text); } - pdf_xobject_set_contents(ctx, form, fzbuf); + pdf_xobject_set_contents(doc, form, fzbuf); } fz_always(ctx) { diff --git a/pdf/pdf_image.c b/pdf/pdf_image.c index dc9cc088..b4571bbe 100644 --- a/pdf/pdf_image.c +++ b/pdf/pdf_image.c @@ -95,7 +95,7 @@ static fz_store_type pdf_image_store_type = }; static fz_pixmap * -decomp_image_from_stream(fz_context *ctx, fz_stream *stm, pdf_image *image, int in_line, int indexed, int factor) +decomp_image_from_stream(fz_context *ctx, fz_stream *stm, pdf_image *image, int in_line, int indexed, int factor, int cache) { fz_pixmap *tile = NULL; fz_pixmap *existing_tile; @@ -190,6 +190,9 @@ decomp_image_from_stream(fz_context *ctx, fz_stream *stm, pdf_image *image, int fz_rethrow(ctx); } + if (!cache) + return tile; + /* Now we try to cache the pixmap. Any failure here will just result * in us not caching. */ fz_try(ctx) @@ -279,7 +282,7 @@ pdf_image_get_pixmap(fz_context *ctx, fz_image *image_, int w, int h) /* We need to make a new one. */ stm = pdf_open_image_decomp_stream(ctx, image->buffer, &image->params, &factor); - return decomp_image_from_stream(ctx, stm, image, 0, 0, factor); + return decomp_image_from_stream(ctx, stm, image, 0, 0, factor, 1); } static pdf_image * @@ -427,7 +430,9 @@ pdf_load_image_imp(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *c { /* Just load the compressed image data now and we can * decode it on demand. */ - image->buffer = pdf_load_image_stream(xref, pdf_to_num(dict), pdf_to_gen(dict), &image->params); + int num = pdf_to_num(dict); + int gen = pdf_to_gen(dict); + image->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &image->params); break; /* Out of fz_try */ } @@ -443,7 +448,7 @@ pdf_load_image_imp(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *c /* RJW: "cannot open image data stream (%d 0 R)", pdf_to_num(dict) */ } - image->tile = decomp_image_from_stream(ctx, stm, image, cstm != NULL, indexed, 1); + image->tile = decomp_image_from_stream(ctx, stm, image, cstm != NULL, indexed, 1, 0); } fz_catch(ctx) { diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c index a11b8c30..8851be6e 100644 --- a/pdf/pdf_interpret.c +++ b/pdf/pdf_interpret.c @@ -104,7 +104,7 @@ struct pdf_csi_s fz_cookie *cookie; }; -static void pdf_run_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents); +static void pdf_run_contents_object(pdf_csi *csi, pdf_obj *rdb, pdf_obj *contents); static void pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, fz_matrix transform); static void pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what); @@ -1291,7 +1291,7 @@ pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what) gstate->ctm = ptm; csi->top_ctm = gstate->ctm; pdf_gsave(csi); - pdf_run_buffer(csi, pat->resources, pat->contents); + pdf_run_contents_object(csi, pat->resources, pat->contents); /* RJW: "cannot render pattern tile" */ pdf_grestore(csi); while (oldtop < csi->gtop) @@ -1310,7 +1310,7 @@ pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what) pdf_gsave(csi); fz_try(ctx) { - pdf_run_buffer(csi, pat->resources, pat->contents); + pdf_run_contents_object(csi, pat->resources, pat->contents); } fz_catch(ctx) { @@ -1407,7 +1407,7 @@ pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, fz_matrix t if (xobj->resources) resources = xobj->resources; - pdf_run_buffer(csi, resources, xobj->contents); + pdf_run_contents_object(csi, resources, xobj->contents); /* RJW: "cannot interpret XObject stream" */ } fz_always(ctx) @@ -2528,7 +2528,6 @@ pdf_run_keyword(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, char *buf) fz_warn(ctx, "unknown keyword: '%s'", buf); break; } - fz_assert_lock_not_held(ctx, FZ_LOCK_FILE); } static void @@ -2662,44 +2661,78 @@ pdf_run_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, pdf_lexbuf *buf) */ static void -pdf_run_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents) +pdf_run_contents_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file) { fz_context *ctx = csi->dev->ctx; pdf_lexbuf_large *buf; - fz_stream * file = NULL; int save_in_text; fz_var(buf); - fz_var(file); + + if (file == NULL) + return; + + buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */ + buf->base.size = PDF_LEXBUF_LARGE; + save_in_text = csi->in_text; + csi->in_text = 0; + fz_try(ctx) + { + pdf_run_stream(csi, rdb, file, &buf->base); + } + fz_catch(ctx) + { + fz_warn(ctx, "Content stream parsing error - rendering truncated"); + } + csi->in_text = save_in_text; + fz_free(ctx, buf); +} + +static void +pdf_run_contents_object(pdf_csi *csi, pdf_obj *rdb, pdf_obj *contents) +{ + fz_context *ctx = csi->dev->ctx; + fz_stream *file = NULL; if (contents == NULL) return; + file = pdf_open_contents_stream(csi->xref, contents); fz_try(ctx) { - buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */ - buf->base.size = PDF_LEXBUF_LARGE; - file = fz_open_buffer(ctx, contents); - save_in_text = csi->in_text; - csi->in_text = 0; - fz_try(ctx) - { - pdf_run_stream(csi, rdb, file, &buf->base); - } - fz_catch(ctx) - { - fz_warn(ctx, "Content stream parsing error - rendering truncated"); - } - csi->in_text = save_in_text; + pdf_run_contents_stream(csi, rdb, file); } fz_always(ctx) { fz_close(file); - fz_free(ctx, buf); } fz_catch(ctx) { - fz_throw(ctx, "cannot parse context stream"); + fz_rethrow(ctx); + } +} + +static void +pdf_run_contents_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents) +{ + fz_context *ctx = csi->dev->ctx; + fz_stream *file = NULL; + + if (contents == NULL) + return; + + file = fz_open_buffer(ctx, contents); + fz_try(ctx) + { + pdf_run_contents_stream(csi, rdb, file); + } + fz_always(ctx) + { + fz_close(file); + } + fz_catch(ctx) + { + fz_rethrow(ctx); } } @@ -2719,14 +2752,16 @@ pdf_run_page_with_usage(pdf_document *xref, pdf_page *page, fz_device *dev, fz_m csi = pdf_new_csi(xref, dev, ctm, event, cookie, NULL); fz_try(ctx) { - pdf_run_buffer(csi, page->resources, page->contents); + pdf_run_contents_object(csi, page->resources, page->contents); } - fz_catch(ctx) + fz_always(ctx) { pdf_free_csi(csi); + } + fz_catch(ctx) + { fz_throw(ctx, "cannot parse page content stream"); } - pdf_free_csi(csi); if (cookie && cookie->progress_max != -1) { @@ -2792,7 +2827,7 @@ pdf_run_glyph(pdf_document *xref, pdf_obj *resources, fz_buffer *contents, fz_de fz_try(ctx) { - pdf_run_buffer(csi, resources, contents); + pdf_run_contents_buffer(csi, resources, contents); } fz_catch(ctx) { diff --git a/pdf/pdf_js_none.c b/pdf/pdf_js_none.c index a85103e4..bc8d2ee7 100644 --- a/pdf/pdf_js_none.c +++ b/pdf/pdf_js_none.c @@ -13,6 +13,10 @@ void pdf_drop_js(pdf_js *js) { } +void pdf_js_setup_event(pdf_js *js, pdf_obj *target) +{ +} + void pdf_js_execute(pdf_js *js, char *code) { } diff --git a/pdf/base_object.c b/pdf/pdf_object.c index 142bc128..3fb48019 100644 --- a/pdf/base_object.c +++ b/pdf/pdf_object.c @@ -145,8 +145,8 @@ pdf_new_indirect(fz_context *ctx, int num, int gen, void *xref) pdf_obj * pdf_keep_obj(pdf_obj *obj) { - assert(obj); - obj->refs ++; + if (obj) + obj->refs ++; return obj; } @@ -159,7 +159,6 @@ int pdf_is_indirect(pdf_obj *obj) do { \ if (obj && obj->kind == PDF_INDIRECT) \ {\ - fz_assert_lock_not_held(obj->ctx, FZ_LOCK_FILE); \ obj = pdf_resolve_indirect(obj); \ } \ } while (0) @@ -566,6 +565,8 @@ pdf_obj *pdf_new_rect(fz_context *ctx, fz_rect *rect) pdf_obj *arr = NULL; pdf_obj *item = NULL; + fz_var(arr); + fz_var(item); fz_try(ctx) { arr = pdf_new_array(ctx, 4); @@ -605,6 +606,8 @@ pdf_obj *pdf_new_matrix(fz_context *ctx, fz_matrix *mtx) pdf_obj *arr = NULL; pdf_obj *item = NULL; + fz_var(arr); + fz_var(item); fz_try(ctx) { arr = pdf_new_array(ctx, 6); diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c index 3e95e9a5..39554551 100644 --- a/pdf/pdf_page.c +++ b/pdf/pdf_page.c @@ -114,6 +114,8 @@ pdf_load_page_tree_node(pdf_document *xref, pdf_obj *node, struct info info) } } /* Get the next node */ + if (stacklen < 0) + break; while (++stack[stacklen].pos == stack[stacklen].max) { pdf_dict_unmark(stack[stacklen].node); @@ -279,72 +281,6 @@ found: return useBM; } -/* we need to combine all sub-streams into one for the content stream interpreter */ - -static fz_buffer * -pdf_load_page_contents_array(pdf_document *xref, pdf_obj *list) -{ - fz_buffer *big; - fz_buffer *one; - int i, n; - fz_context *ctx = xref->ctx; - - big = fz_new_buffer(ctx, 32 * 1024); - - n = pdf_array_len(list); - fz_var(i); /* Workaround Mac compiler bug */ - for (i = 0; i < n; i++) - { - pdf_obj *stm = pdf_array_get(list, i); - fz_try(ctx) - { - one = pdf_load_stream(xref, pdf_to_num(stm), pdf_to_gen(stm)); - } - fz_catch(ctx) - { - fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); - continue; - } - - if (big->len + one->len + 1 > big->cap) - fz_resize_buffer(ctx, big, big->len + one->len + 1); - memcpy(big->data + big->len, one->data, one->len); - big->data[big->len + one->len] = ' '; - big->len += one->len + 1; - - fz_drop_buffer(ctx, one); - } - - if (n > 0 && big->len == 0) - { - fz_drop_buffer(ctx, big); - fz_throw(ctx, "cannot load content stream"); - } - fz_trim_buffer(ctx, big); - - return big; -} - -static fz_buffer * -pdf_load_page_contents(pdf_document *xref, pdf_obj *obj) -{ - fz_context *ctx = xref->ctx; - - if (pdf_is_array(obj)) - { - return pdf_load_page_contents_array(xref, obj); - /* RJW: "cannot load content stream array" */ - } - else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))) - { - return pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); - /* RJW: "cannot load content stream (%d 0 R)", pdf_to_num(obj) */ - } - - fz_warn(ctx, "page contents missing, leaving page blank"); - return fz_new_buffer(ctx, 0); -} - pdf_page * pdf_load_page(pdf_document *xref, int number) { @@ -422,7 +358,7 @@ pdf_load_page(pdf_document *xref, int number) obj = pdf_dict_gets(pageobj, "Contents"); fz_try(ctx) { - page->contents = pdf_load_page_contents(xref, obj); + page->contents = pdf_keep_obj(obj); if (pdf_resources_use_blending(ctx, page->resources)) page->transparency = 1; @@ -462,7 +398,7 @@ pdf_free_page(pdf_document *xref, pdf_page *page) if (page->resources) pdf_drop_obj(page->resources); if (page->contents) - fz_drop_buffer(xref->ctx, page->contents); + pdf_drop_obj(page->contents); if (page->links) fz_drop_link(xref->ctx, page->links); if (page->annots) diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c index fe9db368..b1472d1a 100644 --- a/pdf/pdf_parse.c +++ b/pdf/pdf_parse.c @@ -453,21 +453,19 @@ pdf_parse_ind_obj(pdf_document *xref, fz_var(obj); tok = pdf_lex(file, buf); - /* RJW: cannot parse indirect object (%d %d R)", num, gen */ if (tok != PDF_TOK_INT) - fz_throw(ctx, "expected object number (%d %d R)", num, gen); + fz_throw(ctx, "expected object number"); num = buf->i; tok = pdf_lex(file, buf); - /* RJW: "cannot parse indirect object (%d %d R)", num, gen */ if (tok != PDF_TOK_INT) - fz_throw(ctx, "expected generation number (%d %d R)", num, gen); + fz_throw(ctx, "expected generation number (%d ? obj)", num); gen = buf->i; tok = pdf_lex(file, buf); /* RJW: "cannot parse indirect object (%d %d R)", num, gen */ if (tok != PDF_TOK_OBJ) - fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen); + fz_throw(ctx, "expected 'obj' keyword (%d %d ?)", num, gen); tok = pdf_lex(file, buf); /* RJW: "cannot parse indirect object (%d %d R)", num, gen */ diff --git a/pdf/pdf_pattern.c b/pdf/pdf_pattern.c index 14175670..af96c2d5 100644 --- a/pdf/pdf_pattern.c +++ b/pdf/pdf_pattern.c @@ -21,7 +21,7 @@ pdf_free_pattern_imp(fz_context *ctx, fz_storable *pat_) if (pat->resources) pdf_drop_obj(pat->resources); if (pat->contents) - fz_drop_buffer(ctx, pat->contents); + pdf_drop_obj(pat->contents); fz_free(ctx, pat); } @@ -30,7 +30,7 @@ pdf_pattern_size(pdf_pattern *pat) { if (pat == NULL) return 0; - return sizeof(*pat) + (pat->contents ? pat->contents->cap : 0); + return sizeof(*pat); } pdf_pattern * @@ -72,7 +72,7 @@ pdf_load_pattern(pdf_document *xref, pdf_obj *dict) fz_try(ctx) { - pat->contents = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); + pat->contents = pdf_keep_obj(dict); } fz_catch(ctx) { diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c index a51b9631..27846855 100644 --- a/pdf/pdf_repair.c +++ b/pdf/pdf_repair.c @@ -195,6 +195,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) } } +/* Entered with file locked, remains locked throughout. */ void pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) { @@ -389,19 +390,7 @@ pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) /* corrected stream length */ if (list[i].stm_len >= 0) { - fz_unlock(ctx, FZ_LOCK_FILE); - fz_try(ctx) - { - dict = pdf_load_object(xref, list[i].num, list[i].gen); - } - fz_always(ctx) - { - fz_lock(ctx, FZ_LOCK_FILE); - } - fz_catch(ctx) - { - fz_rethrow(ctx); - } + dict = pdf_load_object(xref, list[i].num, list[i].gen); /* RJW: "cannot load stream object (%d %d R)", list[i].num, list[i].gen */ length = pdf_new_int(ctx, list[i].stm_len); diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c index 84f966ec..89d94004 100644 --- a/pdf/pdf_stream.c +++ b/pdf/pdf_stream.c @@ -13,7 +13,7 @@ pdf_is_stream(pdf_document *xref, int num, int gen) pdf_cache_object(xref, num, gen); /* RJW: "cannot load object, ignoring error" */ - return xref->table[num].stm_ofs > 0; + return xref->table[num].stm_ofs > 0 || xref->table[num].stm_buf; } /* @@ -222,21 +222,27 @@ build_filter_chain(fz_stream *chain, pdf_document *xref, pdf_obj *fs, pdf_obj *p /* * Build a filter for reading raw stream data. - * This is a null filter to constrain reading to the - * stream length, followed by a decryption filter. + * This is a null filter to constrain reading to the stream length (and to + * allow for other people accessing the file), followed by a decryption + * filter. + * + * num and gen are used purely to seed the encryption. */ static fz_stream * -pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen) +pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, int offset) { + fz_context *ctx = chain->ctx; int hascrypt; int len; - fz_context *ctx = chain->ctx; + + if (num > 0 && num < xref->len && xref->table[num].stm_buf) + return fz_open_buffer(ctx, xref->table[num].stm_buf); /* don't close chain when we close this filter */ fz_keep_stream(chain); len = pdf_to_int(pdf_dict_gets(stmobj, "Length")); - chain = fz_open_null(chain, len); + chain = fz_open_null(chain, len, offset); fz_try(ctx) { @@ -258,7 +264,7 @@ pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int n * to stream length and decrypting. */ static fz_stream * -pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, pdf_image_params *imparams) +pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, int offset, pdf_image_params *imparams) { pdf_obj *filters; pdf_obj *params; @@ -266,14 +272,13 @@ pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, filters = pdf_dict_getsa(stmobj, "Filter", "F"); params = pdf_dict_getsa(stmobj, "DecodeParms", "DP"); - chain = pdf_open_raw_filter(chain, xref, stmobj, num, gen); + chain = pdf_open_raw_filter(chain, xref, stmobj, num, gen, offset); if (pdf_is_name(filters)) chain = build_filter(chain, xref, filters, params, num, gen, imparams); else if (pdf_array_len(filters) > 0) chain = build_filter_chain(chain, xref, filters, params, num, gen, imparams); - fz_lock_stream(chain); return chain; } @@ -298,20 +303,22 @@ pdf_open_inline_stream(pdf_document *xref, pdf_obj *stmobj, int length, fz_strea if (pdf_array_len(filters) > 0) return build_filter_chain(chain, xref, filters, params, 0, 0, imparams); - return fz_open_null(chain, length); + return fz_open_null(chain, length, fz_tell(chain)); } /* * Open a stream for reading the raw (compressed but decrypted) data. - * Using xref->file while this is open is a bad idea. */ fz_stream * pdf_open_raw_stream(pdf_document *xref, int num, int gen) { - pdf_xref_entry *x; - fz_stream *stm; + return pdf_open_raw_renumbered_stream(xref, num, gen, num, gen); +} - fz_var(x); +fz_stream * +pdf_open_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen) +{ + pdf_xref_entry *x; if (num < 0 || num >= xref->len) fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen); @@ -324,10 +331,7 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen) if (x->stm_ofs == 0) fz_throw(xref->ctx, "object is not a stream"); - stm = pdf_open_raw_filter(xref->file, xref, x->obj, num, gen); - fz_lock_stream(stm); - fz_seek(xref->file, x->stm_ofs, 0); - return stm; + return pdf_open_raw_filter(xref->file, xref, x->obj, orig_num, orig_gen, x->stm_ofs); } /* @@ -338,14 +342,13 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen) fz_stream * pdf_open_stream(pdf_document *xref, int num, int gen) { - return pdf_open_image_stream(xref, num, gen, NULL); + return pdf_open_image_stream(xref, num, gen, num, gen, NULL); } fz_stream * -pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *params) +pdf_open_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params) { pdf_xref_entry *x; - fz_stream *stm; if (num < 0 || num >= xref->len) fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen); @@ -355,12 +358,10 @@ pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa pdf_cache_object(xref, num, gen); /* RJW: "cannot load stream object (%d %d R)", num, gen */ - if (x->stm_ofs == 0) + if (x->stm_ofs == 0 && x->stm_buf == NULL) fz_throw(xref->ctx, "object is not a stream"); - stm = pdf_open_filter(xref->file, xref, x->obj, num, gen, params); - fz_seek(xref->file, x->stm_ofs, 0); - return stm; + return pdf_open_filter(xref->file, xref, x->obj, orig_num, orig_gen, x->stm_ofs, params); } fz_stream * @@ -410,14 +411,10 @@ pdf_open_image_decomp_stream(fz_context *ctx, fz_buffer *buffer, pdf_image_param fz_stream * pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, pdf_obj *dict, int stm_ofs) { - fz_stream *stm; - if (stm_ofs == 0) fz_throw(xref->ctx, "object is not a stream"); - stm = pdf_open_filter(xref->file, xref, dict, num, gen, NULL); - fz_seek(xref->file, stm_ofs, 0); - return stm; + return pdf_open_filter(xref->file, xref, dict, num, gen, stm_ofs, NULL); } /* @@ -426,11 +423,20 @@ pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, pdf_obj *dict, fz_buffer * pdf_load_raw_stream(pdf_document *xref, int num, int gen) { + return pdf_load_raw_renumbered_stream(xref, num, gen, num, gen); +} + +fz_buffer * +pdf_load_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen) +{ fz_stream *stm; pdf_obj *dict; int len; fz_buffer *buf; + if (num > 0 && num < xref->len && xref->table[num].stm_buf) + return fz_keep_buffer(xref->ctx, xref->table[num].stm_buf); + dict = pdf_load_object(xref, num, gen); /* RJW: "cannot load stream dictionary (%d %d R)", num, gen */ @@ -438,7 +444,7 @@ pdf_load_raw_stream(pdf_document *xref, int num, int gen) pdf_drop_obj(dict); - stm = pdf_open_raw_stream(xref, num, gen); + stm = pdf_open_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen); /* RJW: "cannot open raw stream (%d %d R)", num, gen */ buf = fz_read_all(stm, len); @@ -470,11 +476,17 @@ pdf_guess_filter_length(int len, char *filter) fz_buffer * pdf_load_stream(pdf_document *xref, int num, int gen) { - return pdf_load_image_stream(xref, num, gen, NULL); + return pdf_load_image_stream(xref, num, gen, num, gen, NULL); +} + +fz_buffer * +pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen) +{ + return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL); } fz_buffer * -pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *params) +pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params) { fz_context *ctx = xref->ctx; fz_stream *stm = NULL; @@ -496,7 +508,7 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa pdf_drop_obj(dict); - stm = pdf_open_image_stream(xref, num, gen, params); + stm = pdf_open_image_stream(xref, num, gen, orig_num, orig_gen, params); /* RJW: "cannot open stream (%d %d R)", num, gen */ fz_try(ctx) @@ -514,3 +526,49 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa return buf; } + +static fz_stream * +pdf_open_object_array(pdf_document *xref, pdf_obj *list) +{ + int i, n; + fz_context *ctx = xref->ctx; + fz_stream *stm; + + n = pdf_array_len(list); + stm = fz_open_concat(ctx, n, 1); + + fz_var(i); /* Workaround Mac compiler bug */ + for (i = 0; i < n; i++) + { + pdf_obj *obj = pdf_array_get(list, i); + fz_try(ctx) + { + fz_concat_push(stm, pdf_open_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))); + } + fz_catch(ctx) + { + fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); + continue; + } + } + + return stm; +} + +fz_stream * +pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj) +{ + fz_context *ctx = xref->ctx; + int num, gen; + + if (pdf_is_array(obj)) + return pdf_open_object_array(xref, obj); + + num = pdf_to_num(obj); + gen = pdf_to_gen(obj); + if (pdf_is_stream(xref, num, gen)) + return pdf_open_image_stream(xref, num, gen, num, gen, NULL); + + fz_warn(ctx, "pdf object stream missing (%d %d R)", num, gen); + return NULL; +} diff --git a/pdf/pdf_type3.c b/pdf/pdf_type3.c index 6603fc8b..4df46ce3 100644 --- a/pdf/pdf_type3.c +++ b/pdf/pdf_type3.c @@ -2,10 +2,9 @@ #include "mupdf-internal.h" static void -pdf_run_glyph_func(void *doc, void *rdb_, fz_buffer *contents, fz_device *dev, fz_matrix ctm, void *gstate) +pdf_run_glyph_func(void *doc, void *rdb, fz_buffer *contents, fz_device *dev, fz_matrix ctm, void *gstate) { - pdf_obj *rdb = (pdf_obj *)rdb_; - pdf_run_glyph(doc, rdb, contents, dev, ctm, gstate); + pdf_run_glyph(doc, (pdf_obj *)rdb, contents, dev, ctm, gstate); } static void diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c new file mode 100644 index 00000000..ed0b8711 --- /dev/null +++ b/pdf/pdf_write.c @@ -0,0 +1,690 @@ +#include "fitz.h" +#include "mupdf-internal.h" + +typedef struct pdf_write_options_s pdf_write_options; + +struct pdf_write_options_s +{ + FILE *out; + int doascii; + int doexpand; + int dogarbage; + char *uselist; + int *ofslist; + int *genlist; + int *renumbermap; + int *revrenumbermap; + int *revgenlist; +}; + +/* + * Garbage collect objects not reachable from the trailer. + */ + +static pdf_obj *sweepref(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj) +{ + int num = pdf_to_num(obj); + int gen = pdf_to_gen(obj); + fz_context *ctx = xref->ctx; + + if (num < 0 || num >= xref->len) + return NULL; + if (opts->uselist[num]) + return NULL; + + opts->uselist[num] = 1; + + /* Bake in /Length in stream objects */ + fz_try(ctx) + { + if (pdf_is_stream(xref, num, gen)) + { + pdf_obj *len = pdf_dict_gets(obj, "Length"); + if (pdf_is_indirect(len)) + { + opts->uselist[pdf_to_num(len)] = 0; + len = pdf_resolve_indirect(len); + pdf_dict_puts(obj, "Length", len); + } + } + } + fz_catch(ctx) + { + /* Leave broken */ + } + + return pdf_resolve_indirect(obj); +} + +static void sweepobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj) +{ + int i; + + if (pdf_is_indirect(obj)) + obj = sweepref(xref, opts, obj); + + if (pdf_is_dict(obj)) + { + int n = pdf_dict_len(obj); + for (i = 0; i < n; i++) + sweepobj(xref, opts, pdf_dict_get_val(obj, i)); + } + + else if (pdf_is_array(obj)) + { + int n = pdf_array_len(obj); + for (i = 0; i < n; i++) + sweepobj(xref, opts, pdf_array_get(obj, i)); + } +} + +/* + * Scan for and remove duplicate objects (slow) + */ + +static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts) +{ + int num, other; + fz_context *ctx = xref->ctx; + + for (num = 1; num < xref->len; num++) + { + /* Only compare an object to objects preceding it */ + for (other = 1; other < num; other++) + { + pdf_obj *a, *b; + int differ, newnum; + + if (num == other || !opts->uselist[num] || !opts->uselist[other]) + continue; + + /* + * Comparing stream objects data contents would take too long. + * + * pdf_is_stream calls pdf_cache_object and ensures + * that the xref table has the objects loaded. + */ + fz_try(ctx) + { + differ = (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0)); + } + fz_catch(ctx) + { + /* Assume different */ + differ = 1; + } + if (differ) + continue; + + a = xref->table[num].obj; + b = xref->table[other].obj; + + a = pdf_resolve_indirect(a); + b = pdf_resolve_indirect(b); + + if (pdf_objcmp(a, b)) + continue; + + /* Keep the lowest numbered object */ + newnum = MIN(num, other); + opts->renumbermap[num] = newnum; + opts->renumbermap[other] = newnum; + opts->revrenumbermap[newnum] = num; /* Either will do */ + opts->uselist[MAX(num, other)] = 0; + + /* One duplicate was found, do not look for another */ + break; + } + } +} + +/* + * Renumber objects sequentially so the xref is more compact + * + * This code assumes that any opts->renumbermap[n] <= n for all n. + */ + +static void compactxref(pdf_document *xref, pdf_write_options *opts) +{ + int num, newnum; + + /* + * Update renumbermap in-place, clustering all used + * objects together at low object ids. Objects that + * already should be renumbered will have their new + * object ids be updated to reflect the compaction. + */ + + newnum = 1; + for (num = 1; num < xref->len; num++) + { + /* If it's not used, map it to zero */ + if (!opts->uselist[num]) + { + opts->renumbermap[num] = 0; + } + /* If it's not moved, compact it. */ + else if (opts->renumbermap[num] == num) + { + opts->revrenumbermap[newnum] = opts->revrenumbermap[num]; + opts->revgenlist[newnum] = opts->revgenlist[num]; + opts->renumbermap[num] = newnum++; + } + /* Otherwise it's used, and moved. We know that it must have + * moved down, so the place it's moved to will be in the right + * place already. */ + else + { + opts->renumbermap[num] = opts->renumbermap[opts->renumbermap[num]]; + } + } +} + +/* + * Update indirect objects according to renumbering established when + * removing duplicate objects and compacting the xref. + */ + +static void renumberobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj) +{ + int i; + fz_context *ctx = xref->ctx; + + if (pdf_is_dict(obj)) + { + int n = pdf_dict_len(obj); + for (i = 0; i < n; i++) + { + pdf_obj *key = pdf_dict_get_key(obj, i); + pdf_obj *val = pdf_dict_get_val(obj, i); + if (pdf_is_indirect(val)) + { + val = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(val)], 0, xref); + fz_dict_put(obj, key, val); + pdf_drop_obj(val); + } + else + { + renumberobj(xref, opts, val); + } + } + } + + else if (pdf_is_array(obj)) + { + int n = pdf_array_len(obj); + for (i = 0; i < n; i++) + { + pdf_obj *val = pdf_array_get(obj, i); + if (pdf_is_indirect(val)) + { + val = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(val)], 0, xref); + pdf_array_put(obj, i, val); + pdf_drop_obj(val); + } + else + { + renumberobj(xref, opts, val); + } + } + } +} + +static void renumberobjs(pdf_document *xref, pdf_write_options *opts) +{ + pdf_xref_entry *oldxref; + int newlen; + int num; + fz_context *ctx = xref->ctx; + + /* Apply renumber map to indirect references in all objects in xref */ + renumberobj(xref, opts, xref->trailer); + for (num = 0; num < xref->len; num++) + { + pdf_obj *obj = xref->table[num].obj; + + if (pdf_is_indirect(obj)) + { + obj = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(obj)], 0, xref); + pdf_update_object(xref, num, obj); + pdf_drop_obj(obj); + } + else + { + renumberobj(xref, opts, obj); + } + } + + /* Create new table for the reordered, compacted xref */ + oldxref = xref->table; + xref->table = fz_malloc_array(xref->ctx, xref->len, sizeof(pdf_xref_entry)); + xref->table[0] = oldxref[0]; + + /* Move used objects into the new compacted xref */ + newlen = 0; + for (num = 1; num < xref->len; num++) + { + if (opts->uselist[num]) + { + if (newlen < opts->renumbermap[num]) + newlen = opts->renumbermap[num]; + xref->table[opts->renumbermap[num]] = oldxref[num]; + } + else + { + if (oldxref[num].obj) + pdf_drop_obj(oldxref[num].obj); + } + } + + fz_free(xref->ctx, oldxref); + + /* Update the used objects count in compacted xref */ + xref->len = newlen + 1; + + /* Update list of used objects to fit with compacted xref */ + for (num = 1; num < xref->len; num++) + opts->uselist[num] = 1; +} + +/* + * Make sure we have loaded objects from object streams. + */ + +static void preloadobjstms(pdf_document *xref) +{ + pdf_obj *obj; + int num; + + for (num = 0; num < xref->len; num++) + { + if (xref->table[num].type == 'o') + { + obj = pdf_load_object(xref, num, 0); + pdf_drop_obj(obj); + } + } +} + +/* + * Save streams and objects to the output + */ + +static inline int isbinary(int c) +{ + if (c == '\n' || c == '\r' || c == '\t') + return 0; + return c < 32 || c > 127; +} + +static int isbinarystream(fz_buffer *buf) +{ + int i; + for (i = 0; i < buf->len; i++) + if (isbinary(buf->data[i])) + return 1; + return 0; +} + +static fz_buffer *hexbuf(fz_context *ctx, unsigned char *p, int n) +{ + static const char hex[16] = "0123456789abcdef"; + fz_buffer *buf; + int x = 0; + + buf = fz_new_buffer(ctx, n * 2 + (n / 32) + 2); + + while (n--) + { + buf->data[buf->len++] = hex[*p >> 4]; + buf->data[buf->len++] = hex[*p & 15]; + if (++x == 32) + { + buf->data[buf->len++] = '\n'; + x = 0; + } + p++; + } + + buf->data[buf->len++] = '>'; + buf->data[buf->len++] = '\n'; + + return buf; +} + +static void addhexfilter(pdf_document *xref, pdf_obj *dict) +{ + pdf_obj *f, *dp, *newf, *newdp; + pdf_obj *ahx, *nullobj; + fz_context *ctx = xref->ctx; + + ahx = fz_new_name(ctx, "ASCIIHexDecode"); + nullobj = pdf_new_null(ctx); + newf = newdp = NULL; + + f = pdf_dict_gets(dict, "Filter"); + dp = pdf_dict_gets(dict, "DecodeParms"); + + if (pdf_is_name(f)) + { + newf = pdf_new_array(ctx, 2); + pdf_array_push(newf, ahx); + pdf_array_push(newf, f); + f = newf; + if (pdf_is_dict(dp)) + { + newdp = pdf_new_array(ctx, 2); + pdf_array_push(newdp, nullobj); + pdf_array_push(newdp, dp); + dp = newdp; + } + } + else if (pdf_is_array(f)) + { + pdf_array_insert(f, ahx); + if (pdf_is_array(dp)) + pdf_array_insert(dp, nullobj); + } + else + f = ahx; + + pdf_dict_puts(dict, "Filter", f); + if (dp) + pdf_dict_puts(dict, "DecodeParms", dp); + + pdf_drop_obj(ahx); + pdf_drop_obj(nullobj); + if (newf) + pdf_drop_obj(newf); + if (newdp) + pdf_drop_obj(newdp); +} + +static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj, int num, int gen) +{ + fz_buffer *buf, *tmp; + pdf_obj *newlen; + fz_context *ctx = xref->ctx; + int orig_num = opts->revrenumbermap[num]; + int orig_gen = opts->revgenlist[num]; + + buf = pdf_load_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen); + + if (opts->doascii && isbinarystream(buf)) + { + tmp = hexbuf(ctx, buf->data, buf->len); + fz_drop_buffer(ctx, buf); + buf = tmp; + + addhexfilter(xref, obj); + + newlen = pdf_new_int(ctx, buf->len); + pdf_dict_puts(obj, "Length", newlen); + pdf_drop_obj(newlen); + } + + fprintf(opts->out, "%d %d obj\n", num, gen); + pdf_fprint_obj(opts->out, obj, opts->doexpand == 0); + fprintf(opts->out, "stream\n"); + fwrite(buf->data, 1, buf->len, opts->out); + fprintf(opts->out, "endstream\nendobj\n\n"); + + fz_drop_buffer(ctx, buf); +} + +static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj, int num, int gen) +{ + fz_buffer *buf, *tmp; + pdf_obj *newlen; + fz_context *ctx = xref->ctx; + int orig_num = opts->revrenumbermap[num]; + int orig_gen = opts->revgenlist[num]; + + buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen); + + pdf_dict_dels(obj, "Filter"); + pdf_dict_dels(obj, "DecodeParms"); + + if (opts->doascii && isbinarystream(buf)) + { + tmp = hexbuf(ctx, buf->data, buf->len); + fz_drop_buffer(ctx, buf); + buf = tmp; + + addhexfilter(xref, obj); + } + + newlen = pdf_new_int(ctx, buf->len); + pdf_dict_puts(obj, "Length", newlen); + pdf_drop_obj(newlen); + + fprintf(opts->out, "%d %d obj\n", num, gen); + pdf_fprint_obj(opts->out, obj, opts->doexpand == 0); + fprintf(opts->out, "stream\n"); + fwrite(buf->data, 1, buf->len, opts->out); + fprintf(opts->out, "endstream\nendobj\n\n"); + + fz_drop_buffer(ctx, buf); +} + +static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, int gen) +{ + pdf_obj *obj; + pdf_obj *type; + fz_context *ctx = xref->ctx; + + obj = pdf_load_object(xref, num, gen); + + /* skip ObjStm and XRef objects */ + if (pdf_is_dict(obj)) + { + type = pdf_dict_gets(obj, "Type"); + if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "ObjStm")) + { + opts->uselist[num] = 0; + pdf_drop_obj(obj); + return; + } + if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "XRef")) + { + opts->uselist[num] = 0; + pdf_drop_obj(obj); + return; + } + } + + if (!pdf_is_stream(xref, num, gen)) + { + fprintf(opts->out, "%d %d obj\n", num, gen); + pdf_fprint_obj(opts->out, obj, opts->doexpand == 0); + fprintf(opts->out, "endobj\n\n"); + } + else + { + int dontexpand = 0; + if (opts->doexpand != 0 && opts->doexpand != fz_expand_all) + { + pdf_obj *o; + + if ((o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "XObject")) && + (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Image"))) + dontexpand = !(opts->doexpand & fz_expand_images); + if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "Font")) + dontexpand = !(opts->doexpand & fz_expand_fonts); + if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "FontDescriptor")) + dontexpand = !(opts->doexpand & fz_expand_fonts); + if ((o = pdf_dict_gets(obj, "Length1")) != NULL) + dontexpand = !(opts->doexpand & fz_expand_fonts); + if ((o = pdf_dict_gets(obj, "Length2")) != NULL) + dontexpand = !(opts->doexpand & fz_expand_fonts); + if ((o = pdf_dict_gets(obj, "Length3")) != NULL) + dontexpand = !(opts->doexpand & fz_expand_fonts); + if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Type1C")) + dontexpand = !(opts->doexpand & fz_expand_fonts); + if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "CIDFontType0C")) + dontexpand = !(opts->doexpand & fz_expand_fonts); + } + if (opts->doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj)) + expandstream(xref, opts, obj, num, gen); + else + copystream(xref, opts, obj, num, gen); + } + + pdf_drop_obj(obj); +} + +static void writexref(pdf_document *xref, pdf_write_options *opts) +{ + pdf_obj *trailer; + pdf_obj *obj; + int startxref; + int num; + fz_context *ctx = xref->ctx; + + startxref = ftell(opts->out); + + fprintf(opts->out, "xref\n0 %d\n", xref->len); + for (num = 0; num < xref->len; num++) + { + if (opts->uselist[num]) + fprintf(opts->out, "%010d %05d n \n", opts->ofslist[num], opts->genlist[num]); + else + fprintf(opts->out, "%010d %05d f \n", opts->ofslist[num], opts->genlist[num]); + } + fprintf(opts->out, "\n"); + + trailer = pdf_new_dict(ctx, 5); + + obj = pdf_new_int(ctx, xref->len); + pdf_dict_puts(trailer, "Size", obj); + pdf_drop_obj(obj); + + obj = pdf_dict_gets(xref->trailer, "Info"); + if (obj) + pdf_dict_puts(trailer, "Info", obj); + + obj = pdf_dict_gets(xref->trailer, "Root"); + if (obj) + pdf_dict_puts(trailer, "Root", obj); + + obj = pdf_dict_gets(xref->trailer, "ID"); + if (obj) + pdf_dict_puts(trailer, "ID", obj); + + fprintf(opts->out, "trailer\n"); + pdf_fprint_obj(opts->out, trailer, opts->doexpand == 0); + fprintf(opts->out, "\n"); + + pdf_drop_obj(trailer); + + fprintf(opts->out, "startxref\n%d\n%%%%EOF\n", startxref); +} + +void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz_opts) +{ + int lastfree; + int num; + pdf_write_options opts = { 0 }; + fz_context *ctx; + + if (!xref || !fz_opts) + return; + + ctx = xref->ctx; + + opts.out = fopen(filename, "wb"); + if (!opts.out) + fz_throw(ctx, "cannot open output file '%s'", filename); + + fz_try(ctx) + { + opts.doexpand = fz_opts ? fz_opts->doexpand : 0; + opts.dogarbage = fz_opts ? fz_opts->dogarbage : 0; + opts.doascii = fz_opts ? fz_opts->doascii: 0; + opts.uselist = fz_malloc_array(ctx, xref->len + 1, sizeof(char)); + opts.ofslist = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); + opts.genlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); + opts.renumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); + opts.revrenumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); + opts.revgenlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); + + fprintf(opts.out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10); + fprintf(opts.out, "%%\316\274\341\277\246\n\n"); + + for (num = 0; num < xref->len; num++) + { + opts.uselist[num] = 0; + opts.ofslist[num] = 0; + opts.renumbermap[num] = num; + opts.revrenumbermap[num] = num; + opts.revgenlist[num] = xref->table[num].gen; + } + + /* Make sure any objects hidden in compressed streams have been loaded */ + preloadobjstms(xref); + + /* Sweep & mark objects from the trailer */ + if (opts.dogarbage >= 1) + sweepobj(xref, &opts, xref->trailer); + + /* Coalesce and renumber duplicate objects */ + if (opts.dogarbage >= 3) + removeduplicateobjs(xref, &opts); + + /* Compact xref by renumbering and removing unused objects */ + if (opts.dogarbage >= 2) + compactxref(xref, &opts); + + /* Make renumbering affect all indirect references and update xref */ + if (opts.dogarbage >= 2) + renumberobjs(xref, &opts); + + for (num = 0; num < xref->len; num++) + { + if (xref->table[num].type == 'f') + opts.genlist[num] = xref->table[num].gen; + if (xref->table[num].type == 'n') + opts.genlist[num] = xref->table[num].gen; + if (xref->table[num].type == 'o') + opts.genlist[num] = 0; + + if (opts.dogarbage && !opts.uselist[num]) + continue; + + if (xref->table[num].type == 'n' || xref->table[num].type == 'o') + { + opts.uselist[num] = 1; + opts.ofslist[num] = ftell(opts.out); + writeobject(xref, &opts, num, opts.genlist[num]); + } + } + + /* Construct linked list of free object slots */ + lastfree = 0; + for (num = 0; num < xref->len; num++) + { + if (!opts.uselist[num]) + { + opts.genlist[num]++; + opts.ofslist[lastfree] = num; + lastfree = num; + } + } + + writexref(xref, &opts); + } + fz_always(ctx) + { + fz_free(ctx, opts.uselist); + fz_free(ctx, opts.ofslist); + fz_free(ctx, opts.genlist); + fz_free(ctx, opts.renumbermap); + fz_free(ctx, opts.revrenumbermap); + fz_free(ctx, opts.revgenlist); + fclose(opts.out); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} diff --git a/pdf/pdf_xobject.c b/pdf/pdf_xobject.c index 175f84b5..88ab5a01 100644 --- a/pdf/pdf_xobject.c +++ b/pdf/pdf_xobject.c @@ -23,7 +23,7 @@ pdf_free_xobject_imp(fz_context *ctx, fz_storable *xobj_) if (xobj->resources) pdf_drop_obj(xobj->resources); if (xobj->contents) - fz_drop_buffer(ctx, xobj->contents); + pdf_drop_obj(xobj->contents); pdf_drop_obj(xobj->me); fz_free(ctx, xobj); } @@ -33,7 +33,7 @@ pdf_xobject_size(pdf_xobject *xobj) { if (xobj == NULL) return 0; - return sizeof(*xobj) + (xobj->colorspace ? xobj->colorspace->size : 0) + (xobj->contents ? xobj->contents->len : 0); + return sizeof(*xobj) + (xobj->colorspace ? xobj->colorspace->size : 0); } pdf_xobject * @@ -98,7 +98,7 @@ pdf_load_xobject(pdf_document *xref, pdf_obj *dict) fz_try(ctx) { - form->contents = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); + form->contents = pdf_keep_obj(dict); } fz_catch(ctx) { @@ -114,6 +114,7 @@ pdf_load_xobject(pdf_document *xref, pdf_obj *dict) pdf_obj * pdf_new_xobject(pdf_document *xref, fz_rect *bbox, fz_matrix *mat) { + int idict_num; pdf_obj *idict = NULL; pdf_obj *dict = NULL; pdf_xobject *form = NULL; @@ -195,12 +196,15 @@ pdf_new_xobject(pdf_document *xref, fz_rect *bbox, fz_matrix *mat) form->resources = res; res = NULL; - idict = pdf_new_stream_indirection(xref, dict); + idict_num = pdf_create_object(xref); + pdf_update_object(xref, idict_num, dict); + idict = pdf_new_indirect(ctx, idict_num, 0, xref); pdf_drop_obj(dict); dict = NULL; pdf_store_item(ctx, idict, form, pdf_xobject_size(form)); + form->contents = pdf_keep_obj(idict); form->me = pdf_keep_obj(idict); pdf_drop_xobject(ctx, form); @@ -220,8 +224,8 @@ pdf_new_xobject(pdf_document *xref, fz_rect *bbox, fz_matrix *mat) return idict; } -void pdf_xobject_set_contents(fz_context *ctx, pdf_xobject *form, fz_buffer *buffer) +void pdf_xobject_set_contents(pdf_document *xref, pdf_xobject *form, fz_buffer *buffer) { - fz_drop_buffer(ctx, form->contents); - form->contents = fz_keep_buffer(ctx, buffer); + pdf_dict_dels(form->contents, "Filter"); + pdf_update_stream(xref, pdf_to_num(form->contents), buffer); } diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c index cd15051c..ea99d3c2 100644 --- a/pdf/pdf_xref.c +++ b/pdf/pdf_xref.c @@ -173,6 +173,7 @@ pdf_resize_xref(pdf_document *xref, int newlen) xref->table[i].ofs = 0; xref->table[i].gen = 0; xref->table[i].stm_ofs = 0; + xref->table[i].stm_buf = NULL; xref->table[i].obj = NULL; } xref->len = newlen; @@ -293,8 +294,7 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in } } -/* Entered with file locked. Drops the lock in the middle, but then picks - * it up again before exiting. */ +/* Entered with file locked, remains locked throughout. */ static pdf_obj * pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) { @@ -321,7 +321,6 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) fz_try(ctx) { - fz_unlock(ctx, FZ_LOCK_FILE); obj = pdf_dict_gets(trailer, "Size"); if (!obj) fz_throw(ctx, "xref stream missing Size entry (%d %d R)", num, gen); @@ -371,7 +370,6 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) pdf_drop_obj(index); fz_rethrow(ctx); } - fz_lock(ctx, FZ_LOCK_FILE); return trailer; } @@ -410,13 +408,13 @@ static void pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf) { pdf_obj *trailer = NULL; - pdf_obj *xrefstm = NULL; - pdf_obj *prev = NULL; fz_context *ctx = xref->ctx; + int xrefstmofs = 0; + int prevofs = 0; fz_var(trailer); - fz_var(xrefstm); - fz_var(prev); + fz_var(xrefstmofs); + fz_var(prevofs); fz_try(ctx) { @@ -425,20 +423,21 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf) trailer = pdf_read_xref(xref, ofs, buf); /* FIXME: do we overwrite free entries properly? */ - xrefstm = pdf_dict_gets(trailer, "XRefStm"); - prev = pdf_dict_gets(trailer, "Prev"); + xrefstmofs = pdf_to_int(pdf_dict_gets(trailer, "XRefStm")); + prevofs = pdf_to_int(pdf_dict_gets(trailer, "Prev")); + /* We only recurse if we have both xrefstm and prev. * Hopefully this happens infrequently. */ - if (xrefstm && prev) - pdf_read_xref_sections(xref, pdf_to_int(xrefstm), buf); - if (prev) - ofs = pdf_to_int(prev); - else if (xrefstm) - ofs = pdf_to_int(xrefstm); + if (xrefstmofs && prevofs) + pdf_read_xref_sections(xref, xrefstmofs, buf); + if (prevofs) + ofs = prevofs; + else if (xrefstmofs) + ofs = xrefstmofs; pdf_drop_obj(trailer); trailer = NULL; } - while (prev || xrefstm); + while (prevofs || xrefstmofs); } fz_catch(ctx) { @@ -449,12 +448,14 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf) /* * load xref tables from pdf + * + * File locked on entry, throughout and on exit. */ static void pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) { - pdf_obj *size; + int size; int i; fz_context *ctx = xref->ctx; @@ -464,11 +465,11 @@ pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) pdf_read_trailer(xref, buf); - size = pdf_dict_gets(xref->trailer, "Size"); + size = pdf_to_int(pdf_dict_gets(xref->trailer, "Size")); if (!size) fz_throw(ctx, "trailer missing Size entry"); - pdf_resize_xref(xref, pdf_to_int(size)); + pdf_resize_xref(xref, size); pdf_read_xref_sections(xref, xref->startxref, buf); @@ -660,33 +661,18 @@ pdf_free_ocg(fz_context *ctx, pdf_ocg_descriptor *desc) * If password is not null, try to decrypt. */ -static void pdf_init_document(pdf_document *xref); - -pdf_document * -pdf_open_document_with_stream(fz_stream *file) +static void +pdf_init_document(pdf_document *xref) { - pdf_document *xref; + fz_context *ctx = xref->ctx; pdf_obj *encrypt, *id; pdf_obj *dict = NULL; pdf_obj *obj; pdf_obj *nobj = NULL; int i, repaired = 0; - int locked; - fz_context *ctx = file->ctx; fz_var(dict); fz_var(nobj); - fz_var(locked); - - xref = fz_malloc_struct(ctx, pdf_document); - pdf_init_document(xref); - xref->lexbuf.base.size = PDF_LEXBUF_LARGE; - - xref->file = fz_keep_stream(file); - xref->ctx = ctx; - - fz_lock(ctx, FZ_LOCK_FILE); - locked = 1; fz_try(ctx) { @@ -717,9 +703,6 @@ pdf_open_document_with_stream(fz_stream *file) if (repaired) pdf_repair_xref(xref, &xref->lexbuf.base); - fz_unlock(ctx, FZ_LOCK_FILE); - locked = 0; - encrypt = pdf_dict_gets(xref->trailer, "Encrypt"); id = pdf_dict_gets(xref->trailer, "ID"); if (pdf_is_dict(encrypt)) @@ -778,11 +761,6 @@ pdf_open_document_with_stream(fz_stream *file) } } } - fz_always(ctx) - { - if (locked) - fz_unlock(ctx, FZ_LOCK_FILE); - } fz_catch(ctx) { pdf_drop_obj(dict); @@ -799,8 +777,6 @@ pdf_open_document_with_stream(fz_stream *file) { fz_warn(ctx, "Ignoring Broken Optional Content"); } - - return xref; } void @@ -865,11 +841,12 @@ pdf_print_xref(pdf_document *xref) printf("xref\n0 %d\n", xref->len); for (i = 0; i < xref->len; i++) { - printf("%05d: %010d %05d %c (stm_ofs=%d)\n", i, + printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i, xref->table[i].ofs, xref->table[i].gen, xref->table[i].type ? xref->table[i].type : '-', - xref->table[i].stm_ofs); + xref->table[i].stm_ofs, + xref->table[i].stm_buf); } } @@ -987,7 +964,6 @@ pdf_cache_object(pdf_document *xref, int num, int gen) } else if (x->type == 'n') { - fz_lock(ctx, FZ_LOCK_FILE); fz_seek(xref->file, x->ofs, 0); fz_try(ctx) @@ -997,7 +973,6 @@ pdf_cache_object(pdf_document *xref, int num, int gen) } fz_catch(ctx) { - fz_unlock(ctx, FZ_LOCK_FILE); fz_throw(ctx, "cannot parse object (%d %d R)", num, gen); } @@ -1005,13 +980,11 @@ pdf_cache_object(pdf_document *xref, int num, int gen) { pdf_drop_obj(x->obj); x->obj = NULL; - fz_unlock(ctx, FZ_LOCK_FILE); fz_throw(ctx, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen); } if (xref->crypt) pdf_crypt_obj(ctx, xref->crypt, x->obj, num, gen); - fz_unlock(ctx, FZ_LOCK_FILE); } else if (x->type == 'o') { @@ -1093,127 +1066,105 @@ pdf_resolve_indirect(pdf_obj *ref) return ref; } -int pdf_count_objects(pdf_document *doc) +int +pdf_count_objects(pdf_document *doc) { return doc->len; } -/* Replace numbered object -- for use by pdfclean and similar tools */ +int +pdf_create_object(pdf_document *xref) +{ + /* TODO: reuse free object slots by properly linking free object chains in the ofs field */ + int num = xref->len; + pdf_resize_xref(xref, num + 1); + xref->table[num].type = 'f'; + xref->table[num].ofs = -1; + xref->table[num].gen = 0; + xref->table[num].stm_ofs = 0; + xref->table[num].stm_buf = NULL; + xref->table[num].obj = NULL; + return num; +} + void -pdf_update_object(pdf_document *xref, int num, int gen, pdf_obj *newobj) +pdf_delete_object(pdf_document *xref, int num) { pdf_xref_entry *x; if (num < 0 || num >= xref->len) { - fz_warn(xref->ctx, "object out of range (%d %d R); xref size %d", num, gen, xref->len); + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); return; } x = &xref->table[num]; - if (x->obj) - pdf_drop_obj(x->obj); + fz_drop_buffer(xref->ctx, x->stm_buf); + pdf_drop_obj(x->obj); - x->obj = pdf_keep_obj(newobj); - x->type = 'n'; + x->type = 'f'; x->ofs = 0; + x->gen = 0; + x->stm_ofs = 0; + x->stm_buf = NULL; + x->obj = NULL; } -pdf_obj * -pdf_new_stream_indirection(pdf_document *xref, pdf_obj *obj) -{ - int num = xref->len; - pdf_resize_xref(xref, xref->len + 1); - pdf_update_object(xref, num, 0, obj); - /* Set stm_ofs, so that obj is treated as a stream */ - xref->table[num].stm_ofs = 1; - - return pdf_new_indirect(xref->ctx, num, 0, xref); -} - -/* - * Convenience function to open a file then call pdf_open_document_with_stream. - */ - -pdf_document * -pdf_open_document(fz_context *ctx, const char *filename) +void +pdf_update_object(pdf_document *xref, int num, pdf_obj *newobj) { - fz_stream *file = NULL; - pdf_document *xref; + pdf_xref_entry *x; - fz_var(file); - fz_try(ctx) - { - file = fz_open_file(ctx, filename); - xref = pdf_open_document_with_stream(file); - } - fz_catch(ctx) + if (num < 0 || num >= xref->len) { - fz_close(file); - fz_throw(ctx, "cannot load document '%s'", filename); + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); + return; } - fz_close(file); - return xref; -} - -/* Document interface wrappers */ + x = &xref->table[num]; -static void pdf_close_document_shim(fz_document *doc) -{ - pdf_close_document((pdf_document*)doc); -} + if (x->obj) + pdf_drop_obj(x->obj); -static int pdf_needs_password_shim(fz_document *doc) -{ - return pdf_needs_password((pdf_document*)doc); + x->type = 'n'; + x->ofs = 0; + x->obj = pdf_keep_obj(newobj); } -static int pdf_authenticate_password_shim(fz_document *doc, char *password) +fz_buffer * +pdf_get_stream(pdf_document *xref, int num) { - return pdf_authenticate_password((pdf_document*)doc, password); -} + pdf_xref_entry *x; -static fz_outline *pdf_load_outline_shim(fz_document *doc) -{ - return pdf_load_outline((pdf_document*)doc); -} + if (num < 0 || num >= xref->len) + fz_throw(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); -static int pdf_count_pages_shim(fz_document *doc) -{ - return pdf_count_pages((pdf_document*)doc); -} + x = &xref->table[num]; -static fz_page *pdf_load_page_shim(fz_document *doc, int number) -{ - return (fz_page*) pdf_load_page((pdf_document*)doc, number); + return x->stm_buf; } -static fz_link *pdf_load_links_shim(fz_document *doc, fz_page *page) +void +pdf_update_stream(pdf_document *xref, int num, fz_buffer *newbuf) { - return pdf_load_links((pdf_document*)doc, (pdf_page*)page); -} + pdf_xref_entry *x; -static fz_rect pdf_bound_page_shim(fz_document *doc, fz_page *page) -{ - return pdf_bound_page((pdf_document*)doc, (pdf_page*)page); -} + if (num < 0 || num >= xref->len) + { + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); + return; + } -static void pdf_run_page_shim(fz_document *doc, fz_page *page, fz_device *dev, fz_matrix transform, fz_cookie *cookie) -{ - pdf_run_page((pdf_document*)doc, (pdf_page*)page, dev, transform, cookie); -} + x = &xref->table[num]; -static void pdf_free_page_shim(fz_document *doc, fz_page *page) -{ - pdf_free_page((pdf_document*)doc, (pdf_page*)page); + fz_drop_buffer(xref->ctx, x->stm_buf); + x->stm_buf = fz_keep_buffer(xref->ctx, newbuf); } -static int pdf_meta(fz_document *doc_, int key, void *ptr, int size) +int +pdf_meta(pdf_document *doc, int key, void *ptr, int size) { - pdf_document *doc = (pdf_document *)doc_; - switch(key) { /* @@ -1295,9 +1246,72 @@ static fz_interactive *pdf_interact_shim(fz_document *doc) return (fz_interactive *)doc; } -static void -pdf_init_document(pdf_document *doc) +/* + Wrappers to implement the fz_document interface for pdf_document. + + The functions are split across two files to allow calls to a + version of the constructor that does not link in the interpreter. + The interpreter references the built-in font and cmap resources + which are quite big. Not linking those into the mubusy binary + saves roughly 6MB of space. +*/ + +static void pdf_close_document_shim(fz_document *doc) { + pdf_close_document((pdf_document*)doc); +} + +static int pdf_needs_password_shim(fz_document *doc) +{ + return pdf_needs_password((pdf_document*)doc); +} + +static int pdf_authenticate_password_shim(fz_document *doc, char *password) +{ + return pdf_authenticate_password((pdf_document*)doc, password); +} + +static fz_outline *pdf_load_outline_shim(fz_document *doc) +{ + return pdf_load_outline((pdf_document*)doc); +} + +static int pdf_count_pages_shim(fz_document *doc) +{ + return pdf_count_pages((pdf_document*)doc); +} + +static fz_page *pdf_load_page_shim(fz_document *doc, int number) +{ + return (fz_page*) pdf_load_page((pdf_document*)doc, number); +} + +static fz_link *pdf_load_links_shim(fz_document *doc, fz_page *page) +{ + return pdf_load_links((pdf_document*)doc, (pdf_page*)page); +} + +static fz_rect pdf_bound_page_shim(fz_document *doc, fz_page *page) +{ + return pdf_bound_page((pdf_document*)doc, (pdf_page*)page); +} + +static void pdf_free_page_shim(fz_document *doc, fz_page *page) +{ + pdf_free_page((pdf_document*)doc, (pdf_page*)page); +} + +static int pdf_meta_shim(fz_document *doc, int key, void *ptr, int size) +{ + return pdf_meta((pdf_document*)doc, key, ptr, size); +} + +static pdf_document * +pdf_new_document(fz_stream *file) +{ + fz_context *ctx = file->ctx; + pdf_document *doc = fz_malloc_struct(ctx, pdf_document); + doc->super.close = pdf_close_document_shim; doc->super.needs_password = pdf_needs_password_shim; doc->super.authenticate_password = pdf_authenticate_password_shim; @@ -1306,8 +1320,47 @@ pdf_init_document(pdf_document *doc) doc->super.load_page = pdf_load_page_shim; doc->super.load_links = pdf_load_links_shim; doc->super.bound_page = pdf_bound_page_shim; - doc->super.run_page = pdf_run_page_shim; + doc->super.run_page = NULL; /* see pdf_xref_aux.c */ doc->super.free_page = pdf_free_page_shim; - doc->super.meta = pdf_meta; + doc->super.meta = pdf_meta_shim; doc->super.interact = pdf_interact_shim; + + doc->lexbuf.base.size = PDF_LEXBUF_LARGE; + doc->file = fz_keep_stream(file); + doc->ctx = ctx; + + return doc; +} + +pdf_document * +pdf_open_document_no_run_with_stream(fz_stream *file) +{ + pdf_document *doc = pdf_new_document(file); + pdf_init_document(doc); + return doc; +} + +pdf_document * +pdf_open_document_no_run(fz_context *ctx, const char *filename) +{ + fz_stream *file = NULL; + pdf_document *doc; + + fz_var(file); + + fz_try(ctx) + { + file = fz_open_file(ctx, filename); + doc = pdf_new_document(file); + pdf_init_document(doc); + } + fz_always(ctx) + { + fz_close(file); + } + fz_catch(ctx) + { + fz_throw(ctx, "cannot load document '%s'", filename); + } + return doc; } diff --git a/pdf/pdf_xref_aux.c b/pdf/pdf_xref_aux.c new file mode 100644 index 00000000..2d760334 --- /dev/null +++ b/pdf/pdf_xref_aux.c @@ -0,0 +1,31 @@ +#include "fitz-internal.h" +#include "mupdf-internal.h" + +/* + These functions have been split out of pdf_xref.c to allow tools + to be linked without pulling in the interpreter. The interpreter + references the built-in font and cmap resources which are quite + big. Not linking those into the tools saves roughly 6MB in the + resulting executables. +*/ + +static void pdf_run_page_shim(fz_document *doc, fz_page *page, fz_device *dev, fz_matrix transform, fz_cookie *cookie) +{ + pdf_run_page((pdf_document*)doc, (pdf_page*)page, dev, transform, cookie); +} + +pdf_document * +pdf_open_document_with_stream(fz_stream *file) +{ + pdf_document *doc = pdf_open_document_no_run_with_stream(file); + doc->super.run_page = pdf_run_page_shim; + return doc; +} + +pdf_document * +pdf_open_document(fz_context *ctx, const char *filename) +{ + pdf_document *doc = pdf_open_document_no_run(ctx, filename); + doc->super.run_page = pdf_run_page_shim; + return doc; +} |