summary refs log tree commit diff
path: root/pdf
diff options
context:
space:
mode:
author Paul Gardiner <paul@glidos.net> 2012-06-01 09:38:51 +0100
committer Paul Gardiner <paul@glidos.net> 2012-06-01 09:38:51 +0100
commit 2aea3684b5571f469f4ad29f409b61fb9da35e2a (patch)
tree 7b2c80c1e49e18549a2210101579235a915b4ea2 /pdf
parent 741fe4a4d7bbd6ee914504be7e85356a741601c4 (diff)
parent d9982f81e105d5e1084283225ed72b1943ca3693 (diff)
download mupdf-2aea3684b5571f469f4ad29f409b61fb9da35e2a.tar.xz
Merge branch 'master' into forms
Conflicts: fitz/doc_document.c fitz/fitz-internal.h fitz/fitz.h fitz/stm_buffer.c pdf/mupdf-internal.h pdf/pdf_object.c pdf/pdf_xobject.c pdf/pdf_xref.c win32/mupdf.sln
Diffstat (limited to 'pdf')
-rw-r--r-- pdf/mupdf-internal.h 24
-rw-r--r-- pdf/mupdf.h 42
-rw-r--r-- pdf/pdf_cmap.c 2
-rw-r--r-- pdf/pdf_font.c 2
-rw-r--r-- pdf/pdf_form.c 22
-rw-r--r-- pdf/pdf_image.c 13
-rw-r--r-- pdf/pdf_interpret.c 91
-rw-r--r-- pdf/pdf_js_none.c 4
-rw-r--r-- pdf/pdf_object.c (renamed from pdf/base_object.c) 9
-rw-r--r-- pdf/pdf_page.c 72
-rw-r--r-- pdf/pdf_parse.c 8
-rw-r--r-- pdf/pdf_pattern.c 6
-rw-r--r-- pdf/pdf_repair.c 15
-rw-r--r-- pdf/pdf_stream.c 126
-rw-r--r-- pdf/pdf_type3.c 5
-rw-r--r-- pdf/pdf_write.c 690
-rw-r--r-- pdf/pdf_xobject.c 18
-rw-r--r-- pdf/pdf_xref.c 329
-rw-r--r-- pdf/pdf_xref_aux.c 31
19 files changed, 1183 insertions, 326 deletions
diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h
index b50ba2fe..b1117726 100644
--- a/pdf/mupdf-internal.h
+++ b/pdf/mupdf-internal.h
@@ -146,11 +146,12 @@ typedef struct pdf_xref_entry_s pdf_xref_entry;
struct pdf_xref_entry_s
{
+ char type; /* 0=unset (f)ree i(n)use (o)bjstm */
int ofs; /* file offset / objstm object number */
int gen; /* generation / objstm index */
int stm_ofs; /* on-disk stream */
+ fz_buffer *stm_buf; /* in-memory stream (for updated objects) */
pdf_obj *obj; /* stored/cached object */
- int type; /* 0=unset (f)ree i(n)use (o)bjstm */
};
typedef struct pdf_crypt_s pdf_crypt;
@@ -217,19 +218,25 @@ struct pdf_document_s
pdf_js *js;
};
+pdf_document *pdf_open_document_no_run(fz_context *ctx, const char *filename);
+pdf_document *pdf_open_document_no_run_with_stream(fz_stream *file);
+
void pdf_cache_object(pdf_document *doc, int num, int gen);
fz_stream *pdf_open_inline_stream(pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, pdf_image_params *params);
-fz_buffer *pdf_load_image_stream(pdf_document *doc, int num, int gen, pdf_image_params *params);
-fz_stream *pdf_open_image_stream(pdf_document *doc, int num, int gen, pdf_image_params *params);
+fz_buffer *pdf_load_image_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params);
+fz_stream *pdf_open_image_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params);
fz_stream *pdf_open_stream_with_offset(pdf_document *doc, int num, int gen, pdf_obj *dict, int stm_ofs);
fz_stream *pdf_open_image_decomp_stream(fz_context *ctx, fz_buffer *, pdf_image_params *params, int *factor);
+fz_stream *pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj);
+fz_buffer *pdf_load_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
+fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
+fz_stream *pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
void pdf_repair_obj_stms(pdf_document *doc);
void pdf_print_xref(pdf_document *);
void pdf_resize_xref(pdf_document *doc, int newcap);
-pdf_obj *pdf_new_stream_indirection(pdf_document *doc, pdf_obj *obj);
/*
* Encryption
@@ -239,6 +246,7 @@ pdf_crypt *pdf_new_crypt(fz_context *ctx, pdf_obj *enc, pdf_obj *id);
void pdf_free_crypt(fz_context *ctx, pdf_crypt *crypt);
void pdf_crypt_obj(fz_context *ctx, pdf_crypt *crypt, pdf_obj *obj, int num, int gen);
+void pdf_crypt_buffer(fz_context *ctx, pdf_crypt *crypt, fz_buffer *buf, int num, int gen);
fz_stream *pdf_open_crypt(fz_stream *chain, pdf_crypt *crypt, int num, int gen);
fz_stream *pdf_open_crypt_with_filter(fz_stream *chain, pdf_crypt *crypt, char *name, int num, int gen);
@@ -284,7 +292,7 @@ struct pdf_pattern_s
fz_matrix matrix;
fz_rect bbox;
pdf_obj *resources;
- fz_buffer *contents;
+ pdf_obj *contents;
};
pdf_pattern *pdf_load_pattern(pdf_document *doc, pdf_obj *obj);
@@ -307,7 +315,7 @@ struct pdf_xobject_s
int transparency;
fz_colorspace *colorspace;
pdf_obj *resources;
- fz_buffer *contents;
+ pdf_obj *contents;
pdf_obj *me;
};
@@ -315,7 +323,7 @@ pdf_xobject *pdf_load_xobject(pdf_document *doc, pdf_obj *obj);
pdf_obj *pdf_new_xobject(pdf_document *doc, fz_rect *bbox, fz_matrix *mat);
pdf_xobject *pdf_keep_xobject(fz_context *ctx, pdf_xobject *xobj);
void pdf_drop_xobject(fz_context *ctx, pdf_xobject *xobj);
-void pdf_xobject_set_contents(fz_context *ctx, pdf_xobject *from, fz_buffer *buffer);
+void pdf_xobject_set_contents(pdf_document *xref, pdf_xobject *from, fz_buffer *buffer);
void pdf_update_appearance(pdf_document *doc, pdf_obj *obj);
@@ -553,7 +561,7 @@ struct pdf_page_s
int rotate;
int transparency;
pdf_obj *resources;
- fz_buffer *contents;
+ pdf_obj *contents;
fz_link *links;
pdf_annot *annots;
};
diff --git a/pdf/mupdf.h b/pdf/mupdf.h
index 2d27f4af..2d93b793 100644
--- a/pdf/mupdf.h
+++ b/pdf/mupdf.h
@@ -20,7 +20,6 @@ pdf_obj *pdf_new_real(fz_context *ctx, float f);
pdf_obj *fz_new_name(fz_context *ctx, char *str);
pdf_obj *pdf_new_string(fz_context *ctx, char *str, int len);
pdf_obj *pdf_new_indirect(fz_context *ctx, int num, int gen, void *doc);
-
pdf_obj *pdf_new_array(fz_context *ctx, int initialcap);
pdf_obj *pdf_new_dict(fz_context *ctx, int initialcap);
pdf_obj *pdf_new_rect(fz_context *ctx, fz_rect *rect);
@@ -97,7 +96,6 @@ fz_matrix pdf_to_matrix(fz_context *ctx, pdf_obj *array);
int pdf_count_objects(pdf_document *doc);
pdf_obj *pdf_resolve_indirect(pdf_obj *ref);
pdf_obj *pdf_load_object(pdf_document *doc, int num, int gen);
-void pdf_update_object(pdf_document *doc, int num, int gen, pdf_obj *newobj);
fz_buffer *pdf_load_raw_stream(pdf_document *doc, int num, int gen);
fz_buffer *pdf_load_stream(pdf_document *doc, int num, int gen);
@@ -109,6 +107,41 @@ fz_image *pdf_load_image(pdf_document *doc, pdf_obj *obj);
fz_outline *pdf_load_outline(pdf_document *doc);
/*
+ pdf_create_object: Allocate a slot in the xref table and return a fresh unused object number.
+*/
+int pdf_create_object(pdf_document *xref);
+
+/*
+ pdf_delete_object: Remove object from xref table, marking the slot as free.
+*/
+void pdf_delete_object(pdf_document *xref, int num);
+
+/*
+ pdf_update_object: Replace object in xref table with the passed in object.
+*/
+void pdf_update_object(pdf_document *xref, int num, pdf_obj *obj);
+
+/*
+ pdf_get_stream: Return the contents for object in xref table
+*/
+fz_buffer *pdf_get_stream(pdf_document *xref, int num);
+
+/*
+ pdf_update_stream: Replace stream contents for object in xref table with the passed in buffer.
+
+ The buffer contents must match the /Filter setting.
+ If storing uncompressed data, make sure to delete the /Filter key from
+ the stream dictionary. If storing deflated data, make sure to set the
+ /Filter value to /FlateDecode.
+*/
+void pdf_update_stream(pdf_document *xref, int num, fz_buffer *buf);
+
+/*
+ pdf_write_document: Write out the document to a file with all changes finalised.
+*/
+void pdf_write_document(pdf_document *doc, char *filename, fz_write_options *opts);
+
+/*
pdf_open_document: Open a PDF document.
Open a PDF document by reading its cross reference table, so
@@ -218,4 +251,9 @@ void pdf_run_page(pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix c
void pdf_run_page_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix ctm, char *event, fz_cookie *cookie);
+/*
+ Metadata interface.
+*/
+int pdf_meta(pdf_document *doc, int key, void *ptr, int size);
+
#endif
diff --git a/pdf/pdf_cmap.c b/pdf/pdf_cmap.c
index 71066986..1f1117fe 100644
--- a/pdf/pdf_cmap.c
+++ b/pdf/pdf_cmap.c
@@ -189,7 +189,7 @@ add_table(fz_context *ctx, pdf_cmap *cmap, int value)
}
if (cmap->tlen + 1 > cmap->tcap)
{
- int new_cap = cmap->tcap > 1 ? (cmap->tcap * 3) / 2 : 256;
+ int new_cap = cmap->tcap > 1 ? (cmap->tcap * 3) / 2 : 256;
cmap->table = fz_resize_array(ctx, cmap->table, new_cap, sizeof(unsigned short));
cmap->tcap = new_cap;
}
diff --git a/pdf/pdf_font.c b/pdf/pdf_font.c
index ab60af72..9e4e60ea 100644
--- a/pdf/pdf_font.c
+++ b/pdf/pdf_font.c
@@ -148,7 +148,7 @@ static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
{
int gid = ft_cid_to_gid(fontdesc, cid);
int fterr;
-
+
fterr = FT_Load_Glyph(fontdesc->font->ft_face, gid,
FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM);
if (fterr)
diff --git a/pdf/pdf_form.c b/pdf/pdf_form.c
index 92eda83f..a517c339 100644
--- a/pdf/pdf_form.c
+++ b/pdf/pdf_form.c
@@ -71,6 +71,11 @@ static const char *fmt_ET = "ET\n";
static const char *fmt_Q = "Q\n";
static const char *fmt_EMC = "EMC\n";
+static fz_buffer *form_contents(pdf_document *doc, pdf_xobject *form)
+{
+ return pdf_get_stream(doc, pdf_to_num(form->contents));
+}
+
static void account_for_rot(fz_rect *rect, fz_matrix *mat, int rot)
{
float width = rect->x1;
@@ -659,8 +664,9 @@ fz_buffer *create_text_appearance(pdf_document *doc, fz_rect *bbox, fz_matrix *o
return fzbuf;
}
-static void update_marked_content(fz_context *ctx, pdf_xobject *form, fz_buffer *fzbuf)
+static void update_marked_content(pdf_document *doc, pdf_xobject *form, fz_buffer *fzbuf)
{
+ fz_context *ctx = doc->ctx;
int tok;
pdf_lexbuf lbuf;
fz_stream *str_outer = NULL;
@@ -681,7 +687,7 @@ static void update_marked_content(fz_context *ctx, pdf_xobject *form, fz_buffer
int first = 1;
newbuf = fz_new_buffer(ctx, 0);
- len = fz_buffer_storage(ctx, form->contents, &buf);
+ len = fz_buffer_storage(ctx, form_contents(doc, form), &buf);
str_outer = fz_open_memory(ctx, buf, len);
len = fz_buffer_storage(ctx, fzbuf, &buf);
str_inner = fz_open_memory(ctx, buf, len);
@@ -733,7 +739,7 @@ static void update_marked_content(fz_context *ctx, pdf_xobject *form, fz_buffer
}
/* Use newbuf in place of the existing appearance stream */
- pdf_xobject_set_contents(ctx, form, newbuf);
+ pdf_xobject_set_contents(doc, form, newbuf);
}
fz_always(ctx)
{
@@ -756,7 +762,7 @@ int get_matrix(pdf_document *doc, pdf_xobject *form, int q, fz_matrix *mt)
pdf_lexbuf lbuf;
fz_stream *str;
- bufsize = fz_buffer_storage(ctx, form->contents, &buf);
+ bufsize = fz_buffer_storage(ctx, form_contents(doc, form), &buf);
str = fz_open_memory(ctx, buf, bufsize);
memset(lbuf.scratch, 0, sizeof(lbuf.scratch));
@@ -807,7 +813,7 @@ int get_matrix(pdf_document *doc, pdf_xobject *form, int q, fz_matrix *mt)
if (q != Q_Left)
{
/* Offset the matrix to refer to the alignment position */
- fz_rect bbox = measure_text(doc, form->resources, form->contents);
+ fz_rect bbox = measure_text(doc, form->resources, form_contents(doc, form));
mt->e += q == Q_Right ? (bbox.x1 - bbox.x0)
: (bbox.x1 - bbox.x0) / 2;
}
@@ -859,7 +865,7 @@ static void update_text_appearance(pdf_document *doc, pdf_obj *obj, char *text)
has_tm = get_matrix(doc, form, q, &tm);
fzbuf = create_text_appearance(doc, &form->bbox, has_tm ? &tm : NULL, q, dr, pdf_to_str_buf(da), text);
- update_marked_content(ctx, form, fzbuf);
+ update_marked_content(doc, form, fzbuf);
}
}
}
@@ -921,7 +927,7 @@ static void synthesize_text_widget(pdf_document *doc, pdf_obj *obj)
form = pdf_load_xobject(doc, formobj);
fzbuf = fz_new_buffer(ctx, 0);
fz_buffer_printf(ctx, fzbuf, "/Tx BMC EMC");
- pdf_xobject_set_contents(ctx, form, fzbuf);
+ pdf_xobject_set_contents(doc, form, fzbuf);
ap = pdf_new_dict(ctx, 1);
pdf_dict_puts(ap, "N", formobj);
@@ -1145,7 +1151,7 @@ static void update_pushbutton_widget(pdf_document *doc, pdf_obj *obj)
fzbuf_print_text(ctx, fzbuf, &clip, da, 0, &mat, text);
}
- pdf_xobject_set_contents(ctx, form, fzbuf);
+ pdf_xobject_set_contents(doc, form, fzbuf);
}
fz_always(ctx)
{
diff --git a/pdf/pdf_image.c b/pdf/pdf_image.c
index dc9cc088..b4571bbe 100644
--- a/pdf/pdf_image.c
+++ b/pdf/pdf_image.c
@@ -95,7 +95,7 @@ static fz_store_type pdf_image_store_type =
};
static fz_pixmap *
-decomp_image_from_stream(fz_context *ctx, fz_stream *stm, pdf_image *image, int in_line, int indexed, int factor)
+decomp_image_from_stream(fz_context *ctx, fz_stream *stm, pdf_image *image, int in_line, int indexed, int factor, int cache)
{
fz_pixmap *tile = NULL;
fz_pixmap *existing_tile;
@@ -190,6 +190,9 @@ decomp_image_from_stream(fz_context *ctx, fz_stream *stm, pdf_image *image, int
fz_rethrow(ctx);
}
+ if (!cache)
+ return tile;
+
/* Now we try to cache the pixmap. Any failure here will just result
* in us not caching. */
fz_try(ctx)
@@ -279,7 +282,7 @@ pdf_image_get_pixmap(fz_context *ctx, fz_image *image_, int w, int h)
/* We need to make a new one. */
stm = pdf_open_image_decomp_stream(ctx, image->buffer, &image->params, &factor);
- return decomp_image_from_stream(ctx, stm, image, 0, 0, factor);
+ return decomp_image_from_stream(ctx, stm, image, 0, 0, factor, 1);
}
static pdf_image *
@@ -427,7 +430,9 @@ pdf_load_image_imp(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *c
{
/* Just load the compressed image data now and we can
* decode it on demand. */
- image->buffer = pdf_load_image_stream(xref, pdf_to_num(dict), pdf_to_gen(dict), &image->params);
+ int num = pdf_to_num(dict);
+ int gen = pdf_to_gen(dict);
+ image->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &image->params);
break; /* Out of fz_try */
}
@@ -443,7 +448,7 @@ pdf_load_image_imp(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *c
/* RJW: "cannot open image data stream (%d 0 R)", pdf_to_num(dict) */
}
- image->tile = decomp_image_from_stream(ctx, stm, image, cstm != NULL, indexed, 1);
+ image->tile = decomp_image_from_stream(ctx, stm, image, cstm != NULL, indexed, 1, 0);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c
index a11b8c30..8851be6e 100644
--- a/pdf/pdf_interpret.c
+++ b/pdf/pdf_interpret.c
@@ -104,7 +104,7 @@ struct pdf_csi_s
fz_cookie *cookie;
};
-static void pdf_run_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents);
+static void pdf_run_contents_object(pdf_csi *csi, pdf_obj *rdb, pdf_obj *contents);
static void pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, fz_matrix transform);
static void pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what);
@@ -1291,7 +1291,7 @@ pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what)
gstate->ctm = ptm;
csi->top_ctm = gstate->ctm;
pdf_gsave(csi);
- pdf_run_buffer(csi, pat->resources, pat->contents);
+ pdf_run_contents_object(csi, pat->resources, pat->contents);
/* RJW: "cannot render pattern tile" */
pdf_grestore(csi);
while (oldtop < csi->gtop)
@@ -1310,7 +1310,7 @@ pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, fz_rect area, int what)
pdf_gsave(csi);
fz_try(ctx)
{
- pdf_run_buffer(csi, pat->resources, pat->contents);
+ pdf_run_contents_object(csi, pat->resources, pat->contents);
}
fz_catch(ctx)
{
@@ -1407,7 +1407,7 @@ pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, fz_matrix t
if (xobj->resources)
resources = xobj->resources;
- pdf_run_buffer(csi, resources, xobj->contents);
+ pdf_run_contents_object(csi, resources, xobj->contents);
/* RJW: "cannot interpret XObject stream" */
}
fz_always(ctx)
@@ -2528,7 +2528,6 @@ pdf_run_keyword(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, char *buf)
fz_warn(ctx, "unknown keyword: '%s'", buf);
break;
}
- fz_assert_lock_not_held(ctx, FZ_LOCK_FILE);
}
static void
@@ -2662,44 +2661,78 @@ pdf_run_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, pdf_lexbuf *buf)
*/
static void
-pdf_run_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents)
+pdf_run_contents_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file)
{
fz_context *ctx = csi->dev->ctx;
pdf_lexbuf_large *buf;
- fz_stream * file = NULL;
int save_in_text;
fz_var(buf);
- fz_var(file);
+
+ if (file == NULL)
+ return;
+
+ buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */
+ buf->base.size = PDF_LEXBUF_LARGE;
+ save_in_text = csi->in_text;
+ csi->in_text = 0;
+ fz_try(ctx)
+ {
+ pdf_run_stream(csi, rdb, file, &buf->base);
+ }
+ fz_catch(ctx)
+ {
+ fz_warn(ctx, "Content stream parsing error - rendering truncated");
+ }
+ csi->in_text = save_in_text;
+ fz_free(ctx, buf);
+}
+
+static void
+pdf_run_contents_object(pdf_csi *csi, pdf_obj *rdb, pdf_obj *contents)
+{
+ fz_context *ctx = csi->dev->ctx;
+ fz_stream *file = NULL;
if (contents == NULL)
return;
+ file = pdf_open_contents_stream(csi->xref, contents);
fz_try(ctx)
{
- buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */
- buf->base.size = PDF_LEXBUF_LARGE;
- file = fz_open_buffer(ctx, contents);
- save_in_text = csi->in_text;
- csi->in_text = 0;
- fz_try(ctx)
- {
- pdf_run_stream(csi, rdb, file, &buf->base);
- }
- fz_catch(ctx)
- {
- fz_warn(ctx, "Content stream parsing error - rendering truncated");
- }
- csi->in_text = save_in_text;
+ pdf_run_contents_stream(csi, rdb, file);
}
fz_always(ctx)
{
fz_close(file);
- fz_free(ctx, buf);
}
fz_catch(ctx)
{
- fz_throw(ctx, "cannot parse context stream");
+ fz_rethrow(ctx);
+ }
+}
+
+static void
+pdf_run_contents_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents)
+{
+ fz_context *ctx = csi->dev->ctx;
+ fz_stream *file = NULL;
+
+ if (contents == NULL)
+ return;
+
+ file = fz_open_buffer(ctx, contents);
+ fz_try(ctx)
+ {
+ pdf_run_contents_stream(csi, rdb, file);
+ }
+ fz_always(ctx)
+ {
+ fz_close(file);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
}
}
@@ -2719,14 +2752,16 @@ pdf_run_page_with_usage(pdf_document *xref, pdf_page *page, fz_device *dev, fz_m
csi = pdf_new_csi(xref, dev, ctm, event, cookie, NULL);
fz_try(ctx)
{
- pdf_run_buffer(csi, page->resources, page->contents);
+ pdf_run_contents_object(csi, page->resources, page->contents);
}
- fz_catch(ctx)
+ fz_always(ctx)
{
pdf_free_csi(csi);
+ }
+ fz_catch(ctx)
+ {
fz_throw(ctx, "cannot parse page content stream");
}
- pdf_free_csi(csi);
if (cookie && cookie->progress_max != -1)
{
@@ -2792,7 +2827,7 @@ pdf_run_glyph(pdf_document *xref, pdf_obj *resources, fz_buffer *contents, fz_de
fz_try(ctx)
{
- pdf_run_buffer(csi, resources, contents);
+ pdf_run_contents_buffer(csi, resources, contents);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_js_none.c b/pdf/pdf_js_none.c
index a85103e4..bc8d2ee7 100644
--- a/pdf/pdf_js_none.c
+++ b/pdf/pdf_js_none.c
@@ -13,6 +13,10 @@ void pdf_drop_js(pdf_js *js)
{
}
+void pdf_js_setup_event(pdf_js *js, pdf_obj *target)
+{
+}
+
void pdf_js_execute(pdf_js *js, char *code)
{
}
diff --git a/pdf/base_object.c b/pdf/pdf_object.c
index 142bc128..3fb48019 100644
--- a/pdf/base_object.c
+++ b/pdf/pdf_object.c
@@ -145,8 +145,8 @@ pdf_new_indirect(fz_context *ctx, int num, int gen, void *xref)
pdf_obj *
pdf_keep_obj(pdf_obj *obj)
{
- assert(obj);
- obj->refs ++;
+ if (obj)
+ obj->refs ++;
return obj;
}
@@ -159,7 +159,6 @@ int pdf_is_indirect(pdf_obj *obj)
do { \
if (obj && obj->kind == PDF_INDIRECT) \
{\
- fz_assert_lock_not_held(obj->ctx, FZ_LOCK_FILE); \
obj = pdf_resolve_indirect(obj); \
} \
} while (0)
@@ -566,6 +565,8 @@ pdf_obj *pdf_new_rect(fz_context *ctx, fz_rect *rect)
pdf_obj *arr = NULL;
pdf_obj *item = NULL;
+ fz_var(arr);
+ fz_var(item);
fz_try(ctx)
{
arr = pdf_new_array(ctx, 4);
@@ -605,6 +606,8 @@ pdf_obj *pdf_new_matrix(fz_context *ctx, fz_matrix *mtx)
pdf_obj *arr = NULL;
pdf_obj *item = NULL;
+ fz_var(arr);
+ fz_var(item);
fz_try(ctx)
{
arr = pdf_new_array(ctx, 6);
diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c
index 3e95e9a5..39554551 100644
--- a/pdf/pdf_page.c
+++ b/pdf/pdf_page.c
@@ -114,6 +114,8 @@ pdf_load_page_tree_node(pdf_document *xref, pdf_obj *node, struct info info)
}
}
/* Get the next node */
+ if (stacklen < 0)
+ break;
while (++stack[stacklen].pos == stack[stacklen].max)
{
pdf_dict_unmark(stack[stacklen].node);
@@ -279,72 +281,6 @@ found:
return useBM;
}
-/* we need to combine all sub-streams into one for the content stream interpreter */
-
-static fz_buffer *
-pdf_load_page_contents_array(pdf_document *xref, pdf_obj *list)
-{
- fz_buffer *big;
- fz_buffer *one;
- int i, n;
- fz_context *ctx = xref->ctx;
-
- big = fz_new_buffer(ctx, 32 * 1024);
-
- n = pdf_array_len(list);
- fz_var(i); /* Workaround Mac compiler bug */
- for (i = 0; i < n; i++)
- {
- pdf_obj *stm = pdf_array_get(list, i);
- fz_try(ctx)
- {
- one = pdf_load_stream(xref, pdf_to_num(stm), pdf_to_gen(stm));
- }
- fz_catch(ctx)
- {
- fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
- continue;
- }
-
- if (big->len + one->len + 1 > big->cap)
- fz_resize_buffer(ctx, big, big->len + one->len + 1);
- memcpy(big->data + big->len, one->data, one->len);
- big->data[big->len + one->len] = ' ';
- big->len += one->len + 1;
-
- fz_drop_buffer(ctx, one);
- }
-
- if (n > 0 && big->len == 0)
- {
- fz_drop_buffer(ctx, big);
- fz_throw(ctx, "cannot load content stream");
- }
- fz_trim_buffer(ctx, big);
-
- return big;
-}
-
-static fz_buffer *
-pdf_load_page_contents(pdf_document *xref, pdf_obj *obj)
-{
- fz_context *ctx = xref->ctx;
-
- if (pdf_is_array(obj))
- {
- return pdf_load_page_contents_array(xref, obj);
- /* RJW: "cannot load content stream array" */
- }
- else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)))
- {
- return pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj));
- /* RJW: "cannot load content stream (%d 0 R)", pdf_to_num(obj) */
- }
-
- fz_warn(ctx, "page contents missing, leaving page blank");
- return fz_new_buffer(ctx, 0);
-}
-
pdf_page *
pdf_load_page(pdf_document *xref, int number)
{
@@ -422,7 +358,7 @@ pdf_load_page(pdf_document *xref, int number)
obj = pdf_dict_gets(pageobj, "Contents");
fz_try(ctx)
{
- page->contents = pdf_load_page_contents(xref, obj);
+ page->contents = pdf_keep_obj(obj);
if (pdf_resources_use_blending(ctx, page->resources))
page->transparency = 1;
@@ -462,7 +398,7 @@ pdf_free_page(pdf_document *xref, pdf_page *page)
if (page->resources)
pdf_drop_obj(page->resources);
if (page->contents)
- fz_drop_buffer(xref->ctx, page->contents);
+ pdf_drop_obj(page->contents);
if (page->links)
fz_drop_link(xref->ctx, page->links);
if (page->annots)
diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c
index fe9db368..b1472d1a 100644
--- a/pdf/pdf_parse.c
+++ b/pdf/pdf_parse.c
@@ -453,21 +453,19 @@ pdf_parse_ind_obj(pdf_document *xref,
fz_var(obj);
tok = pdf_lex(file, buf);
- /* RJW: cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_INT)
- fz_throw(ctx, "expected object number (%d %d R)", num, gen);
+ fz_throw(ctx, "expected object number");
num = buf->i;
tok = pdf_lex(file, buf);
- /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_INT)
- fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
+ fz_throw(ctx, "expected generation number (%d ? obj)", num);
gen = buf->i;
tok = pdf_lex(file, buf);
/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_OBJ)
- fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);
+ fz_throw(ctx, "expected 'obj' keyword (%d %d ?)", num, gen);
tok = pdf_lex(file, buf);
/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
diff --git a/pdf/pdf_pattern.c b/pdf/pdf_pattern.c
index 14175670..af96c2d5 100644
--- a/pdf/pdf_pattern.c
+++ b/pdf/pdf_pattern.c
@@ -21,7 +21,7 @@ pdf_free_pattern_imp(fz_context *ctx, fz_storable *pat_)
if (pat->resources)
pdf_drop_obj(pat->resources);
if (pat->contents)
- fz_drop_buffer(ctx, pat->contents);
+ pdf_drop_obj(pat->contents);
fz_free(ctx, pat);
}
@@ -30,7 +30,7 @@ pdf_pattern_size(pdf_pattern *pat)
{
if (pat == NULL)
return 0;
- return sizeof(*pat) + (pat->contents ? pat->contents->cap : 0);
+ return sizeof(*pat);
}
pdf_pattern *
@@ -72,7 +72,7 @@ pdf_load_pattern(pdf_document *xref, pdf_obj *dict)
fz_try(ctx)
{
- pat->contents = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict));
+ pat->contents = pdf_keep_obj(dict);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c
index a51b9631..27846855 100644
--- a/pdf/pdf_repair.c
+++ b/pdf/pdf_repair.c
@@ -195,6 +195,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
}
}
+/* Entered with file locked, remains locked throughout. */
void
pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf)
{
@@ -389,19 +390,7 @@ pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf)
/* corrected stream length */
if (list[i].stm_len >= 0)
{
- fz_unlock(ctx, FZ_LOCK_FILE);
- fz_try(ctx)
- {
- dict = pdf_load_object(xref, list[i].num, list[i].gen);
- }
- fz_always(ctx)
- {
- fz_lock(ctx, FZ_LOCK_FILE);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
+ dict = pdf_load_object(xref, list[i].num, list[i].gen);
/* RJW: "cannot load stream object (%d %d R)", list[i].num, list[i].gen */
length = pdf_new_int(ctx, list[i].stm_len);
diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c
index 84f966ec..89d94004 100644
--- a/pdf/pdf_stream.c
+++ b/pdf/pdf_stream.c
@@ -13,7 +13,7 @@ pdf_is_stream(pdf_document *xref, int num, int gen)
pdf_cache_object(xref, num, gen);
/* RJW: "cannot load object, ignoring error" */
- return xref->table[num].stm_ofs > 0;
+ return xref->table[num].stm_ofs > 0 || xref->table[num].stm_buf;
}
/*
@@ -222,21 +222,27 @@ build_filter_chain(fz_stream *chain, pdf_document *xref, pdf_obj *fs, pdf_obj *p
/*
* Build a filter for reading raw stream data.
- * This is a null filter to constrain reading to the
- * stream length, followed by a decryption filter.
+ * This is a null filter to constrain reading to the stream length (and to
+ * allow for other people accessing the file), followed by a decryption
+ * filter.
+ *
+ * num and gen are used purely to seed the encryption.
*/
static fz_stream *
-pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen)
+pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, int offset)
{
+ fz_context *ctx = chain->ctx;
int hascrypt;
int len;
- fz_context *ctx = chain->ctx;
+
+ if (num > 0 && num < xref->len && xref->table[num].stm_buf)
+ return fz_open_buffer(ctx, xref->table[num].stm_buf);
/* don't close chain when we close this filter */
fz_keep_stream(chain);
len = pdf_to_int(pdf_dict_gets(stmobj, "Length"));
- chain = fz_open_null(chain, len);
+ chain = fz_open_null(chain, len, offset);
fz_try(ctx)
{
@@ -258,7 +264,7 @@ pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int n
* to stream length and decrypting.
*/
static fz_stream *
-pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, pdf_image_params *imparams)
+pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, int offset, pdf_image_params *imparams)
{
pdf_obj *filters;
pdf_obj *params;
@@ -266,14 +272,13 @@ pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num,
filters = pdf_dict_getsa(stmobj, "Filter", "F");
params = pdf_dict_getsa(stmobj, "DecodeParms", "DP");
- chain = pdf_open_raw_filter(chain, xref, stmobj, num, gen);
+ chain = pdf_open_raw_filter(chain, xref, stmobj, num, gen, offset);
if (pdf_is_name(filters))
chain = build_filter(chain, xref, filters, params, num, gen, imparams);
else if (pdf_array_len(filters) > 0)
chain = build_filter_chain(chain, xref, filters, params, num, gen, imparams);
- fz_lock_stream(chain);
return chain;
}
@@ -298,20 +303,22 @@ pdf_open_inline_stream(pdf_document *xref, pdf_obj *stmobj, int length, fz_strea
if (pdf_array_len(filters) > 0)
return build_filter_chain(chain, xref, filters, params, 0, 0, imparams);
- return fz_open_null(chain, length);
+ return fz_open_null(chain, length, fz_tell(chain));
}
/*
* Open a stream for reading the raw (compressed but decrypted) data.
- * Using xref->file while this is open is a bad idea.
*/
fz_stream *
pdf_open_raw_stream(pdf_document *xref, int num, int gen)
{
- pdf_xref_entry *x;
- fz_stream *stm;
+ return pdf_open_raw_renumbered_stream(xref, num, gen, num, gen);
+}
- fz_var(x);
+fz_stream *
+pdf_open_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+{
+ pdf_xref_entry *x;
if (num < 0 || num >= xref->len)
fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen);
@@ -324,10 +331,7 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen)
if (x->stm_ofs == 0)
fz_throw(xref->ctx, "object is not a stream");
- stm = pdf_open_raw_filter(xref->file, xref, x->obj, num, gen);
- fz_lock_stream(stm);
- fz_seek(xref->file, x->stm_ofs, 0);
- return stm;
+ return pdf_open_raw_filter(xref->file, xref, x->obj, orig_num, orig_gen, x->stm_ofs);
}
/*
@@ -338,14 +342,13 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen)
fz_stream *
pdf_open_stream(pdf_document *xref, int num, int gen)
{
- return pdf_open_image_stream(xref, num, gen, NULL);
+ return pdf_open_image_stream(xref, num, gen, num, gen, NULL);
}
fz_stream *
-pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *params)
+pdf_open_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params)
{
pdf_xref_entry *x;
- fz_stream *stm;
if (num < 0 || num >= xref->len)
fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen);
@@ -355,12 +358,10 @@ pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa
pdf_cache_object(xref, num, gen);
/* RJW: "cannot load stream object (%d %d R)", num, gen */
- if (x->stm_ofs == 0)
+ if (x->stm_ofs == 0 && x->stm_buf == NULL)
fz_throw(xref->ctx, "object is not a stream");
- stm = pdf_open_filter(xref->file, xref, x->obj, num, gen, params);
- fz_seek(xref->file, x->stm_ofs, 0);
- return stm;
+ return pdf_open_filter(xref->file, xref, x->obj, orig_num, orig_gen, x->stm_ofs, params);
}
fz_stream *
@@ -410,14 +411,10 @@ pdf_open_image_decomp_stream(fz_context *ctx, fz_buffer *buffer, pdf_image_param
fz_stream *
pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, pdf_obj *dict, int stm_ofs)
{
- fz_stream *stm;
-
if (stm_ofs == 0)
fz_throw(xref->ctx, "object is not a stream");
- stm = pdf_open_filter(xref->file, xref, dict, num, gen, NULL);
- fz_seek(xref->file, stm_ofs, 0);
- return stm;
+ return pdf_open_filter(xref->file, xref, dict, num, gen, stm_ofs, NULL);
}
/*
@@ -426,11 +423,20 @@ pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, pdf_obj *dict,
fz_buffer *
pdf_load_raw_stream(pdf_document *xref, int num, int gen)
{
+ return pdf_load_raw_renumbered_stream(xref, num, gen, num, gen);
+}
+
+fz_buffer *
+pdf_load_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+{
fz_stream *stm;
pdf_obj *dict;
int len;
fz_buffer *buf;
+ if (num > 0 && num < xref->len && xref->table[num].stm_buf)
+ return fz_keep_buffer(xref->ctx, xref->table[num].stm_buf);
+
dict = pdf_load_object(xref, num, gen);
/* RJW: "cannot load stream dictionary (%d %d R)", num, gen */
@@ -438,7 +444,7 @@ pdf_load_raw_stream(pdf_document *xref, int num, int gen)
pdf_drop_obj(dict);
- stm = pdf_open_raw_stream(xref, num, gen);
+ stm = pdf_open_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen);
/* RJW: "cannot open raw stream (%d %d R)", num, gen */
buf = fz_read_all(stm, len);
@@ -470,11 +476,17 @@ pdf_guess_filter_length(int len, char *filter)
fz_buffer *
pdf_load_stream(pdf_document *xref, int num, int gen)
{
- return pdf_load_image_stream(xref, num, gen, NULL);
+ return pdf_load_image_stream(xref, num, gen, num, gen, NULL);
+}
+
+fz_buffer *
+pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+{
+ return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL);
}
fz_buffer *
-pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *params)
+pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params)
{
fz_context *ctx = xref->ctx;
fz_stream *stm = NULL;
@@ -496,7 +508,7 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa
pdf_drop_obj(dict);
- stm = pdf_open_image_stream(xref, num, gen, params);
+ stm = pdf_open_image_stream(xref, num, gen, orig_num, orig_gen, params);
/* RJW: "cannot open stream (%d %d R)", num, gen */
fz_try(ctx)
@@ -514,3 +526,49 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa
return buf;
}
+
+static fz_stream *
+pdf_open_object_array(pdf_document *xref, pdf_obj *list)
+{ /* Open every stream referenced by the array 'list' and chain them into one concatenated stream, which is returned. */
+ int i, n;
+ fz_context *ctx = xref->ctx;
+ fz_stream *stm;
+
+ n = pdf_array_len(list);
+ stm = fz_open_concat(ctx, n, 1); /* sized for n parts; NOTE(review): the meaning of the last argument (1) is not visible here - confirm */
+
+ fz_var(i); /* Workaround Mac compiler bug */
+ for (i = 0; i < n; i++)
+ {
+ pdf_obj *obj = pdf_array_get(list, i);
+ fz_try(ctx)
+ {
+ fz_concat_push(stm, pdf_open_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)));
+ }
+ fz_catch(ctx)
+ {
+ fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); /* best effort: a part that fails to open is only warned about and skipped */
+ continue;
+ }
+ }
+
+ return stm;
+}
+
+fz_stream *
+pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj)
+{ /* Open page contents given either an array of stream references or a single stream reference. */
+ fz_context *ctx = xref->ctx;
+ int num, gen;
+
+ if (pdf_is_array(obj))
+ return pdf_open_object_array(xref, obj); /* array: concatenation of all its parts */
+
+ num = pdf_to_num(obj);
+ gen = pdf_to_gen(obj);
+ if (pdf_is_stream(xref, num, gen))
+ return pdf_open_image_stream(xref, num, gen, num, gen, NULL); /* not renumbered, no image params */
+
+ fz_warn(ctx, "pdf object stream missing (%d %d R)", num, gen);
+ return NULL; /* callers must cope with NULL when obj is neither an array nor a stream */
+}
diff --git a/pdf/pdf_type3.c b/pdf/pdf_type3.c
index 6603fc8b..4df46ce3 100644
--- a/pdf/pdf_type3.c
+++ b/pdf/pdf_type3.c
@@ -2,10 +2,9 @@
#include "mupdf-internal.h"
static void
-pdf_run_glyph_func(void *doc, void *rdb_, fz_buffer *contents, fz_device *dev, fz_matrix ctm, void *gstate)
+pdf_run_glyph_func(void *doc, void *rdb, fz_buffer *contents, fz_device *dev, fz_matrix ctm, void *gstate)
{
- pdf_obj *rdb = (pdf_obj *)rdb_;
- pdf_run_glyph(doc, rdb, contents, dev, ctm, gstate);
+ pdf_run_glyph(doc, (pdf_obj *)rdb, contents, dev, ctm, gstate);
}
static void
diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c
new file mode 100644
index 00000000..ed0b8711
--- /dev/null
+++ b/pdf/pdf_write.c
@@ -0,0 +1,690 @@
+#include "fitz.h"
+#include "mupdf-internal.h"
+
+typedef struct pdf_write_options_s pdf_write_options;
+
+struct pdf_write_options_s /* Working state shared by the functions that write a document; filled in by pdf_write_document. */
+{
+ FILE *out; /* output file, opened and closed by pdf_write_document */
+ int doascii; /* nonzero: hex-encode streams that contain binary bytes */
+ int doexpand; /* 0: copy streams as stored; otherwise fz_expand_* flags selecting streams to decode */
+ int dogarbage; /* >= 1 sweep from trailer, >= 2 also compact/renumber, >= 3 also merge duplicate objects */
+ char *uselist; /* per object number: nonzero if the object is written as in use */
+ int *ofslist; /* per object number: output file offset, or next free object number for free entries */
+ int *genlist; /* per object number: generation written to the xref table */
+ int *renumbermap; /* old object number -> new object number */
+ int *revrenumbermap; /* new object number -> an original object number */
+ int *revgenlist; /* new object number -> original generation */
+};
+
+/*
+ * Garbage collect objects not reachable from the trailer.
+ */
+
+/*
+ * Mark the object referenced by 'obj' as used in opts->uselist and return
+ * the resolved object so that the caller can go on sweeping into it.
+ *
+ * Returns NULL when the reference lies outside the xref table or when the
+ * object has already been marked, so every object is visited at most once.
+ *
+ * For stream objects an indirect /Length is replaced in the dictionary by
+ * its resolved value, and the separate length object is unmarked again.
+ */
+static pdf_obj *sweepref(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
+{
+ int num = pdf_to_num(obj);
+ int gen = pdf_to_gen(obj);
+ fz_context *ctx = xref->ctx;
+
+ if (num < 0 || num >= xref->len)
+ return NULL;
+ if (opts->uselist[num])
+ return NULL;
+
+ opts->uselist[num] = 1;
+
+ /* Bake in /Length in stream objects */
+ fz_try(ctx)
+ {
+ if (pdf_is_stream(xref, num, gen))
+ {
+ pdf_obj *len = pdf_dict_gets(obj, "Length");
+ if (pdf_is_indirect(len))
+ {
+ int len_num = pdf_to_num(len);
+
+ /* The reference comes straight from the file and may point
+ * outside the xref; only touch uselist for valid numbers. */
+ if (len_num >= 0 && len_num < xref->len)
+ opts->uselist[len_num] = 0;
+ len = pdf_resolve_indirect(len);
+ pdf_dict_puts(obj, "Length", len);
+ }
+ }
+ }
+ fz_catch(ctx)
+ {
+ /* Leave broken */
+ }
+
+ return pdf_resolve_indirect(obj);
+}
+
+static void sweepobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
+{ /* Recursively mark everything reachable from obj in opts->uselist. */
+ int i;
+
+ if (pdf_is_indirect(obj))
+ obj = sweepref(xref, opts, obj); /* NULL if out of range or already marked; presumably neither dict nor array, so the recursion ends here */
+
+ if (pdf_is_dict(obj))
+ {
+ int n = pdf_dict_len(obj);
+ for (i = 0; i < n; i++)
+ sweepobj(xref, opts, pdf_dict_get_val(obj, i)); /* only values are followed, not keys */
+ }
+
+ else if (pdf_is_array(obj))
+ {
+ int n = pdf_array_len(obj);
+ for (i = 0; i < n; i++)
+ sweepobj(xref, opts, pdf_array_get(obj, i));
+ }
+}
+
+/*
+ * Scan for and remove duplicate objects (slow)
+ */
+
+static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
+{ /* Find used non-stream objects that compare equal and redirect the higher numbered one to the lower numbered one via opts->renumbermap. */
+ int num, other;
+ fz_context *ctx = xref->ctx;
+
+ for (num = 1; num < xref->len; num++)
+ {
+ /* Only compare an object to objects preceding it */
+ for (other = 1; other < num; other++)
+ {
+ pdf_obj *a, *b;
+ int differ, newnum;
+
+ if (num == other || !opts->uselist[num] || !opts->uselist[other]) /* skip pairs with an unused member; num == other cannot occur since other < num */
+ continue;
+
+ /*
+ * Comparing stream objects data contents would take too long.
+ *
+ * pdf_is_stream calls pdf_cache_object and ensures
+ * that the xref table has the objects loaded.
+ */
+ fz_try(ctx)
+ {
+ differ = (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0));
+ }
+ fz_catch(ctx)
+ {
+ /* Assume different */
+ differ = 1;
+ }
+ if (differ)
+ continue;
+
+ a = xref->table[num].obj;
+ b = xref->table[other].obj;
+
+ a = pdf_resolve_indirect(a);
+ b = pdf_resolve_indirect(b);
+
+ if (pdf_objcmp(a, b)) /* a nonzero result is treated as "not equal" */
+ continue;
+
+ /* Keep the lowest numbered object */
+ newnum = MIN(num, other);
+ opts->renumbermap[num] = newnum;
+ opts->renumbermap[other] = newnum;
+ opts->revrenumbermap[newnum] = num; /* Either will do */
+ opts->uselist[MAX(num, other)] = 0; /* the higher numbered duplicate is no longer written */
+
+ /* One duplicate was found, do not look for another */
+ break;
+ }
+ }
+}
+
+/*
+ * Renumber objects sequentially so the xref is more compact
+ *
+ * This code assumes that any opts->renumbermap[n] <= n for all n.
+ */
+
+static void compactxref(pdf_document *xref, pdf_write_options *opts)
+{ /* Rewrite opts->renumbermap so that used objects get consecutive numbers starting at 1; unused objects map to 0. */
+ int num, newnum;
+
+ /*
+ * Update renumbermap in-place, clustering all used
+ * objects together at low object ids. Objects that
+ * already should be renumbered will have their new
+ * object ids be updated to reflect the compaction.
+ */
+
+ newnum = 1; /* next free slot in the compacted numbering; object 0 is never renumbered */
+ for (num = 1; num < xref->len; num++)
+ {
+ /* If it's not used, map it to zero */
+ if (!opts->uselist[num])
+ {
+ opts->renumbermap[num] = 0;
+ }
+ /* If it's not moved, compact it. */
+ else if (opts->renumbermap[num] == num)
+ {
+ opts->revrenumbermap[newnum] = opts->revrenumbermap[num]; /* carry the original number along to the new slot */
+ opts->revgenlist[newnum] = opts->revgenlist[num]; /* and the original generation */
+ opts->renumbermap[num] = newnum++;
+ }
+ /* Otherwise it's used, and moved. We know that it must have
+ * moved down, so the place it's moved to will be in the right
+ * place already. */
+ else
+ {
+ opts->renumbermap[num] = opts->renumbermap[opts->renumbermap[num]]; /* follow the earlier mapping to its compacted number */
+ }
+ }
+}
+
+/*
+ * Update indirect objects according to renumbering established when
+ * removing duplicate objects and compacting the xref.
+ */
+
+/*
+ * Build the replacement for the indirect reference 'ref' under the
+ * renumbering in opts->renumbermap. The caller owns the returned object.
+ *
+ * A reference whose object number lies outside the xref table has no
+ * renumbermap entry; it is replaced by a null object instead of reading
+ * past the end of the map.
+ */
+static pdf_obj *renumberref(pdf_document *xref, pdf_write_options *opts, pdf_obj *ref)
+{
+ int num = pdf_to_num(ref);
+
+ if (num < 0 || num >= xref->len)
+ return pdf_new_null(xref->ctx);
+ return pdf_new_indirect(xref->ctx, opts->renumbermap[num], 0, xref);
+}
+
+/*
+ * Rewrite, in place, every indirect reference found in the dictionary or
+ * array 'obj' (recursing into nested dictionaries and arrays) so that it
+ * uses the new object numbers. Generations of rewritten references are 0.
+ * Objects that are neither dictionaries nor arrays are left untouched.
+ */
+static void renumberobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
+{
+ int i;
+
+ if (pdf_is_dict(obj))
+ {
+ int n = pdf_dict_len(obj);
+ for (i = 0; i < n; i++)
+ {
+ pdf_obj *key = pdf_dict_get_key(obj, i);
+ pdf_obj *val = pdf_dict_get_val(obj, i);
+ if (pdf_is_indirect(val))
+ {
+ val = renumberref(xref, opts, val);
+ fz_dict_put(obj, key, val);
+ pdf_drop_obj(val);
+ }
+ else
+ {
+ renumberobj(xref, opts, val);
+ }
+ }
+ }
+
+ else if (pdf_is_array(obj))
+ {
+ int n = pdf_array_len(obj);
+ for (i = 0; i < n; i++)
+ {
+ pdf_obj *val = pdf_array_get(obj, i);
+ if (pdf_is_indirect(val))
+ {
+ val = renumberref(xref, opts, val);
+ pdf_array_put(obj, i, val);
+ pdf_drop_obj(val);
+ }
+ else
+ {
+ renumberobj(xref, opts, val);
+ }
+ }
+ }
+}
+
+/*
+ * Apply opts->renumbermap to the whole document: rewrite the references
+ * in the trailer and in every cached object, then rebuild xref->table so
+ * that each used object sits at its new number. Entries of unused
+ * objects are released. On return xref->len is the compacted length and
+ * opts->uselist marks every remaining object (from 1 up) as used.
+ */
+static void renumberobjs(pdf_document *xref, pdf_write_options *opts)
+{
+ pdf_xref_entry *oldxref;
+ int newlen;
+ int num;
+ fz_context *ctx = xref->ctx;
+
+ /* Apply renumber map to indirect references in all objects in xref */
+ renumberobj(xref, opts, xref->trailer);
+ for (num = 0; num < xref->len; num++)
+ {
+ pdf_obj *obj = xref->table[num].obj;
+
+ if (pdf_is_indirect(obj))
+ {
+ int ref = pdf_to_num(obj);
+
+ /* A reference outside the xref has no renumbermap entry;
+ * store null rather than indexing out of bounds. */
+ if (ref < 0 || ref >= xref->len)
+ obj = pdf_new_null(ctx);
+ else
+ obj = pdf_new_indirect(ctx, opts->renumbermap[ref], 0, xref);
+ pdf_update_object(xref, num, obj);
+ pdf_drop_obj(obj);
+ }
+ else
+ {
+ renumberobj(xref, opts, obj);
+ }
+ }
+
+ /* Create new table for the reordered, compacted xref */
+ oldxref = xref->table;
+ xref->table = fz_malloc_array(xref->ctx, xref->len, sizeof(pdf_xref_entry));
+ xref->table[0] = oldxref[0];
+
+ /* Move used objects into the new compacted xref */
+ newlen = 0;
+ for (num = 1; num < xref->len; num++)
+ {
+ if (opts->uselist[num])
+ {
+ if (newlen < opts->renumbermap[num])
+ newlen = opts->renumbermap[num];
+ xref->table[opts->renumbermap[num]] = oldxref[num];
+ }
+ else
+ {
+ /* The entry is discarded: release everything it owns,
+ * including an in-memory stream, as pdf_delete_object does. */
+ if (oldxref[num].obj)
+ pdf_drop_obj(oldxref[num].obj);
+ fz_drop_buffer(ctx, oldxref[num].stm_buf);
+ }
+ }
+
+ fz_free(xref->ctx, oldxref);
+
+ /* Update the used objects count in compacted xref */
+ xref->len = newlen + 1;
+
+ /* Update list of used objects to fit with compacted xref */
+ for (num = 1; num < xref->len; num++)
+ opts->uselist[num] = 1;
+}
+
+/*
+ * Make sure we have loaded objects from object streams.
+ */
+
+static void preloadobjstms(pdf_document *xref)
+{ /* Load every object whose xref entry has type 'o' (stored in an object stream). */
+ pdf_obj *obj;
+ int num;
+
+ for (num = 0; num < xref->len; num++)
+ {
+ if (xref->table[num].type == 'o')
+ {
+ obj = pdf_load_object(xref, num, 0); /* called for its loading side effect only */
+ pdf_drop_obj(obj); /* our reference is not needed */
+ }
+ }
+}
+
+/*
+ * Save streams and objects to the output
+ */
+
+static inline int isbinary(int c)
+{ /* Nonzero for values outside 32..127, except that newline, carriage return and tab count as text. */
+ if (c == '\n' || c == '\r' || c == '\t')
+ return 0;
+ return c < 32 || c > 127; /* note: 127 itself is treated as text */
+}
+
+static int isbinarystream(fz_buffer *buf)
+{ /* Returns 1 if any byte of buf is binary according to isbinary(), else 0 (also for an empty buffer). */
+ int i;
+ for (i = 0; i < buf->len; i++)
+ if (isbinary(buf->data[i]))
+ return 1;
+ return 0;
+}
+
+static fz_buffer *hexbuf(fz_context *ctx, unsigned char *p, int n)
+{ /* Return a new buffer holding the n bytes at p as lowercase hex digits, a newline after every 32 input bytes, terminated by ">\n". */
+ static const char hex[16] = "0123456789abcdef";
+ fz_buffer *buf;
+ int x = 0; /* input bytes emitted on the current output line */
+
+ buf = fz_new_buffer(ctx, n * 2 + (n / 32) + 2); /* 2 digits per byte + one newline per 32 bytes + the trailing ">\n" */
+
+ while (n--)
+ {
+ buf->data[buf->len++] = hex[*p >> 4];
+ buf->data[buf->len++] = hex[*p & 15];
+ if (++x == 32)
+ {
+ buf->data[buf->len++] = '\n';
+ x = 0;
+ }
+ p++;
+ }
+
+ buf->data[buf->len++] = '>';
+ buf->data[buf->len++] = '\n';
+
+ return buf;
+}
+
+static void addhexfilter(pdf_document *xref, pdf_obj *dict)
+{ /* Add ASCIIHexDecode to the /Filter entry of the stream dictionary dict, with a matching null entry in /DecodeParms where one exists. */
+ pdf_obj *f, *dp, *newf, *newdp;
+ pdf_obj *ahx, *nullobj;
+ fz_context *ctx = xref->ctx;
+
+ ahx = fz_new_name(ctx, "ASCIIHexDecode");
+ nullobj = pdf_new_null(ctx);
+ newf = newdp = NULL;
+
+ f = pdf_dict_gets(dict, "Filter");
+ dp = pdf_dict_gets(dict, "DecodeParms");
+
+ if (pdf_is_name(f))
+ {
+ newf = pdf_new_array(ctx, 2); /* single filter name becomes the array [ASCIIHexDecode, old filter] */
+ pdf_array_push(newf, ahx);
+ pdf_array_push(newf, f);
+ f = newf;
+ if (pdf_is_dict(dp))
+ {
+ newdp = pdf_new_array(ctx, 2); /* parms become [null, old parms] so they stay aligned with the filters */
+ pdf_array_push(newdp, nullobj);
+ pdf_array_push(newdp, dp);
+ dp = newdp;
+ }
+ }
+ else if (pdf_is_array(f))
+ {
+ pdf_array_insert(f, ahx); /* NOTE(review): presumably inserts at the front of the array - confirm against pdf_array_insert */
+ if (pdf_is_array(dp))
+ pdf_array_insert(dp, nullobj);
+ }
+ else
+ f = ahx; /* no usable Filter entry: ASCIIHexDecode becomes the only filter */
+
+ pdf_dict_puts(dict, "Filter", f);
+ if (dp)
+ pdf_dict_puts(dict, "DecodeParms", dp);
+
+ pdf_drop_obj(ahx);
+ pdf_drop_obj(nullobj);
+ if (newf)
+ pdf_drop_obj(newf);
+ if (newdp)
+ pdf_drop_obj(newdp);
+}
+
+static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj, int num, int gen)
+{ /* Write stream object num/gen with its raw (still filtered) data; optionally hex-encode binary data. */
+ fz_buffer *buf, *tmp;
+ pdf_obj *newlen;
+ fz_context *ctx = xref->ctx;
+ int orig_num = opts->revrenumbermap[num]; /* number the object had before renumbering */
+ int orig_gen = opts->revgenlist[num]; /* and its generation */
+
+ buf = pdf_load_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen);
+
+ if (opts->doascii && isbinarystream(buf))
+ {
+ tmp = hexbuf(ctx, buf->data, buf->len); /* NOTE(review): buf is not dropped if this or a later call throws - confirm whether that matters */
+ fz_drop_buffer(ctx, buf);
+ buf = tmp;
+
+ addhexfilter(xref, obj);
+
+ newlen = pdf_new_int(ctx, buf->len); /* Length must describe the hex-encoded data */
+ pdf_dict_puts(obj, "Length", newlen);
+ pdf_drop_obj(newlen);
+ }
+
+ fprintf(opts->out, "%d %d obj\n", num, gen);
+ pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ fprintf(opts->out, "stream\n");
+ fwrite(buf->data, 1, buf->len, opts->out);
+ fprintf(opts->out, "endstream\nendobj\n\n");
+
+ fz_drop_buffer(ctx, buf);
+}
+
+static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj, int num, int gen)
+{ /* Write stream object num/gen with its data loaded through pdf_load_renumbered_stream and Filter/DecodeParms removed; optionally hex-encode binary data. */
+ fz_buffer *buf, *tmp;
+ pdf_obj *newlen;
+ fz_context *ctx = xref->ctx;
+ int orig_num = opts->revrenumbermap[num]; /* number the object had before renumbering */
+ int orig_gen = opts->revgenlist[num]; /* and its generation */
+
+ buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen);
+
+ pdf_dict_dels(obj, "Filter"); /* the written data no longer carries the original filters */
+ pdf_dict_dels(obj, "DecodeParms");
+
+ if (opts->doascii && isbinarystream(buf))
+ {
+ tmp = hexbuf(ctx, buf->data, buf->len); /* NOTE(review): buf is not dropped if this or a later call throws - confirm whether that matters */
+ fz_drop_buffer(ctx, buf);
+ buf = tmp;
+
+ addhexfilter(xref, obj);
+ }
+
+ newlen = pdf_new_int(ctx, buf->len); /* always rewritten: the data length changed */
+ pdf_dict_puts(obj, "Length", newlen);
+ pdf_drop_obj(newlen);
+
+ fprintf(opts->out, "%d %d obj\n", num, gen);
+ pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ fprintf(opts->out, "stream\n");
+ fwrite(buf->data, 1, buf->len, opts->out);
+ fprintf(opts->out, "endstream\nendobj\n\n");
+
+ fz_drop_buffer(ctx, buf);
+}
+
+static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, int gen)
+{ /* Write object num/gen to opts->out; ObjStm and XRef objects are not written and are unmarked in opts->uselist instead. */
+ pdf_obj *obj;
+ pdf_obj *type;
+ fz_context *ctx = xref->ctx;
+
+ obj = pdf_load_object(xref, num, gen);
+
+ /* skip ObjStm and XRef objects */
+ if (pdf_is_dict(obj))
+ {
+ type = pdf_dict_gets(obj, "Type");
+ if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "ObjStm"))
+ {
+ opts->uselist[num] = 0; /* becomes a free entry in the written xref */
+ pdf_drop_obj(obj);
+ return;
+ }
+ if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "XRef"))
+ {
+ opts->uselist[num] = 0;
+ pdf_drop_obj(obj);
+ return;
+ }
+ }
+
+ if (!pdf_is_stream(xref, num, gen))
+ {
+ fprintf(opts->out, "%d %d obj\n", num, gen);
+ pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ fprintf(opts->out, "endobj\n\n");
+ }
+ else
+ {
+ int dontexpand = 0; /* set when this stream's kind is excluded by the doexpand flags */
+ if (opts->doexpand != 0 && opts->doexpand != fz_expand_all) /* selective expansion: classify the stream as image or font data */
+ {
+ pdf_obj *o;
+
+ if ((o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "XObject")) &&
+ (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Image")))
+ dontexpand = !(opts->doexpand & fz_expand_images);
+ if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "Font"))
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "FontDescriptor"))
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if ((o = pdf_dict_gets(obj, "Length1")) != NULL) /* Length1/2/3 and the Type1C/CIDFontType0C subtypes are all handled as font data */
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if ((o = pdf_dict_gets(obj, "Length2")) != NULL)
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if ((o = pdf_dict_gets(obj, "Length3")) != NULL)
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Type1C"))
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "CIDFontType0C"))
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ }
+ if (opts->doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj)) /* JPX images are never expanded */
+ expandstream(xref, opts, obj, num, gen);
+ else
+ copystream(xref, opts, obj, num, gen);
+ }
+
+ pdf_drop_obj(obj); /* NOTE(review): not reached if expandstream/copystream throw - confirm whether that matters */
+}
+
+static void writexref(pdf_document *xref, pdf_write_options *opts)
+{ /* Write the xref table, a new trailer dictionary and the startxref/%%EOF footer to opts->out. */
+ pdf_obj *trailer;
+ pdf_obj *obj;
+ int startxref;
+ int num;
+ fz_context *ctx = xref->ctx;
+
+ startxref = ftell(opts->out); /* file offset of the xref table, repeated in the footer */
+
+ fprintf(opts->out, "xref\n0 %d\n", xref->len);
+ for (num = 0; num < xref->len; num++)
+ {
+ if (opts->uselist[num])
+ fprintf(opts->out, "%010d %05d n \n", opts->ofslist[num], opts->genlist[num]); /* in use: ofslist is the object's file offset */
+ else
+ fprintf(opts->out, "%010d %05d f \n", opts->ofslist[num], opts->genlist[num]); /* free: ofslist is the next free object number, as set up by pdf_write_document */
+ }
+ fprintf(opts->out, "\n");
+
+ trailer = pdf_new_dict(ctx, 5);
+
+ obj = pdf_new_int(ctx, xref->len);
+ pdf_dict_puts(trailer, "Size", obj);
+ pdf_drop_obj(obj);
+
+ obj = pdf_dict_gets(xref->trailer, "Info"); /* only Info, Root and ID are carried over from the old trailer */
+ if (obj)
+ pdf_dict_puts(trailer, "Info", obj);
+
+ obj = pdf_dict_gets(xref->trailer, "Root");
+ if (obj)
+ pdf_dict_puts(trailer, "Root", obj);
+
+ obj = pdf_dict_gets(xref->trailer, "ID");
+ if (obj)
+ pdf_dict_puts(trailer, "ID", obj);
+
+ fprintf(opts->out, "trailer\n");
+ pdf_fprint_obj(opts->out, trailer, opts->doexpand == 0);
+ fprintf(opts->out, "\n");
+
+ pdf_drop_obj(trailer);
+
+ fprintf(opts->out, "startxref\n%d\n%%%%EOF\n", startxref);
+}
+
+/*
+ * Write the document 'xref' to a new file called 'filename'.
+ *
+ * fz_opts selects the processing:
+ *   dogarbage >= 1  only objects reachable from the trailer are written,
+ *   dogarbage >= 2  objects are additionally renumbered compactly,
+ *   dogarbage >= 3  duplicate non-stream objects are merged first,
+ *   doexpand        stream expansion (see writeobject),
+ *   doascii         hex-encode binary stream data.
+ *
+ * Does nothing if xref or fz_opts is NULL. Throws if the output file
+ * cannot be opened; errors raised while writing are rethrown after the
+ * work arrays have been freed and the file has been closed.
+ */
+void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz_opts)
+{
+ int lastfree;
+ int num;
+ pdf_write_options opts = { 0 };
+ fz_context *ctx;
+
+ if (!xref || !fz_opts)
+ return;
+
+ ctx = xref->ctx;
+
+ opts.out = fopen(filename, "wb");
+ if (!opts.out)
+ fz_throw(ctx, "cannot open output file '%s'", filename);
+
+ fz_try(ctx)
+ {
+ /* fz_opts is known to be non-NULL here (checked above) */
+ opts.doexpand = fz_opts->doexpand;
+ opts.dogarbage = fz_opts->dogarbage;
+ opts.doascii = fz_opts->doascii;
+ opts.uselist = fz_malloc_array(ctx, xref->len + 1, sizeof(char));
+ opts.ofslist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+ opts.genlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+ opts.renumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+ opts.revrenumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+ opts.revgenlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+
+ fprintf(opts.out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10);
+ fprintf(opts.out, "%%\316\274\341\277\246\n\n");
+
+ for (num = 0; num < xref->len; num++)
+ {
+ opts.uselist[num] = 0;
+ opts.ofslist[num] = 0;
+ /* Entries whose type is unset (0) never get a generation
+ * assigned below, yet it is incremented and printed for the
+ * free list, so it must start from a defined value. */
+ opts.genlist[num] = 0;
+ opts.renumbermap[num] = num;
+ opts.revrenumbermap[num] = num;
+ opts.revgenlist[num] = xref->table[num].gen;
+ }
+
+ /* Make sure any objects hidden in compressed streams have been loaded */
+ preloadobjstms(xref);
+
+ /* Sweep & mark objects from the trailer */
+ if (opts.dogarbage >= 1)
+ sweepobj(xref, &opts, xref->trailer);
+
+ /* Coalesce and renumber duplicate objects */
+ if (opts.dogarbage >= 3)
+ removeduplicateobjs(xref, &opts);
+
+ /* Compact xref by renumbering and removing unused objects */
+ if (opts.dogarbage >= 2)
+ compactxref(xref, &opts);
+
+ /* Make renumbering affect all indirect references and update xref */
+ if (opts.dogarbage >= 2)
+ renumberobjs(xref, &opts);
+
+ for (num = 0; num < xref->len; num++)
+ {
+ if (xref->table[num].type == 'f')
+ opts.genlist[num] = xref->table[num].gen;
+ if (xref->table[num].type == 'n')
+ opts.genlist[num] = xref->table[num].gen;
+ if (xref->table[num].type == 'o')
+ opts.genlist[num] = 0;
+
+ /* When garbage collecting, only objects marked by the sweep are written */
+ if (opts.dogarbage && !opts.uselist[num])
+ continue;
+
+ if (xref->table[num].type == 'n' || xref->table[num].type == 'o')
+ {
+ opts.uselist[num] = 1;
+ opts.ofslist[num] = ftell(opts.out);
+ writeobject(xref, &opts, num, opts.genlist[num]);
+ }
+ }
+
+ /* Construct linked list of free object slots */
+ lastfree = 0;
+ for (num = 0; num < xref->len; num++)
+ {
+ if (!opts.uselist[num])
+ {
+ opts.genlist[num]++;
+ opts.ofslist[lastfree] = num;
+ lastfree = num;
+ }
+ }
+
+ writexref(xref, &opts);
+ }
+ fz_always(ctx)
+ {
+ fz_free(ctx, opts.uselist);
+ fz_free(ctx, opts.ofslist);
+ fz_free(ctx, opts.genlist);
+ fz_free(ctx, opts.renumbermap);
+ fz_free(ctx, opts.revrenumbermap);
+ fz_free(ctx, opts.revgenlist);
+ fclose(opts.out);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
diff --git a/pdf/pdf_xobject.c b/pdf/pdf_xobject.c
index 175f84b5..88ab5a01 100644
--- a/pdf/pdf_xobject.c
+++ b/pdf/pdf_xobject.c
@@ -23,7 +23,7 @@ pdf_free_xobject_imp(fz_context *ctx, fz_storable *xobj_)
if (xobj->resources)
pdf_drop_obj(xobj->resources);
if (xobj->contents)
- fz_drop_buffer(ctx, xobj->contents);
+ pdf_drop_obj(xobj->contents);
pdf_drop_obj(xobj->me);
fz_free(ctx, xobj);
}
@@ -33,7 +33,7 @@ pdf_xobject_size(pdf_xobject *xobj)
{
if (xobj == NULL)
return 0;
- return sizeof(*xobj) + (xobj->colorspace ? xobj->colorspace->size : 0) + (xobj->contents ? xobj->contents->len : 0);
+ return sizeof(*xobj) + (xobj->colorspace ? xobj->colorspace->size : 0);
}
pdf_xobject *
@@ -98,7 +98,7 @@ pdf_load_xobject(pdf_document *xref, pdf_obj *dict)
fz_try(ctx)
{
- form->contents = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict));
+ form->contents = pdf_keep_obj(dict);
}
fz_catch(ctx)
{
@@ -114,6 +114,7 @@ pdf_load_xobject(pdf_document *xref, pdf_obj *dict)
pdf_obj *
pdf_new_xobject(pdf_document *xref, fz_rect *bbox, fz_matrix *mat)
{
+ int idict_num;
pdf_obj *idict = NULL;
pdf_obj *dict = NULL;
pdf_xobject *form = NULL;
@@ -195,12 +196,15 @@ pdf_new_xobject(pdf_document *xref, fz_rect *bbox, fz_matrix *mat)
form->resources = res;
res = NULL;
- idict = pdf_new_stream_indirection(xref, dict);
+ idict_num = pdf_create_object(xref);
+ pdf_update_object(xref, idict_num, dict);
+ idict = pdf_new_indirect(ctx, idict_num, 0, xref);
pdf_drop_obj(dict);
dict = NULL;
pdf_store_item(ctx, idict, form, pdf_xobject_size(form));
+ form->contents = pdf_keep_obj(idict);
form->me = pdf_keep_obj(idict);
pdf_drop_xobject(ctx, form);
@@ -220,8 +224,8 @@ pdf_new_xobject(pdf_document *xref, fz_rect *bbox, fz_matrix *mat)
return idict;
}
-void pdf_xobject_set_contents(fz_context *ctx, pdf_xobject *form, fz_buffer *buffer)
+void pdf_xobject_set_contents(pdf_document *xref, pdf_xobject *form, fz_buffer *buffer)
{
- fz_drop_buffer(ctx, form->contents);
- form->contents = fz_keep_buffer(ctx, buffer);
+ pdf_dict_dels(form->contents, "Filter");
+ pdf_update_stream(xref, pdf_to_num(form->contents), buffer);
}
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c
index cd15051c..ea99d3c2 100644
--- a/pdf/pdf_xref.c
+++ b/pdf/pdf_xref.c
@@ -173,6 +173,7 @@ pdf_resize_xref(pdf_document *xref, int newlen)
xref->table[i].ofs = 0;
xref->table[i].gen = 0;
xref->table[i].stm_ofs = 0;
+ xref->table[i].stm_buf = NULL;
xref->table[i].obj = NULL;
}
xref->len = newlen;
@@ -293,8 +294,7 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in
}
}
-/* Entered with file locked. Drops the lock in the middle, but then picks
- * it up again before exiting. */
+/* Entered with file locked, remains locked throughout. */
static pdf_obj *
pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf)
{
@@ -321,7 +321,6 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf)
fz_try(ctx)
{
- fz_unlock(ctx, FZ_LOCK_FILE);
obj = pdf_dict_gets(trailer, "Size");
if (!obj)
fz_throw(ctx, "xref stream missing Size entry (%d %d R)", num, gen);
@@ -371,7 +370,6 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf)
pdf_drop_obj(index);
fz_rethrow(ctx);
}
- fz_lock(ctx, FZ_LOCK_FILE);
return trailer;
}
@@ -410,13 +408,13 @@ static void
pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf)
{
pdf_obj *trailer = NULL;
- pdf_obj *xrefstm = NULL;
- pdf_obj *prev = NULL;
fz_context *ctx = xref->ctx;
+ int xrefstmofs = 0;
+ int prevofs = 0;
fz_var(trailer);
- fz_var(xrefstm);
- fz_var(prev);
+ fz_var(xrefstmofs);
+ fz_var(prevofs);
fz_try(ctx)
{
@@ -425,20 +423,21 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf)
trailer = pdf_read_xref(xref, ofs, buf);
/* FIXME: do we overwrite free entries properly? */
- xrefstm = pdf_dict_gets(trailer, "XRefStm");
- prev = pdf_dict_gets(trailer, "Prev");
+ xrefstmofs = pdf_to_int(pdf_dict_gets(trailer, "XRefStm"));
+ prevofs = pdf_to_int(pdf_dict_gets(trailer, "Prev"));
+
/* We only recurse if we have both xrefstm and prev.
* Hopefully this happens infrequently. */
- if (xrefstm && prev)
- pdf_read_xref_sections(xref, pdf_to_int(xrefstm), buf);
- if (prev)
- ofs = pdf_to_int(prev);
- else if (xrefstm)
- ofs = pdf_to_int(xrefstm);
+ if (xrefstmofs && prevofs)
+ pdf_read_xref_sections(xref, xrefstmofs, buf);
+ if (prevofs)
+ ofs = prevofs;
+ else if (xrefstmofs)
+ ofs = xrefstmofs;
pdf_drop_obj(trailer);
trailer = NULL;
}
- while (prev || xrefstm);
+ while (prevofs || xrefstmofs);
}
fz_catch(ctx)
{
@@ -449,12 +448,14 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf)
/*
* load xref tables from pdf
+ *
+ * File locked on entry, throughout and on exit.
*/
static void
pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf)
{
- pdf_obj *size;
+ int size;
int i;
fz_context *ctx = xref->ctx;
@@ -464,11 +465,11 @@ pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf)
pdf_read_trailer(xref, buf);
- size = pdf_dict_gets(xref->trailer, "Size");
+ size = pdf_to_int(pdf_dict_gets(xref->trailer, "Size"));
if (!size)
fz_throw(ctx, "trailer missing Size entry");
- pdf_resize_xref(xref, pdf_to_int(size));
+ pdf_resize_xref(xref, size);
pdf_read_xref_sections(xref, xref->startxref, buf);
@@ -660,33 +661,18 @@ pdf_free_ocg(fz_context *ctx, pdf_ocg_descriptor *desc)
* If password is not null, try to decrypt.
*/
-static void pdf_init_document(pdf_document *xref);
-
-pdf_document *
-pdf_open_document_with_stream(fz_stream *file)
+static void
+pdf_init_document(pdf_document *xref)
{
- pdf_document *xref;
+ fz_context *ctx = xref->ctx;
pdf_obj *encrypt, *id;
pdf_obj *dict = NULL;
pdf_obj *obj;
pdf_obj *nobj = NULL;
int i, repaired = 0;
- int locked;
- fz_context *ctx = file->ctx;
fz_var(dict);
fz_var(nobj);
- fz_var(locked);
-
- xref = fz_malloc_struct(ctx, pdf_document);
- pdf_init_document(xref);
- xref->lexbuf.base.size = PDF_LEXBUF_LARGE;
-
- xref->file = fz_keep_stream(file);
- xref->ctx = ctx;
-
- fz_lock(ctx, FZ_LOCK_FILE);
- locked = 1;
fz_try(ctx)
{
@@ -717,9 +703,6 @@ pdf_open_document_with_stream(fz_stream *file)
if (repaired)
pdf_repair_xref(xref, &xref->lexbuf.base);
- fz_unlock(ctx, FZ_LOCK_FILE);
- locked = 0;
-
encrypt = pdf_dict_gets(xref->trailer, "Encrypt");
id = pdf_dict_gets(xref->trailer, "ID");
if (pdf_is_dict(encrypt))
@@ -778,11 +761,6 @@ pdf_open_document_with_stream(fz_stream *file)
}
}
}
- fz_always(ctx)
- {
- if (locked)
- fz_unlock(ctx, FZ_LOCK_FILE);
- }
fz_catch(ctx)
{
pdf_drop_obj(dict);
@@ -799,8 +777,6 @@ pdf_open_document_with_stream(fz_stream *file)
{
fz_warn(ctx, "Ignoring Broken Optional Content");
}
-
- return xref;
}
void
@@ -865,11 +841,12 @@ pdf_print_xref(pdf_document *xref)
printf("xref\n0 %d\n", xref->len);
for (i = 0; i < xref->len; i++)
{
- printf("%05d: %010d %05d %c (stm_ofs=%d)\n", i,
+ printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i,
xref->table[i].ofs,
xref->table[i].gen,
xref->table[i].type ? xref->table[i].type : '-',
- xref->table[i].stm_ofs);
+ xref->table[i].stm_ofs,
+ xref->table[i].stm_buf);
}
}
@@ -987,7 +964,6 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
}
else if (x->type == 'n')
{
- fz_lock(ctx, FZ_LOCK_FILE);
fz_seek(xref->file, x->ofs, 0);
fz_try(ctx)
@@ -997,7 +973,6 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
}
fz_catch(ctx)
{
- fz_unlock(ctx, FZ_LOCK_FILE);
fz_throw(ctx, "cannot parse object (%d %d R)", num, gen);
}
@@ -1005,13 +980,11 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
{
pdf_drop_obj(x->obj);
x->obj = NULL;
- fz_unlock(ctx, FZ_LOCK_FILE);
fz_throw(ctx, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
}
if (xref->crypt)
pdf_crypt_obj(ctx, xref->crypt, x->obj, num, gen);
- fz_unlock(ctx, FZ_LOCK_FILE);
}
else if (x->type == 'o')
{
@@ -1093,127 +1066,105 @@ pdf_resolve_indirect(pdf_obj *ref)
return ref;
}
-int pdf_count_objects(pdf_document *doc)
+int
+pdf_count_objects(pdf_document *doc)
{
return doc->len;
}
-/* Replace numbered object -- for use by pdfclean and similar tools */
+int
+pdf_create_object(pdf_document *xref)
+{ /* Append one new entry to the xref table and return its object number. */
+ /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
+ int num = xref->len; /* the new object takes the first number past the current table */
+ pdf_resize_xref(xref, num + 1);
+ xref->table[num].type = 'f'; /* stays free until pdf_update_object stores an object and sets type 'n' */
+ xref->table[num].ofs = -1;
+ xref->table[num].gen = 0;
+ xref->table[num].stm_ofs = 0;
+ xref->table[num].stm_buf = NULL;
+ xref->table[num].obj = NULL;
+ return num;
+}
+
void
-pdf_update_object(pdf_document *xref, int num, int gen, pdf_obj *newobj)
+pdf_delete_object(pdf_document *xref, int num)
{
pdf_xref_entry *x;
if (num < 0 || num >= xref->len)
{
- fz_warn(xref->ctx, "object out of range (%d %d R); xref size %d", num, gen, xref->len);
+ fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len);
return;
}
x = &xref->table[num];
- if (x->obj)
- pdf_drop_obj(x->obj);
+ fz_drop_buffer(xref->ctx, x->stm_buf);
+ pdf_drop_obj(x->obj);
- x->obj = pdf_keep_obj(newobj);
- x->type = 'n';
+ x->type = 'f';
x->ofs = 0;
+ x->gen = 0;
+ x->stm_ofs = 0;
+ x->stm_buf = NULL;
+ x->obj = NULL;
}
-pdf_obj *
-pdf_new_stream_indirection(pdf_document *xref, pdf_obj *obj)
-{
- int num = xref->len;
- pdf_resize_xref(xref, xref->len + 1);
- pdf_update_object(xref, num, 0, obj);
- /* Set stm_ofs, so that obj is treated as a stream */
- xref->table[num].stm_ofs = 1;
-
- return pdf_new_indirect(xref->ctx, num, 0, xref);
-}
-
-/*
- * Convenience function to open a file then call pdf_open_document_with_stream.
- */
-
-pdf_document *
-pdf_open_document(fz_context *ctx, const char *filename)
+void
+pdf_update_object(pdf_document *xref, int num, pdf_obj *newobj)
{
- fz_stream *file = NULL;
- pdf_document *xref;
+ pdf_xref_entry *x;
- fz_var(file);
- fz_try(ctx)
- {
- file = fz_open_file(ctx, filename);
- xref = pdf_open_document_with_stream(file);
- }
- fz_catch(ctx)
+ if (num < 0 || num >= xref->len)
{
- fz_close(file);
- fz_throw(ctx, "cannot load document '%s'", filename);
+ fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len);
+ return;
}
- fz_close(file);
- return xref;
-}
-
-/* Document interface wrappers */
+ x = &xref->table[num];
-static void pdf_close_document_shim(fz_document *doc)
-{
- pdf_close_document((pdf_document*)doc);
-}
+ if (x->obj)
+ pdf_drop_obj(x->obj);
-static int pdf_needs_password_shim(fz_document *doc)
-{
- return pdf_needs_password((pdf_document*)doc);
+ x->type = 'n';
+ x->ofs = 0;
+ x->obj = pdf_keep_obj(newobj);
}
-static int pdf_authenticate_password_shim(fz_document *doc, char *password)
+fz_buffer *
+pdf_get_stream(pdf_document *xref, int num)
{
- return pdf_authenticate_password((pdf_document*)doc, password);
-}
+ pdf_xref_entry *x;
-static fz_outline *pdf_load_outline_shim(fz_document *doc)
-{
- return pdf_load_outline((pdf_document*)doc);
-}
+ if (num < 0 || num >= xref->len)
+ fz_throw(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len);
-static int pdf_count_pages_shim(fz_document *doc)
-{
- return pdf_count_pages((pdf_document*)doc);
-}
+ x = &xref->table[num];
-static fz_page *pdf_load_page_shim(fz_document *doc, int number)
-{
- return (fz_page*) pdf_load_page((pdf_document*)doc, number);
+ return x->stm_buf;
}
-static fz_link *pdf_load_links_shim(fz_document *doc, fz_page *page)
+void
+pdf_update_stream(pdf_document *xref, int num, fz_buffer *newbuf)
{
- return pdf_load_links((pdf_document*)doc, (pdf_page*)page);
-}
+ pdf_xref_entry *x;
-static fz_rect pdf_bound_page_shim(fz_document *doc, fz_page *page)
-{
- return pdf_bound_page((pdf_document*)doc, (pdf_page*)page);
-}
+ if (num < 0 || num >= xref->len)
+ {
+ fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len);
+ return;
+ }
-static void pdf_run_page_shim(fz_document *doc, fz_page *page, fz_device *dev, fz_matrix transform, fz_cookie *cookie)
-{
- pdf_run_page((pdf_document*)doc, (pdf_page*)page, dev, transform, cookie);
-}
+ x = &xref->table[num];
-static void pdf_free_page_shim(fz_document *doc, fz_page *page)
-{
- pdf_free_page((pdf_document*)doc, (pdf_page*)page);
+ fz_drop_buffer(xref->ctx, x->stm_buf);
+ x->stm_buf = fz_keep_buffer(xref->ctx, newbuf);
}
-static int pdf_meta(fz_document *doc_, int key, void *ptr, int size)
+int
+pdf_meta(pdf_document *doc, int key, void *ptr, int size)
{
- pdf_document *doc = (pdf_document *)doc_;
-
switch(key)
{
/*
@@ -1295,9 +1246,72 @@ static fz_interactive *pdf_interact_shim(fz_document *doc)
return (fz_interactive *)doc;
}
-static void
-pdf_init_document(pdf_document *doc)
+/*
+ Wrappers to implement the fz_document interface for pdf_document.
+
+ The functions are split across two files to allow calls to a
+ version of the constructor that does not link in the interpreter.
+ The interpreter references the built-in font and cmap resources
+ which are quite big. Not linking those into the mubusy binary
+ saves roughly 6MB of space.
+*/
+
+static void pdf_close_document_shim(fz_document *doc)
{
+ pdf_close_document((pdf_document*)doc); /* fz_document adapter (installed as doc->super.close): cast back and forward */
+}
+
+static int pdf_needs_password_shim(fz_document *doc)
+{
+ return pdf_needs_password((pdf_document*)doc); /* fz_document adapter (installed as doc->super.needs_password): cast back and forward */
+}
+
+static int pdf_authenticate_password_shim(fz_document *doc, char *password)
+{
+ return pdf_authenticate_password((pdf_document*)doc, password); /* fz_document adapter (installed as doc->super.authenticate_password): cast back and forward */
+}
+
+static fz_outline *pdf_load_outline_shim(fz_document *doc)
+{
+ return pdf_load_outline((pdf_document*)doc); /* fz_document-typed adapter: cast back to pdf_document and forward */
+}
+
+static int pdf_count_pages_shim(fz_document *doc)
+{
+ return pdf_count_pages((pdf_document*)doc); /* fz_document-typed adapter: cast back to pdf_document and forward */
+}
+
+static fz_page *pdf_load_page_shim(fz_document *doc, int number)
+{
+ return (fz_page*) pdf_load_page((pdf_document*)doc, number); /* fz_document adapter (installed as doc->super.load_page); the pdf_page result is handed back as an fz_page */
+}
+
+static fz_link *pdf_load_links_shim(fz_document *doc, fz_page *page)
+{
+ return pdf_load_links((pdf_document*)doc, (pdf_page*)page); /* fz_document adapter (installed as doc->super.load_links): cast both arguments back and forward */
+}
+
+static fz_rect pdf_bound_page_shim(fz_document *doc, fz_page *page)
+{
+ return pdf_bound_page((pdf_document*)doc, (pdf_page*)page); /* fz_document adapter (installed as doc->super.bound_page): cast both arguments back and forward */
+}
+
+static void pdf_free_page_shim(fz_document *doc, fz_page *page)
+{
+ pdf_free_page((pdf_document*)doc, (pdf_page*)page); /* fz_document adapter (installed as doc->super.free_page): cast both arguments back and forward */
+}
+
+static int pdf_meta_shim(fz_document *doc, int key, void *ptr, int size)
+{
+ return pdf_meta((pdf_document*)doc, key, ptr, size); /* fz_document adapter (installed as doc->super.meta) around the now public pdf_meta */
+}
+
+/*
+	Allocate a pdf_document for 'file' and fill in its fz_document
+	function table with the shim wrappers.
+
+	run_page is deliberately left NULL here; the constructors in
+	pdf_xref_aux.c install it afterwards.
+
+	The document stores the result of fz_keep_stream(file) and the
+	context taken from the stream.
+*/
+static pdf_document *
+pdf_new_document(fz_stream *file)
+{
+ fz_context *ctx = file->ctx;
+ pdf_document *doc = fz_malloc_struct(ctx, pdf_document);
+
doc->super.close = pdf_close_document_shim;
doc->super.needs_password = pdf_needs_password_shim;
doc->super.authenticate_password = pdf_authenticate_password_shim;
@@ -1306,8 +1320,47 @@ pdf_init_document(pdf_document *doc)
doc->super.load_page = pdf_load_page_shim;
doc->super.load_links = pdf_load_links_shim;
doc->super.bound_page = pdf_bound_page_shim;
- doc->super.run_page = pdf_run_page_shim;
+ doc->super.run_page = NULL; /* see pdf_xref_aux.c */
doc->super.free_page = pdf_free_page_shim;
- doc->super.meta = pdf_meta;
+ doc->super.meta = pdf_meta_shim;
doc->super.interact = pdf_interact_shim;
+
+ /* Per-document state: lexer buffer size, stream reference, owning context. */
+ doc->lexbuf.base.size = PDF_LEXBUF_LARGE;
+ doc->file = fz_keep_stream(file);
+ doc->ctx = ctx;
+
+ return doc;
+}
+
+/*
+	Build a pdf_document on top of an already-open stream and run
+	pdf_init_document on it. The run_page hook is not installed
+	(pdf_new_document leaves it NULL).
+*/
+pdf_document *
+pdf_open_document_no_run_with_stream(fz_stream *file)
+{
+	pdf_document *pdf;
+
+	pdf = pdf_new_document(file);
+	pdf_init_document(pdf);
+
+	return pdf;
+}
+
+/*
+	Open the PDF at 'filename' and return an initialised
+	pdf_document whose run_page hook is not installed
+	(pdf_new_document leaves it NULL).
+
+	Any error raised while opening or initialising is re-thrown as
+	"cannot load document '<filename>'".
+*/
+pdf_document *
+pdf_open_document_no_run(fz_context *ctx, const char *filename)
+{
+	fz_stream *stm = NULL;
+	pdf_document *pdf;
+
+	fz_var(stm);
+
+	fz_try(ctx)
+	{
+		stm = fz_open_file(ctx, filename);
+		pdf = pdf_new_document(stm);
+		pdf_init_document(pdf);
+	}
+	fz_always(ctx)
+	{
+		/* pdf_new_document took its own reference via fz_keep_stream. */
+		fz_close(stm);
+	}
+	fz_catch(ctx)
+	{
+		fz_throw(ctx, "cannot load document '%s'", filename);
+	}
+	return pdf;
+}
diff --git a/pdf/pdf_xref_aux.c b/pdf/pdf_xref_aux.c
new file mode 100644
index 00000000..2d760334
--- /dev/null
+++ b/pdf/pdf_xref_aux.c
@@ -0,0 +1,31 @@
+#include "fitz-internal.h"
+#include "mupdf-internal.h"
+
+/*
+ These functions have been split out of pdf_xref.c to allow tools
+ to be linked without pulling in the interpreter. The interpreter
+ references the built-in font and cmap resources which are quite
+ big. Not linking those into the tools saves roughly 6MB in the
+ resulting executables.
+*/
+
+/* fz_document 'run_page' hook: forwards to pdf_run_page. */
+static void
+pdf_run_page_shim(fz_document *doc, fz_page *page, fz_device *dev, fz_matrix transform, fz_cookie *cookie)
+{
+	pdf_document *pdf = (pdf_document*)doc;
+	pdf_page *pdfpage = (pdf_page*)page;
+
+	pdf_run_page(pdf, pdfpage, dev, transform, cookie);
+}
+
+/*
+	Open a document from a stream via
+	pdf_open_document_no_run_with_stream, then install the
+	run_page hook on the result.
+*/
+pdf_document *
+pdf_open_document_with_stream(fz_stream *file)
+{
+	pdf_document *pdf;
+
+	pdf = pdf_open_document_no_run_with_stream(file);
+	pdf->super.run_page = pdf_run_page_shim;
+
+	return pdf;
+}
+
+/*
+	Open the document at 'filename' via pdf_open_document_no_run,
+	then install the run_page hook on the result.
+*/
+pdf_document *
+pdf_open_document(fz_context *ctx, const char *filename)
+{
+	pdf_document *pdf;
+
+	pdf = pdf_open_document_no_run(ctx, filename);
+	pdf->super.run_page = pdf_run_page_shim;
+
+	return pdf;
+}