summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robin Watts <robin.watts@artifex.com> 2012-04-25 20:40:29 +0100
committer Robin Watts <robin.watts@artifex.com> 2012-04-28 09:44:49 +0100
commit 4e3d351910f0927283fdc6911574ffe982ca678b (patch)
tree 854e8367ae8906f957c3bb766ae578188e38cd61
parent 463e9b6c69cf9232da5ae9659eaad6e7050b594d (diff)
download mupdf-4e3d351910f0927283fdc6911574ffe982ca678b.tar.xz
Move guts of pdfclean into new pdf_write function.
Expose pdf_write function through the document interface.
-rw-r--r-- apps/mupdfclean.c 656
-rw-r--r-- fitz/doc_document.c 7
-rw-r--r-- fitz/fitz-internal.h 1
-rw-r--r-- fitz/fitz.h 46
-rw-r--r-- pdf/mupdf.h 2
-rw-r--r-- pdf/pdf_write.c 662
-rw-r--r-- win32/libmupdf.vcproj 4
7 files changed, 735 insertions, 643 deletions
diff --git a/apps/mupdfclean.c b/apps/mupdfclean.c
index 5b95fdae..f2676782 100644
--- a/apps/mupdfclean.c
+++ b/apps/mupdfclean.c
@@ -14,29 +14,13 @@
static FILE *out = NULL;
-enum
-{
- expand_images = 1,
- expand_fonts = 2,
- expand_all = -1
-};
-
-static char *uselist = NULL;
-static int *ofslist = NULL;
-static int *genlist = NULL;
-static int *renumbermap = NULL;
-
-static int dogarbage = 0;
-static int doexpand = 0;
-static int doascii = 0;
-
static pdf_document *xref = NULL;
static fz_context *ctx = NULL;
static void usage(void)
{
fprintf(stderr,
- "usage: pdfclean [options] input.pdf [output.pdf] [pages]\n"
+ "usage: mupdfclean [options] input.pdf [output.pdf] [pages]\n"
"\t-p -\tpassword\n"
"\t-g\tgarbage collect unused objects\n"
"\t-gg\tin addition to -g compact xref table\n"
@@ -50,251 +34,6 @@ static void usage(void)
}
/*
- * Garbage collect objects not reachable from the trailer.
- */
-
-static pdf_obj *sweepref(pdf_obj *obj)
-{
- int num = pdf_to_num(obj);
- int gen = pdf_to_gen(obj);
-
- if (num < 0 || num >= xref->len)
- return NULL;
- if (uselist[num])
- return NULL;
-
- uselist[num] = 1;
-
- /* Bake in /Length in stream objects */
- fz_try(ctx)
- {
- if (pdf_is_stream(xref, num, gen))
- {
- pdf_obj *len = pdf_dict_gets(obj, "Length");
- if (pdf_is_indirect(len))
- {
- uselist[pdf_to_num(len)] = 0;
- len = pdf_resolve_indirect(len);
- pdf_dict_puts(obj, "Length", len);
- }
- }
- }
- fz_catch(ctx)
- {
- /* Leave broken */
- }
-
- return pdf_resolve_indirect(obj);
-}
-
-static void sweepobj(pdf_obj *obj)
-{
- int i;
-
- if (pdf_is_indirect(obj))
- obj = sweepref(obj);
-
- if (pdf_is_dict(obj))
- {
- int n = pdf_dict_len(obj);
- for (i = 0; i < n; i++)
- sweepobj(pdf_dict_get_val(obj, i));
- }
-
- else if (pdf_is_array(obj))
- {
- int n = pdf_array_len(obj);
- for (i = 0; i < n; i++)
- sweepobj(pdf_array_get(obj, i));
- }
-}
-
-/*
- * Scan for and remove duplicate objects (slow)
- */
-
-static void removeduplicateobjs(void)
-{
- int num, other;
-
- for (num = 1; num < xref->len; num++)
- {
- /* Only compare an object to objects preceding it */
- for (other = 1; other < num; other++)
- {
- pdf_obj *a, *b;
-
- if (num == other || !uselist[num] || !uselist[other])
- continue;
-
- /*
- * Comparing stream objects data contents would take too long.
- *
- * pdf_is_stream calls pdf_cache_object and ensures
- * that the xref table has the objects loaded.
- */
- fz_try(ctx)
- {
- if (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0))
- continue;
- }
- fz_catch(ctx)
- {
- /* Assume different */
- }
-
- a = xref->table[num].obj;
- b = xref->table[other].obj;
-
- a = pdf_resolve_indirect(a);
- b = pdf_resolve_indirect(b);
-
- if (pdf_objcmp(a, b))
- continue;
-
- /* Keep the lowest numbered object */
- renumbermap[num] = MIN(num, other);
- renumbermap[other] = MIN(num, other);
- uselist[MAX(num, other)] = 0;
-
- /* One duplicate was found, do not look for another */
- break;
- }
- }
-}
-
-/*
- * Renumber objects sequentially so the xref is more compact
- */
-
-static void compactxref(void)
-{
- int num, newnum;
-
- /*
- * Update renumbermap in-place, clustering all used
- * objects together at low object ids. Objects that
- * already should be renumbered will have their new
- * object ids be updated to reflect the compaction.
- */
-
- newnum = 1;
- for (num = 1; num < xref->len; num++)
- {
- if (uselist[num] && renumbermap[num] == num)
- renumbermap[num] = newnum++;
- else if (renumbermap[num] != num)
- renumbermap[num] = renumbermap[renumbermap[num]];
- }
-}
-
-/*
- * Update indirect objects according to renumbering established when
- * removing duplicate objects and compacting the xref.
- */
-
-static void renumberobj(pdf_obj *obj)
-{
- int i;
- fz_context *ctx = xref->ctx;
-
- if (pdf_is_dict(obj))
- {
- int n = pdf_dict_len(obj);
- for (i = 0; i < n; i++)
- {
- pdf_obj *key = pdf_dict_get_key(obj, i);
- pdf_obj *val = pdf_dict_get_val(obj, i);
- if (pdf_is_indirect(val))
- {
- val = pdf_new_indirect(ctx, renumbermap[pdf_to_num(val)], 0, xref);
- fz_dict_put(obj, key, val);
- pdf_drop_obj(val);
- }
- else
- {
- renumberobj(val);
- }
- }
- }
-
- else if (pdf_is_array(obj))
- {
- int n = pdf_array_len(obj);
- for (i = 0; i < n; i++)
- {
- pdf_obj *val = pdf_array_get(obj, i);
- if (pdf_is_indirect(val))
- {
- val = pdf_new_indirect(ctx, renumbermap[pdf_to_num(val)], 0, xref);
- pdf_array_put(obj, i, val);
- pdf_drop_obj(val);
- }
- else
- {
- renumberobj(val);
- }
- }
- }
-}
-
-static void renumberobjs(void)
-{
- pdf_xref_entry *oldxref;
- int newlen;
- int num;
-
- /* Apply renumber map to indirect references in all objects in xref */
- renumberobj(xref->trailer);
- for (num = 0; num < xref->len; num++)
- {
- pdf_obj *obj = xref->table[num].obj;
-
- if (pdf_is_indirect(obj))
- {
- obj = pdf_new_indirect(ctx, renumbermap[pdf_to_num(obj)], 0, xref);
- pdf_update_object(xref, num, 0, obj);
- pdf_drop_obj(obj);
- }
- else
- {
- renumberobj(obj);
- }
- }
-
- /* Create new table for the reordered, compacted xref */
- oldxref = xref->table;
- xref->table = fz_malloc_array(xref->ctx, xref->len, sizeof(pdf_xref_entry));
- xref->table[0] = oldxref[0];
-
- /* Move used objects into the new compacted xref */
- newlen = 0;
- for (num = 1; num < xref->len; num++)
- {
- if (uselist[num])
- {
- if (newlen < renumbermap[num])
- newlen = renumbermap[num];
- xref->table[renumbermap[num]] = oldxref[num];
- }
- else
- {
- if (oldxref[num].obj)
- pdf_drop_obj(oldxref[num].obj);
- }
- }
-
- fz_free(xref->ctx, oldxref);
-
- /* Update the used objects count in compacted xref */
- xref->len = newlen + 1;
-
- /* Update list of used objects to fit with compacted xref */
- for (num = 1; num < xref->len; num++)
- uselist[num] = 1;
-}
-
-/*
* Recreate page tree to only retain specified pages.
*/
@@ -415,330 +154,6 @@ static void retainpages(int argc, char **argv)
}
}
-/*
- * Make sure we have loaded objects from object streams.
- */
-
-static void preloadobjstms(void)
-{
- pdf_obj *obj;
- int num;
-
- for (num = 0; num < xref->len; num++)
- {
- if (xref->table[num].type == 'o')
- {
- obj = pdf_load_object(xref, num, 0);
- pdf_drop_obj(obj);
- }
- }
-}
-
-/*
- * Save streams and objects to the output
- */
-
-static inline int isbinary(int c)
-{
- if (c == '\n' || c == '\r' || c == '\t')
- return 0;
- return c < 32 || c > 127;
-}
-
-static int isbinarystream(fz_buffer *buf)
-{
- int i;
- for (i = 0; i < buf->len; i++)
- if (isbinary(buf->data[i]))
- return 1;
- return 0;
-}
-
-static fz_buffer *hexbuf(unsigned char *p, int n)
-{
- static const char hex[16] = "0123456789abcdef";
- fz_buffer *buf;
- int x = 0;
-
- buf = fz_new_buffer(ctx, n * 2 + (n / 32) + 2);
-
- while (n--)
- {
- buf->data[buf->len++] = hex[*p >> 4];
- buf->data[buf->len++] = hex[*p & 15];
- if (++x == 32)
- {
- buf->data[buf->len++] = '\n';
- x = 0;
- }
- p++;
- }
-
- buf->data[buf->len++] = '>';
- buf->data[buf->len++] = '\n';
-
- return buf;
-}
-
-static void addhexfilter(pdf_obj *dict)
-{
- pdf_obj *f, *dp, *newf, *newdp;
- pdf_obj *ahx, *nullobj;
-
- ahx = fz_new_name(ctx, "ASCIIHexDecode");
- nullobj = pdf_new_null(ctx);
- newf = newdp = NULL;
-
- f = pdf_dict_gets(dict, "Filter");
- dp = pdf_dict_gets(dict, "DecodeParms");
-
- if (pdf_is_name(f))
- {
- newf = pdf_new_array(ctx, 2);
- pdf_array_push(newf, ahx);
- pdf_array_push(newf, f);
- f = newf;
- if (pdf_is_dict(dp))
- {
- newdp = pdf_new_array(ctx, 2);
- pdf_array_push(newdp, nullobj);
- pdf_array_push(newdp, dp);
- dp = newdp;
- }
- }
- else if (pdf_is_array(f))
- {
- pdf_array_insert(f, ahx);
- if (pdf_is_array(dp))
- pdf_array_insert(dp, nullobj);
- }
- else
- f = ahx;
-
- pdf_dict_puts(dict, "Filter", f);
- if (dp)
- pdf_dict_puts(dict, "DecodeParms", dp);
-
- pdf_drop_obj(ahx);
- pdf_drop_obj(nullobj);
- if (newf)
- pdf_drop_obj(newf);
- if (newdp)
- pdf_drop_obj(newdp);
-}
-
-static void copystream(pdf_obj *obj, int num, int gen)
-{
- fz_buffer *buf, *tmp;
- pdf_obj *newlen;
-
- buf = pdf_load_raw_stream(xref, num, gen);
-
- if (doascii && isbinarystream(buf))
- {
- tmp = hexbuf(buf->data, buf->len);
- fz_drop_buffer(ctx, buf);
- buf = tmp;
-
- addhexfilter(obj);
-
- newlen = pdf_new_int(ctx, buf->len);
- pdf_dict_puts(obj, "Length", newlen);
- pdf_drop_obj(newlen);
- }
-
- fprintf(out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(out, obj, doexpand == 0);
- fprintf(out, "stream\n");
- fwrite(buf->data, 1, buf->len, out);
- fprintf(out, "endstream\nendobj\n\n");
-
- fz_drop_buffer(ctx, buf);
-}
-
-static void expandstream(pdf_obj *obj, int num, int gen)
-{
- fz_buffer *buf, *tmp;
- pdf_obj *newlen;
-
- buf = pdf_load_stream(xref, num, gen);
-
- pdf_dict_dels(obj, "Filter");
- pdf_dict_dels(obj, "DecodeParms");
-
- if (doascii && isbinarystream(buf))
- {
- tmp = hexbuf(buf->data, buf->len);
- fz_drop_buffer(ctx, buf);
- buf = tmp;
-
- addhexfilter(obj);
- }
-
- newlen = pdf_new_int(ctx, buf->len);
- pdf_dict_puts(obj, "Length", newlen);
- pdf_drop_obj(newlen);
-
- fprintf(out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(out, obj, doexpand == 0);
- fprintf(out, "stream\n");
- fwrite(buf->data, 1, buf->len, out);
- fprintf(out, "endstream\nendobj\n\n");
-
- fz_drop_buffer(ctx, buf);
-}
-
-static void writeobject(int num, int gen)
-{
- pdf_obj *obj;
- pdf_obj *type;
-
- obj = pdf_load_object(xref, num, gen);
-
- /* skip ObjStm and XRef objects */
- if (pdf_is_dict(obj))
- {
- type = pdf_dict_gets(obj, "Type");
- if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "ObjStm"))
- {
- uselist[num] = 0;
- pdf_drop_obj(obj);
- return;
- }
- if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "XRef"))
- {
- uselist[num] = 0;
- pdf_drop_obj(obj);
- return;
- }
- }
-
- if (!pdf_is_stream(xref, num, gen))
- {
- fprintf(out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(out, obj, doexpand == 0);
- fprintf(out, "endobj\n\n");
- }
- else
- {
- int dontexpand = 0;
- if (doexpand != 0 && doexpand != expand_all)
- {
- pdf_obj *o;
-
- if ((o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "XObject")) &&
- (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Image")))
- dontexpand = !(doexpand & expand_images);
- if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "Font"))
- dontexpand = !(doexpand & expand_fonts);
- if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "FontDescriptor"))
- dontexpand = !(doexpand & expand_fonts);
- if ((o = pdf_dict_gets(obj, "Length1")) != NULL)
- dontexpand = !(doexpand & expand_fonts);
- if ((o = pdf_dict_gets(obj, "Length2")) != NULL)
- dontexpand = !(doexpand & expand_fonts);
- if ((o = pdf_dict_gets(obj, "Length3")) != NULL)
- dontexpand = !(doexpand & expand_fonts);
- if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Type1C"))
- dontexpand = !(doexpand & expand_fonts);
- if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "CIDFontType0C"))
- dontexpand = !(doexpand & expand_fonts);
- }
- if (doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj))
- expandstream(obj, num, gen);
- else
- copystream(obj, num, gen);
- }
-
- pdf_drop_obj(obj);
-}
-
-static void writexref(void)
-{
- pdf_obj *trailer;
- pdf_obj *obj;
- int startxref;
- int num;
-
- startxref = ftell(out);
-
- fprintf(out, "xref\n0 %d\n", xref->len);
- for (num = 0; num < xref->len; num++)
- {
- if (uselist[num])
- fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]);
- else
- fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]);
- }
- fprintf(out, "\n");
-
- trailer = pdf_new_dict(ctx, 5);
-
- obj = pdf_new_int(ctx, xref->len);
- pdf_dict_puts(trailer, "Size", obj);
- pdf_drop_obj(obj);
-
- obj = pdf_dict_gets(xref->trailer, "Info");
- if (obj)
- pdf_dict_puts(trailer, "Info", obj);
-
- obj = pdf_dict_gets(xref->trailer, "Root");
- if (obj)
- pdf_dict_puts(trailer, "Root", obj);
-
- obj = pdf_dict_gets(xref->trailer, "ID");
- if (obj)
- pdf_dict_puts(trailer, "ID", obj);
-
- fprintf(out, "trailer\n");
- pdf_fprint_obj(out, trailer, doexpand == 0);
- fprintf(out, "\n");
-
- pdf_drop_obj(trailer);
-
- fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref);
-}
-
-static void writepdf(void)
-{
- int lastfree;
- int num;
-
- for (num = 0; num < xref->len; num++)
- {
- if (xref->table[num].type == 'f')
- genlist[num] = xref->table[num].gen;
- if (xref->table[num].type == 'n')
- genlist[num] = xref->table[num].gen;
- if (xref->table[num].type == 'o')
- genlist[num] = 0;
-
- if (dogarbage && !uselist[num])
- continue;
-
- if (xref->table[num].type == 'n' || xref->table[num].type == 'o')
- {
- uselist[num] = 1;
- ofslist[num] = ftell(out);
- writeobject(num, genlist[num]);
- }
- }
-
- /* Construct linked list of free object slots */
- lastfree = 0;
- for (num = 0; num < xref->len; num++)
- {
- if (!uselist[num])
- {
- genlist[num]++;
- ofslist[lastfree] = num;
- lastfree = num;
- }
- }
-
- writexref();
-}
-
#ifdef MUPDF_COMBINED_EXE
int pdfclean_main(int argc, char **argv)
#else
@@ -748,19 +163,24 @@ int main(int argc, char **argv)
char *infile;
char *outfile = "out.pdf";
char *password = "";
- int c, num;
+ int c;
int subset;
+ fz_write_options opts;
+
+ opts.dogarbage = 0;
+ opts.doexpand = 0;
+ opts.doascii = 0;
while ((c = fz_getopt(argc, argv, "adfgip:")) != -1)
{
switch (c)
{
case 'p': password = fz_optarg; break;
- case 'g': dogarbage ++; break;
- case 'd': doexpand ^= expand_all; break;
- case 'f': doexpand ^= expand_fonts; break;
- case 'i': doexpand ^= expand_images; break;
- case 'a': doascii ++; break;
+ case 'g': opts.dogarbage ++; break;
+ case 'd': opts.doexpand ^= fz_expand_all; break;
+ case 'f': opts.doexpand ^= fz_expand_fonts; break;
+ case 'i': opts.doexpand ^= fz_expand_images; break;
+ case 'a': opts.doascii ++; break;
default: usage(); break;
}
}
@@ -792,61 +212,11 @@ int main(int argc, char **argv)
if (!pdf_authenticate_password(xref, password))
fz_throw(ctx, "cannot authenticate password: %s", infile);
- out = fopen(outfile, "wb");
- if (!out)
- fz_throw(ctx, "cannot open output file '%s'", outfile);
-
- fprintf(out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10);
- fprintf(out, "%%\316\274\341\277\246\n\n");
-
- uselist = fz_malloc_array(ctx, xref->len + 1, sizeof(char));
- ofslist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
- genlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
- renumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
-
- for (num = 0; num < xref->len; num++)
- {
- uselist[num] = 0;
- ofslist[num] = 0;
- genlist[num] = 0;
- renumbermap[num] = num;
- }
-
- /* Make sure any objects hidden in compressed streams have been loaded */
- preloadobjstms();
-
/* Only retain the specified subset of the pages */
if (subset)
retainpages(argc, argv);
- /* Sweep & mark objects from the trailer */
- if (dogarbage >= 1)
- sweepobj(xref->trailer);
-
- /* Coalesce and renumber duplicate objects */
- if (dogarbage >= 3)
- removeduplicateobjs();
-
- /* Compact xref by renumbering and removing unused objects */
- if (dogarbage >= 2)
- compactxref();
-
- /* Make renumbering affect all indirect references and update xref */
- /* Do not renumber objects if encryption is in use, as the object
- * numbers are baked into the streams/strings, and we can't currently
- * cope with moving them. See bug 692627. */
- if (dogarbage >= 2 && !xref->crypt)
- renumberobjs();
-
- writepdf();
-
- if (fclose(out))
- fz_throw(ctx, "cannot close output file '%s'", outfile);
-
- fz_free(xref->ctx, uselist);
- fz_free(xref->ctx, ofslist);
- fz_free(xref->ctx, genlist);
- fz_free(xref->ctx, renumbermap);
+ pdf_write(xref, outfile, &opts);
pdf_close_document(xref);
fz_free_context(ctx);
diff --git a/fitz/doc_document.c b/fitz/doc_document.c
index 22229892..83dbe53f 100644
--- a/fitz/doc_document.c
+++ b/fitz/doc_document.c
@@ -129,3 +129,10 @@ fz_meta(fz_document *doc, int key, void *ptr, int size)
return doc->meta(doc, key, ptr, size);
return FZ_META_UNKNOWN_KEY;
}
+
+/*
+	fz_write: Forward a save request to the document handler.
+
+	Does nothing when doc is NULL or the handler supplies no write
+	callback. filename and opts are passed through unchanged.
+*/
+void
+fz_write(fz_document *doc, char *filename, fz_write_options *opts)
+{
+	if (doc && doc->write)
+		doc->write(doc, filename, opts);
+}
\ No newline at end of file
diff --git a/fitz/fitz-internal.h b/fitz/fitz-internal.h
index bd19886c..1e54c900 100644
--- a/fitz/fitz-internal.h
+++ b/fitz/fitz-internal.h
@@ -1080,6 +1080,7 @@ struct fz_document_s
void (*run_page)(fz_document *doc, fz_page *page, fz_device *dev, fz_matrix transform, fz_cookie *cookie);
void (*free_page)(fz_document *doc, fz_page *page);
int (*meta)(fz_document *doc, int key, void *ptr, int size);
+ void (*write)(fz_document *doc, char *filename, fz_write_options *opts);
};
#endif
diff --git a/fitz/fitz.h b/fitz/fitz.h
index 04215c71..61f80756 100644
--- a/fitz/fitz.h
+++ b/fitz/fitz.h
@@ -2231,4 +2231,50 @@ enum
FZ_META_INFO = 4,
};
+typedef struct fz_write_options_s fz_write_options;
+
+/*
+ In calls to fz_write, the following options structure can be used
+ to control aspects of the writing process. This structure may grow
+ in the future, and should be zero-filled to allow forward compatibility.
+*/
+struct fz_write_options_s
+{
+ int doascii; /* If non-zero then attempt (where possible) to
+ make the output ascii. */
+ int doexpand; /* Bitflags; each non zero bit indicates an aspect
+ of the file that should be 'expanded' on
+ writing. */
+ int dogarbage; /* If non-zero then attempt (where possible) to
+ garbage collect the file before writing. */
+};
+
+/* An enumeration of bitflags to use in the above 'doexpand' field of
+ fz_write_options.
+*/
+enum
+{
+ fz_expand_images = 1,
+ fz_expand_fonts = 2,
+ fz_expand_all = -1
+};
+
+/*
+ fz_write: Write a document out.
+
+ (In development - Subject to change in future versions)
+
+ Save a copy of the current document in its original format.
+ Internally the document may change.
+
+ doc: The document to save.
+
+ filename: The filename to save to.
+
+ opts: NULL, or a pointer to an options structure.
+
+ May throw exceptions.
+*/
+void fz_write(fz_document *doc, char *filename, fz_write_options *opts);
+
#endif
diff --git a/pdf/mupdf.h b/pdf/mupdf.h
index b88f7423..72c5c4cf 100644
--- a/pdf/mupdf.h
+++ b/pdf/mupdf.h
@@ -144,6 +144,8 @@ pdf_document *pdf_open_document_with_stream(fz_stream *file);
*/
void pdf_close_document(pdf_document *doc);
+void pdf_write(pdf_document *doc, char *filename, fz_write_options *opts);
+
int pdf_needs_password(pdf_document *doc);
int pdf_authenticate_password(pdf_document *doc, char *pw);
diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c
new file mode 100644
index 00000000..e14cfa28
--- /dev/null
+++ b/pdf/pdf_write.c
@@ -0,0 +1,662 @@
+#include "fitz.h"
+#include "mupdf-internal.h"
+
+typedef struct pdf_write_options_s pdf_write_options;
+
+struct pdf_write_options_s
+{
+ FILE *out;
+ int doascii;
+ int doexpand;
+ int dogarbage;
+ char *uselist;
+ int *ofslist;
+ int *genlist;
+ int *renumbermap;
+};
+
+/*
+ * Garbage collect objects not reachable from the trailer.
+ */
+
+/*
+ * Mark the object referenced by 'obj' as used and return the resolved
+ * object so the caller can keep sweeping into it. Returns NULL when the
+ * object number is out of range or the object has already been marked.
+ */
+static pdf_obj *sweepref(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
+{
+	int num = pdf_to_num(obj);
+	int gen = pdf_to_gen(obj);
+	fz_context *ctx = xref->ctx;
+
+	if (num < 0 || num >= xref->len)
+		return NULL;
+	if (opts->uselist[num])
+		return NULL;
+
+	opts->uselist[num] = 1;
+
+	/* Bake in /Length in stream objects */
+	fz_try(ctx)
+	{
+		if (pdf_is_stream(xref, num, gen))
+		{
+			pdf_obj *len = pdf_dict_gets(obj, "Length");
+			if (pdf_is_indirect(len))
+			{
+				int lennum = pdf_to_num(len);
+
+				/* The length value is stored directly in the dict below,
+				 * so its object is unmarked. Range-check the index the
+				 * same way 'num' is checked above, so a bad reference
+				 * cannot write outside uselist. */
+				if (lennum >= 0 && lennum < xref->len)
+					opts->uselist[lennum] = 0;
+				len = pdf_resolve_indirect(len);
+				pdf_dict_puts(obj, "Length", len);
+			}
+		}
+	}
+	fz_catch(ctx)
+	{
+		/* Leave broken */
+	}
+
+	return pdf_resolve_indirect(obj);
+}
+
+static void sweepobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
+{
+ int i;
+
+ if (pdf_is_indirect(obj))
+ obj = sweepref(xref, opts, obj);
+
+ if (pdf_is_dict(obj))
+ {
+ int n = pdf_dict_len(obj);
+ for (i = 0; i < n; i++)
+ sweepobj(xref, opts, pdf_dict_get_val(obj, i));
+ }
+
+ else if (pdf_is_array(obj))
+ {
+ int n = pdf_array_len(obj);
+ for (i = 0; i < n; i++)
+ sweepobj(xref, opts, pdf_array_get(obj, i));
+ }
+}
+
+/*
+ * Scan for and remove duplicate objects (slow)
+ */
+
+static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
+{
+ int num, other;
+ fz_context *ctx = xref->ctx;
+
+ for (num = 1; num < xref->len; num++)
+ {
+ /* Only compare an object to objects preceding it */
+ for (other = 1; other < num; other++)
+ {
+ pdf_obj *a, *b;
+ int match;
+
+ if (num == other || !opts->uselist[num] || !opts->uselist[other])
+ continue;
+
+ /*
+ * Comparing stream objects data contents would take too long.
+ *
+ * pdf_is_stream calls pdf_cache_object and ensures
+ * that the xref table has the objects loaded.
+ */
+ fz_try(ctx)
+ {
+ match = (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0));
+ }
+ fz_catch(ctx)
+ {
+ /* Assume different */
+ match = 0;
+ }
+ if (match)
+ continue;
+
+ a = xref->table[num].obj;
+ b = xref->table[other].obj;
+
+ a = pdf_resolve_indirect(a);
+ b = pdf_resolve_indirect(b);
+
+ if (pdf_objcmp(a, b))
+ continue;
+
+ /* Keep the lowest numbered object */
+ opts->renumbermap[num] = MIN(num, other);
+ opts->renumbermap[other] = MIN(num, other);
+ opts->uselist[MAX(num, other)] = 0;
+
+ /* One duplicate was found, do not look for another */
+ break;
+ }
+ }
+}
+
+/*
+ * Renumber objects sequentially so the xref is more compact
+ */
+
+static void compactxref(pdf_document *xref, pdf_write_options *opts)
+{
+ int num, newnum;
+
+ /*
+ * Update renumbermap in-place, clustering all used
+ * objects together at low object ids. Objects that
+ * already should be renumbered will have their new
+ * object ids be updated to reflect the compaction.
+ */
+
+ newnum = 1;
+ for (num = 1; num < xref->len; num++)
+ {
+ if (opts->uselist[num] && opts->renumbermap[num] == num)
+ opts->renumbermap[num] = newnum++;
+ else if (opts->renumbermap[num] != num)
+ opts->renumbermap[num] = opts->renumbermap[opts->renumbermap[num]];
+ }
+}
+
+/*
+ * Update indirect objects according to renumbering established when
+ * removing duplicate objects and compacting the xref.
+ */
+
+static void renumberobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj)
+{
+ int i;
+ fz_context *ctx = xref->ctx;
+
+ if (pdf_is_dict(obj))
+ {
+ int n = pdf_dict_len(obj);
+ for (i = 0; i < n; i++)
+ {
+ pdf_obj *key = pdf_dict_get_key(obj, i);
+ pdf_obj *val = pdf_dict_get_val(obj, i);
+ if (pdf_is_indirect(val))
+ {
+ val = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(val)], 0, xref);
+ fz_dict_put(obj, key, val);
+ pdf_drop_obj(val);
+ }
+ else
+ {
+ renumberobj(xref, opts, val);
+ }
+ }
+ }
+
+ else if (pdf_is_array(obj))
+ {
+ int n = pdf_array_len(obj);
+ for (i = 0; i < n; i++)
+ {
+ pdf_obj *val = pdf_array_get(obj, i);
+ if (pdf_is_indirect(val))
+ {
+ val = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(val)], 0, xref);
+ pdf_array_put(obj, i, val);
+ pdf_drop_obj(val);
+ }
+ else
+ {
+ renumberobj(xref, opts, val);
+ }
+ }
+ }
+}
+
+static void renumberobjs(pdf_document *xref, pdf_write_options *opts)
+{
+ pdf_xref_entry *oldxref;
+ int newlen;
+ int num;
+ fz_context *ctx = xref->ctx;
+
+ /* Apply renumber map to indirect references in all objects in xref */
+ renumberobj(xref, opts, xref->trailer);
+ for (num = 0; num < xref->len; num++)
+ {
+ pdf_obj *obj = xref->table[num].obj;
+
+ if (pdf_is_indirect(obj))
+ {
+ obj = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(obj)], 0, xref);
+ pdf_update_object(xref, num, 0, obj);
+ pdf_drop_obj(obj);
+ }
+ else
+ {
+ renumberobj(xref, opts, obj);
+ }
+ }
+
+ /* Create new table for the reordered, compacted xref */
+ oldxref = xref->table;
+ xref->table = fz_malloc_array(xref->ctx, xref->len, sizeof(pdf_xref_entry));
+ xref->table[0] = oldxref[0];
+
+ /* Move used objects into the new compacted xref */
+ newlen = 0;
+ for (num = 1; num < xref->len; num++)
+ {
+ if (opts->uselist[num])
+ {
+ if (newlen < opts->renumbermap[num])
+ newlen = opts->renumbermap[num];
+ xref->table[opts->renumbermap[num]] = oldxref[num];
+ }
+ else
+ {
+ if (oldxref[num].obj)
+ pdf_drop_obj(oldxref[num].obj);
+ }
+ }
+
+ fz_free(xref->ctx, oldxref);
+
+ /* Update the used objects count in compacted xref */
+ xref->len = newlen + 1;
+
+ /* Update list of used objects to fit with compacted xref */
+ for (num = 1; num < xref->len; num++)
+ opts->uselist[num] = 1;
+}
+
+/*
+ * Make sure we have loaded objects from object streams.
+ */
+
+static void preloadobjstms(pdf_document *xref)
+{
+ pdf_obj *obj;
+ int num;
+
+ for (num = 0; num < xref->len; num++)
+ {
+ if (xref->table[num].type == 'o')
+ {
+ obj = pdf_load_object(xref, num, 0);
+ pdf_drop_obj(obj);
+ }
+ }
+}
+
+/*
+ * Save streams and objects to the output
+ */
+
+static inline int isbinary(int c)
+{
+ if (c == '\n' || c == '\r' || c == '\t')
+ return 0;
+ return c < 32 || c > 127;
+}
+
+static int isbinarystream(fz_buffer *buf)
+{
+ int i;
+ for (i = 0; i < buf->len; i++)
+ if (isbinary(buf->data[i]))
+ return 1;
+ return 0;
+}
+
+static fz_buffer *hexbuf(fz_context *ctx, unsigned char *p, int n)
+{
+ static const char hex[16] = "0123456789abcdef";
+ fz_buffer *buf;
+ int x = 0;
+
+ buf = fz_new_buffer(ctx, n * 2 + (n / 32) + 2);
+
+ while (n--)
+ {
+ buf->data[buf->len++] = hex[*p >> 4];
+ buf->data[buf->len++] = hex[*p & 15];
+ if (++x == 32)
+ {
+ buf->data[buf->len++] = '\n';
+ x = 0;
+ }
+ p++;
+ }
+
+ buf->data[buf->len++] = '>';
+ buf->data[buf->len++] = '\n';
+
+ return buf;
+}
+
+static void addhexfilter(pdf_document *xref, pdf_obj *dict)
+{
+ pdf_obj *f, *dp, *newf, *newdp;
+ pdf_obj *ahx, *nullobj;
+ fz_context *ctx = xref->ctx;
+
+ ahx = fz_new_name(ctx, "ASCIIHexDecode");
+ nullobj = pdf_new_null(ctx);
+ newf = newdp = NULL;
+
+ f = pdf_dict_gets(dict, "Filter");
+ dp = pdf_dict_gets(dict, "DecodeParms");
+
+ if (pdf_is_name(f))
+ {
+ newf = pdf_new_array(ctx, 2);
+ pdf_array_push(newf, ahx);
+ pdf_array_push(newf, f);
+ f = newf;
+ if (pdf_is_dict(dp))
+ {
+ newdp = pdf_new_array(ctx, 2);
+ pdf_array_push(newdp, nullobj);
+ pdf_array_push(newdp, dp);
+ dp = newdp;
+ }
+ }
+ else if (pdf_is_array(f))
+ {
+ pdf_array_insert(f, ahx);
+ if (pdf_is_array(dp))
+ pdf_array_insert(dp, nullobj);
+ }
+ else
+ f = ahx;
+
+ pdf_dict_puts(dict, "Filter", f);
+ if (dp)
+ pdf_dict_puts(dict, "DecodeParms", dp);
+
+ pdf_drop_obj(ahx);
+ pdf_drop_obj(nullobj);
+ if (newf)
+ pdf_drop_obj(newf);
+ if (newdp)
+ pdf_drop_obj(newdp);
+}
+
+static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj, int num, int gen)
+{
+ fz_buffer *buf, *tmp;
+ pdf_obj *newlen;
+ fz_context *ctx = xref->ctx;
+
+ buf = pdf_load_raw_stream(xref, num, gen);
+
+ if (opts->doascii && isbinarystream(buf))
+ {
+ tmp = hexbuf(ctx, buf->data, buf->len);
+ fz_drop_buffer(ctx, buf);
+ buf = tmp;
+
+ addhexfilter(xref, obj);
+
+ newlen = pdf_new_int(ctx, buf->len);
+ pdf_dict_puts(obj, "Length", newlen);
+ pdf_drop_obj(newlen);
+ }
+
+ fprintf(opts->out, "%d %d obj\n", num, gen);
+ pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ fprintf(opts->out, "stream\n");
+ fwrite(buf->data, 1, buf->len, opts->out);
+ fprintf(opts->out, "endstream\nendobj\n\n");
+
+ fz_drop_buffer(ctx, buf);
+}
+
+static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj, int num, int gen)
+{
+ fz_buffer *buf, *tmp;
+ pdf_obj *newlen;
+ fz_context *ctx = xref->ctx;
+
+ buf = pdf_load_stream(xref, num, gen);
+
+ pdf_dict_dels(obj, "Filter");
+ pdf_dict_dels(obj, "DecodeParms");
+
+ if (opts->doascii && isbinarystream(buf))
+ {
+ tmp = hexbuf(ctx, buf->data, buf->len);
+ fz_drop_buffer(ctx, buf);
+ buf = tmp;
+
+ addhexfilter(xref, obj);
+ }
+
+ newlen = pdf_new_int(ctx, buf->len);
+ pdf_dict_puts(obj, "Length", newlen);
+ pdf_drop_obj(newlen);
+
+ fprintf(opts->out, "%d %d obj\n", num, gen);
+ pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ fprintf(opts->out, "stream\n");
+ fwrite(buf->data, 1, buf->len, opts->out);
+ fprintf(opts->out, "endstream\nendobj\n\n");
+
+ fz_drop_buffer(ctx, buf);
+}
+
+static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, int gen)
+{
+ pdf_obj *obj;
+ pdf_obj *type;
+ fz_context *ctx = xref->ctx;
+
+ obj = pdf_load_object(xref, num, gen);
+
+ /* skip ObjStm and XRef objects */
+ if (pdf_is_dict(obj))
+ {
+ type = pdf_dict_gets(obj, "Type");
+ if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "ObjStm"))
+ {
+ opts->uselist[num] = 0;
+ pdf_drop_obj(obj);
+ return;
+ }
+ if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "XRef"))
+ {
+ opts->uselist[num] = 0;
+ pdf_drop_obj(obj);
+ return;
+ }
+ }
+
+ if (!pdf_is_stream(xref, num, gen))
+ {
+ fprintf(opts->out, "%d %d obj\n", num, gen);
+ pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ fprintf(opts->out, "endobj\n\n");
+ }
+ else
+ {
+ int dontexpand = 0;
+ if (opts->doexpand != 0 && opts->doexpand != fz_expand_all)
+ {
+ pdf_obj *o;
+
+ if ((o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "XObject")) &&
+ (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Image")))
+ dontexpand = !(opts->doexpand & fz_expand_images);
+ if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "Font"))
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "FontDescriptor"))
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if ((o = pdf_dict_gets(obj, "Length1")) != NULL)
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if ((o = pdf_dict_gets(obj, "Length2")) != NULL)
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if ((o = pdf_dict_gets(obj, "Length3")) != NULL)
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Type1C"))
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "CIDFontType0C"))
+ dontexpand = !(opts->doexpand & fz_expand_fonts);
+ }
+ if (opts->doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj))
+ expandstream(xref, opts, obj, num, gen);
+ else
+ copystream(xref, opts, obj, num, gen);
+ }
+
+ pdf_drop_obj(obj);
+}
+
+static void writexref(pdf_document *xref, pdf_write_options *opts)
+{
+ pdf_obj *trailer;
+ pdf_obj *obj;
+ int startxref;
+ int num;
+ fz_context *ctx = xref->ctx;
+
+ startxref = ftell(opts->out);
+
+ fprintf(opts->out, "xref\n0 %d\n", xref->len);
+ for (num = 0; num < xref->len; num++)
+ {
+ if (opts->uselist[num])
+ fprintf(opts->out, "%010d %05d n \n", opts->ofslist[num], opts->genlist[num]);
+ else
+ fprintf(opts->out, "%010d %05d f \n", opts->ofslist[num], opts->genlist[num]);
+ }
+ fprintf(opts->out, "\n");
+
+ trailer = pdf_new_dict(ctx, 5);
+
+ obj = pdf_new_int(ctx, xref->len);
+ pdf_dict_puts(trailer, "Size", obj);
+ pdf_drop_obj(obj);
+
+ obj = pdf_dict_gets(xref->trailer, "Info");
+ if (obj)
+ pdf_dict_puts(trailer, "Info", obj);
+
+ obj = pdf_dict_gets(xref->trailer, "Root");
+ if (obj)
+ pdf_dict_puts(trailer, "Root", obj);
+
+ obj = pdf_dict_gets(xref->trailer, "ID");
+ if (obj)
+ pdf_dict_puts(trailer, "ID", obj);
+
+ fprintf(opts->out, "trailer\n");
+ pdf_fprint_obj(opts->out, trailer, opts->doexpand == 0);
+ fprintf(opts->out, "\n");
+
+ pdf_drop_obj(trailer);
+
+ fprintf(opts->out, "startxref\n%d\n%%%%EOF\n", startxref);
+}
+
+/*
+ * pdf_write: Write the document to 'filename'.
+ *
+ * fz_opts may be NULL, in which case garbage collection, stream
+ * expansion and ASCII conversion are all disabled. Does nothing if
+ * xref is NULL. Throws if the output file cannot be opened; an
+ * exception raised while writing is rethrown after the work arrays
+ * have been freed and the file closed.
+ */
+void pdf_write(pdf_document *xref, char *filename, fz_write_options *fz_opts)
+{
+	int lastfree;
+	int num;
+	pdf_write_options opts = { 0 };
+	fz_context *ctx;
+
+	/* A NULL fz_opts is allowed; the defaults are selected below. */
+	if (!xref)
+		return;
+
+	ctx = xref->ctx;
+
+	opts.out = fopen(filename, "wb");
+	if (!opts.out)
+		fz_throw(ctx, "cannot open output file '%s'", filename);
+
+	fz_try(ctx)
+	{
+		opts.doexpand = fz_opts ? fz_opts->doexpand : 0;
+		opts.dogarbage = fz_opts ? fz_opts->dogarbage : 0;
+		opts.doascii = fz_opts ? fz_opts->doascii: 0;
+		opts.uselist = fz_malloc_array(ctx, xref->len + 1, sizeof(char));
+		opts.ofslist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+		opts.genlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+		opts.renumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+
+		fprintf(opts.out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10);
+		fprintf(opts.out, "%%\316\274\341\277\246\n\n");
+
+		for (num = 0; num < xref->len; num++)
+		{
+			opts.uselist[num] = 0;
+			opts.ofslist[num] = 0;
+			/* Entries whose type is not 'f', 'n' or 'o' never get a
+			 * generation assigned below, yet the free-list pass
+			 * increments it; start every slot from zero. */
+			opts.genlist[num] = 0;
+			opts.renumbermap[num] = num;
+		}
+
+		/* Make sure any objects hidden in compressed streams have been loaded */
+		preloadobjstms(xref);
+
+		/* Sweep & mark objects from the trailer */
+		if (opts.dogarbage >= 1)
+			sweepobj(xref, &opts, xref->trailer);
+
+		/* Coalesce and renumber duplicate objects */
+		if (opts.dogarbage >= 3)
+			removeduplicateobjs(xref, &opts);
+
+		/* Compact xref by renumbering and removing unused objects */
+		if (opts.dogarbage >= 2)
+			compactxref(xref, &opts);
+
+		/* Make renumbering affect all indirect references and update xref */
+		/* Do not renumber objects if encryption is in use, as the object
+		 * numbers are baked into the streams/strings, and we can't currently
+		 * cope with moving them. See bug 692627. */
+		if (opts.dogarbage >= 2 && !xref->crypt)
+			renumberobjs(xref, &opts);
+
+		for (num = 0; num < xref->len; num++)
+		{
+			if (xref->table[num].type == 'f')
+				opts.genlist[num] = xref->table[num].gen;
+			if (xref->table[num].type == 'n')
+				opts.genlist[num] = xref->table[num].gen;
+			if (xref->table[num].type == 'o')
+				opts.genlist[num] = 0;
+
+			if (opts.dogarbage && !opts.uselist[num])
+				continue;
+
+			if (xref->table[num].type == 'n' || xref->table[num].type == 'o')
+			{
+				opts.uselist[num] = 1;
+				opts.ofslist[num] = ftell(opts.out);
+				writeobject(xref, &opts, num, opts.genlist[num]);
+			}
+		}
+
+		/* Construct linked list of free object slots */
+		lastfree = 0;
+		for (num = 0; num < xref->len; num++)
+		{
+			if (!opts.uselist[num])
+			{
+				opts.genlist[num]++;
+				opts.ofslist[lastfree] = num;
+				lastfree = num;
+			}
+		}
+
+		writexref(xref, &opts);
+	}
+	fz_always(ctx)
+	{
+		fz_free(ctx, opts.uselist);
+		fz_free(ctx, opts.ofslist);
+		fz_free(ctx, opts.genlist);
+		fz_free(ctx, opts.renumbermap);
+		fclose(opts.out);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
diff --git a/win32/libmupdf.vcproj b/win32/libmupdf.vcproj
index b984cc8f..98b1aa5e 100644
--- a/win32/libmupdf.vcproj
+++ b/win32/libmupdf.vcproj
@@ -334,6 +334,10 @@
>
</File>
<File
+ RelativePath="..\pdf\pdf_write.c"
+ >
+ </File>
+ <File
RelativePath="..\pdf\pdf_xobject.c"
>
</File>