summaryrefslogtreecommitdiff
path: root/pdf/pdf_write.c
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2012-05-14 19:34:35 +0100
committerRobin Watts <robin.watts@artifex.com>2012-05-31 13:26:00 +0100
commit65b8ae915465849babc1fa712971c701136f4ed5 (patch)
tree9724b5951eb266f7192c647d4fb26a127cd901fd /pdf/pdf_write.c
parent97716e53a6fa6947a183ed88df54702f96ba97b5 (diff)
downloadmupdf-65b8ae915465849babc1fa712971c701136f4ed5.tar.xz
Add linearization to pdf_write function.
Extend mupdfclean to have a new -l flag that writes the file linearized. This should still be considered experimental. When writing a pdf file, analyse object use, flatten resource use, reorder the objects, generate a hintstream and output with linearisation parameters. This is enough for Acrobat to accept the file as being optimised for Fast Web View. We ought to add more tables to the hintstream in some cases, but I doubt anyone actually uses it, the spec is so badly written. Certainly Acrobat accepts the file as being optimised for 'Fast Web View'. Update fz_dict_put to allow for us adding a reference to the dictionary that is the sole owner of that reference already (i.e. don't drop then keep something that has a reference count of just 1). Update pdf_load_image_stream to use the stm_buf from the xref if there is one. Update pdf_close_document to discard any stm_bufs it may be holding. Update fz_dict_put to be pdf_dict_put - this was missed in a renaming ages ago and has been inconsistent since.
Diffstat (limited to 'pdf/pdf_write.c')
-rw-r--r--pdf/pdf_write.c1792
1 files changed, 1639 insertions, 153 deletions
diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c
index ed0b8711..8d32febe 100644
--- a/pdf/pdf_write.c
+++ b/pdf/pdf_write.c
@@ -1,23 +1,482 @@
-#include "fitz.h"
+#include "fitz-internal.h"
#include "mupdf-internal.h"
+/* #define DEBUG_LINEARIZATION */
+/* #define DEBUG_HEAP_SORT */
+
typedef struct pdf_write_options_s pdf_write_options;
+/* As part of linearization, we need to keep a list of what objects are used
+ * by what page. We do this by recording the objects used in a given page
+ * in a page_objects structure. We have a list of these structures (one per
+ * page) in the page_objects_list structure.
+ *
+ * The page_objects structure collects object numbers in its object array.
+ * Entries are simply appended as they are found; the array is then
+ * heapsorted and deduped at the end for a total worst case n log n time.
+ *
+ * The magic heap invariant is that:
+ * entry[n] >= entry[(n+1)*2-1] & entry[n] >= entry[(n+1)*2]
+ * or equivalently:
+ * entry[(n-1)>>1] >= entry[n]
+ *
+ * For a discussion of the heap data structure (and heapsort) see Kingston,
+ * "Algorithms and Data Structures".
+ */
+/* Per-page record: the object numbers referenced from one page, plus the
+ * per-page summary values printed by page_objects_dump. The structure is
+ * over-allocated so that object[] really holds cap entries. */
+typedef struct {
+ int num_shared; /* Dumped as "Number of shared objects" */
+ int page_object_number; /* Object number of the page's own page object */
+ int num_objects; /* Dumped as "Number of objects" */
+ int min_ofs; /* Start of the page's byte range (as dumped) */
+ int max_ofs; /* End of the page's byte range (as dumped) */
+ /* Extensible list of objects used on this page */
+ int cap; /* Number of entries object[] has room for */
+ int len; /* Number of entries of object[] in use */
+ int object[1]; /* Really cap entries long */
+} page_objects;
+
+/* Growable array of page_objects pointers, indexed by page number. The
+ * structure is over-allocated so that page[] really holds cap entries. */
+typedef struct {
+ int cap; /* Number of entries page[] has room for */
+ int len; /* Highest page index inserted so far, plus one */
+ page_objects *page[1]; /* Really cap entries long; new slots start NULL */
+} page_objects_list;
+
struct pdf_write_options_s
{
FILE *out;
- int doascii;
- int doexpand;
- int dogarbage;
- char *uselist;
- int *ofslist;
- int *genlist;
- int *renumbermap;
- int *revrenumbermap;
- int *revgenlist;
+ int do_ascii; /* Non zero: hex encode binary stream data on output */
+ int do_expand; /* Stream expansion option; also selects the pdf_fprint_obj mode */
+ int do_garbage; /* Garbage collection option (presumably a level - confirm with caller) */
+ int do_linear; /* Presumably non zero to request linearized output - confirm with caller */
+ int *use_list; /* Per object usage flags; see the USE_ constants */
+ int *ofs_list; /* Per object file offsets */
+ int *gen_list; /* Per object generation numbers */
+ int *renumber_map; /* Maps an object number to its new object number */
+ /* The following extras are required for linearization */
+ int *rev_renumber_map; /* Maps a new object number back to the original one */
+ int *rev_gen_list; /* Original generation number for a new object number */
+ int start; /* Position of the linearization params object after reordering */
+ int first_xref_offset;
+ int main_xref_offset;
+ int first_xref_entry_offset;
+ int file_len;
+ int hints_shared_offset;
+ int hintstream_len;
+ /* Placeholder integers held in the linearization params dictionary
+ * (keys L, H[0], H[1], O, E, N and T); filled in later by
+ * update_linearization_params. */
+ pdf_obj *linear_l;
+ pdf_obj *linear_h0;
+ pdf_obj *linear_h1;
+ pdf_obj *linear_o;
+ pdf_obj *linear_e;
+ pdf_obj *linear_n;
+ pdf_obj *linear_t;
+ /* Placeholder integers held in the hint stream dictionary (keys S
+ * and Length). */
+ pdf_obj *hints_s;
+ pdf_obj *hints_length;
+ int page_count; /* Number of pages found by mark_pages */
+ page_objects_list *page_object_lists; /* Objects used by each page */
};
/*
+ * Constants for use with use_list.
+ *
+ * If use_list[num] = 0, then object num is unused.
+ * If use_list[num] & PARAMS, then object num is the linearisation params obj.
+ * If use_list[num] & CATALOGUE, then object num is used by the catalogue.
+ * If use_list[num] & PAGE1, then object num is used by page 1.
+ * If use_list[num] & SHARED, then object num is shared between pages.
+ * If use_list[num] & PAGE_OBJECT then this must be the first object in a page.
+ * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT).
+ */
+/* Bit flags stored in use_list[]. Bit 0 has no name here; the garbage
+ * collection pass (sweepref) stores a plain 1 to mean "reachable". */
+enum
+{
+ USE_CATALOGUE = 2, /* Used by the catalogue (or other document level object) */
+ USE_PAGE1 = 4, /* Used by the first page */
+ USE_SHARED = 8, /* Used by more than one page */
+ USE_PARAMS = 16, /* The linearization parameters object */
+ USE_HINTS = 32, /* The primary hint stream object */
+ USE_PAGE_OBJECT = 64, /* The page object itself */
+ USE_PAGE_MASK = ~127, /* Everything above the low 7 flag bits: the page number */
+ USE_PAGE_SHIFT = 7 /* Shift needed to reach the page number bits */
+};
+
+/*
+ * page_objects and page_object_list handling functions
+ */
+/*
+ * Allocate an empty, zero filled page_objects_list. The page[] slot built
+ * into the structure gives it room for one page before it has to grow.
+ */
+static page_objects_list *
+page_objects_list_create(fz_context *ctx)
+{
+	page_objects_list *list;
+
+	list = fz_calloc(ctx, 1, sizeof(page_objects_list));
+	list->len = 0;
+	list->cap = 1;
+
+	return list;
+}
+
+/*
+ * Free every per-page list held in the first len slots of pol, then pol
+ * itself. A NULL pol is accepted and ignored.
+ */
+static void
+page_objects_list_destroy(fz_context *ctx, page_objects_list *pol)
+{
+	int page;
+
+	if (pol == NULL)
+		return;
+
+	page = 0;
+	while (page < pol->len)
+	{
+		fz_free(ctx, pol->page[page]);
+		page++;
+	}
+	fz_free(ctx, pol);
+}
+
+/*
+ * Make sure *pol has room for at least newcap page slots, growing (and
+ * possibly moving) the list if required. Newly added slots are set to NULL.
+ *
+ * The page[] array holds pointers, so the extra space must be sized in
+ * units of sizeof(page_objects *), not sizeof(int); otherwise the memset
+ * below runs off the end of the block wherever pointers are wider than int.
+ */
+static void
+page_objects_list_ensure(fz_context *ctx, page_objects_list **pol, int newcap)
+{
+	int oldcap = (*pol)->cap;
+
+	if (newcap <= oldcap)
+		return;
+	*pol = fz_resize_array(ctx, *pol, 1, sizeof(page_objects_list) + (newcap-1)*sizeof(page_objects *));
+	memset(&(*pol)->page[oldcap], 0, sizeof(page_objects *)*(newcap-oldcap));
+	(*pol)->cap = newcap;
+}
+
+/*
+ * Allocate an empty, zero filled page_objects with room for a small
+ * initial number of object entries.
+ */
+static page_objects *
+page_objects_create(fz_context *ctx)
+{
+	enum { INITIAL_CAP = 8 };
+	page_objects *po;
+
+	po = fz_calloc(ctx, 1, sizeof(page_objects) + (INITIAL_CAP-1) * sizeof(int));
+	po->len = 0;
+	po->cap = INITIAL_CAP;
+
+	return po;
+}
+
+/*
+ * Append object number i to the list *ppo, creating the list on first use
+ * and doubling its capacity when it is full. *ppo is updated if the list
+ * is created or moved.
+ *
+ * Entries are stored in arrival order (duplicates included); ordering and
+ * de-duplication are done later by page_objects_sort/page_objects_dedupe.
+ */
+static void
+page_objects_insert(fz_context *ctx, page_objects **ppo, int i)
+{
+	page_objects *po;
+
+	/* Make a page_objects if we don't have one */
+	if (*ppo == NULL)
+		*ppo = page_objects_create(ctx);
+
+	po = *ppo;
+	/* page_objects insertion: extend the page_objects by 1, and put us on the end */
+	if (po->len == po->cap)
+	{
+		po = fz_resize_array(ctx, po, 1, sizeof(page_objects) + (po->cap*2 - 1)*sizeof(int));
+		po->cap *= 2;
+		*ppo = po;
+	}
+	po->object[po->len++] = i;
+}
+
+/*
+ * Record that the given object number is used by the given (0 based) page,
+ * growing the per-page list array and bumping its length as needed.
+ */
+static void
+page_objects_list_insert(fz_context *ctx, pdf_write_options *opts, int page, int object)
+{
+	int needed = page+1;
+	page_objects_list *list;
+
+	page_objects_list_ensure(ctx, &opts->page_object_lists, needed);
+	/* Fetch the list only now: ensure may have moved it */
+	list = opts->page_object_lists;
+	if (needed > list->len)
+		list->len = needed;
+	page_objects_insert(ctx, &list->page[page], object);
+}
+
+/*
+ * Record the object number of the page object for the given (0 based) page.
+ *
+ * The per-page entry normally exists already, because the page's objects
+ * are inserted first. If nothing was inserted for this page (for example
+ * a page dictionary that is not an indirect object) the slot is still
+ * NULL, so create it here rather than dereferencing NULL, and make sure
+ * len covers it so that the entry is later visited and freed.
+ */
+static void
+page_objects_list_set_page_object(fz_context *ctx, pdf_write_options *opts, int page, int object)
+{
+	page_objects_list *list;
+
+	page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
+	list = opts->page_object_lists;
+	if (list->page[page] == NULL)
+		list->page[page] = page_objects_create(ctx);
+	if (list->len < page+1)
+		list->len = page+1;
+	list->page[page]->page_object_number = object;
+}
+
+/*
+ * Sort po->object[0..len) into ascending order, in place, using heapsort.
+ * Duplicates are kept (see page_objects_dedupe). ctx is not used.
+ */
+static void
+page_objects_sort(fz_context *ctx, page_objects *po)
+{
+ int i, j;
+ int n = po->len;
+
+ /* Step 1: Make a heap */
+ /* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
+ for (i = 1; i < n; i++)
+ {
+ /* Now bubble backwards to maintain heap invariant */
+ j = i;
+ while (j != 0)
+ {
+ int tmp;
+ int k = (j-1)>>1; /* k is the parent of j */
+ if (po->object[k] >= po->object[j])
+ break;
+ tmp = po->object[k];
+ po->object[k] = po->object[j];
+ po->object[j] = tmp;
+ j = k;
+ }
+ }
+
+ /* Step 2: Heap sort */
+ /* Invariant: valid heap in [0..i), sorted list in [i..n) */
+ /* Initially: i = n */
+ for (i = n-1; i > 0; i--)
+ {
+ /* Swap the maximum (0th) element from the heap into its place
+ * in the sorted list (position i). */
+ int tmp = po->object[0];
+ po->object[0] = po->object[i];
+ po->object[i] = tmp;
+ /* Now, the heap is invalid because the 0th element is out
+ * of place. Bubble it down until the heap is valid. */
+ j = 0;
+ while (1)
+ {
+ /* Children are k and k+1 */
+ int k = (j+1)*2-1;
+ /* If both children are out of the heap, we're done */
+ if (k > i-1)
+ break;
+ /* If both are in the heap, pick the larger one */
+ if (k < i-1 && po->object[k] < po->object[k+1])
+ k++;
+ /* If j is bigger than k (i.e. both of its children),
+ * we're done */
+ if (po->object[j] > po->object[k])
+ break;
+ tmp = po->object[k];
+ po->object[k] = po->object[j];
+ po->object[j] = tmp;
+ j = k;
+ }
+ }
+}
+
+/*
+ * Ordering predicate for heap_sort: given the use_list values of two
+ * objects, return non zero if ui should sort at or after uj.
+ *
+ * Note that for two page objects in the same section this returns 0 both
+ * ways round, so it is not a reflexive ">=".
+ */
+static int
+order_ge(int ui, int uj)
+{
+ /* For linearization, we need to order the sections as follows:
+ * Remaining pages
+ * Shared objects
+ * Objects not associated with any page
+ * (Linearization params)
+ * Catalogue (and other document level objects)
+ * First page
+ * (Primary Hint stream) (*)
+ * Note, this is NOT the same order they appear in
+ * the final file!
+ *
+ * The PDF reference gives us the option of putting the hint stream
+ * after the first page, and we take it, for simplicity.
+ */
+ /* If the 2 objects are in the same section, then page object comes
+ * first. */
+ if (((ui ^ uj) & ~USE_PAGE_OBJECT) == 0)
+ return ((ui & USE_PAGE_OBJECT) == 0);
+ /* Put the hint stream last. */
+ else if (ui & USE_HINTS)
+ return 1;
+ else if (uj & USE_HINTS)
+ return 0;
+ /* Put page 1 before that... */
+ else if (ui & USE_PAGE1)
+ return 1;
+ else if (uj & USE_PAGE1)
+ return 0;
+ /* Put the catalogue before that... */
+ else if (ui & USE_CATALOGUE)
+ return 1;
+ else if (uj & USE_CATALOGUE)
+ return 0;
+ /* Put the linearization params before that... */
+ else if (ui & USE_PARAMS)
+ return 1;
+ else if (uj & USE_PARAMS)
+ return 0;
+ /* Put objects not associated with any page (anything
+ * not touched by the catalogue) before that... */
+ else if (ui == 0)
+ return 1;
+ else if (uj == 0)
+ return 0;
+ /* Put shared objects before that... */
+ else if (ui & USE_SHARED)
+ return 1;
+ else if (uj & USE_SHARED)
+ return 0;
+ /* And otherwise, order by the page number on which
+ * they are used. */
+ return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT);
+}
+
+/*
+ * Heapsort the n indices in list[], in place, into ascending order of the
+ * values they select from val[], where "ascending" is defined by the
+ * supplied predicate: ge(a, b) is non zero if a sorts at or after b.
+ * Only list[] is permuted; val[] is read but never written.
+ *
+ * With DEBUG_HEAP_SORT defined, the list is dumped to stderr (with the
+ * heap/sort property checked) before, during and after the sort.
+ */
+static void
+heap_sort(int *list, int n, int *val, int (*ge)(int, int))
+{
+ int i, j;
+
+#ifdef DEBUG_HEAP_SORT
+ fprintf(stderr, "Initially:\n");
+ for (i=0; i < n; i++)
+ {
+ fprintf(stderr, "%d: %d %x\n", i, list[i], val[list[i]]);
+ }
+#endif
+ /* Step 1: Make a heap */
+ /* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
+ for (i = 1; i < n; i++)
+ {
+ /* Now bubble backwards to maintain heap invariant */
+ j = i;
+ while (j != 0)
+ {
+ int tmp;
+ int k = (j-1)>>1; /* k is the parent of j */
+ if (ge(val[list[k]], val[list[j]]))
+ break;
+ tmp = list[k];
+ list[k] = list[j];
+ list[j] = tmp;
+ j = k;
+ }
+ }
+#ifdef DEBUG_HEAP_SORT
+ fprintf(stderr, "Valid heap:\n");
+ for (i=0; i < n; i++)
+ {
+ int k;
+ fprintf(stderr, "%d: %d %x ", i, list[i], val[list[i]]);
+ k = (i+1)*2-1;
+ if (k < n)
+ {
+ if (ge(val[list[i]], val[list[k]]))
+ fprintf(stderr, "OK ");
+ else
+ fprintf(stderr, "BAD ");
+ }
+ if (k+1 < n)
+ {
+ if (ge(val[list[i]], val[list[k+1]]))
+ fprintf(stderr, "OK\n");
+ else
+ fprintf(stderr, "BAD\n");
+ }
+ else
+ fprintf(stderr, "\n");
+ }
+#endif
+
+ /* Step 2: Heap sort */
+ /* Invariant: valid heap in [0..i), sorted list in [i..n) */
+ /* Initially: i = n */
+ for (i = n-1; i > 0; i--)
+ {
+ /* Swap the maximum (0th) element from the heap into its place
+ * in the sorted list (position i). */
+ int tmp = list[0];
+ list[0] = list[i];
+ list[i] = tmp;
+ /* Now, the heap is invalid because the 0th element is out
+ * of place. Bubble it down until the heap is valid. */
+ j = 0;
+ while (1)
+ {
+ /* Children are k and k+1 */
+ int k = (j+1)*2-1;
+ /* If both children are out of the heap, we're done */
+ if (k > i-1)
+ break;
+ /* If both are in the heap, pick the larger one */
+ if (k < i-1 && ge(val[list[k+1]], val[list[k]]))
+ k++;
+ /* If j is bigger than k (i.e. both of its children),
+ * we're done */
+ if (ge(val[list[j]], val[list[k]]))
+ break;
+ tmp = list[k];
+ list[k] = list[j];
+ list[j] = tmp;
+ j = k;
+ }
+ }
+#ifdef DEBUG_HEAP_SORT
+ fprintf(stderr, "Sorted:\n");
+ for (i=0; i < n; i++)
+ {
+ fprintf(stderr, "%d: %d %x ", i, list[i], val[list[i]]);
+ if (i+1 < n)
+ {
+ if (ge(val[list[i+1]], val[list[i]]))
+ fprintf(stderr, "OK");
+ else
+ fprintf(stderr, "BAD");
+ }
+ fprintf(stderr, "\n");
+ }
+#endif
+}
+
+/*
+ * Remove adjacent duplicates from the (already sorted) object list of po,
+ * in place, and shrink po->len to the number of distinct entries.
+ *
+ * Every element up to and including the last one is examined, so a
+ * distinct final entry is kept, and lists of fewer than two entries
+ * (including the empty list) are left exactly as they are. ctx is not
+ * used.
+ */
+static void
+page_objects_dedupe(fz_context *ctx, page_objects *po)
+{
+	int i, j;
+	int n = po->len;
+
+	/* Nothing can be duplicated in a list of 0 or 1 entries */
+	if (n < 2)
+		return;
+
+	j = 0; /* j points to the last entry we have decided to keep */
+	for (i = 1; i < n; i++)
+	{
+		if (po->object[j] != po->object[i])
+			po->object[++j] = po->object[i];
+	}
+	po->len = j+1;
+}
+
+/*
+ * Sort, then de-duplicate, the object list of every page in pol.
+ */
+static void
+page_objects_list_sort_and_dedupe(fz_context *ctx, page_objects_list *pol)
+{
+	int count = pol->len;
+	int page = 0;
+
+	while (page < count)
+	{
+		page_objects *po = pol->page[page++];
+
+		page_objects_sort(ctx, po);
+		page_objects_dedupe(ctx, po);
+	}
+}
+
+#ifdef DEBUG_LINEARIZATION
+/*
+ * Debug helper: print, for every page, the objects it uses (with their
+ * use_list flags) followed by the per-page summary values.
+ */
+static void
+page_objects_dump(pdf_write_options *opts)
+{
+	page_objects_list *list = opts->page_object_lists;
+	int page;
+
+	for (page = 0; page < list->len; page++)
+	{
+		page_objects *po = list->page[page];
+		int idx;
+
+		fprintf(stderr, "Page %d\n", page+1);
+		for (idx = 0; idx < po->len; idx++)
+		{
+			int num = po->object[idx];
+
+			fprintf(stderr, " Object %d: use=%x\n", num, opts->use_list[num]);
+		}
+		fprintf(stderr, "Byte range=%d->%d\n", po->min_ofs, po->max_ofs);
+		fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", po->num_objects, po->num_shared);
+		fprintf(stderr, "Page object number=%d\n", po->page_object_number);
+	}
+}
+
+/*
+ * Debug helper: print the use flags and file offset of every object in
+ * the xref.
+ */
+static void
+objects_dump(pdf_document *xref, pdf_write_options *opts)
+{
+	int num = 0;
+
+	while (num < xref->len)
+	{
+		fprintf(stderr, "Object %d use=%x offset=%d\n", num, opts->use_list[num], opts->ofs_list[num]);
+		num++;
+	}
+}
+#endif
+
+/*
* Garbage collect objects not reachable from the trailer.
*/
@@ -29,10 +488,10 @@ static pdf_obj *sweepref(pdf_document *xref, pdf_write_options *opts, pdf_obj *o
if (num < 0 || num >= xref->len)
return NULL;
- if (opts->uselist[num])
+ if (opts->use_list[num])
return NULL;
- opts->uselist[num] = 1;
+ opts->use_list[num] = 1;
/* Bake in /Length in stream objects */
fz_try(ctx)
@@ -42,7 +501,7 @@ static pdf_obj *sweepref(pdf_document *xref, pdf_write_options *opts, pdf_obj *o
pdf_obj *len = pdf_dict_gets(obj, "Length");
if (pdf_is_indirect(len))
{
- opts->uselist[pdf_to_num(len)] = 0;
+ opts->use_list[pdf_to_num(len)] = 0;
len = pdf_resolve_indirect(len);
pdf_dict_puts(obj, "Length", len);
}
@@ -95,7 +554,7 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
pdf_obj *a, *b;
int differ, newnum;
- if (num == other || !opts->uselist[num] || !opts->uselist[other])
+ if (num == other || !opts->use_list[num] || !opts->use_list[other])
continue;
/*
@@ -127,10 +586,10 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
/* Keep the lowest numbered object */
newnum = MIN(num, other);
- opts->renumbermap[num] = newnum;
- opts->renumbermap[other] = newnum;
- opts->revrenumbermap[newnum] = num; /* Either will do */
- opts->uselist[MAX(num, other)] = 0;
+ opts->renumber_map[num] = newnum;
+ opts->renumber_map[other] = newnum;
+ opts->rev_renumber_map[newnum] = num; /* Either will do */
+ opts->use_list[MAX(num, other)] = 0;
/* One duplicate was found, do not look for another */
break;
@@ -141,7 +600,7 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
/*
* Renumber objects sequentially so the xref is more compact
*
- * This code assumes that any opts->renumbermap[n] <= n for all n.
+ * This code assumes that any opts->renumber_map[n] <= n for all n.
*/
static void compactxref(pdf_document *xref, pdf_write_options *opts)
@@ -149,7 +608,7 @@ static void compactxref(pdf_document *xref, pdf_write_options *opts)
int num, newnum;
/*
- * Update renumbermap in-place, clustering all used
+ * Update renumber_map in-place, clustering all used
* objects together at low object ids. Objects that
* already should be renumbered will have their new
* object ids be updated to reflect the compaction.
@@ -159,23 +618,23 @@ static void compactxref(pdf_document *xref, pdf_write_options *opts)
for (num = 1; num < xref->len; num++)
{
/* If it's not used, map it to zero */
- if (!opts->uselist[num])
+ if (!opts->use_list[num])
{
- opts->renumbermap[num] = 0;
+ opts->renumber_map[num] = 0;
}
/* If it's not moved, compact it. */
- else if (opts->renumbermap[num] == num)
+ else if (opts->renumber_map[num] == num)
{
- opts->revrenumbermap[newnum] = opts->revrenumbermap[num];
- opts->revgenlist[newnum] = opts->revgenlist[num];
- opts->renumbermap[num] = newnum++;
+ opts->rev_renumber_map[newnum] = opts->rev_renumber_map[num];
+ opts->rev_gen_list[newnum] = opts->rev_gen_list[num];
+ opts->renumber_map[num] = newnum++;
}
/* Otherwise it's used, and moved. We know that it must have
* moved down, so the place it's moved to will be in the right
* place already. */
else
{
- opts->renumbermap[num] = opts->renumbermap[opts->renumbermap[num]];
+ opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]];
}
}
}
@@ -199,8 +658,8 @@ static void renumberobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *ob
pdf_obj *val = pdf_dict_get_val(obj, i);
if (pdf_is_indirect(val))
{
- val = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(val)], 0, xref);
- fz_dict_put(obj, key, val);
+ val = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(val)], 0, xref);
+ pdf_dict_put(obj, key, val);
pdf_drop_obj(val);
}
else
@@ -218,7 +677,7 @@ static void renumberobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *ob
pdf_obj *val = pdf_array_get(obj, i);
if (pdf_is_indirect(val))
{
- val = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(val)], 0, xref);
+ val = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(val)], 0, xref);
pdf_array_put(obj, i, val);
pdf_drop_obj(val);
}
@@ -236,57 +695,641 @@ static void renumberobjs(pdf_document *xref, pdf_write_options *opts)
int newlen;
int num;
fz_context *ctx = xref->ctx;
+ int *new_use_list;
- /* Apply renumber map to indirect references in all objects in xref */
- renumberobj(xref, opts, xref->trailer);
- for (num = 0; num < xref->len; num++)
+ new_use_list = fz_calloc(ctx, xref->len+3, sizeof(int));
+
+ fz_try(ctx)
{
- pdf_obj *obj = xref->table[num].obj;
+ /* Apply renumber map to indirect references in all objects in xref */
+ renumberobj(xref, opts, xref->trailer);
+ for (num = 0; num < xref->len; num++)
+ {
+ pdf_obj *obj = xref->table[num].obj;
- if (pdf_is_indirect(obj))
+ if (pdf_is_indirect(obj))
+ {
+ obj = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(obj)], 0, xref);
+ pdf_update_object(xref, num, obj);
+ pdf_drop_obj(obj);
+ }
+ else
+ {
+ renumberobj(xref, opts, obj);
+ }
+ }
+
+ /* Create new table for the reordered, compacted xref */
+ oldxref = xref->table;
+ xref->table = fz_malloc_array(ctx, xref->len + 3, sizeof(pdf_xref_entry));
+ xref->table[0] = oldxref[0];
+
+ /* Move used objects into the new compacted xref */
+ newlen = 0;
+ for (num = 1; num < xref->len; num++)
{
- obj = pdf_new_indirect(ctx, opts->renumbermap[pdf_to_num(obj)], 0, xref);
- pdf_update_object(xref, num, obj);
- pdf_drop_obj(obj);
+ if (opts->use_list[num])
+ {
+ if (newlen < opts->renumber_map[num])
+ newlen = opts->renumber_map[num];
+ xref->table[opts->renumber_map[num]] = oldxref[num];
+ new_use_list[opts->renumber_map[num]] = opts->use_list[num];
+ }
+ else
+ {
+ if (oldxref[num].obj)
+ pdf_drop_obj(oldxref[num].obj);
+ }
+ }
+ }
+ fz_catch(ctx)
+ {
+ fz_free(ctx, new_use_list);
+ fz_rethrow(ctx);
+ }
+ fz_free(ctx, oldxref);
+ fz_free(ctx, opts->use_list);
+ opts->use_list = new_use_list;
+
+ /* Update the used objects count in compacted xref */
+ xref->len = newlen + 1;
+
+ for (num = 1; num < xref->len; num++)
+ {
+ opts->renumber_map[num] = num;
+ }
+}
+
+/*
+ * Pass every object number held in the per-page lists (including each
+ * page's own page object number) through opts->renumber_map.
+ */
+static void page_objects_list_renumber(pdf_write_options *opts)
+{
+	int page;
+
+	for (page = 0; page < opts->page_object_lists->len; page++)
+	{
+		page_objects *po = opts->page_object_lists->page[page];
+		int *num = po->object;
+		int *end = po->object + po->len;
+
+		for (; num < end; num++)
+			*num = opts->renumber_map[*num];
+		po->page_object_number = opts->renumber_map[po->page_object_number];
+	}
+}
+
+/*
+ * Recursively walk val (through dictionaries and arrays), recording in
+ * opts->use_list how each indirect object reached is used.
+ *
+ * flag is the USE_ value (or shifted page number) to record. If page >= 0,
+ * each indirect object reached is also added to that page's object list.
+ * The mark on val stops the walk from looping on cyclic references; it is
+ * cleared again before returning, so the same object can be reached again
+ * by a later call.
+ */
+static void
+mark_all(pdf_document *xref, pdf_write_options *opts, pdf_obj *val, int flag, int page)
+{
+ fz_context *ctx = xref->ctx;
+
+ if (pdf_dict_mark(val))
+ return;
+
+ fz_try(ctx)
+ {
+ if (pdf_is_indirect(val))
+ {
+ int num = pdf_to_num(val);
+ /* NOTE(review): 16 looks like a stand-in for "flag is a shifted
+ * page number" (the only values callers pass are USE_CATALOGUE,
+ * USE_PAGE1 or pagenum<<USE_PAGE_SHIFT) - confirm. An object that
+ * already carries page number bits is marked as shared. */
+ if (flag >= 16 && (opts->use_list[num] & USE_PAGE_MASK))
+ /* Already used */
+ opts->use_list[num] |= USE_SHARED;
+ else
+ opts->use_list[num] |= flag;
+ if (page >= 0)
+ page_objects_list_insert(ctx, opts, page, num);
+ }
+
+ if (pdf_is_dict(val))
+ {
+ int i, n = pdf_dict_len(val);
+
+ for (i = 0; i < n; i++)
+ {
+ mark_all(xref, opts, pdf_dict_get_val(val, i), flag, page);
+ }
+ }
+ else if (pdf_is_array(val))
+ {
+ int i, n = pdf_array_len(val);
+
+ for (i = 0; i < n; i++)
+ {
+ mark_all(xref, opts, pdf_array_get(val, i), flag, page);
+ }
+ }
+ }
+ fz_always(ctx)
+ {
+ pdf_dict_unmark(val);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
+
+/*
+ * Walk the page tree rooted at val, numbering pages from pagenum upwards
+ * in the order they are met.
+ *
+ * Each dictionary of Type Page has everything reachable from it marked as
+ * belonging to that page (USE_PAGE1 for page 0, otherwise the shifted page
+ * number) and is itself flagged USE_PAGE_OBJECT. Other dictionaries are
+ * treated as intermediate nodes: their Kids are walked recursively, and
+ * everything else they reference is marked USE_CATALOGUE.
+ *
+ * Returns the next unused page number.
+ */
+static int
+mark_pages(pdf_document *xref, pdf_write_options *opts, pdf_obj *val, int pagenum)
+{
+ fz_context *ctx = xref->ctx;
+
+ if (pdf_dict_mark(val))
+ return pagenum;
+
+ fz_try(ctx)
+ {
+ if (pdf_is_dict(val))
+ {
+ if (!strcmp("Page", pdf_to_name(pdf_dict_gets(val, "Type"))))
+ {
+ int num = pdf_to_num(val);
+ /* Clear our own mark first, or mark_all would see the page
+ * as already visited and return at once. */
+ pdf_dict_unmark(val);
+ mark_all(xref, opts, val, pagenum == 0 ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum);
+ page_objects_list_set_page_object(ctx, opts, pagenum, num);
+ pagenum++;
+ opts->use_list[num] |= USE_PAGE_OBJECT;
+ }
+ else
+ {
+ int i, n = pdf_dict_len(val);
+
+ for (i = 0; i < n; i++)
+ {
+ pdf_obj *key = pdf_dict_get_key(val, i);
+ pdf_obj *obj = pdf_dict_get_val(val, i);
+
+ if (!strcmp("Kids", pdf_to_name(key)))
+ pagenum = mark_pages(xref, opts, obj, pagenum);
+ else
+ mark_all(xref, opts, obj, USE_CATALOGUE, -1);
+ }
+
+ if (pdf_is_indirect(val))
+ {
+ int num = pdf_to_num(val);
+ opts->use_list[num] |= USE_CATALOGUE;
+ }
+ }
+ }
+ else if (pdf_is_array(val))
+ {
+ int i, n = pdf_array_len(val);
+
+ for (i = 0; i < n; i++)
+ {
+ pagenum = mark_pages(xref, opts, pdf_array_get(val, i), pagenum);
+ }
+ if (pdf_is_indirect(val))
+ {
+ int num = pdf_to_num(val);
+ opts->use_list[num] |= USE_CATALOGUE;
+ }
+ }
+ }
+ fz_always(ctx)
+ {
+ pdf_dict_unmark(val);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+ return pagenum;
+}
+
+/*
+ * Mark the document catalogue (dict) and everything it references.
+ *
+ * The catalogue itself is flagged USE_CATALOGUE. Its Pages entry is walked
+ * with mark_pages (which also yields opts->page_count), its Outlines entry
+ * is currently left unmarked, and every other entry is marked
+ * USE_CATALOGUE.
+ */
+static void
+mark_root(pdf_document *xref, pdf_write_options *opts, pdf_obj *dict)
+{
+ fz_context *ctx = xref->ctx;
+ int i, n = pdf_dict_len(dict);
+
+ if (pdf_dict_mark(dict))
+ return;
+
+ fz_try(ctx)
+ {
+ if (pdf_is_indirect(dict))
+ {
+ int num = pdf_to_num(dict);
+ opts->use_list[num] |= USE_CATALOGUE;
+ }
+
+ for (i = 0; i < n; i++)
+ {
+ char *key = pdf_to_name(pdf_dict_get_key(dict, i));
+ pdf_obj *val = pdf_dict_get_val(dict, i);
+
+ if (!strcmp("Pages", key))
+ opts->page_count = mark_pages(xref, opts, val, 0);
+ else if (!strcmp("Outlines", key))
+ {
+ /* FIXME: Look at PageMode to decide whether to
+ * USE_OTHERPAGES or USE_PAGE1 here. */
+ /* The condition is hard wired to false, so for now the
+ * outlines are not marked from here at all. */
+ if (0 /* PageMode == "Outlines" */)
+ mark_all(xref, opts, val, USE_PAGE1, -1);
+ }
+ else
+ mark_all(xref, opts, val, USE_CATALOGUE, -1);
+ }
+ }
+ fz_always(ctx)
+ {
+ pdf_dict_unmark(dict);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
+
+/*
+ * Entry point of the marking pass: walk the trailer dictionary, handing
+ * its Root entry to mark_root and marking everything reachable from any
+ * other entry as USE_CATALOGUE.
+ */
+static void
+mark_trailer(pdf_document *xref, pdf_write_options *opts, pdf_obj *dict)
+{
+ fz_context *ctx = xref->ctx;
+ int i, n = pdf_dict_len(dict);
+
+ if (pdf_dict_mark(dict))
+ return;
+
+ fz_try(ctx)
+ {
+ for (i = 0; i < n; i++)
+ {
+ char *key = pdf_to_name(pdf_dict_get_key(dict, i));
+ pdf_obj *val = pdf_dict_get_val(dict, i);
+
+ if (!strcmp("Root", key))
+ mark_root(xref, opts, val);
+ else
+ mark_all(xref, opts, val, USE_CATALOGUE, -1);
+ }
+ }
+ fz_always(ctx)
+ {
+ pdf_dict_unmark(dict);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
+
+/*
+ * Create the two extra objects that a linearized file needs - the
+ * linearization parameters dictionary and the primary hint stream
+ * dictionary - and register them in the use/renumber/generation lists.
+ *
+ * The numeric values are not known yet, so each one is created as an
+ * integer holding INT_MIN and remembered in opts (linear_*, hints_*);
+ * update_linearization_params fills in the real values later.
+ */
+static void
+add_linearization_objs(pdf_document *xref, pdf_write_options *opts)
+{
+ pdf_obj *params_obj = NULL;
+ pdf_obj *params_ref = NULL;
+ pdf_obj *hint_obj = NULL;
+ pdf_obj *hint_ref = NULL;
+ pdf_obj *o = NULL;
+ int params_num, hint_num;
+ fz_context *ctx = xref->ctx;
+
+ fz_var(params_obj);
+ fz_var(params_ref);
+ fz_var(hint_obj);
+ fz_var(hint_ref);
+ fz_var(o);
+
+ fz_try(ctx)
+ {
+ /* Linearization params */
+ params_obj = pdf_new_dict(ctx, 10);
+ params_ref = pdf_new_ref(xref, params_obj);
+ params_num = pdf_to_num(params_ref);
+
+ opts->use_list[params_num] = USE_PARAMS;
+ opts->renumber_map[params_num] = params_num;
+ opts->rev_renumber_map[params_num] = params_num;
+ opts->gen_list[params_num] = 0;
+ opts->rev_gen_list[params_num] = 0;
+ o = pdf_new_real(ctx, 1.0);
+ pdf_dict_puts(params_obj, "Linearized", o);
+ pdf_drop_obj(o);
+ o = NULL;
+ /* The placeholders below are deliberately not dropped here: opts
+ * keeps the reference so the values can be set later. */
+ opts->linear_l = pdf_new_int(ctx, INT_MIN);
+ pdf_dict_puts(params_obj, "L", opts->linear_l);
+ opts->linear_h0 = pdf_new_int(ctx, INT_MIN);
+ o = pdf_new_array(ctx, 2);
+ pdf_array_push(o, opts->linear_h0);
+ opts->linear_h1 = pdf_new_int(ctx, INT_MIN);
+ pdf_array_push(o, opts->linear_h1);
+ pdf_dict_puts(params_obj, "H", o);
+ pdf_drop_obj(o);
+ o = NULL;
+ opts->linear_o = pdf_new_int(ctx, INT_MIN);
+ pdf_dict_puts(params_obj, "O", opts->linear_o);
+ opts->linear_e = pdf_new_int(ctx, INT_MIN);
+ pdf_dict_puts(params_obj, "E", opts->linear_e);
+ opts->linear_n = pdf_new_int(ctx, INT_MIN);
+ pdf_dict_puts(params_obj, "N", opts->linear_n);
+ opts->linear_t = pdf_new_int(ctx, INT_MIN);
+ pdf_dict_puts(params_obj, "T", opts->linear_t);
+
+ /* Primary hint stream */
+ hint_obj = pdf_new_dict(ctx, 10);
+ hint_ref = pdf_new_ref(xref, hint_obj);
+ hint_num = pdf_to_num(hint_ref);
+
+ opts->use_list[hint_num] = USE_HINTS;
+ opts->renumber_map[hint_num] = hint_num;
+ opts->rev_renumber_map[hint_num] = hint_num;
+ opts->gen_list[hint_num] = 0;
+ opts->rev_gen_list[hint_num] = 0;
+ o = pdf_new_int(ctx, 0);
+ pdf_dict_puts(hint_obj, "P", o);
+ pdf_drop_obj(o);
+ o = NULL;
+ opts->hints_s = pdf_new_int(ctx, INT_MIN);
+ pdf_dict_puts(hint_obj, "S", opts->hints_s);
+ /* FIXME: Do we have thumbnails? Do a T entry */
+ /* FIXME: Do we have outlines? Do an O entry */
+ /* FIXME: Do we have article threads? Do an A entry */
+ /* FIXME: Do we have named destinations? Do a E entry */
+ /* FIXME: Do we have interactive forms? Do a V entry */
+ /* FIXME: Do we have document information? Do an I entry */
+ /* FIXME: Do we have logical structure hierarchy? Do a C entry */
+ /* FIXME: Do L, Page Label hint table */
+ o = fz_new_name(ctx, "FlateDecode");
+ pdf_dict_puts(hint_obj, "Filter", o);
+ pdf_drop_obj(o);
+ o = NULL;
+ opts->hints_length = pdf_new_int(ctx, INT_MIN);
+ pdf_dict_puts(hint_obj, "Length", opts->hints_length);
+ /* NOTE(review): presumably -1 flags the entry as a stream whose
+ * data is not in the source file - confirm against the writer. */
+ xref->table[hint_num].stm_ofs = -1;
+ }
+ fz_always(ctx)
+ {
+ pdf_drop_obj(params_obj);
+ pdf_drop_obj(params_ref);
+ pdf_drop_obj(hint_ref);
+ pdf_drop_obj(hint_obj);
+ pdf_drop_obj(o);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
+
+/*
+ * Pull one category of resources (text, e.g. "Font") from an ancestor's
+ * Resources dictionary (dict) into the page's Resources dictionary (res).
+ *
+ * If res has no entry for the category, a private copy of the ancestor's
+ * entry is stored. Otherwise any keys that are missing from the page's
+ * existing entry are added to it; keys the page already has win.
+ */
+static void
+lpr_inherit_res_contents(fz_context *ctx, pdf_obj *res, pdf_obj *dict, char *text)
+{
+ pdf_obj *o, *r;
+ int i, n;
+
+ /* If the parent node doesn't have an entry of this type, give up. */
+ o = pdf_dict_gets(dict, text);
+ if (!o)
+ return;
+
+ /* If the resources dict we are building doesn't have an entry of this
+ * type yet, then just copy it (ensuring it's not a reference) */
+ r = pdf_dict_gets(res, text);
+ if (r == NULL)
+ {
+ o = pdf_resolve_indirect(o);
+ if (pdf_is_dict(o))
+ o = pdf_copy_dict(ctx, o);
+ else if (pdf_is_array(o))
+ o = pdf_copy_array(ctx, o);
else
+ o = NULL;
+ if (o)
+ {
+ pdf_dict_puts(res, text, o);
+ /* The dictionary now holds its own reference to the copy;
+ * drop ours so that the copy is not leaked. */
+ pdf_drop_obj(o);
+ }
+ return;
+ }
+
+ /* Otherwise we need to merge o into r. The keys must be looked up in,
+ * and added to, r (the page's entry for this category), not res (the
+ * enclosing Resources dictionary). */
+ if (pdf_is_dict(o) && pdf_is_dict(r))
+ {
+ n = pdf_dict_len(o);
+ for (i = 0; i < n; i++)
{
- renumberobj(xref, opts, obj);
+ pdf_obj *key = pdf_dict_get_key(o, i);
+ pdf_obj *val = pdf_dict_get_val(o, i);
+
+ if (pdf_dict_gets(r, pdf_to_name(key)))
+ continue;
+ pdf_dict_puts(r, pdf_to_name(key), val);
}
}
+}
- /* Create new table for the reordered, compacted xref */
- oldxref = xref->table;
- xref->table = fz_malloc_array(xref->ctx, xref->len, sizeof(pdf_xref_entry));
- xref->table[0] = oldxref[0];
+/*
+ * Copy inherited resources down into dict (a page's Resources dictionary)
+ * by following the Parent links upwards from node for at most depth
+ * levels. Nearer ancestors are processed first.
+ */
+static void
+lpr_inherit_res(fz_context *ctx, pdf_obj *node, int depth, pdf_obj *dict)
+{
+ while (1)
+ {
+ pdf_obj *o;
- /* Move used objects into the new compacted xref */
- newlen = 0;
- for (num = 1; num < xref->len; num++)
+ node = pdf_dict_gets(node, "Parent");
+ depth--;
+ /* Stop at the top of the tree, or once we have climbed as many
+ * levels as the caller descended. */
+ if (!node || depth < 0)
+ break;
+
+ o = pdf_dict_gets(node, "Resources");
+ if (o)
+ {
+ lpr_inherit_res_contents(ctx, dict, o, "ExtGState");
+ lpr_inherit_res_contents(ctx, dict, o, "ColorSpace");
+ lpr_inherit_res_contents(ctx, dict, o, "Pattern");
+ lpr_inherit_res_contents(ctx, dict, o, "Shading");
+ lpr_inherit_res_contents(ctx, dict, o, "XObject");
+ lpr_inherit_res_contents(ctx, dict, o, "Font");
+ lpr_inherit_res_contents(ctx, dict, o, "ProcSet");
+ lpr_inherit_res_contents(ctx, dict, o, "Properties");
+ }
+ }
+}
+
+/*
+ * Look up the key text on node, then on each of its ancestors in turn
+ * (following Parent for at most depth levels). Returns the first value
+ * found, with any indirection resolved, or NULL if none has it. The
+ * result is a borrowed pointer: no reference is taken here.
+ */
+static pdf_obj *
+lpr_inherit(fz_context *ctx, pdf_obj *node, char *text, int depth)
+{
+ do
{
- if (opts->uselist[num])
+ pdf_obj *o = pdf_dict_gets(node, text);
+
+ if (o)
+ return pdf_resolve_indirect(o);
+ node = pdf_dict_gets(node, "Parent");
+ depth--;
+ }
+ while (depth >= 0 && node);
+
+ return NULL;
+}
+
+/*
+ * "Linearize page resources": walk the page tree below node, pushing
+ * inherited attributes down onto the pages themselves.
+ *
+ * For each page, the Resources of its ancestors are merged into its own
+ * Resources dictionary (one is created if missing), and any MediaBox,
+ * CropBox, BleedBox, TrimBox, ArtBox or Rotate found on the page or an
+ * ancestor is stored directly on the page. Once all the kids of an
+ * intermediate node have been processed, those entries are deleted from it.
+ *
+ * depth is the number of levels between node and the root of the walk;
+ * page is the number of pages seen so far. Returns the updated page count.
+ * The mark on node guards against cycles in the tree and is always cleared
+ * again before returning, even if an exception is thrown.
+ */
+static int
+lpr(fz_context *ctx, pdf_write_options *opts, pdf_obj *node, int depth, int page)
+{
+ pdf_obj *kids;
+ pdf_obj *o = NULL;
+ int i, n;
+
+ if (pdf_dict_mark(node))
+ return page;
+
+ fz_var(o);
+
+ fz_try(ctx)
+ {
+ if (!strcmp("Page", pdf_to_name(pdf_dict_gets(node, "Type"))))
{
- if (newlen < opts->renumbermap[num])
- newlen = opts->renumbermap[num];
- xref->table[opts->renumbermap[num]] = oldxref[num];
+ pdf_obj *r; /* r is deliberately not cleaned up */
+
+ /* Copy resources down to the child */
+ o = pdf_keep_obj(pdf_dict_gets(node, "Resources"));
+ if (!o)
+ {
+ /* A new dictionary already carries the one reference that
+ * is dropped in fz_always below; taking another with
+ * pdf_keep_obj would leak it. */
+ o = pdf_new_dict(ctx, 2);
+ pdf_dict_puts(node, "Resources", o);
+ }
+ lpr_inherit_res(ctx, node, depth, o);
+ r = lpr_inherit(ctx, node, "MediaBox", depth);
+ if (r)
+ pdf_dict_puts(node, "MediaBox", r);
+ r = lpr_inherit(ctx, node, "CropBox", depth);
+ if (r)
+ pdf_dict_puts(node, "CropBox", r);
+ r = lpr_inherit(ctx, node, "BleedBox", depth);
+ if (r)
+ pdf_dict_puts(node, "BleedBox", r);
+ r = lpr_inherit(ctx, node, "TrimBox", depth);
+ if (r)
+ pdf_dict_puts(node, "TrimBox", r);
+ r = lpr_inherit(ctx, node, "ArtBox", depth);
+ if (r)
+ pdf_dict_puts(node, "ArtBox", r);
+ r = lpr_inherit(ctx, node, "Rotate", depth);
+ if (r)
+ pdf_dict_puts(node, "Rotate", r);
+ page++;
}
else
{
- if (oldxref[num].obj)
- pdf_drop_obj(oldxref[num].obj);
+ kids = pdf_dict_gets(node, "Kids");
+ n = pdf_array_len(kids);
+ for(i = 0; i < n; i++)
+ {
+ page = lpr(ctx, opts, pdf_array_get(kids, i), depth+1, page);
+ }
+ /* Every page below has its own copy now, so the inheritable
+ * entries are no longer needed on this node. */
+ pdf_dict_dels(node, "Resources");
+ pdf_dict_dels(node, "MediaBox");
+ pdf_dict_dels(node, "CropBox");
+ pdf_dict_dels(node, "BleedBox");
+ pdf_dict_dels(node, "TrimBox");
+ pdf_dict_dels(node, "ArtBox");
+ pdf_dict_dels(node, "Rotate");
}
}
+ fz_always(ctx)
+ {
+ pdf_drop_obj(o);
+ /* Unmark here (not after the try block) so that the node is not
+ * left marked if an exception is being propagated. */
+ pdf_dict_unmark(node);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
- fz_free(xref->ctx, oldxref);
- /* Update the used objects count in compacted xref */
- xref->len = newlen + 1;
+ return page;
+}
- /* Update list of used objects to fit with compacted xref */
- for (num = 1; num < xref->len; num++)
- opts->uselist[num] = 1;
+/*
+ * Push inherited page attributes down onto every page, starting from the
+ * Pages entry of the document catalogue (trailer Root).
+ */
+static void
+linearize_page_resources(pdf_document *xref, pdf_write_options *opts)
+{
+	pdf_obj *root = pdf_dict_gets(xref->trailer, "Root");
+	pdf_obj *pages = pdf_dict_gets(root, "Pages");
+
+	lpr(xref->ctx, opts, pages, 0, 0);
}
+/*
+ * Prepare the document for linearized output: give every page its own
+ * resources, work out which objects are used by which page, add the
+ * linearization params and hint stream objects, and then renumber all
+ * objects into the section order defined by order_ge.
+ *
+ * On return opts->start is the position of the linearization params
+ * object in the new order, and opts->page_object_lists holds a sorted,
+ * duplicate free list of (renumbered) objects for each page.
+ */
+static void
+linearize(pdf_document *xref, pdf_write_options *opts)
+{
+ int i;
+ /* Two more than the current length: presumably to allow for the two
+ * objects created by add_linearization_objs below - confirm. */
+ int n = xref->len + 2;
+ int *reorder;
+ int *rev_renumber_map;
+ int *rev_gen_list;
+ fz_context *ctx = xref->ctx;
+
+ opts->page_object_lists = page_objects_list_create(ctx);
+
+ /* Ensure that every page has local references of its resources */
+ /* FIXME: We could 'thin' the resources according to what is actually
+ * required for each page, but this would require us to run the page
+ * content streams. */
+ linearize_page_resources(xref, opts);
+
+ /* Walk the objects for each page, marking which ones are used, where */
+ memset(opts->use_list, 0, n * sizeof(int));
+ mark_trailer(xref, opts, xref->trailer);
+
+ /* Add new objects required for linearization */
+ add_linearization_objs(xref, opts);
+
+ /* Allocate/init the structures used for renumbering the objects */
+ /* NOTE(review): if one of the later allocations throws, the earlier
+ * ones are not freed here. */
+ reorder = fz_calloc(ctx, n, sizeof(int));
+ rev_renumber_map = fz_calloc(ctx, n, sizeof(int));
+ rev_gen_list = fz_calloc(ctx, n, sizeof(int));
+ for (i = 0; i < n; i++)
+ {
+ reorder[i] = i;
+ }
+
+ /* Heap sort the reordering */
+ /* Entry 0 is left out of the sort, so object 0 stays where it is. */
+ heap_sort(reorder+1, n-1, opts->use_list, &order_ge);
+
+ /* Find the split point */
+ /* NOTE(review): this scan has no upper bound; it relies on the params
+ * object (flagged USE_PARAMS by add_linearization_objs) being present
+ * somewhere in reorder[1..n). */
+ for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++);
+ opts->start = i;
+
+ /* Roll the reordering into the renumber_map */
+ for (i = 0; i < n; i++)
+ {
+ opts->renumber_map[reorder[i]] = i;
+ rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]];
+ rev_gen_list[i] = opts->rev_gen_list[reorder[i]];
+ }
+ fz_free(ctx, opts->rev_renumber_map);
+ fz_free(ctx, opts->rev_gen_list);
+ opts->rev_renumber_map = rev_renumber_map;
+ opts->rev_gen_list = rev_gen_list;
+ fz_free(ctx, reorder);
+
+ /* Apply the renumber_map */
+ page_objects_list_renumber(opts);
+ renumberobjs(xref, opts);
+
+ page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists);
+}
+
+/*
+ * Store the real values into the placeholder integers that
+ * add_linearization_objs left in the linearization params and hint stream
+ * dictionaries, using the offsets and lengths gathered in opts.
+ */
+static void
+update_linearization_params(pdf_document *xref, pdf_write_options *opts)
+{
+	/* The primary hint stream is the last object (n-1); object 1 is the
+	 * one that follows it in the file. */
+	int hint_obj_ofs = opts->ofs_list[xref->len-1];
+	int next_obj_ofs = opts->ofs_list[1];
+	int hint_len = opts->hintstream_len;
+
+	/* L: total length of the file */
+	pdf_set_int(opts->linear_l, opts->file_len);
+	/* H[0]: offset of the primary hint stream (of object, not stream!) */
+	pdf_set_int(opts->linear_h0, hint_obj_ofs);
+	/* H[1]: length of the primary hint stream (of object, not stream!) */
+	pdf_set_int(opts->linear_h1, next_obj_ofs - hint_obj_ofs + hint_len);
+	/* O: object number of the first page's page object (the first object
+	 * of page 0) */
+	pdf_set_int(opts->linear_o, opts->page_object_lists->page[0]->object[0]);
+	/* E: offset of the end of the first page. The first page is followed
+	 * by the primary hint stream and then the remaining pages (object
+	 * 1...); the hint stream is counted as part of the first page's data. */
+	pdf_set_int(opts->linear_e, next_obj_ofs + hint_len);
+	/* N: number of pages in the document */
+	pdf_set_int(opts->linear_n, opts->page_count);
+	/* T: offset of the first entry in the main xref table */
+	pdf_set_int(opts->linear_t, opts->first_xref_entry_offset + hint_len);
+	/* S: offset of the shared objects hint table within the hint stream */
+	pdf_set_int(opts->hints_s, opts->hints_shared_offset);
+	/* Length: length of the primary hint stream data */
+	pdf_set_int(opts->hints_length, hint_len);
+}
+
+
/*
* Make sure we have loaded objects from object streams.
*/
@@ -400,17 +1443,19 @@ static void addhexfilter(pdf_document *xref, pdf_obj *dict)
pdf_drop_obj(newdp);
}
-static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj, int num, int gen)
+static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen)
{
fz_buffer *buf, *tmp;
pdf_obj *newlen;
+ pdf_obj *obj;
fz_context *ctx = xref->ctx;
- int orig_num = opts->revrenumbermap[num];
- int orig_gen = opts->revgenlist[num];
+ int orig_num = opts->rev_renumber_map[num];
+ int orig_gen = opts->rev_gen_list[num];
buf = pdf_load_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen);
- if (opts->doascii && isbinarystream(buf))
+ obj = pdf_copy_dict(ctx, obj_orig);
+ if (opts->do_ascii && isbinarystream(buf))
{
tmp = hexbuf(ctx, buf->data, buf->len);
fz_drop_buffer(ctx, buf);
@@ -424,28 +1469,31 @@ static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj
}
fprintf(opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ pdf_fprint_obj(opts->out, obj, opts->do_expand == 0);
fprintf(opts->out, "stream\n");
fwrite(buf->data, 1, buf->len, opts->out);
fprintf(opts->out, "endstream\nendobj\n\n");
fz_drop_buffer(ctx, buf);
+ pdf_drop_obj(obj);
}
-static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj, int num, int gen)
+static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen)
{
fz_buffer *buf, *tmp;
pdf_obj *newlen;
+ pdf_obj *obj;
fz_context *ctx = xref->ctx;
- int orig_num = opts->revrenumbermap[num];
- int orig_gen = opts->revgenlist[num];
+ int orig_num = opts->rev_renumber_map[num];
+ int orig_gen = opts->rev_gen_list[num];
buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen);
+ obj = pdf_copy_dict(ctx, obj_orig);
pdf_dict_dels(obj, "Filter");
pdf_dict_dels(obj, "DecodeParms");
- if (opts->doascii && isbinarystream(buf))
+ if (opts->do_ascii && isbinarystream(buf))
{
tmp = hexbuf(ctx, buf->data, buf->len);
fz_drop_buffer(ctx, buf);
@@ -459,12 +1507,13 @@ static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *o
pdf_drop_obj(newlen);
fprintf(opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ pdf_fprint_obj(opts->out, obj, opts->do_expand == 0);
fprintf(opts->out, "stream\n");
fwrite(buf->data, 1, buf->len, opts->out);
fprintf(opts->out, "endstream\nendobj\n\n");
fz_drop_buffer(ctx, buf);
+ pdf_drop_obj(obj);
}
static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, int gen)
@@ -481,13 +1530,13 @@ static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, in
type = pdf_dict_gets(obj, "Type");
if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "ObjStm"))
{
- opts->uselist[num] = 0;
+ opts->use_list[num] = 0;
pdf_drop_obj(obj);
return;
}
if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "XRef"))
{
- opts->uselist[num] = 0;
+ opts->use_list[num] = 0;
pdf_drop_obj(obj);
return;
}
@@ -496,35 +1545,41 @@ static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, in
if (!pdf_is_stream(xref, num, gen))
{
fprintf(opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(opts->out, obj, opts->doexpand == 0);
+ pdf_fprint_obj(opts->out, obj, opts->do_expand == 0);
fprintf(opts->out, "endobj\n\n");
}
+ else if (xref->table[num].stm_ofs < 0 && xref->table[num].stm_buf == NULL)
+ {
+ fprintf(opts->out, "%d %d obj\n", num, gen);
+ pdf_fprint_obj(opts->out, obj, opts->do_expand == 0);
+ fprintf(opts->out, "stream\nendstream\nendobj\n\n");
+ }
else
{
int dontexpand = 0;
- if (opts->doexpand != 0 && opts->doexpand != fz_expand_all)
+ if (opts->do_expand != 0 && opts->do_expand != fz_expand_all)
{
pdf_obj *o;
if ((o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "XObject")) &&
(o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Image")))
- dontexpand = !(opts->doexpand & fz_expand_images);
+ dontexpand = !(opts->do_expand & fz_expand_images);
if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "Font"))
- dontexpand = !(opts->doexpand & fz_expand_fonts);
+ dontexpand = !(opts->do_expand & fz_expand_fonts);
if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "FontDescriptor"))
- dontexpand = !(opts->doexpand & fz_expand_fonts);
+ dontexpand = !(opts->do_expand & fz_expand_fonts);
if ((o = pdf_dict_gets(obj, "Length1")) != NULL)
- dontexpand = !(opts->doexpand & fz_expand_fonts);
+ dontexpand = !(opts->do_expand & fz_expand_fonts);
if ((o = pdf_dict_gets(obj, "Length2")) != NULL)
- dontexpand = !(opts->doexpand & fz_expand_fonts);
+ dontexpand = !(opts->do_expand & fz_expand_fonts);
if ((o = pdf_dict_gets(obj, "Length3")) != NULL)
- dontexpand = !(opts->doexpand & fz_expand_fonts);
+ dontexpand = !(opts->do_expand & fz_expand_fonts);
if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Type1C"))
- dontexpand = !(opts->doexpand & fz_expand_fonts);
+ dontexpand = !(opts->do_expand & fz_expand_fonts);
if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "CIDFontType0C"))
- dontexpand = !(opts->doexpand & fz_expand_fonts);
+ dontexpand = !(opts->do_expand & fz_expand_fonts);
}
- if (opts->doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj))
+ if (opts->do_expand && !dontexpand && !pdf_is_jpx_image(ctx, obj))
expandstream(xref, opts, obj, num, gen);
else
copystream(xref, opts, obj, num, gen);
@@ -533,46 +1588,70 @@ static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, in
pdf_drop_obj(obj);
}
-static void writexref(pdf_document *xref, pdf_write_options *opts)
+static void writexref(pdf_document *xref, pdf_write_options *opts, int from, int to, int first, int main_xref_offset, int startxref)
{
- pdf_obj *trailer;
+ pdf_obj *trailer = NULL;
pdf_obj *obj;
- int startxref;
+ pdf_obj *nobj = NULL;
int num;
fz_context *ctx = xref->ctx;
- startxref = ftell(opts->out);
-
- fprintf(opts->out, "xref\n0 %d\n", xref->len);
- for (num = 0; num < xref->len; num++)
+ fprintf(opts->out, "xref\n%d %d\n", from, to - from);
+ opts->first_xref_entry_offset = ftell(opts->out);
+ for (num = from; num < to; num++)
{
- if (opts->uselist[num])
- fprintf(opts->out, "%010d %05d n \n", opts->ofslist[num], opts->genlist[num]);
+ if (opts->use_list[num])
+ fprintf(opts->out, "%010d %05d n \n", opts->ofs_list[num], opts->gen_list[num]);
else
- fprintf(opts->out, "%010d %05d f \n", opts->ofslist[num], opts->genlist[num]);
+ fprintf(opts->out, "%010d %05d f \n", opts->ofs_list[num], opts->gen_list[num]);
}
fprintf(opts->out, "\n");
- trailer = pdf_new_dict(ctx, 5);
+ fz_var(trailer);
+ fz_var(nobj);
- obj = pdf_new_int(ctx, xref->len);
- pdf_dict_puts(trailer, "Size", obj);
- pdf_drop_obj(obj);
+ fz_try(ctx)
+ {
+ trailer = pdf_new_dict(ctx, 5);
- obj = pdf_dict_gets(xref->trailer, "Info");
- if (obj)
- pdf_dict_puts(trailer, "Info", obj);
+ nobj = pdf_new_int(ctx, to);
+ pdf_dict_puts(trailer, "Size", nobj);
+ pdf_drop_obj(nobj);
+ nobj = NULL;
- obj = pdf_dict_gets(xref->trailer, "Root");
- if (obj)
- pdf_dict_puts(trailer, "Root", obj);
+ if (first)
+ {
+ obj = pdf_dict_gets(xref->trailer, "Info");
+ if (obj)
+ pdf_dict_puts(trailer, "Info", obj);
- obj = pdf_dict_gets(xref->trailer, "ID");
- if (obj)
- pdf_dict_puts(trailer, "ID", obj);
+ obj = pdf_dict_gets(xref->trailer, "Root");
+ if (obj)
+ pdf_dict_puts(trailer, "Root", obj);
+
+ obj = pdf_dict_gets(xref->trailer, "ID");
+ if (obj)
+ pdf_dict_puts(trailer, "ID", obj);
+ }
+ if (main_xref_offset != 0)
+ {
+ nobj = pdf_new_int(ctx, main_xref_offset);
+ pdf_dict_puts(trailer, "Prev", nobj);
+ pdf_drop_obj(nobj);
+ nobj = NULL;
+ }
+ }
+ fz_always(ctx)
+ {
+ pdf_drop_obj(nobj);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
fprintf(opts->out, "trailer\n");
- pdf_fprint_obj(opts->out, trailer, opts->doexpand == 0);
+ pdf_fprint_obj(opts->out, trailer, opts->do_expand == 0);
fprintf(opts->out, "\n");
pdf_drop_obj(trailer);
@@ -580,6 +1659,388 @@ static void writexref(pdf_document *xref, pdf_write_options *opts)
fprintf(opts->out, "startxref\n%d\n%%%%EOF\n", startxref);
}
+/*
+ * Write newlines to file until its position reaches target. Nothing is
+ * written if the position is already at or beyond target.
+ */
+static void
+padto(FILE *file, int target)
+{
+	int at;
+
+	for (at = ftell(file); at < target; at++)
+		fputc('\n', file);
+}
+
+/*
+ * Write object num (if it is to be kept) and record its generation and
+ * file offset in opts.
+ *
+ * On passes after the first (pass > 0) the output is first padded up to
+ * the offset recorded for this object, before the offset is re-read from
+ * the file position.
+ */
+static void
+dowriteobject(pdf_document *xref, pdf_write_options *opts, int num, int pass)
+{
+	int type = xref->table[num].type;
+
+	/* Generation: taken from the xref entry for 'f' and 'n' entries,
+	 * forced to 0 for 'o' entries; any other type leaves it untouched. */
+	switch (type)
+	{
+	case 'f':
+	case 'n':
+		opts->gen_list[num] = xref->table[num].gen;
+		break;
+	case 'o':
+		opts->gen_list[num] = 0;
+		break;
+	default:
+		break;
+	}
+
+	/* When garbage collecting, unmarked objects are not written. */
+	if (opts->do_garbage && !opts->use_list[num])
+		return;
+
+	/* Only 'n' and 'o' entries are written; everything else is
+	 * flagged as unused. */
+	if (type != 'n' && type != 'o')
+	{
+		opts->use_list[num] = 0;
+		return;
+	}
+
+	if (pass > 0)
+		padto(opts->out, opts->ofs_list[num]);
+	opts->ofs_list[num] = ftell(opts->out);
+	writeobject(xref, opts, num, opts->gen_list[num]);
+}
+
+/*
+ * Write the PDF header followed by all objects.
+ *
+ * Objects are emitted in the order start, start+1 .. len-1, then
+ * 1 .. start-1. When linearizing, a first xref section is written
+ * straight after object 'start'; on pass 0 its offset is recorded, on
+ * later passes the output is padded up to the recorded offset first.
+ */
+static void
+writeobjects(pdf_document *xref, pdf_write_options *opts, int pass)
+{
+	FILE *out = opts->out;
+	int n;
+
+	fprintf(out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10);
+	fprintf(out, "%%\316\274\341\277\246\n\n");
+
+	dowriteobject(xref, opts, opts->start, pass);
+
+	if (opts->do_linear)
+	{
+		/* Write first xref */
+		if (pass == 0)
+			opts->first_xref_offset = ftell(out);
+		else
+			padto(out, opts->first_xref_offset);
+		writexref(xref, opts, opts->start, xref->len, 1, opts->main_xref_offset, 0);
+	}
+
+	n = opts->start+1;
+	while (n < xref->len)
+	{
+		dowriteobject(xref, opts, n, pass);
+		n++;
+	}
+
+	/* On the second linearized pass, leave room for the hint stream
+	 * data before the objects that follow it. */
+	if (opts->do_linear && pass == 1)
+		padto(out, opts->ofs_list[1] + opts->hintstream_len);
+
+	n = 1;
+	while (n < opts->start)
+	{
+		dowriteobject(xref, opts, n, pass);
+		n++;
+	}
+}
+
+/*
+ * Return the number of bits needed to represent x, i.e. the smallest i
+ * for which (1<<i) > x. Returns 0 when x <= 0.
+ *
+ * The count is done on an unsigned copy of x so that no signed shift can
+ * overflow; values of 2^30 and above therefore yield 31 rather than
+ * depending on the (undefined) result of 1<<31.
+ */
+static int
+my_log2(int x)
+{
+	unsigned int v;
+	int bits = 0;
+
+	if (x <= 0)
+		return 0;
+
+	for (v = (unsigned int)x; v != 0; v >>= 1)
+		bits++;
+
+	return bits;
+}
+
+/*
+ * Offset at which object num ends in the output file, i.e. the offset of
+ * whatever is written immediately after it. Objects are written in the
+ * order start .. len-1, then 1 .. start-1, so object start-1 is followed
+ * by the main xref, object len-1 is followed by object 1, and every other
+ * object is followed by object num+1.
+ */
+static int
+hint_object_end_ofs(pdf_document *xref, pdf_write_options *opts, int num)
+{
+	if (num == opts->start-1)
+		return opts->main_xref_offset;
+	if (num < xref->len-1)
+		return opts->ofs_list[num+1];
+	return opts->ofs_list[1];
+}
+
+/*
+ * Append the page offset hint table (Tables F.3/F.4 in the comments
+ * below) and the shared object hint table (Tables F.5/F.6) to buf.
+ *
+ * The byte position in buf at which the shared object table starts is
+ * stored in opts->hints_shared_offset. As a side effect the per-page
+ * records in opts->page_object_lists are updated: num_objects is
+ * incremented, min_ofs/max_ofs are widened, and num_shared is set.
+ * Page 0 must exist; its record is read unconditionally.
+ */
+static void
+make_page_offset_hints(pdf_document *xref, pdf_write_options *opts, fz_buffer *buf)
+{
+	fz_context *ctx = xref->ctx;
+	int i, j;
+	int min_objs_per_page, max_objs_per_page;
+	int min_page_length, max_page_length;
+	int objs_per_page_bits;
+	int min_shared_object, max_shared_object;
+	int max_shared_object_refs = 0;
+	int min_shared_length, max_shared_length;
+	page_objects **pop = &opts->page_object_lists->page[0];
+	int page_len_bits, shared_object_bits, shared_object_id_bits;
+	int shared_length_bits;
+
+	/* Pass 1: classify every object, gathering the range of shared
+	 * object numbers, the range of shared/page-1 object lengths and
+	 * the per-page object counts and offset extents. */
+	min_shared_object = xref->len;
+	max_shared_object = 1;
+	min_shared_length = opts->file_len;
+	max_shared_length = 0;
+	for (i=0; i < xref->len; i++)
+	{
+		int min, max, page;
+
+		min = opts->ofs_list[i];
+		max = hint_object_end_ofs(xref, opts, i);
+
+		assert(max > min);
+
+		if (opts->use_list[i] & USE_SHARED)
+		{
+			page = -1;
+			if (i < min_shared_object)
+				min_shared_object = i;
+			if (i > max_shared_object)
+				max_shared_object = i;
+			if (min_shared_length > max - min)
+				min_shared_length = max - min;
+			if (max_shared_length < max - min)
+				max_shared_length = max - min;
+		}
+		else if (opts->use_list[i] & (USE_CATALOGUE | USE_HINTS | USE_PARAMS))
+			page = -1;
+		else if (opts->use_list[i] & USE_PAGE1)
+		{
+			/* Page 1 objects also get entries in the shared
+			 * object table, so they take part in the shared
+			 * length range. */
+			page = 0;
+			if (min_shared_length > max - min)
+				min_shared_length = max - min;
+			if (max_shared_length < max - min)
+				max_shared_length = max - min;
+		}
+		else if (opts->use_list[i] == 0)
+			page = -1;
+		else
+			page = opts->use_list[i]>>USE_PAGE_SHIFT;
+
+		if (page >= 0)
+		{
+			pop[page]->num_objects++;
+			if (pop[page]->min_ofs > min)
+				pop[page]->min_ofs = min;
+			if (pop[page]->max_ofs < max)
+				pop[page]->max_ofs = max;
+		}
+	}
+
+	/* Pass 2: range of objects-per-page and page lengths. */
+	min_objs_per_page = max_objs_per_page = pop[0]->num_objects;
+	min_page_length = max_page_length = pop[0]->max_ofs - pop[0]->min_ofs;
+	for (i=1; i < opts->page_count; i++)
+	{
+		int tmp;
+		if (min_objs_per_page > pop[i]->num_objects)
+			min_objs_per_page = pop[i]->num_objects;
+		if (max_objs_per_page < pop[i]->num_objects)
+			max_objs_per_page = pop[i]->num_objects;
+		tmp = pop[i]->max_ofs - pop[i]->min_ofs;
+		if (tmp < min_page_length)
+			min_page_length = tmp;
+		if (tmp > max_page_length)
+			max_page_length = tmp;
+	}
+
+	/* Pass 3: count shared object references per page. For page 0 the
+	 * objects flagged USE_PAGE1 are counted, for all other pages the
+	 * objects flagged USE_SHARED. */
+	for (i=0; i < opts->page_count; i++)
+	{
+		int count = 0;
+		page_objects *po = opts->page_object_lists->page[i];
+		for (j = 0; j < po->len; j++)
+		{
+			if (i == 0 && opts->use_list[po->object[j]] & USE_PAGE1)
+				count++;
+			else if (i != 0 && opts->use_list[po->object[j]] & USE_SHARED)
+				count++;
+		}
+		po->num_shared = count;
+		if (i == 0 || count > max_shared_object_refs)
+			max_shared_object_refs = count;
+	}
+	/* No shared objects seen at all: collapse the range to object 0. */
+	if (min_shared_object > max_shared_object)
+		min_shared_object = max_shared_object = 0;
+
+	/* Table F.3 - Header */
+	/* Header Item 1: Least number of objects in a page */
+	fz_write_buffer_bits(ctx, buf, min_objs_per_page, 32);
+	/* Header Item 2: Location of first pages page object */
+	fz_write_buffer_bits(ctx, buf, opts->ofs_list[pop[0]->page_object_number], 32);
+	/* Header Item 3: Number of bits required to represent the difference
+	 * between the greatest and least number of objects in a page. */
+	objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page);
+	fz_write_buffer_bits(ctx, buf, objs_per_page_bits, 16);
+	/* Header Item 4: Least length of a page. */
+	fz_write_buffer_bits(ctx, buf, min_page_length, 32);
+	/* Header Item 5: Number of bits needed to represent the difference
+	 * between the greatest and least length of a page. */
+	page_len_bits = my_log2(max_page_length - min_page_length);
+	fz_write_buffer_bits(ctx, buf, page_len_bits, 16);
+	/* Header Item 6: Least offset to start of content stream (Acrobat
+	 * sets this to always be 0) */
+	fz_write_buffer_bits(ctx, buf, 0, 32);
+	/* Header Item 7: Number of bits needed to represent the difference
+	 * between the greatest and least offset to content stream (Acrobat
+	 * sets this to always be 0) */
+	fz_write_buffer_bits(ctx, buf, 0, 16);
+	/* Header Item 8: Least content stream length. (Acrobat
+	 * sets this to always be 0) */
+	fz_write_buffer_bits(ctx, buf, 0, 32);
+	/* Header Item 9: Number of bits needed to represent the difference
+	 * between the greatest and least content stream length (Acrobat
+	 * sets this to always be the same as item 5) */
+	fz_write_buffer_bits(ctx, buf, page_len_bits, 16);
+	/* Header Item 10: Number of bits needed to represent the greatest
+	 * number of shared object references. */
+	shared_object_bits = my_log2(max_shared_object_refs);
+	fz_write_buffer_bits(ctx, buf, shared_object_bits, 16);
+	/* Header Item 11: Number of bits needed to represent the greatest
+	 * shared object identifier. */
+	shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[0]->num_shared);
+	fz_write_buffer_bits(ctx, buf, shared_object_id_bits, 16);
+	/* Header Item 12: Number of bits needed to represent the numerator
+	 * of the fractions. We always send 0. */
+	fz_write_buffer_bits(ctx, buf, 0, 16);
+	/* Header Item 13: Number of bits needed to represent the denominator
+	 * of the fractions. We always send 0. */
+	fz_write_buffer_bits(ctx, buf, 0, 16);
+
+	/* Table F.4 - Page offset hint table (per page) */
+	/* Item 1: A number that, when added to the least number of objects
+	 * on a page, gives the number of objects in the page. */
+	for (i = 0; i < opts->page_count; i++)
+	{
+		fz_write_buffer_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits);
+	}
+	fz_write_buffer_pad(ctx, buf);
+	/* Item 2: A number that, when added to the least page length, gives
+	 * the length of the page in bytes. */
+	for (i = 0; i < opts->page_count; i++)
+	{
+		fz_write_buffer_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
+	}
+	fz_write_buffer_pad(ctx, buf);
+	/* Item 3: The number of shared objects referenced from the page. */
+	for (i = 0; i < opts->page_count; i++)
+	{
+		fz_write_buffer_bits(ctx, buf, pop[i]->num_shared, shared_object_bits);
+	}
+	fz_write_buffer_pad(ctx, buf);
+	/* Item 4: Shared object id for each shared object ref in every page.
+	 * Spec says "not for page 1", but acrobat does send page 1's - all
+	 * as zeros. */
+	for (i = 0; i < opts->page_count; i++)
+	{
+		for (j = 0; j < pop[i]->len; j++)
+		{
+			int o = pop[i]->object[j];
+			if (i == 0 && opts->use_list[o] & USE_PAGE1)
+				fz_write_buffer_bits(ctx, buf, 0 /* o - pop[0]->page_object_number */, shared_object_id_bits);
+			if (i != 0 && opts->use_list[o] & USE_SHARED)
+				fz_write_buffer_bits(ctx, buf, o - min_shared_object + pop[0]->num_shared, shared_object_id_bits);
+		}
+	}
+	fz_write_buffer_pad(ctx, buf);
+	/* Item 5: Numerator of fractional position for each shared object reference. */
+	/* We always send 0 in 0 bits */
+	/* Item 6: A number that, when added to the least offset to the start
+	 * of the content stream (F.3 Item 6), gives the offset in bytes of
+	 * start of the pages content stream object relative to the beginning
+	 * of the page. Always 0 in 0 bits. */
+	/* Item 7: A number that, when added to the least content stream length
+	 * (F.3 Item 8), gives the length of the pages content stream object.
+	 * Always == Item 2 as least content stream length = least page stream
+	 * length.
+	 */
+	for (i = 0; i < opts->page_count; i++)
+	{
+		fz_write_buffer_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
+	}
+
+	/* Pad, and then do shared object hint table */
+	fz_write_buffer_pad(ctx, buf);
+	opts->hints_shared_offset = buf->len;
+
+	/* Table F.5: */
+	/* Header Item 1: Object number of the first object in the shared
+	 * objects section. */
+	fz_write_buffer_bits(ctx, buf, min_shared_object, 32);
+	/* Header Item 2: Location of first object in the shared objects
+	 * section. */
+	fz_write_buffer_bits(ctx, buf, opts->ofs_list[min_shared_object], 32);
+	/* Header Item 3: The number of shared object entries for the first
+	 * page. */
+	fz_write_buffer_bits(ctx, buf, pop[0]->num_shared, 32);
+	/* Header Item 4: The number of shared object entries for the shared
+	 * objects section + first page. */
+	fz_write_buffer_bits(ctx, buf, max_shared_object - min_shared_object + pop[0]->num_shared, 32);
+	/* Header Item 5: The number of bits needed to represent the greatest
+	 * number of objects in a shared object group (Always 0). */
+	fz_write_buffer_bits(ctx, buf, 0, 16);
+	/* Header Item 6: The least length of a shared object group in bytes. */
+	fz_write_buffer_bits(ctx, buf, min_shared_length, 32);
+	/* Header Item 7: The number of bits required to represent the
+	 * difference between the greatest and least length of a shared object
+	 * group. */
+	shared_length_bits = my_log2(max_shared_length - min_shared_length);
+	fz_write_buffer_bits(ctx, buf, shared_length_bits, 16);
+
+	/* Table F.6 */
+	/* Item 1: Shared object group length (page 1 objects) */
+	for (j = 0; j < pop[0]->len; j++)
+	{
+		int o = pop[0]->object[j];
+		int min, max;
+		min = opts->ofs_list[o];
+		max = hint_object_end_ofs(xref, opts, o);
+		if (opts->use_list[o] & USE_PAGE1)
+			fz_write_buffer_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
+	}
+	/* Item 1: Shared object group length (shared objects) */
+	/* NOTE(review): this loop emits one entry for every object number
+	 * in [min_shared_object, max_shared_object], i.e. max - min + 1
+	 * entries, whether or not the object is flagged USE_SHARED, while
+	 * F.5 Header Item 4 and the MD5 flag loop below use max - min.
+	 * Left unchanged; confirm the intended count against the spec. */
+	for (i = min_shared_object; i <= max_shared_object; i++)
+	{
+		int min, max;
+		min = opts->ofs_list[i];
+		max = hint_object_end_ofs(xref, opts, i);
+		fz_write_buffer_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
+	}
+	fz_write_buffer_pad(ctx, buf);
+
+	/* Item 2: MD5 presence flags */
+	for (i = max_shared_object - min_shared_object + pop[0]->num_shared; i > 0; i--)
+	{
+		fz_write_buffer_bits(ctx, buf, 0, 1);
+	}
+	fz_write_buffer_pad(ctx, buf);
+	/* Item 3: MD5 sums (not present) */
+	fz_write_buffer_pad(ctx, buf);
+	/* Item 4: Number of objects in the group (not present) */
+}
+
+/*
+ * Build the hint tables into a temporary buffer, install that buffer as
+ * the stream of object xref->len-1, and record its length in
+ * opts->hintstream_len. Any error is propagated to the caller after the
+ * local buffer reference has been dropped.
+ */
+static void
+make_hint_stream(pdf_document *xref, pdf_write_options *opts)
+{
+	fz_context *ctx = xref->ctx;
+	fz_buffer *hints = fz_new_buffer(ctx, 100);
+
+	fz_try(ctx)
+	{
+		make_page_offset_hints(xref, opts, hints);
+		pdf_update_stream(xref, xref->len-1, hints);
+		opts->hintstream_len = hints->len;
+	}
+	fz_always(ctx)
+	{
+		/* Our reference is released on both the success and the
+		 * error path. */
+		fz_drop_buffer(ctx, hints);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
+
+
void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz_opts)
{
int lastfree;
@@ -598,89 +2059,114 @@ void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz
fz_try(ctx)
{
- opts.doexpand = fz_opts ? fz_opts->doexpand : 0;
- opts.dogarbage = fz_opts ? fz_opts->dogarbage : 0;
- opts.doascii = fz_opts ? fz_opts->doascii: 0;
- opts.uselist = fz_malloc_array(ctx, xref->len + 1, sizeof(char));
- opts.ofslist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
- opts.genlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
- opts.renumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
- opts.revrenumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
- opts.revgenlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
-
- fprintf(opts.out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10);
- fprintf(opts.out, "%%\316\274\341\277\246\n\n");
+ opts.do_expand = fz_opts ? fz_opts->do_expand : 0;
+ opts.do_garbage = fz_opts ? fz_opts->do_garbage : 0;
+ opts.do_ascii = fz_opts ? fz_opts->do_ascii: 0;
+ opts.do_linear = fz_opts ? fz_opts->do_linear: 0;
+ opts.start = 0;
+ opts.main_xref_offset = INT_MIN;
+ /* We deliberately make these arrays long enough to cope with
+ * 1 to n access rather than 0..n-1, and add space for 2 new
+ * extra entries that may be required for linearization. */
+ opts.use_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
+ opts.ofs_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
+ opts.gen_list = fz_calloc(ctx, xref->len + 3, sizeof(int));
+ opts.renumber_map = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
+ opts.rev_renumber_map = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
+ opts.rev_gen_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int));
for (num = 0; num < xref->len; num++)
{
- opts.uselist[num] = 0;
- opts.ofslist[num] = 0;
- opts.renumbermap[num] = num;
- opts.revrenumbermap[num] = num;
- opts.revgenlist[num] = xref->table[num].gen;
+ opts.use_list[num] = 0;
+ opts.ofs_list[num] = 0;
+ opts.renumber_map[num] = num;
+ opts.rev_renumber_map[num] = num;
+ opts.rev_gen_list[num] = xref->table[num].gen;
}
/* Make sure any objects hidden in compressed streams have been loaded */
preloadobjstms(xref);
/* Sweep & mark objects from the trailer */
- if (opts.dogarbage >= 1)
+ if (opts.do_garbage >= 1)
sweepobj(xref, &opts, xref->trailer);
/* Coalesce and renumber duplicate objects */
- if (opts.dogarbage >= 3)
+ if (opts.do_garbage >= 3)
removeduplicateobjs(xref, &opts);
/* Compact xref by renumbering and removing unused objects */
- if (opts.dogarbage >= 2)
+ if (opts.do_garbage >= 2)
compactxref(xref, &opts);
/* Make renumbering affect all indirect references and update xref */
- if (opts.dogarbage >= 2)
+ if (opts.do_garbage >= 2)
renumberobjs(xref, &opts);
- for (num = 0; num < xref->len; num++)
+ if (opts.do_linear)
{
- if (xref->table[num].type == 'f')
- opts.genlist[num] = xref->table[num].gen;
- if (xref->table[num].type == 'n')
- opts.genlist[num] = xref->table[num].gen;
- if (xref->table[num].type == 'o')
- opts.genlist[num] = 0;
-
- if (opts.dogarbage && !opts.uselist[num])
- continue;
-
- if (xref->table[num].type == 'n' || xref->table[num].type == 'o')
- {
- opts.uselist[num] = 1;
- opts.ofslist[num] = ftell(opts.out);
- writeobject(xref, &opts, num, opts.genlist[num]);
- }
+ linearize(xref, &opts);
}
+ writeobjects(xref, &opts, 0);
+
/* Construct linked list of free object slots */
lastfree = 0;
for (num = 0; num < xref->len; num++)
{
- if (!opts.uselist[num])
+ if (!opts.use_list[num])
{
- opts.genlist[num]++;
- opts.ofslist[lastfree] = num;
+ opts.gen_list[num]++;
+ opts.ofs_list[lastfree] = num;
lastfree = num;
}
}
- writexref(xref, &opts);
+ if (opts.do_linear)
+ {
+ opts.main_xref_offset = ftell(opts.out);
+ writexref(xref, &opts, 0, xref->len, !opts.do_linear, 0, opts.first_xref_offset);
+ opts.file_len = ftell(opts.out);
+
+ make_hint_stream(xref, &opts);
+ opts.file_len += opts.hintstream_len;
+ opts.main_xref_offset += opts.hintstream_len;
+ update_linearization_params(xref, &opts);
+ fseek(opts.out, 0, 0);
+ writeobjects(xref, &opts, 1);
+
+ padto(opts.out, opts.main_xref_offset);
+
+ }
+ else
+ {
+ opts.first_xref_offset = ftell(opts.out);
+ }
+
+ writexref(xref, &opts, 0, xref->len, !opts.do_linear, 0, opts.first_xref_offset);
}
fz_always(ctx)
{
- fz_free(ctx, opts.uselist);
- fz_free(ctx, opts.ofslist);
- fz_free(ctx, opts.genlist);
- fz_free(ctx, opts.renumbermap);
- fz_free(ctx, opts.revrenumbermap);
- fz_free(ctx, opts.revgenlist);
+#ifdef DEBUG_LINEARIZATION
+ page_objects_dump(&opts);
+ objects_dump(xref, &opts);
+#endif
+ fz_free(ctx, opts.use_list);
+ fz_free(ctx, opts.ofs_list);
+ fz_free(ctx, opts.gen_list);
+ fz_free(ctx, opts.renumber_map);
+ fz_free(ctx, opts.rev_renumber_map);
+ fz_free(ctx, opts.rev_gen_list);
+ pdf_drop_obj(opts.linear_l);
+ pdf_drop_obj(opts.linear_h0);
+ pdf_drop_obj(opts.linear_h1);
+ pdf_drop_obj(opts.linear_o);
+ pdf_drop_obj(opts.linear_e);
+ pdf_drop_obj(opts.linear_n);
+ pdf_drop_obj(opts.linear_t);
+ pdf_drop_obj(opts.hints_s);
+ pdf_drop_obj(opts.hints_length);
+ page_objects_list_destroy(ctx, opts.page_object_lists);
fclose(opts.out);
}
fz_catch(ctx)