summaryrefslogtreecommitdiff
path: root/source/tools/pdfmerge.c
diff options
context:
space:
mode:
authorMichael Vrhel <michael.vrhel@artifex.com>2016-03-15 16:33:22 -0700
committerRobin Watts <robin.watts@artifex.com>2016-04-19 18:30:40 +0100
commit1bbde93ba39613877ded2015965aaab2341d3d36 (patch)
treef2a4936de895c6ddc40deb7bc93ccfb66de514d3 /source/tools/pdfmerge.c
parent335798189cbd40cd518ce49d1fa4c7eaa2811977 (diff)
downloadmupdf-1bbde93ba39613877ded2015965aaab2341d3d36.tar.xz
Add mutool merge
This commit adds a page merging tool. The tool demonstrates the use of object grafting. The object grafting function recursively goes through the object to add all referenced objects. A map is maintained to ensure that objects that have already been copied are not copied again.
Diffstat (limited to 'source/tools/pdfmerge.c')
-rw-r--r--source/tools/pdfmerge.c228
1 files changed, 228 insertions, 0 deletions
diff --git a/source/tools/pdfmerge.c b/source/tools/pdfmerge.c
new file mode 100644
index 00000000..27258a9e
--- /dev/null
+++ b/source/tools/pdfmerge.c
@@ -0,0 +1,228 @@
+/*
+ * PDF merge tool: Tool for merging pdf content.
+ *
+ * Simple test bed to work with merging pages from multiple PDFs into a single PDF.
+ */
+
+#include "mupdf/pdf.h"
+
+static void usage(void)
+{
+ fprintf(stderr,
+ "usage: mutool merge [-o output.pdf] [-adlsz] input.pdf [pages] [input2.pdf] [pages2] ...\n"
+ "\t-o\tname of PDF file to create\n"
+ "\t-a\tascii hex encode binary streams\n"
+ "\t-d\tdecompress all streams\n"
+ "\t-l\tlinearize PDF\n"
+ "\t-s\tclean content streams\n"
+ "\t-z\tdeflate uncompressed streams\n"
+ "\tinput.pdf name of first PDF file from which we are copying pages\n"
+ );
+ exit(1);
+}
+
+static fz_context *ctx = NULL;
+static pdf_document *doc_des = NULL;
+static pdf_document *doc_src = NULL;
+
+/* This isrange is a duplicate with mudraw.c Not sure how we want to organize or if
+ * we are fine with the small amount of code duplication */
+static int isrange(char *s)
+{
+ while (*s)
+ {
+ if ((*s < '0' || *s > '9') && *s != '-' && *s != ',')
+ return 0;
+ s++;
+ }
+ return 1;
+}
+
+static void page_merge(int page_from, int page_to, pdf_graft_map *graft_map)
+{
+ pdf_obj *pageref = NULL;
+ pdf_obj *page_dict;
+ pdf_obj *obj = NULL, *ref = NULL;
+ /* Include minimal number of objects for page. Do not include items that
+ * reference other pages */
+ pdf_obj *known_page_objs[] = { PDF_NAME_Contents, PDF_NAME_Resources,
+ PDF_NAME_MediaBox, PDF_NAME_CropBox, PDF_NAME_BleedBox, PDF_NAME_TrimBox,
+ PDF_NAME_ArtBox, PDF_NAME_Rotate, PDF_NAME_UserUnit};
+ int n = nelem(known_page_objs);
+ int i;
+ int num;
+
+ fz_var(obj);
+ fz_var(ref);
+
+ fz_try(ctx)
+ {
+ pageref = pdf_lookup_page_obj(ctx, doc_src, page_from - 1);
+
+ /* Make a new dictionary and copy over the items from the source object to
+ * the new dict that we want to deep copy. */
+ page_dict = pdf_new_dict(ctx, doc_des, 4);
+
+ pdf_dict_put_drop(ctx, page_dict, PDF_NAME_Type, PDF_NAME_Page);
+
+ for (i = 0; i < n; i++)
+ {
+ obj = pdf_dict_get(ctx, pageref, known_page_objs[i]);
+ if (obj != NULL)
+ pdf_dict_put_drop(ctx, page_dict, known_page_objs[i], pdf_graft_object(ctx, doc_des, doc_src, obj, graft_map));
+ }
+
+ /* Add the dictionary */
+ obj = pdf_add_object_drop(ctx, doc_des, page_dict);
+
+ /* Get indirect ref */
+ num = pdf_to_num(ctx, obj);
+ ref = pdf_new_indirect(ctx, doc_des, num, 0);
+
+ /* Insert */
+ pdf_insert_page(ctx, doc_des, page_to - 1, ref);
+ }
+ fz_always(ctx)
+ {
+ pdf_drop_obj(ctx, obj);
+ pdf_drop_obj(ctx, ref);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
+
+static void merge_range(char *range)
+{
+ int page, spage, epage, src_pagecount, des_pagecount;
+ char *spec, *dash;
+ pdf_graft_map *graft_map;
+
+ src_pagecount = fz_count_pages(ctx, (fz_document*) doc_src);
+ des_pagecount = fz_count_pages(ctx, (fz_document*) doc_des);
+ spec = fz_strsep(&range, ",");
+ graft_map = pdf_new_graft_map(ctx, doc_src);
+
+ fz_try(ctx)
+ {
+ while (spec)
+ {
+ dash = strchr(spec, '-');
+
+ if (dash == spec)
+ spage = epage = src_pagecount;
+ else
+ spage = epage = atoi(spec);
+
+ if (dash)
+ {
+ if (strlen(dash) > 1)
+ epage = atoi(dash + 1);
+ else
+ epage = src_pagecount;
+ }
+
+ spage = fz_clampi(spage, 1, src_pagecount);
+ epage = fz_clampi(epage, 1, src_pagecount);
+
+ if (spage < epage)
+ for (page = spage; page <= epage; page++, des_pagecount++)
+ page_merge(page, des_pagecount + 1, graft_map);
+ else
+ for (page = spage; page >= epage; page--, des_pagecount++)
+ page_merge(page, des_pagecount + 1, graft_map);
+ spec = fz_strsep(&range, ",");
+ }
+ }
+ fz_always(ctx)
+ {
+ pdf_drop_graft_map(ctx, graft_map);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
+
+int pdfmerge_main(int argc, char **argv)
+{
+ pdf_write_options opts = { 0 };
+ char *output = "out.pdf";
+ char *infile_src;
+ int c;
+
+ while ((c = fz_getopt(argc, argv, "adlszo:")) != -1)
+ {
+ switch (c)
+ {
+ case 'o': output = fz_optarg; break;
+ case 'a': opts.do_ascii ++; break;
+ case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break;
+ case 'l': opts.do_linear ++; break;
+ case 's': opts.do_clean ++; break;
+ case 'z': opts.do_deflate ++; break;
+ default: usage(); break;
+ }
+ }
+
+ if (fz_optind == argc)
+ usage();
+
+ ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
+ if (!ctx)
+ {
+ fprintf(stderr, "Cannot initialise context\n");
+ exit(1);
+ }
+
+ fz_try(ctx)
+ {
+ doc_des = pdf_create_document(ctx);
+ }
+ fz_catch(ctx)
+ {
+ fprintf(stderr, "Failed to allocate destination document file %s\n", output);
+ exit(1);
+ }
+
+ /* Step through the source files */
+ while (fz_optind < argc)
+ {
+ fz_try(ctx)
+ {
+ infile_src = argv[fz_optind++];
+ pdf_drop_document(ctx, doc_src);
+ doc_src = pdf_open_document(ctx, infile_src);
+
+ if (fz_optind == argc || !isrange(argv[fz_optind]))
+ merge_range("1-");
+ else
+ merge_range(argv[fz_optind++]);
+ }
+ fz_catch(ctx)
+ {
+ fprintf(stderr, "Failed merging document %s\n", infile_src);
+ exit(1);
+ }
+ }
+
+ fz_try(ctx)
+ {
+ pdf_save_document(ctx, doc_des, output, &opts);
+ }
+ fz_always(ctx)
+ {
+ pdf_drop_document(ctx, doc_des);
+ pdf_drop_document(ctx, doc_src);
+ }
+ fz_catch(ctx)
+ {
+ fprintf(stderr, "Error encountered during file save.\n");
+ exit(1);
+ }
+ fz_flush_warnings(ctx);
+ fz_drop_context(ctx);
+
+ return 0;
+}