Simple mupdfposter app

Divides the pages of a large-format PDF into multiple smaller pages, written to a new PDF, that together tile the original pages.
author: Robin Watts <robin.watts@artifex.com> 2012-04-25 20:42:51 +0100
committer: Robin Watts <robin.watts@artifex.com> 2012-04-30 13:55:25 +0100
commit: 338c7077bda3c3d3e9bd7567aa9c8fc94d6fa26c (patch)
tree: d61ea997b99cf761efcd5a20304e574a9057d0bc /apps/mupdfposter.c
parent: a6d09ae879cca757a397a93bbd684b51a023637c (diff)
download: mupdf-338c7077bda3c3d3e9bd7567aa9c8fc94d6fa26c.tar.xz
1 files changed, 184 insertions, 0 deletions
diff --git a/apps/mupdfposter.c b/apps/mupdfposter.c
new file mode 100644
index 00000000..faf6de4f
--- /dev/null
+++ b/apps/mupdfposter.c
@@ -0,0 +1,184 @@
+/*
+ * PDF poster tool: split each page of a PDF into a grid of smaller pages.
+ *
+ * Every page of the input is replaced by x * y pages in the output.
+ * Each of those pages is a copy of the original page whose MediaBox
+ * is reduced to one tile of the original page area.
+ * If neither -x nor -y is given, each page is cut in two along its
+ * longer edge.
+ */
+
+#include "fitz.h"
+#include "mupdf-internal.h"
+
+/*
+ * Number of tiles each page is split into horizontally (x_factor, set by
+ * -x) and vertically (y_factor, set by -y). 0 means the option was not
+ * given on the command line.
+ */
+static int x_factor = 0;
+static int y_factor = 0;
+
+/* Print the command line synopsis to stderr and terminate with exit status 1. */
+static void usage(void)
+{
+	static const char synopsis[] =
+		"usage: mupdfposter [options] input.pdf [output.pdf]\n"
+		"\t-p -\tpassword\n"
+		"\t-x\tx decimation factor\n"
+		"\t-y\ty decimation factor\n";
+
+	fputs(synopsis, stderr);
+	exit(1);
+}
+
+/*
+ * Append a real number to a PDF array and release the local reference.
+ * NOTE(review): relies on pdf_array_push taking its own reference, in the
+ * same way pdf_dict_puts and pdf_update_object are used in this file.
+ */
+static void push_real(fz_context *ctx, pdf_obj *array, float value)
+{
+	pdf_obj *num = pdf_new_real(ctx, value);
+	pdf_array_push(array, num);
+	pdf_drop_obj(num);
+}
+
+/*
+ * Rebuild the page tree so that every page is replaced by a grid of tiles.
+ *
+ * Each page is cut into xf columns and yf rows. For every tile a copy of
+ * the original page dictionary is registered as a new object whose
+ * MediaBox is narrowed to that tile and whose Parent is the root Pages
+ * node. Tiles are emitted row by row from the highest y band to the
+ * lowest, and within a row from the lowest x to the highest.
+ *
+ * xf and yf come from the x_factor/y_factor globals. If neither is at
+ * least 1 the page is halved along its longer edge; if only one is, the
+ * other defaults to 1.
+ *
+ * As a side effect the document catalog is replaced by one that holds
+ * only Type and Pages, and the root Pages node gets a flat Kids array
+ * and a matching Count.
+ */
+static void decimatepages(pdf_document *xref)
+{
+	pdf_obj *oldroot, *root, *pages, *kids, *parent, *count;
+	fz_context *ctx = xref->ctx;
+	int num_pages = pdf_count_pages(xref);
+	int page, kidcount;
+
+	/* Keep only the Type and Pages entries of the catalog, so that no
+	 * other catalog entry can keep referring to the original pages. */
+	oldroot = pdf_dict_gets(xref->trailer, "Root");
+	pages = pdf_dict_gets(oldroot, "Pages");
+
+	root = pdf_new_dict(ctx, 2);
+	pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type"));
+	pdf_dict_puts(root, "Pages", pages);
+
+	pdf_update_object(xref, pdf_to_num(oldroot), pdf_to_gen(oldroot), root);
+
+	pdf_drop_obj(root);
+
+	/* Every tile becomes a direct child of the root Pages node */
+	parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref);
+	kids = pdf_new_array(ctx, 1);
+
+	kidcount = 0;
+	for (page = 0; page < num_pages; page++)
+	{
+		pdf_page *page_details = pdf_load_page(xref, page);
+		int xf = x_factor, yf = y_factor;
+		int x, y;
+		float w = page_details->mediabox.x1 - page_details->mediabox.x0;
+		float h = page_details->mediabox.y1 - page_details->mediabox.y0;
+
+		if (xf < 1 && yf < 1)
+		{
+			/* Nothing usable specified, so split along the long edge */
+			if (w > h)
+				xf = 2, yf = 1;
+			else
+				xf = 1, yf = 2;
+		}
+		else
+		{
+			/* A missing (or nonsensical negative) factor means "do not
+			 * split in this direction", rather than "emit no tiles". */
+			if (xf < 1)
+				xf = 1;
+			if (yf < 1)
+				yf = 1;
+		}
+
+		for (y = yf-1; y >= 0; y--)
+		{
+			for (x = 0; x < xf; x++)
+			{
+				pdf_obj *newpageobj = pdf_copy_dict(ctx, xref->page_objs[page]);
+				pdf_obj *newpageref = pdf_new_ref(xref, newpageobj);
+				pdf_obj *newmediabox = pdf_new_array(ctx, 4);
+				fz_rect mb;
+
+				/* The last column/row ends exactly on the original edge,
+				 * so rounding in w/xf or h/yf cannot leave a gap. */
+				mb.x0 = page_details->mediabox.x0 + (w/xf)*x;
+				if (x == xf-1)
+					mb.x1 = page_details->mediabox.x1;
+				else
+					mb.x1 = page_details->mediabox.x0 + (w/xf)*(x+1);
+				mb.y0 = page_details->mediabox.y0 + (h/yf)*y;
+				if (y == yf-1)
+					mb.y1 = page_details->mediabox.y1;
+				else
+					mb.y1 = page_details->mediabox.y0 + (h/yf)*(y+1);
+
+				push_real(ctx, newmediabox, mb.x0);
+				push_real(ctx, newmediabox, mb.y0);
+				push_real(ctx, newmediabox, mb.x1);
+				push_real(ctx, newmediabox, mb.y1);
+
+				pdf_dict_puts(newpageobj, "Parent", parent);
+				pdf_dict_puts(newpageobj, "MediaBox", newmediabox);
+
+				/* Store page object in new kids array */
+				pdf_array_push(kids, newpageref);
+
+				kidcount++;
+
+				/* The document and the containers hold their own
+				 * references now; release ours. */
+				pdf_drop_obj(newmediabox);
+				pdf_drop_obj(newpageref);
+				pdf_drop_obj(newpageobj);
+			}
+		}
+
+		/* Only the mediabox of the loaded page was needed */
+		pdf_free_page(xref, page_details);
+	}
+
+	pdf_drop_obj(parent);
+
+	/* Update page count and kids array */
+	count = pdf_new_int(ctx, kidcount);
+	pdf_dict_puts(pages, "Count", count);
+	pdf_drop_obj(count);
+	pdf_dict_puts(pages, "Kids", kids);
+	pdf_drop_obj(kids);
+}
+
+/*
+ * Entry point: parse the options, open the input PDF, replace every page
+ * by its tiles and write the result.
+ *
+ * Options:
+ *   -p password   password used if the input needs one
+ *   -x n          number of tiles across each page
+ *   -y n          number of tiles down each page
+ *
+ * The first non-option argument is the input file. A second one is taken
+ * as the output file if it contains ".pdf" or ".PDF"; otherwise the
+ * output goes to "out.pdf".
+ *
+ * Returns 0 on success. Bad usage and context creation failure exit
+ * with status 1.
+ */
+#ifdef MUPDF_COMBINED_EXE
+int pdfposter_main(int argc, char **argv)
+#else
+int main(int argc, char **argv)
+#endif
+{
+	char *infile;
+	char *outfile = "out.pdf";
+	char *password = "";
+	int c;
+	fz_write_options opts;
+	pdf_document *xref;
+	fz_context *ctx;
+
+	opts.dogarbage = 0;
+	opts.doexpand = 0;
+	opts.doascii = 0;
+
+	/* "p:" must be listed here, otherwise -p is rejected as an unknown
+	 * option and the password case below can never be reached. */
+	while ((c = fz_getopt(argc, argv, "p:x:y:")) != -1)
+	{
+		switch (c)
+		{
+		case 'p': password = fz_optarg; break;
+		case 'x': x_factor = atoi(fz_optarg); break;
+		case 'y': y_factor = atoi(fz_optarg); break;
+		default: usage(); break;
+		}
+	}
+
+	/* A negative tile count is meaningless; 0 still means "unspecified" */
+	if (x_factor < 0 || y_factor < 0)
+		usage();
+
+	if (argc - fz_optind < 1)
+		usage();
+
+	infile = argv[fz_optind++];
+
+	if (argc - fz_optind > 0 &&
+		(strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF")))
+	{
+		outfile = argv[fz_optind++];
+	}
+
+	ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
+	if (!ctx)
+	{
+		fprintf(stderr, "cannot initialise context\n");
+		exit(1);
+	}
+
+	xref = pdf_open_document(ctx, infile);
+	if (pdf_needs_password(xref))
+		if (!pdf_authenticate_password(xref, password))
+			fz_throw(ctx, "cannot authenticate password: %s", infile);
+
+	/* Replace every page by the tiles it is split into */
+	decimatepages(xref);
+
+	pdf_write(xref, outfile, &opts);
+
+	pdf_close_document(xref);
+	fz_free_context(ctx);
+	return 0;
+}
author	Robin Watts <robin.watts@artifex.com>	2012-04-25 20:42:51 +0100
committer	Robin Watts <robin.watts@artifex.com>	2012-04-30 13:55:25 +0100
commit	338c7077bda3c3d3e9bd7567aa9c8fc94d6fa26c (patch)
tree	d61ea997b99cf761efcd5a20304e574a9057d0bc /apps/mupdfposter.c
parent	a6d09ae879cca757a397a93bbd684b51a023637c (diff)
download	mupdf-338c7077bda3c3d3e9bd7567aa9c8fc94d6fa26c.tar.xz