Add routine to clean pdf content streams for pages.

New routine to filter the content streams for pages, xobjects, type3 charprocs, patterns etc. The filtered streams are guaranteed to be properly matched with q/Q's, and to not have changed the top level ctm. Additionally we remove (some) repeated settings of colors etc. This filtering can be extended to be smarter later. The idea of this is to both repair after editing, and to leave the streams in a form that can be easily appended to. This is preparatory to work on Bates numbering and Watermarking. Currently the streams produced are uncompressed.
author: Robin Watts <robin.watts@artifex.com> 2014-03-19 19:04:50 +0000
committer: Robin Watts <robin.watts@artifex.com> 2014-03-19 19:04:50 +0000
commit: e4d36ce68e0467ac4702f717386934a44970f4e5 (patch)
tree: 758a9999434f03a4607d1f23e43d9a58828b6444 /include
parent: 441954b6fb378e3af72500653be5636c7ade29ee (diff)
download: mupdf-e4d36ce68e0467ac4702f717386934a44970f4e5.tar.xz
2 files changed, 30 insertions, 0 deletions
diff --git a/include/mupdf/fitz/write-document.h b/include/mupdf/fitz/write-document.h
index 9fe27f5f..56b9ef76 100644
--- a/include/mupdf/fitz/write-document.h
+++ b/include/mupdf/fitz/write-document.h
@@ -21,6 +21,7 @@ struct fz_write_options_s
 	int do_garbage; /* If non-zero then attempt (where possible) to
 				garbage collect the file before writing. */
 	int do_linear; /* If non-zero then write linearised. */
+	int do_clean; /* If non-zero then clean contents */
 	int continue_on_error; /* If non-zero, errors are (optionally)
 					counted and writing continues. */
 	int *errors; /* Pointer to a place to store a count of errors */
diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h
index e6c4f9b2..c1b690b6 100644
--- a/include/mupdf/pdf/page.h
+++ b/include/mupdf/pdf/page.h
@@ -50,6 +50,19 @@ void pdf_free_page(pdf_document *doc, pdf_page *page);
 */
 void pdf_run_page(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie);
 
+/*
+	pdf_run_page_with_usage: Interpret a loaded page and render it on a device.
+
+	page: A page loaded by pdf_load_page.
+
+	dev: Device used for rendering, obtained from fz_new_*_device.
+
+	ctm: A transformation matrix applied to the objects on the page,
+	e.g. to scale or rotate the page contents as desired.
+
+	cookie: A pointer to an optional fz_cookie structure that can be used
+	to track progress, collect errors etc.
+*/
 void pdf_run_page_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie);
 
 /*
@@ -66,6 +79,22 @@ void pdf_run_page_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev,
 void pdf_run_page_contents(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie);
 
 /*
+	pdf_clean_page_contents: Clean a loaded page's rendering operations.
+	This involves filtering the PDF operators used to avoid (some cases
+	of) repetition, and leaves the page in a balanced state with an
+	unchanged top level matrix etc. Only the main page contents are
+	cleaned, not the annotations.
+
+	page: A page loaded by pdf_load_page.
+
+	dev: Device used for rendering, obtained from fz_new_*_device.
+
+	cookie: A pointer to an optional fz_cookie structure that can be used
+	to track progress, collect errors etc.
+*/
+void pdf_clean_page_contents(pdf_document *doc, pdf_page *page, fz_cookie *cookie);
+
+/*
 	Presentation interface.
 */
 fz_transition *pdf_page_presentation(pdf_document *doc, pdf_page *page, float *duration);
author	Robin Watts <robin.watts@artifex.com>	2014-03-19 19:04:50 +0000
committer	Robin Watts <robin.watts@artifex.com>	2014-03-19 19:04:50 +0000
commit	e4d36ce68e0467ac4702f717386934a44970f4e5 (patch)
tree	758a9999434f03a4607d1f23e43d9a58828b6444 /include
parent	441954b6fb378e3af72500653be5636c7ade29ee (diff)
download	mupdf-e4d36ce68e0467ac4702f717386934a44970f4e5.tar.xz