From e4d36ce68e0467ac4702f717386934a44970f4e5 Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Wed, 19 Mar 2014 19:04:50 +0000 Subject: Add routine to clean pdf content streams for pages. New routine to filter the content streams for pages, xobjects, type3 charprocs, patterns etc. The filtered streams are guaranteed to be properly matched with q/Q's, and to not have changed the top level ctm. Additionally we remove (some) repeated settings of colors etc. This filtering can be extended to be smarter later. The idea of this is to both repair after editing, and to leave the streams in a form that can be easily appended to. This is preparatory to work on Bates numbering and Watermarking. Currently the streams produced are uncompressed. --- include/mupdf/fitz/write-document.h | 1 + include/mupdf/pdf/page.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/mupdf/fitz/write-document.h b/include/mupdf/fitz/write-document.h index 9fe27f5f..56b9ef76 100644 --- a/include/mupdf/fitz/write-document.h +++ b/include/mupdf/fitz/write-document.h @@ -21,6 +21,7 @@ struct fz_write_options_s int do_garbage; /* If non-zero then attempt (where possible) to garbage collect the file before writing. */ int do_linear; /* If non-zero then write linearised. */ + int do_clean; /* If non-zero then clean contents */ int continue_on_error; /* If non-zero, errors are (optionally) counted and writing continues. */ int *errors; /* Pointer to a place to store a count of errors */ diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h index e6c4f9b2..c1b690b6 100644 --- a/include/mupdf/pdf/page.h +++ b/include/mupdf/pdf/page.h @@ -50,6 +50,19 @@ void pdf_free_page(pdf_document *doc, pdf_page *page); */ void pdf_run_page(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie); +/* + pdf_run_page_with_usage: Interpret a loaded page and render it on a device. + + page: A page loaded by pdf_load_page. 
+ + dev: Device used for rendering, obtained from fz_new_*_device. + + ctm: A transformation matrix applied to the objects on the page, + e.g. to scale or rotate the page contents as desired. + + cookie: A pointer to an optional fz_cookie structure that can be used + to track progress, collect errors etc. +*/ void pdf_run_page_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie); /* @@ -65,6 +78,22 @@ void pdf_run_page_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, */ void pdf_run_page_contents(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie); +/* + pdf_clean_page_contents: Clean a loaded page's rendering operations. + This involves filtering the PDF operators used to avoid (some cases + of) repetition, and leaves the page in a balanced state with an + unchanged top level matrix etc. Just the main page contents without + the annotations. + + page: A page loaded by pdf_load_page. + + doc: The document containing the page. + + cookie: A pointer to an optional fz_cookie structure that can be used + to track progress, collect errors etc. +*/ +void pdf_clean_page_contents(pdf_document *doc, pdf_page *page, fz_cookie *cookie); + /* Presentation interface. */ -- cgit v1.2.3