From c19e1ab67f06bf11e12dac08685a28753a3b276e Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Wed, 25 Oct 2017 16:58:50 +0100 Subject: Expose text filtering through pdf_clean interface. --- include/mupdf/pdf/font.h | 2 ++ include/mupdf/pdf/interpret.h | 3 +++ include/mupdf/pdf/page.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) (limited to 'include') diff --git a/include/mupdf/pdf/font.h b/include/mupdf/pdf/font.h index 23bc77bf..10571f0b 100644 --- a/include/mupdf/pdf/font.h +++ b/include/mupdf/pdf/font.h @@ -1,6 +1,8 @@ #ifndef MUPDF_PDF_FONT_H #define MUPDF_PDF_FONT_H +#include "mupdf/pdf/cmap.h" + /* * Font */ diff --git a/include/mupdf/pdf/interpret.h b/include/mupdf/pdf/interpret.h index 68f1a71c..f7104499 100644 --- a/include/mupdf/pdf/interpret.h +++ b/include/mupdf/pdf/interpret.h @@ -1,6 +1,9 @@ #ifndef PDF_INTERPRET_H #define PDF_INTERPRET_H +#include "mupdf/pdf/font.h" +#include "mupdf/pdf/resource.h" + typedef struct pdf_csi_s pdf_csi; typedef struct pdf_gstate_s pdf_gstate; typedef struct pdf_processor_s pdf_processor; diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h index 969aedf0..f904a7c1 100644 --- a/include/mupdf/pdf/page.h +++ b/include/mupdf/pdf/page.h @@ -1,6 +1,8 @@ #ifndef MUPDF_PDF_PAGE_H #define MUPDF_PDF_PAGE_H +#include "mupdf/pdf/interpret.h" + int pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *pageobj); int pdf_count_pages(fz_context *ctx, pdf_document *doc); pdf_obj *pdf_lookup_page_obj(fz_context *ctx, pdf_document *doc, int needle); @@ -168,6 +170,38 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, void pdf_clean_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, fz_cookie *cookie, pdf_page_contents_process_fn *proc, void *proc_arg, int ascii); +/* + pdf_filter_page_contents: Performs the same task as + pdf_clean_page_contents, but additionally accepts an optional + text filter function and an after-text-object callback.
+ + text_filter: Callback deciding whether a given character + should be kept (return 0) or removed (return 1). + + after_text: Callback invoked after each text object is closed, + allowing other output to be sent at that point. + + arg: Opaque value passed to the callback functions. Other parameters presumably as for pdf_clean_page_contents (TODO confirm). +*/ +void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, + pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg, int ascii); + +/* + pdf_filter_annot_contents: Performs the same task as + pdf_clean_annot_contents, but additionally accepts an optional + text filter function and an after-text-object callback. + + text_filter: Callback deciding whether a given character + should be kept (return 0) or removed (return 1). + + after_text: Callback invoked after each text object is closed, + allowing other output to be sent at that point. + + arg: Opaque value passed to the callback functions. Other parameters presumably as for pdf_clean_annot_contents (TODO confirm). +*/ +void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, fz_cookie *cookie, + pdf_page_contents_process_fn *proc, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg, int ascii); + /* Presentation interface. */ -- cgit v1.2.3