summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author Robin Watts <robin.watts@artifex.com> 2017-10-25 16:58:50 +0100
committer Robin Watts <robin.watts@artifex.com> 2017-11-06 11:41:14 +0000
commit c19e1ab67f06bf11e12dac08685a28753a3b276e (patch)
tree 54794b85da83e3c88320fa05eab5a465c1ca453c /include
parent f76bc6be7d3addfbd65fdac454b17911337ee2c6 (diff)
download mupdf-c19e1ab67f06bf11e12dac08685a28753a3b276e.tar.xz
Expose text filtering through pdf_clean interface.
Diffstat (limited to 'include')
-rw-r--r-- include/mupdf/pdf/font.h | 2
-rw-r--r-- include/mupdf/pdf/interpret.h | 3
-rw-r--r-- include/mupdf/pdf/page.h | 34
3 files changed, 39 insertions, 0 deletions
diff --git a/include/mupdf/pdf/font.h b/include/mupdf/pdf/font.h
index 23bc77bf..10571f0b 100644
--- a/include/mupdf/pdf/font.h
+++ b/include/mupdf/pdf/font.h
@@ -1,6 +1,8 @@
#ifndef MUPDF_PDF_FONT_H
#define MUPDF_PDF_FONT_H
+#include "mupdf/pdf/cmap.h"
+
/*
* Font
*/
diff --git a/include/mupdf/pdf/interpret.h b/include/mupdf/pdf/interpret.h
index 68f1a71c..f7104499 100644
--- a/include/mupdf/pdf/interpret.h
+++ b/include/mupdf/pdf/interpret.h
@@ -1,6 +1,9 @@
#ifndef PDF_INTERPRET_H
#define PDF_INTERPRET_H
+#include "mupdf/pdf/font.h"
+#include "mupdf/pdf/resource.h"
+
typedef struct pdf_csi_s pdf_csi;
typedef struct pdf_gstate_s pdf_gstate;
typedef struct pdf_processor_s pdf_processor;
diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h
index 969aedf0..f904a7c1 100644
--- a/include/mupdf/pdf/page.h
+++ b/include/mupdf/pdf/page.h
@@ -1,6 +1,8 @@
#ifndef MUPDF_PDF_PAGE_H
#define MUPDF_PDF_PAGE_H
+#include "mupdf/pdf/interpret.h"
+
int pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *pageobj);
int pdf_count_pages(fz_context *ctx, pdf_document *doc);
pdf_obj *pdf_lookup_page_obj(fz_context *ctx, pdf_document *doc, int needle);
@@ -169,6 +171,38 @@ void pdf_clean_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *ann
pdf_page_contents_process_fn *proc, void *proc_arg, int ascii);
/*
+ pdf_filter_page_contents: Performs the same task as
+ pdf_clean_page_contents, but with an optional text filter
+ function.
+
+ text_filter: Function to assess whether a given character
+ should be kept (return 0) or removed (return 1).
+
+ after_text: Function called after each text object is closed
+ to allow other output to be sent.
+
+ arg: Opaque value to be passed to callback functions.
+*/
+void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie,
+ pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg, int ascii);
+
+/*
+ pdf_filter_annot_contents: Performs the same task as
+ pdf_clean_annot_contents, but with an optional text filter
+ function.
+
+ text_filter: Function to assess whether a given character
+ should be kept (return 0) or removed (return 1).
+
+ after_text: Function called after each text object is closed
+ to allow other output to be sent.
+
+ arg: Opaque value to be passed to callback functions.
+*/
+void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, fz_cookie *cookie,
+ pdf_page_contents_process_fn *proc, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg, int ascii);
+
+/*
Presentation interface.
*/
fz_transition *pdf_page_presentation(fz_context *ctx, pdf_page *page, fz_transition *transition, float *duration);