summaryrefslogtreecommitdiff
path: root/fitz
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2012-03-28 17:56:41 +0530
committerRobin Watts <robin.watts@artifex.com>2012-03-28 20:17:23 +0530
commit0c0398c06c6e2a1bc9ca0765836cc4beb997cfa7 (patch)
tree3a1b4e470eface9ce2391d5023bc552b58be8c33 /fitz
parentccf2483b87c006b3e35c371ba0d79adbe34d84d9 (diff)
downloadmupdf-0c0398c06c6e2a1bc9ca0765836cc4beb997cfa7.tar.xz
Add documentation for fz_link_dest and text extraction device.
Diffstat (limited to 'fitz')
-rw-r--r--fitz/fitz.h168
1 files changed, 143 insertions, 25 deletions
diff --git a/fitz/fitz.h b/fitz/fitz.h
index dcd9ca37..732cb9d2 100644
--- a/fitz/fitz.h
+++ b/fitz/fitz.h
@@ -376,7 +376,6 @@ void *fz_calloc(fz_context *ctx, unsigned int count, unsigned int size);
Returns a pointer to allocated (and cleared) structure. Throws
exception on failure to allocate.
*/
-/* alloc and zero a struct, and tag it for memento */
#define fz_malloc_struct(CTX, STRUCT) \
Memento_label(fz_calloc(CTX,1,sizeof(STRUCT)), #STRUCT)
@@ -566,7 +565,9 @@ int fz_runetochar(char *str, int rune);
*/
int fz_runelen(int rune);
-/* getopt */
+/*
+ getopt: Simple functions/variables for use in tools.
+*/
extern int fz_getopt(int nargc, char * const *nargv, const char *ostr);
extern int fz_optind;
extern char *fz_optarg;
@@ -1408,7 +1409,7 @@ void fz_md5_pixmap(fz_pixmap *pixmap, unsigned char digest[16]);
These may be implemented as simple wrappers around a pixmap, or as
more complex things that decode at different subsample settings on
demand.
- */
+*/
typedef struct fz_image_s fz_image;
/*
@@ -1518,7 +1519,9 @@ fz_device *fz_new_draw_device(fz_context *ctx, fz_pixmap *dest);
/*
Text extraction device: Used for searching, format conversion etc.
- */
+
+ (In development - Subject to change in future versions)
+*/
typedef struct fz_text_style_s fz_text_style;
typedef struct fz_text_char_s fz_text_char;
@@ -1529,37 +1532,60 @@ typedef struct fz_text_block_s fz_text_block;
typedef struct fz_text_sheet_s fz_text_sheet;
typedef struct fz_text_page_s fz_text_page;
+/*
+ fz_text_sheet: A text sheet contains a list of distinct text styles
+ used on a page (or a series of pages).
+*/
+struct fz_text_sheet_s
+{
+ int maxid;
+ fz_text_style *style;
+};
+
+/*
+ fz_text_style: A text style contains details of a distinct text style
+ used on a page.
+*/
struct fz_text_style_s
{
+ fz_text_style *next;
int id;
fz_font *font;
float size;
int wmode;
int script;
/* etc... */
- fz_text_style *next;
-};
-
-struct fz_text_sheet_s
-{
- int maxid;
- fz_text_style *style;
};
-struct fz_text_char_s
+/*
+ fz_text_page: A text page is a list of blocks of text, together with
+ an overall bounding box.
+*/
+struct fz_text_page_s
{
- fz_rect bbox;
- int c;
+ fz_rect mediabox;
+ int len, cap;
+ fz_text_block *blocks;
};
-struct fz_text_span_s
+/*
+ fz_text_block: A text block is a list of lines of text. In typical
+ cases this may correspond to a paragraph or a column of text. A
+ collection of blocks makes up a page.
+*/
+struct fz_text_block_s
{
fz_rect bbox;
int len, cap;
- fz_text_char *text;
- fz_text_style *style;
+ fz_text_line *lines;
};
+/*
+ fz_text_line: A text line is a list of text spans, with the same
+ (or very similar) baseline. In typical cases this should correspond
+ (as expected) to complete lines of text. A collection of lines makes
+ up a block.
+*/
struct fz_text_line_s
{
fz_rect bbox;
@@ -1567,18 +1593,30 @@ struct fz_text_line_s
fz_text_span *spans;
};
-struct fz_text_block_s
+/*
+ fz_text_span: A text span is a list of characters in the same style
+ that share a common (or very similar) baseline. In typical cases
+ (where only one font style is used in a line), a single span may be
+ enough to represent a complete line. In cases where multiple
+ font styles are used (for example italics), then a line will be
+ broken down into a series of spans.
+*/
+struct fz_text_span_s
{
fz_rect bbox;
int len, cap;
- fz_text_line *lines;
+ fz_text_char *text;
+ fz_text_style *style;
};
-struct fz_text_page_s
+/*
+ fz_text_char: A text char is a unicode character and the bounding
+ box with which it appears on the page.
+*/
+struct fz_text_char_s
{
- fz_rect mediabox;
- int len, cap;
- fz_text_block *blocks;
+ fz_rect bbox;
+ int c;
};
/*
@@ -1587,6 +1625,16 @@ struct fz_text_page_s
Gather and sort the text on a page into spans of uniform style,
arranged into lines and blocks by reading order. The reading order
is determined by various heuristics, so may not be accurate.
+
+ sheet: The text sheet to which styles should be added. This can
+ either be a newly created (empty) text sheet, or one containing
+ styles from a previous text device. The same sheet cannot be used
+ in multiple threads simultaneously.
+
+ page: The text page to which content should be added. This will
+ usually be a newly created (empty) text page, but it can be one
+ containing data already (for example when merging multiple pages, or
+ watermarking).
*/
fz_device *fz_new_text_device(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page);
@@ -1609,14 +1657,29 @@ void fz_free_text_sheet(fz_context *ctx, fz_text_sheet *sheet);
fz_text_page *fz_new_text_page(fz_context *ctx, fz_rect mediabox);
void fz_free_text_page(fz_context *ctx, fz_text_page *page);
+/*
+ fz_print_text_sheet: Output a text sheet to a file as CSS.
+*/
void fz_print_text_sheet(fz_context *ctx, FILE *out, fz_text_sheet *sheet);
+
+/*
+ fz_print_text_page_html: Output a page to a file in HTML format.
+*/
void fz_print_text_page_html(fz_context *ctx, FILE *out, fz_text_page *page);
+
+/*
+ fz_print_text_page_xml: Output a page to a file in XML format.
+*/
void fz_print_text_page_xml(fz_context *ctx, FILE *out, fz_text_page *page);
+
+/*
+ fz_print_text_page: Output a page to a file in UTF-8 format.
+*/
void fz_print_text_page(fz_context *ctx, FILE *out, fz_text_page *page);
/*
- * Cookie support - simple communication channel between app/library.
- */
+ Cookie support - simple communication channel between app/library.
+*/
typedef struct fz_cookie_s fz_cookie;
@@ -1739,6 +1802,8 @@ void fz_run_display_list(fz_display_list *list, fz_device *dev, fz_matrix ctm, f
void fz_free_display_list(fz_context *ctx, fz_display_list *list);
/*
+ Links
+
NOTE: The link destination struct is scheduled for imminent change!
Use at your own peril.
*/
@@ -1767,6 +1832,59 @@ enum {
fz_link_flag_r_is_zoom = 64 /* rb.x is actually a zoom figure */
};
+/*
+ fz_link_dest: This structure represents the destination of
+ an fz_link; this may be a page to display, a new file to open,
+ a javascript action to perform, etc.
+
+ kind: This identifies the kind of link destination. Different
+ kinds use different sections of the union.
+
+ For FZ_LINK_GOTO or FZ_LINK_GOTOR:
+
+ gotor.page: The target page number to move to (0 being the
+ first page in the document).
+
+ gotor.flags: A bitfield consisting of fz_link_flag_*
+ describing the validity and meaning of the different parts
+ of gotor.lr and gotor.rb. Link destinations are constructed
+ (as far as possible) so that lt and rb can be treated as a
+ bounding box, though the validity flags indicate which of the
+ values was actually specified in the file.
+
+ gotor.lt: The top left corner of the destination bounding box.
+
+ gotor.rb: The bottom right corner of the destination bounding
+ box. If fz_link_flag_r_is_zoom is set, then the r figure
+ should actually be interpretted as a zoom ratio.
+
+ gotor.file_spec: If set, this destination should cause a new
+ file to be opened; this field holds a pointer to a remote
+ file specification (UTF-8). Always NULL in the FZ_LINK_GOTO
+ case.
+
+ gotor.new_window: If true, the destination should open in a
+ new window.
+
+ For FZ_LINK_URI:
+
+ uri.uri: A UTF-8 encoded URI to launch.
+
+ uri.is_map: If true, the x and y coords (as ints, in user
+ space) should be appended to the URI before launch.
+
+ For FZ_LINK_LAUNCH:
+
+ launch.file_spec: A UTF-8 file specification to launch.
+
+ launch.new_window: If true, the destination should be launched
+ in a new window.
+
+ For FZ_LINK_NAMED:
+
+ named.named: The named action to perform. Likely to be
+ client specific.
+*/
struct fz_link_dest_s
{
fz_link_kind kind;