From 0c0398c06c6e2a1bc9ca0765836cc4beb997cfa7 Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Wed, 28 Mar 2012 17:56:41 +0530 Subject: Add documentation for fz_link_dest and text extraction device. --- fitz/fitz.h | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 143 insertions(+), 25 deletions(-) (limited to 'fitz') diff --git a/fitz/fitz.h b/fitz/fitz.h index dcd9ca37..732cb9d2 100644 --- a/fitz/fitz.h +++ b/fitz/fitz.h @@ -376,7 +376,6 @@ void *fz_calloc(fz_context *ctx, unsigned int count, unsigned int size); Returns a pointer to allocated (and cleared) structure. Throws exception on failure to allocate. */ -/* alloc and zero a struct, and tag it for memento */ #define fz_malloc_struct(CTX, STRUCT) \ Memento_label(fz_calloc(CTX,1,sizeof(STRUCT)), #STRUCT) @@ -566,7 +565,9 @@ int fz_runetochar(char *str, int rune); */ int fz_runelen(int rune); -/* getopt */ +/* + getopt: Simple functions/variables for use in tools. +*/ extern int fz_getopt(int nargc, char * const *nargv, const char *ostr); extern int fz_optind; extern char *fz_optarg; @@ -1408,7 +1409,7 @@ void fz_md5_pixmap(fz_pixmap *pixmap, unsigned char digest[16]); These may be implemented as simple wrappers around a pixmap, or as more complex things that decode at different subsample settings on demand. - */ +*/ typedef struct fz_image_s fz_image; /* @@ -1518,7 +1519,9 @@ fz_device *fz_new_draw_device(fz_context *ctx, fz_pixmap *dest); /* Text extraction device: Used for searching, format conversion etc. - */ + + (In development - Subject to change in future versions) +*/ typedef struct fz_text_style_s fz_text_style; typedef struct fz_text_char_s fz_text_char; @@ -1529,37 +1532,60 @@ typedef struct fz_text_block_s fz_text_block; typedef struct fz_text_sheet_s fz_text_sheet; typedef struct fz_text_page_s fz_text_page; +/* + fz_text_sheet: A text sheet contains a list of distinct text styles + used on a page (or a series of pages). +*/ +struct fz_text_sheet_s +{ + int maxid; + fz_text_style *style; +}; + +/* + fz_text_style: A text style contains details of a distinct text style + used on a page. +*/ struct fz_text_style_s { + fz_text_style *next; int id; fz_font *font; float size; int wmode; int script; /* etc... */ - fz_text_style *next; -}; - -struct fz_text_sheet_s -{ - int maxid; - fz_text_style *style; }; -struct fz_text_char_s +/* + fz_text_page: A text page is a list of blocks of text, together with + an overall bounding box. +*/ +struct fz_text_page_s { - fz_rect bbox; - int c; + fz_rect mediabox; + int len, cap; + fz_text_block *blocks; }; -struct fz_text_span_s +/* + fz_text_block: A text block is a list of lines of text. In typical + cases this may correspond to a paragraph or a column of text. A + collection of blocks makes up a page. +*/ +struct fz_text_block_s { fz_rect bbox; int len, cap; - fz_text_char *text; - fz_text_style *style; + fz_text_line *lines; }; +/* + fz_text_line: A text line is a list of text spans, with the same + (or very similar) baseline. In typical cases this should correspond + (as expected) to complete lines of text. A collection of lines makes + up a block. +*/ struct fz_text_line_s { fz_rect bbox; @@ -1567,18 +1593,30 @@ struct fz_text_line_s fz_text_span *spans; }; -struct fz_text_block_s +/* + fz_text_span: A text span is a list of characters in the same style + that share a common (or very similar) baseline. In typical cases + (where only one font style is used in a line), a single span may be + enough to represent a complete line. In cases where multiple + font styles are used (for example italics), then a line will be + broken down into a series of spans. +*/ +struct fz_text_span_s { fz_rect bbox; int len, cap; - fz_text_line *lines; + fz_text_char *text; + fz_text_style *style; }; -struct fz_text_page_s +/* + fz_text_char: A text char is a unicode character and the bounding + box with which it appears on the page. +*/ +struct fz_text_char_s { - fz_rect mediabox; - int len, cap; - fz_text_block *blocks; + fz_rect bbox; + int c; }; /* @@ -1587,6 +1625,16 @@ struct fz_text_page_s Gather and sort the text on a page into spans of uniform style, arranged into lines and blocks by reading order. The reading order is determined by various heuristics, so may not be accurate. + + sheet: The text sheet to which styles should be added. This can + either be a newly created (empty) text sheet, or one containing + styles from a previous text device. The same sheet cannot be used + in multiple threads simultaneously. + + page: The text page to which content should be added. This will + usually be a newly created (empty) text page, but it can be one + containing data already (for example when merging multiple pages, or + watermarking). */ fz_device *fz_new_text_device(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page); @@ -1609,14 +1657,29 @@ void fz_free_text_sheet(fz_context *ctx, fz_text_sheet *sheet); fz_text_page *fz_new_text_page(fz_context *ctx, fz_rect mediabox); void fz_free_text_page(fz_context *ctx, fz_text_page *page); +/* + fz_print_text_sheet: Output a text sheet to a file as CSS. +*/ void fz_print_text_sheet(fz_context *ctx, FILE *out, fz_text_sheet *sheet); + +/* + fz_print_text_page_html: Output a page to a file in HTML format. +*/ void fz_print_text_page_html(fz_context *ctx, FILE *out, fz_text_page *page); + +/* + fz_print_text_page_xml: Output a page to a file in XML format. +*/ void fz_print_text_page_xml(fz_context *ctx, FILE *out, fz_text_page *page); + +/* + fz_print_text_page: Output a page to a file in UTF-8 format. +*/ void fz_print_text_page(fz_context *ctx, FILE *out, fz_text_page *page); /* - * Cookie support - simple communication channel between app/library. - */ + Cookie support - simple communication channel between app/library. +*/ typedef struct fz_cookie_s fz_cookie; @@ -1739,6 +1802,8 @@ void fz_run_display_list(fz_display_list *list, fz_device *dev, fz_matrix ctm, f void fz_free_display_list(fz_context *ctx, fz_display_list *list); /* + Links + NOTE: The link destination struct is scheduled for imminent change! Use at your own peril. */ @@ -1767,6 +1832,59 @@ enum { fz_link_flag_r_is_zoom = 64 /* rb.x is actually a zoom figure */ }; +/* + fz_link_dest: This structure represents the destination of + an fz_link; this may be a page to display, a new file to open, + a javascript action to perform, etc. + + kind: This identifies the kind of link destination. Different + kinds use different sections of the union. + + For FZ_LINK_GOTO or FZ_LINK_GOTOR: + + gotor.page: The target page number to move to (0 being the + first page in the document). + + gotor.flags: A bitfield consisting of fz_link_flag_* + describing the validity and meaning of the different parts + of gotor.lr and gotor.rb. Link destinations are constructed + (as far as possible) so that lt and rb can be treated as a + bounding box, though the validity flags indicate which of the + values was actually specified in the file. + + gotor.lt: The top left corner of the destination bounding box. + + gotor.rb: The bottom right corner of the destination bounding + box. If fz_link_flag_r_is_zoom is set, then the r figure + should actually be interpretted as a zoom ratio. + + gotor.file_spec: If set, this destination should cause a new + file to be opened; this field holds a pointer to a remote + file specification (UTF-8). Always NULL in the FZ_LINK_GOTO + case. + + gotor.new_window: If true, the destination should open in a + new window. + + For FZ_LINK_URI: + + uri.uri: A UTF-8 encoded URI to launch. + + uri.is_map: If true, the x and y coords (as ints, in user + space) should be appended to the URI before launch. + + For FZ_LINK_LAUNCH: + + launch.file_spec: A UTF-8 file specification to launch. + + launch.new_window: If true, the destination should be launched + in a new window. + + For FZ_LINK_NAMED: + + named.named: The named action to perform. Likely to be + client specific. +*/ struct fz_link_dest_s { fz_link_kind kind; -- cgit v1.2.3