summaryrefslogtreecommitdiff
path: root/fitz/fitz.h
diff options
context:
space:
mode:
Diffstat (limited to 'fitz/fitz.h')
-rw-r--r--fitz/fitz.h74
1 files changed, 58 insertions, 16 deletions
diff --git a/fitz/fitz.h b/fitz/fitz.h
index c614161f..b5db25c6 100644
--- a/fitz/fitz.h
+++ b/fitz/fitz.h
@@ -1869,6 +1869,8 @@ struct fz_text_style_s
float size;
int wmode;
int script;
+ float ascender;
+ float descender;
/* etc... */
};
@@ -1897,43 +1899,81 @@ struct fz_text_block_s
/*
fz_text_line: A text line is a list of text spans, with the same
- (or very similar) baseline. In typical cases this should correspond
- (as expected) to complete lines of text. A collection of lines makes
- up a block.
+ baseline. In typical cases this should correspond (as expected) to
+ complete lines of text. A collection of lines makes up a block.
*/
struct fz_text_line_s
{
- fz_rect bbox;
int len, cap;
- fz_text_span *spans;
+ fz_text_span **spans;
+
+ /* Cached information */
+ float distance; /* Perpendicular distance from previous line */
+ fz_rect bbox;
};
/*
- fz_text_span: A text span is a list of characters in the same style
- that share a common (or very similar) baseline. In typical cases
- (where only one font style is used in a line), a single span may be
- enough to represent a complete line. In cases where multiple
- font styles are used (for example italics), then a line will be
- broken down into a series of spans.
+ fz_text_span: A text span is a list of characters that share a common
+ baseline/transformation. In typical cases a single span may be enough
+ to represent a complete line. In cases where the text has big gaps in
+ it (perhaps as it crosses columns or tables), a line may be represented
+ by multiple spans.
*/
struct fz_text_span_s
{
- fz_rect bbox;
int len, cap;
fz_text_char *text;
- fz_text_style *style;
+ fz_point min; /* Device space */
+ fz_point max; /* Device space */
+ int wmode; /* 0 for horizontal, 1 for vertical */
+ fz_matrix transform; /* e and f are always 0 here */
+ float ascender_max; /* Document space */
+ float descender_min; /* Document space */
+ fz_rect bbox; /* Device space */
+
+ /* Cached information */
+ float base_offset; /* Perpendicular distance from baseline of line */
+ float spacing; /* Distance along baseline from previous span in this line (or 0 if first) */
};
/*
- fz_text_char: A text char is a unicode character and the bounding
- box with which it appears on the page.
+ fz_text_char: A text char is a unicode character, the style in which
+ is appears, and the point at which it is positioned. Transform
+ (and hence bbox) information is given by the enclosing span.
*/
struct fz_text_char_s
{
- fz_rect bbox;
+ fz_point p; /* Device space */
int c;
+ fz_text_style *style;
};
+typedef struct fz_char_and_box_s fz_char_and_box;
+
+struct fz_char_and_box_s
+{
+ int c;
+ fz_rect bbox;
+};
+
+fz_char_and_box *fz_text_char_at(fz_char_and_box *cab, fz_text_page *page, int idx);
+
+/*
+ fz_text_char_bbox: Return the bbox of a text char. Calculated from
+ the supplied enclosing span.
+
+ bbox: A place to store the bbox
+
+ span: The enclosing span
+
+ idx: The index of the char within the span
+
+ Returns bbox (updated)
+
+ Does not throw exceptions
+*/
+fz_rect *fz_text_char_bbox(fz_rect *bbox, fz_text_span *span, int idx);
+
/*
fz_new_text_device: Create a device to extract the text on a page.
@@ -1972,6 +2012,8 @@ void fz_free_text_sheet(fz_context *ctx, fz_text_sheet *sheet);
fz_text_page *fz_new_text_page(fz_context *ctx, const fz_rect *mediabox);
void fz_free_text_page(fz_context *ctx, fz_text_page *page);
+void fz_text_analysis(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page);
+
typedef struct fz_output_s fz_output;
struct fz_output_s