summaryrefslogtreecommitdiff
path: root/fitz/dev_text.c
diff options
context:
space:
mode:
Diffstat (limited to 'fitz/dev_text.c')
-rw-r--r--fitz/dev_text.c692
1 files changed, 504 insertions, 188 deletions
diff --git a/fitz/dev_text.c b/fitz/dev_text.c
index d38fab21..7a059e5a 100644
--- a/fitz/dev_text.c
+++ b/fitz/dev_text.c
@@ -1,7 +1,8 @@
-#include "fitz.h"
+#include "fitz-internal.h"
#define LINE_DIST 0.9f
#define SPACE_DIST 0.2f
+#define PARAGRAPH_DIST 0.5f
#include <ft2build.h>
#include FT_FREETYPE_H
@@ -11,60 +12,211 @@ typedef struct fz_text_device_s fz_text_device;
struct fz_text_device_s
{
+ fz_text_sheet *sheet;
+ fz_text_page *page;
+ fz_text_line cur_line;
+ fz_text_span cur_span;
fz_point point;
- fz_text_span *head;
- fz_text_span *span;
};
-fz_text_span *
-fz_new_text_span(fz_context *ctx)
+fz_text_sheet *
+fz_new_text_sheet(fz_context *ctx)
{
- fz_text_span *span;
- span = fz_malloc_struct(ctx, fz_text_span);
- span->font = NULL;
- span->wmode = 0;
- span->size = 0;
- span->len = 0;
- span->cap = 0;
- span->text = NULL;
- span->next = NULL;
- span->eol = 0;
- return span;
+ fz_text_sheet *sheet = fz_malloc(ctx, sizeof *sheet);
+ sheet->maxid = 0;
+ sheet->style = NULL;
+ return sheet;
}
void
-fz_free_text_span(fz_context *ctx, fz_text_span *span)
+fz_free_text_sheet(fz_context *ctx, fz_text_sheet *sheet)
+{
+ fz_text_style *style = sheet->style;
+ while (style)
+ {
+ fz_text_style *next = style->next;
+ fz_drop_font(ctx, style->font);
+ fz_free(ctx, style);
+ style = next;
+ }
+}
+
+static fz_text_style *
+fz_lookup_text_style_imp(fz_context *ctx, fz_text_sheet *sheet,
+ float size, fz_font *font, int wmode, int script)
+{
+ fz_text_style *style;
+
+ for (style = sheet->style; style; style = style->next)
+ {
+ if (style->font == font &&
+ style->size == size &&
+ style->wmode == wmode &&
+ style->script == script) /* FIXME: others */
+ {
+ return style;
+ }
+ }
+
+ /* Better make a new one and add it to our list */
+ style = fz_malloc(ctx, sizeof *style);
+ style->id = sheet->maxid++;
+ style->font = fz_keep_font(ctx, font);
+ style->size = size;
+ style->wmode = wmode;
+ style->script = script;
+ style->next = sheet->style;
+ sheet->style = style;
+ return style;
+}
+
+static fz_text_style *
+fz_lookup_text_style(fz_context *ctx, fz_text_sheet *sheet, fz_text *text, fz_matrix *ctm,
+ fz_colorspace *colorspace, float *color, float alpha, fz_stroke_state *stroke)
{
- fz_text_span *next;
+ float size = 1.0f;
+ fz_font *font = text ? text->font : NULL;
+ int wmode = text ? text->wmode : 0;
+ if (ctm && text)
+ {
+ fz_matrix tm = text->trm;
+ fz_matrix trm;
+ tm.e = 0;
+ tm.f = 0;
+ trm = fz_concat(tm, *ctm);
+ size = fz_matrix_expansion(trm);
+ }
+ return fz_lookup_text_style_imp(ctx, sheet, size, font, wmode, 0);
+}
- while (span)
+fz_text_page *
+fz_new_text_page(fz_context *ctx, fz_rect mediabox)
+{
+ fz_text_page *page = fz_malloc(ctx, sizeof(*page));
+ page->mediabox = mediabox;
+ page->len = 0;
+ page->cap = 0;
+ page->blocks = NULL;
+ return page;
+}
+
+void
+fz_free_text_page(fz_context *ctx, fz_text_page *page)
+{
+ fz_text_block *block;
+ fz_text_line *line;
+ fz_text_span *span;
+ for (block = page->blocks; block < page->blocks + page->len; block++)
{
- if (span->font)
- fz_drop_font(ctx, span->font);
- next = span->next;
- fz_free(ctx, span->text);
- fz_free(ctx, span);
- span = next;
+ for (line = block->lines; line < block->lines + block->len; line++)
+ {
+ for (span = line->spans; span < line->spans + line->len; span++)
+ {
+ fz_free(ctx, span->text);
+ }
+ fz_free(ctx, line->spans);
+ }
+ fz_free(ctx, block->lines);
}
+ fz_free(ctx, page->blocks);
+ fz_free(ctx, page);
}
static void
-fz_add_text_char_imp(fz_context *ctx, fz_text_span *span, int c, fz_bbox bbox)
+append_char(fz_context *ctx, fz_text_span *span, int c, fz_rect bbox)
{
- if (span->len + 1 >= span->cap)
+ if (span->len == span->cap)
{
- span->cap = span->cap > 1 ? (span->cap * 3) / 2 : 80;
- span->text = fz_resize_array(ctx, span->text, span->cap, sizeof(fz_text_char));
+ span->cap = MAX(64, span->cap * 2);
+ span->text = fz_resize_array(ctx, span->text, span->cap, sizeof(*span->text));
}
+ span->bbox = fz_union_rect(span->bbox, bbox);
span->text[span->len].c = c;
span->text[span->len].bbox = bbox;
- span->len ++;
+ span->len++;
+}
+
+static void
+init_span(fz_context *ctx, fz_text_span *span, fz_text_style *style)
+{
+ span->style = style;
+ span->bbox = fz_empty_rect;
+ span->len = span->cap = 0;
+ span->text = NULL;
+}
+
+static void
+append_span(fz_context *ctx, fz_text_line *line, fz_text_span *span)
+{
+ if (line->len == line->cap)
+ {
+ line->cap = MAX(8, line->cap * 2);
+ line->spans = fz_resize_array(ctx, line->spans, line->cap, sizeof(*line->spans));
+ }
+ line->bbox = fz_union_rect(line->bbox, span->bbox);
+ line->spans[line->len++] = *span;
}
-static fz_bbox
-fz_split_bbox(fz_bbox bbox, int i, int n)
+static void
+init_line(fz_context *ctx, fz_text_line *line)
+{
+ line->bbox = fz_empty_rect;
+ line->len = line->cap = 0;
+ line->spans = NULL;
+}
+
+static void
+append_line(fz_context *ctx, fz_text_block *block, fz_text_line *line)
{
- float w = (float)(bbox.x1 - bbox.x0) / n;
+ if (block->len == block->cap)
+ {
+ block->cap = MAX(16, block->cap * 2);
+ block->lines = fz_resize_array(ctx, block->lines, block->cap, sizeof *block->lines);
+ }
+ block->bbox = fz_union_rect(block->bbox, line->bbox);
+ block->lines[block->len++] = *line;
+}
+
+static fz_text_block *
+lookup_block_for_line(fz_context *ctx, fz_text_page *page, fz_text_line *line)
+{
+ float size = line->len > 0 && line->spans[0].len > 0 ? line->spans[0].style->size : 1;
+ int i;
+
+ for (i = 0; i < page->len; i++)
+ {
+ fz_text_block *block = page->blocks + i;
+ int w = block->bbox.x1 - block->bbox.x0;
+ if (block->bbox.y0 - line->bbox.y1 < size * PARAGRAPH_DIST)
+ if (line->bbox.x0 < block->bbox.x1 && line->bbox.x1 > block->bbox.x0)
+ if (ABS(line->bbox.x0 - block->bbox.x0) < w / 4)
+ return block;
+ }
+
+ if (page->len == page->cap)
+ {
+ page->cap = MAX(16, page->cap * 2);
+ page->blocks = fz_resize_array(ctx, page->blocks, page->cap, sizeof(*page->blocks));
+ }
+
+ page->blocks[page->len].bbox = fz_empty_rect;
+ page->blocks[page->len].len = 0;
+ page->blocks[page->len].cap = 0;
+ page->blocks[page->len].lines = NULL;
+
+ return &page->blocks[page->len++];
+}
+
+static void
+insert_line(fz_context *ctx, fz_text_page *page, fz_text_line *line)
+{
+ append_line(ctx, lookup_block_for_line(ctx, page, line), line);
+}
+
+static fz_rect
+fz_split_bbox(fz_rect bbox, int i, int n)
+{
+ float w = (bbox.x1 - bbox.x0) / n;
float x0 = bbox.x0;
bbox.x0 = x0 + i * w;
bbox.x1 = x0 + (i + 1) * w;
@@ -72,154 +224,71 @@ fz_split_bbox(fz_bbox bbox, int i, int n)
}
static void
-fz_add_text_char(fz_context *ctx, fz_text_span **last, fz_font *font, float size, int wmode, int c, fz_bbox bbox)
+fz_flush_text_line(fz_context *ctx, fz_text_device *dev, fz_text_style *style)
{
- fz_text_span *span = *last;
-
- if (!span->font)
- {
- span->font = fz_keep_font(ctx, font);
- span->size = size;
- }
+ append_span(ctx, &dev->cur_line, &dev->cur_span);
+ insert_line(ctx, dev->page, &dev->cur_line);
+ init_span(ctx, &dev->cur_span, style);
+ init_line(ctx, &dev->cur_line);
+}
- if ((span->font != font || span->size != size || span->wmode != wmode) && c != 32)
+static void
+fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style, int c, fz_rect bbox)
+{
+ if (!dev->cur_span.style)
+ dev->cur_span.style = style;
+ if (style != dev->cur_span.style)
{
- span = fz_new_text_span(ctx);
- span->font = fz_keep_font(ctx, font);
- span->size = size;
- span->wmode = wmode;
- (*last)->next = span;
- *last = span;
+ append_span(ctx, &dev->cur_line, &dev->cur_span);
+ init_span(ctx, &dev->cur_span, style);
}
+ append_char(ctx, &dev->cur_span, c, bbox);
+}
+static void
+fz_add_text_char(fz_context *ctx, fz_text_device *dev, fz_text_style *style, int c, fz_rect bbox)
+{
switch (c)
{
case -1: /* ignore when one unicode character maps to multiple glyphs */
break;
case 0xFB00: /* ff */
- fz_add_text_char_imp(ctx, span, 'f', fz_split_bbox(bbox, 0, 2));
- fz_add_text_char_imp(ctx, span, 'f', fz_split_bbox(bbox, 1, 2));
+ fz_add_text_char_imp(ctx, dev, style, 'f', fz_split_bbox(bbox, 0, 2));
+ fz_add_text_char_imp(ctx, dev, style, 'f', fz_split_bbox(bbox, 1, 2));
break;
case 0xFB01: /* fi */
- fz_add_text_char_imp(ctx, span, 'f', fz_split_bbox(bbox, 0, 2));
- fz_add_text_char_imp(ctx, span, 'i', fz_split_bbox(bbox, 1, 2));
+ fz_add_text_char_imp(ctx, dev, style, 'f', fz_split_bbox(bbox, 0, 2));
+ fz_add_text_char_imp(ctx, dev, style, 'i', fz_split_bbox(bbox, 1, 2));
break;
case 0xFB02: /* fl */
- fz_add_text_char_imp(ctx, span, 'f', fz_split_bbox(bbox, 0, 2));
- fz_add_text_char_imp(ctx, span, 'l', fz_split_bbox(bbox, 1, 2));
+ fz_add_text_char_imp(ctx, dev, style, 'f', fz_split_bbox(bbox, 0, 2));
+ fz_add_text_char_imp(ctx, dev, style, 'l', fz_split_bbox(bbox, 1, 2));
break;
case 0xFB03: /* ffi */
- fz_add_text_char_imp(ctx, span, 'f', fz_split_bbox(bbox, 0, 3));
- fz_add_text_char_imp(ctx, span, 'f', fz_split_bbox(bbox, 1, 3));
- fz_add_text_char_imp(ctx, span, 'i', fz_split_bbox(bbox, 2, 3));
+ fz_add_text_char_imp(ctx, dev, style, 'f', fz_split_bbox(bbox, 0, 3));
+ fz_add_text_char_imp(ctx, dev, style, 'f', fz_split_bbox(bbox, 1, 3));
+ fz_add_text_char_imp(ctx, dev, style, 'i', fz_split_bbox(bbox, 2, 3));
break;
case 0xFB04: /* ffl */
- fz_add_text_char_imp(ctx, span, 'f', fz_split_bbox(bbox, 0, 3));
- fz_add_text_char_imp(ctx, span, 'f', fz_split_bbox(bbox, 1, 3));
- fz_add_text_char_imp(ctx, span, 'l', fz_split_bbox(bbox, 2, 3));
+ fz_add_text_char_imp(ctx, dev, style, 'f', fz_split_bbox(bbox, 0, 3));
+ fz_add_text_char_imp(ctx, dev, style, 'f', fz_split_bbox(bbox, 1, 3));
+ fz_add_text_char_imp(ctx, dev, style, 'l', fz_split_bbox(bbox, 2, 3));
break;
case 0xFB05: /* long st */
case 0xFB06: /* st */
- fz_add_text_char_imp(ctx, span, 's', fz_split_bbox(bbox, 0, 2));
- fz_add_text_char_imp(ctx, span, 't', fz_split_bbox(bbox, 1, 2));
+ fz_add_text_char_imp(ctx, dev, style, 's', fz_split_bbox(bbox, 0, 2));
+ fz_add_text_char_imp(ctx, dev, style, 't', fz_split_bbox(bbox, 1, 2));
break;
default:
- fz_add_text_char_imp(ctx, span, c, bbox);
+ fz_add_text_char_imp(ctx, dev, style, c, bbox);
break;
}
}
static void
-fz_divide_text_chars(fz_text_span **last, int n, fz_bbox bbox)
-{
- fz_text_span *span = *last;
- int i, x;
- x = span->len - n;
- if (x >= 0)
- for (i = 0; i < n; i++)
- span->text[x + i].bbox = fz_split_bbox(bbox, i, n);
-}
-
-static void
-fz_add_text_newline(fz_context *ctx, fz_text_span **last, fz_font *font, float size, int wmode)
-{
- fz_text_span *span;
- span = fz_new_text_span(ctx);
- span->font = fz_keep_font(ctx, font);
- span->size = size;
- span->wmode = wmode;
- (*last)->eol = 1;
- (*last)->next = span;
- *last = span;
-}
-
-void
-fz_debug_text_span_xml(fz_text_span *span)
-{
- char buf[10];
- int c, n, k, i;
-
- while (span)
- {
- printf("<span font=\"%s\" size=\"%g\" wmode=\"%d\" eol=\"%d\">\n",
- span->font ? span->font->name : "NULL", span->size, span->wmode, span->eol);
-
- for (i = 0; i < span->len; i++)
- {
- printf("\t<char ucs=\"");
- c = span->text[i].c;
- if (c < 128)
- putchar(c);
- else
- {
- n = runetochar(buf, &c);
- for (k = 0; k < n; k++)
- putchar(buf[k]);
- }
- printf("\" bbox=\"%d %d %d %d\" />\n",
- span->text[i].bbox.x0,
- span->text[i].bbox.y0,
- span->text[i].bbox.x1,
- span->text[i].bbox.y1);
- }
-
- printf("</span>\n");
-
- span = span->next;
- }
-}
-
-void
-fz_debug_text_span(fz_text_span *span)
-{
- char buf[10];
- int c, n, k, i;
-
- while (span)
- {
- for (i = 0; i < span->len; i++)
- {
- c = span->text[i].c;
- if (c < 128)
- putchar(c);
- else
- {
- n = runetochar(buf, &c);
- for (k = 0; k < n; k++)
- putchar(buf[k]);
- }
- }
-
- if (span->eol)
- putchar('\n');
-
- span = span->next;
- }
-}
-
-static void
-fz_text_extract_span(fz_context *ctx, fz_text_span **last, fz_text *text, fz_matrix ctm, fz_point *pen)
+fz_text_extract(fz_context *ctx, fz_text_device *dev, fz_text *text, fz_matrix ctm, fz_text_style *style)
{
+ fz_point *pen = &dev->point;
fz_font *font = text->font;
FT_Face face = font->ft_face;
fz_matrix tm = text->trm;
@@ -233,19 +302,21 @@ fz_text_extract_span(fz_context *ctx, fz_text_span **last, fz_text *text, fz_mat
float ascender = 1;
float descender = 0;
int multi;
- int i, err;
+ int i, j, err;
+ int lastchar = ' ';
if (text->len == 0)
return;
- fz_lock(ctx, FZ_LOCK_FREETYPE);
if (font->ft_face)
{
+ fz_lock(ctx, FZ_LOCK_FREETYPE);
err = FT_Set_Char_Size(font->ft_face, 64, 64, 72, 72);
if (err)
fz_warn(ctx, "freetype set character size: %s", ft_error_string(err));
ascender = (float)face->ascender / face->units_per_EM;
descender = (float)face->descender / face->units_per_EM;
+ fz_unlock(ctx, FZ_LOCK_FREETYPE);
}
rect = fz_empty_rect;
@@ -264,6 +335,7 @@ fz_text_extract_span(fz_context *ctx, fz_text_span **last, fz_text *text, fz_mat
tm.e = 0;
tm.f = 0;
trm = fz_concat(tm, ctm);
+
dir = fz_transform_vector(trm, dir);
dist = sqrtf(dir.x * dir.x + dir.y * dir.y);
ndir.x = dir.x / dist;
@@ -271,19 +343,8 @@ fz_text_extract_span(fz_context *ctx, fz_text_span **last, fz_text *text, fz_mat
size = fz_matrix_expansion(trm);
- multi = 1;
-
for (i = 0; i < text->len; i++)
{
- if (text->items[i].gid < 0)
- {
- fz_add_text_char(ctx, last, font, size, text->wmode, text->items[i].ucs, fz_round_rect(rect));
- multi ++;
- fz_divide_text_chars(last, multi, fz_round_rect(rect));
- continue;
- }
- multi = 1;
-
/* Calculate new pen location and delta */
tm.e = text->items[i].x;
tm.f = text->items[i].y;
@@ -305,20 +366,19 @@ fz_text_extract_span(fz_context *ctx, fz_text_span **last, fz_text *text, fz_mat
if (dist > size * LINE_DIST)
{
- fz_add_text_newline(ctx, last, font, size, text->wmode);
+ fz_flush_text_line(ctx, dev, style);
+ lastchar = ' ';
}
- else if (fabsf(dot) > 0.95f && dist > size * SPACE_DIST)
+ else if (fabsf(dot) > 0.95f && dist > size * SPACE_DIST && lastchar != ' ')
{
- if ((*last)->len > 0 && (*last)->text[(*last)->len - 1].c != ' ')
- {
- fz_rect spacerect;
- spacerect.x0 = -0.2f;
- spacerect.y0 = 0;
- spacerect.x1 = 0;
- spacerect.y1 = 1;
- spacerect = fz_transform_rect(trm, spacerect);
- fz_add_text_char(ctx, last, font, size, text->wmode, ' ', fz_round_rect(spacerect));
- }
+ fz_rect spacerect;
+ spacerect.x0 = -0.2f;
+ spacerect.y0 = 0;
+ spacerect.x1 = 0;
+ spacerect.y1 = 1;
+ spacerect = fz_transform_rect(trm, spacerect);
+ fz_add_text_char(ctx, dev, style, ' ', spacerect);
+ lastchar = ' ';
}
}
@@ -331,8 +391,13 @@ fz_text_extract_span(fz_context *ctx, fz_text_span **last, fz_text *text, fz_mat
/* TODO: freetype returns broken vertical metrics */
/* if (text->wmode) mask |= FT_LOAD_VERTICAL_LAYOUT; */
+ fz_lock(ctx, FZ_LOCK_FREETYPE);
+ err = FT_Set_Char_Size(font->ft_face, 64, 64, 72, 72);
+ if (err)
+ fz_warn(ctx, "freetype set character size: %s", ft_error_string(err));
FT_Get_Advance(font->ft_face, text->items[i].gid, mask, &ftadv);
adv = ftadv / 65536.0f;
+ fz_unlock(ctx, FZ_LOCK_FREETYPE);
rect.x0 = 0;
rect.y0 = descender;
@@ -352,9 +417,27 @@ fz_text_extract_span(fz_context *ctx, fz_text_span **last, fz_text *text, fz_mat
pen->x = trm.e + dir.x * adv;
pen->y = trm.f + dir.y * adv;
- fz_add_text_char(ctx, last, font, size, text->wmode, text->items[i].ucs, fz_round_rect(rect));
+ /* Check for one glyph to many char mapping */
+ for (j = i + 1; j < text->len; j++)
+ if (text->items[j].gid >= 0)
+ break;
+ multi = j - i;
+
+ if (multi == 1)
+ {
+ fz_add_text_char(ctx, dev, style, text->items[i].ucs, rect);
+ }
+ else
+ {
+ for (j = 0; j < multi; j++)
+ {
+ fz_rect part = fz_split_bbox(rect, j, multi);
+ fz_add_text_char(ctx, dev, style, text->items[i].ucs, part);
+ }
+ }
+
+ lastchar = text->items[i].ucs;
}
- fz_unlock(ctx, FZ_LOCK_FREETYPE);
}
static void
@@ -362,7 +445,9 @@ fz_text_fill_text(fz_device *dev, fz_text *text, fz_matrix ctm,
fz_colorspace *colorspace, float *color, float alpha)
{
fz_text_device *tdev = dev->user;
- fz_text_extract_span(dev->ctx, &tdev->span, text, ctm, &tdev->point);
+ fz_text_style *style;
+ style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, &ctm, colorspace, color, alpha, NULL);
+ fz_text_extract(dev->ctx, tdev, text, ctm, style);
}
static void
@@ -370,36 +455,57 @@ fz_text_stroke_text(fz_device *dev, fz_text *text, fz_stroke_state *stroke, fz_m
fz_colorspace *colorspace, float *color, float alpha)
{
fz_text_device *tdev = dev->user;
- fz_text_extract_span(dev->ctx, &tdev->span, text, ctm, &tdev->point);
+ fz_text_style *style;
+ style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, &ctm, colorspace, color, alpha, stroke);
+ fz_text_extract(dev->ctx, tdev, text, ctm, style);
}
static void
fz_text_clip_text(fz_device *dev, fz_text *text, fz_matrix ctm, int accumulate)
{
fz_text_device *tdev = dev->user;
- fz_text_extract_span(dev->ctx, &tdev->span, text, ctm, &tdev->point);
+ fz_text_style *style;
+ style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, &ctm, NULL, NULL, 0, NULL);
+ fz_text_extract(dev->ctx, tdev, text, ctm, style);
}
static void
fz_text_clip_stroke_text(fz_device *dev, fz_text *text, fz_stroke_state *stroke, fz_matrix ctm)
{
fz_text_device *tdev = dev->user;
- fz_text_extract_span(dev->ctx, &tdev->span, text, ctm, &tdev->point);
+ fz_text_style *style;
+ style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, &ctm, NULL, NULL, 0, stroke);
+ fz_text_extract(dev->ctx, tdev, text, ctm, style);
}
static void
fz_text_ignore_text(fz_device *dev, fz_text *text, fz_matrix ctm)
{
fz_text_device *tdev = dev->user;
- fz_text_extract_span(dev->ctx, &tdev->span, text, ctm, &tdev->point);
+ fz_text_style *style;
+ style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, &ctm, NULL, NULL, 0, NULL);
+ fz_text_extract(dev->ctx, tdev, text, ctm, style);
+}
+
+static int cmp_block(const void *av, const void *bv)
+{
+ const fz_text_block *a = av;
+ const fz_text_block *b = bv;
+ int x = a->bbox.x0 - b->bbox.x0;
+ if (x) return x;
+ return -(a->bbox.y0 - b->bbox.y0);
}
static void
fz_text_free_user(fz_device *dev)
{
+ fz_context *ctx = dev->ctx;
fz_text_device *tdev = dev->user;
- tdev->span->eol = 1;
+ append_span(ctx, &tdev->cur_line, &tdev->cur_span);
+ insert_line(ctx, tdev->page, &tdev->cur_line);
+
+ qsort(tdev->page->blocks, tdev->page->len, sizeof *tdev->page->blocks, cmp_block);
/* TODO: unicode NFC normalization */
/* TODO: bidi logical reordering */
@@ -408,15 +514,19 @@ fz_text_free_user(fz_device *dev)
}
fz_device *
-fz_new_text_device(fz_context *ctx, fz_text_span *root)
+fz_new_text_device(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
{
fz_device *dev;
+
fz_text_device *tdev = fz_malloc_struct(ctx, fz_text_device);
- tdev->head = root;
- tdev->span = root;
+ tdev->sheet = sheet;
+ tdev->page = page;
tdev->point.x = -1;
tdev->point.y = -1;
+ init_line(ctx, &tdev->cur_line);
+ init_span(ctx, &tdev->cur_span, NULL);
+
dev = fz_new_device(ctx, tdev);
dev->hints = FZ_IGNORE_IMAGE | FZ_IGNORE_SHADE;
dev->free_user = fz_text_free_user;
@@ -427,3 +537,209 @@ fz_new_text_device(fz_context *ctx, fz_text_span *root)
dev->ignore_text = fz_text_ignore_text;
return dev;
}
+
+/* XML, HTML and plain-text output */
+
+static int font_is_bold(fz_font *font)
+{
+ FT_Face face = font->ft_face;
+ if (face && (face->style_flags & FT_STYLE_FLAG_BOLD))
+ return 1;
+ if (strstr(font->name, "Bold"))
+ return 1;
+ return 0;
+}
+
+static int font_is_italic(fz_font *font)
+{
+ FT_Face face = font->ft_face;
+ if (face && (face->style_flags & FT_STYLE_FLAG_ITALIC))
+ return 1;
+ if (strstr(font->name, "Italic") || strstr(font->name, "Oblique"))
+ return 1;
+ return 0;
+}
+
+static void
+fz_print_style_begin(FILE *out, fz_text_style *style)
+{
+ int script = style->script;
+ fprintf(out, "<span class=\"s%d\">", style->id);
+ while (script-- > 0)
+ fprintf(out, "<sup>");
+ while (++script < 0)
+ fprintf(out, "<sub>");
+}
+
+static void
+fz_print_style_end(FILE *out, fz_text_style *style)
+{
+ int script = style->script;
+ while (script-- > 0)
+ fprintf(out, "</sup>");
+ while (++script < 0)
+ fprintf(out, "</sub>");
+ fprintf(out, "</span>");
+}
+
+static void
+fz_print_style(FILE *out, fz_text_style *style)
+{
+ char *s = strchr(style->font->name, '+');
+ s = s ? s + 1 : style->font->name;
+ fprintf(out, "span.s%d{font-family:\"%s\";font-size:%gpt;",
+ style->id, s, style->size);
+ if (font_is_italic(style->font))
+ fprintf(out, "font-style:italic;");
+ if (font_is_bold(style->font))
+ fprintf(out, "font-weight:bold;");
+ fprintf(out, "}\n");
+}
+
+void
+fz_print_text_sheet(fz_context *ctx, FILE *out, fz_text_sheet *sheet)
+{
+ fz_text_style *style;
+ for (style = sheet->style; style; style = style->next)
+ fz_print_style(out, style);
+}
+
+void
+fz_print_text_page_html(fz_context *ctx, FILE *out, fz_text_page *page)
+{
+ int block_n, line_n, span_n, ch_n;
+ fz_text_style *style = NULL;
+ fz_text_block *block;
+ fz_text_line *line;
+ fz_text_span *span;
+
+ fprintf(out, "<div class=\"page\">\n");
+
+ for (block_n = 0; block_n < page->len; block_n++)
+ {
+ block = &page->blocks[block_n];
+ fprintf(out, "<div class=\"block\">\n");
+ for (line_n = 0; line_n < block->len; line_n++)
+ {
+ line = &block->lines[line_n];
+ fprintf(out, "<p>");
+ style = NULL;
+
+ for (span_n = 0; span_n < line->len; span_n++)
+ {
+ span = &line->spans[span_n];
+ if (style != span->style)
+ {
+ if (style != NULL)
+ fz_print_style_end(out, style);
+ fz_print_style_begin(out, span->style);
+ style = span->style;
+ }
+
+ for (ch_n = 0; ch_n < span->len; ch_n++)
+ {
+ fz_text_char *ch = &span->text[ch_n];
+ if (ch->c == '<')
+ fprintf(out, "&lt;");
+ else if (ch->c == '>')
+ fprintf(out, "&gt;");
+ else if (ch->c == '&')
+ fprintf(out, "&amp;");
+ else if (ch->c >= 32 && ch->c <= 127)
+ fprintf(out, "%c", ch->c);
+ else
+ fprintf(out, "&#x%x;", ch->c);
+ }
+ }
+ fz_print_style_end(out, style);
+ fprintf(out, "</p>\n");
+ }
+ fprintf(out, "</div>\n");
+ }
+
+ fprintf(out, "</div>\n");
+}
+
+void
+fz_print_text_page_xml(fz_context *ctx, FILE *out, fz_text_page *page)
+{
+ fz_text_block *block;
+ fz_text_line *line;
+ fz_text_span *span;
+ fz_text_char *ch;
+ char *s;
+
+ fprintf(out, "<page>\n");
+ for (block = page->blocks; block < page->blocks + page->len; block++)
+ {
+ fprintf(out, "<block bbox=\"%g %g %g %g\">\n",
+ block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
+ for (line = block->lines; line < block->lines + block->len; line++)
+ {
+ fprintf(out, "<line bbox=\"%g %g %g %g\">\n",
+ line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1);
+ for (span = line->spans; span < line->spans + line->len; span++)
+ {
+ fz_text_style *style = span->style;
+ s = strchr(style->font->name, '+');
+ s = s ? s + 1 : style->font->name;
+ fprintf(out, "<span bbox=\"%g %g %g %g\" font=\"%s\" size=\"%g\">\n",
+ span->bbox.x0, span->bbox.y0, span->bbox.x1, span->bbox.y1,
+ s, style->size);
+ for (ch = span->text; ch < span->text + span->len; ch++)
+ {
+ fprintf(out, "<char bbox=\"%g %g %g %g\" c=\"",
+ ch->bbox.x0, ch->bbox.y0, ch->bbox.x1, ch->bbox.y1);
+ switch (ch->c)
+ {
+ case '<': fprintf(out, "&lt;"); break;
+ case '>': fprintf(out, "&gt;"); break;
+ case '&': fprintf(out, "&amp;"); break;
+ case '"': fprintf(out, "&quot;"); break;
+ case '\'': fprintf(out, "&apos;"); break;
+ default:
+ if (ch->c >= 32 && ch->c <= 127)
+ fprintf(out, "%c", ch->c);
+ else
+ fprintf(out, "&#x%x;", ch->c);
+ break;
+ }
+ fprintf(out, "\"/>\n");
+ }
+ fprintf(out, "</span>\n");
+ }
+ fprintf(out, "</line>\n");
+ }
+ fprintf(out, "</block>\n");
+ }
+ fprintf(out, "</page>\n");
+}
+
+void
+fz_print_text_page(fz_context *ctx, FILE *out, fz_text_page *page)
+{
+ fz_text_block *block;
+ fz_text_line *line;
+ fz_text_span *span;
+ fz_text_char *ch;
+ char utf[10];
+ int i, n;
+
+ for (block = page->blocks; block < page->blocks + page->len; block++)
+ {
+ for (line = block->lines; line < block->lines + block->len; line++)
+ {
+ for (span = line->spans; span < line->spans + line->len; span++)
+ {
+ for (ch = span->text; ch < span->text + span->len; ch++)
+ {
+ n = fz_runetochar(utf, ch->c);
+ for (i = 0; i < n; i++)
+ putc(utf[i], out);
+ }
+ }
+ fprintf(out, "\n");
+ }
+ fprintf(out, "\n");
+ }
+}