summary | refs | log | tree | commit | diff
path: root/source/fitz/util.c
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2017-08-01 18:15:23 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2017-08-17 13:38:48 +0200
commit: 626ea2ea771735492c9a4350ae02b26ea09d1423 (patch)
tree: c92241b181a51719cbb47402bad98bb1984bf963 /source/fitz/util.c
parent: e349ba5984fe837d3eec9649d718efe16169ca44 (diff)
download: mupdf-626ea2ea771735492c9a4350ae02b26ea09d1423.tar.xz
Simplify stext structure and device.
* Use pool allocator and linked lists for all levels.
* Remove separate fz_stext_sheet struct.
* Remove unused 'script' style.
* Remove 'span' level items.
* Detect visual/logical RTL layouts.
* Detect indented paragraphs.
Diffstat (limited to 'source/fitz/util.c')
-rw-r--r-- source/fitz/util.c 109
1 file changed, 34 insertions, 75 deletions
diff --git a/source/fitz/util.c b/source/fitz/util.c
index 6f900174..d6a7f317 100644
--- a/source/fitz/util.c
+++ b/source/fitz/util.c
@@ -267,7 +267,7 @@ fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, co
}
fz_stext_page *
-fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet, const fz_stext_options *options)
+fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, const fz_stext_options *options)
{
fz_stext_page *text;
fz_device *dev;
@@ -279,7 +279,7 @@ fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_s
text = fz_new_stext_page(ctx, fz_bound_display_list(ctx, list, &mediabox));
fz_try(ctx)
{
- dev = fz_new_stext_device(ctx, sheet, text, options);
+ dev = fz_new_stext_device(ctx, text, options);
fz_run_display_list(ctx, list, dev, &fz_identity, NULL, NULL);
fz_close_device(ctx, dev);
}
@@ -297,7 +297,7 @@ fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_s
}
fz_stext_page *
-fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet, const fz_stext_options *options)
+fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, const fz_stext_options *options)
{
fz_stext_page *text;
fz_device *dev;
@@ -309,7 +309,7 @@ fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *shee
text = fz_new_stext_page(ctx, fz_bound_page(ctx, page, &mediabox));
fz_try(ctx)
{
- dev = fz_new_stext_device(ctx, sheet, text, options);
+ dev = fz_new_stext_device(ctx, text, options);
fz_run_page(ctx, page, dev, &fz_identity, NULL);
fz_close_device(ctx, dev);
}
@@ -327,14 +327,14 @@ fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *shee
}
fz_stext_page *
-fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet, const fz_stext_options *options)
+fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_stext_options *options)
{
fz_page *page;
fz_stext_page *text;
page = fz_load_page(ctx, doc, number);
fz_try(ctx)
- text = fz_new_stext_page_from_page(ctx, page, sheet, options);
+ text = fz_new_stext_page_from_page(ctx, page, options);
fz_always(ctx)
fz_drop_page(ctx, page);
fz_catch(ctx)
@@ -345,24 +345,14 @@ fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number
int
fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, fz_rect *hit_bbox, int hit_max)
{
- fz_stext_sheet *sheet = NULL;
- fz_stext_page *text = NULL;
+ fz_stext_page *text;
int count;
- fz_var(sheet);
- fz_var(text);
-
+ text = fz_new_stext_page_from_display_list(ctx, list, NULL);
fz_try(ctx)
- {
- sheet = fz_new_stext_sheet(ctx);
- text = fz_new_stext_page_from_display_list(ctx, list, sheet, NULL);
count = fz_search_stext_page(ctx, text, needle, hit_bbox, hit_max);
- }
fz_always(ctx)
- {
fz_drop_stext_page(ctx, text);
- fz_drop_stext_sheet(ctx, sheet);
- }
fz_catch(ctx)
fz_rethrow(ctx);
return count;
@@ -371,24 +361,14 @@ fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needl
int
fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_rect *hit_bbox, int hit_max)
{
- fz_stext_sheet *sheet = NULL;
- fz_stext_page *text = NULL;
+ fz_stext_page *text;
int count;
- fz_var(sheet);
- fz_var(text);
-
+ text = fz_new_stext_page_from_page(ctx, page, NULL);
fz_try(ctx)
- {
- sheet = fz_new_stext_sheet(ctx);
- text = fz_new_stext_page_from_page(ctx, page, sheet, NULL);
count = fz_search_stext_page(ctx, text, needle, hit_bbox, hit_max);
- }
fz_always(ctx)
- {
fz_drop_stext_page(ctx, text);
- fz_drop_stext_sheet(ctx, sheet);
- }
fz_catch(ctx)
fz_rethrow(ctx);
return count;
@@ -411,14 +391,15 @@ fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char
}
fz_buffer *
-fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rect *sel, int crlf)
+fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *page, const fz_rect *sel, int crlf)
{
fz_buffer *buf;
fz_rect hitbox;
float x0, y0, x1, y1;
- int block_num;
+ fz_stext_block *block;
+ fz_stext_line *line;
+ fz_stext_char *ch;
int need_newline;
- int i;
need_newline = 0;
@@ -438,45 +419,33 @@ fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rec
buf = fz_new_buffer(ctx, 256);
fz_try(ctx)
{
- for (block_num = 0; block_num < text->len; block_num++)
+ for (block = page->first_block; block; block = block->next)
{
- fz_stext_line *line;
- fz_stext_block *block;
- fz_stext_span *span;
-
- if (text->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ if (block->type != FZ_STEXT_BLOCK_TEXT)
continue;
- block = text->blocks[block_num].u.text;
- for (line = block->lines; line < block->lines + block->len; line++)
+ for (line = block->u.t.first_line; line; line = line->next)
{
int saw_text = 0;
- for (span = line->first_span; span; span = span->next)
+ for (ch = line->first_char; ch; ch = ch->next)
{
- if (span->spacing > 1)
- fz_append_byte(ctx, buf, ' ');
- for (i = 0; i < span->len; i++)
+ int c = ch->c;
+ fz_stext_char_bbox(ctx, &hitbox, line, ch);
+ if (c < 32)
+ c = 0xFFFD;
+ if (hitbox.x1 >= x0 && hitbox.x0 <= x1 && hitbox.y1 >= y0 && hitbox.y0 <= y1)
{
- int c;
- fz_stext_char_bbox(ctx, &hitbox, span, i);
- c = span->text[i].c;
- if (c < 32)
- c = 0xFFFD;
- if (hitbox.x1 >= x0 && hitbox.x0 <= x1 && hitbox.y1 >= y0 && hitbox.y0 <= y1)
+ saw_text = 1;
+ if (need_newline)
{
- saw_text = 1;
- if (need_newline)
- {
- if (crlf)
- fz_append_byte(ctx, buf, '\r');
- fz_append_byte(ctx, buf, '\n');
- need_newline = 0;
- }
- fz_append_rune(ctx, buf, c);
+ if (crlf)
+ fz_append_byte(ctx, buf, '\r');
+ fz_append_byte(ctx, buf, '\n');
+ need_newline = 0;
}
+ fz_append_rune(ctx, buf, c);
}
}
-
if (saw_text)
need_newline = 1;
}
@@ -494,42 +463,32 @@ fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rec
fz_buffer *
fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf, const fz_stext_options *options)
{
- fz_stext_sheet *sheet;
fz_stext_page *text;
fz_buffer *buf;
- sheet = fz_new_stext_sheet(ctx);
+ text = fz_new_stext_page_from_display_list(ctx, list, options);
fz_try(ctx)
- {
- text = fz_new_stext_page_from_display_list(ctx, list, sheet, options);
buf = fz_new_buffer_from_stext_page(ctx, text, sel, crlf);
- }
fz_always(ctx)
- fz_drop_stext_sheet(ctx, sheet);
+ fz_drop_stext_page(ctx, text);
fz_catch(ctx)
fz_rethrow(ctx);
- fz_drop_stext_page(ctx, text);
return buf;
}
fz_buffer *
fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf, const fz_stext_options *options)
{
- fz_stext_sheet *sheet;
fz_stext_page *text;
fz_buffer *buf;
- sheet = fz_new_stext_sheet(ctx);
+ text = fz_new_stext_page_from_page(ctx, page, options);
fz_try(ctx)
- {
- text = fz_new_stext_page_from_page(ctx, page, sheet, options);
buf = fz_new_buffer_from_stext_page(ctx, text, sel, crlf);
- }
fz_always(ctx)
- fz_drop_stext_sheet(ctx, sheet);
+ fz_drop_stext_page(ctx, text);
fz_catch(ctx)
fz_rethrow(ctx);
- fz_drop_stext_page(ctx, text);
return buf;
}