summaryrefslogtreecommitdiff
path: root/source/fitz/stext-output.c
diff options
context:
space:
mode:
authorTor Andersson <tor.andersson@artifex.com>2013-06-19 15:29:44 +0200
committerTor Andersson <tor.andersson@artifex.com>2013-06-20 16:45:35 +0200
commit0a927854a10e1e6b9770a81e2e1d9f3093631757 (patch)
tree3d65d820d9fdba2d0d394d99c36290c851b78ca0 /source/fitz/stext-output.c
parent1ae8f19179c5f0f8c6352b3c7855465325d5449a (diff)
downloadmupdf-0a927854a10e1e6b9770a81e2e1d9f3093631757.tar.xz
Rearrange source files.
Diffstat (limited to 'source/fitz/stext-output.c')
-rw-r--r--source/fitz/stext-output.c400
1 files changed, 400 insertions, 0 deletions
diff --git a/source/fitz/stext-output.c b/source/fitz/stext-output.c
new file mode 100644
index 00000000..d3241131
--- /dev/null
+++ b/source/fitz/stext-output.c
@@ -0,0 +1,400 @@
+#include "mupdf/fitz.h"
+
+#define SUBSCRIPT_OFFSET 0.2f
+#define SUPERSCRIPT_OFFSET -0.2f
+
+#include <ft2build.h>
+#include FT_FREETYPE_H
+
+/* XML, HTML and plain-text output */
+
+static int font_is_bold(fz_font *font)
+{
+ FT_Face face = font->ft_face;
+ if (face && (face->style_flags & FT_STYLE_FLAG_BOLD))
+ return 1;
+ if (strstr(font->name, "Bold"))
+ return 1;
+ return 0;
+}
+
+static int font_is_italic(fz_font *font)
+{
+ FT_Face face = font->ft_face;
+ if (face && (face->style_flags & FT_STYLE_FLAG_ITALIC))
+ return 1;
+ if (strstr(font->name, "Italic") || strstr(font->name, "Oblique"))
+ return 1;
+ return 0;
+}
+
+static void
+fz_print_style_begin(fz_output *out, fz_text_style *style)
+{
+ int script = style->script;
+ fz_printf(out, "<span class=\"s%d\">", style->id);
+ while (script-- > 0)
+ fz_printf(out, "<sup>");
+ while (++script < 0)
+ fz_printf(out, "<sub>");
+}
+
+static void
+fz_print_style_end(fz_output *out, fz_text_style *style)
+{
+ int script = style->script;
+ while (script-- > 0)
+ fz_printf(out, "</sup>");
+ while (++script < 0)
+ fz_printf(out, "</sub>");
+ fz_printf(out, "</span>");
+}
+
+static void
+fz_print_style(fz_output *out, fz_text_style *style)
+{
+ char *s = strchr(style->font->name, '+');
+ s = s ? s + 1 : style->font->name;
+ fz_printf(out, "span.s%d{font-family:\"%s\";font-size:%gpt;",
+ style->id, s, style->size);
+ if (font_is_italic(style->font))
+ fz_printf(out, "font-style:italic;");
+ if (font_is_bold(style->font))
+ fz_printf(out, "font-weight:bold;");
+ fz_printf(out, "}\n");
+}
+
+void
+fz_print_text_sheet(fz_context *ctx, fz_output *out, fz_text_sheet *sheet)
+{
+ fz_text_style *style;
+ for (style = sheet->style; style; style = style->next)
+ fz_print_style(out, style);
+}
+
+static void
+send_data_base64(fz_output *out, fz_buffer *buffer)
+{
+ int i, len;
+ static const char set[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+ len = buffer->len/3;
+ for (i = 0; i < len; i++)
+ {
+ int c = buffer->data[3*i];
+ int d = buffer->data[3*i+1];
+ int e = buffer->data[3*i+2];
+ if ((i & 15) == 0)
+ fz_printf(out, "\n");
+ fz_printf(out, "%c%c%c%c", set[c>>2], set[((c&3)<<4)|(d>>4)], set[((d&15)<<2)|(e>>6)], set[e & 63]);
+ }
+ i *= 3;
+ switch (buffer->len-i)
+ {
+ case 2:
+ {
+ int c = buffer->data[i];
+ int d = buffer->data[i+1];
+ fz_printf(out, "%c%c%c=", set[c>>2], set[((c&3)<<4)|(d>>4)], set[((d&15)<<2)]);
+ break;
+ }
+ case 1:
+ {
+ int c = buffer->data[i];
+ fz_printf(out, "%c%c==", set[c>>2], set[(c&3)<<4]);
+ break;
+ }
+ default:
+ case 0:
+ break;
+ }
+}
+
+void
+fz_print_text_page_html(fz_context *ctx, fz_output *out, fz_text_page *page)
+{
+ int block_n, line_n, ch_n;
+ fz_text_style *style = NULL;
+ fz_text_line *line;
+ fz_text_span *span;
+ void *last_region = NULL;
+
+ fz_printf(out, "<div class=\"page\">\n");
+
+ for (block_n = 0; block_n < page->len; block_n++)
+ {
+ switch (page->blocks[block_n].type)
+ {
+ case FZ_PAGE_BLOCK_TEXT:
+ {
+ fz_text_block * block = page->blocks[block_n].u.text;
+ fz_printf(out, "<div class=\"block\"><p>\n");
+ for (line_n = 0; line_n < block->len; line_n++)
+ {
+ int lastcol=-1;
+ line = &block->lines[line_n];
+ style = NULL;
+
+ if (line->region != last_region)
+ {
+ if (last_region)
+ fz_printf(out, "</div>");
+ fz_printf(out, "<div class=\"metaline\">");
+ last_region = line->region;
+ }
+ fz_printf(out, "<div class=\"line\"");
+#ifdef DEBUG_INTERNALS
+ if (line->region)
+ fz_printf(out, " region=\"%x\"", line->region);
+#endif
+ fz_printf(out, ">");
+ for (span = line->first_span; span; span = span->next)
+ {
+ float size = fz_matrix_expansion(&span->transform);
+ float base_offset = span->base_offset / size;
+
+ if (lastcol != span->column)
+ {
+ if (lastcol >= 0)
+ {
+ fz_printf(out, "</div>");
+ }
+ /* If we skipped any columns then output some spacer spans */
+ while (lastcol < span->column-1)
+ {
+ fz_printf(out, "<div class=\"cell\"></div>");
+ lastcol++;
+ }
+ lastcol++;
+ /* Now output the span to contain this entire column */
+ fz_printf(out, "<div class=\"cell\" style=\"");
+ {
+ fz_text_span *sn;
+ for (sn = span->next; sn; sn = sn->next)
+ {
+ if (sn->column != lastcol)
+ break;
+ }
+ fz_printf(out, "width:%g%%;align:%s", span->column_width, (span->align == 0 ? "left" : (span->align == 1 ? "center" : "right")));
+ }
+ if (span->indent > 1)
+ fz_printf(out, ";padding-left:1em;text-indent:-1em");
+ if (span->indent < -1)
+ fz_printf(out, ";text-indent:1em");
+ fz_printf(out, "\">");
+ }
+#ifdef DEBUG_INTERNALS
+ fz_printf(out, "<span class=\"internal_span\"");
+ if (span->column)
+ fz_printf(out, " col=\"%x\"", span->column);
+ fz_printf(out, ">");
+#endif
+ if (span->spacing >= 1)
+ fz_printf(out, " ");
+ if (base_offset > SUBSCRIPT_OFFSET)
+ fz_printf(out, "<sub>");
+ else if (base_offset < SUPERSCRIPT_OFFSET)
+ fz_printf(out, "<sup>");
+ for (ch_n = 0; ch_n < span->len; ch_n++)
+ {
+ fz_text_char *ch = &span->text[ch_n];
+ if (style != ch->style)
+ {
+ if (style)
+ fz_print_style_end(out, style);
+ fz_print_style_begin(out, ch->style);
+ style = ch->style;
+ }
+
+ if (ch->c == '<')
+ fz_printf(out, "&lt;");
+ else if (ch->c == '>')
+ fz_printf(out, "&gt;");
+ else if (ch->c == '&')
+ fz_printf(out, "&amp;");
+ else if (ch->c >= 32 && ch->c <= 127)
+ fz_printf(out, "%c", ch->c);
+ else
+ fz_printf(out, "&#x%x;", ch->c);
+ }
+ if (style)
+ {
+ fz_print_style_end(out, style);
+ style = NULL;
+ }
+ if (base_offset > SUBSCRIPT_OFFSET)
+ fz_printf(out, "</sub>");
+ else if (base_offset < SUPERSCRIPT_OFFSET)
+ fz_printf(out, "</sup>");
+#ifdef DEBUG_INTERNALS
+ fz_printf(out, "</span>");
+#endif
+ }
+ /* Close our floating span */
+ fz_printf(out, "</div>");
+ /* Close the line */
+ fz_printf(out, "</div>");
+ fz_printf(out, "\n");
+ }
+ /* Close the metaline */
+ fz_printf(out, "</div>");
+ last_region = NULL;
+ fz_printf(out, "</p></div>\n");
+ break;
+ }
+ case FZ_PAGE_BLOCK_IMAGE:
+ {
+ fz_image_block *image = page->blocks[block_n].u.image;
+ fz_printf(out, "<img width=%d height=%d src=\"data:", image->image->w, image->image->h);
+ switch (image->image->buffer == NULL ? FZ_IMAGE_JPX : image->image->buffer->params.type)
+ {
+ case FZ_IMAGE_JPEG:
+ fz_printf(out, "image/jpeg;base64,");
+ send_data_base64(out, image->image->buffer->buffer);
+ break;
+ case FZ_IMAGE_PNG:
+ fz_printf(out, "image/png;base64,");
+ send_data_base64(out, image->image->buffer->buffer);
+ break;
+ default:
+ {
+ fz_buffer *buf = fz_image_as_png(ctx, image->image, image->image->w, image->image->h);
+ fz_printf(out, "image/png;base64,");
+ send_data_base64(out, buf);
+ fz_drop_buffer(ctx, buf);
+ break;
+ }
+ }
+ fz_printf(out, "\">\n");
+ break;
+ }
+ }
+ }
+
+ fz_printf(out, "</div>\n");
+}
+
+void
+fz_print_text_page_xml(fz_context *ctx, fz_output *out, fz_text_page *page)
+{
+ int block_n;
+
+ fz_printf(out, "<page width=\"%g\" height=\"%g\">\n",
+ page->mediabox.x1 - page->mediabox.x0,
+ page->mediabox.y1 - page->mediabox.y0);
+
+ for (block_n = 0; block_n < page->len; block_n++)
+ {
+ switch (page->blocks[block_n].type)
+ {
+ case FZ_PAGE_BLOCK_TEXT:
+ {
+ fz_text_block *block = page->blocks[block_n].u.text;
+ fz_text_line *line;
+ char *s;
+
+ fz_printf(out, "<block bbox=\"%g %g %g %g\">\n",
+ block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
+ for (line = block->lines; line < block->lines + block->len; line++)
+ {
+ fz_text_span *span;
+ fz_printf(out, "<line bbox=\"%g %g %g %g\">\n",
+ line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1);
+ for (span = line->first_span; span; span = span->next)
+ {
+ fz_text_style *style = NULL;
+ int char_num;
+ for (char_num = 0; char_num < span->len; char_num++)
+ {
+ fz_text_char *ch = &span->text[char_num];
+ if (ch->style != style)
+ {
+ if (style)
+ {
+ fz_printf(out, "</span>\n");
+ }
+ style = ch->style;
+ s = strchr(style->font->name, '+');
+ s = s ? s + 1 : style->font->name;
+ fz_printf(out, "<span bbox=\"%g %g %g %g\" font=\"%s\" size=\"%g\">\n",
+ span->bbox.x0, span->bbox.y0, span->bbox.x1, span->bbox.y1,
+ s, style->size);
+ }
+ {
+ fz_rect rect;
+ fz_text_char_bbox(&rect, span, char_num);
+ fz_printf(out, "<char bbox=\"%g %g %g %g\" x=\"%g\" y=\"%g\" c=\"",
+ rect.x0, rect.y0, rect.x1, rect.y1, ch->p.x, ch->p.y);
+ }
+ switch (ch->c)
+ {
+ case '<': fz_printf(out, "&lt;"); break;
+ case '>': fz_printf(out, "&gt;"); break;
+ case '&': fz_printf(out, "&amp;"); break;
+ case '"': fz_printf(out, "&quot;"); break;
+ case '\'': fz_printf(out, "&apos;"); break;
+ default:
+ if (ch->c >= 32 && ch->c <= 127)
+ fz_printf(out, "%c", ch->c);
+ else
+ fz_printf(out, "&#x%x;", ch->c);
+ break;
+ }
+ fz_printf(out, "\"/>\n");
+ }
+ if (style)
+ fz_printf(out, "</span>\n");
+ }
+ fz_printf(out, "</line>\n");
+ }
+ fz_printf(out, "</block>\n");
+ break;
+ }
+ case FZ_PAGE_BLOCK_IMAGE:
+ {
+ break;
+ }
+ }
+ }
+ fz_printf(out, "</page>\n");
+}
+
+void
+fz_print_text_page(fz_context *ctx, fz_output *out, fz_text_page *page)
+{
+ int block_n;
+
+ for (block_n = 0; block_n < page->len; block_n++)
+ {
+ switch (page->blocks[block_n].type)
+ {
+ case FZ_PAGE_BLOCK_TEXT:
+ {
+ fz_text_block *block = page->blocks[block_n].u.text;
+ fz_text_line *line;
+ fz_text_char *ch;
+ char utf[10];
+ int i, n;
+
+ for (line = block->lines; line < block->lines + block->len; line++)
+ {
+ fz_text_span *span;
+ for (span = line->first_span; span; span = span->next)
+ {
+ for (ch = span->text; ch < span->text + span->len; ch++)
+ {
+ n = fz_runetochar(utf, ch->c);
+ for (i = 0; i < n; i++)
+ fz_printf(out, "%c", utf[i]);
+ }
+ }
+ fz_printf(out, "\n");
+ }
+ fz_printf(out, "\n");
+ break;
+ }
+ case FZ_PAGE_BLOCK_IMAGE:
+ break;
+ }
+ }
+}