#include "mupdf/html.h"
#include "mupdf/svg.h"
#include "hb.h"
#include "hb-ft.h"
#include
#undef DEBUG_HARFBUZZ
enum { T, R, B, L };
#define DEFAULT_DIR FZ_BIDI_LTR
/*
 * Built-in default style sheet for HTML content, stored as one CSS string
 * assembled from adjacent string literals (one rule per literal).
 * NOTE(review): the code that feeds this string to the CSS parser is not
 * visible in this part of the file.
 */
static const char *html_default_css =
"@page{margin:2em 1em}"
"a{color:#06C;text-decoration:underline}"
"address{display:block;font-style:italic}"
"b{font-weight:bold}"
"bdo{direction:rtl;unicode-bidi:bidi-override}"
"blockquote{display:block;margin:1em 40px}"
"body{display:block;margin:1em}"
"cite{font-style:italic}"
"code{font-family:monospace}"
"dd{display:block;margin:0 0 0 40px}"
"del{text-decoration:line-through}"
"div{display:block}"
"dl{display:block;margin:1em 0}"
"dt{display:block}"
"em{font-style:italic}"
"h1{display:block;font-size:2em;font-weight:bold;margin:0.67em 0;page-break-after:avoid}"
"h2{display:block;font-size:1.5em;font-weight:bold;margin:0.83em 0;page-break-after:avoid}"
"h3{display:block;font-size:1.17em;font-weight:bold;margin:1em 0;page-break-after:avoid}"
"h4{display:block;font-size:1em;font-weight:bold;margin:1.33em 0;page-break-after:avoid}"
"h5{display:block;font-size:0.83em;font-weight:bold;margin:1.67em 0;page-break-after:avoid}"
"h6{display:block;font-size:0.67em;font-weight:bold;margin:2.33em 0;page-break-after:avoid}"
"head{display:none}"
"hr{border-style:solid;border-width:1px;display:block;margin-bottom:0.5em;margin-top:0.5em;text-align:center}"
"html{display:block}"
"i{font-style:italic}"
"ins{text-decoration:underline}"
"kbd{font-family:monospace}"
"li{display:list-item}"
"menu{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ol{display:block;list-style-type:decimal;margin:1em 0;padding:0 0 0 30pt}"
"p{display:block;margin:1em 0}"
"pre{display:block;font-family:monospace;margin:1em 0;white-space:pre}"
"samp{font-family:monospace}"
"script{display:none}"
"small{font-size:0.83em}"
"strong{font-weight:bold}"
"style{display:none}"
"sub{font-size:0.83em;vertical-align:sub}"
"sup{font-size:0.83em;vertical-align:super}"
"table{display:table}"
"tbody{display:table-row-group}"
"td{display:table-cell;padding:1px}"
"tfoot{display:table-footer-group}"
"th{display:table-cell;font-weight:bold;padding:1px;text-align:center}"
"thead{display:table-header-group}"
"tr{display:table-row}"
"ul{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ul ul{list-style-type:circle}"
"ul ul ul{list-style-type:square}"
"var{font-style:italic}"
"svg{display:none}"
;
/*
 * Built-in default style sheet for FictionBook (FB2) content, stored as one
 * CSS string assembled from adjacent string literals.
 *
 * When FB2_FRONT_MATTER is defined, the annotation and cover page inside
 * description>title-info are shown, each on its own page; otherwise the
 * whole <description> element is hidden.
 */
static const char *fb2_default_css =
"@page{margin:2em 2em}"
"FictionBook{display:block;margin:0;line-height:1.2em}"
"stylesheet,binary{display:none}"
#ifdef FB2_FRONT_MATTER
"description>*{display:none}"
"description>title-info{display:block}"
"description>title-info>*{display:none}"
"description>title-info>annotation{display:block;page-break-before:always;page-break-after:always}"
"description>title-info>coverpage{display:block;page-break-before:always;page-break-after:always}"
#else
"description{display:none}"
#endif
"body,section,title,subtitle,p,cite,epigraph,text-author,date,poem,stanza,v,empty-line{display:block}"
"image{display:block}"
"p>image{display:inline}"
"table{display:table}"
"tr{display:table-row}"
"th,td{display:table-cell}"
"a{color:#06C;text-decoration:underline}"
"a[type=note]{font-size:small;vertical-align:super}"
"code{white-space:pre;font-family:monospace}"
"emphasis{font-style:italic}"
"strikethrough{text-decoration:line-through}"
"strong{font-weight:bold}"
"sub{font-size:small;vertical-align:sub}"
"sup{font-size:small;vertical-align:super}"
"image{margin:1em 0;text-align:center}"
"cite,poem{margin:1em 2em}"
"subtitle,epigraph,stanza{margin:1em 0}"
"title>p{text-align:center;font-size:x-large}"
"subtitle{text-align:center;font-size:large}"
"p{margin-top:1em;text-align:justify}"
"empty-line{padding-top:1em}"
"p+p{margin-top:0;text-indent:1.5em}"
"empty-line+p{margin-top:0}"
"section>title{page-break-before:always}"
;
/* State threaded through box and flow generation (generate_boxes and its helpers). */
struct genstate
{
/* Pool from which boxes, flow nodes and copied strings are allocated. */
fz_pool *pool;
/* Font set handed to fz_apply_css_style when resolving box styles. */
fz_html_font_set *set;
/* Archive from which <img src=...> files are read. */
fz_archive *zip;
/* Tree looked up by id (the part after '#') for FB2 <image> references. */
fz_tree *images;
/* Non-zero when generating from FB2: enables <image> handling and l:href/xlink:href links. */
int is_fb2;
/* Prefix joined with an image's src to form its archive path. */
const char *base_uri;
/* Style sheet matched against each element. */
fz_css *css;
/* Non-zero right after a line break or a newly created flow box; suppresses a pending space. */
int at_bol;
/* Non-zero when collapsed whitespace is pending and must be emitted before the next word or image. */
int emit_white;
/* Line break class of the previous character; row index into pairbrk. */
int last_brk_cls;
};
/* Return 1 if c is a space, tab, carriage return or newline, and 0 otherwise. */
static int iswhite(int c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
/* Return 1 if every character of the NUL-terminated string s is whitespace (true for ""), else 0. */
static int is_all_white(const char *s)
{
	const char *p;

	for (p = s; *p != 0; ++p)
	{
		if (!iswhite(*p))
			return 0;
	}
	return 1;
}
/* TODO: pool allocator for flow nodes */
/* TODO: store text by pointing to a giant buffer */
/*
 * Release the image reference held by each FLOW_IMAGE node in a flow list.
 * The nodes themselves are not freed here.
 */
static void fz_drop_html_flow(fz_context *ctx, fz_html_flow *flow)
{
	fz_html_flow *node;

	for (node = flow; node != NULL; node = node->next)
	{
		if (node->type == FLOW_IMAGE)
			fz_drop_image(ctx, node->content.image);
	}
}
/*
 * Allocate a flow node of the given type from the pool, attribute it to
 * inline_box, and append it to the flow list of the flow box 'top'.
 * Returns the new node so that callers can fill in type-specific fields.
 */
static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, int type)
{
	fz_html_flow *node = fz_pool_alloc(ctx, pool, sizeof *node);

	node->type = type;
	node->box = inline_box;
	node->expand = 0;
	node->bidi_level = 0;
	node->markup_lang = 0;
	node->breaks_line = 0;

	/* Link at the tail and advance the tail pointer to the new node's next slot. */
	*top->flow_tail = node;
	top->flow_tail = &node->next;

	return node;
}
/* Append a FLOW_SPACE node marked as expandable. */
static void add_flow_space(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	fz_html_flow *space_node;

	space_node = add_flow(ctx, pool, top, inline_box, FLOW_SPACE);
	space_node->expand = 1;
}
/* Append a FLOW_BREAK (forced line break) node. */
static void add_flow_break(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	add_flow(ctx, pool, top, inline_box, FLOW_BREAK);
}
/* Append a FLOW_SBREAK (soft break opportunity) node. */
static void add_flow_sbreak(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	add_flow(ctx, pool, top, inline_box, FLOW_SBREAK);
}
/* Append a FLOW_SHYPHEN (soft hyphen) node. */
static void add_flow_shyphen(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	add_flow(ctx, pool, top, inline_box, FLOW_SHYPHEN);
}
/*
 * Append a FLOW_WORD node whose text is a pool-allocated, NUL-terminated
 * copy of the byte range [a, b), tagged with the markup language 'lang'.
 */
static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, const char *a, const char *b, int lang)
{
	fz_html_flow *word = add_flow(ctx, pool, top, inline_box, FLOW_WORD);
	size_t len = b - a;

	word->content.text = fz_pool_alloc(ctx, pool, len + 1);
	memcpy(word->content.text, a, len);
	word->content.text[len] = 0;
	word->markup_lang = lang;
}
/* Append a FLOW_IMAGE node holding its own (kept) reference to img. */
static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, fz_image *img)
{
	fz_html_flow *image_node;

	image_node = add_flow(ctx, pool, top, inline_box, FLOW_IMAGE);
	image_node->content.image = fz_keep_image(ctx, img);
}
/* Append a FLOW_ANCHOR node. */
static void add_flow_anchor(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	add_flow(ctx, pool, top, inline_box, FLOW_ANCHOR);
}
/*
 * Split a text flow node after 'offset' characters (runes, not bytes).
 *
 * The original node keeps the first 'offset' characters; a new node, which
 * copies every other field of the original, receives the remainder and is
 * linked in directly after it. Returns the new node, or 'flow' itself when
 * offset is 0 (nothing to split).
 */
static fz_html_flow *split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset)
{
	fz_html_flow *tail;
	char *cut;
	size_t remaining;
	size_t tail_len;
	int rune;

	if (offset == 0)
		return flow;

	/* The struct copy also carries over flow->next, so the tail inherits the old successor. */
	tail = fz_pool_alloc(ctx, pool, sizeof *tail);
	*tail = *flow;
	flow->next = tail;

	/* Advance 'cut' past 'offset' characters, stopping early at the end of the string. */
	cut = flow->content.text;
	for (remaining = offset; *cut && remaining; --remaining)
		cut += fz_chartorune(&rune, cut);

	/* Copy the remainder (terminator included) into the tail, then truncate the head. */
	tail_len = strlen(cut);
	tail->content.text = fz_pool_alloc(ctx, pool, tail_len + 1);
	memcpy(tail->content.text, cut, tail_len + 1);
	*cut = 0;

	return tail;
}
/*
 * Emit the whitespace that is pending in g->emit_white, if any.
 *
 * Nothing is emitted at the beginning of a line. Otherwise the space becomes
 * a FLOW_SPACE node when the box's white-space mode allows breaking at
 * spaces, or a literal " " word when it does not. The pending flag is
 * cleared either way.
 */
static void flush_space(fz_context *ctx, fz_html_box *flow, fz_html_box *inline_box, int lang, struct genstate *g)
{
	static const char *space = " ";
	int breakable = inline_box->style.white_space & WS_ALLOW_BREAK_SPACE;

	if (!g->emit_white)
		return;

	if (!g->at_bol)
	{
		if (breakable)
			add_flow_space(ctx, g->pool, flow, inline_box);
		else
			add_flow_word(ctx, g->pool, flow, inline_box, space, space + 1, lang);
	}
	g->emit_white = 0;
}
/*
 * Pair-wise lookup table for UAX#14 line breaks.
 *
 * Indexed as pairbrk[previous class][current class]; each row is a string
 * with one character per column, and the two header comments below spell
 * the column class names vertically. generate_text inserts a soft break
 * before the current character only where the entry is '_'; every other
 * entry is treated there as "no break".
 * NOTE(review): '^' and '%' presumably follow the UAX#14 pair-table
 * notation (prohibited / indirect break) -- confirm against the spec.
 */
static const char *pairbrk[29] =
{
/* -OCCQGNESIPPNAHIIHBBBZCWHHJJJR- */
/* -PLPULSXYSROULLDNYAB2WMJ23LVTI- */
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^", /* OP open punctuation */
"_^^%%^^^^%%_____%%__^^^______", /* CL close punctuation */
"_^^%%^^^^%%%%%__%%__^^^______", /* CP close parenthesis */
"^^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* QU quotation */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* GL non-breaking glue */
"_^^%%%^^^_______%%__^^^______", /* NS nonstarters */
"_^^%%%^^^______%%%__^^^______", /* EX exclamation/interrogation */
"_^^%%%^^^__%_%__%%__^^^______", /* SY symbols allowing break after */
"_^^%%%^^^__%%%__%%__^^^______", /* IS infix numeric separator */
"%^^%%%^^^__%%%%_%%__^^^%%%%%_", /* PR prefix numeric */
"%^^%%%^^^__%%%__%%__^^^______", /* PO postfix numeric */
"%^^%%%^^^%%%%%_%%%__^^^______", /* NU numeric */
"%^^%%%^^^__%%%_%%%__^^^______", /* AL ordinary alphabetic and symbol characters */
"%^^%%%^^^__%%%_%%%__^^^______", /* HL hebrew letter */
"_^^%%%^^^_%____%%%__^^^______", /* ID ideographic */
"_^^%%%^^^______%%%__^^^______", /* IN inseparable characters */
"_^^%_%^^^__%____%%__^^^______", /* HY hyphens */
"_^^%_%^^^_______%%__^^^______", /* BA break after */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* BB break before */
"_^^%%%^^^_______%%_^^^^______", /* B2 break opportunity before and after */
"____________________^________", /* ZW zero width space */
"%^^%%%^^^__%%%_%%%__^^^______", /* CM combining mark */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* WJ word joiner */
"_^^%%%^^^_%____%%%__^^^___%%_", /* H2 hangul leading/vowel syllable */
"_^^%%%^^^_%____%%%__^^^____%_", /* H3 hangul leading/vowel/trailing syllable */
"_^^%%%^^^_%____%%%__^^^%%%%__", /* JL hangul leading jamo */
"_^^%%%^^^_%____%%%__^^^___%%_", /* JV hangul vowel jamo */
"_^^%%%^^^_%____%%%__^^^____%_", /* JT hangul trailing jamo */
"_^^%%%^^^_______%%__^^^_____%", /* RI regional indicator */
};
/*
 * Convert a run of text into flow nodes appended to the flow box that
 * encloses the inline box 'box'.
 *
 * The box's white-space style decides whether whitespace collapses
 * (WS_COLLAPSE), whether lines may break at spaces and between characters
 * (WS_ALLOW_BREAK_SPACE), and whether newlines force a break
 * (WS_FORCE_BREAK_NEWLINE). 'lang' is recorded on every word emitted.
 * g carries the pending-space, beginning-of-line and previous line break
 * class state across calls.
 */
static void generate_text(fz_context *ctx, fz_html_box *box, const char *text, int lang, struct genstate *g)
{
fz_html_box *flow;
fz_pool *pool = g->pool;
int collapse = box->style.white_space & WS_COLLAPSE;
int bsp = box->style.white_space & WS_ALLOW_BREAK_SPACE;
int bnl = box->style.white_space & WS_FORCE_BREAK_NEWLINE;
static const char *space = " ";
/* Walk up to the enclosing flow box; it owns the flow list. */
flow = box;
while (flow->type != BOX_FLOW)
flow = flow->up;
while (*text)
{
/* Forced line break: CR, LF, or a CR LF pair counted as a single newline. */
if (bnl && (*text == '\n' || *text == '\r'))
{
if (text[0] == '\r' && text[1] == '\n')
text += 2;
else
text += 1;
add_flow_break(ctx, pool, flow, box);
g->at_bol = 1;
}
else if (iswhite(*text))
{
if (collapse)
{
/* Swallow the whole run and remember that a single space is pending.
 * When newlines force breaks, only spaces and tabs are swallowed, so
 * that the newline is still seen by the branch above. */
if (bnl)
while (*text == ' ' || *text == '\t')
++text;
else
while (iswhite(*text))
++text;
g->emit_white = 1;
}
else
{
/* Preserved whitespace: emit one node per whitespace character. */
// TODO: tabs
if (bsp)
add_flow_space(ctx, pool, flow, box);
else
add_flow_word(ctx, pool, flow, box, space, space+1, lang);
++text;
}
g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a space */
}
else
{
/* A run of non-whitespace: 'mark' is the start of the word not yet
 * emitted, 'prev' the start of the character just decoded. */
const char *prev, *mark = text;
int c;
flush_space(ctx, flow, box, lang, g);
if (g->at_bol)
g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ;
while (*text && !iswhite(*text))
{
prev = text;
text += fz_chartorune(&c, text);
if (c == 0xAD) /* soft hyphen */
{
/* Emit the text before the hyphen, then the hyphen as its own node;
 * the hyphen character itself is not part of either word. */
if (mark != prev)
add_flow_word(ctx, pool, flow, box, mark, prev, lang);
add_flow_shyphen(ctx, pool, flow, box);
mark = text;
g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a soft hyphen */
}
else if (bsp) /* allow soft breaks */
{
int this_brk_cls = ucdn_get_resolved_linebreak_class(c);
/* Classes at or beyond RI are not looked up and leave last_brk_cls unchanged. */
if (this_brk_cls < UCDN_LINEBREAK_CLASS_RI)
{
int brk = pairbrk[g->last_brk_cls][this_brk_cls];
/* we handle spaces elsewhere, so ignore these classes */
if (brk == '@') brk = '^';
if (brk == '#') brk = '^';
if (brk == '%') brk = '^';
if (brk == '_')
{
/* Break allowed before this character: emit the word so far and a
 * soft break; the current character starts the next word. */
if (mark != prev)
add_flow_word(ctx, pool, flow, box, mark, prev, lang);
add_flow_sbreak(ctx, pool, flow, box);
mark = prev;
}
g->last_brk_cls = this_brk_cls;
}
}
}
/* Emit whatever remains of the run. */
if (mark != text)
add_flow_word(ctx, pool, flow, box, mark, text, lang);
g->at_bol = 0;
}
}
}
/*
 * Load the image named by 'src' from the archive 'zip'.
 *
 * The archive path is "<base_uri>/<src>", URL-decoded and cleaned. When SVG
 * support is compiled in, any path containing ".svg" is loaded as SVG.
 * Returns a new image reference, or NULL after a warning if loading fails.
 */
static fz_image *load_html_image(fz_context *ctx, fz_archive *zip, const char *base_uri, const char *src)
{
	char entry[2048];
	fz_image *image = NULL;
	fz_buffer *data = NULL;

	fz_var(image);
	fz_var(data);

	/* Join the base and source, then undo URL escapes and normalise the result. */
	fz_strlcpy(entry, base_uri, sizeof entry);
	fz_strlcat(entry, "/", sizeof entry);
	fz_strlcat(entry, src, sizeof entry);
	fz_urldecode(entry);
	fz_cleanname(entry);

	fz_try(ctx)
	{
		data = fz_read_archive_entry(ctx, zip, entry);
#if FZ_ENABLE_SVG
		if (strstr(entry, ".svg"))
			image = fz_new_image_from_svg(ctx, data);
		else
#endif
			image = fz_new_image_from_buffer(ctx, data);
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, data);
	}
	fz_catch(ctx)
	{
		fz_warn(ctx, "html: cannot load image src='%s'", src);
	}

	return image;
}
/* Append a FLOW_ANCHOR node for 'box' to its enclosing flow box. */
static void generate_anchor(fz_context *ctx, fz_html_box *box, struct genstate *g)
{
	fz_html_box *owner;

	for (owner = box; owner->type != BOX_FLOW; owner = owner->up)
		;
	add_flow_anchor(ctx, g->pool, owner, box);
}
/*
 * Append an image to the flow box that encloses 'box'.
 *
 * Pending whitespace is flushed first. A NULL image is replaced by the
 * literal word "[image]"; otherwise the image node is wrapped in soft
 * breaks. The caller's reference to img is always dropped here, including
 * when appending throws.
 */
static void generate_image(fz_context *ctx, fz_html_box *box, fz_image *img, struct genstate *g)
{
	fz_pool *pool = g->pool;
	fz_html_box *owner;

	for (owner = box; owner->type != BOX_FLOW; owner = owner->up)
		;

	flush_space(ctx, owner, box, 0, g);

	if (img)
	{
		fz_try(ctx)
		{
			add_flow_sbreak(ctx, pool, owner, box);
			add_flow_image(ctx, pool, owner, box, img);
			add_flow_sbreak(ctx, pool, owner, box);
		}
		fz_always(ctx)
		{
			fz_drop_image(ctx, img);
		}
		fz_catch(ctx)
		{
			fz_rethrow(ctx);
		}
	}
	else
	{
		const char *placeholder = "[image]";
		add_flow_word(ctx, pool, owner, box, placeholder, placeholder + 7, 0);
	}

	g->at_bol = 0;
}
/*
 * Reset a box to an unlinked, zero-sized BOX_BLOCK with an empty flow list,
 * the given markup direction and the default CSS style.
 */
static void init_box(fz_context *ctx, fz_html_box *box, fz_bidi_direction markup_dir)
{
	box->type = BOX_BLOCK;
	box->markup_dir = markup_dir;

	/* Geometry. */
	box->x = 0;
	box->y = 0;
	box->w = 0;
	box->h = 0;

	/* Tree links. */
	box->up = NULL;
	box->down = NULL;
	box->last = NULL;
	box->next = NULL;

	/* Empty flow list: the tail points at the head slot. */
	box->flow_head = NULL;
	box->flow_tail = &box->flow_head;

	fz_default_css_style(ctx, &box->style);
}
/*
 * Release the flow resources held by a box, its following siblings and all
 * of their descendants. Siblings are handled by iteration, children by
 * recursion.
 */
static void fz_drop_html_box(fz_context *ctx, fz_html_box *box)
{
	fz_html_box *cur;
	fz_html_box *following;

	for (cur = box; cur != NULL; cur = following)
	{
		following = cur->next;
		fz_drop_html_flow(ctx, cur->flow_head);
		fz_drop_html_box(ctx, cur->down);
	}
}
/* Release an html document: drop its box tree, then its pool. A NULL html is ignored. */
void fz_drop_html(fz_context *ctx, fz_html *html)
{
	if (!html)
		return;

	fz_drop_html_box(ctx, html->root);
	fz_drop_pool(ctx, html->pool);
}
/* Allocate a box from the pool and initialise it with init_box. */
static fz_html_box *new_box(fz_context *ctx, fz_pool *pool, fz_bidi_direction markup_dir)
{
	fz_html_box *fresh;

	fresh = fz_pool_alloc(ctx, pool, sizeof *fresh);
	init_box(ctx, fresh, markup_dir);
	return fresh;
}
/*
 * Set the type and parent of 'box' and, if 'top' is non-NULL, append it as
 * the last child of 'top'.
 */
static void insert_box(fz_context *ctx, fz_html_box *box, int type, fz_html_box *top)
{
	box->type = type;
	box->up = top;

	if (!top)
		return;

	/* First child goes in 'down'; later children chain off the previous last child. */
	if (top->last)
		top->last->next = box;
	else
		top->down = box;
	top->last = box;
}
/*
 * Insert 'box' as a BOX_BLOCK child of the nearest block at or above 'top'.
 *
 * Returns that block. When 'top' is not a block, flow or inline box,
 * nothing is inserted and 'top' is returned unchanged.
 */
static fz_html_box *insert_block_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
{
	switch (top->type)
	{
	case BOX_BLOCK:
	case BOX_FLOW:
	case BOX_INLINE:
		/* Climb to the enclosing block (no-op if top already is one). */
		while (top->type != BOX_BLOCK)
			top = top->up;
		insert_box(ctx, box, BOX_BLOCK, top);
		break;
	default:
		break;
	}
	return top;
}
/*
 * Insert 'box' as a BOX_BREAK child of the nearest block at or above 'top'.
 *
 * Returns that block. When 'top' is not a block, flow or inline box,
 * nothing is inserted and 'top' is returned unchanged.
 */
static fz_html_box *insert_break_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
{
	switch (top->type)
	{
	case BOX_BLOCK:
	case BOX_FLOW:
	case BOX_INLINE:
		/* Climb to the enclosing block (no-op if top already is one). */
		while (top->type != BOX_BLOCK)
			top = top->up;
		insert_box(ctx, box, BOX_BREAK, top);
		break;
	default:
		break;
	}
	return top;
}
/*
 * Insert 'box' as a BOX_INLINE.
 *
 * Under a block, the box goes into the block's trailing flow box. If the
 * last child is not a flow, a new flow box is created first; it is marked
 * as the first flow when the block had no children, and g->at_bol is set.
 * Under a flow or inline box it is inserted directly. Any other parent type
 * is ignored.
 */
static void insert_inline_box(fz_context *ctx, fz_html_box *box, fz_html_box *top, int markup_dir, struct genstate *g)
{
	fz_html_box *parent;

	if (top->type == BOX_BLOCK)
	{
		if (top->last && top->last->type == BOX_FLOW)
		{
			parent = top->last;
		}
		else
		{
			parent = new_box(ctx, g->pool, markup_dir);
			parent->is_first_flow = !top->last;
			insert_box(ctx, parent, BOX_FLOW, top);
			g->at_bol = 1;
		}
	}
	else if (top->type == BOX_FLOW || top->type == BOX_INLINE)
	{
		parent = top;
	}
	else
	{
		return;
	}

	insert_box(ctx, box, BOX_INLINE, parent);
}
/*
 * Build the box tree for 'node' and its following siblings, as children of
 * (or, for block-level boxes, relative to) 'top'.
 *
 * up_match is the CSS match of the parent element; list_counter is the
 * running list item number at this level; markup_dir and markup_lang are
 * the inherited direction and language. Element nodes become boxes
 * according to their CSS display value, text nodes become flow content.
 * Recurses into children.
 */
static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top,
fz_css_match *up_match, int list_counter, int markup_dir, int markup_lang, struct genstate *g)
{
fz_css_match match;
fz_html_box *box;
const char *tag;
int display;
while (node)
{
match.up = up_match;
match.count = 0;
tag = fz_xml_tag(node);
if (tag)
{
/* Element node: resolve its style first. */
fz_match_css(ctx, &match, g->css, node);
display = fz_get_css_match_display(&match);
/* <br>: a flow break inside inline content, otherwise a break box at block level. */
if (tag[0]=='b' && tag[1]=='r' && tag[2]==0)
{
if (top->type == BOX_INLINE)
{
fz_html_box *flow = top;
while (flow->type != BOX_FLOW)
flow = flow->up;
add_flow_break(ctx, g->pool, flow, top);
}
else
{
box = new_box(ctx, g->pool, markup_dir);
fz_apply_css_style(ctx, g->set, &box->style, &match);
top = insert_break_box(ctx, box, top);
}
g->at_bol = 1;
}
/* <img>: always inline; an <img> without src produces nothing. */
else if (tag[0]=='i' && tag[1]=='m' && tag[2]=='g' && tag[3]==0)
{
const char *src = fz_xml_att(node, "src");
if (src)
{
box = new_box(ctx, g->pool, markup_dir);
fz_apply_css_style(ctx, g->set, &box->style, &match);
insert_inline_box(ctx, box, top, markup_dir, g);
generate_image(ctx, box, load_html_image(ctx, g->zip, g->base_uri, src), g);
}
}
/* FB2 <image>: only "#id" references are handled, looked up in g->images. */
else if (g->is_fb2 && tag[0]=='i' && tag[1]=='m' && tag[2]=='a' && tag[3]=='g' && tag[4]=='e' && tag[5]==0)
{
const char *src = fz_xml_att(node, "l:href");
if (!src)
src = fz_xml_att(node, "xlink:href");
if (src && src[0] == '#')
{
fz_image *img = fz_tree_lookup(ctx, g->images, src+1);
if (display == DIS_BLOCK)
{
/* Block image: a block box wrapping an inline box that holds the image. */
fz_html_box *imgbox;
box = new_box(ctx, g->pool, markup_dir);
fz_apply_css_style(ctx, g->set, &box->style, &match);
top = insert_block_box(ctx, box, top);
imgbox = new_box(ctx, g->pool, markup_dir);
fz_apply_css_style(ctx, g->set, &imgbox->style, &match);
insert_inline_box(ctx, imgbox, box, markup_dir, g);
/* generate_image drops one reference, so keep one for the tree. */
generate_image(ctx, imgbox, fz_keep_image(ctx, img), g);
}
else if (display == DIS_INLINE)
{
box = new_box(ctx, g->pool, markup_dir);
fz_apply_css_style(ctx, g->set, &box->style, &match);
insert_inline_box(ctx, box, top, markup_dir, g);
generate_image(ctx, box, fz_keep_image(ctx, img), g);
}
}
}
else if (display != DIS_NONE)
{
const char *dir, *lang, *id, *href;
int child_dir = markup_dir;
int child_lang = markup_lang;
/* A "dir" attribute overrides the inherited direction; unknown values fall back to DEFAULT_DIR. */
dir = fz_xml_att(node, "dir");
if (dir)
{
if (!strcmp(dir, "auto"))
child_dir = FZ_BIDI_NEUTRAL;
else if (!strcmp(dir, "rtl"))
child_dir = FZ_BIDI_RTL;
else if (!strcmp(dir, "ltr"))
child_dir = FZ_BIDI_LTR;
else
child_dir = DEFAULT_DIR;
}
lang = fz_xml_att(node, "lang");
if (lang)
child_lang = fz_text_language_from_string(lang);
box = new_box(ctx, g->pool, child_dir);
fz_apply_css_style(ctx, g->set, &box->style, &match);
id = fz_xml_att(node, "id");
if (id)
box->id = fz_pool_strdup(ctx, g->pool, id);
if (display == DIS_BLOCK || display == DIS_INLINE_BLOCK)
{
top = insert_block_box(ctx, box, top);
}
else if (display == DIS_LIST_ITEM)
{
top = insert_block_box(ctx, box, top);
box->list_item = ++list_counter;
}
else if (display == DIS_INLINE)
{
insert_inline_box(ctx, box, top, child_dir, g);
/* Inline boxes with an id get an anchor node in the flow. */
if (id)
generate_anchor(ctx, box, g);
if (tag[0]=='a' && tag[1]==0)
{
if (g->is_fb2)
{
href = fz_xml_att(node, "l:href");
if (!href)
href = fz_xml_att(node, "xlink:href");
}
else
/* NOTE(review): g->is_fb2 is always false on this branch, so the ternary always selects "href". */
href = fz_xml_att(node, g->is_fb2 ? "l:href" : "href");
if (href)
box->href = fz_pool_strdup(ctx, g->pool, href);
}
}
else
{
fz_warn(ctx, "unknown box display type");
insert_box(ctx, box, BOX_BLOCK, top);
}
if (fz_xml_down(node))
{
/* Lists restart numbering for their children; other elements pass the counter through. */
int child_counter = list_counter;
if (!strcmp(tag, "ul") || !strcmp(tag, "ol"))
child_counter = 0;
generate_boxes(ctx, fz_xml_down(node), box, &match, child_counter, child_dir, child_lang, g);
}
}
}
else
{
/* Text node. */
const char *text = fz_xml_text(node);
int collapse = top->style.white_space & WS_COLLAPSE;
if (collapse && is_all_white(text))
{
/* Whitespace-only text under collapsing rules just leaves a pending space. */
g->emit_white = 1;
}
else
{
if (top->type != BOX_INLINE)
{
/* Create anonymous inline box, with the same style as the top block box. */
box = new_box(ctx, g->pool, markup_dir);
insert_inline_box(ctx, box, top, markup_dir, g);
box->style = top->style;
/* Make sure not to recursively multiply font sizes. */
box->style.font_size.value = 1;
box->style.font_size.unit = N_SCALE;
generate_text(ctx, box, text, markup_lang, g);
}
else
{
generate_text(ctx, top, text, markup_lang, g);
}
}
}
node = fz_xml_next(node);
}
}
/*
 * Set the size of an image flow node.
 *
 * The natural size is the pixel size converted to points (72 per inch) using
 * the image resolution. If it exceeds max_w or max_h, the image is scaled
 * down uniformly so that it fits both. The node position is reset to 0,0.
 */
static void measure_image(fz_context *ctx, fz_html_flow *node, float max_w, float max_h)
{
	float natural_w = node->content.image->w * 72.0f / node->content.image->xres;
	float natural_h = node->content.image->h * 72.0f / node->content.image->yres;
	float fit_x = 1;
	float fit_y = 1;
	float scale;

	if (natural_w > max_w)
		fit_x = max_w / natural_w;
	if (natural_h > max_h)
		fit_y = max_h / natural_h;

	/* The tighter constraint wins so that the aspect ratio is preserved. */
	scale = fz_min(fit_x, fit_y);

	node->x = 0;
	node->y = 0;
	node->w = natural_w * scale;
	node->h = natural_h * scale;
}
/*
 * Iterator state for shaping a string run by run, where each run is a
 * stretch of text rendered with a single font (see walk_string).
 */
typedef struct string_walker
{
fz_context *ctx;
/* HarfBuzz buffer reused for every run. */
hb_buffer_t *hb_buf;
/* Non-zero for right-to-left text. */
int rtl;
/* [start, end) is the current run; s is the scan position, which may be
 * one character past 'end' when that character needs a different font. */
const char *start;
const char *end;
const char *s;
/* Font requested by the style; fallbacks are resolved from it. */
fz_font *base_font;
int script;
int language;
/* Font of the current run, and font of the character that ended it. */
fz_font *font;
fz_font *next_font;
/* Results for the current run, owned by hb_buf. */
hb_glyph_position_t *glyph_pos;
hb_glyph_info_t *glyph_info;
unsigned int glyph_count;
/* Units per em of the current font; glyph_pos values are in these units. */
int scale;
} string_walker;
/*
 * Remove the n glyphs that follow index i from the walker's glyph arrays by
 * shifting the remaining entries down, and shrink glyph_count by n.
 * Returns 'unicode' unchanged so that callers can return the ligature code
 * point in one statement.
 */
static int quick_ligature_mov(fz_context *ctx, string_walker *walker, unsigned int i, unsigned int n, int unicode)
{
	unsigned int dst = i + 1;
	unsigned int src = i + n + 1;

	while (src < walker->glyph_count)
	{
		walker->glyph_info[dst] = walker->glyph_info[src];
		walker->glyph_pos[dst] = walker->glyph_pos[src];
		++dst;
		++src;
	}
	walker->glyph_count -= n;

	return unicode;
}
/*
 * Return the code point to use for glyph slot i, substituting an f-ligature
 * (ffi, ffl, ff, fi, fl) when the following characters form one and the
 * walker's font has a glyph for it. Consumed characters are removed from
 * the glyph arrays. Ligatures are never formed in monospaced fonts.
 */
static int quick_ligature(fz_context *ctx, string_walker *walker, unsigned int i)
{
	hb_glyph_info_t *info = walker->glyph_info;
	fz_font *font = walker->font;

	if (info[i].codepoint != 'f' || i + 1 >= walker->glyph_count || fz_font_flags(font)->is_mono)
		return info[i].codepoint;

	switch (info[i+1].codepoint)
	{
	case 'f':
		/* Prefer the three-letter ligatures, then fall back to plain "ff". */
		if (i + 2 < walker->glyph_count)
		{
			if (info[i+2].codepoint == 'i' && fz_encode_character(ctx, font, 0xFB03))
				return quick_ligature_mov(ctx, walker, i, 2, 0xFB03);
			if (info[i+2].codepoint == 'l' && fz_encode_character(ctx, font, 0xFB04))
				return quick_ligature_mov(ctx, walker, i, 2, 0xFB04);
		}
		if (fz_encode_character(ctx, font, 0xFB00))
			return quick_ligature_mov(ctx, walker, i, 1, 0xFB00);
		break;
	case 'i':
		if (fz_encode_character(ctx, font, 0xFB01))
			return quick_ligature_mov(ctx, walker, i, 1, 0xFB01);
		break;
	case 'l':
		if (fz_encode_character(ctx, font, 0xFB02))
			return quick_ligature_mov(ctx, walker, i, 1, 0xFB02);
		break;
	default:
		break;
	}

	return info[i].codepoint;
}
/*
 * Prepare a walker to iterate over 'text'. All three text pointers start at
 * the beginning and no font has been chosen yet; the first call to
 * walk_string selects one.
 */
static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer_t *hb_buf, int rtl, fz_font *font, int script, int language, const char *text)
{
	/* Fixed inputs. */
	walker->ctx = ctx;
	walker->hb_buf = hb_buf;
	walker->rtl = rtl;
	walker->base_font = font;
	walker->script = script;
	walker->language = language;

	/* Iteration state. */
	walker->start = text;
	walker->end = text;
	walker->s = text;
	walker->font = NULL;
	walker->next_font = NULL;
}
/* Destroy callback for a font's shaper data: frees the HarfBuzz font under the HarfBuzz lock. */
static void
destroy_hb_shaper_data(fz_context *ctx, void *handle)
{
	hb_font_t *hb_font = handle;

	hb_lock(ctx);
	hb_font_destroy(hb_font);
	hb_unlock(ctx);
}
/*
 * Advance the walker to the next run of text that uses a single font and
 * shape it.
 *
 * On success returns 1 with walker->font, scale, glyph_info, glyph_pos and
 * glyph_count describing the run [start, end). Returns 0 when the string is
 * exhausted. Throws if FreeType cannot set the character size.
 */
static int walk_string(string_walker *walker)
{
fz_context *ctx = walker->ctx;
FT_Face face;
int fterr;
int quickshape;
char lang[8];
/* The new run begins where the previous one ended and already includes the
 * character (if any) that triggered the font change last time. */
walker->start = walker->end;
walker->end = walker->s;
walker->font = walker->next_font;
if (*walker->start == 0)
return 0;
/* Run through the string, encoding chars until we find one
 * that requires a different fallback font. */
while (*walker->s)
{
int c;
walker->s += fz_chartorune(&c, walker->s);
(void)fz_encode_character_with_fallback(ctx, walker->base_font, c, walker->script, walker->language, &walker->next_font);
if (walker->next_font != walker->font)
{
/* A font is already chosen for this run, so the run ends before c. */
if (walker->font != NULL)
break;
walker->font = walker->next_font;
}
walker->end = walker->s;
}
/* Disable harfbuzz shaping if script is common or LGC and there are no opentype tables. */
quickshape = 0;
if (walker->script <= 3 && !walker->rtl && !fz_font_flags(walker->font)->has_opentype)
quickshape = 1;
/* The HarfBuzz lock is held for the whole try block and released in fz_always. */
hb_lock(ctx);
fz_try(ctx)
{
face = fz_font_ft_face(ctx, walker->font);
/* Size the face to one em of font units so that advances come back in font units. */
walker->scale = face->units_per_EM;
fterr = FT_Set_Char_Size(face, walker->scale, walker->scale, 72, 72);
if (fterr)
fz_throw(ctx, FZ_ERROR_GENERIC, "freetype setting character size: %s", ft_error_string(fterr));
hb_buffer_clear_contents(walker->hb_buf);
hb_buffer_set_direction(walker->hb_buf, walker->rtl ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
/* hb_buffer_set_script(walker->hb_buf, hb_ucdn_script_translate(walker->script)); */
if (walker->language)
{
fz_string_from_text_language(lang, walker->language);
hb_buffer_set_language(walker->hb_buf, hb_language_from_string(lang, (int)strlen(lang)));
}
/* hb_buffer_set_cluster_level(hb_buf, HB_BUFFER_CLUSTER_LEVEL_CHARACTERS); */
hb_buffer_add_utf8(walker->hb_buf, walker->start, walker->end - walker->start, 0, -1);
if (!quickshape)
{
/* Create the HarfBuzz font for this fz_font on first use and cache it in the font's shaper data. */
fz_shaper_data_t *hb = fz_font_shaper_data(ctx, walker->font);
if (hb->shaper_handle == NULL)
{
Memento_startLeaking(); /* HarfBuzz leaks harmlessly */
hb->destroy = destroy_hb_shaper_data;
hb->shaper_handle = hb_ft_font_create(face, NULL);
Memento_stopLeaking();
}
Memento_startLeaking(); /* HarfBuzz leaks harmlessly */
hb_buffer_guess_segment_properties(walker->hb_buf);
Memento_stopLeaking();
hb_shape(hb->shaper_handle, walker->hb_buf, NULL, 0);
}
walker->glyph_pos = hb_buffer_get_glyph_positions(walker->hb_buf, &walker->glyph_count);
walker->glyph_info = hb_buffer_get_glyph_infos(walker->hb_buf, NULL);
}
fz_always(ctx)
{
hb_unlock(ctx);
}
fz_catch(ctx)
{
fz_rethrow(ctx);
}
if (quickshape)
{
/* Unshaped buffer: each entry still holds a code point. Map it to a glyph
 * (after f-ligature substitution, which may shrink glyph_count) and fill
 * in the advance by hand. */
unsigned int i;
for (i = 0; i < walker->glyph_count; ++i)
{
int unicode = quick_ligature(ctx, walker, i);
int glyph = fz_encode_character(ctx, walker->font, unicode);
walker->glyph_info[i].codepoint = glyph;
walker->glyph_pos[i].x_offset = 0;
walker->glyph_pos[i].y_offset = 0;
walker->glyph_pos[i].x_advance = fz_advance_glyph(ctx, walker->font, glyph, 0) * face->units_per_EM;
walker->glyph_pos[i].y_advance = 0;
}
}
return 1;
}
/*
 * Return the text a flow node contributes when measured: the word itself,
 * " " for a space, "-" for a soft hyphen, and "" for every other node type.
 */
static const char *get_node_text(fz_context *ctx, fz_html_flow *node)
{
	switch (node->type)
	{
	case FLOW_WORD:
		return node->content.text;
	case FLOW_SPACE:
		return " ";
	case FLOW_SHYPHEN:
		return "-";
	default:
		return "";
	}
}
/*
 * Measure a text-like flow node.
 *
 * The position is reset to 0,0, the height is taken from the box's
 * line-height, and the width is the sum of the shaped glyph advances of
 * every font run, scaled from font units to the box's em size.
 */
static void measure_string(fz_context *ctx, fz_html_flow *node, hb_buffer_t *hb_buf)
{
	string_walker walker;
	float em = node->box->em;
	const char *text;

	node->x = 0;
	node->y = 0;
	node->w = 0;
	node->h = fz_from_css_number_scale(node->box->style.line_height, em, em, em);

	text = get_node_text(ctx, node);
	init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->box->style.font, node->script, node->markup_lang, text);

	while (walk_string(&walker))
	{
		unsigned int k;
		int run_advance = 0;

		for (k = 0; k < walker.glyph_count; k++)
			run_advance += walker.glyph_pos[k].x_advance;

		node->w += run_advance * em / walker.scale;
	}
}
/*
 * Compute the height of the line made of the nodes [node, end) and store the
 * baseline offset from the top of the line in *baseline.
 *
 * Text nodes contribute 0.8em above and 0.2em below the baseline; images sit
 * wholly above it. The line is at least as tall as its tallest node, and
 * any spare height is split evenly above and below.
 */
static float measure_line(fz_html_flow *node, fz_html_flow *end, float *baseline)
{
	float ascent = 0;
	float descent = 0;
	float height = node->h;
	fz_html_flow *it;

	for (it = node; it != end; it = it->next)
	{
		if (it->type == FLOW_IMAGE)
		{
			if (it->h > ascent)
				ascent = it->h;
		}
		else
		{
			float a = it->box->em * 0.8;
			float d = it->box->em * 0.2;
			if (a > ascent)
				ascent = a;
			if (d > descent)
				descent = d;
		}

		if (it->h > height)
			height = it->h;
		if (ascent + descent > height)
			height = ascent + descent;
	}

	*baseline = ascent + (height - ascent - descent) / 2;
	return height;
}
/*
 * Position the flow nodes [start, end) of one line inside 'box'.
 *
 * indent   - extra offset applied at the start of the line.
 * page_w   - width available to the line.
 * line_w   - width actually used by the line's content.
 * align    - TA_* text alignment; TA_JUSTIFY spreads the slack over the
 *            expandable spaces that do not end the line.
 * baseline - offset of the baseline from the top of the line.
 * line_h   - height of the line.
 *
 * Nodes are reordered visually according to their bidi levels, then given
 * x positions left to right and y positions derived from the baseline and
 * the box's vertical-align. The line's top is box->y + box->h on entry.
 */
static void layout_line(fz_context *ctx, float indent, float page_w, float line_w, int align, fz_html_flow *start, fz_html_flow *end, fz_html_box *box, float baseline, float line_h)
{
	float x = box->x + indent;
	float y = box->y + box->h;
	float slop = page_w - line_w;
	float justify = 0;
	float va;
	int n, i;
	fz_html_flow *node;
	fz_html_flow **reorder;
	unsigned int min_level, max_level;

	/* Count the number of nodes on the line (n) and the expandable spaces (i). */
	for (i = 0, n = 0, node = start; node != end; node = node->next)
	{
		n++;
		if (node->type == FLOW_SPACE && node->expand && !node->breaks_line)
			i++;
	}

	if (align == TA_JUSTIFY)
	{
		/* With no expandable space there is nothing to stretch; avoid dividing by zero. */
		if (i > 0)
			justify = slop / i;
	}
	else if (align == TA_RIGHT)
		x += slop;
	else if (align == TA_CENTER)
		x += slop / 2;

	/* We need a block to hold the node pointers while we reorder */
	reorder = fz_malloc_array(ctx, n, sizeof(*reorder));
	min_level = start->bidi_level;
	max_level = start->bidi_level;
	for (i = 0, node = start; node != end; i++, node = node->next)
	{
		reorder[i] = node;
		if (node->bidi_level < min_level)
			min_level = node->bidi_level;
		if (node->bidi_level > max_level)
			max_level = node->bidi_level;
	}

	/* Do we need to do any reordering? */
	if (min_level != max_level || (min_level & 1))
	{
		/* The lowest level we swap is always a rtl one */
		min_level |= 1;
		/* Each time around the loop we swap runs of fragments that have
		 * levels >= max_level (and decrement max_level). */
		do
		{
			int run_start = 0;
			int run_end;
			do
			{
				/* Skip until we find a level that's >= max_level */
				while (run_start < n && reorder[run_start]->bidi_level < max_level)
					run_start++;
				/* If run_start >= n-1 then no more runs. */
				if (run_start >= n-1)
					break;
				/* Find the end of the match */
				i = run_start+1;
				while (i < n && reorder[i]->bidi_level >= max_level)
					i++;
				/* Reverse from run_start to i-1 */
				run_end = i-1;
				while (run_start < run_end)
				{
					fz_html_flow *t = reorder[run_start];
					reorder[run_start++] = reorder[run_end];
					reorder[run_end--] = t;
				}
				/* reorder[i] is below max_level (or past the end), so resume after it. */
				run_start = i+1;
			}
			while (run_start < n);
			max_level--;
		}
		while (max_level >= min_level);
	}

	for (i = 0; i < n; i++)
	{
		float w;
		node = reorder[i];
		w = node->w;

		/* A space that ends the line takes no room; other spaces absorb the
		 * justification slack. A soft hyphen is only visible when it ends
		 * the line. */
		if (node->type == FLOW_SPACE && node->breaks_line)
			w = 0;
		else if (node->type == FLOW_SPACE && !node->breaks_line)
			w += node->expand ? justify : 0;
		else if (node->type == FLOW_SHYPHEN && !node->breaks_line)
			w = 0;
		else if (node->type == FLOW_SHYPHEN && node->breaks_line)
			w = node->w;

		node->x = x;
		x += w;

		switch (node->box->style.vertical_align)
		{
		default:
		case VA_BASELINE:
			va = 0;
			break;
		case VA_SUB:
			va = node->box->em * 0.2f;
			break;
		case VA_SUPER:
			va = node->box->em * -0.3f;
			break;
		case VA_TOP:
		case VA_TEXT_TOP:
			va = -baseline + node->box->em * 0.8;
			break;
		case VA_BOTTOM:
		case VA_TEXT_BOTTOM:
			va = -baseline + line_h - node->box->em * 0.2;
			break;
		}

		/* Images sit on the baseline; text records the baseline position itself. */
		if (node->type == FLOW_IMAGE)
			node->y = y + baseline - node->h;
		else
		{
			node->y = y + baseline + va;
			node->h = node->box->em;
		}
	}

	fz_free(ctx, reorder);
}
/*
 * Add to *w and *h the total horizontal and vertical margin, padding and
 * border of 'box' and all of its ancestors. The caller initialises *w and *h.
 */
static void find_accumulated_margins(fz_context *ctx, fz_html_box *box, float *w, float *h)
{
	fz_html_box *b;

	for (b = box; b != NULL; b = b->up)
	{
		/* TODO: take into account collapsed margins */
		*h += b->margin[T] + b->padding[T] + b->border[T];
		*h += b->margin[B] + b->padding[B] + b->border[B];
		*w += b->margin[L] + b->padding[L] + b->border[L];
		*w += b->margin[R] + b->padding[R] + b->border[R];
	}
}
/*
 * Lay out the line made of the nodes [a, b) at the bottom of 'box' and grow
 * the box by the line height. If the line does not fit in what remains of
 * the current page, the box is first padded to the start of the next page.
 */
static void flush_line(fz_context *ctx, fz_html_box *box, float page_h, float page_w, float line_w, int align, float indent, fz_html_flow *a, fz_html_flow *b)
{
	float baseline;
	float remaining = page_h - fmodf(box->y + box->h, page_h);
	float height = measure_line(a, b, &baseline);

	if (height > remaining)
		box->h += remaining;

	layout_line(ctx, indent, page_w, line_w, align, a, b, box, baseline, height);
	box->h += height;
}
/*
 * For 'box', its following siblings and all their descendants: copy the y
 * position from the parent and resolve the em size from the box's font-size
 * relative to the parent's em.
 */
static void layout_flow_inline(fz_context *ctx, fz_html_box *box, fz_html_box *top)
{
	fz_html_box *cur;

	for (cur = box; cur != NULL; cur = cur->next)
	{
		cur->y = top->y;
		cur->em = fz_from_css_number(cur->style.font_size, top->em, top->em);
		if (cur->down)
			layout_flow_inline(ctx, cur->down, cur);
	}
}
/*
 * Lay out the flow box 'box' inside the block 'top': measure every flow
 * node, break the content into lines that fit the width of 'top', and
 * position each line. On return box->h holds the total height of the lines.
 */
static void layout_flow(fz_context *ctx, fz_html_box *box, fz_html_box *top, float page_h, hb_buffer_t *hb_buf)
{
fz_html_flow *node, *line, *candidate;
float line_w, candidate_w, indent, break_w, nonbreak_w;
int line_align, align;
float em = box->em = fz_from_css_number(box->style.font_size, top->em, top->em);
/* text-indent only applies to the first flow of its block. */
indent = box->is_first_flow ? fz_from_css_number(top->style.text_indent, em, top->w) : 0;
align = top->style.text_align;
/* In right-to-left boxes, left and right alignment swap. */
if (box->markup_dir == FZ_BIDI_RTL)
{
if (align == TA_LEFT)
align = TA_RIGHT;
else if (align == TA_RIGHT)
align = TA_LEFT;
}
/* The flow box starts at the current bottom of its parent and spans its width. */
box->x = top->x;
box->y = top->y + top->h;
box->w = top->w;
box->h = 0;
if (!box->flow_head)
return;
if (box->down)
layout_flow_inline(ctx, box->down, box);
/* Pass 1: measure every node. */
for (node = box->flow_head; node; node = node->next)
{
node->breaks_line = 0; /* reset line breaks from previous layout */
if (node->type == FLOW_IMAGE)
{
/* Images are limited to the width and page height left after all enclosing margins, padding and borders. */
float w = 0, h = 0;
find_accumulated_margins(ctx, box, &w, &h);
measure_image(ctx, node, top->w - w, page_h - h);
}
else
{
measure_string(ctx, node, hb_buf);
}
}
/* Pass 2: break into lines. 'line' is the first node of the current line,
 * 'candidate' the best break point seen so far on it, and candidate_w the
 * line width if broken there. */
node = box->flow_head;
candidate = NULL;
candidate_w = 0;
line = node;
line_w = indent;
while (node)
{
switch (node->type)
{
default:
case FLOW_WORD:
case FLOW_IMAGE:
nonbreak_w = break_w = node->w;
break;
case FLOW_SHYPHEN:
case FLOW_SBREAK:
case FLOW_SPACE:
nonbreak_w = break_w = 0;
/* Determine broken and unbroken widths of this node. */
if (node->type == FLOW_SPACE)
nonbreak_w = node->w;
else if (node->type == FLOW_SHYPHEN)
break_w = node->w;
/* If the broken node fits, remember it. */
/* Also remember it if we have no other candidate and need to break in desperation. */
if (line_w + break_w <= box->w || !candidate)
{
candidate = node;
candidate_w = line_w + break_w;
}
break;
case FLOW_BREAK:
nonbreak_w = break_w = 0;
candidate = node;
candidate_w = line_w;
break;
}
/* The current node either does not fit or we saw a hard break. */
/* Break the line if we have a candidate break point. */
if (node->type == FLOW_BREAK || (line_w + nonbreak_w > box->w && candidate))
{
candidate->breaks_line = 1;
/* A line ended by a hard break is not justified. */
if (candidate->type == FLOW_BREAK)
line_align = (align == TA_JUSTIFY) ? TA_LEFT : align;
else
line_align = align;
flush_line(ctx, box, page_h, box->w, candidate_w, line_align, indent, line, candidate->next);
/* Restart just after the break point; nodes between it and the
 * overflowing node are rescanned on the new line. */
line = candidate->next;
node = candidate->next;
candidate = NULL;
candidate_w = 0;
indent = 0;
line_w = 0;
}
else
{
line_w += nonbreak_w;
node = node->next;
}
}
/* Flush the final, partial line; like hard-broken lines it is not justified. */
if (line)
{
line_align = (align == TA_JUSTIFY) ? TA_LEFT : align;
flush_line(ctx, box, page_h, box->w, line_w, line_align, indent, line, NULL);
}
}
/*
	Apply a forced page break (PB_ALWAYS, PB_LEFT or PB_RIGHT) at the
	current bottom of 'box' by growing box->h up to the next page
	boundary, plus one extra page when the left/right parity demands it.
	'vertical' is subtracted from the position and from the inserted
	space.

	Returns 1 if the box was grown, 0 if no break was requested or the
	position already sits on a page boundary.
*/
static int layout_block_page_break(fz_context *ctx, fz_html_box *box, float page_h, float vertical, int page_break)
{
	float avail;
	int number;
	int odd;

	if (page_break != PB_ALWAYS && page_break != PB_LEFT && page_break != PB_RIGHT)
		return 0;

	avail = page_h - fmodf(box->y + box->h - vertical, page_h);
	/* page index of the current bottom, with a 10% page bias before truncation */
	number = (box->y + box->h + (page_h * 0.1f)) / page_h;

	if (!(avail > 0 && avail < page_h))
		return 0;

	box->h += avail - vertical;
	odd = (number & 1);
	if (page_break == PB_LEFT && odd == 0) /* right side pages are even */
		box->h += page_h;
	if (page_break == PB_RIGHT && odd == 1) /* left side pages are odd */
		box->h += page_h;
	return 1;
}
/*
	Lay out the block box 'box' inside its parent 'top'.

	Resolves the box's em, margins, padding and borders from its style,
	applies any page-break-before to the parent, positions the box, then
	lays out each child (nested blocks recursively, flows via layout_flow,
	BOX_BREAK children as one line-height of space) while accumulating
	box->h. Finally applies page-break-after and settles the bottom margin.

	'vertical' is the amount of vertical margin already accounted for above
	this box; it is used to collapse adjoining margins. The return value is
	the updated 'vertical' after this box (presumably consumed by the next
	sibling for the same purpose - see the recursive call below).

	The margin/padding/border arrays are indexed 0..3, which correspond to
	the T, R, B, L enum values.
*/
static float layout_block(fz_context *ctx, fz_html_box *box, fz_html_box *top, float page_h, float vertical, hb_buffer_t *hb_buf)
{
fz_html_box *child;
int first;
fz_css_style *style = &box->style;
float *margin = box->margin;
float *border = box->border;
float *padding = box->padding;
float em = box->em = fz_from_css_number(style->font_size, top->em, top->em);
/* resolve box model lengths against this box's em and the parent's width */
margin[0] = fz_from_css_number(style->margin[0], em, top->w);
margin[1] = fz_from_css_number(style->margin[1], em, top->w);
margin[2] = fz_from_css_number(style->margin[2], em, top->w);
margin[3] = fz_from_css_number(style->margin[3], em, top->w);
padding[0] = fz_from_css_number(style->padding[0], em, top->w);
padding[1] = fz_from_css_number(style->padding[1], em, top->w);
padding[2] = fz_from_css_number(style->padding[2], em, top->w);
padding[3] = fz_from_css_number(style->padding[3], em, top->w);
/* a side with no border style has zero border width */
border[0] = style->border_style_0 ? fz_from_css_number(style->border_width[0], em, top->w) : 0;
border[1] = style->border_style_1 ? fz_from_css_number(style->border_width[1], em, top->w) : 0;
border[2] = style->border_style_2 ? fz_from_css_number(style->border_width[2], em, top->w) : 0;
border[3] = style->border_style_3 ? fz_from_css_number(style->border_width[3], em, top->w) : 0;
/* TODO: remove 'vertical' margin adjustments across automatic page breaks */
/* page-break-before grows the parent, since this box starts at the parent's current bottom */
if (layout_block_page_break(ctx, top, page_h, vertical, style->page_break_before))
vertical = 0;
box->x = top->x + margin[L] + border[L] + padding[L];
box->w = top->w - (margin[L] + margin[R] + border[L] + border[R] + padding[L] + padding[R]);
/* collapse the top margin against the margin already accounted for above */
if (margin[T] > vertical)
margin[T] -= vertical;
else
margin[T] = 0;
/* the top margin can only keep collapsing with children if no padding or border separates them */
if (padding[T] == 0 && border[T] == 0)
vertical += margin[T];
else
vertical = 0;
box->y = top->y + top->h + margin[T] + border[T] + padding[T];
box->h = 0;
/* 'first' stays set until some child has contributed content */
first = 1;
for (child = box->down; child; child = child->next)
{
if (child->type == BOX_BLOCK)
{
vertical = layout_block(ctx, child, box, page_h, vertical, hb_buf);
if (first)
{
/* move collapsed parent/child top margins to parent */
margin[T] += child->margin[T];
box->y += child->margin[T];
child->margin[T] = 0;
first = 0;
}
/* the child's h excludes its own margin, border and padding, so add them here */
box->h += child->h +
child->padding[T] + child->padding[B] +
child->border[T] + child->border[B] +
child->margin[T] + child->margin[B];
}
else if (child->type == BOX_BREAK)
{
/* a break child takes up one line-height of vertical space */
box->h += fz_from_css_number_scale(style->line_height, em, em, em);
vertical = 0;
first = 0;
}
else if (child->type == BOX_FLOW)
{
layout_flow(ctx, child, box, page_h, hb_buf);
/* flows that produced no height do not interrupt margin collapsing */
if (child->h > 0)
{
box->h += child->h;
vertical = 0;
first = 0;
}
}
}
/* reserve space for the list mark */
if (box->list_item && box->h == 0)
{
box->h += fz_from_css_number_scale(style->line_height, em, em, em);
vertical = 0;
}
/* page-break-after grows this box itself and discards its bottom margin */
if (layout_block_page_break(ctx, box, page_h, 0, style->page_break_after))
{
vertical = 0;
margin[B] = 0;
}
if (box->h == 0)
{
/* empty box: the bottom margin collapses against the margin carried so far */
if (margin[B] > vertical)
margin[B] -= vertical;
else
margin[B] = 0;
}
else
{
/* take the carried margin back out of the height and keep the larger of it and margin[B] as the bottom margin */
box->h -= vertical;
vertical = fz_max(margin[B], vertical);
margin[B] = vertical;
}
return vertical;
}
/*
	Draw the flow nodes of 'box' that fall on the page spanning
	[page_top, page_bot] to device 'dev' using transform 'ctm'.

	Text nodes are shaped with the string walker and their glyphs are
	collected into an fz_text object; runs of the same colour are batched
	and filled together. Image nodes are filled individually. All y
	coordinates are shifted by -page_top.

	NOTE(review): 'text' is only dropped on the normal paths below. If any
	of the calls made while it is live can throw, it would presumably be
	leaked - confirm and consider fz_try/fz_always.
*/
static void draw_flow_box(fz_context *ctx, fz_html_box *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm, hb_buffer_t *hb_buf)
{
fz_html_flow *node;
fz_text *text;
fz_matrix trm;
float color[3];
float prev_color[3];
/* FIXME: HB_DIRECTION_TTB? */
text = NULL;
prev_color[0] = 0;
prev_color[1] = 0;
prev_color[2] = 0;
for (node = box->flow_head; node; node = node->next)
{
fz_css_style *style = &node->box->style;
/* Skip nodes that are not on this page. Images are tested by their */
/* vertical extent, all other nodes by their single y coordinate. */
if (node->type == FLOW_IMAGE)
{
if (node->y >= page_bot || node->y + node->h <= page_top)
continue;
}
else
{
if (node->y > page_bot || node->y < page_top)
continue;
}
if (node->type == FLOW_WORD || node->type == FLOW_SPACE || node->type == FLOW_SHYPHEN)
{
string_walker walker;
const char *s;
float x, y;
if (node->type == FLOW_WORD && node->content.text == NULL)
continue;
/* a space at which the line was broken is not drawn */
if (node->type == FLOW_SPACE && node->breaks_line)
continue;
/* a soft hyphen is only drawn where the line was broken */
if (node->type == FLOW_SHYPHEN && !node->breaks_line)
continue;
if (style->visibility != V_VISIBLE)
continue;
color[0] = style->color.r / 255.0f;
color[1] = style->color.g / 255.0f;
color[2] = style->color.b / 255.0f;
/* on a colour change, fill the text batched so far in the previous colour */
if (color[0] != prev_color[0] || color[1] != prev_color[1] || color[2] != prev_color[2])
{
if (text)
{
fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), prev_color, 1);
fz_drop_text(ctx, text);
text = NULL;
}
prev_color[0] = color[0];
prev_color[1] = color[1];
prev_color[2] = color[2];
}
if (!text)
text = fz_new_text(ctx);
/* odd bidi levels start at the right edge of the node and advance leftwards */
if (node->bidi_level & 1)
x = node->x + node->w;
else
x = node->x;
y = node->y;
/* scale by the font size; d is negated to flip the y axis */
trm.a = node->box->em;
trm.b = 0;
trm.c = 0;
trm.d = -node->box->em;
trm.e = x;
trm.f = y - page_top;
s = get_node_text(ctx, node);
init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, style->font, node->script, node->markup_lang, s);
while (walk_string(&walker))
{
float node_scale = node->box->em / walker.scale;
unsigned int i;
int c, k, n;
/* Flatten advance and offset into offset array. */
int x_advance = 0;
int y_advance = 0;
for (i = 0; i < walker.glyph_count; ++i)
{
walker.glyph_pos[i].x_offset += x_advance;
walker.glyph_pos[i].y_offset += y_advance;
x_advance += walker.glyph_pos[i].x_advance;
y_advance += walker.glyph_pos[i].y_advance;
}
/* for odd bidi levels, move x left by the run width before placing its glyphs */
if (node->bidi_level & 1)
x -= x_advance * node_scale;
/* Walk characters to find glyph clusters */
k = 0;
while (walker.start + k < walker.end)
{
n = fz_chartorune(&c, walker.start + k);
/* emit every glyph whose cluster is the byte offset of this character */
for (i = 0; i < walker.glyph_count; ++i)
{
if (walker.glyph_info[i].cluster == k)
{
trm.e = x + walker.glyph_pos[i].x_offset * node_scale;
trm.f = y - walker.glyph_pos[i].y_offset * node_scale - page_top;
fz_show_glyph(ctx, text, walker.font, &trm,
walker.glyph_info[i].codepoint, c,
0, node->bidi_level, box->markup_dir, node->markup_lang);
c = -1; /* for subsequent glyphs in x-to-many mappings */
}
}
/* no glyph found (many-to-many or many-to-one mapping) */
if (c != -1)
{
/* record the character with glyph id -1, reusing the last glyph position left in trm */
fz_show_glyph(ctx, text, walker.font, &trm,
-1, c,
0, node->bidi_level, box->markup_dir, node->markup_lang);
}
k += n;
}
/* for even bidi levels, advance x past the run after placing its glyphs */
if ((node->bidi_level & 1) == 0)
x += x_advance * node_scale;
y += y_advance * node_scale;
}
}
else if (node->type == FLOW_IMAGE)
{
/* Fill any batched text before the image. 'color' equals 'prev_color' */
/* whenever text is non-NULL, since text is only created after the two are synchronised above. */
if (text)
{
fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), color, 1);
fz_drop_text(ctx, text);
text = NULL;
}
if (style->visibility == V_VISIBLE)
{
/* place the image at the node position (relative to the page top), scaled to the node size */
fz_matrix local_ctm = *ctm;
fz_pre_translate(&local_ctm, node->x, node->y - page_top);
fz_pre_scale(&local_ctm, node->w, node->h);
fz_fill_image(ctx, dev, node->content.image, &local_ctm, 1);
}
}
}
/* fill whatever text is still batched at the end of the flow */
if (text)
{
fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), color, 1);
fz_drop_text(ctx, text);
text = NULL;
}
}
/*
	Fill the axis-aligned rectangle (x0,y0)-(x1,y1) with 'color' on
	device 'dev'. The y coordinates are shifted by -page_top. Nothing
	is drawn when the colour's alpha is zero.
*/
static void draw_rect(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, float page_top, fz_css_color color, float x0, float y0, float x1, float y1)
{
	float rgb[3];
	fz_path *outline;

	if (!(color.a > 0))
		return;

	outline = fz_new_path(ctx);
	fz_moveto(ctx, outline, x0, y0 - page_top);
	fz_lineto(ctx, outline, x1, y0 - page_top);
	fz_lineto(ctx, outline, x1, y1 - page_top);
	fz_lineto(ctx, outline, x0, y1 - page_top);
	fz_closepath(ctx, outline);

	rgb[0] = color.r / 255.0f;
	rgb[1] = color.g / 255.0f;
	rgb[2] = color.b / 255.0f;

	fz_fill_path(ctx, dev, outline, 0, ctm, fz_device_rgb(ctx), rgb, color.a / 255.0f);
	fz_drop_path(ctx, outline);
}
/*
	Upper-case roman numeral fragments, indexed [place][digit]:
	row 0 is the ones place, row 1 the tens, row 2 the hundreds.
	Digit 0 maps to the empty string.
*/
static const char *roman_uc[3][10] = {
{ "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX" },
{ "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" },
{ "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" },
};
/* Lower-case counterpart of roman_uc, with the same [place][digit] layout. */
static const char *roman_lc[3][10] = {
{ "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" },
{ "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" },
{ "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" },
};
/*
	Format 'n' as a roman numeral followed by ". " into buf (at most
	'size' bytes including the terminator). 'sym' supplies the ones,
	tens and hundreds fragments; 'sym_m' is the symbol repeated once
	per thousand.
*/
static void format_roman_number(fz_context *ctx, char *buf, int size, int n, const char *sym[3][10], const char *sym_m)
{
	int thousands = n / 1000;
	int hundreds = (n / 100) % 10;
	int tens = (n / 10) % 10;
	int ones = n % 10;
	int k;

	fz_strlcpy(buf, "", size);

	/* one thousands symbol per thousand; there is no larger roman unit here */
	for (k = thousands; k != 0; --k)
		fz_strlcat(buf, sym_m, size);

	fz_strlcat(buf, sym[2][hundreds], size);
	fz_strlcat(buf, sym[1][tens], size);
	fz_strlcat(buf, sym[0][ones], size);
	fz_strlcat(buf, ". ", size);
}
/*
	Format 'n' as an alphabetic list number followed by ". " into buf,
	using the code points alpha..omega as digits in bijective numeration
	(1 -> alpha, base -> omega, base+1 -> alpha alpha, ...).

	When alpha is above 256 the range is taken to be greek, and the
	final-sigma code point inside the range is skipped.

	At most 'size' bytes are written, including the terminating zero;
	output that does not fit is truncated. Nothing is written if size
	is not positive. For n <= 0 only ". " is produced.
*/
static void format_alpha_number(fz_context *ctx, char *buf, int size, int n, int alpha, int omega)
{
	int base = omega - alpha + 1;
	int tmp[40];
	char utf[8]; /* scratch for one encoded rune */
	char *end;
	int i, k, c, len;

	if (size <= 0)
		return;
	end = buf + size - 1; /* last byte is reserved for the terminator */

	if (alpha > 256) /* to skip final-s for greek */
		--base;

	/* Bijective base-26 (base-24 for greek) numeration */
	/* Digits are produced least significant first and emitted in reverse below. */
	i = 0;
	while (n > 0 && i < (int)(sizeof tmp / sizeof tmp[0]))
	{
		--n;
		c = n % base + alpha;
		if (alpha > 256 && c > alpha + 16) /* skip final-s for greek */
			++c;
		tmp[i++] = c;
		n /= base;
	}

	while (i > 0)
	{
		/* encode into scratch first so a rune is never written partially */
		len = fz_runetochar(utf, tmp[--i]);
		if (len > end - buf)
			break;
		for (k = 0; k < len; ++k)
			*buf++ = utf[k];
	}

	if (buf < end)
		*buf++ = '.';
	if (buf < end)
		*buf++ = ' ';
	*buf = 0;
}
/*
	Write the list marker text for item number 'x' of list style 'type'
	into buf (capacity 'size'). Bullet styles produce a fixed UTF-8
	symbol followed by a space; numbered styles produce the formatted
	number followed by ". ". Unknown types are formatted as decimal.
*/
static void format_list_number(fz_context *ctx, int type, int x, char *buf, int size)
{
	const char *bullet;

	switch (type)
	{
	/* fixed markers: pick the string and copy it after the switch */
	case LST_NONE:
		bullet = "";
		break;
	case LST_DISC:
		bullet = "\342\227\217 "; /* U+25CF BLACK CIRCLE */
		break;
	case LST_CIRCLE:
		bullet = "\342\227\213 "; /* U+25CB WHITE CIRCLE */
		break;
	case LST_SQUARE:
		bullet = "\342\226\240 "; /* U+25A0 BLACK SQUARE */
		break;

	/* numbered markers: format directly and return */
	case LST_DECIMAL_ZERO:
		fz_snprintf(buf, size, "%02d. ", x);
		return;
	case LST_LC_ROMAN:
		format_roman_number(ctx, buf, size, x, roman_lc, "m");
		return;
	case LST_UC_ROMAN:
		format_roman_number(ctx, buf, size, x, roman_uc, "M");
		return;
	case LST_LC_ALPHA:
	case LST_LC_LATIN:
		format_alpha_number(ctx, buf, size, x, 'a', 'z');
		return;
	case LST_UC_ALPHA:
	case LST_UC_LATIN:
		format_alpha_number(ctx, buf, size, x, 'A', 'Z');
		return;
	case LST_LC_GREEK:
		format_alpha_number(ctx, buf, size, x, 0x03B1, 0x03C9);
		return;
	case LST_UC_GREEK:
		format_alpha_number(ctx, buf, size, x, 0x0391, 0x03A9);
		return;
	case LST_DECIMAL:
	default:
		fz_snprintf(buf, size, "%d. ", x);
		return;
	}

	fz_strlcpy(buf, bullet, size);
}
/*
	Descend from 'box' through first children until a flow box is
	reached and return the head of its flow list (which may be NULL).
	Returns NULL if no flow box is found on that path.
*/
static fz_html_flow *find_list_mark_anchor(fz_context *ctx, fz_html_box *box)
{
	fz_html_box *cur;

	/* find first flow node in tag */
	for (cur = box; cur != NULL; cur = cur->down)
	{
		if (cur->type == BOX_FLOW)
			return cur->flow_head;
	}
	return NULL;
}
/*
	Draw the list marker for list item 'box' (item number 'n') so that
	it ends at the left edge of the box. The marker sits on the baseline
	of the first flow node found inside the box, or on a synthesised
	baseline when the box has no flow content. Nothing is drawn if that
	baseline is outside [page_top, page_bot].
*/
static void draw_list_mark(fz_context *ctx, fz_html_box *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm, int n)
{
	fz_font *font;
	fz_text *text;
	fz_matrix trm;
	fz_html_flow *anchor;
	float baseline, width;
	float color[3];
	const char *p;
	char label[40];
	int rune, gid;

	anchor = find_list_mark_anchor(ctx, box);
	if (anchor)
	{
		baseline = anchor->y;
	}
	else
	{
		/* no content: centre an 0.8em ascent / 0.2em descent within one line height */
		float h = fz_from_css_number_scale(box->style.line_height, box->em, box->em, box->em);
		float a = box->em * 0.8;
		float d = box->em * 0.2;
		if (a + d > h)
			h = a + d;
		baseline = box->y + a + (h - a - d) / 2;
	}

	if (baseline > page_bot || baseline < page_top)
		return;

	fz_scale(&trm, box->em, -box->em);
	format_list_number(ctx, box->style.list_style_type, n, label, sizeof label);

	/* first pass: measure the marker so it can be right-aligned against box->x */
	width = 0;
	for (p = label; *p; )
	{
		p += fz_chartorune(&rune, p);
		gid = fz_encode_character_with_fallback(ctx, box->style.font, rune, UCDN_SCRIPT_LATIN, FZ_LANG_UNSET, &font);
		width += fz_advance_glyph(ctx, font, gid, 0) * box->em;
	}

	text = fz_new_text(ctx);
	fz_try(ctx)
	{
		trm.e = box->x - width;
		trm.f = baseline - page_top;

		/* second pass: place each glyph and advance the pen */
		for (p = label; *p; )
		{
			p += fz_chartorune(&rune, p);
			gid = fz_encode_character_with_fallback(ctx, box->style.font, rune, UCDN_SCRIPT_LATIN, FZ_LANG_UNSET, &font);
			fz_show_glyph(ctx, text, font, &trm, gid, rune, 0, 0, FZ_BIDI_NEUTRAL, FZ_LANG_UNSET);
			trm.e += fz_advance_glyph(ctx, font, gid, 0) * box->em;
		}

		color[0] = box->style.color.r / 255.0f;
		color[1] = box->style.color.g / 255.0f;
		color[2] = box->style.color.b / 255.0f;

		fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), color, 1);
	}
	fz_always(ctx)
		fz_drop_text(ctx, text);
	fz_catch(ctx)
		fz_rethrow(ctx);
}
/*
	Draw block 'box' and, recursively, its children for the page
	spanning [page_top, page_bot]. For a visible box this paints the
	background over the padding box, then each border side that has a
	positive width, then the list marker if the box is a list item.
	Boxes whose padding box lies entirely off the page are skipped
	together with their children.
*/
static void draw_block_box(fz_context *ctx, fz_html_box *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm, hb_buffer_t *hb_buf)
{
	fz_html_box *child;
	float *border = box->border;
	float *padding = box->padding;
	float x0, y0, x1, y1;

	/* padding box */
	x0 = box->x - padding[L];
	y0 = box->y - padding[T];
	x1 = box->x + box->w + padding[R];
	y1 = box->y + box->h + padding[B];

	if (y0 > page_bot || y1 < page_top)
		return;

	if (box->style.visibility == V_VISIBLE)
	{
		draw_rect(ctx, dev, ctm, page_top, box->style.background_color, x0, y0, x1, y1);

		/* each border is a rectangle just outside the padding box */
		if (border[T] > 0)
			draw_rect(ctx, dev, ctm, page_top, box->style.border_color[T], x0 - border[L], y0 - border[T], x1 + border[R], y0);
		if (border[B] > 0)
			draw_rect(ctx, dev, ctm, page_top, box->style.border_color[B], x0 - border[L], y1, x1 + border[R], y1 + border[B]);
		if (border[L] > 0)
			draw_rect(ctx, dev, ctm, page_top, box->style.border_color[L], x0 - border[L], y0 - border[T], x0, y1 + border[B]);
		if (border[R] > 0)
			draw_rect(ctx, dev, ctm, page_top, box->style.border_color[R], x1, y0 - border[T], x1 + border[R], y1 + border[B]);

		if (box->list_item)
			draw_list_mark(ctx, box, page_top, page_bot, dev, ctm, box->list_item);
	}

	/* children are drawn even when this box itself is not visible */
	for (child = box->down; child; child = child->next)
	{
		if (child->type == BOX_BLOCK)
			draw_block_box(ctx, child, page_top, page_bot, dev, ctm, hb_buf);
		else if (child->type == BOX_FLOW)
			draw_flow_box(ctx, child, page_top, page_bot, dev, ctm, hb_buf);
	}
}
/*
	Draw page number 'page' of the laid-out document 'html' to 'dev'.

	The root background colour is painted over the whole page including
	the page margins, then every block below the root is drawn with the
	transform offset by the left and top page margins. Only content between
	page * page_h and (page + 1) * page_h is drawn.

	A shaping buffer is created for the duration of the call and destroyed
	before returning, whether or not drawing throws; errors are rethrown.
*/
void
fz_draw_html(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, fz_html *html, int page)
{
fz_matrix local_ctm = *ctm;
hb_buffer_t *hb_buf = NULL;
fz_html_box *box;
int unlocked = 0;
float page_top = page * html->page_h;
float page_bot = (page + 1) * html->page_h;
/* both are modified inside fz_try and read in fz_always */
fz_var(hb_buf);
fz_var(unlocked);
/* page background, drawn with the untranslated ctm so it covers the margins too */
draw_rect(ctx, dev, ctm, 0, html->root->style.background_color,
0, 0,
html->page_w + html->page_margin[L] + html->page_margin[R],
html->page_h + html->page_margin[T] + html->page_margin[B]);
fz_pre_translate(&local_ctm, html->page_margin[L], html->page_margin[T]);
/* the buffer is created and destroyed with the hb lock held; drawing runs with it released */
hb_lock(ctx);
fz_try(ctx)
{
hb_buf = hb_buffer_create();
hb_unlock(ctx);
unlocked = 1;
for (box = html->root->down; box; box = box->next)
draw_block_box(ctx, box, page_top, page_bot, dev, &local_ctm, hb_buf);
}
fz_always(ctx)
{
/* re-take the lock only if the try block got as far as releasing it */
if (unlocked)
hb_lock(ctx);
hb_buffer_destroy(hb_buf);
hb_unlock(ctx);
}
fz_catch(ctx)
{
fz_rethrow(ctx);
}
}
/*
	Decide whether 'uri' refers to something inside the document
	(returns 1) or is an absolute URI pointing elsewhere (returns 0).

	A URI is external when it starts with a scheme followed by ':'.
	The scheme is recognised using the RFC 3986 syntax
	(ALPHA *(ALPHA / DIGIT / "+" / "-" / ".")), so upper-case schemes
	such as "HTTP://..." and schemes without an authority such as
	"mailto:..." count as external too.

	A one-character prefix before ':' is only treated as a scheme when
	"//" follows, so that something shaped like a drive letter ("c:/x")
	keeps being treated as internal. Anything followed by "://" is
	external, as before.
*/
static int is_internal_uri(const char *uri)
{
	const char *p = uri;

	/* scan the scheme, if there is one */
	if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z'))
	{
		++p;
		while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
			(*p >= '0' && *p <= '9') ||
			*p == '+' || *p == '-' || *p == '.')
			++p;
	}

	/* no "scheme:" prefix: a relative reference or a fragment */
	if (p[0] != ':')
		return 1;

	/* "scheme://authority..." is always external */
	if (p[1] == '/' && p[2] == '/')
		return 0;

	/* "scheme:opaque" is external unless the prefix is too short to be a credible scheme */
	return (p - uri) < 2;
}
/*
	Return the href of the nearest box, starting at 'box' and walking
	up through its ancestors, that has one. Returns NULL if none does.
*/
static const char *box_href(fz_html_box *box)
{
	fz_html_box *cur;

	for (cur = box; cur != NULL; cur = cur->up)
	{
		if (cur->href)
			return cur->href;
	}
	return NULL;
}
/*
	Return non-zero if the nearest href found on 'box' or its ancestors
	is the same string as 'old_href'. Returns 0 if no box on that path
	has an href.
*/
static int has_same_href(fz_html_box *box, const char *old_href)
{
	fz_html_box *cur;

	for (cur = box; cur != NULL; cur = cur->up)
	{
		/* only the nearest href counts; stop at the first one found */
		if (cur->href)
			return strcmp(old_href, cur->href) == 0;
	}
	return 0;
}
/*
	Create link objects for the linked flow nodes in the list starting
	at 'flow' that lie on page 'page', prepending each new link to
	'head'. Returns the new list head.

	Runs of nodes on the same line (same y and h) that share an href
	are merged into a single link rectangle. Internal hrefs are resolved
	against 'file' (for "#fragment" references) or 'dir' (for anything
	else); external hrefs are used unchanged.
*/
static fz_link *load_link_flow(fz_context *ctx, fz_html_flow *flow, fz_link *head, int page, float page_h, const char *dir, const char *file)
{
	fz_link *link;
	fz_html_flow *next;
	char path[2048];
	fz_rect bbox;
	const char *dest;
	const char *href;
	float end;

	for (; flow; flow = next)
	{
		next = flow->next;
		href = box_href(flow->box);

		if (!href || (int)(flow->y / page_h) != page)
			continue;

		/* Coalesce contiguous flow boxes into one link node */
		end = flow->x + flow->w;
		while (next &&
			next->y == flow->y &&
			next->h == flow->h &&
			has_same_href(next->box, href))
		{
			end = next->x + next->w;
			next = next->next;
		}

		bbox.x0 = flow->x;
		bbox.y0 = flow->y - page * page_h;
		bbox.x1 = end;
		bbox.y1 = bbox.y0 + flow->h;
		if (flow->type != FLOW_IMAGE)
		{
			/* flow->y is the baseline, adjust bbox appropriately */
			bbox.y0 -= 0.8 * flow->h;
			bbox.y1 -= 0.8 * flow->h;
		}

		if (!is_internal_uri(href))
		{
			dest = href;
		}
		else
		{
			if (href[0] != '#')
			{
				/* relative path: resolve against the document's directory */
				fz_strlcpy(path, dir, sizeof path);
				fz_strlcat(path, "/", sizeof path);
				fz_strlcat(path, href, sizeof path);
			}
			else
			{
				/* bare fragment: refers to the current file */
				fz_strlcpy(path, file, sizeof path);
				fz_strlcat(path, href, sizeof path);
			}
			fz_urldecode(path);
			fz_cleanname(path);
			dest = path;
		}

		link = fz_new_link(ctx, &bbox, NULL, dest);
		link->next = head;
		head = link;
	}

	return head;
}
/*
	Collect the links for page 'page' from 'box', its following
	siblings and all of their descendants, prepending them to 'head'.
	Returns the new list head.
*/
static fz_link *load_link_box(fz_context *ctx, fz_html_box *box, fz_link *head, int page, float page_h, const char *dir, const char *file)
{
	fz_html_box *cur;

	for (cur = box; cur; cur = cur->next)
	{
		if (cur->flow_head)
			head = load_link_flow(ctx, cur->flow_head, head, page, page_h, dir, file);
		if (cur->down)
			head = load_link_box(ctx, cur->down, head, page, page_h, dir, file);
	}
	return head;
}
/*
	Build the list of links on page 'page' of 'html'. 'file' is the
	path of the document, used to resolve internal hrefs; 'doc' is
	stored in every returned link. Link rectangles are offset by the
	left and top page margins.
*/
fz_link *
fz_load_html_links(fz_context *ctx, fz_html *html, int page, const char *file, void *doc)
{
	fz_link *head;
	fz_link *cur;
	char dir[2048];

	fz_dirname(dir, file, sizeof dir);
	head = load_link_box(ctx, html->root, NULL, page, html->page_h, dir, file);

	cur = head;
	while (cur)
	{
		/* Set document pointer */
		cur->doc = doc;

		/* Adjust for page margins */
		cur->rect.y0 += html->page_margin[T];
		cur->rect.y1 += html->page_margin[T];
		cur->rect.x0 += html->page_margin[L];
		cur->rect.x1 += html->page_margin[L];

		cur = cur->next;
	}

	return head;
}
/*
	Descend from 'box' through first children until a flow box is
	reached and return the head of its flow list (which may be NULL).
	Returns NULL if no flow box is found on that path.
*/
static fz_html_flow *
find_first_content(fz_html_box *box)
{
	fz_html_box *cur;

	for (cur = box; cur != NULL; cur = cur->down)
	{
		if (cur->type == BOX_FLOW)
			return cur->flow_head;
	}
	return NULL;
}
/*
	Search the flow list starting at 'flow' for a node whose box has
	the id 'id'. Returns the y position of the first such node, or -1
	if there is none.
*/
static float
find_flow_target(fz_html_flow *flow, const char *id)
{
	fz_html_flow *cur;

	for (cur = flow; cur; cur = cur->next)
	{
		if (cur->box->id && strcmp(id, cur->box->id) == 0)
			return cur->y;
	}
	return -1;
}
/*
	Search 'box', its following siblings and their descendants for the
	element with id 'id'. Returns the y position of its first flow
	content (or of the box itself if it has none), or -1 if the id is
	not found.
*/
static float
find_box_target(fz_html_box *box, const char *id)
{
	float y;

	for (; box; box = box->next)
	{
		if (box->id && strcmp(id, box->id) == 0)
		{
			fz_html_flow *first = find_first_content(box);
			return first ? first->y : box->y;
		}

		/* flow boxes are searched node by node, everything else by recursing into children */
		if (box->type == BOX_FLOW)
			y = find_flow_target(box->flow_head, id);
		else
			y = find_box_target(box->down, id);

		if (y >= 0)
			return y;
	}

	return -1;
}
/*
	Return the y position in the laid-out document 'html' of the
	element with id 'id', or -1 if no such element exists.
*/
float
fz_find_html_target(fz_context *ctx, fz_html *html, const char *id)
{
	float y = find_box_target(html->root, id);
	return y;
}
/*
	Return the first node in the flow list starting at 'flow' whose y
	position is at or below 'y', or NULL if there is none.
*/
static fz_html_flow *
make_flow_bookmark(fz_context *ctx, fz_html_flow *flow, float y)
{
	fz_html_flow *cur;

	for (cur = flow; cur; cur = cur->next)
	{
		if (cur->y >= y)
			return cur;
	}
	return NULL;
}
/*
	Search 'box', its following siblings and their descendants, in
	document order, for the first flow node at or below 'y'. Only flow
	boxes that themselves start at or below 'y' are examined. Returns
	NULL if no such node exists.
*/
static fz_html_flow *
make_box_bookmark(fz_context *ctx, fz_html_box *box, float y)
{
	fz_html_flow *found;

	for (; box; box = box->next)
	{
		if (box->type != BOX_FLOW)
			found = make_box_bookmark(ctx, box->down, y);
		else if (box->y >= y)
			found = make_flow_bookmark(ctx, box->flow_head, y);
		else
			found = NULL;

		if (found)
			return found;
	}

	return NULL;
}
/*
	Make a bookmark for the start of page 'page': the first flow node
	found at or below the top of that page, cast to fz_bookmark. The
	result is a null bookmark if there is no such node.
*/
fz_bookmark
fz_make_html_bookmark(fz_context *ctx, fz_html *html, int page)
{
	fz_html_flow *mark = make_box_bookmark(ctx, html->root, page * html->page_h);
	return (fz_bookmark)mark;
}
/*
	Return 1 if 'mark' is one of the nodes in the flow list starting at
	'flow', 0 otherwise.
*/
static int
lookup_flow_bookmark(fz_context *ctx, fz_html_flow *flow, fz_html_flow *mark)
{
	fz_html_flow *cur;

	for (cur = flow; cur; cur = cur->next)
	{
		if (cur == mark)
			return 1;
	}
	return 0;
}
/*
	Return 1 if the flow node 'mark' belongs to 'box', one of its
	following siblings or any of their descendants, 0 otherwise.
*/
static int
lookup_box_bookmark(fz_context *ctx, fz_html_box *box, fz_html_flow *mark)
{
	int found;

	for (; box; box = box->next)
	{
		if (box->type == BOX_FLOW)
			found = lookup_flow_bookmark(ctx, box->flow_head, mark);
		else
			found = lookup_box_bookmark(ctx, box->down, mark);

		if (found)
			return 1;
	}

	return 0;
}
/*
	Return the page number on which bookmark 'mark' currently lies in
	'html', or -1 if the bookmark is null or does not refer to a flow
	node of this document. The node is only dereferenced after it has
	been found in the document.
*/
int
fz_lookup_html_bookmark(fz_context *ctx, fz_html *html, fz_bookmark mark)
{
	fz_html_flow *flow = (fz_html_flow*)mark;

	if (!flow)
		return -1;
	if (!lookup_box_bookmark(ctx, html->root, flow))
		return -1;

	return (int)(flow->y / html->page_h);
}
/*
	Concatenate the text of all direct children of 'root' into one
	newly allocated, zero-terminated string. Children without text are
	skipped. The caller owns the returned buffer.
*/
static char *concat_text(fz_context *ctx, fz_xml *root)
{
	fz_xml *child;
	const char *text;
	size_t total = 1; /* room for the terminator */
	size_t used = 0;
	size_t len;
	char *out;

	/* first pass: work out how much space is needed */
	for (child = fz_xml_down(root); child; child = fz_xml_next(child))
	{
		text = fz_xml_text(child);
		if (text)
			total += strlen(text);
	}

	out = fz_malloc(ctx, total);

	/* second pass: copy the pieces back to back */
	for (child = fz_xml_down(root); child; child = fz_xml_next(child))
	{
		text = fz_xml_text(child);
		if (!text)
			continue;
		len = strlen(text);
		memcpy(out + used, text, len);
		used += len;
	}

	out[used] = 0;
	return out;
}
static void
html_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root)
{
fz_xml *html, *head, *node;
fz_buffer *buf;
char path[2048];
fz_var(buf);
html = fz_xml_find(root, "html");
head = fz_xml_find_down(html, "head");
for (node = fz_xml_down(head); node; node = fz_xml_next(node))
{
if (fz_xml_is_tag(node, "link"))
{
char *rel = fz_xml_att(node, "rel");
if (rel && !fz_strcasecmp(rel, "stylesheet"))
{
char *type = fz_xml_att(node, "type");
if ((type && !strcmp(type, "text/css")) || !type)
{
char *href = fz_xml_att(node, "href");
if (href)
{
fz_strlcpy(path, base_uri, sizeof path);
fz_strlcat(path, "/", sizeof path);
fz_strlcat(path, href, sizeof path);
fz_urldecode(path);
fz_cleanname(path);
buf = NULL;
fz_try(ctx)
{
buf = fz_read_archive_entry(ctx, zip, path);
fz_parse_css(ctx, css, fz_string_from_buffer(ctx, buf), path);
}
fz_always(ctx)
fz_drop_buffer(ctx, buf);
fz_catch(ctx)
fz_warn(ctx, "ignoring stylesheet %s", path);
}
}
}
}
else if (fz_xml_is_tag(node, "style"))
{
char *s = concat_text(ctx, node);
fz_try(ctx)
fz_parse_css(ctx, css, s, "