summary refs log tree commit diff
path: root/source
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2015-02-23 15:00:56 +0100
committer Tor Andersson <tor.andersson@artifex.com> 2015-02-23 15:00:56 +0100
commit 119552f74c71c24698f4b424411786bfb4baea6b (patch)
tree bf312c79c7fe6fcb819c6f1b8e3cc9f8e4888e0e /source
parent 241f8b6c35895b932ac7f27de1ee76416cae419a (diff)
parent fe05bc51ee70190516d6cc65f03e343bfdc4f849 (diff)
download mupdf-119552f74c71c24698f4b424411786bfb4baea6b.tar.xz
Merge branch 'html'
Conflicts: Makefile
Diffstat (limited to 'source')
-rw-r--r-- source/fitz/document-all.c 2
-rw-r--r-- source/html/css-apply.c 993
-rw-r--r-- source/html/css-parse.c 865
-rw-r--r-- source/html/epub-doc.c 326
-rw-r--r-- source/html/html-doc.c 166
-rw-r--r-- source/html/html-font.c 47
-rw-r--r-- source/html/html-layout.c 909
7 files changed, 3308 insertions, 0 deletions
diff --git a/source/fitz/document-all.c b/source/fitz/document-all.c
index e1eb4a58..bfe57592 100644
--- a/source/fitz/document-all.c
+++ b/source/fitz/document-all.c
@@ -7,4 +7,6 @@ void fz_register_document_handlers(fz_context *ctx)
fz_register_document_handler(ctx, &cbz_document_handler);
fz_register_document_handler(ctx, &img_document_handler);
fz_register_document_handler(ctx, &tiff_document_handler);
+ fz_register_document_handler(ctx, &html_document_handler);
+ fz_register_document_handler(ctx, &epub_document_handler);
}
diff --git a/source/html/css-apply.c b/source/html/css-apply.c
new file mode 100644
index 00000000..9a6d7a89
--- /dev/null
+++ b/source/html/css-apply.c
@@ -0,0 +1,993 @@
+#include "mupdf/html.h"
+
+/*
+ * Properties that fall back to the parent's value when a node does not
+ * set them (see value_from_property). Keep in strcmp() order: the table
+ * is searched with keyword_in_list(), which is a binary search.
+ */
+static const char *inherit_list[] = {
+	"color", "direction",
+	"font-family", "font-size", "font-style", "font-variant", "font-weight",
+	"letter-spacing", "line-height",
+	"list-style-image", "list-style-position", "list-style-type",
+	"orphans", "quotes",
+	"text-align", "text-indent", "text-transform",
+	"visibility", "white-space", "widows", "word-spacing",
+};
+
+/* Border width keywords; sorted for keyword_in_list(). */
+static const char *border_width_kw[] = { "medium", "thick", "thin" };
+
+/* Border style keywords; sorted for keyword_in_list(). */
+static const char *border_style_kw[] = {
+	"dashed", "dotted", "double", "groove", "hidden",
+	"inset", "none", "outset", "ridge", "solid",
+};
+
+/* Named colors recognised in the 'border' shorthand; sorted for keyword_in_list(). */
+static const char *color_kw[] = {
+	"aqua", "black", "blue", "fuchsia", "gray", "green",
+	"lime", "maroon", "navy", "olive", "orange", "purple",
+	"red", "silver", "teal", "transparent", "white", "yellow",
+};
+
+/*
+ * Binary search for 'name' in a strcmp()-sorted table of 'n' strings.
+ * Returns 1 when found, 0 otherwise.
+ */
+static int
+keyword_in_list(const char *name, const char **list, int n)
+{
+	int lo, hi;
+
+	for (lo = 0, hi = n - 1; lo <= hi; )
+	{
+		int mid = (lo + hi) >> 1;
+		int cmp = strcmp(name, list[mid]);
+		if (cmp == 0)
+			return 1;
+		if (cmp < 0)
+			hi = mid - 1;
+		else
+			lo = mid + 1;
+	}
+	return 0;
+}
+
+/*
+ * Compute specificity
+ */
+
+/* Number of '#' (id) conditions in a condition chain. */
+static int
+count_condition_ids(fz_css_condition *cond)
+{
+	int total = 0;
+	for (; cond; cond = cond->next)
+		if (cond->type == '#')
+			++total;
+	return total;
+}
+
+/* Number of id conditions in a selector, including both sides of a combinator. */
+static int
+count_selector_ids(fz_css_selector *sel)
+{
+	int total = count_condition_ids(sel->cond);
+	if (!sel->left || !sel->right)
+		return total;
+	total += count_selector_ids(sel->left);
+	total += count_selector_ids(sel->right);
+	return total;
+}
+
+/* Number of conditions in a chain that are neither '#' (id) nor ':' conditions. */
+static int
+count_condition_atts(fz_css_condition *cond)
+{
+	int total = 0;
+	for (; cond; cond = cond->next)
+		if (!(cond->type == '#' || cond->type == ':'))
+			++total;
+	return total;
+}
+
+/* Like count_condition_atts, summed over a whole selector tree. */
+static int
+count_selector_atts(fz_css_selector *sel)
+{
+	int total = count_condition_atts(sel->cond);
+	if (!sel->left || !sel->right)
+		return total;
+	total += count_selector_atts(sel->left);
+	total += count_selector_atts(sel->right);
+	return total;
+}
+
+/* Number of ':' conditions in a condition chain. */
+static int
+count_condition_names(fz_css_condition *cond)
+{
+	int total = 0;
+	for (; cond; cond = cond->next)
+		if (cond->type == ':')
+			++total;
+	return total;
+}
+
+/*
+ * Number of ':' conditions plus element names in a selector tree.
+ * A node with both children contributes its children's counts; otherwise
+ * a node that carries a name contributes one.
+ */
+static int
+count_selector_names(fz_css_selector *sel)
+{
+	int total = count_condition_names(sel->cond);
+	if (sel->left && sel->right)
+		return total + count_selector_names(sel->left) + count_selector_names(sel->right);
+	if (sel->name)
+		return total + 1;
+	return total;
+}
+
+/* Specificity given to properties that come from a node's style attribute. */
+#define INLINE_SPECIFICITY 1000
+
+/*
+ * Combine the three counts into one integer: ids weigh 100,
+ * attribute-like conditions 10, names and ':' conditions 1.
+ */
+static int
+selector_specificity(fz_css_selector *sel)
+{
+	int ids = count_selector_ids(sel);
+	int atts = count_selector_atts(sel);
+	int names = count_selector_names(sel);
+	return ids * 100 + atts * 10 + names;
+}
+
+/*
+ * Selector matching
+ */
+
+/* Return 1 when the node has an "id" attribute equal to 'p', else 0. */
+static int
+match_id_condition(fz_xml *node, const char *p)
+{
+	const char *id = fz_xml_att(node, "id");
+	if (!id)
+		return 0;
+	return strcmp(id, p) == 0 ? 1 : 0;
+}
+
+/* HTML ASCII whitespace, the separator between class tokens. */
+static int
+is_class_space(int c)
+{
+	return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
+}
+
+/*
+ * Return 1 when 'p' is one of the whitespace-separated tokens of the
+ * node's "class" attribute, else 0.
+ *
+ * The attribute is scanned in place. The previous version copied it
+ * into a fixed 1024-byte stack buffer with strcpy(), which overflowed
+ * on long attributes, and used strtok(), which keeps hidden static state.
+ */
+static int
+match_class_condition(fz_xml *node, const char *p)
+{
+	const char *s = fz_xml_att(node, "class");
+	size_t plen;
+
+	if (!s || !p || !*p)
+		return 0;
+
+	plen = strlen(p);
+	while (*s)
+	{
+		const char *start;
+
+		while (is_class_space((unsigned char)*s))
+			++s;
+		start = s;
+		while (*s && !is_class_space((unsigned char)*s))
+			++s;
+
+		if ((size_t)(s - start) == plen && !memcmp(start, p, plen))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Return 1 when every condition in the chain holds for 'node'.
+ * Only '#' (id) and '.' (class) conditions can match; any other
+ * type, including ':' pseudo-classes, makes the whole chain fail.
+ */
+static int
+match_condition(fz_css_condition *cond, fz_xml *node)
+{
+	for (; cond; cond = cond->next)
+	{
+		switch (cond->type)
+		{
+		case '#':
+			if (!match_id_condition(node, cond->val))
+				return 0;
+			break;
+		case '.':
+			if (!match_class_condition(node, cond->val))
+				return 0;
+			break;
+		case ':': /* don't support pseudo-classes */
+		default:
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Return 1 when selector 'sel' matches 'node', else 0.
+ *
+ * Combinators:
+ *   ' '  descendant: right side matches node, left side matches any ancestor
+ *   '>'  child: left side matches the direct parent
+ *   '+'  adjacent: left side matches the nearest preceding sibling with a tag
+ * After the '>' and '+' checks (and for plain selectors) the node must
+ * also satisfy the selector's own element name and condition chain.
+ */
+static int
+match_selector(fz_css_selector *sel, fz_xml *node)
+{
+	if (!node)
+		return 0;
+
+	if (sel->combine)
+	{
+		/* descendant */
+		if (sel->combine == ' ')
+		{
+			fz_xml *parent;
+
+			/* The right-hand side does not depend on which ancestor
+			 * is tried, so test it once instead of once per ancestor. */
+			if (!match_selector(sel->right, node))
+				return 0;
+			for (parent = fz_xml_up(node); parent; parent = fz_xml_up(parent))
+				if (match_selector(sel->left, parent))
+					return 1;
+			return 0;
+		}
+
+		/* child */
+		if (sel->combine == '>')
+		{
+			fz_xml *parent = fz_xml_up(node);
+			if (!parent)
+				return 0;
+			if (!match_selector(sel->left, parent))
+				return 0;
+			if (!match_selector(sel->right, node))
+				return 0;
+		}
+
+		/* adjacent */
+		if (sel->combine == '+')
+		{
+			/* skip siblings without a tag */
+			fz_xml *prev = fz_xml_prev(node);
+			while (prev && !fz_xml_tag(prev))
+				prev = fz_xml_prev(prev);
+			if (!prev)
+				return 0;
+			if (!match_selector(sel->left, prev))
+				return 0;
+			if (!match_selector(sel->right, node))
+				return 0;
+		}
+	}
+
+	if (sel->name)
+	{
+		/* A node without a tag can never match an element name;
+		 * guard so strcmp() is not handed a null pointer. */
+		const char *tag = fz_xml_tag(node);
+		if (!tag || strcmp(sel->name, tag))
+			return 0;
+	}
+
+	if (sel->cond)
+	{
+		if (!match_condition(sel->cond, node))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Annotating nodes with properties and expanding shorthand forms.
+ */
+
+/* Length of a value list linked through ->next. */
+static int
+count_values(fz_css_value *value)
+{
+	int len;
+	for (len = 0; value; value = value->next)
+		++len;
+	return len;
+}
+
+static void add_property(fz_css_match *match, const char *name, fz_css_value *value, int spec);
+
+/*
+ * Expand a 1-4 value shorthand into its top/right/bottom/left properties:
+ *   1 value:  all four sides
+ *   2 values: top+bottom, right+left
+ *   3 values: top, right+left, bottom
+ *   4 values: top, right, bottom, left
+ * Any other number of values adds nothing.
+ */
+static void
+add_shorthand_trbl(fz_css_match *match, fz_css_value *value, int spec,
+	const char *name_t, const char *name_r, const char *name_b, const char *name_l)
+{
+	fz_css_value *top, *right, *bottom, *left;
+
+	switch (count_values(value))
+	{
+	case 1:
+		top = right = bottom = left = value;
+		break;
+	case 2:
+		top = bottom = value;
+		right = left = value->next;
+		break;
+	case 3:
+		top = value;
+		right = left = value->next;
+		bottom = value->next->next;
+		break;
+	case 4:
+		top = value;
+		right = value->next;
+		bottom = value->next->next;
+		left = value->next->next->next;
+		break;
+	default:
+		return;
+	}
+
+	add_property(match, name_t, top, spec);
+	add_property(match, name_r, right, spec);
+	add_property(match, name_b, bottom, spec);
+	add_property(match, name_l, left, spec);
+}
+
+/* Expand the 'margin' shorthand into the four margin-* properties. */
+static void
+add_shorthand_margin(fz_css_match *match, fz_css_value *value, int spec)
+{
+	add_shorthand_trbl(match, value, spec,
+		"margin-top",
+		"margin-right",
+		"margin-bottom",
+		"margin-left");
+}
+
+/* Expand the 'padding' shorthand into the four padding-* properties. */
+static void
+add_shorthand_padding(fz_css_match *match, fz_css_value *value, int spec)
+{
+	add_shorthand_trbl(match, value, spec,
+		"padding-top",
+		"padding-right",
+		"padding-bottom",
+		"padding-left");
+}
+
+/* Expand the 'border-width' shorthand into the four border-width-* properties. */
+static void
+add_shorthand_border_width(fz_css_match *match, fz_css_value *value, int spec)
+{
+	add_shorthand_trbl(match, value, spec,
+		"border-width-top",
+		"border-width-right",
+		"border-width-bottom",
+		"border-width-left");
+}
+
+/* Apply one width value to all four border-width-* properties. */
+static void
+add_border_width_all_sides(fz_css_match *match, fz_css_value *value, int spec)
+{
+	add_property(match, "border-width-top", value, spec);
+	add_property(match, "border-width-right", value, spec);
+	add_property(match, "border-width-bottom", value, spec);
+	add_property(match, "border-width-left", value, spec);
+}
+
+/*
+ * Expand the 'border' shorthand. Each value in the list is classified
+ * on its own: color tokens and color keywords set border-color, style
+ * keywords set border-style, width keywords and every non-keyword value
+ * set the width on all four sides. Unrecognised keywords are ignored.
+ */
+static void
+add_shorthand_border(fz_css_match *match, fz_css_value *value, int spec)
+{
+	for (; value; value = value->next)
+	{
+		if (value->type == CSS_COLOR)
+		{
+			add_property(match, "border-color", value, spec);
+			continue;
+		}
+
+		if (value->type != CSS_KEYWORD)
+		{
+			add_border_width_all_sides(match, value, spec);
+			continue;
+		}
+
+		if (keyword_in_list(value->data, border_width_kw, nelem(border_width_kw)))
+			add_border_width_all_sides(match, value, spec);
+		else if (keyword_in_list(value->data, border_style_kw, nelem(border_style_kw)))
+			add_property(match, "border-style", value, spec);
+		else if (keyword_in_list(value->data, color_kw, nelem(color_kw)))
+			add_property(match, "border-color", value, spec);
+	}
+}
+
+/*
+ * Record 'name: value' in the match with specificity 'spec'.
+ *
+ * Shorthand names are expanded into their longhand properties first.
+ * An existing entry with the same name is overwritten only when its
+ * specificity is not higher than 'spec'. When the property table is
+ * full the new property is silently dropped.
+ *
+ * Only the pointers are stored; 'name' and 'value' must outlive the match.
+ */
+static void
+add_property(fz_css_match *match, const char *name, fz_css_value *value, int spec)
+{
+	int k;
+
+	if (!strcmp(name, "margin"))
+	{
+		add_shorthand_margin(match, value, spec);
+		return;
+	}
+	if (!strcmp(name, "padding"))
+	{
+		add_shorthand_padding(match, value, spec);
+		return;
+	}
+	if (!strcmp(name, "border-width"))
+	{
+		add_shorthand_border_width(match, value, spec);
+		return;
+	}
+	if (!strcmp(name, "border"))
+	{
+		add_shorthand_border(match, value, spec);
+		return;
+	}
+
+	/* shorthand expansions still missing: */
+	/* TODO: border-color */
+	/* TODO: border-style */
+	/* TODO: font */
+	/* TODO: list-style */
+	/* TODO: background */
+
+	for (k = 0; k < match->count; ++k)
+	{
+		if (strcmp(match->prop[k].name, name) != 0)
+			continue;
+		if (spec >= match->prop[k].spec)
+		{
+			match->prop[k].value = value;
+			match->prop[k].spec = spec;
+		}
+		return;
+	}
+
+	/* table full; no context is available here to emit a warning */
+	if (match->count + 1 >= nelem(match->prop))
+		return;
+
+	k = match->count;
+	match->prop[k].name = name;
+	match->prop[k].value = value;
+	match->prop[k].spec = spec;
+	match->count = k + 1;
+}
+
+/*
+ * Collect into 'match' every property that applies to 'node'.
+ *
+ * For each rule, the first selector in its selector list that matches
+ * the node contributes the rule's declarations at that selector's
+ * specificity. Properties from the node's "style" attribute are then
+ * added with INLINE_SPECIFICITY.
+ *
+ * The match stores pointers into the parsed inline properties, so they
+ * cannot be freed here; they are pushed onto css->garbage and released
+ * together with the stylesheet. 'css' must therefore be non-NULL when
+ * the node carries a style attribute.
+ */
+void
+fz_match_css(fz_context *ctx, fz_css_match *match, fz_css_rule *css, fz_xml *node)
+{
+	fz_css_rule *rule;
+	fz_css_selector *sel;
+	fz_css_property *prop, *head, *tail;
+	const char *s;
+
+	for (rule = css; rule; rule = rule->next)
+	{
+		for (sel = rule->selector; sel; sel = sel->next)
+		{
+			if (match_selector(sel, node))
+			{
+				int spec = selector_specificity(sel);
+				for (prop = rule->declaration; prop; prop = prop->next)
+					add_property(match, prop->name, prop->value, spec);
+				break;
+			}
+		}
+	}
+
+	s = fz_xml_att(node, "style");
+	if (s)
+	{
+		head = fz_parse_css_properties(ctx, s);
+		/* Only splice when something was parsed. Assigning an empty
+		 * list to css->garbage would drop (and leak) every inline
+		 * property list collected so far. */
+		if (head)
+		{
+			tail = head;
+			for (prop = head; prop; prop = prop->next)
+			{
+				add_property(match, prop->name, prop->value, INLINE_SPECIFICITY);
+				tail = prop;
+			}
+			tail->next = css->garbage;
+			css->garbage = head;
+		}
+	}
+}
+
+/* Value recorded in this match for 'name', or NULL; parents are not consulted. */
+static fz_css_value *
+value_from_raw_property(fz_css_match *match, const char *name)
+{
+	int k = 0;
+	while (k < match->count)
+	{
+		if (strcmp(match->prop[k].name, name) == 0)
+			return match->prop[k].value;
+		++k;
+	}
+	return NULL;
+}
+
+/*
+ * Look up 'name' with inheritance: when a parent match exists, an
+ * explicit "inherit" value, or a missing value for a property listed
+ * in inherit_list, is resolved through the parent.
+ */
+static fz_css_value *
+value_from_property(fz_css_match *match, const char *name)
+{
+	fz_css_value *own = value_from_raw_property(match, name);
+
+	if (!match->up)
+		return own;
+
+	if (own)
+	{
+		if (!strcmp(own->data, "inherit"))
+			return value_from_property(match->up, name);
+		return own;
+	}
+
+	if (keyword_in_list(name, inherit_list, nelem(inherit_list)))
+		return value_from_property(match->up, name);
+	return own;
+}
+
+/* Text of the property's first value, or 'initial' when the property is unset. */
+static const char *
+string_from_property(fz_css_match *match, const char *name, const char *initial)
+{
+	fz_css_value *found = value_from_property(match, name);
+	return found ? found->data : initial;
+}
+
+/* Build a fz_css_number from a magnitude and a unit code. */
+static fz_css_number
+make_number(float v, int u)
+{
+	fz_css_number result;
+	result.unit = u;
+	result.value = v;
+	return result;
+}
+
+/*
+ * Convert a parsed CSS value to a number.
+ *
+ * Percentages and plain numbers keep their magnitude. Lengths are
+ * normalised by their unit suffix: em and ex become N_SCALE (ex counts
+ * as half an em); in, cm, mm and pc are converted using 72 units per
+ * inch; pt, px and unknown suffixes are taken as-is. A NULL value or
+ * any other value type yields (initial, initial_unit).
+ */
+static fz_css_number
+number_from_value(fz_css_value *value, float initial, int initial_unit)
+{
+	char *unit;
+	float x;
+
+	if (!value)
+		return make_number(initial, initial_unit);
+
+	switch (value->type)
+	{
+	case CSS_PERCENT:
+		return make_number(strtof(value->data, NULL), N_PERCENT);
+	case CSS_NUMBER:
+		return make_number(strtof(value->data, NULL), N_NUMBER);
+	case CSS_LENGTH:
+		break;
+	default:
+		return make_number(initial, initial_unit);
+	}
+
+	x = strtof(value->data, &unit);
+
+	/* font-relative units */
+	if (!strncmp(unit, "em", 2))
+		return make_number(x, N_SCALE);
+	if (!strncmp(unit, "ex", 2))
+		return make_number(x / 2, N_SCALE);
+
+	/* absolute units */
+	if (!strncmp(unit, "in", 2))
+		return make_number(x * 72, N_NUMBER);
+	if (!strncmp(unit, "cm", 2))
+		return make_number(x * 7200 / 254, N_NUMBER);
+	if (!strncmp(unit, "mm", 2))
+		return make_number(x * 720 / 254, N_NUMBER);
+	if (!strncmp(unit, "pc", 2))
+		return make_number(x * 12, N_NUMBER);
+
+	/* pt, px and anything unrecognised */
+	return make_number(x, N_NUMBER);
+}
+
+/* Resolve 'property' (with inheritance) and convert it with number_from_value. */
+static fz_css_number
+number_from_property(fz_css_match *match, const char *property, float initial, int initial_unit)
+{
+	fz_css_value *found = value_from_property(match, property);
+	return number_from_value(found, initial, initial_unit);
+}
+
+/*
+ * Border width for 'property': thin = 1, medium = 2, thick = 4,
+ * otherwise the numeric value. An unset property is 'medium'.
+ */
+static fz_css_number
+border_width_from_property(fz_css_match *match, const char *property)
+{
+	fz_css_value *found = value_from_property(match, property);
+
+	if (!found)
+		return make_number(2, N_NUMBER); /* initial: 'medium' */
+
+	if (!strcmp(found->data, "thin"))
+		return make_number(1, N_NUMBER);
+	if (!strcmp(found->data, "medium"))
+		return make_number(2, N_NUMBER);
+	if (!strcmp(found->data, "thick"))
+		return make_number(4, N_NUMBER);
+	return number_from_value(found, 0, N_NUMBER);
+}
+
+/*
+ * Resolve a CSS number to a float: N_SCALE values are multiples of
+ * 'em', N_PERCENT values are percentages of 'width', anything else is
+ * returned unchanged.
+ */
+float
+fz_from_css_number(fz_css_number number, float em, float width)
+{
+	if (number.unit == N_SCALE)
+		return number.value * em;
+	if (number.unit == N_PERCENT)
+		return number.value * 0.01 * width;
+	return number.value;
+}
+
+/*
+ * Like fz_from_css_number, except that values which are neither
+ * N_SCALE nor N_PERCENT are multiplied by 'scale'.
+ */
+float
+fz_from_css_number_scale(fz_css_number number, float scale, float em, float width)
+{
+	if (number.unit == N_SCALE)
+		return number.value * em;
+	if (number.unit == N_PERCENT)
+		return number.value * 0.01 * width;
+	return number.value * scale;
+}
+
+/* Build a fz_css_color from its four components. */
+static fz_css_color
+make_color(int r, int g, int b, int a)
+{
+	fz_css_color result;
+	result.a = a;
+	result.b = b;
+	result.g = g;
+	result.r = r;
+	return result;
+}
+
+/*
+ * Value of one hexadecimal digit character (either letter case).
+ * The input is not validated; callers must pass a hex digit.
+ */
+static int tohex(int c)
+{
+	int digit = c - '0';
+	return digit < 10 ? digit : (c | 32) - 'a' + 10;
+}
+
+/*
+ * Convert a CSS value to a color.
+ *
+ * CSS_COLOR values are read as six hex digits (rrggbb), fully opaque.
+ * Keywords are looked up in the named-color table; an unknown keyword
+ * gives opaque black. A NULL value or any other value type returns
+ * 'initial'.
+ */
+static fz_css_color
+color_from_value(fz_css_value *value, fz_css_color initial)
+{
+	static const struct { const char *name; int r, g, b, a; } named[] = {
+		{ "aqua", 0x00, 0xFF, 0xFF, 255 },
+		{ "black", 0x00, 0x00, 0x00, 255 },
+		{ "blue", 0x00, 0x00, 0xFF, 255 },
+		{ "fuchsia", 0xFF, 0x00, 0xFF, 255 },
+		{ "gray", 0x80, 0x80, 0x80, 255 },
+		{ "green", 0x00, 0x80, 0x00, 255 },
+		{ "lime", 0x00, 0xFF, 0x00, 255 },
+		{ "maroon", 0x80, 0x00, 0x00, 255 },
+		{ "navy", 0x00, 0x00, 0x80, 255 },
+		{ "olive", 0x80, 0x80, 0x00, 255 },
+		{ "orange", 0xFF, 0xA5, 0x00, 255 },
+		{ "purple", 0x80, 0x00, 0x80, 255 },
+		{ "red", 0xFF, 0x00, 0x00, 255 },
+		{ "silver", 0xC0, 0xC0, 0xC0, 255 },
+		{ "teal", 0x00, 0x80, 0x80, 255 },
+		{ "transparent", 0, 0, 0, 0 },
+		{ "white", 0xFF, 0xFF, 0xFF, 255 },
+		{ "yellow", 0xFF, 0xFF, 0x00, 255 },
+	};
+
+	if (!value)
+		return initial;
+
+	if (value->type == CSS_COLOR)
+	{
+		const char *hex = value->data;
+		int r = tohex(hex[0]) * 16 + tohex(hex[1]);
+		int g = tohex(hex[2]) * 16 + tohex(hex[3]);
+		int b = tohex(hex[4]) * 16 + tohex(hex[5]);
+		return make_color(r, g, b, 255);
+	}
+
+	if (value->type == CSS_KEYWORD)
+	{
+		size_t k;
+		for (k = 0; k < sizeof named / sizeof named[0]; ++k)
+			if (!strcmp(value->data, named[k].name))
+				return make_color(named[k].r, named[k].g, named[k].b, named[k].a);
+		/* unknown keyword */
+		return make_color(0, 0, 0, 255);
+	}
+
+	return initial;
+}
+
+/* Resolve 'property' (with inheritance) and convert it with color_from_value. */
+static fz_css_color
+color_from_property(fz_css_match *match, const char *property, fz_css_color initial)
+{
+	fz_css_value *found = value_from_property(match, property);
+	return color_from_value(found, initial);
+}
+
+/*
+ * Map the 'display' property to a DIS_* constant. A missing or
+ * unrecognised value is treated as inline.
+ */
+int
+fz_get_css_match_display(fz_css_match *match)
+{
+	fz_css_value *found = value_from_property(match, "display");
+	const char *kw;
+
+	if (!found)
+		return DIS_INLINE;
+
+	kw = found->data;
+	if (!strcmp(kw, "none"))
+		return DIS_NONE;
+	if (!strcmp(kw, "inline"))
+		return DIS_INLINE;
+	if (!strcmp(kw, "block"))
+		return DIS_BLOCK;
+	if (!strcmp(kw, "list-item"))
+		return DIS_LIST_ITEM;
+	return DIS_INLINE;
+}
+
+/*
+ * Map the 'white-space' property to a WS_* constant. A missing or
+ * unrecognised value is treated as normal.
+ */
+static int
+white_space_from_property(fz_css_match *match)
+{
+	fz_css_value *found = value_from_property(match, "white-space");
+	const char *kw;
+
+	if (!found)
+		return WS_NORMAL;
+
+	kw = found->data;
+	if (!strcmp(kw, "normal"))
+		return WS_NORMAL;
+	if (!strcmp(kw, "pre"))
+		return WS_PRE;
+	if (!strcmp(kw, "nowrap"))
+		return WS_NOWRAP;
+	if (!strcmp(kw, "pre-wrap"))
+		return WS_PRE_WRAP;
+	if (!strcmp(kw, "pre-line"))
+		return WS_PRE_LINE;
+	return WS_NORMAL;
+}
+
+/*
+ * Reset 'style' to defaults: every field zeroed, then left-aligned
+ * text, baseline vertical alignment, normal white-space handling and a
+ * font size of 1 in N_SCALE units.
+ */
+void
+fz_default_css_style(fz_context *ctx, fz_css_style *style)
+{
+	memset(style, 0, sizeof *style);
+	style->font_size = make_number(1, N_SCALE);
+	style->white_space = WS_NORMAL;
+	style->vertical_align = VA_BASELINE;
+	style->text_align = TA_LEFT;
+}
+
+/*
+ * Compute the style for a node from its matched properties.
+ *
+ * 'style' is first reset with fz_default_css_style, then each supported
+ * property is resolved (with inheritance) from 'match'. Unrecognised
+ * keyword values leave the corresponding default in place. The font is
+ * loaded from 'set' through fz_load_html_font.
+ */
+void
+fz_apply_css_style(fz_context *ctx, fz_html_font_set *set, fz_css_style *style, fz_css_match *match)
+{
+	static const char *margin_names[4] = {
+		"margin-top", "margin-right", "margin-bottom", "margin-left" };
+	static const char *padding_names[4] = {
+		"padding-top", "padding-right", "padding-bottom", "padding-left" };
+	static const char *border_width_names[4] = {
+		"border-width-top", "border-width-right", "border-width-bottom", "border-width-left" };
+
+	fz_css_value *value;
+	const char *kw;
+	int side;
+
+	fz_default_css_style(ctx, style);
+
+	style->white_space = white_space_from_property(match);
+
+	value = value_from_property(match, "text-align");
+	if (value)
+	{
+		kw = value->data;
+		if (!strcmp(kw, "left"))
+			style->text_align = TA_LEFT;
+		else if (!strcmp(kw, "right"))
+			style->text_align = TA_RIGHT;
+		else if (!strcmp(kw, "center"))
+			style->text_align = TA_CENTER;
+		else if (!strcmp(kw, "justify"))
+			style->text_align = TA_JUSTIFY;
+	}
+
+	value = value_from_property(match, "vertical-align");
+	if (value)
+	{
+		kw = value->data;
+		if (!strcmp(kw, "baseline"))
+			style->vertical_align = VA_BASELINE;
+		else if (!strcmp(kw, "sub"))
+			style->vertical_align = VA_SUB;
+		else if (!strcmp(kw, "super"))
+			style->vertical_align = VA_SUPER;
+		else if (!strcmp(kw, "top"))
+			style->vertical_align = VA_TOP;
+		else if (!strcmp(kw, "bottom"))
+			style->vertical_align = VA_BOTTOM;
+	}
+
+	/* Size keywords are expressed as N_SCALE factors. */
+	value = value_from_property(match, "font-size");
+	if (!value)
+	{
+		style->font_size = make_number(1, N_SCALE);
+	}
+	else
+	{
+		kw = value->data;
+		if (!strcmp(kw, "xx-large"))
+			style->font_size = make_number(1.73, N_SCALE);
+		else if (!strcmp(kw, "x-large"))
+			style->font_size = make_number(1.44, N_SCALE);
+		else if (!strcmp(kw, "large"))
+			style->font_size = make_number(1.2, N_SCALE);
+		else if (!strcmp(kw, "medium"))
+			style->font_size = make_number(1, N_SCALE);
+		else if (!strcmp(kw, "small"))
+			style->font_size = make_number(0.83, N_SCALE);
+		else if (!strcmp(kw, "x-small"))
+			style->font_size = make_number(0.69, N_SCALE);
+		else if (!strcmp(kw, "xx-small"))
+			style->font_size = make_number(0.69, N_SCALE);
+		else if (!strcmp(kw, "larger"))
+			style->font_size = make_number(1.2f, N_SCALE);
+		else if (!strcmp(kw, "smaller"))
+			style->font_size = make_number(1/1.2f, N_SCALE);
+		else
+			style->font_size = number_from_value(value, 12, N_NUMBER);
+	}
+
+	/* Only none, hidden and solid are recognised here. */
+	value = value_from_property(match, "border-style");
+	if (value)
+	{
+		kw = value->data;
+		if (!strcmp(kw, "none"))
+			style->border_style = BS_NONE;
+		else if (!strcmp(kw, "hidden"))
+			style->border_style = BS_NONE;
+		else if (!strcmp(kw, "solid"))
+			style->border_style = BS_SOLID;
+	}
+
+	style->line_height = number_from_property(match, "line-height", 1.2, N_SCALE);
+
+	style->text_indent = number_from_property(match, "text-indent", 0, N_NUMBER);
+
+	/* index order is top, right, bottom, left */
+	for (side = 0; side < 4; ++side)
+		style->margin[side] = number_from_property(match, margin_names[side], 0, N_NUMBER);
+	for (side = 0; side < 4; ++side)
+		style->padding[side] = number_from_property(match, padding_names[side], 0, N_NUMBER);
+	for (side = 0; side < 4; ++side)
+		style->border_width[side] = border_width_from_property(match, border_width_names[side]);
+
+	/* text defaults to opaque black, background to transparent,
+	 * and the border to the text color */
+	style->color = color_from_property(match, "color", make_color(0, 0, 0, 255));
+	style->background_color = color_from_property(match, "background-color", make_color(0, 0, 0, 0));
+	style->border_color = color_from_property(match, "border-color", style->color);
+
+	{
+		const char *family = string_from_property(match, "font-family", "serif");
+		const char *variant = string_from_property(match, "font-variant", "normal");
+		const char *slant = string_from_property(match, "font-style", "normal");
+		const char *weight = string_from_property(match, "font-weight", "normal");
+		style->font = fz_load_html_font(ctx, set, family, variant, slant, weight);
+	}
+}
+
+/*
+ * Pretty printing
+ */
+
+/*
+ * Print a value list to stdout: values separated by single spaces,
+ * with any argument list in parentheses right after its value.
+ */
+void
+print_value(fz_css_value *val)
+{
+	for (;;)
+	{
+		printf("%s", val->data);
+		if (val->args)
+		{
+			printf("(");
+			print_value(val->args);
+			printf(")");
+		}
+		if (!val->next)
+			break;
+		printf(" ");
+		val = val->next;
+	}
+}
+
+/* Print one declaration as a tab-indented "name: value !spec;" line. */
+void
+print_property(fz_css_property *prop)
+{
+	const char *label = prop->name;
+	printf("\t%s: ", label);
+	print_value(prop->value);
+	printf(" !%d;\n", prop->spec);
+}
+
+/*
+ * Print a condition chain: '=' as [key=val], '[' as [key], every other
+ * type as its type character followed by its value.
+ */
+void
+print_condition(fz_css_condition *cond)
+{
+	for (;;)
+	{
+		switch (cond->type)
+		{
+		case '=':
+			printf("[%s=%s]", cond->key, cond->val);
+			break;
+		case '[':
+			printf("[%s]", cond->key);
+			break;
+		default:
+			printf("%c%s", cond->type, cond->val);
+			break;
+		}
+		if (!cond->next)
+			break;
+		cond = cond->next;
+	}
+}
+
+/*
+ * Print a selector. Combinator nodes are written as
+ * "(left <combinator> right)"; simple selectors as their element name,
+ * or "*" when they have none. Conditions follow in both cases.
+ */
+void
+print_selector(fz_css_selector *sel)
+{
+	if (!sel->combine)
+	{
+		if (sel->name)
+			printf("%s", sel->name);
+		else
+			printf("*");
+	}
+	else
+	{
+		putchar('(');
+		print_selector(sel->left);
+		if (sel->combine == ' ')
+			printf(" ");
+		else
+			printf(" %c ", sel->combine);
+		print_selector(sel->right);
+		putchar(')');
+	}
+
+	if (sel->cond)
+		print_condition(sel->cond);
+}
+
+/*
+ * Print one rule: its comma-separated selectors, each followed by
+ * "!<specificity>", then the declarations inside braces.
+ */
+void
+print_rule(fz_css_rule *rule)
+{
+	fz_css_selector *sel = rule->selector;
+	fz_css_property *prop = rule->declaration;
+
+	while (sel)
+	{
+		print_selector(sel);
+		printf(" !%d", selector_specificity(sel));
+		if (sel->next)
+			printf(", ");
+		sel = sel->next;
+	}
+
+	printf("\n{\n");
+	while (prop)
+	{
+		print_property(prop);
+		prop = prop->next;
+	}
+	printf("}\n");
+}
+
+/* Print every rule in the list, in order. */
+void
+print_rules(fz_css_rule *rule)
+{
+	for (; rule; rule = rule->next)
+		print_rule(rule);
+}
+
+/*
+ * Dump selected fields of a computed style to stdout. Numbers are
+ * printed as their value followed by the unit code as a character.
+ */
+void
+print_style(fz_css_style *style)
+{
+	const fz_css_number *m = style->margin;
+	const fz_css_number *p = style->padding;
+
+	printf("style {\n");
+	printf("\tfont-size = %g%c;\n", style->font_size.value, style->font_size.unit);
+	printf("\tfont = %s;\n", style->font->name);
+	printf("\tline-height = %g%c;\n", style->line_height.value, style->line_height.unit);
+	printf("\ttext-indent = %g%c;\n", style->text_indent.value, style->text_indent.unit);
+	printf("\ttext-align = %d;\n", style->text_align);
+	printf("\tvertical-align = %d;\n", style->vertical_align);
+	printf("\tmargin = %g%c %g%c %g%c %g%c;\n",
+		m[0].value, m[0].unit,
+		m[1].value, m[1].unit,
+		m[2].value, m[2].unit,
+		m[3].value, m[3].unit);
+	printf("\tpadding = %g%c %g%c %g%c %g%c;\n",
+		p[0].value, p[0].unit,
+		p[1].value, p[1].unit,
+		p[2].value, p[2].unit,
+		p[3].value, p[3].unit);
+	printf("}\n");
+}
diff --git a/source/html/css-parse.c b/source/html/css-parse.c
new file mode 100644
index 00000000..e3ddd48f
--- /dev/null
+++ b/source/html/css-parse.c
@@ -0,0 +1,865 @@
+#include "mupdf/html.h"
+
+struct lexbuf /* state of the CSS lexer */
+{
+ fz_context *ctx; /* context used to throw syntax errors */
+ const char *s; /* next unread byte of the NUL-terminated input */
+ const char *file; /* name reported in error messages */
+ int line; /* current line number, starting at 1 */
+ int lookahead; /* not referenced in this part of the file; presumably the parser's lookahead token - TODO confirm */
+ int c; /* current character; 0 at end of input */
+ int color; /* numeric value of the most recent '#' color token */
+ int string_len; /* number of bytes used in string[] */
+ char string[1024]; /* text of the token being lexed */
+};
+
+FZ_NORETURN static void fz_css_error(struct lexbuf *buf, const char *msg)
+{
+ fz_throw(buf->ctx, FZ_ERROR_GENERIC, "css syntax error: %s (%s:%d)", msg, buf->file, buf->line);
+}
+
+/*
+ * Allocate a rule that refers to the given selector list and
+ * declaration list (the pointers are stored, not copied). The rule
+ * starts with an empty garbage list and no successor.
+ */
+static fz_css_rule *fz_new_css_rule(fz_context *ctx, fz_css_selector *selector, fz_css_property *declaration)
+{
+	fz_css_rule *fresh = fz_malloc_struct(ctx, fz_css_rule);
+	fresh->next = NULL;
+	fresh->garbage = NULL;
+	fresh->declaration = declaration;
+	fresh->selector = selector;
+	return fresh;
+}
+
+/*
+ * Allocate a simple selector with a private copy of 'name' (which may
+ * be NULL), no combinator, no conditions and no children.
+ */
+static fz_css_selector *fz_new_css_selector(fz_context *ctx, const char *name)
+{
+	fz_css_selector *fresh = fz_malloc_struct(ctx, fz_css_selector);
+	if (name)
+		fresh->name = fz_strdup(ctx, name);
+	else
+		fresh->name = NULL;
+	fresh->combine = 0;
+	fresh->cond = NULL;
+	fresh->left = NULL;
+	fresh->right = NULL;
+	fresh->next = NULL;
+	return fresh;
+}
+
+/*
+ * Allocate a condition of the given type with private copies of 'key'
+ * and 'val'; either may be NULL.
+ */
+static fz_css_condition *fz_new_css_condition(fz_context *ctx, int type, const char *key, const char *val)
+{
+	fz_css_condition *fresh = fz_malloc_struct(ctx, fz_css_condition);
+	fresh->type = type;
+	if (key)
+		fresh->key = fz_strdup(ctx, key);
+	else
+		fresh->key = NULL;
+	if (val)
+		fresh->val = fz_strdup(ctx, val);
+	else
+		fresh->val = NULL;
+	fresh->next = NULL;
+	return fresh;
+}
+
+/*
+ * Allocate a property with a private copy of 'name'. The value list
+ * pointer is stored as given, not copied.
+ */
+static fz_css_property *fz_new_css_property(fz_context *ctx, const char *name, fz_css_value *value, int spec)
+{
+	fz_css_property *fresh = fz_malloc_struct(ctx, fz_css_property);
+	fresh->name = fz_strdup(ctx, name);
+	fresh->next = NULL;
+	fresh->spec = spec;
+	fresh->value = value;
+	return fresh;
+}
+
+/* Allocate a value of the given type holding a private copy of 'data'. */
+static fz_css_value *fz_new_css_value(fz_context *ctx, int type, const char *data)
+{
+	fz_css_value *fresh = fz_malloc_struct(ctx, fz_css_value);
+	fresh->type = type;
+	fresh->data = fz_strdup(ctx, data);
+	fresh->next = NULL;
+	fresh->args = NULL;
+	return fresh;
+}
+
+/* Free a value list, including each value's argument list and text. */
+static void fz_drop_css_value(fz_context *ctx, fz_css_value *val)
+{
+	fz_css_value *following;
+	for (; val; val = following)
+	{
+		following = val->next;
+		fz_drop_css_value(ctx, val->args);
+		fz_free(ctx, val->data);
+		fz_free(ctx, val);
+	}
+}
+
+/* Free a condition chain together with each condition's key and value strings. */
+static void fz_drop_css_condition(fz_context *ctx, fz_css_condition *cond)
+{
+	fz_css_condition *following;
+	for (; cond; cond = following)
+	{
+		following = cond->next;
+		fz_free(ctx, cond->key);
+		fz_free(ctx, cond->val);
+		fz_free(ctx, cond);
+	}
+}
+
+/*
+ * Free a selector list. For each selector this releases its name, its
+ * condition chain and, recursively, both combinator children.
+ */
+static void fz_drop_css_selector(fz_context *ctx, fz_css_selector *sel)
+{
+	fz_css_selector *following;
+	for (; sel; sel = following)
+	{
+		following = sel->next;
+		fz_free(ctx, sel->name);
+		fz_drop_css_condition(ctx, sel->cond);
+		fz_drop_css_selector(ctx, sel->left);
+		fz_drop_css_selector(ctx, sel->right);
+		fz_free(ctx, sel);
+	}
+}
+
+/* Free a property list together with each property's name and value list. */
+static void fz_drop_css_property(fz_context *ctx, fz_css_property *prop)
+{
+	fz_css_property *following;
+	for (; prop; prop = following)
+	{
+		following = prop->next;
+		fz_free(ctx, prop->name);
+		fz_drop_css_value(ctx, prop->value);
+		fz_free(ctx, prop);
+	}
+}
+
+/*
+ * Free a whole rule list: for every rule its selectors, its
+ * declarations and the property lists parked on its garbage list.
+ */
+void fz_drop_css(fz_context *ctx, fz_css_rule *rule)
+{
+	fz_css_rule *following;
+	for (; rule; rule = following)
+	{
+		following = rule->next;
+		fz_drop_css_selector(ctx, rule->selector);
+		fz_drop_css_property(ctx, rule->declaration);
+		fz_drop_css_property(ctx, rule->garbage);
+		fz_free(ctx, rule);
+	}
+}
+
+/*
+ * Load the next input byte into buf->c and count newlines.
+ *
+ * Two fixes over the naive "c = *s++":
+ *  - The byte is read as unsigned char, so bytes >= 0x80 arrive as
+ *    128..255 on every platform. isnmstart() and isnmchar() test for
+ *    exactly that range, which a negative signed char never satisfied.
+ *  - At the terminating NUL the read pointer is not advanced. The lexer
+ *    stays parked at end of input (buf->c == 0) however often it is
+ *    called, instead of reading past the end of the string.
+ */
+static void css_lex_next(struct lexbuf *buf)
+{
+	buf->c = (unsigned char)*buf->s;
+	if (buf->c == 0)
+		return;
+	++buf->s;
+	if (buf->c == '\n')
+		++buf->line;
+}
+
+/*
+ * Point the lexer at the NUL-terminated string 's' and load its first
+ * character. 'file' is only used in error messages. Line numbering
+ * starts at 1; the token buffer and color start out empty.
+ */
+static void css_lex_init(fz_context *ctx, struct lexbuf *buf, const char *s, const char *file)
+{
+	buf->ctx = ctx;
+	buf->file = file;
+	buf->s = s;
+	buf->line = 1;
+	buf->c = 0;
+	buf->string_len = 0;
+	buf->color = 0;
+
+	/* prime buf->c with the first character */
+	css_lex_next(buf);
+}
+
+/* True for space, tab, carriage return, newline and form feed. */
+static int iswhite(int c)
+{
+	switch (c)
+	{
+	case ' ':
+	case '\t':
+	case '\r':
+	case '\n':
+	case '\f':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* True when 'c' may start a name: backslash, underscore, ASCII letter or a byte in 128..255. */
+static int isnmstart(int c)
+{
+	if (c == '\\' || c == '_')
+		return 1;
+	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+		return 1;
+	return c >= 128 && c <= 255;
+}
+
+/* True when 'c' may continue a name: everything isnmstart accepts, plus digits and '-'. */
+static int isnmchar(int c)
+{
+	if (c == '\\' || c == '_' || c == '-')
+		return 1;
+	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+		return 1;
+	if (c >= '0' && c <= '9')
+		return 1;
+	return c >= 128 && c <= 255;
+}
+
+/* Append one character to the token buffer; raises "token too long" when it is full. */
+static void css_push_char(struct lexbuf *buf, int c)
+{
+	int at = buf->string_len;
+	if (at + 1 >= nelem(buf->string))
+		fz_css_error(buf, "token too long");
+	buf->string[at] = c;
+	buf->string_len = at + 1;
+}
+
+/* If the current character is 't', consume it and return 1; otherwise return 0. */
+static int css_lex_accept(struct lexbuf *buf, int t)
+{
+	if (buf->c != t)
+		return 0;
+	css_lex_next(buf);
+	return 1;
+}
+
+/* Consume the character 't', or raise "unexpected character" if it is not next. */
+static void css_lex_expect(struct lexbuf *buf, int t)
+{
+	if (css_lex_accept(buf, t))
+		return;
+	fz_css_error(buf, "unexpected character");
+}
+
+/*
+ * If 'c' is a hexadecimal digit, store its value (0..15) in *v and
+ * return 1. Otherwise return 0 and leave *v untouched.
+ */
+static int ishex(int c, int *v)
+{
+	if (c >= '0' && c <= '9')
+		*v = c - '0';
+	else if (c >= 'A' && c <= 'F')
+		*v = c - 'A' + 0xA;
+	else if (c >= 'a' && c <= 'f')
+		*v = c - 'a' + 0xA;
+	else
+		return 0;
+	return 1;
+}
+
+/* If the current character is a hex digit, store its value in *v, consume it and return 1. */
+static int css_lex_accept_hex(struct lexbuf *buf, int *v)
+{
+	if (!ishex(buf->c, v))
+		return 0;
+	css_lex_next(buf);
+	return 1;
+}
+
+/* Copy a run of decimal digits from the input into the token buffer. */
+static void css_lex_digit_run(struct lexbuf *buf)
+{
+	while (buf->c >= '0' && buf->c <= '9')
+	{
+		css_push_char(buf, buf->c);
+		css_lex_next(buf);
+	}
+}
+
+/*
+ * Lex the rest of a numeric token into the token buffer: digits, an
+ * optional fraction, then either '%' (CSS_PERCENT), a unit name
+ * (CSS_LENGTH) or nothing (CSS_NUMBER). The buffer is NUL-terminated
+ * and includes the '%' or unit suffix.
+ */
+static int css_lex_number(struct lexbuf *buf)
+{
+	int kind = CSS_NUMBER;
+
+	css_lex_digit_run(buf);
+
+	if (css_lex_accept(buf, '.'))
+	{
+		css_push_char(buf, '.');
+		css_lex_digit_run(buf);
+	}
+
+	if (css_lex_accept(buf, '%'))
+	{
+		css_push_char(buf, '%');
+		kind = CSS_PERCENT;
+	}
+	else if (isnmstart(buf->c))
+	{
+		/* unit suffix: one name-start character, then name characters */
+		do
+		{
+			css_push_char(buf, buf->c);
+			css_lex_next(buf);
+		}
+		while (isnmchar(buf->c));
+		kind = CSS_LENGTH;
+	}
+
+	css_push_char(buf, 0);
+	return kind;
+}
+
+/*
+ * Append the remaining name characters of a keyword to the token
+ * buffer, NUL-terminate it and return CSS_KEYWORD.
+ */
+static int css_lex_keyword(struct lexbuf *buf)
+{
+	for (; isnmchar(buf->c); css_lex_next(buf))
+		css_push_char(buf, buf->c);
+	css_push_char(buf, 0);
+	return CSS_KEYWORD;
+}
+
+/*
+ * Lex a quoted string whose opening quote 'q' has already been
+ * consumed. The unescaped text is collected in the token buffer,
+ * NUL-terminated, and CSS_STRING is returned.
+ *
+ * Escapes: \n, \r and \f produce the control character; a backslash
+ * before a form feed, newline, or carriage return (optionally followed
+ * by a newline) is a line continuation and produces nothing; any other
+ * escaped character stands for itself.
+ *
+ * Raises "unexpected character" when the input ends before the closing
+ * quote, including when it ends right after a backslash.
+ */
+static int css_lex_string(struct lexbuf *buf, int q)
+{
+	while (buf->c && buf->c != q)
+	{
+		if (css_lex_accept(buf, '\\'))
+		{
+			/* A backslash was the last character of the input.
+			 * Stop here so the lexer is not advanced past the
+			 * terminator; css_lex_expect() below reports it. */
+			if (buf->c == 0)
+				break;
+
+			if (css_lex_accept(buf, 'n'))
+				css_push_char(buf, '\n');
+			else if (css_lex_accept(buf, 'r'))
+				css_push_char(buf, '\r');
+			else if (css_lex_accept(buf, 'f'))
+				css_push_char(buf, '\f');
+			else if (css_lex_accept(buf, '\f'))
+				/* line continuation */ ;
+			else if (css_lex_accept(buf, '\n'))
+				/* line continuation */ ;
+			else if (css_lex_accept(buf, '\r'))
+				css_lex_accept(buf, '\n');
+			else
+			{
+				css_push_char(buf, buf->c);
+				css_lex_next(buf);
+			}
+		}
+		else
+		{
+			css_push_char(buf, buf->c);
+			css_lex_next(buf);
+		}
+	}
+	css_lex_expect(buf, q);
+	css_push_char(buf, 0);
+	return CSS_STRING;
+}
+
+/*
+ * Return the next token from the input.
+ *
+ * Skips whitespace, comments and the HTML comment delimiters CDO and CDC.
+ * Multi-character tokens (CSS_NUMBER, CSS_LENGTH, CSS_PERCENT,
+ * CSS_STRING, CSS_KEYWORD, CSS_COLOR, CSS_URI) leave their text in
+ * buf->string; any other character is returned as itself. Returns EOF at
+ * end of input.
+ */
+static int css_lex(struct lexbuf *buf)
+{
+	int t;
+
+	// TODO: keyword escape sequences
+
+	buf->string_len = 0;
+
+	while (buf->c)
+	{
+restart:
+		while (iswhite(buf->c))
+			css_lex_next(buf);
+
+		if (buf->c == 0)
+			break;
+
+		if (css_lex_accept(buf, '/'))
+		{
+			if (css_lex_accept(buf, '*'))
+			{
+				/* skip the comment body up to the closing star-slash */
+				while (buf->c)
+				{
+					if (css_lex_accept(buf, '*'))
+					{
+						while (buf->c == '*')
+							css_lex_next(buf);
+						if (css_lex_accept(buf, '/'))
+							goto restart;
+					}
+					css_lex_next(buf);
+				}
+				fz_css_error(buf, "unterminated comment");
+			}
+			return '/';
+		}
+
+		if (css_lex_accept(buf, '<'))
+		{
+			if (css_lex_accept(buf, '!'))
+			{
+				css_lex_expect(buf, '-');
+				css_lex_expect(buf, '-');
+				continue; /* ignore CDO */
+			}
+			return '<';
+		}
+
+		if (css_lex_accept(buf, '-'))
+		{
+			if (css_lex_accept(buf, '-'))
+			{
+				css_lex_expect(buf, '>');
+				continue; /* ignore CDC */
+			}
+			if (buf->c >= '0' && buf->c <= '9')
+			{
+				css_push_char(buf, '-');
+				return css_lex_number(buf);
+			}
+			if (isnmstart(buf->c))
+			{
+				css_push_char(buf, '-');
+				css_push_char(buf, buf->c);
+				css_lex_next(buf);
+				return css_lex_keyword(buf);
+			}
+			return '-';
+		}
+
+		if (css_lex_accept(buf, '+'))
+		{
+			/* the sign is dropped from the token text */
+			if (buf->c >= '0' && buf->c <= '9')
+				return css_lex_number(buf);
+			return '+';
+		}
+
+		if (css_lex_accept(buf, '.'))
+		{
+			if (buf->c >= '0' && buf->c <= '9')
+			{
+				css_push_char(buf, '.');
+				return css_lex_number(buf);
+			}
+			return '.';
+		}
+
+		if (css_lex_accept(buf, '#'))
+		{
+			int a, b, c, d, e, f;
+			if (!css_lex_accept_hex(buf, &a)) goto colorerror;
+			if (!css_lex_accept_hex(buf, &b)) goto colorerror;
+			if (!css_lex_accept_hex(buf, &c)) goto colorerror;
+			if (css_lex_accept_hex(buf, &d))
+			{
+				/* six digits: #rrggbb */
+				if (!css_lex_accept_hex(buf, &e)) goto colorerror;
+				if (!css_lex_accept_hex(buf, &f)) goto colorerror;
+				buf->color = (a << 20) | (b << 16) | (c << 12) | (d << 8) | (e << 4) | f;
+			}
+			else
+			{
+				/*
+				 * Three digits: CSS expands #rgb by replicating each
+				 * digit, so #fb0 means #ffbb00 (not #f0b000).
+				 */
+				buf->color = (a << 20) | (a << 16) | (b << 12) | (b << 8) | (c << 4) | c;
+			}
+			sprintf(buf->string, "%06x", buf->color); // XXX
+			return CSS_COLOR;
+colorerror:
+			fz_css_error(buf, "invalid color");
+		}
+
+		if (css_lex_accept(buf, '"'))
+			return css_lex_string(buf, '"');
+		if (css_lex_accept(buf, '\''))
+			return css_lex_string(buf, '\'');
+
+		if (buf->c >= '0' && buf->c <= '9')
+			return css_lex_number(buf);
+
+		/*
+		 * "url(" starts a URI token; any shorter prefix of "url" is
+		 * pushed back into the token text and lexed as a keyword.
+		 */
+		if (css_lex_accept(buf, 'u'))
+		{
+			if (css_lex_accept(buf, 'r'))
+			{
+				if (css_lex_accept(buf, 'l'))
+				{
+					if (css_lex_accept(buf, '('))
+					{
+						// string or url
+						css_lex_expect(buf, ')');
+						return CSS_URI;
+					}
+					css_push_char(buf, 'u');
+					css_push_char(buf, 'r');
+					css_push_char(buf, 'l');
+					return css_lex_keyword(buf);
+				}
+				css_push_char(buf, 'u');
+				css_push_char(buf, 'r');
+				return css_lex_keyword(buf);
+			}
+			css_push_char(buf, 'u');
+			return css_lex_keyword(buf);
+		}
+
+		if (isnmstart(buf->c))
+		{
+			css_push_char(buf, buf->c);
+			css_lex_next(buf);
+			return css_lex_keyword(buf);
+		}
+
+		/* anything else is a single-character token */
+		t = buf->c;
+		css_lex_next(buf);
+		return t;
+	}
+	return EOF;
+}
+
+/* Advance the parser: lex the next token and store it as the lookahead. */
+static void next(struct lexbuf *buf)
+{
+ buf->lookahead = css_lex(buf);
+}
+
+/*
+ * Consume the lookahead token if it equals t.
+ * Returns 1 when the token was consumed, 0 when it was left in place.
+ */
+static int accept(struct lexbuf *buf, int t)
+{
+	if (buf->lookahead != t)
+		return 0;
+	next(buf);
+	return 1;
+}
+
+/* Consume token t, reporting "unexpected token" if the lookahead is something else. */
+static void expect(struct lexbuf *buf, int t)
+{
+	if (!accept(buf, t))
+		fz_css_error(buf, "unexpected token");
+}
+
+/* Return 1 if token t can start a selector condition (':', '.', '#' or '['), else 0. */
+static int iscond(int t)
+{
+	switch (t)
+	{
+	case ':':
+	case '.':
+	case '#':
+	case '[':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static fz_css_value *parse_value_list(struct lexbuf *buf);
+
+/*
+ * Parse a single value of a declaration and return it as a new fz_css_value.
+ * Handles keywords (optionally followed by a parenthesised argument list,
+ * i.e. a function call), the literal token types, and the ',' and '/'
+ * separators, which are returned as values in their own right.
+ * Reports "expected value" for any other token.
+ */
+static fz_css_value *parse_value(struct lexbuf *buf)
+{
+ fz_css_value *v;
+
+ if (buf->lookahead == CSS_KEYWORD)
+ {
+ v = fz_new_css_value(buf->ctx, CSS_KEYWORD, buf->string);
+ next(buf);
+
+ /* keyword followed by '(' is a function: retype and parse its arguments */
+ if (accept(buf, '('))
+ {
+ v->type = '(';
+ v->args = parse_value_list(buf);
+ expect(buf, ')');
+ }
+
+ return v;
+ }
+
+ switch (buf->lookahead)
+ {
+ case CSS_NUMBER:
+ case CSS_LENGTH:
+ case CSS_PERCENT:
+ case CSS_STRING:
+ case CSS_COLOR:
+ case CSS_URI:
+ /* the value must be created before next() overwrites buf->string */
+ v = fz_new_css_value(buf->ctx, buf->lookahead, buf->string);
+ next(buf);
+ return v;
+ }
+
+ if (accept(buf, ','))
+ return fz_new_css_value(buf->ctx, ',', ",");
+ if (accept(buf, '/'))
+ return fz_new_css_value(buf->ctx, '/', "/");
+
+ fz_css_error(buf, "expected value");
+}
+
+/*
+ * Parse values until a token that ends a value list is seen
+ * ('}', ';', '!', ')' or end of input) and return them as a linked list
+ * in source order. Returns NULL for an empty list.
+ */
+static fz_css_value *parse_value_list(struct lexbuf *buf)
+{
+	fz_css_value *first = NULL;
+	fz_css_value **link = &first;
+
+	for (;;)
+	{
+		int t = buf->lookahead;
+		if (t == '}' || t == ';' || t == '!' || t == ')' || t == EOF)
+			break;
+		*link = parse_value(buf);
+		link = &(*link)->next;
+	}
+
+	return first;
+}
+
+/*
+ * Parse one declaration of the form "name : value-list [! keyword]" and
+ * return it as a new fz_css_property.
+ */
+static fz_css_property *parse_declaration(struct lexbuf *buf)
+{
+ fz_css_property *p;
+
+ if (buf->lookahead != CSS_KEYWORD)
+ fz_css_error(buf, "expected keyword in property");
+ p = fz_new_css_property(buf->ctx, buf->string, NULL, 0);
+ next(buf);
+
+ expect(buf, ':');
+
+ p->value = parse_value_list(buf);
+
+ /* !important */
+ /* the keyword after '!' is consumed but neither checked nor recorded here */
+ if (accept(buf, '!'))
+ expect(buf, CSS_KEYWORD);
+
+ return p;
+}
+
+/*
+ * Parse a ';'-separated list of declarations and return them as a linked
+ * list in source order. Returns NULL if the list is empty (the lookahead
+ * is '}' or end of input). Empty declarations (";;") and a trailing ';'
+ * are tolerated.
+ */
+static fz_css_property *parse_declaration_list(struct lexbuf *buf)
+{
+ fz_css_property *head, *tail;
+
+ if (buf->lookahead == '}' || buf->lookahead == EOF)
+ return NULL;
+
+ head = tail = parse_declaration(buf);
+
+ while (accept(buf, ';'))
+ {
+ /* only parse another declaration if one actually follows the ';' */
+ if (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != EOF)
+ {
+ tail = tail->next = parse_declaration(buf);
+ }
+ }
+
+ return head;
+}
+
+/*
+ * Parse the value of an attribute condition, which must be a keyword or a
+ * string token. Returns a copy of the token text made with fz_strdup.
+ */
+static char *parse_attrib_value(struct lexbuf *buf)
+{
+	char *copy;
+
+	if (buf->lookahead != CSS_KEYWORD && buf->lookahead != CSS_STRING)
+		fz_css_error(buf, "expected attribute value");
+
+	/* copy before next() overwrites buf->string */
+	copy = fz_strdup(buf->ctx, buf->string);
+	next(buf);
+	return copy;
+}
+
+/*
+ * Parse one selector condition and return it as a new fz_css_condition:
+ *   :keyword            pseudo condition (type ':', key "pseudo")
+ *   .keyword            class condition  (type '.', key "class")
+ *   #keyword            id condition     (type '#', key "id")
+ *   [att] [att=v] [att|=v] [att~=v]   attribute conditions
+ * Reports "expected condition" for any other token.
+ */
+static fz_css_condition *parse_condition(struct lexbuf *buf)
+{
+ fz_css_condition *c;
+
+ if (accept(buf, ':'))
+ {
+ if (buf->lookahead != CSS_KEYWORD)
+ fz_css_error(buf, "expected keyword after ':'");
+ c = fz_new_css_condition(buf->ctx, ':', "pseudo", buf->string);
+ next(buf);
+ return c;
+ }
+
+ if (accept(buf, '.'))
+ {
+ if (buf->lookahead != CSS_KEYWORD)
+ fz_css_error(buf, "expected keyword after '.'");
+ c = fz_new_css_condition(buf->ctx, '.', "class", buf->string);
+ next(buf);
+ return c;
+ }
+
+ /* NOTE(review): css_lex turns '#' into a CSS_COLOR token or an error, so it is unclear when a bare '#' token reaches this branch - confirm */
+ if (accept(buf, '#'))
+ {
+ if (buf->lookahead != CSS_KEYWORD)
+ fz_css_error(buf, "expected keyword after '#'");
+ c = fz_new_css_condition(buf->ctx, '#', "id", buf->string);
+ next(buf);
+ return c;
+ }
+
+ if (accept(buf, '['))
+ {
+ if (buf->lookahead != CSS_KEYWORD)
+ fz_css_error(buf, "expected keyword after '['");
+
+ /* starts as a plain "attribute present" test; retyped below if an operator follows */
+ c = fz_new_css_condition(buf->ctx, '[', buf->string, NULL);
+ next(buf);
+
+ if (accept(buf, '='))
+ {
+ c->type = '=';
+ c->val = parse_attrib_value(buf);
+ }
+ else if (accept(buf, '|'))
+ {
+ expect(buf, '=');
+ c->type = '|';
+ c->val = parse_attrib_value(buf);
+ }
+ else if (accept(buf, '~'))
+ {
+ expect(buf, '=');
+ c->type = '~';
+ c->val = parse_attrib_value(buf);
+ }
+
+ expect(buf, ']');
+
+ return c;
+ }
+
+ fz_css_error(buf, "expected condition");
+}
+
+/*
+ * Parse one or more consecutive conditions and return them as a linked
+ * list in source order. At least one condition is always parsed.
+ */
+static fz_css_condition *parse_condition_list(struct lexbuf *buf)
+{
+	fz_css_condition *first = NULL;
+	fz_css_condition **link = &first;
+
+	do
+	{
+		*link = parse_condition(buf);
+		link = &(*link)->next;
+	}
+	while (iscond(buf->lookahead));
+
+	return first;
+}
+
+/*
+ * Parse a simple selector: '*', an element name, or a bare condition list,
+ * each optionally followed by conditions. The universal selector and the
+ * bare condition list create a selector with a NULL name.
+ * Reports "expected selector" for any other token.
+ */
+static fz_css_selector *parse_simple_selector(struct lexbuf *buf)
+{
+ fz_css_selector *s;
+
+ if (accept(buf, '*'))
+ {
+ s = fz_new_css_selector(buf->ctx, NULL);
+ if (iscond(buf->lookahead))
+ s->cond = parse_condition_list(buf);
+ return s;
+ }
+ else if (buf->lookahead == CSS_KEYWORD)
+ {
+ /* create the selector before next() overwrites buf->string */
+ s = fz_new_css_selector(buf->ctx, buf->string);
+ next(buf);
+ if (iscond(buf->lookahead))
+ s->cond = parse_condition_list(buf);
+ return s;
+ }
+ else if (iscond(buf->lookahead))
+ {
+ s = fz_new_css_selector(buf->ctx, NULL);
+ s->cond = parse_condition_list(buf);
+ return s;
+ }
+
+ fz_css_error(buf, "expected selector");
+}
+
+/*
+ * Parse "simple [+ adjacent]". When a '+' combinator follows, returns a
+ * new nameless selector joining the left and right operands; the right
+ * operand is parsed recursively, so chains nest to the right.
+ */
+static fz_css_selector *parse_adjacent_selector(struct lexbuf *buf)
+{
+	fz_css_selector *lhs, *rhs, *pair;
+
+	lhs = parse_simple_selector(buf);
+	if (!accept(buf, '+'))
+		return lhs;
+
+	rhs = parse_adjacent_selector(buf);
+	pair = fz_new_css_selector(buf->ctx, NULL);
+	pair->combine = '+';
+	pair->left = lhs;
+	pair->right = rhs;
+	return pair;
+}
+
+/*
+ * Parse "adjacent [> child]". When a '>' combinator follows, returns a
+ * new nameless selector joining the left and right operands; the right
+ * operand is parsed recursively, so chains nest to the right.
+ */
+static fz_css_selector *parse_child_selector(struct lexbuf *buf)
+{
+	fz_css_selector *lhs, *rhs, *pair;
+
+	lhs = parse_adjacent_selector(buf);
+	if (!accept(buf, '>'))
+		return lhs;
+
+	rhs = parse_child_selector(buf);
+	pair = fz_new_css_selector(buf->ctx, NULL);
+	pair->combine = '>';
+	pair->left = lhs;
+	pair->right = rhs;
+	return pair;
+}
+
+/*
+ * Parse a sequence of child selectors separated by nothing but whitespace.
+ * There is no explicit descendant token: any lookahead other than ',',
+ * '{' or end of input is taken as the start of a descendant, and the two
+ * sides are joined by a new nameless selector with combinator ' '.
+ */
+static fz_css_selector *parse_descendant_selector(struct lexbuf *buf)
+{
+	fz_css_selector *lhs, *rhs, *pair;
+
+	lhs = parse_child_selector(buf);
+	if (buf->lookahead == ',' || buf->lookahead == '{' || buf->lookahead == EOF)
+		return lhs;
+
+	rhs = parse_descendant_selector(buf);
+	pair = fz_new_css_selector(buf->ctx, NULL);
+	pair->combine = ' ';
+	pair->left = lhs;
+	pair->right = rhs;
+	return pair;
+}
+
+/*
+ * Parse one or more ','-separated selectors and return them as a linked
+ * list (chained through ->next) in source order.
+ */
+static fz_css_selector *parse_selector_list(struct lexbuf *buf)
+{
+	fz_css_selector *first = NULL;
+	fz_css_selector **link = &first;
+
+	do
+	{
+		*link = parse_descendant_selector(buf);
+		link = &(*link)->next;
+	}
+	while (accept(buf, ','));
+
+	return first;
+}
+
+/* Parse "selector-list { declaration-list }" and return it as a new rule. */
+static fz_css_rule *parse_rule(struct lexbuf *buf)
+{
+	fz_css_selector *selectors;
+	fz_css_property *declarations;
+
+	selectors = parse_selector_list(buf);
+	expect(buf, '{');
+	declarations = parse_declaration_list(buf);
+	expect(buf, '}');
+
+	return fz_new_css_rule(buf->ctx, selectors, declarations);
+}
+
+/*
+ * Skip an at-rule whose '@' has already been consumed. Nothing is
+ * recorded: tokens are discarded up to and including the terminating ';',
+ * or up to and including the matching '}' of a (possibly nested) block.
+ */
+static void parse_at_rule(struct lexbuf *buf)
+{
+ expect(buf, CSS_KEYWORD);
+
+ /* skip until '{' or ';' */
+ while (buf->lookahead != EOF)
+ {
+ if (accept(buf, ';'))
+ return;
+ if (accept(buf, '{'))
+ {
+ /* track brace nesting so nested blocks are skipped as a whole */
+ int depth = 1;
+ while (buf->lookahead != EOF && depth > 0)
+ {
+ if (accept(buf, '{'))
+ ++depth;
+ else if (accept(buf, '}'))
+ --depth;
+ else
+ next(buf);
+ }
+ return;
+ }
+ next(buf);
+ }
+}
+
+/*
+ * Parse a whole style sheet, appending each rule to the end of chain
+ * (which may be NULL). At-rules are skipped. Returns the head of the
+ * combined list: chain itself if it was non-NULL, otherwise the first
+ * rule parsed (or NULL if there were none).
+ */
+static fz_css_rule *parse_stylesheet(struct lexbuf *buf, fz_css_rule *chain)
+{
+ fz_css_rule *rule, **nextp, *tail;
+
+ /* nextp points at the link where the next rule must be stored */
+ tail = chain;
+ if (tail)
+ {
+ while (tail->next)
+ tail = tail->next;
+ nextp = &tail->next;
+ }
+ else
+ {
+ /* empty chain: the first rule is stored in tail, which then acts as the list head */
+ nextp = &tail;
+ }
+
+ while (buf->lookahead != EOF)
+ {
+ if (accept(buf, '@'))
+ {
+ parse_at_rule(buf);
+ }
+ else
+ {
+ rule = *nextp = parse_rule(buf);
+ nextp = &rule->next;
+ }
+ }
+
+ return chain ? chain : tail;
+}
+
+/*
+ * Parse a bare declaration list from source and return the resulting
+ * property list (NULL if empty). The input is labelled "<inline>" when
+ * the lexer is initialised.
+ */
+fz_css_property *fz_parse_css_properties(fz_context *ctx, const char *source)
+{
+	struct lexbuf lexer;
+
+	css_lex_init(ctx, &lexer, source, "<inline>");
+	next(&lexer); /* prime the lookahead token */
+	return parse_declaration_list(&lexer);
+}
+
+/*
+ * Parse the style sheet text in source and append its rules to chain
+ * (which may be NULL). file is handed to the lexer as the name of the
+ * input. Returns the head of the combined rule list.
+ */
+fz_css_rule *fz_parse_css(fz_context *ctx, fz_css_rule *chain, const char *source, const char *file)
+{
+	struct lexbuf lexer;
+
+	css_lex_init(ctx, &lexer, source, file);
+	next(&lexer); /* prime the lookahead token */
+	return parse_stylesheet(&lexer, chain);
+}
diff --git a/source/html/epub-doc.c b/source/html/epub-doc.c
new file mode 100644
index 00000000..69963ff0
--- /dev/null
+++ b/source/html/epub-doc.c
@@ -0,0 +1,326 @@
+#include "mupdf/html.h"
+
+#define DEFW (450)
+#define DEFH (600)
+#define DEFEM (12)
+
+typedef struct epub_document_s epub_document;
+typedef struct epub_chapter_s epub_chapter;
+typedef struct epub_page_s epub_page;
+
+/* An open EPUB document. */
+struct epub_document_s
+{
+ fz_document super; /* base document; must stay first, the code casts fz_document* to epub_document* */
+ fz_archive *zip; /* archive (or directory) the EPUB contents are read from */
+ fz_html_font_set *set; /* font set passed to the HTML parser */
+ float page_w, page_h, em; /* parameters of the most recent epub_layout call */
+ int count; /* NOTE(review): not referenced in the code shown here */
+ epub_chapter *spine; /* linked list of chapters, in spine order */
+};
+
+/* One spine item of the EPUB. */
+struct epub_chapter_s
+{
+ int start; /* NOTE(review): not written in the code shown here; presumably the first page number - confirm */
+ fz_html *box; /* box tree returned by fz_parse_html for this chapter */
+ epub_chapter *next; /* next chapter in the spine, or NULL */
+};
+
+/* A page handle returned by epub_load_page. */
+struct epub_page_s
+{
+ fz_page super; /* base page; must stay first, the code casts fz_page* to epub_page* */
+ epub_document *doc; /* owning document (stored without taking a reference) */
+ int number; /* page number as passed to epub_load_page */
+};
+
+/*
+ * Lay out every chapter for a page of w by h with base font size em, and
+ * remember those parameters in the document. Progress messages are
+ * printed to stdout.
+ */
+static void
+epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
+{
+ epub_document *doc = (epub_document*)doc_;
+ epub_chapter *ch;
+
+ doc->page_w = w;
+ doc->page_h = h;
+ doc->em = em;
+
+ printf("epub: laying out chapters.\n");
+ for (ch = doc->spine; ch; ch = ch->next)
+ fz_layout_html(ctx, ch->box, w, h, em);
+ printf("epub: done.\n");
+}
+
+/*
+ * Return the total page count: each chapter contributes its laid-out
+ * height divided by the page height, rounded up.
+ */
+static int
+epub_count_pages(fz_context *ctx, fz_document *doc_)
+{
+	epub_document *doc = (epub_document*)doc_;
+	epub_chapter *chapter = doc->spine;
+	int total = 0;
+
+	while (chapter)
+	{
+		total += ceilf(chapter->box->h / doc->page_h);
+		chapter = chapter->next;
+	}
+	return total;
+}
+
+/* Page destructor hook: intentionally empty, an epub_page owns no resources of its own. */
+static void
+epub_drop_page_imp(fz_context *ctx, fz_page *page_)
+{
+}
+
+/*
+ * Fill bbox with the page rectangle (0, 0, page_w, page_h) of the
+ * document's current layout and return bbox. Every page has the same size.
+ */
+static fz_rect *
+epub_bound_page(fz_context *ctx, fz_page *page_, fz_rect *bbox)
+{
+	epub_document *doc = ((epub_page*)page_)->doc;
+
+	bbox->x0 = 0;
+	bbox->y0 = 0;
+	bbox->x1 = doc->page_w;
+	bbox->y1 = doc->page_h;
+
+	return bbox;
+}
+
+/*
+ * Draw one page to dev. The chapter containing the page is found by
+ * accumulating per-chapter page counts (height / page height, rounded
+ * up); the vertical slice of that chapter belonging to the page is then
+ * drawn. Nothing is drawn if the page number is past the last chapter.
+ * The cookie argument is not used.
+ */
+static void
+epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
+{
+ epub_page *page = (epub_page*)page_;
+ epub_document *doc = page->doc;
+ epub_chapter *ch;
+ int n = page->number;
+
+ /* count = number of pages in the chapters before ch */
+ int count = 0;
+ for (ch = doc->spine; ch; ch = ch->next)
+ {
+ int cn = ceilf(ch->box->h / doc->page_h);
+ if (n < count + cn)
+ {
+ /* n-count is the page index inside this chapter */
+ fz_draw_html(ctx, ch->box, (n-count) * doc->page_h, (n-count+1) * doc->page_h, dev, ctm);
+ break;
+ }
+ count += cn;
+ }
+}
+
+/*
+ * Create a page handle for page number `number`. The number is stored
+ * as given; it is not checked against the page count here.
+ */
+static fz_page *
+epub_load_page(fz_context *ctx, fz_document *doc_, int number)
+{
+ epub_document *doc = (epub_document*)doc_;
+ epub_page *page = fz_new_page(ctx, sizeof *page);
+ page->super.bound_page = epub_bound_page;
+ page->super.run_page_contents = epub_run_page;
+ page->super.drop_page_imp = epub_drop_page_imp;
+ page->doc = doc;
+ page->number = number;
+ return (fz_page*)page;
+}
+
+/*
+ * Destroy the document: free every chapter and its box tree, then drop
+ * the archive and the font set, and finally free the document itself.
+ */
+static void
+epub_close_document(fz_context *ctx, fz_document *doc_)
+{
+	epub_document *doc = (epub_document*)doc_;
+	epub_chapter *chapter, *following;
+
+	for (chapter = doc->spine; chapter; chapter = following)
+	{
+		/* read the link before the node is freed */
+		following = chapter->next;
+		fz_drop_html(ctx, chapter->box);
+		fz_free(ctx, chapter);
+	}
+
+	fz_drop_archive(ctx, doc->zip);
+	fz_drop_html_font_set(ctx, doc->set);
+	fz_free(ctx, doc);
+}
+
+/*
+ * Look up the manifest <item> whose "id" attribute equals idref and
+ * return its "href" attribute. Returns NULL if idref is NULL or no item
+ * matches.
+ */
+static const char *
+rel_path_from_idref(fz_xml *manifest, const char *idref)
+{
+	fz_xml *item;
+
+	if (!idref)
+		return NULL;
+
+	for (item = fz_xml_find_down(manifest, "item"); item; item = fz_xml_find_next(item, "item"))
+	{
+		const char *id = fz_xml_att(item, "id");
+		if (id && strcmp(id, idref) == 0)
+			return fz_xml_att(item, "href");
+	}
+
+	return NULL;
+}
+
+/*
+ * Resolve a manifest idref to a path: base_uri + "/" + the item's href,
+ * written to path (capacity n bytes) and then passed through
+ * fz_cleanname. Returns the result of fz_cleanname, or NULL with path
+ * set to the empty string when the idref cannot be resolved.
+ */
+static const char *
+path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *idref, int n)
+{
+ const char *rel_path = rel_path_from_idref(manifest, idref);
+ if (!rel_path)
+ {
+ path[0] = 0;
+ return NULL;
+ }
+ fz_strlcpy(path, base_uri, n);
+ fz_strlcat(path, "/", n);
+ fz_strlcat(path, rel_path, n);
+ return fz_cleanname(path);
+}
+
+/*
+ * Read the archive entry at path, parse it as HTML and return it as a
+ * new chapter (with next set to NULL). The directory part of path is
+ * used as the base URI for resources referenced by the chapter.
+ * On error nothing is leaked and the exception is rethrown.
+ */
+static epub_chapter *
+epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path)
+{
+	fz_archive *zip = doc->zip;
+	fz_buffer *buf = NULL;
+	epub_chapter *ch = NULL;
+	char base_uri[2048];
+
+	/* both are assigned inside fz_try and read in the cleanup handlers */
+	fz_var(buf);
+	fz_var(ch);
+
+	fz_dirname(base_uri, path, sizeof base_uri);
+
+	fz_try(ctx)
+	{
+		buf = fz_read_archive_entry(ctx, zip, path);
+		/* zero-terminate the data for the parser */
+		fz_write_buffer_byte(ctx, buf, 0);
+
+		ch = fz_malloc_struct(ctx, epub_chapter);
+		ch->box = fz_parse_html(ctx, doc->set, zip, base_uri, buf, NULL);
+		ch->next = NULL;
+	}
+	fz_always(ctx)
+	{
+		fz_drop_buffer(ctx, buf);
+	}
+	fz_catch(ctx)
+	{
+		/* ch->box is still unset if we get here, so freeing the struct is enough */
+		fz_free(ctx, ch);
+		fz_rethrow(ctx);
+	}
+
+	return ch;
+}
+
+/*
+ * Read the EPUB package metadata and parse every chapter.
+ *
+ * META-INF/container.xml names the OPF package file; the OPF manifest
+ * maps ids to files and its spine lists the chapters in reading order.
+ * Each resolvable spine item is parsed and appended to doc->spine.
+ *
+ * Chapters are linked into doc->spine as soon as they are parsed, so if a
+ * later step throws, the caller's epub_close_document frees them. The
+ * temporary buffer and both XML trees are released on every path.
+ */
+static void
+epub_parse_header(fz_context *ctx, epub_document *doc)
+{
+	fz_archive *zip = doc->zip;
+	fz_buffer *buf = NULL;
+	fz_xml *container_xml = NULL, *content_opf = NULL;
+	fz_xml *container, *rootfiles, *rootfile;
+	fz_xml *package, *manifest, *spine, *itemref;
+	char base_uri[2048];
+	const char *full_path;
+	char ncx[2048], s[2048];
+	epub_chapter **tailp = &doc->spine;
+
+	/* assigned inside fz_try and read in fz_always */
+	fz_var(buf);
+	fz_var(container_xml);
+	fz_var(content_opf);
+
+	fz_try(ctx)
+	{
+		/* parse META-INF/container.xml to find OPF */
+
+		buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml");
+		fz_write_buffer_byte(ctx, buf, 0);
+		container_xml = fz_parse_xml(ctx, buf->data, buf->len, 0);
+		fz_drop_buffer(ctx, buf);
+		buf = NULL;
+
+		container = fz_xml_find(container_xml, "container");
+		rootfiles = fz_xml_find_down(container, "rootfiles");
+		rootfile = fz_xml_find_down(rootfiles, "rootfile");
+		full_path = fz_xml_att(rootfile, "full-path");
+		if (!full_path)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB");
+
+		printf("epub: found root: %s\n", full_path);
+
+		fz_dirname(base_uri, full_path, sizeof base_uri);
+
+		/* parse OPF to find NCX and spine */
+
+		buf = fz_read_archive_entry(ctx, zip, full_path);
+		fz_write_buffer_byte(ctx, buf, 0);
+		content_opf = fz_parse_xml(ctx, buf->data, buf->len, 0);
+		fz_drop_buffer(ctx, buf);
+		buf = NULL;
+
+		package = fz_xml_find(content_opf, "package");
+		manifest = fz_xml_find_down(package, "manifest");
+		spine = fz_xml_find_down(package, "spine");
+
+		if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx))
+		{
+			/* TODO: parse NCX to create fz_outline */
+			printf("epub: found outline: %s\n", ncx);
+		}
+
+		itemref = fz_xml_find_down(spine, "itemref");
+		while (itemref)
+		{
+			if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s))
+			{
+				printf("epub: found spine %s\n", s);
+				/* link immediately so the chapter is owned by doc even if a later one fails */
+				*tailp = epub_parse_chapter(ctx, doc, s);
+				tailp = &(*tailp)->next;
+			}
+			itemref = fz_xml_find_next(itemref, "itemref");
+		}
+
+		printf("epub: done.\n");
+	}
+	fz_always(ctx)
+	{
+		fz_drop_buffer(ctx, buf);
+		fz_drop_xml(ctx, container_xml);
+		fz_drop_xml(ctx, content_opf);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
+
+/*
+ * Create an EPUB document backed by the archive zip, parse its header
+ * and chapters, and lay it out at the default page size (DEFW x DEFH,
+ * font size DEFEM). The document stores zip and drops it when closed.
+ * If parsing or layout throws, the document is closed and the exception
+ * is rethrown.
+ */
+static epub_document *
+epub_init(fz_context *ctx, fz_archive *zip)
+{
+ epub_document *doc;
+
+ doc = fz_malloc_struct(ctx, epub_document);
+ doc->zip = zip;
+ doc->set = fz_new_html_font_set(ctx);
+
+ doc->super.close = epub_close_document;
+ doc->super.layout = epub_layout;
+ doc->super.count_pages = epub_count_pages;
+ doc->super.load_page = epub_load_page;
+
+ fz_try(ctx)
+ {
+ epub_parse_header(ctx, doc);
+ epub_layout(ctx, (fz_document*)doc, DEFW, DEFH, DEFEM);
+ }
+ fz_catch(ctx)
+ {
+ /* close frees the chapters, archive, font set and the document itself */
+ epub_close_document(ctx, (fz_document*)doc);
+ fz_rethrow(ctx);
+ }
+
+ return doc;
+}
+
+/* Open an EPUB from a stream by opening the stream as an archive and passing it to epub_init. */
+static epub_document *
+epub_open_document_with_stream(fz_context *ctx, fz_stream *file)
+{
+ return epub_init(ctx, fz_open_archive_with_stream(ctx, file));
+}
+
+/*
+ * Open an EPUB by file name.
+ *
+ * If filename points at the META-INF/container.xml of an unpacked EPUB
+ * (with either '/' or '\\' as separator), the directory that contains
+ * META-INF is opened as the archive; an empty directory part means ".".
+ * Any other name is opened as an archive file.
+ *
+ * Throws if the directory part does not fit the local path buffer.
+ */
+static epub_document *
+epub_open_document(fz_context *ctx, const char *filename)
+{
+	/* locate the exact suffix that was matched, not just the first "META-INF" */
+	const char *meta = strstr(filename, "META-INF/container.xml");
+	if (!meta)
+		meta = strstr(filename, "META-INF\\container.xml");
+
+	if (meta)
+	{
+		char dirname[2048];
+		size_t n = meta - filename;
+
+		if (n >= sizeof dirname)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "path too long: %s", filename);
+
+		memcpy(dirname, filename, n);
+		dirname[n] = 0;
+		if (!dirname[0])
+			fz_strlcpy(dirname, ".", sizeof dirname);
+		return epub_init(ctx, fz_open_directory(ctx, dirname));
+	}
+
+	return epub_init(ctx, fz_open_archive(ctx, filename));
+}
+
+/*
+ * Score how likely magic (a file name or a MIME type) denotes an EPUB:
+ * 100 for a ".epub" extension (case-insensitive) or the MIME type
+ * "application/epub+zip", 200 for a path containing
+ * META-INF/container.xml, 0 otherwise. The context argument (named doc
+ * here) is not used.
+ */
+static int
+epub_recognize(fz_context *doc, const char *magic)
+{
+ char *ext = strrchr(magic, '.');
+ if (ext)
+ if (!fz_strcasecmp(ext, ".epub"))
+ return 100;
+ if (strstr(magic, "META-INF/container.xml") || strstr(magic, "META-INF\\container.xml"))
+ return 200;
+ if (!strcmp(magic, "application/epub+zip"))
+ return 100;
+ return 0;
+}
+
+/*
+ * Handler registered in fz_register_document_handlers. The functions
+ * are cast to the generic handler function-pointer types.
+ */
+fz_document_handler epub_document_handler =
+{
+ (fz_document_recognize_fn *)&epub_recognize,
+ (fz_document_open_fn *)&epub_open_document,
+ (fz_document_open_with_stream_fn *)&epub_open_document_with_stream
+};
diff --git a/source/html/html-doc.c b/source/html/html-doc.c
new file mode 100644
index 00000000..7217f74c
--- /dev/null
+++ b/source/html/html-doc.c
@@ -0,0 +1,166 @@
+#include "mupdf/html.h"
+
+#define DEFW (450)
+#define DEFH (600)
+#define DEFEM (12)
+
+typedef struct html_document_s html_document;
+typedef struct html_page_s html_page;
+
+/* An open stand-alone HTML document. */
+struct html_document_s
+{
+ fz_document super; /* base document; must stay first, the code casts fz_document* to html_document* */
+ fz_archive *zip; /* directory archive that resources are loaded from */
+ fz_html_font_set *set; /* font set passed to the HTML parser */
+ float page_w, page_h, em; /* parameters of the most recent htdoc_layout call */
+ fz_html *box; /* box tree returned by fz_parse_html */
+};
+
+/* A page handle returned by htdoc_load_page. */
+struct html_page_s
+{
+ fz_page super; /* base page; must stay first, the code casts fz_page* to html_page* */
+ html_document *doc; /* owning document (stored without taking a reference) */
+ int number; /* page number as passed to htdoc_load_page */
+};
+
+/* Destroy the document: drop the archive, the box tree and the font set, then free the document. */
+static void
+htdoc_close_document(fz_context *ctx, fz_document *doc_)
+{
+ html_document *doc = (html_document*)doc_;
+ fz_drop_archive(ctx, doc->zip);
+ fz_drop_html(ctx, doc->box);
+ fz_drop_html_font_set(ctx, doc->set);
+ fz_free(ctx, doc);
+}
+
+/* Return the page count: the laid-out document height divided by the page height, rounded up. */
+static int
+htdoc_count_pages(fz_context *ctx, fz_document *doc_)
+{
+	html_document *doc = (html_document*)doc_;
+
+	return ceilf(doc->box->h / doc->page_h);
+}
+
+/* Lay out the document for a page of w by h with base font size em, and remember those parameters. */
+static void
+htdoc_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
+{
+ html_document *doc = (html_document*)doc_;
+ doc->page_w = w;
+ doc->page_h = h;
+ doc->em = em;
+ fz_layout_html(ctx, doc->box, w, h, em);
+}
+
+/* Page destructor hook: intentionally empty, an html_page owns no resources of its own. */
+static void
+htdoc_drop_page_imp(fz_context *ctx, fz_page *page_)
+{
+}
+
+/*
+ * Fill bbox with the page rectangle (0, 0, page_w, page_h) of the
+ * document's current layout and return bbox. Every page has the same size.
+ */
+static fz_rect *
+htdoc_bound_page(fz_context *ctx, fz_page *page_, fz_rect *bbox)
+{
+	html_document *doc = ((html_page*)page_)->doc;
+
+	bbox->y0 = 0;
+	bbox->x0 = 0;
+	bbox->x1 = doc->page_w;
+	bbox->y1 = doc->page_h;
+
+	return bbox;
+}
+
+/*
+ * Draw one page to dev: the vertical slice of the box tree from
+ * n * page_h to (n+1) * page_h, where n is the page number.
+ * The cookie argument is not used.
+ */
+static void
+htdoc_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
+{
+ html_page *page = (html_page*)page_;
+ html_document *doc = page->doc;
+ int n = page->number;
+ fz_draw_html(ctx, doc->box, n * doc->page_h, (n+1) * doc->page_h, dev, ctm);
+}
+
+/*
+ * Create a page handle for page number `number`. The number is stored
+ * as given; it is not checked against the page count here.
+ */
+static fz_page *
+htdoc_load_page(fz_context *ctx, fz_document *doc_, int number)
+{
+ html_document *doc = (html_document*)doc_;
+ html_page *page = fz_new_page(ctx, sizeof *page);
+ page->super.bound_page = htdoc_bound_page;
+ page->super.run_page_contents = htdoc_run_page;
+ page->super.drop_page_imp = htdoc_drop_page_imp;
+ page->doc = doc;
+ page->number = number;
+ return (fz_page*)page;
+}
+
+/*
+ * Open an HTML document from a stream. Resources are resolved relative
+ * to the current directory ("."). The document is laid out at the
+ * default page size (DEFW x DEFH, font size DEFEM).
+ *
+ * If reading, parsing or layout throws, the temporary buffer and the
+ * partially built document are released and the exception is rethrown,
+ * matching what epub_init does for EPUB documents.
+ */
+static html_document *
+htdoc_open_document_with_stream(fz_context *ctx, fz_stream *file)
+{
+	html_document *doc;
+	fz_buffer *buf = NULL;
+
+	/* assigned inside fz_try and read in fz_catch */
+	fz_var(buf);
+
+	doc = fz_malloc_struct(ctx, html_document);
+	doc->zip = fz_open_directory(ctx, ".");
+	doc->set = fz_new_html_font_set(ctx);
+
+	doc->super.close = htdoc_close_document;
+	doc->super.layout = htdoc_layout;
+	doc->super.count_pages = htdoc_count_pages;
+	doc->super.load_page = htdoc_load_page;
+
+	fz_try(ctx)
+	{
+		buf = fz_read_all(ctx, file, 0);
+		/* zero-terminate the data for the parser */
+		fz_write_buffer_byte(ctx, buf, 0);
+		doc->box = fz_parse_html(ctx, doc->set, doc->zip, ".", buf, NULL);
+		fz_drop_buffer(ctx, buf);
+		buf = NULL;
+
+		htdoc_layout(ctx, (fz_document*)doc, DEFW, DEFH, DEFEM);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, buf);
+		htdoc_close_document(ctx, (fz_document*)doc);
+		fz_rethrow(ctx);
+	}
+
+	return doc;
+}
+
+/*
+ * Open an HTML document by file name. The directory containing the file
+ * is opened as the resource archive, so the base URI given to the parser
+ * is "." (relative to that archive). The document is laid out at the
+ * default page size (DEFW x DEFH, font size DEFEM).
+ *
+ * If reading, parsing or layout throws, the temporary buffer and the
+ * partially built document are released and the exception is rethrown,
+ * matching what epub_init does for EPUB documents.
+ */
+static html_document *
+htdoc_open_document(fz_context *ctx, const char *filename)
+{
+	char dirname[2048];
+	fz_buffer *buf = NULL;
+	html_document *doc;
+
+	/* assigned inside fz_try and read in fz_catch */
+	fz_var(buf);
+
+	fz_dirname(dirname, filename, sizeof dirname);
+
+	doc = fz_malloc_struct(ctx, html_document);
+	doc->zip = fz_open_directory(ctx, dirname);
+	doc->set = fz_new_html_font_set(ctx);
+
+	doc->super.close = htdoc_close_document;
+	doc->super.layout = htdoc_layout;
+	doc->super.count_pages = htdoc_count_pages;
+	doc->super.load_page = htdoc_load_page;
+
+	fz_try(ctx)
+	{
+		buf = fz_read_file(ctx, filename);
+		/* zero-terminate the data for the parser */
+		fz_write_buffer_byte(ctx, buf, 0);
+		doc->box = fz_parse_html(ctx, doc->set, doc->zip, ".", buf, NULL);
+		fz_drop_buffer(ctx, buf);
+		buf = NULL;
+
+		htdoc_layout(ctx, (fz_document*)doc, DEFW, DEFH, DEFEM);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, buf);
+		htdoc_close_document(ctx, (fz_document*)doc);
+		fz_rethrow(ctx);
+	}
+
+	return doc;
+}
+
+/*
+ * Score how likely magic (a file name or a MIME type) denotes a document
+ * this handler can open: 100 for a ".xml", ".xhtml" or ".html" extension
+ * (case-insensitive) or for one of the accepted MIME types, 0 otherwise.
+ * The context argument (named doc here) is not used.
+ *
+ * "application/xhtml+xml" is the registered XHTML media type and
+ * "text/html" the HTML one; the original "application/html+xml" spelling
+ * is still accepted for compatibility.
+ */
+static int
+htdoc_recognize(fz_context *doc, const char *magic)
+{
+	static const char *mimetypes[] = {
+		"application/html+xml",
+		"application/xhtml+xml",
+		"application/xml",
+		"text/xml",
+		"text/html",
+	};
+	const char *ext = strrchr(magic, '.');
+	size_t i;
+
+	if (ext)
+	{
+		if (!fz_strcasecmp(ext, ".xml") || !fz_strcasecmp(ext, ".xhtml") || !fz_strcasecmp(ext, ".html"))
+			return 100;
+	}
+
+	for (i = 0; i < sizeof mimetypes / sizeof mimetypes[0]; ++i)
+		if (!strcmp(magic, mimetypes[i]))
+			return 100;
+
+	return 0;
+}
+
+/*
+ * Handler registered in fz_register_document_handlers. The functions
+ * are cast to the generic handler function-pointer types.
+ */
+fz_document_handler html_document_handler =
+{
+ (fz_document_recognize_fn *)&htdoc_recognize,
+ (fz_document_open_fn *)&htdoc_open_document,
+ (fz_document_open_with_stream_fn *)&htdoc_open_document_with_stream
+};
diff --git a/source/html/html-font.c b/source/html/html-font.c
new file mode 100644
index 00000000..ae6568c9
--- /dev/null
+++ b/source/html/html-font.c
@@ -0,0 +1,47 @@
+#include "mupdf/html.h"
+#include "mupdf/pdf.h" /* for pdf_lookup_builtin_font */
+
+/*
+ * Built-in font names, indexed by fz_load_html_font as
+ * is_mono * 8 + is_sans * 4 + is_bold * 2 + is_italic.
+ * Rows: serif, sans-serif, monospace, and monospace+sans-serif. The last
+ * row cannot be selected, because the family string cannot equal both
+ * "monospace" and "sans-serif"; it only pads the table to 16 entries.
+ */
+static const char *font_names[16] =
+{
+ "Times-Roman", "Times-Italic", "Times-Bold", "Times-BoldItalic",
+ "Helvetica", "Helvetica-Oblique", "Helvetica-Bold", "Helvetica-BoldOblique",
+ "Courier", "Courier-Oblique", "Courier-Bold", "Courier-BoldOblique",
+ "Courier", "Courier-Oblique", "Courier-Bold", "Courier-BoldOblique",
+};
+
+/*
+ * Return the built-in font matching the given CSS font properties,
+ * loading it on first use and caching it in set->fonts.
+ *
+ * family: "monospace" and "sans-serif" select Courier and Helvetica;
+ *         any other value selects Times.
+ * variant: not used.
+ * style: "italic" or "oblique" selects the italic/oblique face.
+ * weight: "bold", "bolder" or a number above 400 selects the bold face.
+ *
+ * The returned pointer is the cached entry; no extra reference is taken
+ * for the caller. Throws if the built-in font data cannot be found.
+ */
+fz_font *
+fz_load_html_font(fz_context *ctx, fz_html_font_set *set,
+ const char *family, const char *variant, const char *style, const char *weight)
+{
+ unsigned char *data;
+ unsigned int size;
+
+ int is_mono = !strcmp(family, "monospace");
+ int is_sans = !strcmp(family, "sans-serif");
+ int is_bold = !strcmp(weight, "bold") || !strcmp(weight, "bolder") || atoi(weight) > 400;
+ int is_italic = !strcmp(style, "italic") || !strcmp(style, "oblique");
+
+ /* index into font_names and set->fonts */
+ int idx = is_mono * 8 + is_sans * 4 + is_bold * 2 + is_italic;
+ if (!set->fonts[idx])
+ {
+ data = pdf_lookup_builtin_font(ctx, font_names[idx], &size);
+ if (!data)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load html font: %s", font_names[idx]);
+ set->fonts[idx] = fz_new_font_from_memory(ctx, font_names[idx], data, size, 0, 1);
+ }
+
+ return set->fonts[idx];
+}
+
+/* Allocate a new font set with fz_malloc_struct; fonts are loaded lazily by fz_load_html_font. */
+fz_html_font_set *fz_new_html_font_set(fz_context *ctx)
+{
+ return fz_malloc_struct(ctx, fz_html_font_set);
+}
+
+/*
+ * Drop every font cached in the set and free the set itself.
+ * A NULL set is accepted and ignored, so cleanup code can call this
+ * unconditionally on a partially constructed document.
+ */
+void fz_drop_html_font_set(fz_context *ctx, fz_html_font_set *set)
+{
+	int i;
+
+	if (!set)
+		return;
+
+	for (i = 0; i < nelem(set->fonts); ++i)
+		fz_drop_font(ctx, set->fonts[i]);
+	fz_free(ctx, set);
+}
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
new file mode 100644
index 00000000..5a1a3157
--- /dev/null
+++ b/source/html/html-layout.c
@@ -0,0 +1,909 @@
+#include "mupdf/html.h"
+
+enum { T, R, B, L };
+
+/*
+ * Built-in style sheet text giving the common HTML elements their
+ * default display type, margins, font properties and text decoration.
+ */
+static const char *default_css =
+"html,address,blockquote,body,dd,div,dl,dt,h1,h2,h3,h4,h5,h6,ol,p,ul,center,hr,pre{display:block}"
+"span{display:inline}"
+"li{display:list-item}"
+"head{display:none}"
+"body{margin:1em}"
+"h1{font-size:2em;margin:.67em 0}"
+"h2{font-size:1.5em;margin:.75em 0}"
+"h3{font-size:1.17em;margin:.83em 0}"
+"h4,p,blockquote,ul,ol,dl,dir,menu{margin:1.12em 0}"
+"h5{font-size:.83em;margin:1.5em 0}"
+"h6{font-size:.67em;margin:1.67em 0}"
+"h1,h2,h3,h4,h5,h6,b,strong{font-weight:bold}"
+"blockquote{margin-left:40px;margin-right:40px}"
+"i,cite,em,var,address{font-style:italic}"
+"pre,tt,code,kbd,samp{font-family:monospace}"
+"pre{white-space:pre}"
+"big{font-size:1.17em}"
+"small,sub,sup{font-size:.83em}"
+"sub{vertical-align:sub}"
+"sup{vertical-align:super}"
+"s,strike,del{text-decoration:line-through}"
+"hr{border-width:thin;border-color:black;border-style:solid;margin:.5em 0}"
+"ol,ul,dir,menu,dd{margin-left:40px}"
+"ol{list-style-type:decimal}"
+"ol ul,ul ol,ul ul,ol ol{margin-top:0;margin-bottom:0}"
+"u,ins{text-decoration:underline}"
+"center{text-align:center}"
+"svg{display:none}"
+"a{color:blue}"
+;
+
+/* Return 1 if c is a space, tab, carriage return or newline, else 0. */
+static int iswhite(int c)
+{
+	switch (c)
+	{
+	case ' ':
+	case '\t':
+	case '\r':
+	case '\n':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Free a whole list of flow nodes. Word nodes own their text and image
+ * nodes hold an image reference; both are released with the node.
+ */
+static void fz_drop_html_flow(fz_context *ctx, fz_html_flow *flow)
+{
+	fz_html_flow *following;
+
+	for (; flow; flow = following)
+	{
+		/* read the link before the node is freed */
+		following = flow->next;
+		if (flow->type == FLOW_WORD)
+			fz_free(ctx, flow->text);
+		if (flow->type == FLOW_IMAGE)
+			fz_drop_image(ctx, flow->image);
+		fz_free(ctx, flow);
+	}
+}
+
+/*
+ * Allocate a flow node of the given type, using style (stored by
+ * pointer, not copied), and append it to the flow list of top.
+ * Returns the new node so the caller can fill in its payload.
+ */
+static fz_html_flow *add_flow(fz_context *ctx, fz_html *top, fz_css_style *style, int type)
+{
+ fz_html_flow *flow = fz_malloc_struct(ctx, fz_html_flow);
+ flow->type = type;
+ flow->style = style;
+ /* flow_tail points at the link where the next node must be stored */
+ *top->flow_tail = flow;
+ top->flow_tail = &flow->next;
+ return flow;
+}
+
+/*
+ * Append a FLOW_GLUE node (inter-word space) to the flow list of top,
+ * unless the list is still empty. The node's text fields point at
+ * string literals, so they are not freed with the node.
+ */
+static void add_flow_space(fz_context *ctx, fz_html *top, fz_css_style *style)
+{
+ fz_html_flow *flow;
+
+ /* delete space at the beginning of the line */
+ if (!top->flow_head)
+ return;
+
+ flow = add_flow(ctx, top, style, FLOW_GLUE);
+ flow->text = " ";
+ flow->broken_text = "";
+}
+
+/*
+ * Append a FLOW_WORD node holding a zero-terminated copy of the
+ * characters in [a, b) to the flow list of top.
+ */
+static void add_flow_word(fz_context *ctx, fz_html *top, fz_css_style *style, const char *a, const char *b)
+{
+ fz_html_flow *flow = add_flow(ctx, top, style, FLOW_WORD);
+ flow->text = fz_malloc(ctx, b - a + 1);
+ memcpy(flow->text, a, b - a);
+ flow->text[b - a] = 0;
+}
+
+/*
+ * Append a FLOW_IMAGE node to the flow list of top. The node takes its
+ * own reference to img via fz_keep_image; the caller keeps its reference.
+ */
+static void add_flow_image(fz_context *ctx, fz_html *top, fz_css_style *style, fz_image *img)
+{
+ fz_html_flow *flow = add_flow(ctx, top, style, FLOW_IMAGE);
+ flow->image = fz_keep_image(ctx, img);
+}
+
+/*
+ * Split text into words and spaces and append them to the flow list of
+ * the nearest enclosing BOX_FLOW box (box itself or an ancestor). Every
+ * run of whitespace becomes a single space node; all nodes use the style
+ * of box.
+ */
+static void generate_text(fz_context *ctx, fz_html *box, const char *text)
+{
+ /* find the flow box that collects the inline content */
+ fz_html *flow = box;
+ while (flow->type != BOX_FLOW)
+ flow = flow->up;
+
+ while (*text)
+ {
+ if (iswhite(*text))
+ {
+ /* collapse the whole run of whitespace into one space */
+ ++text;
+ while (iswhite(*text))
+ ++text;
+ add_flow_space(ctx, flow, &box->style);
+ }
+ if (*text)
+ {
+ /* a word extends to the next whitespace or the end of the text */
+ const char *mark = text++;
+ while (*text && !iswhite(*text))
+ ++text;
+ add_flow_word(ctx, flow, &box->style, mark, text);
+ }
+ }
+}
+
+/*
+ * Load the image at base_uri/src from the archive and append it to the
+ * flow list of the nearest enclosing BOX_FLOW box. If the image cannot
+ * be loaded, a warning is issued and the word "[image]" is added instead.
+ *
+ * The flow node takes its own reference to the image (add_flow_image
+ * calls fz_keep_image), so the reference created here and the temporary
+ * buffer are both dropped on every path.
+ */
+static void generate_image(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_html *box, const char *src)
+{
+	fz_image *img = NULL;
+	fz_buffer *buf = NULL;
+	char path[2048];
+
+	fz_html *flow = box;
+	while (flow->type != BOX_FLOW)
+		flow = flow->up;
+
+	/* assigned inside fz_try and read in fz_always */
+	fz_var(img);
+	fz_var(buf);
+
+	fz_strlcpy(path, base_uri, sizeof path);
+	fz_strlcat(path, "/", sizeof path);
+	fz_strlcat(path, src, sizeof path);
+	fz_cleanname(path);
+
+	fz_try(ctx)
+	{
+		buf = fz_read_archive_entry(ctx, zip, path);
+		img = fz_new_image_from_buffer(ctx, buf);
+		add_flow_image(ctx, flow, &box->style, img);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_buffer(ctx, buf);
+		fz_drop_image(ctx, img);
+	}
+	fz_catch(ctx)
+	{
+		const char *alt = "[image]";
+		fz_warn(ctx, "html: cannot add image src='%s'", src);
+		add_flow_word(ctx, flow, &box->style, alt, alt + 7);
+	}
+}
+
+/*
+ * Reset box to an unlinked, empty BOX_BLOCK at position 0,0 with zero
+ * size, an empty flow list and the default CSS style.
+ */
+static void init_box(fz_context *ctx, fz_html *box)
+{
+ box->type = BOX_BLOCK;
+ box->x = box->y = 0;
+ box->w = box->h = 0;
+
+ box->up = NULL;
+ box->last = NULL;
+ box->down = NULL;
+ box->next = NULL;
+
+ /* empty list: the tail pointer refers to the head link */
+ box->flow_head = NULL;
+ box->flow_tail = &box->flow_head;
+
+ fz_default_css_style(ctx, &box->style);
+}
+
+/*
+ * Free box, all of its following siblings, and everything below them
+ * (children recursively, plus each box's flow list). NULL is accepted.
+ */
+void fz_drop_html(fz_context *ctx, fz_html *box)
+{
+	fz_html *sibling;
+
+	for (; box; box = sibling)
+	{
+		/* read the link before the node is freed */
+		sibling = box->next;
+		fz_drop_html_flow(ctx, box->flow_head);
+		fz_drop_html(ctx, box->down);
+		fz_free(ctx, box);
+	}
+}
+
+/* Allocate a new box and initialise it with init_box. */
+static fz_html *new_box(fz_context *ctx)
+{
+ fz_html *box = fz_malloc_struct(ctx, fz_html);
+ init_box(ctx, box);
+ return box;
+}
+
+/*
+ * Set the type of box and make it the last child of top. With a NULL
+ * top, only the type and the (NULL) parent link are set.
+ */
+static void insert_box(fz_context *ctx, fz_html *box, int type, fz_html *top)
+{
+	box->type = type;
+	box->up = top;
+
+	if (!top)
+		return;
+
+	/* first child hangs off top->down, later ones off the previous sibling */
+	if (top->last)
+		top->last->next = box;
+	else
+		top->down = box;
+	top->last = box;
+}
+
+/*
+ * Insert box as a BOX_BLOCK child of the nearest block box at or above
+ * top, and return that block box. If top is not a block, flow or inline
+ * box, nothing is inserted and top is returned unchanged.
+ */
+static fz_html *insert_block_box(fz_context *ctx, fz_html *box, fz_html *top)
+{
+	if (top->type == BOX_BLOCK || top->type == BOX_FLOW || top->type == BOX_INLINE)
+	{
+		/* climb out of any flow/inline boxes; a no-op when top is already a block */
+		while (top->type != BOX_BLOCK)
+			top = top->up;
+		insert_box(ctx, box, BOX_BLOCK, top);
+	}
+	return top;
+}
+
+/*
+ * Insert box as a BOX_BREAK child of the nearest block box at or above
+ * top, and return that block box. If top is not a block, flow or inline
+ * box, nothing is inserted and top is returned unchanged.
+ */
+static fz_html *insert_break_box(fz_context *ctx, fz_html *box, fz_html *top)
+{
+	if (top->type == BOX_BLOCK || top->type == BOX_FLOW || top->type == BOX_INLINE)
+	{
+		/* climb out of any flow/inline boxes; a no-op when top is already a block */
+		while (top->type != BOX_BLOCK)
+			top = top->up;
+		insert_box(ctx, box, BOX_BREAK, top);
+	}
+	return top;
+}
+
+/*
+ * Insert box as a BOX_INLINE box under top. Inline boxes must live
+ * inside a flow box: when top is a block, the box goes into the block's
+ * last child if that is a flow box, otherwise a new flow box is created
+ * first. When top is a flow or inline box, the box is added directly.
+ */
+static void insert_inline_box(fz_context *ctx, fz_html *box, fz_html *top)
+{
+ if (top->type == BOX_BLOCK)
+ {
+ if (top->last && top->last->type == BOX_FLOW)
+ {
+ insert_box(ctx, box, BOX_INLINE, top->last);
+ }
+ else
+ {
+ fz_html *flow = new_box(ctx);
+ /* set when the new flow box is the first child of its block */
+ flow->is_first_flow = !top->last;
+ insert_box(ctx, flow, BOX_FLOW, top);
+ insert_box(ctx, box, BOX_INLINE, flow);
+ }
+ }
+ else if (top->type == BOX_FLOW)
+ {
+ insert_box(ctx, box, BOX_INLINE, top);
+ }
+ else if (top->type == BOX_INLINE)
+ {
+ insert_box(ctx, box, BOX_INLINE, top);
+ }
+}
+
+/*
+ * Build the box tree for node and all of its following siblings,
+ * inserting the boxes under top.
+ *
+ * For element nodes the CSS rules are matched (up_match is the match of
+ * the parent element) and the resulting display type decides how the
+ * box is inserted; children are processed recursively. Text nodes
+ * become words and spaces in the enclosing flow box.
+ *
+ * Note that inserting a block or break box may move top up to the
+ * nearest block box, which then also applies to the following siblings.
+ */
+static void generate_boxes(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri,
+ fz_xml *node, fz_html *top, fz_css_rule *rule, fz_css_match *up_match)
+{
+ fz_css_match match;
+ fz_html *box;
+ const char *tag;
+ int display;
+
+ while (node)
+ {
+ match.up = up_match;
+ match.count = 0;
+
+ tag = fz_xml_tag(node);
+ if (tag)
+ {
+ fz_match_css(ctx, &match, rule, node);
+
+ display = fz_get_css_match_display(&match);
+
+ /* br and img are handled by tag name, regardless of display type */
+ if (!strcmp(tag, "br"))
+ {
+ box = new_box(ctx);
+ fz_apply_css_style(ctx, set, &box->style, &match);
+ top = insert_break_box(ctx, box, top);
+ }
+
+ else if (!strcmp(tag, "img"))
+ {
+ /* an img without a src attribute produces no box */
+ const char *src = fz_xml_att(node, "src");
+ if (src)
+ {
+ box = new_box(ctx);
+ fz_apply_css_style(ctx, set, &box->style, &match);
+ insert_inline_box(ctx, box, top);
+ generate_image(ctx, zip, base_uri, box, src);
+ }
+ }
+
+ /* elements with display:none are skipped together with their children */
+ else if (display != DIS_NONE)
+ {
+ box = new_box(ctx);
+ fz_apply_css_style(ctx, set, &box->style, &match);
+
+ if (display == DIS_BLOCK)
+ {
+ top = insert_block_box(ctx, box, top);
+ }
+ else if (display == DIS_LIST_ITEM)
+ {
+ /* list items are inserted exactly like blocks here */
+ top = insert_block_box(ctx, box, top);
+ }
+ else if (display == DIS_INLINE)
+ {
+ insert_inline_box(ctx, box, top);
+ }
+ else
+ {
+ fz_warn(ctx, "unknown box display type");
+ insert_box(ctx, box, BOX_BLOCK, top);
+ }
+
+ if (fz_xml_down(node))
+ generate_boxes(ctx, set, zip, base_uri, fz_xml_down(node), box, rule, &match);
+
+ // TODO: remove empty flow boxes
+ }
+ }
+ else
+ {
+ /* text node: text needs an inline box to carry its style */
+ if (top->type != BOX_INLINE)
+ {
+ /* wrap the text in an anonymous inline box that copies the style of top */
+ box = new_box(ctx);
+ insert_inline_box(ctx, box, top);
+ box->style = top->style;
+ generate_text(ctx, box, fz_xml_text(node));
+ }
+ else
+ {
+ generate_text(ctx, top, fz_xml_text(node));
+ }
+ }
+
+ node = fz_xml_next(node);
+ }
+}
+
+/*
+ * Compute the size of an image flow node so that it fits inside a w by h
+ * area. The image is only ever scaled down, using one uniform factor (the
+ * smaller of the horizontal and vertical ratios). The position is reset
+ * to the origin.
+ */
+static void measure_image(fz_context *ctx, fz_html_flow *node, float w, float h)
+{
+	float sx = 1, sy = 1, scale;
+
+	if (node->image->w > w)
+		sx = w / node->image->w;
+	if (node->image->h > h)
+		sy = h / node->image->h;
+	scale = fz_min(sx, sy);
+
+	node->x = 0;
+	node->y = 0;
+	node->w = node->image->w * scale;
+	node->h = node->image->h * scale;
+}
+
+/*
+ * Measure a text flow node.
+ *
+ * The node's font size is resolved against the inherited 'em', the line
+ * height against that font size, and the width is the sum of the glyph
+ * advances of the node's text scaled by the font size. The resolved font
+ * size is stored in node->em; the position is reset to the origin.
+ */
+static void measure_word(fz_context *ctx, fz_html_flow *node, float em)
+{
+	const char *p;
+	float size, width;
+	int rune, glyph;
+
+	size = fz_from_css_number(node->style->font_size, em, em);
+
+	node->x = 0;
+	node->y = 0;
+	node->h = fz_from_css_number_scale(node->style->line_height, size, size, size);
+
+	width = 0;
+	for (p = node->text; *p; )
+	{
+		p += fz_chartorune(&rune, p);
+		glyph = fz_encode_character(ctx, node->style->font, rune);
+		width += fz_advance_glyph(ctx, node->style->font, glyph) * size;
+	}
+
+	node->w = width;
+	node->em = size;
+}
+
+/*
+ * Compute the height of the line made of the nodes in [node, end) and
+ * store the baseline offset (from the top of the line) in *baseline.
+ *
+ * Images contribute their full height as ascent. Other nodes contribute
+ * 0.8 em of ascent and 0.2 em of descent. The line is at least as tall as
+ * its tallest node and as the largest ascent plus descent; any extra
+ * space is split evenly above and below.
+ */
+static float measure_line(fz_html_flow *node, fz_html_flow *end, float *baseline)
+{
+	float ascent = 0, descent = 0, height = 0;
+	fz_html_flow *it;
+
+	for (it = node; it != end; it = it->next)
+	{
+		if (it->type != FLOW_IMAGE)
+		{
+			float a = it->em * 0.8;
+			float d = it->em * 0.2;
+			if (a > ascent)
+				ascent = a;
+			if (d > descent)
+				descent = d;
+		}
+		else if (it->h > ascent)
+		{
+			ascent = it->h;
+		}
+
+		if (it->h > height)
+			height = it->h;
+		if (ascent + descent > height)
+			height = ascent + descent;
+	}
+
+	*baseline = ascent + (height - ascent - descent) / 2;
+	return height;
+}
+
+/*
+ * Assign final x/y positions to the flow nodes in [node, end), which form
+ * one line of 'box'.
+ *
+ * indent   - extra offset applied to the start of the line
+ * page_w   - available line width
+ * line_w   - width actually used by the nodes on the line
+ * align    - TA_* text alignment; the unused width (page_w - line_w) is
+ *            put in front of the line for right/center alignment, or
+ *            spread evenly over the glue nodes when justifying
+ * baseline - baseline offset from the top of the line
+ *
+ * The line is placed directly below the current bottom of the box
+ * (box->y + box->h). Images sit on the baseline; other nodes are placed
+ * at the baseline, shifted for sub/super vertical alignment.
+ */
+static void layout_line(fz_context *ctx, float indent, float page_w, float line_w, int align, fz_html_flow *node, fz_html_flow *end, fz_html *box, float baseline)
+{
+	float x = box->x + indent;
+	float y = box->y + box->h;
+	float slop = page_w - line_w;
+	float justify = 0;
+	float va;
+	int n = 0;
+
+	if (align == TA_JUSTIFY)
+	{
+		fz_html_flow *it;
+		for (it = node; it != end; it = it->next)
+			if (it->type == FLOW_GLUE)
+				++n;
+		/* A line without glue has nothing to stretch; skip the
+		 * division instead of dividing by zero. */
+		if (n > 0)
+			justify = slop / n;
+	}
+	else if (align == TA_RIGHT)
+		x += slop;
+	else if (align == TA_CENTER)
+		x += slop / 2;
+
+	while (node != end)
+	{
+		switch (node->style->vertical_align)
+		{
+		default:
+		case VA_BASELINE:
+			va = 0;
+			break;
+		case VA_SUB:
+			va = node->em * 0.2f;
+			break;
+		case VA_SUPER:
+			va = node->em * -0.3f;
+			break;
+		}
+		node->x = x;
+		if (node->type == FLOW_IMAGE)
+			node->y = y + baseline - node->h;
+		else
+			node->y = y + baseline + va;
+		x += node->w;
+		/* justified lines widen every glue by the same amount */
+		if (node->type == FLOW_GLUE)
+			x += justify;
+		node = node->next;
+	}
+}
+
+/*
+ * Starting at 'node', skip any leading glue and then the run of non-glue
+ * nodes that follows. The widths of all skipped nodes are added to *w.
+ * Returns the glue node that ends the run, or NULL at the end of the list.
+ */
+static fz_html_flow *find_next_glue(fz_html_flow *node, float *w)
+{
+	/* leading glue */
+	for (; node && node->type == FLOW_GLUE; node = node->next)
+		*w += node->w;
+
+	/* the word (or image) run itself */
+	for (; node && node->type != FLOW_GLUE; node = node->next)
+		*w += node->w;
+
+	return node;
+}
+
+/*
+ * Skip the glue nodes starting at 'node', adding their widths to *w.
+ * Returns the first non-glue node, or NULL at the end of the list.
+ */
+static fz_html_flow *find_next_word(fz_html_flow *node, float *w)
+{
+	for (; node && node->type == FLOW_GLUE; node = node->next)
+		*w += node->w;
+	return node;
+}
+
+/*
+ * Lay out the flow nodes of flow box 'box' inside its parent block 'top'.
+ *
+ * Every node is measured first, then nodes are packed greedily into lines
+ * no wider than top->w. Each finished line is positioned with layout_line
+ * and its height is added to box->h. When a line does not fit in what is
+ * left of the current page (pages are page_h tall), the box is first
+ * padded to the next page boundary.
+ *
+ * The first line of the first flow in a block is indented by the parent's
+ * text-indent. The last line of a justified paragraph is left aligned.
+ *
+ * A word that is wider than the available width is still placed on an
+ * otherwise empty line (and overflows it). Without that rule 'line_end'
+ * would still be NULL when the line is flushed, which makes measure_line
+ * and layout_line consume every remaining node as one single line.
+ */
+static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, float page_h)
+{
+	fz_html_flow *node, *line_start, *word_start, *word_end, *line_end;
+	/* find_next_word accumulates into this, so it must start defined */
+	float glue_w = 0;
+	float word_w;
+	float line_w;
+	float indent;
+	float avail, line_h;
+	float baseline;
+	int align;
+
+	em = fz_from_css_number(box->style.font_size, em, em);
+	indent = box->is_first_flow ? fz_from_css_number(top->style.text_indent, em, top->w) : 0;
+	align = top->style.text_align;
+
+	box->x = top->x;
+	box->y = top->y + top->h;
+	box->w = top->w;
+	box->h = 0;
+
+	if (!box->flow_head)
+		return;
+
+	for (node = box->flow_head; node; node = node->next)
+		if (node->type == FLOW_IMAGE)
+			measure_image(ctx, node, top->w, page_h);
+		else
+			measure_word(ctx, node, em);
+
+	/* leading glue is dropped; its width is not counted */
+	line_start = find_next_word(box->flow_head, &glue_w);
+	line_end = NULL;
+
+	line_w = indent;
+	word_w = 0;
+	word_start = line_start;
+	while (word_start)
+	{
+		word_end = find_next_glue(word_start, &word_w);
+		/* line_end is NULL here only while the current line is still
+		 * empty; always accept the first word so the line can end. */
+		if (line_w + word_w <= top->w || !line_end)
+		{
+			line_w += word_w;
+			glue_w = 0;
+			line_end = word_end;
+			word_start = find_next_word(word_end, &glue_w);
+			/* the glue before the next word counts towards its width */
+			word_w = glue_w;
+		}
+		else
+		{
+			avail = page_h - fmodf(box->y + box->h, page_h);
+			line_h = measure_line(line_start, line_end, &baseline);
+			if (line_h > avail)
+				box->h += avail;
+			layout_line(ctx, indent, top->w, line_w, align, line_start, line_end, box, baseline);
+			box->h += line_h;
+			/* glue at a line break is skipped and not measured */
+			glue_w = 0;
+			word_start = find_next_word(line_end, &glue_w);
+			line_start = word_start;
+			line_end = NULL;
+			indent = 0;
+			line_w = 0;
+			word_w = 0;
+		}
+	}
+
+	/* don't justify the last line of a paragraph */
+	if (align == TA_JUSTIFY)
+		align = TA_LEFT;
+
+	if (line_start)
+	{
+		avail = page_h - fmodf(box->y + box->h, page_h);
+		line_h = measure_line(line_start, line_end, &baseline);
+		if (line_h > avail)
+			box->h += avail;
+		layout_line(ctx, indent, top->w, line_w, align, line_start, line_end, box, baseline);
+		box->h += line_h;
+	}
+}
+
+/*
+ * Lay out block box 'box' inside its parent 'top'.
+ *
+ * Margins, padding and (if a border style is set) border widths are
+ * resolved from the style, the content rectangle is placed below the
+ * current bottom of the parent, and all children are laid out in order,
+ * growing box->h as they are added.
+ *
+ * top_collapse_margin is the margin already accounted for above this box;
+ * only the part of the top margin exceeding it is kept. While walking the
+ * children, box_collapse_margin tracks the margin that the next child
+ * block may collapse with.
+ */
+static void layout_block(fz_context *ctx, fz_html *box, fz_html *top, float em, float top_collapse_margin, float page_h)
+{
+	fz_html *child;
+	float box_collapse_margin;
+	int prev_br;
+	int i;
+
+	float *margin = box->margin;
+	float *border = box->border;
+	float *padding = box->padding;
+
+	em = fz_from_css_number(box->style.font_size, em, em);
+
+	for (i = 0; i < 4; ++i)
+		margin[i] = fz_from_css_number(box->style.margin[i], em, top->w);
+
+	for (i = 0; i < 4; ++i)
+		padding[i] = fz_from_css_number(box->style.padding[i], em, top->w);
+
+	if (box->style.border_style)
+	{
+		for (i = 0; i < 4; ++i)
+			border[i] = fz_from_css_number(box->style.border_width[i], em, top->w);
+	}
+	else
+	{
+		for (i = 0; i < 4; ++i)
+			border[i] = 0;
+	}
+
+	/* the top margin can only collapse with the first child's margin
+	 * when no padding or border separates them */
+	box_collapse_margin = (padding[T] == 0 && border[T] == 0) ? margin[T] : 0;
+
+	/* keep only what exceeds the margin already applied above us */
+	if (margin[T] > top_collapse_margin)
+		margin[T] -= top_collapse_margin;
+	else
+		margin[T] = 0;
+
+	box->x = top->x + margin[L] + border[L] + padding[L];
+	box->y = top->y + top->h + margin[T] + border[T] + padding[T];
+	box->w = top->w - (margin[L] + margin[R] + border[L] + border[R] + padding[L] + padding[R]);
+	box->h = 0;
+
+	prev_br = 0;
+	for (child = box->down; child; child = child->next)
+	{
+		switch (child->type)
+		{
+		case BOX_BLOCK:
+			layout_block(ctx, child, box, em, box_collapse_margin, page_h);
+			box->h += child->h +
+				child->padding[T] + child->padding[B] +
+				child->border[T] + child->border[B] +
+				child->margin[T] + child->margin[B];
+			box_collapse_margin = child->margin[B];
+			prev_br = 0;
+			break;
+
+		case BOX_BREAK:
+			/* TODO: interaction with page breaks */
+			/* only a break directly following another break adds a line */
+			if (prev_br)
+				box->h += fz_from_css_number_scale(box->style.line_height, em, em, em);
+			prev_br = 1;
+			break;
+
+		case BOX_FLOW:
+			layout_flow(ctx, child, box, em, page_h);
+			/* empty flows leave the collapse state untouched */
+			if (child->h > 0)
+			{
+				box->h += child->h;
+				box_collapse_margin = 0;
+				prev_br = 0;
+			}
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	/* collapse the last child's bottom margin into our own */
+	if (padding[B] == 0 && border[B] == 0 && margin[B] > 0)
+	{
+		box->h -= box_collapse_margin;
+		if (margin[B] < box_collapse_margin)
+			margin[B] = box_collapse_margin;
+	}
+}
+
+/*
+ * Draw the words and images of flow box 'box' that fall on the page
+ * spanning page_top..page_bot.
+ *
+ * Images are drawn when any part of them overlaps the page; words only
+ * when their y position lies within the page. Glue nodes draw nothing.
+ */
+static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm)
+{
+	fz_html_flow *node;
+
+	for (node = box->flow_head; node; node = node->next)
+	{
+		if (node->type == FLOW_IMAGE)
+		{
+			fz_matrix local_ctm;
+
+			if (node->y > page_bot || node->y + node->h < page_top)
+				continue;
+
+			local_ctm = *ctm;
+			fz_pre_translate(&local_ctm, node->x, node->y);
+			fz_pre_scale(&local_ctm, node->w, node->h);
+			fz_fill_image(ctx, dev, node->image, &local_ctm, 1);
+		}
+		else if (node->type == FLOW_WORD)
+		{
+			fz_text *text;
+			fz_matrix trm;
+			const char *p;
+			float rgb[3];
+			float pen_x, pen_y;
+			int rune, glyph;
+
+			if (node->y > page_bot || node->y < page_top)
+				continue;
+
+			/* negative y scale flips the glyphs */
+			fz_scale(&trm, node->em, -node->em);
+			text = fz_new_text(ctx, node->style->font, &trm, 0);
+
+			pen_x = node->x;
+			pen_y = node->y;
+			for (p = node->text; *p; )
+			{
+				p += fz_chartorune(&rune, p);
+				glyph = fz_encode_character(ctx, node->style->font, rune);
+				fz_add_text(ctx, text, glyph, rune, pen_x, pen_y);
+				pen_x += fz_advance_glyph(ctx, node->style->font, glyph) * node->em;
+			}
+
+			rgb[0] = node->style->color.r / 255.0f;
+			rgb[1] = node->style->color.g / 255.0f;
+			rgb[2] = node->style->color.b / 255.0f;
+
+			fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), rgb, 1);
+
+			fz_drop_text(ctx, text);
+		}
+	}
+}
+
+/*
+ * Fill the axis-aligned rectangle (x0,y0)-(x1,y1) on 'dev'.
+ * rgba holds four floats: the first three are the device RGB color, the
+ * fourth is used as the fill alpha.
+ */
+static void draw_rect(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, float *rgba, float x0, float y0, float x1, float y1)
+{
+	fz_path *outline;
+
+	outline = fz_new_path(ctx);
+	fz_moveto(ctx, outline, x0, y0);
+	fz_lineto(ctx, outline, x1, y0);
+	fz_lineto(ctx, outline, x1, y1);
+	fz_lineto(ctx, outline, x0, y1);
+	fz_closepath(ctx, outline);
+
+	fz_fill_path(ctx, dev, outline, 0, ctm, fz_device_rgb(ctx), rgba, rgba[3]);
+
+	fz_drop_path(ctx, outline);
+}
+
+/*
+ * Draw block box 'box' and, recursively, its block and flow children.
+ *
+ * The box is skipped entirely when its padding rectangle does not overlap
+ * the page spanning page_top..page_bot. Otherwise the background is
+ * filled over the padding rectangle (if the background color is not fully
+ * transparent) and each border side with a positive width is filled as a
+ * rectangle outside of it (if the border color is not fully transparent).
+ */
+static void draw_block_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm)
+{
+	fz_html *child;
+	float rgba[4];
+	float x0, y0, x1, y1;
+
+	float *border = box->border;
+	float *padding = box->padding;
+
+	/* padding rectangle: content area grown by the padding */
+	x0 = box->x - padding[L];
+	y0 = box->y - padding[T];
+	x1 = box->x + box->w + padding[R];
+	y1 = box->y + box->h + padding[B];
+
+	if (y0 > page_bot || y1 < page_top)
+		return;
+
+	if (box->style.background_color.a > 0)
+	{
+		rgba[0] = box->style.background_color.r / 255.0f;
+		rgba[1] = box->style.background_color.g / 255.0f;
+		rgba[2] = box->style.background_color.b / 255.0f;
+		rgba[3] = box->style.background_color.a / 255.0f;
+		draw_rect(ctx, dev, ctm, rgba, x0, y0, x1, y1);
+	}
+
+	if (box->style.border_color.a > 0)
+	{
+		rgba[0] = box->style.border_color.r / 255.0f;
+		rgba[1] = box->style.border_color.g / 255.0f;
+		rgba[2] = box->style.border_color.b / 255.0f;
+		rgba[3] = box->style.border_color.a / 255.0f;
+
+		/* top and bottom strips span the full outer width; left and
+		 * right strips span the full outer height */
+		if (border[T] > 0)
+			draw_rect(ctx, dev, ctm, rgba, x0 - border[L], y0 - border[T], x1 + border[R], y0);
+		if (border[B] > 0)
+			draw_rect(ctx, dev, ctm, rgba, x0 - border[L], y1, x1 + border[R], y1 + border[B]);
+		if (border[L] > 0)
+			draw_rect(ctx, dev, ctm, rgba, x0 - border[L], y0 - border[T], x0, y1 + border[B]);
+		if (border[R] > 0)
+			draw_rect(ctx, dev, ctm, rgba, x1, y0 - border[T], x1 + border[R], y1 + border[B]);
+	}
+
+	for (child = box->down; child; child = child->next)
+	{
+		if (child->type == BOX_BLOCK)
+			draw_block_box(ctx, child, page_top, page_bot, dev, ctm);
+		else if (child->type == BOX_FLOW)
+			draw_flow_box(ctx, child, page_top, page_bot, dev, ctm);
+	}
+}
+
+/*
+ * Draw the part of the laid out box tree that lies between page_top and
+ * page_bot. The transform is shifted up by page_top so that the top of
+ * the requested page ends up at y = 0.
+ */
+void
+fz_draw_html(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *inctm)
+{
+	fz_matrix page_ctm;
+
+	page_ctm = *inctm;
+	fz_pre_translate(&page_ctm, 0, -page_top);
+
+	draw_block_box(ctx, box, page_top, page_bot, dev, &page_ctm);
+}
+
+/*
+ * Join the text of all direct children of 'root' into one newly
+ * allocated, zero terminated string. Children without text are skipped.
+ * The result comes from fz_malloc; html_load_css releases it with
+ * fz_free.
+ */
+static char *concat_text(fz_context *ctx, fz_xml *root)
+{
+	fz_xml *child;
+	const char *text;
+	char *out;
+	int total = 1; /* room for the terminator */
+	int pos = 0;
+	int len;
+
+	/* first pass: total length */
+	for (child = fz_xml_down(root); child; child = fz_xml_next(child))
+	{
+		text = fz_xml_text(child);
+		if (text)
+			total += strlen(text);
+	}
+
+	out = fz_malloc(ctx, total);
+
+	/* second pass: copy the pieces back to back */
+	for (child = fz_xml_down(root); child; child = fz_xml_next(child))
+	{
+		text = fz_xml_text(child);
+		if (!text)
+			continue;
+		len = strlen(text);
+		memcpy(out + pos, text, len);
+		pos += len;
+	}
+
+	out[pos] = 0;
+	return out;
+}
+
+/*
+ * Collect the style sheets referenced by or embedded in the XML tree
+ * starting at 'root' (the node, its following siblings and all of their
+ * descendants) and parse them onto the rule chain 'css'.
+ *
+ * Handled are <link rel="stylesheet"> elements whose type is absent or
+ * "text/css" (the href is resolved against base_uri and read from 'zip')
+ * and <style> elements (their text content is parsed directly).
+ *
+ * Returns the updated rule chain.
+ */
+static fz_css_rule *
+html_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css_rule *css, fz_xml *root)
+{
+	fz_xml *node;
+	fz_buffer *buf;
+	char path[2048];
+
+	for (node = root; node; node = fz_xml_next(node))
+	{
+		const char *tag = fz_xml_tag(node);
+
+		if (tag && strcmp(tag, "link") == 0)
+		{
+			char *rel = fz_xml_att(node, "rel");
+			if (rel && strcasecmp(rel, "stylesheet") == 0)
+			{
+				char *type = fz_xml_att(node, "type");
+				char *href = NULL;
+
+				/* a missing type attribute is taken to mean CSS */
+				if (!type || strcmp(type, "text/css") == 0)
+					href = fz_xml_att(node, "href");
+
+				if (href)
+				{
+					fz_strlcpy(path, base_uri, sizeof path);
+					fz_strlcat(path, "/", sizeof path);
+					fz_strlcat(path, href, sizeof path);
+					fz_cleanname(path);
+
+					buf = fz_read_archive_entry(ctx, zip, path);
+					/* the parser expects a zero terminated string */
+					fz_write_buffer_byte(ctx, buf, 0);
+					css = fz_parse_css(ctx, css, (char*)buf->data, path);
+					fz_drop_buffer(ctx, buf);
+				}
+			}
+		}
+
+		if (tag && strcmp(tag, "style") == 0)
+		{
+			char *inline_css = concat_text(ctx, node);
+			css = fz_parse_css(ctx, css, inline_css, "<style>");
+			fz_free(ctx, inline_css);
+		}
+
+		if (fz_xml_down(node))
+			css = html_load_css(ctx, zip, base_uri, css, fz_xml_down(node));
+	}
+
+	return css;
+}
+
+/*
+ * Lay out the box tree rooted at 'box' for a page area that is w wide and
+ * h tall, using 'em' as the initial font size. A temporary box with the
+ * page width and zero height stands in as the parent of the root block.
+ */
+void
+fz_layout_html(fz_context *ctx, fz_html *box, float w, float h, float em)
+{
+	fz_html page;
+
+	init_box(ctx, &page);
+	page.h = 0;
+	page.w = w;
+
+	layout_block(ctx, box, &page, em, 0, h);
+}
+
+/*
+ * Parse the HTML/XML document in 'buf' and build its (not yet laid out)
+ * box tree.
+ *
+ * Style rules are gathered in this order, all onto one rule chain: the
+ * built-in default style sheet, the optional user style sheet 'user_css'
+ * (may be NULL), then the style sheets linked from or embedded in the
+ * document (linked ones are read from 'zip' relative to 'base_uri').
+ *
+ * The parsed XML and the CSS rules are dropped before returning; the
+ * newly created root box is returned.
+ */
+fz_html *
+fz_parse_html(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css)
+{
+	fz_xml *xml;
+	fz_css_rule *css;
+	fz_css_match match;
+	fz_html *box;
+
+	xml = fz_parse_xml(ctx, buf->data, buf->len, 1);
+
+	css = fz_parse_css(ctx, NULL, default_css, "<default>");
+	/* Extend the existing chain, as html_load_css does. Starting a new
+	 * chain here would discard (and leak) the default rules whenever a
+	 * user style sheet is supplied. */
+	if (user_css)
+		css = fz_parse_css(ctx, css, user_css, "<user>");
+	css = html_load_css(ctx, zip, base_uri, css, xml);
+
+	// print_rules(css);
+
+	box = new_box(ctx);
+
+	match.up = NULL;
+	match.count = 0;
+
+	generate_boxes(ctx, set, zip, base_uri, xml, box, css, &match);
+
+	fz_drop_css(ctx, css);
+	fz_drop_xml(ctx, xml);
+
+	return box;
+}