diff options
Diffstat (limited to 'source')
-rw-r--r-- | source/fitz/document-all.c | 2 | ||||
-rw-r--r-- | source/html/css-apply.c | 993 | ||||
-rw-r--r-- | source/html/css-parse.c | 865 | ||||
-rw-r--r-- | source/html/epub-doc.c | 326 | ||||
-rw-r--r-- | source/html/html-doc.c | 166 | ||||
-rw-r--r-- | source/html/html-font.c | 47 | ||||
-rw-r--r-- | source/html/html-layout.c | 909 |
7 files changed, 3308 insertions, 0 deletions
diff --git a/source/fitz/document-all.c b/source/fitz/document-all.c index e1eb4a58..bfe57592 100644 --- a/source/fitz/document-all.c +++ b/source/fitz/document-all.c @@ -7,4 +7,6 @@ void fz_register_document_handlers(fz_context *ctx) fz_register_document_handler(ctx, &cbz_document_handler); fz_register_document_handler(ctx, &img_document_handler); fz_register_document_handler(ctx, &tiff_document_handler); + fz_register_document_handler(ctx, &html_document_handler); + fz_register_document_handler(ctx, &epub_document_handler); } diff --git a/source/html/css-apply.c b/source/html/css-apply.c new file mode 100644 index 00000000..9a6d7a89 --- /dev/null +++ b/source/html/css-apply.c @@ -0,0 +1,993 @@ +#include "mupdf/html.h" + +static const char *inherit_list[] = { + "color", + "direction", + "font-family", + "font-size", + "font-style", + "font-variant", + "font-weight", + "letter-spacing", + "line-height", + "list-style-image", + "list-style-position", + "list-style-type", + "orphans", + "quotes", + "text-align", + "text-indent", + "text-transform", + "visibility", + "white-space", + "widows", + "word-spacing", +}; + +static const char *border_width_kw[] = { + "medium", + "thick", + "thin", +}; + +static const char *border_style_kw[] = { + "dashed", + "dotted", + "double", + "groove", + "hidden", + "inset", + "none", + "outset", + "ridge", + "solid", +}; + +static const char *color_kw[] = { + "aqua", + "black", + "blue", + "fuchsia", + "gray", + "green", + "lime", + "maroon", + "navy", + "olive", + "orange", + "purple", + "red", + "silver", + "teal", + "transparent", + "white", + "yellow", +}; + +static int +keyword_in_list(const char *name, const char **list, int n) +{ + int l = 0; + int r = n - 1; + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(name, list[m]); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return 1; + } + return 0; +} + +/* + * Compute specificity + */ + +static int +count_condition_ids(fz_css_condition *cond) +{ + int n = 0; + while (cond) + { + if (cond->type == '#') + n ++; + cond = cond->next; + } + return n; +} + +static int +count_selector_ids(fz_css_selector *sel) +{ + int n = count_condition_ids(sel->cond); + if (sel->left && sel->right) + { + n += count_selector_ids(sel->left); + n += count_selector_ids(sel->right); + } + return n; +} + +static int +count_condition_atts(fz_css_condition *cond) +{ + int n = 0; + while (cond) + { + if (cond->type != '#' && cond->type != ':') + n ++; + cond = cond->next; + } + return n; +} + +static int +count_selector_atts(fz_css_selector *sel) +{ + int n = count_condition_atts(sel->cond); + if (sel->left && sel->right) + { + n += count_selector_atts(sel->left); + n += count_selector_atts(sel->right); + } + return n; +} + +static int +count_condition_names(fz_css_condition *cond) +{ + int n = 0; + while (cond) + { + if (cond->type == ':') + n ++; + cond = cond->next; + } + return n; +} + +static int +count_selector_names(fz_css_selector *sel) +{ + int n = count_condition_names(sel->cond); + if (sel->left && sel->right) + { + n += count_selector_names(sel->left); + n += count_selector_names(sel->right); + } + else if (sel->name) + { + n ++; + } + return n; +} + +#define INLINE_SPECIFICITY 1000 + +static int +selector_specificity(fz_css_selector *sel) +{ + int b = count_selector_ids(sel); + int c = count_selector_atts(sel); + int d = count_selector_names(sel); + return b * 100 + c * 10 + d; +} + +/* + * Selector matching + */ + +static int +match_id_condition(fz_xml *node, const char *p) +{ + const char *s = fz_xml_att(node, "id"); + if (s && !strcmp(s, p)) + return 1; + return 0; +} + +static int +match_class_condition(fz_xml *node, const char *p) +{ + const char *s = fz_xml_att(node, "class"); + char buf[1024]; + if (s) { + strcpy(buf, s); + s = strtok(buf, " "); + while (s) { + if (!strcmp(s, p)) + return 1; + s = strtok(NULL, " "); + } + } + return 0; +} + +static int +match_condition(fz_css_condition *cond, fz_xml *node) +{ + if (!cond) + return 1; + + switch (cond->type) { + default: return 0; + case ':': return 0; /* don't support pseudo-classes */ + case '#': if (!match_id_condition(node, cond->val)) return 0; break; + case '.': if (!match_class_condition(node, cond->val)) return 0; break; + } + + return match_condition(cond->next, node); +} + +static int +match_selector(fz_css_selector *sel, fz_xml *node) +{ + if (!node) + return 0; + + if (sel->combine) + { + /* descendant */ + if (sel->combine == ' ') + { + fz_xml *parent = fz_xml_up(node); + while (parent) + { + if (match_selector(sel->left, parent)) + if (match_selector(sel->right, node)) + return 1; + parent = fz_xml_up(parent); + } + return 0; + } + + /* child */ + if (sel->combine == '>') + { + fz_xml *parent = fz_xml_up(node); + if (!parent) + return 0; + if (!match_selector(sel->left, parent)) + return 0; + if (!match_selector(sel->right, node)) + return 0; + } + + /* adjacent */ + if (sel->combine == '+') + { + fz_xml *prev = fz_xml_prev(node); + while (prev && !fz_xml_tag(prev)) + prev = fz_xml_prev(prev); + if (!prev) + return 0; + if (!fz_xml_tag(prev)) + return 0; + if (!match_selector(sel->left, prev)) + return 0; + if (!match_selector(sel->right, node)) + return 0; + } + } + + if (sel->name) + { + if (strcmp(sel->name, fz_xml_tag(node))) + return 0; + } + + if (sel->cond) + { + if (!match_condition(sel->cond, node)) + return 0; + } + + return 1; +} + +/* + * Annotating nodes with properties and expanding shorthand forms. + */ + +static int +count_values(fz_css_value *value) +{ + int n = 0; + while (value) + { + n++; + value = value->next; + } + return n; +} + +static void add_property(fz_css_match *match, const char *name, fz_css_value *value, int spec); + +static void +add_shorthand_trbl(fz_css_match *match, fz_css_value *value, int spec, + const char *name_t, const char *name_r, const char *name_b, const char *name_l) +{ + int n = count_values(value); + + if (n == 1) + { + add_property(match, name_t, value, spec); + add_property(match, name_r, value, spec); + add_property(match, name_b, value, spec); + add_property(match, name_l, value, spec); + } + + if (n == 2) + { + fz_css_value *a = value; + fz_css_value *b = value->next; + + add_property(match, name_t, a, spec); + add_property(match, name_r, b, spec); + add_property(match, name_b, a, spec); + add_property(match, name_l, b, spec); + } + + if (n == 3) + { + fz_css_value *a = value; + fz_css_value *b = value->next; + fz_css_value *c = value->next->next; + + add_property(match, name_t, a, spec); + add_property(match, name_r, b, spec); + add_property(match, name_b, c, spec); + add_property(match, name_l, b, spec); + } + + if (n == 4) + { + fz_css_value *a = value; + fz_css_value *b = value->next; + fz_css_value *c = value->next->next; + fz_css_value *d = value->next->next->next; + + add_property(match, name_t, a, spec); + add_property(match, name_r, b, spec); + add_property(match, name_b, c, spec); + add_property(match, name_l, d, spec); + } +} + +static void +add_shorthand_margin(fz_css_match *match, fz_css_value *value, int spec) +{ + add_shorthand_trbl(match, value, spec, + "margin-top", "margin-right", "margin-bottom", "margin-left"); +} + +static void +add_shorthand_padding(fz_css_match *match, fz_css_value *value, int spec) +{ + add_shorthand_trbl(match, value, spec, + "padding-top", "padding-right", "padding-bottom", "padding-left"); +} + +static void +add_shorthand_border_width(fz_css_match *match, fz_css_value *value, int spec) +{ + add_shorthand_trbl(match, value, spec, + "border-width-top", "border-width-right", "border-width-bottom", "border-width-left"); +} + +static void +add_shorthand_border(fz_css_match *match, fz_css_value *value, int spec) +{ + while (value) + { + if (value->type == CSS_COLOR) + { + add_property(match, "border-color", value, spec); + } + else if (value->type == CSS_KEYWORD) + { + if (keyword_in_list(value->data, border_width_kw, nelem(border_width_kw))) + { + add_property(match, "border-width-top", value, spec); + add_property(match, "border-width-right", value, spec); + add_property(match, "border-width-bottom", value, spec); + add_property(match, "border-width-left", value, spec); + } + else if (keyword_in_list(value->data, border_style_kw, nelem(border_style_kw))) + { + add_property(match, "border-style", value, spec); + } + else if (keyword_in_list(value->data, color_kw, nelem(color_kw))) + { + add_property(match, "border-color", value, spec); + } + } + else + { + add_property(match, "border-width-top", value, spec); + add_property(match, "border-width-right", value, spec); + add_property(match, "border-width-bottom", value, spec); + add_property(match, "border-width-left", value, spec); + } + value = value->next; + } +} + +static void +add_property(fz_css_match *match, const char *name, fz_css_value *value, int spec) +{ + int i; + + if (!strcmp(name, "margin")) + { + add_shorthand_margin(match, value, spec); + return; + } + if (!strcmp(name, "padding")) + { + add_shorthand_padding(match, value, spec); + return; + } + if (!strcmp(name, "border-width")) + { + add_shorthand_border_width(match, value, spec); + return; + } + if (!strcmp(name, "border")) + { + add_shorthand_border(match, value, spec); + return; + } + + /* shorthand expansions: */ + /* TODO: border-color */ + /* TODO: border-style */ + /* TODO: font */ + /* TODO: list-style */ + /* TODO: background */ + + for (i = 0; i < match->count; ++i) + { + if (!strcmp(match->prop[i].name, name)) + { + if (match->prop[i].spec <= spec) + { + match->prop[i].value = value; + match->prop[i].spec = spec; + } + return; + } + } + + if (match->count + 1 >= nelem(match->prop)) + { + // fz_warn(ctx, "too many css properties"); + return; + } + + match->prop[match->count].name = name; + match->prop[match->count].value = value; + match->prop[match->count].spec = spec; + ++match->count; +} + +void +fz_match_css(fz_context *ctx, fz_css_match *match, fz_css_rule *css, fz_xml *node) +{ + fz_css_rule *rule; + fz_css_selector *sel; + fz_css_property *prop, *head, *tail; + const char *s; + + for (rule = css; rule; rule = rule->next) + { + sel = rule->selector; + while (sel) + { + if (match_selector(sel, node)) + { + for (prop = rule->declaration; prop; prop = prop->next) + add_property(match, prop->name, prop->value, selector_specificity(sel)); + break; + } + sel = sel->next; + } + } + + s = fz_xml_att(node, "style"); + if (s) + { + head = tail = prop = fz_parse_css_properties(ctx, s); + while (prop) + { + add_property(match, prop->name, prop->value, INLINE_SPECIFICITY); + tail = prop; + prop = prop->next; + } + if (tail) + tail->next = css->garbage; + css->garbage = head; + } +} + +static fz_css_value * +value_from_raw_property(fz_css_match *match, const char *name) +{ + int i; + for (i = 0; i < match->count; ++i) + if (!strcmp(match->prop[i].name, name)) + return match->prop[i].value; + return NULL; +} + +static fz_css_value * +value_from_property(fz_css_match *match, const char *name) +{ + fz_css_value *value; + + value = value_from_raw_property(match, name); + if (match->up) + { + if (value && !strcmp(value->data, "inherit")) + return value_from_property(match->up, name); + if (!value && keyword_in_list(name, inherit_list, nelem(inherit_list))) + return value_from_property(match->up, name); + } + return value; +} + +static const char * +string_from_property(fz_css_match *match, const char *name, const char *initial) +{ + fz_css_value *value; + value = value_from_property(match, name); + if (!value) + return initial; + return value->data; +} + +static fz_css_number +make_number(float v, int u) +{ + fz_css_number n; + n.value = v; + n.unit = u; + return n; +} + +static fz_css_number +number_from_value(fz_css_value *value, float initial, int initial_unit) +{ + char *p; + + if (!value) + return make_number(initial, initial_unit); + + if (value->type == CSS_PERCENT) + return make_number(strtof(value->data, NULL), N_PERCENT); + + if (value->type == CSS_NUMBER) + return make_number(strtof(value->data, NULL), N_NUMBER); + + if (value->type == CSS_LENGTH) + { + float x = strtof(value->data, &p); + + if (p[0] == 'e' && p[1] == 'm') + return make_number(x, N_SCALE); + if (p[0] == 'e' && p[1] == 'x') + return make_number(x / 2, N_SCALE); + + if (p[0] == 'i' && p[1] == 'n') + return make_number(x * 72, N_NUMBER); + if (p[0] == 'c' && p[1] == 'm') + return make_number(x * 7200 / 254, N_NUMBER); + if (p[0] == 'm' && p[1] == 'm') + return make_number(x * 720 / 254, N_NUMBER); + if (p[0] == 'p' && p[1] == 'c') + return make_number(x * 12, N_NUMBER); + + if (p[0] == 'p' && p[1] == 't') + return make_number(x, N_NUMBER); + if (p[0] == 'p' && p[1] == 'x') + return make_number(x, N_NUMBER); + + return make_number(x, N_NUMBER); + } + + return make_number(initial, initial_unit); +} + +static fz_css_number +number_from_property(fz_css_match *match, const char *property, float initial, int initial_unit) +{ + return number_from_value(value_from_property(match, property), initial, initial_unit); +} + +static fz_css_number +border_width_from_property(fz_css_match *match, const char *property) +{ + fz_css_value *value = value_from_property(match, property); + if (value) + { + if (!strcmp(value->data, "thin")) + return make_number(1, N_NUMBER); + if (!strcmp(value->data, "medium")) + return make_number(2, N_NUMBER); + if (!strcmp(value->data, "thick")) + return make_number(4, N_NUMBER); + return number_from_value(value, 0, N_NUMBER); + } + return make_number(2, N_NUMBER); /* initial: 'medium' */ +} + +float +fz_from_css_number(fz_css_number number, float em, float width) +{ + switch (number.unit) { + default: + case N_NUMBER: return number.value; + case N_SCALE: return number.value * em; + case N_PERCENT: return number.value * 0.01 * width; + } +} + +float +fz_from_css_number_scale(fz_css_number number, float scale, float em, float width) +{ + switch (number.unit) { + default: + case N_NUMBER: return number.value * scale; + case N_SCALE: return number.value * em; + case N_PERCENT: return number.value * 0.01 * width; + } +} + +static fz_css_color +make_color(int r, int g, int b, int a) +{ + fz_css_color c; + c.r = r; + c.g = g; + c.b = b; + c.a = a; + return c; +} + +static int tohex(int c) +{ + if (c - '0' < 10) + return c - '0'; + return (c | 32) - 'a' + 10; +} + +static fz_css_color +color_from_value(fz_css_value *value, fz_css_color initial) +{ + if (!value) + return initial; + if (value->type == CSS_COLOR) + { + int r = tohex(value->data[0]) * 16 + tohex(value->data[1]); + int g = tohex(value->data[2]) * 16 + tohex(value->data[3]); + int b = tohex(value->data[4]) * 16 + tohex(value->data[5]); + return make_color(r, g, b, 255); + } + if (value->type == CSS_KEYWORD) + { + if (!strcmp(value->data, "transparent")) + return make_color(0, 0, 0, 0); + if (!strcmp(value->data, "maroon")) + return make_color(0x80, 0x00, 0x00, 255); + if (!strcmp(value->data, "red")) + return make_color(0xFF, 0x00, 0x00, 255); + if (!strcmp(value->data, "orange")) + return make_color(0xFF, 0xA5, 0x00, 255); + if (!strcmp(value->data, "yellow")) + return make_color(0xFF, 0xFF, 0x00, 255); + if (!strcmp(value->data, "olive")) + return make_color(0x80, 0x80, 0x00, 255); + if (!strcmp(value->data, "purple")) + return make_color(0x80, 0x00, 0x80, 255); + if (!strcmp(value->data, "fuchsia")) + return make_color(0xFF, 0x00, 0xFF, 255); + if (!strcmp(value->data, "white")) + return make_color(0xFF, 0xFF, 0xFF, 255); + if (!strcmp(value->data, "lime")) + return make_color(0x00, 0xFF, 0x00, 255); + if (!strcmp(value->data, "green")) + return make_color(0x00, 0x80, 0x00, 255); + if (!strcmp(value->data, "navy")) + return make_color(0x00, 0x00, 0x80, 255); + if (!strcmp(value->data, "blue")) + return make_color(0x00, 0x00, 0xFF, 255); + if (!strcmp(value->data, "aqua")) + return make_color(0x00, 0xFF, 0xFF, 255); + if (!strcmp(value->data, "teal")) + return make_color(0x00, 0x80, 0x80, 255); + if (!strcmp(value->data, "black")) + return make_color(0x00, 0x00, 0x00, 255); + if (!strcmp(value->data, "silver")) + return make_color(0xC0, 0xC0, 0xC0, 255); + if (!strcmp(value->data, "gray")) + return make_color(0x80, 0x80, 0x80, 255); + return make_color(0, 0, 0, 255); + } + return initial; +} + +static fz_css_color +color_from_property(fz_css_match *match, const char *property, fz_css_color initial) +{ + return color_from_value(value_from_property(match, property), initial); +} + +int +fz_get_css_match_display(fz_css_match *match) +{ + fz_css_value *value = value_from_property(match, "display"); + if (value) + { + if (!strcmp(value->data, "none")) + return DIS_NONE; + if (!strcmp(value->data, "inline")) + return DIS_INLINE; + if (!strcmp(value->data, "block")) + return DIS_BLOCK; + if (!strcmp(value->data, "list-item")) + return DIS_LIST_ITEM; + } + return DIS_INLINE; +} + +static int +white_space_from_property(fz_css_match *match) +{ + fz_css_value *value = value_from_property(match, "white-space"); + if (value) + { + if (!strcmp(value->data, "normal")) return WS_NORMAL; + if (!strcmp(value->data, "pre")) return WS_PRE; + if (!strcmp(value->data, "nowrap")) return WS_NOWRAP; + if (!strcmp(value->data, "pre-wrap")) return WS_PRE_WRAP; + if (!strcmp(value->data, "pre-line")) return WS_PRE_LINE; + } + return WS_NORMAL; +} + +void +fz_default_css_style(fz_context *ctx, fz_css_style *style) +{ + memset(style, 0, sizeof *style); + style->text_align = TA_LEFT; + style->vertical_align = VA_BASELINE; + style->white_space = WS_NORMAL; + style->font_size = make_number(1, N_SCALE); +} + +void +fz_apply_css_style(fz_context *ctx, fz_html_font_set *set, fz_css_style *style, fz_css_match *match) +{ + fz_css_value *value; + + fz_css_color black = { 0, 0, 0, 255 }; + fz_css_color transparent = { 0, 0, 0, 0 }; + + fz_default_css_style(ctx, style); + + style->white_space = white_space_from_property(match); + + value = value_from_property(match, "text-align"); + if (value) + { + if (!strcmp(value->data, "left")) + style->text_align = TA_LEFT; + if (!strcmp(value->data, "right")) + style->text_align = TA_RIGHT; + if (!strcmp(value->data, "center")) + style->text_align = TA_CENTER; + if (!strcmp(value->data, "justify")) + style->text_align = TA_JUSTIFY; + } + + value = value_from_property(match, "vertical-align"); + if (value) + { + if (!strcmp(value->data, "baseline")) + style->vertical_align = VA_BASELINE; + if (!strcmp(value->data, "sub")) + style->vertical_align = VA_SUB; + if (!strcmp(value->data, "super")) + style->vertical_align = VA_SUPER; + if (!strcmp(value->data, "top")) + style->vertical_align = VA_TOP; + if (!strcmp(value->data, "bottom")) + style->vertical_align = VA_BOTTOM; + } + + value = value_from_property(match, "font-size"); + if (value) + { + if (!strcmp(value->data, "xx-large")) style->font_size = make_number(1.73, N_SCALE); + else if (!strcmp(value->data, "x-large")) style->font_size = make_number(1.44, N_SCALE); + else if (!strcmp(value->data, "large")) style->font_size = make_number(1.2, N_SCALE); + else if (!strcmp(value->data, "medium")) style->font_size = make_number(1, N_SCALE); + else if (!strcmp(value->data, "small")) style->font_size = make_number(0.83, N_SCALE); + else if (!strcmp(value->data, "x-small")) style->font_size = make_number(0.69, N_SCALE); + else if (!strcmp(value->data, "xx-small")) style->font_size = make_number(0.69, N_SCALE); + else if (!strcmp(value->data, "larger")) style->font_size = make_number(1.2f, N_SCALE); + else if (!strcmp(value->data, "smaller")) style->font_size = make_number(1/1.2f, N_SCALE); + else style->font_size = number_from_value(value, 12, N_NUMBER); + } + else + { + style->font_size = make_number(1, N_SCALE); + } + + value = value_from_property(match, "border-style"); + if (value) + { + if (!strcmp(value->data, "none")) + style->border_style = BS_NONE; + if (!strcmp(value->data, "hidden")) + style->border_style = BS_NONE; + if (!strcmp(value->data, "solid")) + style->border_style = BS_SOLID; + } + + style->line_height = number_from_property(match, "line-height", 1.2, N_SCALE); + + style->text_indent = number_from_property(match, "text-indent", 0, N_NUMBER); + + style->margin[0] = number_from_property(match, "margin-top", 0, N_NUMBER); + style->margin[1] = number_from_property(match, "margin-right", 0, N_NUMBER); + style->margin[2] = number_from_property(match, "margin-bottom", 0, N_NUMBER); + style->margin[3] = number_from_property(match, "margin-left", 0, N_NUMBER); + + style->padding[0] = number_from_property(match, "padding-top", 0, N_NUMBER); + style->padding[1] = number_from_property(match, "padding-right", 0, N_NUMBER); + style->padding[2] = number_from_property(match, "padding-bottom", 0, N_NUMBER); + style->padding[3] = number_from_property(match, "padding-left", 0, N_NUMBER); + + style->border_width[0] = border_width_from_property(match, "border-width-top"); + style->border_width[1] = border_width_from_property(match, "border-width-right"); + style->border_width[2] = border_width_from_property(match, "border-width-bottom"); + style->border_width[3] = border_width_from_property(match, "border-width-left"); + + style->color = color_from_property(match, "color", black); + style->background_color = color_from_property(match, "background-color", transparent); + style->border_color = color_from_property(match, "border-color", style->color); + + { + const char *font_family = string_from_property(match, "font-family", "serif"); + const char *font_variant = string_from_property(match, "font-variant", "normal"); + const char *font_style = string_from_property(match, "font-style", "normal"); + const char *font_weight = string_from_property(match, "font-weight", "normal"); + style->font = fz_load_html_font(ctx, set, font_family, font_variant, font_style, font_weight); + } +} + +/* + * Pretty printing + */ + +void +print_value(fz_css_value *val) +{ + printf("%s", val->data); + if (val->args) + { + printf("("); + print_value(val->args); + printf(")"); + } + if (val->next) + { + printf(" "); + print_value(val->next); + } +} + +void +print_property(fz_css_property *prop) +{ + printf("\t%s: ", prop->name); + print_value(prop->value); + printf(" !%d;\n", prop->spec); +} + +void +print_condition(fz_css_condition *cond) +{ + if (cond->type == '=') + printf("[%s=%s]", cond->key, cond->val); + else if (cond->type == '[') + printf("[%s]", cond->key); + else + printf("%c%s", cond->type, cond->val); + if (cond->next) + print_condition(cond->next); +} + +void +print_selector(fz_css_selector *sel) +{ + if (sel->combine) + { +putchar('('); + print_selector(sel->left); + if (sel->combine == ' ') + printf(" "); + else + printf(" %c ", sel->combine); + print_selector(sel->right); +putchar(')'); + } + else if (sel->name) + printf("%s", sel->name); + else + printf("*"); + if (sel->cond) + { + print_condition(sel->cond); + } +} + +void +print_rule(fz_css_rule *rule) +{ + fz_css_selector *sel; + fz_css_property *prop; + + for (sel = rule->selector; sel; sel = sel->next) + { + print_selector(sel); + printf(" !%d", selector_specificity(sel)); + if (sel->next) + printf(", "); + } + + printf("\n{\n"); + for (prop = rule->declaration; prop; prop = prop->next) + { + print_property(prop); + } + printf("}\n"); +} + +void +print_rules(fz_css_rule *rule) +{ + while (rule) + { + print_rule(rule); + rule = rule->next; + } +} + +void +print_style(fz_css_style *style) +{ + printf("style {\n"); + printf("\tfont-size = %g%c;\n", style->font_size.value, style->font_size.unit); + printf("\tfont = %s;\n", style->font->name); + printf("\tline-height = %g%c;\n", style->line_height.value, style->line_height.unit); + printf("\ttext-indent = %g%c;\n", style->text_indent.value, style->text_indent.unit); + printf("\ttext-align = %d;\n", style->text_align); + printf("\tvertical-align = %d;\n", style->vertical_align); + printf("\tmargin = %g%c %g%c %g%c %g%c;\n", + style->margin[0].value, style->margin[0].unit, + style->margin[1].value, style->margin[1].unit, + style->margin[2].value, style->margin[2].unit, + style->margin[3].value, style->margin[3].unit); + printf("\tpadding = %g%c %g%c %g%c %g%c;\n", + style->padding[0].value, style->padding[0].unit, + style->padding[1].value, style->padding[1].unit, + style->padding[2].value, style->padding[2].unit, + style->padding[3].value, style->padding[3].unit); + printf("}\n"); +} diff --git a/source/html/css-parse.c b/source/html/css-parse.c new file mode 100644 index 00000000..e3ddd48f --- /dev/null +++ b/source/html/css-parse.c @@ -0,0 +1,865 @@ +#include "mupdf/html.h" + +struct lexbuf +{ + fz_context *ctx; + const char *s; + const char *file; + int line; + int lookahead; + int c; + int color; + int string_len; + char string[1024]; +}; + +FZ_NORETURN static void fz_css_error(struct lexbuf *buf, const char *msg) +{ + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "css syntax error: %s (%s:%d)", msg, buf->file, buf->line); +} + +static fz_css_rule *fz_new_css_rule(fz_context *ctx, fz_css_selector *selector, fz_css_property *declaration) +{ + fz_css_rule *rule = fz_malloc_struct(ctx, fz_css_rule); + rule->selector = selector; + rule->declaration = declaration; + rule->garbage = NULL; + rule->next = NULL; + return rule; +} + +static fz_css_selector *fz_new_css_selector(fz_context *ctx, const char *name) +{ + fz_css_selector *sel = fz_malloc_struct(ctx, fz_css_selector); + sel->name = name ? fz_strdup(ctx, name) : NULL; + sel->combine = 0; + sel->cond = NULL; + sel->left = NULL; + sel->right = NULL; + sel->next = NULL; + return sel; +} + +static fz_css_condition *fz_new_css_condition(fz_context *ctx, int type, const char *key, const char *val) +{ + fz_css_condition *cond = fz_malloc_struct(ctx, fz_css_condition); + cond->type = type; + cond->key = key ? fz_strdup(ctx, key) : NULL; + cond->val = val ? fz_strdup(ctx, val) : NULL; + cond->next = NULL; + return cond; +} + +static fz_css_property *fz_new_css_property(fz_context *ctx, const char *name, fz_css_value *value, int spec) +{ + fz_css_property *prop = fz_malloc_struct(ctx, fz_css_property); + prop->name = fz_strdup(ctx, name); + prop->value = value; + prop->spec = spec; + prop->next = NULL; + return prop; +} + +static fz_css_value *fz_new_css_value(fz_context *ctx, int type, const char *data) +{ + fz_css_value *val = fz_malloc_struct(ctx, fz_css_value); + val->type = type; + val->data = fz_strdup(ctx, data); + val->args = NULL; + val->next = NULL; + return val; +} + +static void fz_drop_css_value(fz_context *ctx, fz_css_value *val) +{ + while (val) + { + fz_css_value *next = val->next; + fz_drop_css_value(ctx, val->args); + fz_free(ctx, val->data); + fz_free(ctx, val); + val = next; + } +} + +static void fz_drop_css_condition(fz_context *ctx, fz_css_condition *cond) +{ + while (cond) + { + fz_css_condition *next = cond->next; + fz_free(ctx, cond->key); + fz_free(ctx, cond->val); + fz_free(ctx, cond); + cond = next; + } +} + +static void fz_drop_css_selector(fz_context *ctx, fz_css_selector *sel) +{ + while (sel) + { + fz_css_selector *next = sel->next; + fz_free(ctx, sel->name); + fz_drop_css_condition(ctx, sel->cond); + fz_drop_css_selector(ctx, sel->left); + fz_drop_css_selector(ctx, sel->right); + fz_free(ctx, sel); + sel = next; + } +} + +static void fz_drop_css_property(fz_context *ctx, fz_css_property *prop) +{ + while (prop) + { + fz_css_property *next = prop->next; + fz_free(ctx, prop->name); + fz_drop_css_value(ctx, prop->value); + fz_free(ctx, prop); + prop = next; + } +} + +void fz_drop_css(fz_context *ctx, fz_css_rule *rule) +{ + while (rule) + { + fz_css_rule *next = rule->next; + fz_drop_css_selector(ctx, rule->selector); + fz_drop_css_property(ctx, rule->declaration); + fz_drop_css_property(ctx, rule->garbage); + fz_free(ctx, rule); + rule = next; + } +} + +static void css_lex_next(struct lexbuf *buf) +{ + buf->c = *(buf->s++); + if (buf->c == '\n') + ++buf->line; +} + +static void css_lex_init(fz_context *ctx, struct lexbuf *buf, const char *s, const char *file) +{ + buf->ctx = ctx; + buf->s = s; + buf->c = 0; + buf->file = file; + buf->line = 1; + css_lex_next(buf); + + buf->color = 0; + buf->string_len = 0; +} + +static int iswhite(int c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'; +} + +static int isnmstart(int c) +{ + return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= 128 && c <= 255); +} + +static int isnmchar(int c) +{ + return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || c == '-' || (c >= 128 && c <= 255); +} + +static void css_push_char(struct lexbuf *buf, int c) +{ + if (buf->string_len + 1 >= nelem(buf->string)) + fz_css_error(buf, "token too long"); + buf->string[buf->string_len++] = c; +} + +static int css_lex_accept(struct lexbuf *buf, int t) +{ + if (buf->c == t) + { + css_lex_next(buf); + return 1; + } + return 0; +} + +static void css_lex_expect(struct lexbuf *buf, int t) +{ + if (!css_lex_accept(buf, t)) + fz_css_error(buf, "unexpected character"); +} + +static int ishex(int c, int *v) +{ + if (c >= '0' && c <= '9') + { + *v = c - '0'; + return 1; + } + if (c >= 'A' && c <= 'F') + { + *v = c - 'A' + 0xA; + return 1; + } + if (c >= 'a' && c <= 'f') + { + *v = c - 'a' + 0xA; + return 1; + } + return 0; +} + +static int css_lex_accept_hex(struct lexbuf *buf, int *v) +{ + if (ishex(buf->c, v)) + { + css_lex_next(buf); + return 1; + } + return 0; +} + +static int css_lex_number(struct lexbuf *buf) +{ + while (buf->c >= '0' && buf->c <= '9') + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + + if (css_lex_accept(buf, '.')) + { + css_push_char(buf, '.'); + while (buf->c >= '0' && buf->c <= '9') + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + } + + if (css_lex_accept(buf, '%')) + { + css_push_char(buf, '%'); + css_push_char(buf, 0); + return CSS_PERCENT; + } + + if (isnmstart(buf->c)) + { + css_push_char(buf, buf->c); + css_lex_next(buf); + while (isnmchar(buf->c)) + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + css_push_char(buf, 0); + return CSS_LENGTH; + } + + css_push_char(buf, 0); + return CSS_NUMBER; +} + +static int css_lex_keyword(struct lexbuf *buf) +{ + while (isnmchar(buf->c)) + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + css_push_char(buf, 0); + return CSS_KEYWORD; +} + +static int css_lex_string(struct lexbuf *buf, int q) +{ + while (buf->c && buf->c != q) + { + if (css_lex_accept(buf, '\\')) + { + if (css_lex_accept(buf, 'n')) + css_push_char(buf, '\n'); + else if (css_lex_accept(buf, 'r')) + css_push_char(buf, '\r'); + else if (css_lex_accept(buf, 'f')) + css_push_char(buf, '\f'); + else if (css_lex_accept(buf, '\f')) + /* line continuation */ ; + else if (css_lex_accept(buf, '\n')) + /* line continuation */ ; + else if (css_lex_accept(buf, '\r')) + css_lex_accept(buf, '\n'); + else + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + } + else + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + } + css_lex_expect(buf, q); + css_push_char(buf, 0); + return CSS_STRING; +} + +static int css_lex(struct lexbuf *buf) +{ + int t; + + // TODO: keyword escape sequences + + buf->string_len = 0; + + while (buf->c) + { +restart: + while (iswhite(buf->c)) + css_lex_next(buf); + + if (buf->c == 0) + break; + + if (css_lex_accept(buf, '/')) + { + if (css_lex_accept(buf, '*')) + { + while (buf->c) + { + if (css_lex_accept(buf, '*')) + { + while (buf->c == '*') + css_lex_next(buf); + if (css_lex_accept(buf, '/')) + goto restart; + } + css_lex_next(buf); + } + fz_css_error(buf, "unterminated comment"); + } + return '/'; + } + + if (css_lex_accept(buf, '<')) + { + if (css_lex_accept(buf, '!')) + { + css_lex_expect(buf, '-'); + css_lex_expect(buf, '-'); + continue; /* ignore CDO */ + } + return '<'; + } + + if (css_lex_accept(buf, '-')) + { + if (css_lex_accept(buf, '-')) + { + css_lex_expect(buf, '>'); + continue; /* ignore CDC */ + } + if (buf->c >= '0' && buf->c <= '9') + { + css_push_char(buf, '-'); + return css_lex_number(buf); + } + if (isnmstart(buf->c)) + { + css_push_char(buf, '-'); + css_push_char(buf, buf->c); + css_lex_next(buf); + return css_lex_keyword(buf); + } + return '-'; + } + + if (css_lex_accept(buf, '+')) + { + if (buf->c >= '0' && buf->c <= '9') + return css_lex_number(buf); + return '+'; + } + + if (css_lex_accept(buf, '.')) + { + if (buf->c >= '0' && buf->c <= '9') + { + css_push_char(buf, '.'); + return css_lex_number(buf); + } + return '.'; + } + + if (css_lex_accept(buf, '#')) + { + int a, b, c, d, e, f; + if (!css_lex_accept_hex(buf, &a)) goto colorerror; + if (!css_lex_accept_hex(buf, &b)) goto colorerror; + if (!css_lex_accept_hex(buf, &c)) goto colorerror; + if (css_lex_accept_hex(buf, &d)) + { + if (!css_lex_accept_hex(buf, &e)) goto colorerror; + if (!css_lex_accept_hex(buf, &f)) goto colorerror; + buf->color = (a << 20) | (b << 16) | (c << 12) | (d << 8) | (e << 4) | f; + } + else + { + buf->color = (a << 20) | (b << 12) | (c << 4); + } + sprintf(buf->string, "%06x", buf->color); // XXX + return CSS_COLOR; +colorerror: + fz_css_error(buf, "invalid color"); + } + + if (css_lex_accept(buf, '"')) + return css_lex_string(buf, '"'); + if (css_lex_accept(buf, '\'')) + return css_lex_string(buf, '\''); + + if (buf->c >= '0' && buf->c <= '9') + return css_lex_number(buf); + + if (css_lex_accept(buf, 'u')) + { + if (css_lex_accept(buf, 'r')) + { + if (css_lex_accept(buf, 'l')) + { + if (css_lex_accept(buf, '(')) + { + // string or url + css_lex_expect(buf, ')'); + return CSS_URI; + } + css_push_char(buf, 'u'); + css_push_char(buf, 'r'); + css_push_char(buf, 'l'); + return css_lex_keyword(buf); + } + css_push_char(buf, 'u'); + css_push_char(buf, 'r'); + return css_lex_keyword(buf); + } + css_push_char(buf, 'u'); + return css_lex_keyword(buf); + } + + if (isnmstart(buf->c)) + { + css_push_char(buf, buf->c); + css_lex_next(buf); + return css_lex_keyword(buf); + } + + t = buf->c; + css_lex_next(buf); + return t; + } + return EOF; +} + +static void next(struct lexbuf *buf) +{ + buf->lookahead = css_lex(buf); +} + +static int accept(struct lexbuf *buf, int t) +{ + if (buf->lookahead == t) + { + next(buf); + return 1; + } + return 0; +} + +static void expect(struct lexbuf *buf, int t) +{ + if (accept(buf, t)) + return; + fz_css_error(buf, "unexpected token"); +} + +static int iscond(int t) +{ + return t == ':' || t == '.' || t == '#' || t == '['; +} + +static fz_css_value *parse_value_list(struct lexbuf *buf); + +static fz_css_value *parse_value(struct lexbuf *buf) +{ + fz_css_value *v; + + if (buf->lookahead == CSS_KEYWORD) + { + v = fz_new_css_value(buf->ctx, CSS_KEYWORD, buf->string); + next(buf); + + if (accept(buf, '(')) + { + v->type = '('; + v->args = parse_value_list(buf); + expect(buf, ')'); + } + + return v; + } + + switch (buf->lookahead) + { + case CSS_NUMBER: + case CSS_LENGTH: + case CSS_PERCENT: + case CSS_STRING: + case CSS_COLOR: + case CSS_URI: + v = fz_new_css_value(buf->ctx, buf->lookahead, buf->string); + next(buf); + return v; + } + + if (accept(buf, ',')) + return fz_new_css_value(buf->ctx, ',', ","); + if (accept(buf, '/')) + return fz_new_css_value(buf->ctx, '/', "/"); + + fz_css_error(buf, "expected value"); +} + +static fz_css_value *parse_value_list(struct lexbuf *buf) +{ + fz_css_value *head, *tail; + + head = tail = NULL; + + while (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != '!' && + buf->lookahead != ')' && buf->lookahead != EOF) + { + if (!head) + head = tail = parse_value(buf); + else + tail = tail->next = parse_value(buf); + } + + return head; +} + +static fz_css_property *parse_declaration(struct lexbuf *buf) +{ + fz_css_property *p; + + if (buf->lookahead != CSS_KEYWORD) + fz_css_error(buf, "expected keyword in property"); + p = fz_new_css_property(buf->ctx, buf->string, NULL, 0); + next(buf); + + expect(buf, ':'); + + p->value = parse_value_list(buf); + + /* !important */ + if (accept(buf, '!')) + expect(buf, CSS_KEYWORD); + + return p; +} + +static fz_css_property *parse_declaration_list(struct lexbuf *buf) +{ + fz_css_property *head, *tail; + + if (buf->lookahead == '}' || buf->lookahead == EOF) + return NULL; + + head = tail = parse_declaration(buf); + + while (accept(buf, ';')) + { + if (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != EOF) + { + tail = tail->next = parse_declaration(buf); + } + } + + return head; +} + +static char *parse_attrib_value(struct lexbuf *buf) +{ + char *s; + + if (buf->lookahead == CSS_KEYWORD || buf->lookahead == CSS_STRING) + { + s = fz_strdup(buf->ctx, buf->string); + next(buf); + return s; + } + + fz_css_error(buf, "expected attribute value"); +} + +static fz_css_condition *parse_condition(struct lexbuf *buf) +{ + fz_css_condition *c; + + if (accept(buf, ':')) + { + if (buf->lookahead != CSS_KEYWORD) + fz_css_error(buf, "expected keyword after ':'"); + c = fz_new_css_condition(buf->ctx, ':', "pseudo", buf->string); + next(buf); + return c; + } + + if (accept(buf, '.')) + { + if (buf->lookahead != CSS_KEYWORD) + fz_css_error(buf, "expected keyword after '.'"); + c = fz_new_css_condition(buf->ctx, '.', "class", buf->string); + next(buf); + return c; + } + + if (accept(buf, '#')) + { + if (buf->lookahead != CSS_KEYWORD) + fz_css_error(buf, "expected keyword after '#'"); + c = fz_new_css_condition(buf->ctx, '#', "id", buf->string); + next(buf); + return c; + } + + if (accept(buf, '[')) + { + if (buf->lookahead != CSS_KEYWORD) + fz_css_error(buf, "expected keyword after '['"); + + c = fz_new_css_condition(buf->ctx, '[', buf->string, NULL); + next(buf); + + if (accept(buf, '=')) + { + c->type = '='; + c->val = parse_attrib_value(buf); + } + else if (accept(buf, '|')) + { + expect(buf, '='); + c->type = '|'; + c->val = parse_attrib_value(buf); + } + else if (accept(buf, '~')) + { + expect(buf, '='); + c->type = '~'; + c->val = parse_attrib_value(buf); + } + + expect(buf, ']'); + + return c; + } + + fz_css_error(buf, "expected condition"); +} + +static fz_css_condition *parse_condition_list(struct lexbuf *buf) +{ + fz_css_condition *head, *tail; + + head = tail = parse_condition(buf); + while (iscond(buf->lookahead)) + { + tail = tail->next = parse_condition(buf); + } + return head; +} + +static fz_css_selector *parse_simple_selector(struct lexbuf *buf) +{ + fz_css_selector *s; + + if (accept(buf, '*')) + { + s = fz_new_css_selector(buf->ctx, NULL); + if (iscond(buf->lookahead)) + s->cond = parse_condition_list(buf); + return s; + } + else if (buf->lookahead == CSS_KEYWORD) + { + s = fz_new_css_selector(buf->ctx, buf->string); + next(buf); + if (iscond(buf->lookahead)) + s->cond = parse_condition_list(buf); + return s; + } + else if (iscond(buf->lookahead)) + { + s = fz_new_css_selector(buf->ctx, NULL); + s->cond = parse_condition_list(buf); + return s; + } + + fz_css_error(buf, "expected selector"); +} + +static fz_css_selector *parse_adjacent_selector(struct lexbuf *buf) +{ + fz_css_selector *s, *a, *b; + + a = parse_simple_selector(buf); + if (accept(buf, '+')) + { + b = parse_adjacent_selector(buf); + s = fz_new_css_selector(buf->ctx, NULL); + s->combine = '+'; + s->left = a; + s->right = b; + return s; + } + return a; +} + +static fz_css_selector *parse_child_selector(struct lexbuf *buf) +{ + fz_css_selector *s, *a, *b; + + a = parse_adjacent_selector(buf); + if (accept(buf, '>')) + { + b = parse_child_selector(buf); + s = fz_new_css_selector(buf->ctx, NULL); + s->combine = '>'; + s->left = a; + s->right = b; + return s; + } + return a; +} + +static fz_css_selector *parse_descendant_selector(struct lexbuf *buf) +{ + fz_css_selector *s, *a, *b; + + a = parse_child_selector(buf); + if (buf->lookahead != ',' && buf->lookahead != '{' && buf->lookahead != EOF) + { + b = parse_descendant_selector(buf); + s = fz_new_css_selector(buf->ctx, NULL); + s->combine = ' '; + s->left = a; + s->right = b; + return s; + } + return a; +} + +static fz_css_selector *parse_selector_list(struct lexbuf *buf) +{ + fz_css_selector *head, *tail; + + head = tail = parse_descendant_selector(buf); + while (accept(buf, ',')) + { + tail = tail->next = parse_descendant_selector(buf); + } + return head; +} + +static fz_css_rule *parse_rule(struct lexbuf *buf) +{ + fz_css_selector *s; + fz_css_property *p; + + s = parse_selector_list(buf); + expect(buf, '{'); + p = parse_declaration_list(buf); + expect(buf, '}'); + return fz_new_css_rule(buf->ctx, s, p); +} + +static void parse_at_rule(struct lexbuf *buf) +{ + expect(buf, CSS_KEYWORD); + + /* skip until '{' or ';' */ + while (buf->lookahead != EOF) + { + if (accept(buf, ';')) + return; + if (accept(buf, '{')) + { + int depth = 1; + while (buf->lookahead != EOF && depth > 0) + { + if (accept(buf, '{')) + ++depth; + else if (accept(buf, '}')) + --depth; + else + next(buf); + } + return; + } + next(buf); + } +} + +static fz_css_rule *parse_stylesheet(struct lexbuf *buf, fz_css_rule *chain) +{ + fz_css_rule *rule, **nextp, *tail; + + tail = chain; + if (tail) + { + while (tail->next) + tail = tail->next; + nextp = &tail->next; + } + else + { + nextp = &tail; + } + + while (buf->lookahead != EOF) + { + if (accept(buf, '@')) + { + parse_at_rule(buf); + } + else + { + rule = *nextp = parse_rule(buf); + nextp = &rule->next; + } + } + + return chain ? chain : tail; +} + +fz_css_property *fz_parse_css_properties(fz_context *ctx, const char *source) +{ + struct lexbuf buf; + css_lex_init(ctx, &buf, source, "<inline>"); + next(&buf); + return parse_declaration_list(&buf); +} + +fz_css_rule *fz_parse_css(fz_context *ctx, fz_css_rule *chain, const char *source, const char *file) +{ + struct lexbuf buf; + css_lex_init(ctx, &buf, source, file); + next(&buf); + return parse_stylesheet(&buf, chain); +} diff --git a/source/html/epub-doc.c b/source/html/epub-doc.c new file mode 100644 index 00000000..69963ff0 --- /dev/null +++ b/source/html/epub-doc.c @@ -0,0 +1,326 @@ +#include "mupdf/html.h" + +#define DEFW (450) +#define DEFH (600) +#define DEFEM (12) + +typedef struct epub_document_s epub_document; +typedef struct epub_chapter_s epub_chapter; +typedef struct epub_page_s epub_page; + +struct epub_document_s +{ + fz_document super; + fz_archive *zip; + fz_html_font_set *set; + float page_w, page_h, em; + int count; + epub_chapter *spine; +}; + +struct epub_chapter_s +{ + int start; + fz_html *box; + epub_chapter *next; +}; + +struct epub_page_s +{ + fz_page super; + epub_document *doc; + int number; +}; + +static void +epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em) +{ + epub_document *doc = (epub_document*)doc_; + epub_chapter *ch; + + doc->page_w = w; + doc->page_h = h; + doc->em = em; + + printf("epub: laying out chapters.\n"); + for (ch = doc->spine; ch; ch = ch->next) + fz_layout_html(ctx, ch->box, w, h, em); + printf("epub: done.\n"); +} + +static int +epub_count_pages(fz_context *ctx, fz_document *doc_) +{ + epub_document *doc = (epub_document*)doc_; + epub_chapter *ch; + int count = 0; + for (ch = doc->spine; ch; ch = ch->next) + count += ceilf(ch->box->h / doc->page_h); + return count; +} + +static void +epub_drop_page_imp(fz_context *ctx, fz_page *page_) +{ +} + +static fz_rect * +epub_bound_page(fz_context *ctx, fz_page *page_, fz_rect *bbox) +{ + epub_page *page = (epub_page*)page_; + epub_document *doc = page->doc; + bbox->x0 = 0; + bbox->y0 = 0; + bbox->x1 = doc->page_w; + bbox->y1 = doc->page_h; + return bbox; +} + +static void +epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie) +{ + epub_page *page = (epub_page*)page_; + epub_document *doc = page->doc; + epub_chapter *ch; + int n = page->number; + + int count = 0; + for (ch = doc->spine; ch; ch = ch->next) + { + int cn = ceilf(ch->box->h / doc->page_h); + if (n < count + cn) + { + fz_draw_html(ctx, ch->box, (n-count) * doc->page_h, (n-count+1) * doc->page_h, dev, ctm); + break; + } + count += cn; + } +} + +static fz_page * +epub_load_page(fz_context *ctx, fz_document *doc_, int number) +{ + epub_document *doc = (epub_document*)doc_; + epub_page *page = fz_new_page(ctx, sizeof *page); + page->super.bound_page = epub_bound_page; + page->super.run_page_contents = epub_run_page; + page->super.drop_page_imp = epub_drop_page_imp; + page->doc = doc; + page->number = number; + return (fz_page*)page; +} + +static void +epub_close_document(fz_context *ctx, fz_document *doc_) +{ + epub_document *doc = (epub_document*)doc_; + epub_chapter *ch, *next; + ch = doc->spine; + while (ch) + { + next = ch->next; + fz_drop_html(ctx, ch->box); + fz_free(ctx, ch); + ch = next; + } + fz_drop_archive(ctx, doc->zip); + fz_drop_html_font_set(ctx, doc->set); + fz_free(ctx, doc); +} + +static const char * +rel_path_from_idref(fz_xml *manifest, const char *idref) +{ + fz_xml *item; + if (!idref) + return NULL; + item = fz_xml_find_down(manifest, "item"); + while (item) + { + const char *id = fz_xml_att(item, "id"); + if (id && !strcmp(id, idref)) + return fz_xml_att(item, "href"); + item = fz_xml_find_next(item, "item"); + } + return NULL; +} + +static const char * +path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *idref, int n) +{ + const char *rel_path = rel_path_from_idref(manifest, idref); + if (!rel_path) + { + path[0] = 0; + return NULL; + } + fz_strlcpy(path, base_uri, n); + fz_strlcat(path, "/", n); + fz_strlcat(path, rel_path, n); + return fz_cleanname(path); +} + +static epub_chapter * +epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path) +{ + fz_archive *zip = doc->zip; + fz_buffer *buf; + epub_chapter *ch; + char base_uri[2048]; + + fz_dirname(base_uri, path, sizeof base_uri); + + buf = fz_read_archive_entry(ctx, zip, path); + fz_write_buffer_byte(ctx, buf, 0); + + ch = fz_malloc_struct(ctx, epub_chapter); + ch->box = fz_parse_html(ctx, doc->set, zip, base_uri, buf, NULL); + ch->next = NULL; + + fz_drop_buffer(ctx, buf); + + return ch; +} + +static void +epub_parse_header(fz_context *ctx, epub_document *doc) +{ + fz_archive *zip = doc->zip; + fz_buffer *buf; + fz_xml *container_xml, *content_opf; + fz_xml *container, *rootfiles, *rootfile; + fz_xml *package, *manifest, *spine, *itemref; + char base_uri[2048]; + const char *full_path; + char ncx[2048], s[2048]; + epub_chapter *head, *tail; + + /* parse META-INF/container.xml to find OPF */ + + buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); + fz_write_buffer_byte(ctx, buf, 0); + container_xml = fz_parse_xml(ctx, buf->data, buf->len, 0); + fz_drop_buffer(ctx, buf); + + container = fz_xml_find(container_xml, "container"); + rootfiles = fz_xml_find_down(container, "rootfiles"); + rootfile = fz_xml_find_down(rootfiles, "rootfile"); + full_path = fz_xml_att(rootfile, "full-path"); + if (!full_path) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB"); + + printf("epub: found root: %s\n", full_path); + + fz_dirname(base_uri, full_path, sizeof base_uri); + + /* parse OPF to find NCX and spine */ + + buf = fz_read_archive_entry(ctx, zip, full_path); + fz_write_buffer_byte(ctx, buf, 0); + content_opf = fz_parse_xml(ctx, buf->data, buf->len, 0); + fz_drop_buffer(ctx, buf); + + package = fz_xml_find(content_opf, "package"); + manifest = fz_xml_find_down(package, "manifest"); + spine = fz_xml_find_down(package, "spine"); + + if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx)) + { + /* TODO: parse NCX to create fz_outline */ + printf("epub: found outline: %s\n", ncx); + } + + head = tail = NULL; + itemref = fz_xml_find_down(spine, "itemref"); + while (itemref) + { + if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s)) + { + printf("epub: found spine %s\n", s); + if (!head) + head = tail = epub_parse_chapter(ctx, doc, s); + else + tail = tail->next = epub_parse_chapter(ctx, doc, s); + } + itemref = fz_xml_find_next(itemref, "itemref"); + } + + doc->spine = head; + + printf("epub: done.\n"); + + fz_drop_xml(ctx, container_xml); + fz_drop_xml(ctx, content_opf); +} + +static epub_document * +epub_init(fz_context *ctx, fz_archive *zip) +{ + epub_document *doc; + + doc = fz_malloc_struct(ctx, epub_document); + doc->zip = zip; + doc->set = fz_new_html_font_set(ctx); + + doc->super.close = epub_close_document; + doc->super.layout = epub_layout; + doc->super.count_pages = epub_count_pages; + doc->super.load_page = epub_load_page; + + fz_try(ctx) + { + epub_parse_header(ctx, doc); + epub_layout(ctx, (fz_document*)doc, DEFW, DEFH, DEFEM); + } + fz_catch(ctx) + { + epub_close_document(ctx, (fz_document*)doc); + fz_rethrow(ctx); + } + + return doc; +} + +static epub_document * +epub_open_document_with_stream(fz_context *ctx, fz_stream *file) +{ + return epub_init(ctx, fz_open_archive_with_stream(ctx, file)); +} + +static epub_document * +epub_open_document(fz_context *ctx, const char *filename) +{ + if (strstr(filename, "META-INF/container.xml") || strstr(filename, "META-INF\\container.xml")) + { + char dirname[2048], *p; + fz_strlcpy(dirname, filename, sizeof dirname); + p = strstr(dirname, "META-INF"); + *p = 0; + if (!dirname[0]) + fz_strlcpy(dirname, ".", sizeof dirname); + return epub_init(ctx, fz_open_directory(ctx, dirname)); + } + + return epub_init(ctx, fz_open_archive(ctx, filename)); +} + +static int +epub_recognize(fz_context *doc, const char *magic) +{ + char *ext = strrchr(magic, '.'); + if (ext) + if (!fz_strcasecmp(ext, ".epub")) + return 100; + if (strstr(magic, "META-INF/container.xml") || strstr(magic, "META-INF\\container.xml")) + return 200; + if (!strcmp(magic, "application/epub+zip")) + return 100; + return 0; +} + +fz_document_handler epub_document_handler = +{ + (fz_document_recognize_fn *)&epub_recognize, + (fz_document_open_fn *)&epub_open_document, + (fz_document_open_with_stream_fn *)&epub_open_document_with_stream +}; diff --git a/source/html/html-doc.c b/source/html/html-doc.c new file mode 100644 index 00000000..7217f74c --- /dev/null +++ b/source/html/html-doc.c @@ -0,0 +1,166 @@ +#include "mupdf/html.h" + +#define DEFW (450) +#define DEFH (600) +#define DEFEM (12) + +typedef struct html_document_s html_document; +typedef struct html_page_s html_page; + +struct html_document_s +{ + fz_document super; + fz_archive *zip; + fz_html_font_set *set; + float page_w, page_h, em; + fz_html *box; +}; + +struct html_page_s +{ + fz_page super; + html_document *doc; + int number; +}; + +static void +htdoc_close_document(fz_context *ctx, fz_document *doc_) +{ + html_document *doc = (html_document*)doc_; + fz_drop_archive(ctx, doc->zip); + fz_drop_html(ctx, doc->box); + fz_drop_html_font_set(ctx, doc->set); + fz_free(ctx, doc); +} + +static int +htdoc_count_pages(fz_context *ctx, fz_document *doc_) +{ + html_document *doc = (html_document*)doc_; + int count = ceilf(doc->box->h / doc->page_h); + return count; +} + +static void +htdoc_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em) +{ + html_document *doc = (html_document*)doc_; + doc->page_w = w; + doc->page_h = h; + doc->em = em; + fz_layout_html(ctx, doc->box, w, h, em); +} + +static void +htdoc_drop_page_imp(fz_context *ctx, fz_page *page_) +{ +} + +static fz_rect * +htdoc_bound_page(fz_context *ctx, fz_page *page_, fz_rect *bbox) +{ + html_page *page = (html_page*)page_; + html_document *doc = page->doc; + bbox->x0 = bbox->y0 = 0; + bbox->x1 = doc->page_w; + bbox->y1 = doc->page_h; + return bbox; +} + +static void +htdoc_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie) +{ + html_page *page = (html_page*)page_; + html_document *doc = page->doc; + int n = page->number; + fz_draw_html(ctx, doc->box, n * doc->page_h, (n+1) * doc->page_h, dev, ctm); +} + +static fz_page * +htdoc_load_page(fz_context *ctx, fz_document *doc_, int number) +{ + html_document *doc = (html_document*)doc_; + html_page *page = fz_new_page(ctx, sizeof *page); + page->super.bound_page = htdoc_bound_page; + page->super.run_page_contents = htdoc_run_page; + page->super.drop_page_imp = htdoc_drop_page_imp; + page->doc = doc; + page->number = number; + return (fz_page*)page; +} + +static html_document * +htdoc_open_document_with_stream(fz_context *ctx, fz_stream *file) +{ + html_document *doc; + fz_buffer *buf; + + doc = fz_malloc_struct(ctx, html_document); + doc->zip = fz_open_directory(ctx, "."); + doc->set = fz_new_html_font_set(ctx); + + doc->super.close = htdoc_close_document; + doc->super.layout = htdoc_layout; + doc->super.count_pages = htdoc_count_pages; + doc->super.load_page = htdoc_load_page; + + buf = fz_read_all(ctx, file, 0); + fz_write_buffer_byte(ctx, buf, 0); + doc->box = fz_parse_html(ctx, doc->set, doc->zip, ".", buf, NULL); + fz_drop_buffer(ctx, buf); + + htdoc_layout(ctx, (fz_document*)doc, DEFW, DEFH, DEFEM); + + return doc; +} + +static html_document * +htdoc_open_document(fz_context *ctx, const char *filename) +{ + char dirname[2048]; + fz_buffer *buf; + html_document *doc; + + fz_dirname(dirname, filename, sizeof dirname); + + doc = fz_malloc_struct(ctx, html_document); + doc->zip = fz_open_directory(ctx, dirname); + doc->set = fz_new_html_font_set(ctx); + + doc->super.close = htdoc_close_document; + doc->super.layout = htdoc_layout; + doc->super.count_pages = htdoc_count_pages; + doc->super.load_page = htdoc_load_page; + + buf = fz_read_file(ctx, filename); + fz_write_buffer_byte(ctx, buf, 0); + doc->box = fz_parse_html(ctx, doc->set, doc->zip, ".", buf, NULL); + fz_drop_buffer(ctx, buf); + + htdoc_layout(ctx, (fz_document*)doc, DEFW, DEFH, DEFEM); + + return doc; +} + +static int +htdoc_recognize(fz_context *doc, const char *magic) +{ + char *ext = strrchr(magic, '.'); + + if (ext) + { + if (!fz_strcasecmp(ext, ".xml") || !fz_strcasecmp(ext, ".xhtml") || !fz_strcasecmp(ext, ".html")) + return 100; + } + if (!strcmp(magic, "application/html+xml") || !strcmp(magic, "application/xml") || !strcmp(magic, "text/xml")) + return 100; + + return 0; +} + +fz_document_handler html_document_handler = +{ + (fz_document_recognize_fn *)&htdoc_recognize, + (fz_document_open_fn *)&htdoc_open_document, + (fz_document_open_with_stream_fn *)&htdoc_open_document_with_stream +}; diff --git a/source/html/html-font.c b/source/html/html-font.c new file mode 100644 index 00000000..ae6568c9 --- /dev/null +++ b/source/html/html-font.c @@ -0,0 +1,47 @@ +#include "mupdf/html.h" +#include "mupdf/pdf.h" /* for pdf_lookup_builtin_font */ + +static const char *font_names[16] = +{ + "Times-Roman", "Times-Italic", "Times-Bold", "Times-BoldItalic", + "Helvetica", "Helvetica-Oblique", "Helvetica-Bold", "Helvetica-BoldOblique", + "Courier", "Courier-Oblique", "Courier-Bold", "Courier-BoldOblique", + "Courier", "Courier-Oblique", "Courier-Bold", "Courier-BoldOblique", +}; + +fz_font * +fz_load_html_font(fz_context *ctx, fz_html_font_set *set, + const char *family, const char *variant, const char *style, const char *weight) +{ + unsigned char *data; + unsigned int size; + + int is_mono = !strcmp(family, "monospace"); + int is_sans = !strcmp(family, "sans-serif"); + int is_bold = !strcmp(weight, "bold") || !strcmp(weight, "bolder") || atoi(weight) > 400; + int is_italic = !strcmp(style, "italic") || !strcmp(style, "oblique"); + + int idx = is_mono * 8 + is_sans * 4 + is_bold * 2 + is_italic; + if (!set->fonts[idx]) + { + data = pdf_lookup_builtin_font(ctx, font_names[idx], &size); + if (!data) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load html font: %s", font_names[idx]); + set->fonts[idx] = fz_new_font_from_memory(ctx, font_names[idx], data, size, 0, 1); + } + + return set->fonts[idx]; +} + +fz_html_font_set *fz_new_html_font_set(fz_context *ctx) +{ + return fz_malloc_struct(ctx, fz_html_font_set); +} + +void fz_drop_html_font_set(fz_context *ctx, fz_html_font_set *set) +{ + int i; + for (i = 0; i < nelem(set->fonts); ++i) + fz_drop_font(ctx, set->fonts[i]); + fz_free(ctx, set); +} diff --git a/source/html/html-layout.c b/source/html/html-layout.c new file mode 100644 index 00000000..5a1a3157 --- /dev/null +++ b/source/html/html-layout.c @@ -0,0 +1,909 @@ +#include "mupdf/html.h" + +enum { T, R, B, L }; + +static const char *default_css = +"html,address,blockquote,body,dd,div,dl,dt,h1,h2,h3,h4,h5,h6,ol,p,ul,center,hr,pre{display:block}" +"span{display:inline}" +"li{display:list-item}" +"head{display:none}" +"body{margin:1em}" +"h1{font-size:2em;margin:.67em 0}" +"h2{font-size:1.5em;margin:.75em 0}" +"h3{font-size:1.17em;margin:.83em 0}" +"h4,p,blockquote,ul,ol,dl,dir,menu{margin:1.12em 0}" +"h5{font-size:.83em;margin:1.5em 0}" +"h6{font-size:.67em;margin:1.67em 0}" +"h1,h2,h3,h4,h5,h6,b,strong{font-weight:bold}" +"blockquote{margin-left:40px;margin-right:40px}" +"i,cite,em,var,address{font-style:italic}" +"pre,tt,code,kbd,samp{font-family:monospace}" +"pre{white-space:pre}" +"big{font-size:1.17em}" +"small,sub,sup{font-size:.83em}" +"sub{vertical-align:sub}" +"sup{vertical-align:super}" +"s,strike,del{text-decoration:line-through}" +"hr{border-width:thin;border-color:black;border-style:solid;margin:.5em 0}" +"ol,ul,dir,menu,dd{margin-left:40px}" +"ol{list-style-type:decimal}" +"ol ul,ul ol,ul ul,ol ol{margin-top:0;margin-bottom:0}" +"u,ins{text-decoration:underline}" +"center{text-align:center}" +"svg{display:none}" +"a{color:blue}" +; + +static int iswhite(int c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +static void fz_drop_html_flow(fz_context *ctx, fz_html_flow *flow) +{ + while (flow) + { + fz_html_flow *next = flow->next; + if (flow->type == FLOW_WORD) + fz_free(ctx, flow->text); + if (flow->type == FLOW_IMAGE) + fz_drop_image(ctx, flow->image); + fz_free(ctx, flow); + flow = next; + } +} + +static fz_html_flow *add_flow(fz_context *ctx, fz_html *top, fz_css_style *style, int type) +{ + fz_html_flow *flow = fz_malloc_struct(ctx, fz_html_flow); + flow->type = type; + flow->style = style; + *top->flow_tail = flow; + top->flow_tail = &flow->next; + return flow; +} + +static void add_flow_space(fz_context *ctx, fz_html *top, fz_css_style *style) +{ + fz_html_flow *flow; + + /* delete space at the beginning of the line */ + if (!top->flow_head) + return; + + flow = add_flow(ctx, top, style, FLOW_GLUE); + flow->text = " "; + flow->broken_text = ""; +} + +static void add_flow_word(fz_context *ctx, fz_html *top, fz_css_style *style, const char *a, const char *b) +{ + fz_html_flow *flow = add_flow(ctx, top, style, FLOW_WORD); + flow->text = fz_malloc(ctx, b - a + 1); + memcpy(flow->text, a, b - a); + flow->text[b - a] = 0; +} + +static void add_flow_image(fz_context *ctx, fz_html *top, fz_css_style *style, fz_image *img) +{ + fz_html_flow *flow = add_flow(ctx, top, style, FLOW_IMAGE); + flow->image = fz_keep_image(ctx, img); +} + +static void generate_text(fz_context *ctx, fz_html *box, const char *text) +{ + fz_html *flow = box; + while (flow->type != BOX_FLOW) + flow = flow->up; + + while (*text) + { + if (iswhite(*text)) + { + ++text; + while (iswhite(*text)) + ++text; + add_flow_space(ctx, flow, &box->style); + } + if (*text) + { + const char *mark = text++; + while (*text && !iswhite(*text)) + ++text; + add_flow_word(ctx, flow, &box->style, mark, text); + } + } +} + +static void generate_image(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_html *box, const char *src) +{ + fz_image *img; + fz_buffer *buf; + char path[2048]; + + fz_html *flow = box; + while (flow->type != BOX_FLOW) + flow = flow->up; + + fz_strlcpy(path, base_uri, sizeof path); + fz_strlcat(path, "/", sizeof path); + fz_strlcat(path, src, sizeof path); + fz_cleanname(path); + + fz_try(ctx) + { + buf = fz_read_archive_entry(ctx, zip, path); + img = fz_new_image_from_buffer(ctx, buf); + fz_drop_buffer(ctx, buf); + + add_flow_image(ctx, flow, &box->style, img); + } + fz_catch(ctx) + { + const char *alt = "[image]"; + fz_warn(ctx, "html: cannot add image src='%s'", src); + add_flow_word(ctx, flow, &box->style, alt, alt + 7); + } +} + +static void init_box(fz_context *ctx, fz_html *box) +{ + box->type = BOX_BLOCK; + box->x = box->y = 0; + box->w = box->h = 0; + + box->up = NULL; + box->last = NULL; + box->down = NULL; + box->next = NULL; + + box->flow_head = NULL; + box->flow_tail = &box->flow_head; + + fz_default_css_style(ctx, &box->style); +} + +void fz_drop_html(fz_context *ctx, fz_html *box) +{ + while (box) + { + fz_html *next = box->next; + fz_drop_html_flow(ctx, box->flow_head); + fz_drop_html(ctx, box->down); + fz_free(ctx, box); + box = next; + } +} + +static fz_html *new_box(fz_context *ctx) +{ + fz_html *box = fz_malloc_struct(ctx, fz_html); + init_box(ctx, box); + return box; +} + +static void insert_box(fz_context *ctx, fz_html *box, int type, fz_html *top) +{ + box->type = type; + + box->up = top; + + if (top) + { + if (!top->last) + { + top->down = top->last = box; + } + else + { + top->last->next = box; + top->last = box; + } + } +} + +static fz_html *insert_block_box(fz_context *ctx, fz_html *box, fz_html *top) +{ + if (top->type == BOX_BLOCK) + { + insert_box(ctx, box, BOX_BLOCK, top); + } + else if (top->type == BOX_FLOW) + { + while (top->type != BOX_BLOCK) + top = top->up; + insert_box(ctx, box, BOX_BLOCK, top); + } + else if (top->type == BOX_INLINE) + { + while (top->type != BOX_BLOCK) + top = top->up; + insert_box(ctx, box, BOX_BLOCK, top); + } + return top; +} + +static fz_html *insert_break_box(fz_context *ctx, fz_html *box, fz_html *top) +{ + if (top->type == BOX_BLOCK) + { + insert_box(ctx, box, BOX_BREAK, top); + } + else if (top->type == BOX_FLOW) + { + while (top->type != BOX_BLOCK) + top = top->up; + insert_box(ctx, box, BOX_BREAK, top); + } + else if (top->type == BOX_INLINE) + { + while (top->type != BOX_BLOCK) + top = top->up; + insert_box(ctx, box, BOX_BREAK, top); + } + return top; +} + +static void insert_inline_box(fz_context *ctx, fz_html *box, fz_html *top) +{ + if (top->type == BOX_BLOCK) + { + if (top->last && top->last->type == BOX_FLOW) + { + insert_box(ctx, box, BOX_INLINE, top->last); + } + else + { + fz_html *flow = new_box(ctx); + flow->is_first_flow = !top->last; + insert_box(ctx, flow, BOX_FLOW, top); + insert_box(ctx, box, BOX_INLINE, flow); + } + } + else if (top->type == BOX_FLOW) + { + insert_box(ctx, box, BOX_INLINE, top); + } + else if (top->type == BOX_INLINE) + { + insert_box(ctx, box, BOX_INLINE, top); + } +} + +static void generate_boxes(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, + fz_xml *node, fz_html *top, fz_css_rule *rule, fz_css_match *up_match) +{ + fz_css_match match; + fz_html *box; + const char *tag; + int display; + + while (node) + { + match.up = up_match; + match.count = 0; + + tag = fz_xml_tag(node); + if (tag) + { + fz_match_css(ctx, &match, rule, node); + + display = fz_get_css_match_display(&match); + + if (!strcmp(tag, "br")) + { + box = new_box(ctx); + fz_apply_css_style(ctx, set, &box->style, &match); + top = insert_break_box(ctx, box, top); + } + + else if (!strcmp(tag, "img")) + { + const char *src = fz_xml_att(node, "src"); + if (src) + { + box = new_box(ctx); + fz_apply_css_style(ctx, set, &box->style, &match); + insert_inline_box(ctx, box, top); + generate_image(ctx, zip, base_uri, box, src); + } + } + + else if (display != DIS_NONE) + { + box = new_box(ctx); + fz_apply_css_style(ctx, set, &box->style, &match); + + if (display == DIS_BLOCK) + { + top = insert_block_box(ctx, box, top); + } + else if (display == DIS_LIST_ITEM) + { + top = insert_block_box(ctx, box, top); + } + else if (display == DIS_INLINE) + { + insert_inline_box(ctx, box, top); + } + else + { + fz_warn(ctx, "unknown box display type"); + insert_box(ctx, box, BOX_BLOCK, top); + } + + if (fz_xml_down(node)) + generate_boxes(ctx, set, zip, base_uri, fz_xml_down(node), box, rule, &match); + + // TODO: remove empty flow boxes + } + } + else + { + if (top->type != BOX_INLINE) + { + box = new_box(ctx); + insert_inline_box(ctx, box, top); + box->style = top->style; + generate_text(ctx, box, fz_xml_text(node)); + } + else + { + generate_text(ctx, top, fz_xml_text(node)); + } + } + + node = fz_xml_next(node); + } +} + +static void measure_image(fz_context *ctx, fz_html_flow *node, float w, float h) +{ + float xs = 1, ys = 1, s = 1; + node->x = 0; + node->y = 0; + if (node->image->w > w) + xs = w / node->image->w; + if (node->image->h > h) + ys = h / node->image->h; + s = fz_min(xs, ys); + node->w = node->image->w * s; + node->h = node->image->h * s; +} + +static void measure_word(fz_context *ctx, fz_html_flow *node, float em) +{ + const char *s; + int c, g; + float w; + + em = fz_from_css_number(node->style->font_size, em, em); + node->x = 0; + node->y = 0; + node->h = fz_from_css_number_scale(node->style->line_height, em, em, em); + + w = 0; + s = node->text; + while (*s) + { + s += fz_chartorune(&c, s); + g = fz_encode_character(ctx, node->style->font, c); + w += fz_advance_glyph(ctx, node->style->font, g) * em; + } + node->w = w; + node->em = em; +} + +static float measure_line(fz_html_flow *node, fz_html_flow *end, float *baseline) +{ + float max_a = 0, max_d = 0, h = 0; + while (node != end) + { + if (node->type == FLOW_IMAGE) + { + if (node->h > max_a) + max_a = node->h; + } + else + { + float a = node->em * 0.8; + float d = node->em * 0.2; + if (a > max_a) max_a = a; + if (d > max_d) max_d = d; + } + if (node->h > h) h = node->h; + if (max_a + max_d > h) h = max_a + max_d; + node = node->next; + } + *baseline = max_a + (h - max_a - max_d) / 2; + return h; +} + +static void layout_line(fz_context *ctx, float indent, float page_w, float line_w, int align, fz_html_flow *node, fz_html_flow *end, fz_html *box, float baseline) +{ + float x = box->x + indent; + float y = box->y + box->h; + float slop = page_w - line_w; + float justify = 0; + float va; + int n = 0; + + if (align == TA_JUSTIFY) + { + fz_html_flow *it; + for (it = node; it != end; it = it->next) + if (it->type == FLOW_GLUE) + ++n; + justify = slop / n; + } + else if (align == TA_RIGHT) + x += slop; + else if (align == TA_CENTER) + x += slop / 2; + + while (node != end) + { + switch (node->style->vertical_align) + { + default: + case VA_BASELINE: + va = 0; + break; + case VA_SUB: + va = node->em * 0.2f; + break; + case VA_SUPER: + va = node->em * -0.3f; + break; + } + node->x = x; + if (node->type == FLOW_IMAGE) + node->y = y + baseline - node->h; + else + node->y = y + baseline + va; + x += node->w; + if (node->type == FLOW_GLUE) + x += justify; + node = node->next; + } +} + +static fz_html_flow *find_next_glue(fz_html_flow *node, float *w) +{ + while (node && node->type == FLOW_GLUE) + { + *w += node->w; + node = node->next; + } + while (node && node->type != FLOW_GLUE) + { + *w += node->w; + node = node->next; + } + return node; +} + +static fz_html_flow *find_next_word(fz_html_flow *node, float *w) +{ + while (node && node->type == FLOW_GLUE) + { + *w += node->w; + node = node->next; + } + return node; +} + +static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, float page_h) +{ + fz_html_flow *node, *line_start, *word_start, *word_end, *line_end; + float glue_w; + float word_w; + float line_w; + float indent; + float avail, line_h; + float baseline; + int align; + + em = fz_from_css_number(box->style.font_size, em, em); + indent = box->is_first_flow ? fz_from_css_number(top->style.text_indent, em, top->w) : 0; + align = top->style.text_align; + + box->x = top->x; + box->y = top->y + top->h; + box->w = top->w; + box->h = 0; + + if (!box->flow_head) + return; + + for (node = box->flow_head; node; node = node->next) + if (node->type == FLOW_IMAGE) + measure_image(ctx, node, top->w, page_h); + else + measure_word(ctx, node, em); + + line_start = find_next_word(box->flow_head, &glue_w); + line_end = NULL; + + line_w = indent; + word_w = 0; + word_start = line_start; + while (word_start) + { + word_end = find_next_glue(word_start, &word_w); + if (line_w + word_w <= top->w) + { + line_w += word_w; + glue_w = 0; + line_end = word_end; + word_start = find_next_word(word_end, &glue_w); + word_w = glue_w; + } + else + { + avail = page_h - fmodf(box->y + box->h, page_h); + line_h = measure_line(line_start, line_end, &baseline); + if (line_h > avail) + box->h += avail; + layout_line(ctx, indent, top->w, line_w, align, line_start, line_end, box, baseline); + box->h += line_h; + word_start = find_next_word(line_end, &glue_w); + line_start = word_start; + line_end = NULL; + indent = 0; + line_w = 0; + word_w = 0; + } + } + + /* don't justify the last line of a paragraph */ + if (align == TA_JUSTIFY) + align = TA_LEFT; + + if (line_start) + { + avail = page_h - fmodf(box->y + box->h, page_h); + line_h = measure_line(line_start, line_end, &baseline); + if (line_h > avail) + box->h += avail; + layout_line(ctx, indent, top->w, line_w, align, line_start, line_end, box, baseline); + box->h += line_h; + } +} + +static void layout_block(fz_context *ctx, fz_html *box, fz_html *top, float em, float top_collapse_margin, float page_h) +{ + fz_html *child; + float box_collapse_margin; + int prev_br; + + float *margin = box->margin; + float *border = box->border; + float *padding = box->padding; + + em = fz_from_css_number(box->style.font_size, em, em); + + margin[0] = fz_from_css_number(box->style.margin[0], em, top->w); + margin[1] = fz_from_css_number(box->style.margin[1], em, top->w); + margin[2] = fz_from_css_number(box->style.margin[2], em, top->w); + margin[3] = fz_from_css_number(box->style.margin[3], em, top->w); + + padding[0] = fz_from_css_number(box->style.padding[0], em, top->w); + padding[1] = fz_from_css_number(box->style.padding[1], em, top->w); + padding[2] = fz_from_css_number(box->style.padding[2], em, top->w); + padding[3] = fz_from_css_number(box->style.padding[3], em, top->w); + + if (box->style.border_style) + { + border[0] = fz_from_css_number(box->style.border_width[0], em, top->w); + border[1] = fz_from_css_number(box->style.border_width[1], em, top->w); + border[2] = fz_from_css_number(box->style.border_width[2], em, top->w); + border[3] = fz_from_css_number(box->style.border_width[3], em, top->w); + } + else + border[0] = border[1] = border[2] = border[3] = 0; + + if (padding[T] == 0 && border[T] == 0) + box_collapse_margin = margin[T]; + else + box_collapse_margin = 0; + + if (margin[T] > top_collapse_margin) + margin[T] -= top_collapse_margin; + else + margin[T] = 0; + + box->x = top->x + margin[L] + border[L] + padding[L]; + box->y = top->y + top->h + margin[T] + border[T] + padding[T]; + box->w = top->w - (margin[L] + margin[R] + border[L] + border[R] + padding[L] + padding[R]); + box->h = 0; + + prev_br = 0; + for (child = box->down; child; child = child->next) + { + if (child->type == BOX_BLOCK) + { + layout_block(ctx, child, box, em, box_collapse_margin, page_h); + box->h += child->h + + child->padding[T] + child->padding[B] + + child->border[T] + child->border[B] + + child->margin[T] + child->margin[B]; + box_collapse_margin = child->margin[B]; + prev_br = 0; + } + else if (child->type == BOX_BREAK) + { + /* TODO: interaction with page breaks */ + if (prev_br) + box->h += fz_from_css_number_scale(box->style.line_height, em, em, em); + prev_br = 1; + } + else if (child->type == BOX_FLOW) + { + layout_flow(ctx, child, box, em, page_h); + if (child->h > 0) + { + box->h += child->h; + box_collapse_margin = 0; + prev_br = 0; + } + } + } + + if (padding[B] == 0 && border[B] == 0) + { + if (margin[B] > 0) + { + box->h -= box_collapse_margin; + if (margin[B] < box_collapse_margin) + margin[B] = box_collapse_margin; + } + } +} + +static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm) +{ + fz_html_flow *node; + fz_text *text; + fz_matrix trm; + const char *s; + float color[3]; + float x, y; + int c, g; + + for (node = box->flow_head; node; node = node->next) + { + if (node->type == FLOW_IMAGE) + { + if (node->y > page_bot || node->y + node->h < page_top) + continue; + } + else + { + if (node->y > page_bot || node->y < page_top) + continue; + } + + if (node->type == FLOW_WORD) + { + fz_scale(&trm, node->em, -node->em); + text = fz_new_text(ctx, node->style->font, &trm, 0); + + x = node->x; + y = node->y; + s = node->text; + while (*s) + { + s += fz_chartorune(&c, s); + g = fz_encode_character(ctx, node->style->font, c); + fz_add_text(ctx, text, g, c, x, y); + x += fz_advance_glyph(ctx, node->style->font, g) * node->em; + } + + color[0] = node->style->color.r / 255.0f; + color[1] = node->style->color.g / 255.0f; + color[2] = node->style->color.b / 255.0f; + + fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), color, 1); + + fz_drop_text(ctx, text); + } + else if (node->type == FLOW_IMAGE) + { + fz_matrix local_ctm = *ctm; + fz_pre_translate(&local_ctm, node->x, node->y); + fz_pre_scale(&local_ctm, node->w, node->h); + fz_fill_image(ctx, dev, node->image, &local_ctm, 1); + } + } +} + +static void draw_rect(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, float *rgba, float x0, float y0, float x1, float y1) +{ + fz_path *path = fz_new_path(ctx); + + fz_moveto(ctx, path, x0, y0); + fz_lineto(ctx, path, x1, y0); + fz_lineto(ctx, path, x1, y1); + fz_lineto(ctx, path, x0, y1); + fz_closepath(ctx, path); + + fz_fill_path(ctx, dev, path, 0, ctm, fz_device_rgb(ctx), rgba, rgba[3]); + + fz_drop_path(ctx, path); +} + +static void draw_block_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm) +{ + float x0, y0, x1, y1; + float color[4]; + + // TODO: background fill + // TODO: border stroke + + float *border = box->border; + float *padding = box->padding; + + x0 = box->x - padding[L]; + y0 = box->y - padding[T]; + x1 = box->x + box->w + padding[R]; + y1 = box->y + box->h + padding[B]; + + if (y0 > page_bot || y1 < page_top) + return; + + if (box->style.background_color.a > 0) + { + color[0] = box->style.background_color.r / 255.0f; + color[1] = box->style.background_color.g / 255.0f; + color[2] = box->style.background_color.b / 255.0f; + color[3] = box->style.background_color.a / 255.0f; + draw_rect(ctx, dev, ctm, color, x0, y0, x1, y1); + } + + if (box->style.border_color.a > 0) + { + color[0] = box->style.border_color.r / 255.0f; + color[1] = box->style.border_color.g / 255.0f; + color[2] = box->style.border_color.b / 255.0f; + color[3] = box->style.border_color.a / 255.0f; + if (border[T] > 0) + draw_rect(ctx, dev, ctm, color, x0 - border[L], y0 - border[T], x1 + border[R], y0); + if (border[B] > 0) + draw_rect(ctx, dev, ctm, color, x0 - border[L], y1, x1 + border[R], y1 + border[B]); + if (border[L] > 0) + draw_rect(ctx, dev, ctm, color, x0 - border[L], y0 - border[T], x0, y1 + border[B]); + if (border[R] > 0) + draw_rect(ctx, dev, ctm, color, x1, y0 - border[T], x1 + border[R], y1 + border[B]); + } + + for (box = box->down; box; box = box->next) + { + switch (box->type) + { + case BOX_BLOCK: draw_block_box(ctx, box, page_top, page_bot, dev, ctm); break; + case BOX_FLOW: draw_flow_box(ctx, box, page_top, page_bot, dev, ctm); break; + } + } +} + +void +fz_draw_html(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *inctm) +{ + fz_matrix ctm = *inctm; + fz_pre_translate(&ctm, 0, -page_top); + draw_block_box(ctx, box, page_top, page_bot, dev, &ctm); +} + +static char *concat_text(fz_context *ctx, fz_xml *root) +{ + fz_xml *node; + int i = 0, n = 1; + char *s; + for (node = fz_xml_down(root); node; node = fz_xml_next(node)) + { + const char *text = fz_xml_text(node); + n += text ? strlen(text) : 0; + } + s = fz_malloc(ctx, n); + for (node = fz_xml_down(root); node; node = fz_xml_next(node)) + { + const char *text = fz_xml_text(node); + if (text) + { + n = strlen(text); + memcpy(s+i, text, n); + i += n; + } + } + s[i] = 0; + return s; +} + +static fz_css_rule * +html_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css_rule *css, fz_xml *root) +{ + fz_xml *node; + fz_buffer *buf; + char path[2048]; + + for (node = root; node; node = fz_xml_next(node)) + { + const char *tag = fz_xml_tag(node); + if (tag && !strcmp(tag, "link")) + { + char *rel = fz_xml_att(node, "rel"); + if (rel && !strcasecmp(rel, "stylesheet")) + { + char *type = fz_xml_att(node, "type"); + if ((type && !strcmp(type, "text/css")) || !type) + { + char *href = fz_xml_att(node, "href"); + if (href) + { + fz_strlcpy(path, base_uri, sizeof path); + fz_strlcat(path, "/", sizeof path); + fz_strlcat(path, href, sizeof path); + fz_cleanname(path); + + buf = fz_read_archive_entry(ctx, zip, path); + fz_write_buffer_byte(ctx, buf, 0); + css = fz_parse_css(ctx, css, (char*)buf->data, path); + fz_drop_buffer(ctx, buf); + } + } + } + } + if (tag && !strcmp(tag, "style")) + { + char *s = concat_text(ctx, node); + css = fz_parse_css(ctx, css, s, "<style>"); + fz_free(ctx, s); + } + if (fz_xml_down(node)) + css = html_load_css(ctx, zip, base_uri, css, fz_xml_down(node)); + } + return css; +} + +void +fz_layout_html(fz_context *ctx, fz_html *box, float w, float h, float em) +{ + fz_html page_box; + + init_box(ctx, &page_box); + page_box.w = w; + page_box.h = 0; + + layout_block(ctx, box, &page_box, em, 0, h); +} + +fz_html * +fz_parse_html(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css) +{ + fz_xml *xml; + fz_css_rule *css; + fz_css_match match; + fz_html *box; + + xml = fz_parse_xml(ctx, buf->data, buf->len, 1); + + css = fz_parse_css(ctx, NULL, default_css, "<default>"); + if (user_css) + css = fz_parse_css(ctx, NULL, user_css, "<user>"); + css = html_load_css(ctx, zip, base_uri, css, xml); + + // print_rules(css); + + box = new_box(ctx); + + match.up = NULL; + match.count = 0; + + generate_boxes(ctx, set, zip, base_uri, xml, box, css, &match); + + fz_drop_css(ctx, css); + fz_drop_xml(ctx, xml); + + return box; +} |