diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2014-09-08 15:49:59 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2014-12-03 12:25:51 +0100 |
commit | 9d1482bc78d72ba330c2130170b49b4e18702623 (patch) | |
tree | e7f87a3266419e7bb238095e3fa146953dcb9e28 /source/html | |
parent | 4b8638cfa35ecacf7418ec8933f971577652bb79 (diff) | |
download | mupdf-9d1482bc78d72ba330c2130170b49b4e18702623.tar.xz |
html: CSS lexer and parser.
Diffstat (limited to 'source/html')
-rw-r--r-- | source/html/css-apply.c | 875 | ||||
-rw-r--r-- | source/html/css-parse.c | 734 | ||||
-rw-r--r-- | source/html/handler.c | 11 | ||||
-rw-r--r-- | source/html/layout.c | 153 |
4 files changed, 1765 insertions, 8 deletions
diff --git a/source/html/css-apply.c b/source/html/css-apply.c new file mode 100644 index 00000000..7f3028a9 --- /dev/null +++ b/source/html/css-apply.c @@ -0,0 +1,875 @@ +#include "mupdf/html.h" + +static void add_property(struct style *style, const char *name, struct value *value, int spec); + +struct rule * +new_rule(struct selector *selector, struct property *declaration) +{ + struct rule *rule; + + rule = malloc(sizeof(struct rule)); + rule->selector = selector; + rule->declaration = declaration; + rule->next = NULL; + + return rule; +} + +struct selector * +new_selector(const char *name) +{ + struct selector *sel; + + sel = malloc(sizeof(struct selector)); + sel->name = name ? strdup(name) : NULL; + sel->combine = 0; + sel->cond = NULL; + sel->left = NULL; + sel->right = NULL; + sel->next = NULL; + + return sel; +} + +struct condition * +new_condition(int type, const char *key, const char *val) +{ + struct condition *cond; + + cond = malloc(sizeof(struct condition)); + cond->type = type; + cond->key = key ? strdup(key) : NULL; + cond->val = val ? strdup(val) : NULL; + cond->next = NULL; + + return cond; +} + +struct property * +new_property(const char *name, struct value *value, int spec) +{ + struct property *prop; + + prop = malloc(sizeof(struct property)); + prop->name = strdup(name); + prop->value = value; + prop->spec = spec; + prop->next = NULL; + + return prop; +} + +struct value * +new_value(int type, const char *data) +{ + struct value *val; + + val = malloc(sizeof(struct value)); + val->type = type; + val->data = strdup(data); + val->args = NULL; + val->next = NULL; + + return val; +} + +/* + * Compute specificity + */ + +static int +count_condition_ids(struct condition *cond) +{ + int n = 0; + while (cond) + { + if (cond->type == '#') + n ++; + cond = cond->next; + } + return n; +} + +static int +count_selector_ids(struct selector *sel) +{ + int n = count_condition_ids(sel->cond); + if (sel->left && sel->right) + { + n += count_selector_ids(sel->left); + n += count_selector_ids(sel->right); + } + return n; +} + +static int +count_condition_atts(struct condition *cond) +{ + int n = 0; + while (cond) + { + if (cond->type != '#' && cond->type != ':') + n ++; + cond = cond->next; + } + return n; +} + +static int +count_selector_atts(struct selector *sel) +{ + int n = count_condition_atts(sel->cond); + if (sel->left && sel->right) + { + n += count_selector_atts(sel->left); + n += count_selector_atts(sel->right); + } + return n; +} + +static int +count_condition_names(struct condition *cond) +{ + int n = 0; + while (cond) + { + if (cond->type == ':') + n ++; + cond = cond->next; + } + return n; +} + +static int +count_selector_names(struct selector *sel) +{ + int n = count_condition_names(sel->cond); + if (sel->left && sel->right) + { + n += count_selector_names(sel->left); + n += count_selector_names(sel->right); + } + else if (sel->name) + { + n ++; + } + return n; +} + +int +selector_specificity(struct selector *sel) +{ + int b = count_selector_ids(sel); + int c = count_selector_atts(sel); + int d = count_selector_names(sel); + return b * 100 + c * 10 + d; +} + +/* + * Pretty printing + */ + +void +print_value(struct value *val) +{ + printf("%s", val->data); + if (val->args) + { + printf("("); + print_value(val->args); + printf(")"); + } + if (val->next) + { + printf(" "); + print_value(val->next); + } +} + +void +print_property(struct property *prop) +{ + printf("\t%s: ", prop->name); + print_value(prop->value); + printf(" !%d;\n", prop->spec); +} + +void +print_condition(struct condition *cond) +{ + if (cond->type == '=') + printf("[%s=%s]", cond->key, cond->val); + else if (cond->type == '[') + printf("[%s]", cond->key); + else + printf("%c%s", cond->type, cond->val); + if (cond->next) + print_condition(cond->next); +} + +void +print_selector(struct selector *sel) +{ + if (sel->combine) + { +putchar('('); + print_selector(sel->left); + if (sel->combine == ' ') + printf(" "); + else + printf(" %c ", sel->combine); + print_selector(sel->right); +putchar(')'); + } + else if (sel->name) + printf("%s", sel->name); + else + printf("*"); + if (sel->cond) + { + print_condition(sel->cond); + } +} + +void +print_rule(struct rule *rule) +{ + struct selector *sel; + struct property *prop; + + for (sel = rule->selector; sel; sel = sel->next) + { + print_selector(sel); + printf(" !%d", selector_specificity(sel)); + if (sel->next) + printf(", "); + } + + printf("\n{\n"); + for (prop = rule->declaration; prop; prop = prop->next) + { + print_property(prop); + } + printf("}\n"); +} + +void +print_rules(struct rule *rule) +{ + while (rule) + { + print_rule(rule); + rule = rule->next; + } +} + +/* + * Selector matching + */ + +int +match_id_condition(fz_xml *node, const char *p) +{ + const char *s = fz_xml_att(node, "id"); + if (s && !strcmp(s, p)) + return 1; + return 0; +} + +int +match_class_condition(fz_xml *node, const char *p) +{ + const char *s = fz_xml_att(node, "class"); + char buf[1024]; + if (s) { + strcpy(buf, s); + s = strtok(buf, " "); + while (s) { + if (!strcmp(s, p)) + return 1; + s = strtok(NULL, " "); + } + } + return 0; +} + +int +match_condition(struct condition *cond, fz_xml *node) +{ + if (!cond) + return 1; + + switch (cond->type) { + default: return 0; + case ':': return 0; /* don't support pseudo-classes */ + case '#': if (!match_id_condition(node, cond->val)) return 0; break; + case '.': if (!match_class_condition(node, cond->val)) return 0; break; + } + + return match_condition(cond->next, node); +} + +int +match_selector(struct selector *sel, fz_xml *node) +{ + if (!node) + return 0; + + if (sel->combine) + { + /* descendant */ + if (sel->combine == ' ') + { + fz_xml *parent = fz_xml_up(node); + while (parent) + { + if (match_selector(sel->left, parent)) + if (match_selector(sel->right, node)) + return 1; + parent = fz_xml_up(parent); + } + return 0; + } + + /* child */ + if (sel->combine == '>') + { + fz_xml *parent = fz_xml_up(node); + if (!parent) + return 0; + if (!match_selector(sel->left, parent)) + return 0; + if (!match_selector(sel->right, node)) + return 0; + } + + /* adjacent */ + if (sel->combine == '+') + { + fz_xml *prev = fz_xml_prev(node); + while (prev && !fz_xml_tag(prev) && fz_xml_prev(prev)) + prev = fz_xml_prev(prev); + if (!prev) + return 0; + if (!fz_xml_tag(prev)) + return 0; + if (!match_selector(sel->left, prev)) + return 0; + if (!match_selector(sel->right, node)) + return 0; + } + } + + if (sel->name) + { + if (strcmp(sel->name, fz_xml_tag(node))) + return 0; + } + + if (sel->cond) + { + if (!match_condition(sel->cond, node)) + return 0; + } + + return 1; +} + +/* + * Annotating nodes with properties and expanding shorthand forms. + */ + +static int +count_values(struct value *value) +{ + int n = 0; + while (value) + { + n++; + value = value->next; + } + return n; +} + +static void +add_shorthand_margin(struct style *style, struct value *value, int spec) +{ + int n = count_values(value); + + if (n == 1) + { + add_property(style, "margin-top", value, spec); + add_property(style, "margin-right", value, spec); + add_property(style, "margin-bottom", value, spec); + add_property(style, "margin-left", value, spec); + } + + if (n == 2) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + + add_property(style, "margin-top", a, spec); + add_property(style, "margin-right", b, spec); + add_property(style, "margin-bottom", a, spec); + add_property(style, "margin-left", b, spec); + } + + if (n == 3) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + struct value *c = new_value(value->next->next->type, value->next->next->data); + + add_property(style, "margin-top", a, spec); + add_property(style, "margin-right", b, spec); + add_property(style, "margin-bottom", c, spec); + add_property(style, "margin-left", b, spec); + } + + if (n == 4) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + struct value *c = new_value(value->next->next->type, value->next->next->data); + struct value *d = new_value(value->next->next->next->type, value->next->next->next->data); + + add_property(style, "margin-top", a, spec); + add_property(style, "margin-right", b, spec); + add_property(style, "margin-bottom", c, spec); + add_property(style, "margin-left", d, spec); + } +} + +static void +add_shorthand_padding(struct style *style, struct value *value, int spec) +{ + int n = count_values(value); + + if (n == 1) + { + add_property(style, "padding-top", value, spec); + add_property(style, "padding-right", value, spec); + add_property(style, "padding-bottom", value, spec); + add_property(style, "padding-left", value, spec); + } + + if (n == 2) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + + add_property(style, "padding-top", a, spec); + add_property(style, "padding-right", b, spec); + add_property(style, "padding-bottom", a, spec); + add_property(style, "padding-left", b, spec); + } + + if (n == 3) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + struct value *c = new_value(value->next->next->type, value->next->next->data); + + add_property(style, "padding-top", a, spec); + add_property(style, "padding-right", b, spec); + add_property(style, "padding-bottom", c, spec); + add_property(style, "padding-left", b, spec); + } + + if (n == 4) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + struct value *c = new_value(value->next->next->type, value->next->next->data); + struct value *d = new_value(value->next->next->next->type, value->next->next->next->data); + + add_property(style, "padding-top", a, spec); + add_property(style, "padding-right", b, spec); + add_property(style, "padding-bottom", c, spec); + add_property(style, "padding-left", d, spec); + } +} + +static void +add_shorthand_border_width(struct style *style, struct value *value, int spec) +{ + int n = count_values(value); + + if (n == 1) + { + add_property(style, "border-top-width", value, spec); + add_property(style, "border-right-width", value, spec); + add_property(style, "border-bottom-width", value, spec); + add_property(style, "border-left-width", value, spec); + } + + if (n == 2) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + + add_property(style, "border-top-width", a, spec); + add_property(style, "border-right-width", b, spec); + add_property(style, "border-bottom-width", a, spec); + add_property(style, "border-left-width", b, spec); + } + + if (n == 3) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + struct value *c = new_value(value->next->next->type, value->next->next->data); + + add_property(style, "border-top-width", a, spec); + add_property(style, "border-right-width", b, spec); + add_property(style, "border-bottom-width", c, spec); + add_property(style, "border-left-width", b, spec); + } + + if (n == 4) + { + struct value *a = new_value(value->type, value->data); + struct value *b = new_value(value->next->type, value->next->data); + struct value *c = new_value(value->next->next->type, value->next->next->data); + struct value *d = new_value(value->next->next->next->type, value->next->next->next->data); + + add_property(style, "border-top-width", a, spec); + add_property(style, "border-right-width", b, spec); + add_property(style, "border-bottom-width", c, spec); + add_property(style, "border-left-width", d, spec); + } +} + +static void +add_property(struct style *style, const char *name, struct value *value, int spec) +{ + int i; + + if (!strcmp(name, "margin")) + { + add_shorthand_margin(style, value, spec); + return; + } + if (!strcmp(name, "padding")) + { + add_shorthand_padding(style, value, spec); + return; + } + if (!strcmp(name, "border-width")) + { + add_shorthand_border_width(style, value, spec); + return; + } + + /* TODO: border-color */ + /* TODO: border-style */ + /* TODO: border */ + /* TODO: font */ + /* TODO: list-style */ + /* TODO: background */ + + for (i = 0; i < style->count; ++i) + { + if (!strcmp(style->prop[i].name, name)) + { + if (style->prop[i].spec <= spec) + { + style->prop[i].value = value; + style->prop[i].spec = spec; + } + return; + } + } + + if (style->count + 1 >= nelem(style->prop)) + { + // fz_warn(ctx, "too many css properties"); + return; + } + + style->prop[style->count].name = name; + style->prop[style->count].value = value; + style->prop[style->count].spec = spec; + ++style->count; +} + +void +apply_styles(struct rule *rule, struct style *style, fz_xml *node) +{ + struct selector *sel; + struct property *prop; + + while (rule) + { + sel = rule->selector; + while (sel) + { + if (match_selector(sel, node)) + { + for (prop = rule->declaration; prop; prop = prop->next) + add_property(style, prop->name, prop->value, selector_specificity(sel)); + break; + } + sel = sel->next; + } + rule = rule->next; + } +} + +static const char *inherit_list[] = { + "color", "direction", + "font-family", "font-size", "font-style", "font-variant", "font-weight", + "letter-spacing", "line-height", + "list-style-image", "list-style-position", "list-style-type", + "orphans", "quotes", "text-align", "text-indent", "text-transform", + "visibility", "white-space", "widows", "word-spacing", + + /* this is not supposed to be inherited: */ + "vertical-align", +}; + +static struct value * +get_raw_property(struct style *node, const char *name) +{ + int i; + for (i = 0; i < node->count; ++i) + if (!strcmp(node->prop[i].name, name)) + return node->prop[i].value; + return NULL; +} + +static int +should_inherit_property(const char *name) +{ + int l = 0; + int r = nelem(inherit_list) - 1; + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(name, inherit_list[m]); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return 1; + } + return 0; +} + +static struct value * +get_style_property(struct style *node, const char *name) +{ + struct value *value; + + value = get_raw_property(node, name); + if (node->up) + { + if (value && !strcmp(value->data, "inherit")) + return get_style_property(node->up, name); + if (!value && should_inherit_property(name)) + return get_style_property(node->up, name); + } + return value; +} + +static const char * +get_style_property_string(struct style *node, const char *name, const char *initial) +{ + struct value *value; + value = get_style_property(node, name); + if (!value) + return initial; + return value->data; +} + +static int +compute_number(struct value *value, int em, int hundred, int scale, int initial) +{ + char *p; + + if (!value) + return initial; + + if (value->type == CSS_PERCENT) + return strtof(value->data, &p) * hundred / 100; + + if (value->type == CSS_NUMBER) + return strtof(value->data, &p) * scale; + + if (value->type == CSS_LENGTH) + { + float x = strtof(value->data, &p); + + if (p[0] == 'e' && p[1] == 'm') + return x * em; + if (p[0] == 'e' && p[1] == 'x') + return x * em / 2; + + if (p[0] == 'i' && p[1] == 'n') + return x * 72; + if (p[0] == 'c' && p[1] == 'm') + return x * 72 / 2.54; + if (p[0] == 'm' && p[1] == 'm') + return x * 72 / 25.4; + if (p[0] == 'p' && p[1] == 'c') + return x * 12; + + if (p[0] == 'p' && p[1] == 't') + return x; + if (p[0] == 'p' && p[1] == 'x') + return x; + + return x; + } + + return initial; +} + +void +compute_style(struct computed_style *style, struct style *node) +{ + struct value *value; + int em = 12; + int hundred = 100; + + memset(style, 0, sizeof *style); + + style->display = INLINE; + style->position = STATIC; + style->text_align = LEFT; + style->font_size = 12; + + value = get_style_property(node, "display"); + if (value) + { + if (!strcmp(value->data, "none")) + style->display = NONE; + if (!strcmp(value->data, "inline")) + style->display = INLINE; + if (!strcmp(value->data, "block")) + style->display = BLOCK; + if (!strcmp(value->data, "list-item")) + style->display = LIST_ITEM; + } + + value = get_style_property(node, "position"); + if (value) + { + if (!strcmp(value->data, "static")) + style->position = STATIC; + if (!strcmp(value->data, "relative")) + style->position = RELATIVE; + if (!strcmp(value->data, "absolute")) + style->position = ABSOLUTE; + if (!strcmp(value->data, "fixed")) + style->position = FIXED; + } + + value = get_style_property(node, "text-align"); + if (value) + { + if (!strcmp(value->data, "left")) + style->text_align = LEFT; + if (!strcmp(value->data, "right")) + style->text_align = RIGHT; + if (!strcmp(value->data, "center")) + style->text_align = CENTER; + if (!strcmp(value->data, "justify")) + style->text_align = JUSTIFY; + } + + value = get_style_property(node, "vertical-align"); + if (value) + { + if (!strcmp(value->data, "super")) + style->vertical_align = 1; + if (!strcmp(value->data, "sub")) + style->vertical_align = -1; + } + + value = get_style_property(node, "font-size"); + if (value) { + if (!strcmp(value->data, "xx-large")) style->font_size = 20; + else if (!strcmp(value->data, "x-large")) style->font_size = 16; + else if (!strcmp(value->data, "large")) style->font_size = 14; + else if (!strcmp(value->data, "medium")) style->font_size = 12; + else if (!strcmp(value->data, "small")) style->font_size = 10; + else if (!strcmp(value->data, "x-small")) style->font_size = 8; + else if (!strcmp(value->data, "xx-small")) style->font_size = 6; + else if (!strcmp(value->data, "larger")) style->font_size = em + 2; + else if (!strcmp(value->data, "smaller")) style->font_size = em - 2; + else style->font_size = compute_number(value, em, em, 1, 12); + } else { + style->font_size = 12; + } + em = style->font_size; + + value = get_style_property(node, "line-height"); + style->line_height = compute_number(value, em, em, em, 1.2 * em); + + value = get_style_property(node, "text-indent"); + style->text_indent = compute_number(value, em, hundred, 1, 0); + + value = get_style_property(node, "margin-top"); + style->margin[0] = compute_number(value, em, hundred, 1, 0); + value = get_style_property(node, "margin-right"); + style->margin[1] = compute_number(value, em, hundred, 1, 0); + value = get_style_property(node, "margin-bottom"); + style->margin[2] = compute_number(value, em, hundred, 1, 0); + value = get_style_property(node, "margin-left"); + style->margin[3] = compute_number(value, em, hundred, 1, 0); + + value = get_style_property(node, "padding-top"); + style->padding[0] = compute_number(value, em, hundred, 1, 0); + value = get_style_property(node, "padding-right"); + style->padding[1] = compute_number(value, em, hundred, 1, 0); + value = get_style_property(node, "padding-bottom"); + style->padding[2] = compute_number(value, em, hundred, 1, 0); + value = get_style_property(node, "padding-left"); + style->padding[3] = compute_number(value, em, hundred, 1, 0); + + { + const char *font_family = get_style_property_string(node, "font-family", "serif"); + const char *font_variant = get_style_property_string(node, "font-variant", "normal"); + const char *font_style = get_style_property_string(node, "font-style", "normal"); + const char *font_weight = get_style_property_string(node, "font-weight", "normal"); + + style->font_family = font_family; + + style->smallcaps = 0; + if (!strcmp(font_variant, "small-caps")) + style->smallcaps = 1; + + style->italic = 0; + if (!strcmp(font_style, "italic") || !strcmp(font_style, "oblique")) + style->italic = 1; + + style->bold = 0; + if (!strcmp(font_weight, "bold") || !strcmp(font_weight, "bolder") || atoi(font_weight) > 400) + style->bold = 1; + } +} + +void +print_style(struct computed_style *style) +{ + printf("style {\n"); + printf("\tdisplay = %d;\n", style->display); + printf("\tposition = %d;\n", style->position); + printf("\ttext-align = %d;\n", style->text_align); + printf("\tfont-family = %s;\n", style->font_family); + printf("\tfont-weight = %s;\n", style->bold ? "bold" : "normal"); + printf("\tfont-style = %s;\n", style->italic ? "italic" : "normal"); + printf("\tfont-variant = %s;\n", style->smallcaps ? "small-caps" : "normal"); + printf("\tfont-size = %d;\n", style->font_size); + printf("\tline-height = %d;\n", style->line_height); + printf("\ttext-indent = %d;\n", style->text_indent); + printf("\tvertical-align = %d;\n", style->vertical_align); + printf("\tmargin = %d %d %d %d;\n", + style->margin[0], style->margin[1], style->margin[2], style->margin[3]); + printf("\tpadding = %d %d %d %d;\n", + style->padding[0], style->padding[1], style->padding[2], style->padding[3]); + printf("}\n"); +} diff --git a/source/html/css-parse.c b/source/html/css-parse.c new file mode 100644 index 00000000..5f7fd254 --- /dev/null +++ b/source/html/css-parse.c @@ -0,0 +1,734 @@ +#include "mupdf/fitz.h" +#include "mupdf/html.h" + +struct lexbuf +{ + fz_context *ctx; + const char *s; + int lookahead; + int c; + int color; + int string_len; + char string[1024]; +}; + +static void css_lex_next(struct lexbuf *buf) +{ + // buf->s += fz_chartorune(&buf->c, buf->s); + buf->c = *(buf->s++); +} + +static void css_lex_init(fz_context *ctx, struct lexbuf *buf, const char *s) +{ + buf->ctx = ctx; + buf->s = s; + buf->c = 0; + css_lex_next(buf); + + buf->color = 0; + buf->string_len = 0; +} + +static int iswhite(int c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'; +} + +static int isnmstart(int c) +{ + return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || + (c >= 128 && c <= 255); +} + +static int isnmchar(int c) +{ + return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || c == '-' || (c >= 128 && c <= 255); +} + +static void css_push_char(struct lexbuf *buf, int c) +{ + if (buf->string_len + 1 >= nelem(buf->string)) + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "token too long"); + buf->string[buf->string_len++] = c; +} + +static int css_lex_accept(struct lexbuf *buf, int t) +{ + if (buf->c == t) + { + css_lex_next(buf); + return 1; + } + return 0; +} + +static void css_lex_expect(struct lexbuf *buf, int t) +{ + if (!css_lex_accept(buf, t)) + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected '%c'", t); +} + +static int ishex(int c, int *v) +{ + if (c >= '0' && c <= '9') + { + *v = c - '0'; + return 1; + } + if (c >= 'A' && c <= 'F') + { + *v = c - 'A' + 0xA; + return 1; + } + if (c >= 'a' && c <= 'f') + { + *v = c - 'a' + 0xA; + return 1; + } + return 0; +} + +static int css_lex_accept_hex(struct lexbuf *buf, int *v) +{ + if (ishex(buf->c, v)) + { + css_lex_next(buf); + return 1; + } + return 0; +} + +static int css_lex_number(struct lexbuf *buf) +{ + while (buf->c >= '0' && buf->c <= '9') + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + + if (css_lex_accept(buf, '.')) + { + css_push_char(buf, '.'); + while (buf->c >= '0' && buf->c <= '9') + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + } + + if (css_lex_accept(buf, '%')) + { + css_push_char(buf, '%'); + css_push_char(buf, 0); + return CSS_PERCENT; + } + + if (isnmstart(buf->c)) + { + css_push_char(buf, buf->c); + css_lex_next(buf); + while (isnmchar(buf->c)) + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + css_push_char(buf, 0); + return CSS_LENGTH; + } + + css_push_char(buf, 0); + return CSS_NUMBER; +} + +static int css_lex_keyword(struct lexbuf *buf) +{ + while (isnmchar(buf->c)) + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + css_push_char(buf, 0); + return CSS_KEYWORD; +} + +static int css_lex_string(struct lexbuf *buf, int q) +{ + while (buf->c && buf->c != q) + { + if (css_lex_accept(buf, '\\')) + { + if (css_lex_accept(buf, 'n')) + css_push_char(buf, '\n'); + else if (css_lex_accept(buf, 'r')) + css_push_char(buf, '\r'); + else if (css_lex_accept(buf, 'f')) + css_push_char(buf, '\f'); + else if (css_lex_accept(buf, '\f')) + /* line continuation */ ; + else if (css_lex_accept(buf, '\n')) + /* line continuation */ ; + else if (css_lex_accept(buf, '\r')) + css_lex_accept(buf, '\n'); + else + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + } + else + { + css_push_char(buf, buf->c); + css_lex_next(buf); + } + } + css_lex_expect(buf, q); + css_push_char(buf, 0); + return CSS_STRING; +} + +static int css_lex(struct lexbuf *buf) +{ + int t; + + // TODO: keyword escape sequences + + buf->string_len = 0; + + while (buf->c) + { + while (iswhite(buf->c)) + css_lex_next(buf); + + if (buf->c == 0) + break; + + if (css_lex_accept(buf, '/')) + { + if (css_lex_accept(buf, '*')) + { + while (buf->c) + { + if (css_lex_accept(buf, '*')) + { + while (buf->c == '*') + css_lex_next(buf); + if (css_lex_accept(buf, '/')) + continue; + } + css_lex_next(buf); + } + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: unterminated comment"); + } + return '/'; + } + + if (css_lex_accept(buf, '<')) + { + if (css_lex_accept(buf, '!')) + { + css_lex_expect(buf, '-'); + css_lex_expect(buf, '-'); + continue; /* ignore CDO */ + } + return '<'; + } + + if (css_lex_accept(buf, '-')) + { + if (css_lex_accept(buf, '-')) + { + css_lex_expect(buf, '>'); + continue; /* ignore CDC */ + } + if (buf->c >= '0' && buf->c <= '9') + { + css_push_char(buf, '-'); + return css_lex_number(buf); + } + if (isnmstart(buf->c)) + { + css_push_char(buf, '-'); + css_push_char(buf, buf->c); + css_lex_next(buf); + return css_lex_keyword(buf); + } + return '-'; + } + + if (css_lex_accept(buf, '.')) + { + if (buf->c >= '0' && buf->c <= '9') + { + css_push_char(buf, '.'); + return css_lex_number(buf); + } + return '.'; + } + + if (css_lex_accept(buf, '#')) + { + int a, b, c, d, e, f; + if (!css_lex_accept_hex(buf, &a)) goto colorerror; + if (!css_lex_accept_hex(buf, &b)) goto colorerror; + if (!css_lex_accept_hex(buf, &c)) goto colorerror; + if (css_lex_accept_hex(buf, &d)) + { + if (!css_lex_accept_hex(buf, &e)) goto colorerror; + if (!css_lex_accept_hex(buf, &f)) goto colorerror; + buf->color = (a << 20) | (b << 16) | (c << 12) | (d << 8) | (e << 4) | f; + } + else + { + buf->color = (a << 20) | (b << 12) | (c << 4); + } + sprintf(buf->string, "%06x", buf->color); // XXX + return CSS_COLOR; +colorerror: + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error in color"); + } + + if (css_lex_accept(buf, '"')) + return css_lex_string(buf, '"'); + if (css_lex_accept(buf, '\'')) + return css_lex_string(buf, '\''); + + if (buf->c >= '0' && buf->c <= '9') + return css_lex_number(buf); + + if (css_lex_accept(buf, 'u')) + { + if (css_lex_accept(buf, 'r')) + { + if (css_lex_accept(buf, 'l')) + { + if (css_lex_accept(buf, '(')) + { + // string or url + css_lex_expect(buf, ')'); + return CSS_URI; + } + css_push_char(buf, 'u'); + css_push_char(buf, 'r'); + css_push_char(buf, 'l'); + return css_lex_keyword(buf); + } + css_push_char(buf, 'u'); + css_push_char(buf, 'r'); + return css_lex_keyword(buf); + } + css_push_char(buf, 'u'); + return css_lex_keyword(buf); + } + + if (isnmstart(buf->c)) + { + css_push_char(buf, buf->c); + css_lex_next(buf); + return css_lex_keyword(buf); + } + + t = buf->c; + css_lex_next(buf); + return t; + } + return EOF; +} + +static void next(struct lexbuf *buf) +{ + buf->lookahead = css_lex(buf); +} + +static int accept(struct lexbuf *buf, int t) +{ + if (buf->lookahead == t) + { + next(buf); + return 1; + } + return 0; +} + +static void expect(struct lexbuf *buf, int t) +{ + if (accept(buf, t)) + return; + if (t < 256) + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected '%c'", t); + else + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: unexpected token"); +} + +static int iscond(int t) +{ + return t == ':' || t == '.' || t == '#' || t == '['; +} + +static struct value *parse_value_list(struct lexbuf *buf); + +static struct value *parse_value(struct lexbuf *buf) +{ + struct value *v; + + if (buf->lookahead == CSS_KEYWORD) + { + v = new_value(CSS_KEYWORD, buf->string); + next(buf); + + if (accept(buf, '(')) + { + v->type = '('; + v->args = parse_value_list(buf); + expect(buf, ')'); + } + + return v; + } + + switch (buf->lookahead) + { + case CSS_NUMBER: + case CSS_LENGTH: + case CSS_PERCENT: + case CSS_STRING: + case CSS_COLOR: + case CSS_URI: + v = new_value(buf->lookahead, buf->string); + next(buf); + return v; + } + + if (accept(buf, ',')) + return new_value(',', ","); + if (accept(buf, '/')) + return new_value('/', "/"); + + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected value"); +} + +static struct value *parse_value_list(struct lexbuf *buf) +{ + struct value *v, *vv; + + vv = NULL; + + while (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != '!' && + buf->lookahead != ')' && buf->lookahead != EOF) + { + v = parse_value(buf); + v->next = vv; + vv = v; + } + + return vv; +} + +static struct property *parse_declaration(struct lexbuf *buf) +{ + struct property *p; + + if (buf->lookahead != CSS_KEYWORD) + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected keyword in property"); + p = new_property(buf->string, NULL, 0); + next(buf); + + expect(buf, ':'); + + p->value = parse_value_list(buf); + + /* !important */ + if (accept(buf, '!')) + expect(buf, CSS_KEYWORD); + + return p; +} + +static struct property *parse_declaration_list(struct lexbuf *buf) +{ + struct property *p, *pp; + + if (buf->lookahead == '}') + return NULL; + + pp = parse_declaration(buf); + + while (accept(buf, ';')) + { + if (buf->lookahead != '}' && buf->lookahead != ';') + { + p = parse_declaration(buf); + p->next = pp; + pp = p; + } + } + + return pp; +} + +static const char *parse_attrib_value(struct lexbuf *buf) +{ + const char *s; + + if (buf->lookahead == CSS_KEYWORD || buf->lookahead == CSS_STRING) + { + s = strdup(buf->string); + next(buf); + return s; + } + + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected attribute value"); +} + +static struct condition *parse_condition(struct lexbuf *buf) +{ + struct condition *c; + + if (accept(buf, ':')) + { + if (buf->lookahead != CSS_KEYWORD) + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected keyword after ':'"); + c = new_condition(':', "pseudo", buf->string); + next(buf); + return c; + } + + if (accept(buf, '.')) + { + if (buf->lookahead != CSS_KEYWORD) + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected keyword after '.'"); + c = new_condition('.', "class", buf->string); + next(buf); + return c; + } + + if (accept(buf, '#')) + { + if (buf->lookahead != CSS_KEYWORD) + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected keyword after '#'"); + c = new_condition('#', "id", buf->string); + next(buf); + return c; + } + + if (accept(buf, '[')) + { + if (buf->lookahead != CSS_KEYWORD) + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected keyword after '['"); + + c = new_condition('[', buf->string, NULL); + next(buf); + + if (accept(buf, '=')) + { + c->type = '='; + c->val = parse_attrib_value(buf); + } + else if (accept(buf, '|')) + { + expect(buf, '='); + c->type = '|'; + c->val = parse_attrib_value(buf); + } + else if (accept(buf, '~')) + { + expect(buf, '='); + c->type = '~'; + c->val = parse_attrib_value(buf); + } + + expect(buf, ']'); + + return c; + } + + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected condition"); +} + +static struct condition *parse_condition_list(struct lexbuf *buf) +{ + struct condition *c, *cc; + + cc = parse_condition(buf); + while (iscond(buf->lookahead)) + { + c = parse_condition(buf); + c->next = cc; + cc = c; + } + return cc; +} + +static struct selector *parse_simple_selector(struct lexbuf *buf) +{ + struct selector *s; + + if (accept(buf, '*')) + { + s = new_selector(NULL); + if (iscond(buf->lookahead)) + s->cond = parse_condition_list(buf); + return s; + } + else if (buf->lookahead == CSS_KEYWORD) + { + s = new_selector(buf->string); + next(buf); + if (iscond(buf->lookahead)) + s->cond = parse_condition_list(buf); + return s; + } + else if (iscond(buf->lookahead)) + { + s = new_selector(NULL); + s->cond = parse_condition_list(buf); + return s; + } + + fz_throw(buf->ctx, FZ_ERROR_GENERIC, "syntax error: expected selector"); +} + +static struct selector *parse_adjacent_selector(struct lexbuf *buf) +{ + struct selector *s, *a, *b; + + a = parse_simple_selector(buf); + if (accept(buf, '+')) + { + b = parse_adjacent_selector(buf); + s = new_selector(NULL); + s->combine = '>'; + s->left = a; + s->right = b; + return s; + } + return a; +} + +static struct selector *parse_child_selector(struct lexbuf *buf) +{ + struct selector *s, *a, *b; + + a = parse_adjacent_selector(buf); + if (accept(buf, '>')) + { + b = parse_child_selector(buf); + s = new_selector(NULL); + s->combine = '>'; + s->left = a; + s->right = b; + return s; + } + return a; +} + +static struct selector *parse_descendant_selector(struct lexbuf *buf) +{ + struct selector *s, *a, *b; + + a = parse_child_selector(buf); + if (buf->lookahead != ',' && buf->lookahead != '{' && buf->lookahead != EOF) + { + b = parse_descendant_selector(buf); + s = new_selector(NULL); + s->combine = ' '; + s->left = a; + s->right = b; + return s; + } + return a; +} + +static struct selector *parse_selector_list(struct lexbuf *buf) +{ + struct selector *s, *ss; + + ss = parse_descendant_selector(buf); + while (accept(buf, ',')) + { + s = parse_descendant_selector(buf); + s->next = ss; + ss = s; + } + return ss; +} + +static struct rule *parse_rule(struct lexbuf *buf) +{ + struct selector *s; + struct property *p; + + s = parse_selector_list(buf); + expect(buf, '{'); + p = parse_declaration_list(buf); + expect(buf, '}'); + return new_rule(s, p); +} + +static void parse_media_list(struct lexbuf *buf) +{ + struct rule *r; + + while (buf->lookahead != '}' && buf->lookahead != EOF) + { + r = parse_rule(buf); + // TODO: free_rule(r); + } +} + +static void parse_at_rule(struct lexbuf *buf) +{ + struct property *p; + struct value *v; + + expect(buf, CSS_KEYWORD); + if (accept(buf, '{')) /* @page */ + { + p = parse_declaration_list(buf); + // TODO: free_properties(p); + expect(buf, '}'); + } + else + { + v = parse_value_list(buf); + // TODO: free_value_list(v); + if (accept(buf, '{')) /* @media */ + { + parse_media_list(buf); + expect(buf, '}'); + } + else /* @import */ + { + expect(buf, ';'); + } + } +} + +static struct rule *parse_stylesheet(struct lexbuf *buf, struct rule *chain) +{ + struct rule *r; + + while (buf->lookahead != EOF) + { + if (accept(buf, '@')) + { + parse_at_rule(buf); + } + else + { + r = parse_rule(buf); + r->next = chain; + chain = r; + } + } + + return chain; +} + +struct rule *fz_parse_css(fz_context *ctx, struct rule *chain, const char *source) +{ + struct lexbuf buf; + css_lex_init(ctx, &buf, source); + next(&buf); + return parse_stylesheet(&buf, chain); +} diff --git a/source/html/handler.c b/source/html/handler.c index 39d5e9c9..77b362d3 100644 --- a/source/html/handler.c +++ b/source/html/handler.c @@ -1,12 +1,5 @@ #include "mupdf/html.h" -struct html_document_s -{ - fz_document super; - fz_context *ctx; - fz_xml *root; -}; - struct html_page_s { }; @@ -28,7 +21,7 @@ html_page * html_load_page(html_document *doc, int number) { printf("html: load page %d\n", number); - return NULL; + return "nothing"; } void @@ -74,6 +67,8 @@ html_open_document_with_stream(fz_context *ctx, fz_stream *file) doc->super.run_page_contents = (void*)html_run_page; doc->super.free_page = (void*)html_free_page; + html_layout_document(doc, 400, 600); + return doc; } diff --git a/source/html/layout.c b/source/html/layout.c new file mode 100644 index 00000000..91e20b9f --- /dev/null +++ b/source/html/layout.c @@ -0,0 +1,153 @@ +#include "mupdf/html.h" + +static const char *default_css = +"html,address,blockquote,body,dd,div,dl,dt,h1,h2,h3,h4,h5,h6,ol,p,ul,center,hr,pre{display:block}" +"span{display:inline}" +"li{display:list-item}" +"head{display:none}" +"body{margin:0px}" +"h1{font-size:2em;margin:.67em 0}" +"h2{font-size:1.5em;margin:.75em 0}" +"h3{font-size:1.17em;margin:.83em 0}" +"h4,p,blockquote,ul,ol,dl,dir,menu{margin:1.12em 0}" +"h5{font-size:.83em;margin:1.5em 0}" +"h6{font-size:.75em;margin:1.67em 0}" +"h1,h2,h3,h4,h5,h6,b,strong{font-weight:bold}" +"blockquote{margin-left:40px;margin-right:40px}" +"i,cite,em,var,address{font-style:italic}" +"pre,tt,code,kbd,samp{font-family:monospace}" +"pre{white-space:pre}" +"big{font-size:1.17em}" +"small,sub,sup{font-size:.83em}" +"sub{vertical-align:sub}" +"sup{vertical-align:super}" +"s,strike,del{text-decoration:line-through}" +"hr{border:1pxinset}" +"ol,ul,dir,menu,dd{margin-left:40px}" +"ol{list-style-type:decimal}" +"ol ul,ul ol,ul ul,ol ol{margin-top:0;margin-bottom:0}" +"u,ins{text-decoration:underline}" +"center{text-align:center}" +"svg{display:none}"; + +char dirname[2048]; +char filename[2048]; + +static char *concat_text(fz_xml *root) +{ + fz_xml *node; + int i = 0, n = 1; + char *s; + for (node = fz_xml_down(root); node; node = fz_xml_next(node)) + { + const char *text = fz_xml_text(node); + n += text ? strlen(text) : 0; + } + s = malloc(n); + for (node = fz_xml_down(root); node; node = fz_xml_next(node)) + { + const char *text = fz_xml_text(node); + if (text) { + n = strlen(text); + memcpy(s+i, text, n); + i += n; + } + } + s[i] = 0; + return s; +} + +static struct rule *load_css(fz_context *ctx, struct rule *css, fz_xml *root) +{ + fz_xml *node; + for (node = root; node; node = fz_xml_next(node)) { + const char *tag = fz_xml_tag(node); +#if 0 + if (tag && !strcmp(tag, "link")) { + char *rel = fz_xml_att(node, "rel"); + if (rel && !strcasecmp(rel, "stylesheet")) { + char *type = fz_xml_att(node, "type"); + if ((type && !strcmp(type, "text/css")) || !type) { + char *href = fz_xml_att(node, "href"); + strcpy(filename, dirname); + strcat(filename, href); + css = css_parse_file(css, filename); + } + } + } +#endif + if (tag && !strcmp(tag, "style")) { +printf("found inline style sheet!\n"); + char *s = concat_text(node); +printf("'%s'\n", s); + css = fz_parse_css(ctx, css, s); + } + if (fz_xml_down(node)) + css = load_css(ctx, css, fz_xml_down(node)); + } + return css; +} + +static void layout_text(struct rule *rule, struct style *style, fz_xml *node) +{ + printf("%s\n", fz_xml_text(node)); +} + +static void layout_tree(struct rule *rule, struct style *up, fz_xml *node) +{ + while (node) + { + struct style style; + style.up = up; + style.count = 0; + + if (fz_xml_tag(node)) + { + printf("open '%s'\n", fz_xml_tag(node)); + struct computed_style cstyle; + apply_styles(rule, &style, node); + + // TODO: check inline style attribute! + //s = fz_xml_att(node, "style"); + //if (s) { + // istyle = parse_declarations(s); + // apply_styles(istyle); + //} + + compute_style(&cstyle, &style); + print_style(&cstyle); + } + else + layout_text(rule, &style, node); + + // TOOD: <br> + // TODO: <img> + + if (fz_xml_down(node)) + layout_tree(rule, &style, fz_xml_down(node)); + + printf("end\n"); + node = fz_xml_next(node); + } +} + +void +html_layout_document(html_document *doc, float w, float h) +{ + struct rule *css = NULL; + +#if 0 + strcpy(dirname, argv[i]); + s = strrchr(dirname, '/'); + if (!s) s = strrchr(dirname, '\\'); + if (s) s[1] = 0; + else strcpy(dirname, "./"); +#endif + + css = fz_parse_css(doc->ctx, NULL, default_css); + css = load_css(doc->ctx, css, doc->root); + + print_rules(css); + + layout_tree(css, NULL, doc->root); +} |