diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2014-11-25 23:50:27 +0100 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2014-12-03 12:25:52 +0100 |
commit | cf42f2f4d5e95b7254479e80614d1814e74e2387 (patch) | |
tree | 9d0452350d479c0222002c5969a4b0d8ce104925 /source/html | |
parent | bb238db8919162c8976980b8aa48f70664f2f29d (diff) | |
download | mupdf-cf42f2f4d5e95b7254479e80614d1814e74e2387.tar.xz |
html: Split html parsing cache and state into html_context.
html_document is now a simple client of html_context.
Diffstat (limited to 'source/html')
-rw-r--r-- | source/html/css-apply.c | 4 | ||||
-rw-r--r-- | source/html/font.c | 9 | ||||
-rw-r--r-- | source/html/handler.c | 150 | ||||
-rw-r--r-- | source/html/layout.c | 135 |
4 files changed, 173 insertions, 125 deletions
diff --git a/source/html/css-apply.c b/source/html/css-apply.c index 67814440..bbfc7ffa 100644 --- a/source/html/css-apply.c +++ b/source/html/css-apply.c @@ -990,7 +990,7 @@ default_computed_style(struct computed_style *style) } void -compute_style(html_document *doc, struct computed_style *style, struct style *node) +compute_style(fz_context *ctx, html_context *htx, struct computed_style *style, struct style *node) { struct value *value; @@ -1087,7 +1087,7 @@ compute_style(html_document *doc, struct computed_style *style, struct style *no const char *font_variant = get_style_property_string(node, "font-variant", "normal"); const char *font_style = get_style_property_string(node, "font-style", "normal"); const char *font_weight = get_style_property_string(node, "font-weight", "normal"); - style->font = html_load_font(doc, font_family, font_variant, font_style, font_weight); + style->font = html_load_font(ctx, htx, font_family, font_variant, font_style, font_weight); } } diff --git a/source/html/font.c b/source/html/font.c index 9c408367..51654ae9 100644 --- a/source/html/font.c +++ b/source/html/font.c @@ -9,10 +9,9 @@ static const char *font_names[16] = { }; fz_font * -html_load_font(html_document *doc, +html_load_font(fz_context *ctx, html_context *htx, const char *family, const char *variant, const char *style, const char *weight) { - fz_context *ctx = doc->ctx; unsigned char *data; unsigned int size; @@ -22,15 +21,15 @@ html_load_font(html_document *doc, int is_italic = !strcmp(style, "italic") || !strcmp(style, "oblique"); int idx = is_mono * 8 + is_sans * 4 + is_bold * 2 + is_italic; - if (!doc->fonts[idx]) + if (!htx->fonts[idx]) { data = pdf_lookup_builtin_font(font_names[idx], &size); if (!data) { printf("data=%p idx=%d s=%s\n", data, idx, font_names[idx]); abort(); } - doc->fonts[idx] = fz_new_font_from_memory(ctx, font_names[idx], data, size, 0, 1); + htx->fonts[idx] = fz_new_font_from_memory(ctx, font_names[idx], data, size, 0, 1); } - return doc->fonts[idx]; + return htx->fonts[idx]; } diff --git a/source/html/handler.c b/source/html/handler.c index c89e82ae..626ed443 100644 --- a/source/html/handler.c +++ b/source/html/handler.c @@ -2,125 +2,141 @@ #define DEFW (450) #define DEFH (600) +#define DEFEM (12) -void -html_close_document(html_document *doc) +typedef struct html_document_s html_document; + +struct html_document_s +{ + fz_document super; + fz_context *ctx; + html_context htx; + struct box *box; +}; + +static void +htdoc_close_document(html_document *doc) { fz_context *ctx = doc->ctx; + html_fini(ctx, &doc->htx); fz_free(ctx, doc); } -int -html_count_pages(html_document *doc) +static int +htdoc_count_pages(html_document *doc) { int count; - if (!doc->box) html_layout_document(doc, DEFW, DEFH, 12); + // TODO: reflow - count = ceilf(doc->box->h / doc->page_h); -printf("count pages! %g / %g = %d\n", doc->box->h, doc->page_h, count); + count = ceilf(doc->box->h / doc->htx.page_h); +printf("count pages! %g / %g = %d\n", doc->box->h, doc->htx.page_h, count); return count; } -html_page * -html_load_page(html_document *doc, int number) +static void * +htdoc_load_page(html_document *doc, int number) { printf("load page %d\n", number); - if (!doc->box) html_layout_document(doc, DEFW, DEFH, 12); + // TODO: reflow return (void*)((intptr_t)number + 1); } -void -html_free_page(html_document *doc, html_page *page) +static void +htdoc_free_page(html_document *doc, void *page) +{ +} + +static void +htdoc_layout(html_document *doc, float w, float h, float em) { + html_layout(doc->ctx, &doc->htx, doc->box, w, h, em); } -fz_rect * -html_bound_page(html_document *doc, html_page *page, fz_rect *bbox) +static fz_rect * +htdoc_bound_page(html_document *doc, void *page, fz_rect *bbox) { - if (!doc->box) html_layout_document(doc, DEFW, DEFH, 12); + // TODO: reflow printf("html: bound page\n"); bbox->x0 = bbox->y0 = 0; - bbox->x1 = doc->page_w; - bbox->y1 = doc->page_h; + bbox->x1 = doc->htx.page_w; + bbox->y1 = doc->htx.page_h; return bbox; } -void -html_run_page(html_document *doc, html_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie) +static void +htdoc_run_page(html_document *doc, void *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie) { int n = ((intptr_t)page) - 1; printf("html: run page %d\n", n); - html_run_box(doc->ctx, doc->box, n * doc->page_h, (n+1) * doc->page_h, dev, ctm); + html_draw(doc->ctx, &doc->htx, doc->box, n * doc->htx.page_h, (n+1) * doc->htx.page_h, dev, ctm); } -html_document * -html_open_document_with_stream(fz_context *ctx, fz_stream *file) + +static html_document * +htdoc_open_document_with_stream(fz_context *ctx, fz_stream *file) { html_document *doc; + fz_archive *zip; fz_buffer *buf; - fz_xml *xml; - buf = fz_read_all(file, 0); - fz_write_buffer_byte(ctx, buf, 0); - -printf("html: parsing XHTML.\n"); - xml = fz_parse_xml(ctx, buf->data, buf->len, 1); - fz_drop_buffer(ctx, buf); + zip = fz_open_directory(ctx, "."); doc = fz_malloc_struct(ctx, html_document); doc->ctx = ctx; - doc->dirname = NULL; + html_init(ctx, &doc->htx, zip); - doc->super.close = (void*)html_close_document; - doc->super.layout = (void*)html_layout_document; - doc->super.count_pages = (void*)html_count_pages; - doc->super.load_page = (void*)html_load_page; - doc->super.bound_page = (void*)html_bound_page; - doc->super.run_page_contents = (void*)html_run_page; - doc->super.free_page = (void*)html_free_page; + doc->super.close = (void*)htdoc_close_document; + doc->super.layout = (void*)htdoc_layout; + doc->super.count_pages = (void*)htdoc_count_pages; + doc->super.load_page = (void*)htdoc_load_page; + doc->super.bound_page = (void*)htdoc_bound_page; + doc->super.run_page_contents = (void*)htdoc_run_page; + doc->super.free_page = (void*)htdoc_free_page; - doc->xml = xml; - doc->box = NULL; + buf = fz_read_all(file, 0); + fz_write_buffer_byte(ctx, buf, 0); + doc->box = html_generate(ctx, &doc->htx, ".", buf); + fz_drop_buffer(ctx, buf); return doc; } -html_document * -html_open_document(fz_context *ctx, const char *filename) +static html_document * +htdoc_open_document(fz_context *ctx, const char *filename) { - fz_stream *file; + char dirname[2048]; + fz_archive *zip; + fz_buffer *buf; html_document *doc; - char *s; - file = fz_open_file(ctx, filename); - if (!file) - fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open file '%s': %s", filename, strerror(errno)); + fz_dirname(dirname, filename, sizeof dirname); + zip = fz_open_directory(ctx, dirname); - fz_try(ctx) - { - doc = html_open_document_with_stream(ctx, file); - } - fz_always(ctx) - { - fz_close(file); - } - fz_catch(ctx) - { - fz_rethrow(ctx); - } + doc = fz_malloc_struct(ctx, html_document); + doc->ctx = ctx; + html_init(ctx, &doc->htx, zip); + + doc->super.close = (void*)htdoc_close_document; + doc->super.layout = (void*)htdoc_layout; + doc->super.count_pages = (void*)htdoc_count_pages; + doc->super.load_page = (void*)htdoc_load_page; + doc->super.bound_page = (void*)htdoc_bound_page; + doc->super.run_page_contents = (void*)htdoc_run_page; + doc->super.free_page = (void*)htdoc_free_page; + + buf = fz_read_file(ctx, filename); + fz_write_buffer_byte(ctx, buf, 0); + doc->box = html_generate(ctx, &doc->htx, ".", buf); + fz_drop_buffer(ctx, buf); - doc->dirname = fz_strdup(ctx, filename); - s = strrchr(doc->dirname, '/'); - if (!s) s = strrchr(doc->dirname, '\\'); - if (s) s[1] = 0; - else doc->dirname[0] = 0; + htdoc_layout(doc, DEFW, DEFH, DEFEM); return doc; } static int -html_recognize(fz_context *doc, const char *magic) +htdoc_recognize(fz_context *doc, const char *magic) { char *ext = strrchr(magic, '.'); @@ -137,7 +153,7 @@ html_recognize(fz_context *doc, const char *magic) fz_document_handler html_document_handler = { - (fz_document_recognize_fn *)&html_recognize, - (fz_document_open_fn *)&html_open_document, - (fz_document_open_with_stream_fn *)&html_open_document_with_stream + (fz_document_recognize_fn *)&htdoc_recognize, + (fz_document_open_fn *)&htdoc_open_document, + (fz_document_open_with_stream_fn *)&htdoc_open_document_with_stream }; diff --git a/source/html/layout.c b/source/html/layout.c index 822e2937..f07108b5 100644 --- a/source/html/layout.c +++ b/source/html/layout.c @@ -100,33 +100,30 @@ static void generate_text(fz_context *ctx, struct box *box, const char *text) } } -static void generate_image(html_document *doc, struct box *box, const char *src) +static void generate_image(fz_context *ctx, html_context *htx, const char *base_uri, struct box *box, const char *src) { - fz_context *ctx = doc->ctx; fz_image *img; fz_buffer *buf; - char filename[2048]; + char path[2048]; struct box *flow = box; while (flow->type != BOX_FLOW) flow = flow->up; - fz_strlcpy(filename, doc->dirname, sizeof filename); - fz_strlcat(filename, src, sizeof filename); + fz_strlcpy(path, base_uri, sizeof path); + fz_strlcat(path, "/", sizeof path); + fz_strlcat(path, src, sizeof path); + fz_cleanname(path); - buf = fz_read_file(ctx, filename); + buf = fz_read_archive_entry(ctx, htx->zip, path); img = fz_new_image_from_buffer(ctx, buf); fz_drop_buffer(ctx, buf); add_flow_image(ctx, flow, &box->style, img); } -struct box *new_box(fz_context *ctx, fz_xml *node) +static void init_box(fz_context *ctx, struct box *box, fz_xml *node) { - struct box *box; - - box = fz_malloc_struct(ctx, struct box); - box->type = BOX_BLOCK; box->x = box->y = 0; box->w = box->h = 0; @@ -142,11 +139,16 @@ struct box *new_box(fz_context *ctx, fz_xml *node) box->flow_tail = &box->flow_head; default_computed_style(&box->style); +} +static struct box *new_box(fz_context *ctx, fz_xml *node) +{ + struct box *box = fz_malloc_struct(ctx, struct box); + init_box(ctx, box, node); return box; } -void insert_box(fz_context *ctx, struct box *box, int type, struct box *top) +static void insert_box(fz_context *ctx, struct box *box, int type, struct box *top) { box->type = type; @@ -234,9 +236,9 @@ static void insert_inline_box(fz_context *ctx, struct box *box, struct box *top) } } -static void generate_boxes(html_document *doc, fz_xml *node, struct box *top, struct rule *rule, struct style *up_style) +static void generate_boxes(fz_context *ctx, html_context *htx, const char *base_uri, + fz_xml *node, struct box *top, struct rule *rule, struct style *up_style) { - fz_context *ctx = doc->ctx; struct style style; struct box *box; const char *tag; @@ -257,7 +259,7 @@ static void generate_boxes(html_document *doc, fz_xml *node, struct box *top, st if (!strcmp(tag, "br")) { box = new_box(ctx, node); - compute_style(doc, &box->style, &style); + compute_style(ctx, htx, &box->style, &style); top = insert_break_box(ctx, box, top); } @@ -267,16 +269,16 @@ static void generate_boxes(html_document *doc, fz_xml *node, struct box *top, st if (src) { box = new_box(ctx, node); - compute_style(doc, &box->style, &style); + compute_style(ctx, htx, &box->style, &style); insert_inline_box(ctx, box, top); - generate_image(doc, box, src); + generate_image(ctx, htx, base_uri, box, src); } } else if (display != DIS_NONE) { box = new_box(ctx, node); - compute_style(doc, &box->style, &style); + compute_style(ctx, htx, &box->style, &style); if (display == DIS_BLOCK) { @@ -297,7 +299,7 @@ static void generate_boxes(html_document *doc, fz_xml *node, struct box *top, st } if (fz_xml_down(node)) - generate_boxes(doc, fz_xml_down(node), box, rule, &style); + generate_boxes(ctx, htx, base_uri, fz_xml_down(node), box, rule, &style); // TODO: remove empty flow boxes } @@ -804,7 +806,7 @@ draw_block_box(fz_context *ctx, struct box *box, float page_top, float page_bot, } void -html_run_box(fz_context *ctx, struct box *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *inctm) +html_draw(fz_context *ctx, html_context *htx, struct box *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *inctm) { fz_matrix ctm = *inctm; fz_pre_translate(&ctm, 0, -page_top); @@ -836,16 +838,15 @@ static char *concat_text(fz_context *ctx, fz_xml *root) return s; } -static struct rule *load_css(html_document *doc, struct rule *css, fz_xml *root) +static struct rule *html_load_css(fz_context *ctx, html_context *htx, const char *base_uri, struct rule *css, fz_xml *root) { - fz_context *ctx = doc->ctx; fz_xml *node; - char filename[2048]; + fz_buffer *buf; + char path[2048]; for (node = root; node; node = fz_xml_next(node)) { const char *tag = fz_xml_tag(node); -#if 1 if (tag && !strcmp(tag, "link")) { char *rel = fz_xml_att(node, "rel"); @@ -855,13 +856,21 @@ static struct rule *load_css(html_document *doc, struct rule *css, fz_xml *root) if ((type && !strcmp(type, "text/css")) || !type) { char *href = fz_xml_att(node, "href"); - fz_strlcpy(filename, doc->dirname, sizeof filename); - fz_strlcat(filename, href, sizeof filename); - css = fz_parse_css_file(ctx, css, filename); + if (href) + { + fz_strlcpy(path, base_uri, sizeof path); + fz_strlcat(path, "/", sizeof path); + fz_strlcat(path, href, sizeof path); + fz_cleanname(path); + + buf = fz_read_archive_entry(ctx, htx->zip, path); + fz_write_buffer_byte(ctx, buf, 0); + css = fz_parse_css(ctx, css, (char*)buf->data); + fz_drop_buffer(ctx, buf); + } } } } -#endif if (tag && !strcmp(tag, "style")) { char *s = concat_text(ctx, node); @@ -869,46 +878,70 @@ static struct rule *load_css(html_document *doc, struct rule *css, fz_xml *root) fz_free(ctx, s); } if (fz_xml_down(node)) - css = load_css(doc, css, fz_xml_down(node)); + css = html_load_css(ctx, htx, base_uri, css, fz_xml_down(node)); } return css; } void -html_layout_document(html_document *doc, float page_w, float page_h, float em) +html_layout(fz_context *ctx, html_context *htx, struct box *box, float w, float h, float em) +{ + struct box page_box; + + htx->page_w = w; + htx->page_h = h; + + printf("html: laying out text.\n"); + + init_box(ctx, &page_box, NULL); + page_box.w = w; + page_box.h = 0; + + layout_block(ctx, box, &page_box, em, 0, h); + + printf("html: finished.\n"); +} + +struct box * +html_generate(fz_context *ctx, html_context *htx, const char *base_uri, fz_buffer *buf) { - struct rule *css = NULL; - struct box *root_box; - struct box *page_box; + fz_xml *xml; + struct rule *css; + struct box *box; struct style style; - doc->page_w = page_w; - doc->page_h = page_h; + printf("html: parsing XHTML.\n"); + xml = fz_parse_xml(ctx, buf->data, buf->len, 1); -printf("html: parsing style sheets.\n"); - css = fz_parse_css(doc->ctx, NULL, default_css); - css = load_css(doc, css, doc->xml); + printf("html: parsing style sheets.\n"); + css = fz_parse_css(ctx, NULL, default_css); + css = html_load_css(ctx, htx, base_uri, css, xml); // print_rules(css); + printf("html: applying styles and generating boxes.\n"); + box = new_box(ctx, NULL); + style.up = NULL; style.count = 0; - root_box = new_box(doc->ctx, NULL); - - page_box = new_box(doc->ctx, NULL); - page_box->w = page_w; - page_box->h = 0; + generate_boxes(ctx, htx, base_uri, xml, box, css, &style); - // TODO: split generate and layout + return box; +} -printf("html: applying styles and generating boxes.\n"); - generate_boxes(doc, doc->xml, root_box, css, &style); -printf("html: laying out text.\n"); - layout_block(doc->ctx, root_box, page_box, em, 0, page_h); -printf("html: finished.\n"); +void html_init(fz_context *ctx, html_context *html, fz_archive *zip) +{ + memset(html, 0, sizeof *html); + html->zip = zip; +} - // print_box(doc->ctx, root_box, 0); +void html_rebind(html_context *html, fz_context *ctx) +{ + fz_rebind_archive(html->zip, ctx); +} - doc->box = root_box; +void html_fini(fz_context *ctx, html_context *html) +{ + fz_close_archive(ctx, html->zip); } |