summary | refs | log | tree | commit | diff
path: root/source/html
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2014-11-25 23:50:27 +0100
committer Tor Andersson <tor.andersson@artifex.com> 2014-12-03 12:25:52 +0100
commit cf42f2f4d5e95b7254479e80614d1814e74e2387 (patch)
tree 9d0452350d479c0222002c5969a4b0d8ce104925 /source/html
parent bb238db8919162c8976980b8aa48f70664f2f29d (diff)
download mupdf-cf42f2f4d5e95b7254479e80614d1814e74e2387.tar.xz
html: Split html parsing cache and state into html_context.
html_document is now a simple client of html_context.
Diffstat (limited to 'source/html')
-rw-r--r-- source/html/css-apply.c 4
-rw-r--r-- source/html/font.c 9
-rw-r--r-- source/html/handler.c 150
-rw-r--r-- source/html/layout.c 135
4 files changed, 173 insertions, 125 deletions
diff --git a/source/html/css-apply.c b/source/html/css-apply.c
index 67814440..bbfc7ffa 100644
--- a/source/html/css-apply.c
+++ b/source/html/css-apply.c
@@ -990,7 +990,7 @@ default_computed_style(struct computed_style *style)
}
void
-compute_style(html_document *doc, struct computed_style *style, struct style *node)
+compute_style(fz_context *ctx, html_context *htx, struct computed_style *style, struct style *node)
{
struct value *value;
@@ -1087,7 +1087,7 @@ compute_style(html_document *doc, struct computed_style *style, struct style *no
const char *font_variant = get_style_property_string(node, "font-variant", "normal");
const char *font_style = get_style_property_string(node, "font-style", "normal");
const char *font_weight = get_style_property_string(node, "font-weight", "normal");
- style->font = html_load_font(doc, font_family, font_variant, font_style, font_weight);
+ style->font = html_load_font(ctx, htx, font_family, font_variant, font_style, font_weight);
}
}
diff --git a/source/html/font.c b/source/html/font.c
index 9c408367..51654ae9 100644
--- a/source/html/font.c
+++ b/source/html/font.c
@@ -9,10 +9,9 @@ static const char *font_names[16] = {
};
fz_font *
-html_load_font(html_document *doc,
+html_load_font(fz_context *ctx, html_context *htx,
const char *family, const char *variant, const char *style, const char *weight)
{
- fz_context *ctx = doc->ctx;
unsigned char *data;
unsigned int size;
@@ -22,15 +21,15 @@ html_load_font(html_document *doc,
int is_italic = !strcmp(style, "italic") || !strcmp(style, "oblique");
int idx = is_mono * 8 + is_sans * 4 + is_bold * 2 + is_italic;
- if (!doc->fonts[idx])
+ if (!htx->fonts[idx])
{
data = pdf_lookup_builtin_font(font_names[idx], &size);
if (!data) {
printf("data=%p idx=%d s=%s\n", data, idx, font_names[idx]);
abort();
}
- doc->fonts[idx] = fz_new_font_from_memory(ctx, font_names[idx], data, size, 0, 1);
+ htx->fonts[idx] = fz_new_font_from_memory(ctx, font_names[idx], data, size, 0, 1);
}
- return doc->fonts[idx];
+ return htx->fonts[idx];
}
diff --git a/source/html/handler.c b/source/html/handler.c
index c89e82ae..626ed443 100644
--- a/source/html/handler.c
+++ b/source/html/handler.c
@@ -2,125 +2,141 @@
#define DEFW (450)
#define DEFH (600)
+#define DEFEM (12)
-void
-html_close_document(html_document *doc)
+typedef struct html_document_s html_document;
+
+struct html_document_s
+{
+ fz_document super;
+ fz_context *ctx;
+ html_context htx;
+ struct box *box;
+};
+
+static void
+htdoc_close_document(html_document *doc)
{
fz_context *ctx = doc->ctx;
+ html_fini(ctx, &doc->htx);
fz_free(ctx, doc);
}
-int
-html_count_pages(html_document *doc)
+static int
+htdoc_count_pages(html_document *doc)
{
int count;
- if (!doc->box) html_layout_document(doc, DEFW, DEFH, 12);
+ // TODO: reflow
- count = ceilf(doc->box->h / doc->page_h);
-printf("count pages! %g / %g = %d\n", doc->box->h, doc->page_h, count);
+ count = ceilf(doc->box->h / doc->htx.page_h);
+printf("count pages! %g / %g = %d\n", doc->box->h, doc->htx.page_h, count);
return count;
}
-html_page *
-html_load_page(html_document *doc, int number)
+static void *
+htdoc_load_page(html_document *doc, int number)
{
printf("load page %d\n", number);
- if (!doc->box) html_layout_document(doc, DEFW, DEFH, 12);
+ // TODO: reflow
return (void*)((intptr_t)number + 1);
}
-void
-html_free_page(html_document *doc, html_page *page)
+static void
+htdoc_free_page(html_document *doc, void *page)
+{
+}
+
+static void
+htdoc_layout(html_document *doc, float w, float h, float em)
{
+ html_layout(doc->ctx, &doc->htx, doc->box, w, h, em);
}
-fz_rect *
-html_bound_page(html_document *doc, html_page *page, fz_rect *bbox)
+static fz_rect *
+htdoc_bound_page(html_document *doc, void *page, fz_rect *bbox)
{
- if (!doc->box) html_layout_document(doc, DEFW, DEFH, 12);
+ // TODO: reflow
printf("html: bound page\n");
bbox->x0 = bbox->y0 = 0;
- bbox->x1 = doc->page_w;
- bbox->y1 = doc->page_h;
+ bbox->x1 = doc->htx.page_w;
+ bbox->y1 = doc->htx.page_h;
return bbox;
}
-void
-html_run_page(html_document *doc, html_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
+static void
+htdoc_run_page(html_document *doc, void *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
{
int n = ((intptr_t)page) - 1;
printf("html: run page %d\n", n);
- html_run_box(doc->ctx, doc->box, n * doc->page_h, (n+1) * doc->page_h, dev, ctm);
+ html_draw(doc->ctx, &doc->htx, doc->box, n * doc->htx.page_h, (n+1) * doc->htx.page_h, dev, ctm);
}
-html_document *
-html_open_document_with_stream(fz_context *ctx, fz_stream *file)
+
+static html_document *
+htdoc_open_document_with_stream(fz_context *ctx, fz_stream *file)
{
html_document *doc;
+ fz_archive *zip;
fz_buffer *buf;
- fz_xml *xml;
- buf = fz_read_all(file, 0);
- fz_write_buffer_byte(ctx, buf, 0);
-
-printf("html: parsing XHTML.\n");
- xml = fz_parse_xml(ctx, buf->data, buf->len, 1);
- fz_drop_buffer(ctx, buf);
+ zip = fz_open_directory(ctx, ".");
doc = fz_malloc_struct(ctx, html_document);
doc->ctx = ctx;
- doc->dirname = NULL;
+ html_init(ctx, &doc->htx, zip);
- doc->super.close = (void*)html_close_document;
- doc->super.layout = (void*)html_layout_document;
- doc->super.count_pages = (void*)html_count_pages;
- doc->super.load_page = (void*)html_load_page;
- doc->super.bound_page = (void*)html_bound_page;
- doc->super.run_page_contents = (void*)html_run_page;
- doc->super.free_page = (void*)html_free_page;
+ doc->super.close = (void*)htdoc_close_document;
+ doc->super.layout = (void*)htdoc_layout;
+ doc->super.count_pages = (void*)htdoc_count_pages;
+ doc->super.load_page = (void*)htdoc_load_page;
+ doc->super.bound_page = (void*)htdoc_bound_page;
+ doc->super.run_page_contents = (void*)htdoc_run_page;
+ doc->super.free_page = (void*)htdoc_free_page;
- doc->xml = xml;
- doc->box = NULL;
+ buf = fz_read_all(file, 0);
+ fz_write_buffer_byte(ctx, buf, 0);
+ doc->box = html_generate(ctx, &doc->htx, ".", buf);
+ fz_drop_buffer(ctx, buf);
return doc;
}
-html_document *
-html_open_document(fz_context *ctx, const char *filename)
+static html_document *
+htdoc_open_document(fz_context *ctx, const char *filename)
{
- fz_stream *file;
+ char dirname[2048];
+ fz_archive *zip;
+ fz_buffer *buf;
html_document *doc;
- char *s;
- file = fz_open_file(ctx, filename);
- if (!file)
- fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open file '%s': %s", filename, strerror(errno));
+ fz_dirname(dirname, filename, sizeof dirname);
+ zip = fz_open_directory(ctx, dirname);
- fz_try(ctx)
- {
- doc = html_open_document_with_stream(ctx, file);
- }
- fz_always(ctx)
- {
- fz_close(file);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
+ doc = fz_malloc_struct(ctx, html_document);
+ doc->ctx = ctx;
+ html_init(ctx, &doc->htx, zip);
+
+ doc->super.close = (void*)htdoc_close_document;
+ doc->super.layout = (void*)htdoc_layout;
+ doc->super.count_pages = (void*)htdoc_count_pages;
+ doc->super.load_page = (void*)htdoc_load_page;
+ doc->super.bound_page = (void*)htdoc_bound_page;
+ doc->super.run_page_contents = (void*)htdoc_run_page;
+ doc->super.free_page = (void*)htdoc_free_page;
+
+ buf = fz_read_file(ctx, filename);
+ fz_write_buffer_byte(ctx, buf, 0);
+ doc->box = html_generate(ctx, &doc->htx, ".", buf);
+ fz_drop_buffer(ctx, buf);
- doc->dirname = fz_strdup(ctx, filename);
- s = strrchr(doc->dirname, '/');
- if (!s) s = strrchr(doc->dirname, '\\');
- if (s) s[1] = 0;
- else doc->dirname[0] = 0;
+ htdoc_layout(doc, DEFW, DEFH, DEFEM);
return doc;
}
static int
-html_recognize(fz_context *doc, const char *magic)
+htdoc_recognize(fz_context *doc, const char *magic)
{
char *ext = strrchr(magic, '.');
@@ -137,7 +153,7 @@ html_recognize(fz_context *doc, const char *magic)
fz_document_handler html_document_handler =
{
- (fz_document_recognize_fn *)&html_recognize,
- (fz_document_open_fn *)&html_open_document,
- (fz_document_open_with_stream_fn *)&html_open_document_with_stream
+ (fz_document_recognize_fn *)&htdoc_recognize,
+ (fz_document_open_fn *)&htdoc_open_document,
+ (fz_document_open_with_stream_fn *)&htdoc_open_document_with_stream
};
diff --git a/source/html/layout.c b/source/html/layout.c
index 822e2937..f07108b5 100644
--- a/source/html/layout.c
+++ b/source/html/layout.c
@@ -100,33 +100,30 @@ static void generate_text(fz_context *ctx, struct box *box, const char *text)
}
}
-static void generate_image(html_document *doc, struct box *box, const char *src)
+static void generate_image(fz_context *ctx, html_context *htx, const char *base_uri, struct box *box, const char *src)
{
- fz_context *ctx = doc->ctx;
fz_image *img;
fz_buffer *buf;
- char filename[2048];
+ char path[2048];
struct box *flow = box;
while (flow->type != BOX_FLOW)
flow = flow->up;
- fz_strlcpy(filename, doc->dirname, sizeof filename);
- fz_strlcat(filename, src, sizeof filename);
+ fz_strlcpy(path, base_uri, sizeof path);
+ fz_strlcat(path, "/", sizeof path);
+ fz_strlcat(path, src, sizeof path);
+ fz_cleanname(path);
- buf = fz_read_file(ctx, filename);
+ buf = fz_read_archive_entry(ctx, htx->zip, path);
img = fz_new_image_from_buffer(ctx, buf);
fz_drop_buffer(ctx, buf);
add_flow_image(ctx, flow, &box->style, img);
}
-struct box *new_box(fz_context *ctx, fz_xml *node)
+static void init_box(fz_context *ctx, struct box *box, fz_xml *node)
{
- struct box *box;
-
- box = fz_malloc_struct(ctx, struct box);
-
box->type = BOX_BLOCK;
box->x = box->y = 0;
box->w = box->h = 0;
@@ -142,11 +139,16 @@ struct box *new_box(fz_context *ctx, fz_xml *node)
box->flow_tail = &box->flow_head;
default_computed_style(&box->style);
+}
+static struct box *new_box(fz_context *ctx, fz_xml *node)
+{
+ struct box *box = fz_malloc_struct(ctx, struct box);
+ init_box(ctx, box, node);
return box;
}
-void insert_box(fz_context *ctx, struct box *box, int type, struct box *top)
+static void insert_box(fz_context *ctx, struct box *box, int type, struct box *top)
{
box->type = type;
@@ -234,9 +236,9 @@ static void insert_inline_box(fz_context *ctx, struct box *box, struct box *top)
}
}
-static void generate_boxes(html_document *doc, fz_xml *node, struct box *top, struct rule *rule, struct style *up_style)
+static void generate_boxes(fz_context *ctx, html_context *htx, const char *base_uri,
+ fz_xml *node, struct box *top, struct rule *rule, struct style *up_style)
{
- fz_context *ctx = doc->ctx;
struct style style;
struct box *box;
const char *tag;
@@ -257,7 +259,7 @@ static void generate_boxes(html_document *doc, fz_xml *node, struct box *top, st
if (!strcmp(tag, "br"))
{
box = new_box(ctx, node);
- compute_style(doc, &box->style, &style);
+ compute_style(ctx, htx, &box->style, &style);
top = insert_break_box(ctx, box, top);
}
@@ -267,16 +269,16 @@ static void generate_boxes(html_document *doc, fz_xml *node, struct box *top, st
if (src)
{
box = new_box(ctx, node);
- compute_style(doc, &box->style, &style);
+ compute_style(ctx, htx, &box->style, &style);
insert_inline_box(ctx, box, top);
- generate_image(doc, box, src);
+ generate_image(ctx, htx, base_uri, box, src);
}
}
else if (display != DIS_NONE)
{
box = new_box(ctx, node);
- compute_style(doc, &box->style, &style);
+ compute_style(ctx, htx, &box->style, &style);
if (display == DIS_BLOCK)
{
@@ -297,7 +299,7 @@ static void generate_boxes(html_document *doc, fz_xml *node, struct box *top, st
}
if (fz_xml_down(node))
- generate_boxes(doc, fz_xml_down(node), box, rule, &style);
+ generate_boxes(ctx, htx, base_uri, fz_xml_down(node), box, rule, &style);
// TODO: remove empty flow boxes
}
@@ -804,7 +806,7 @@ draw_block_box(fz_context *ctx, struct box *box, float page_top, float page_bot,
}
void
-html_run_box(fz_context *ctx, struct box *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *inctm)
+html_draw(fz_context *ctx, html_context *htx, struct box *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *inctm)
{
fz_matrix ctm = *inctm;
fz_pre_translate(&ctm, 0, -page_top);
@@ -836,16 +838,15 @@ static char *concat_text(fz_context *ctx, fz_xml *root)
return s;
}
-static struct rule *load_css(html_document *doc, struct rule *css, fz_xml *root)
+static struct rule *html_load_css(fz_context *ctx, html_context *htx, const char *base_uri, struct rule *css, fz_xml *root)
{
- fz_context *ctx = doc->ctx;
fz_xml *node;
- char filename[2048];
+ fz_buffer *buf;
+ char path[2048];
for (node = root; node; node = fz_xml_next(node))
{
const char *tag = fz_xml_tag(node);
-#if 1
if (tag && !strcmp(tag, "link"))
{
char *rel = fz_xml_att(node, "rel");
@@ -855,13 +856,21 @@ static struct rule *load_css(html_document *doc, struct rule *css, fz_xml *root)
if ((type && !strcmp(type, "text/css")) || !type)
{
char *href = fz_xml_att(node, "href");
- fz_strlcpy(filename, doc->dirname, sizeof filename);
- fz_strlcat(filename, href, sizeof filename);
- css = fz_parse_css_file(ctx, css, filename);
+ if (href)
+ {
+ fz_strlcpy(path, base_uri, sizeof path);
+ fz_strlcat(path, "/", sizeof path);
+ fz_strlcat(path, href, sizeof path);
+ fz_cleanname(path);
+
+ buf = fz_read_archive_entry(ctx, htx->zip, path);
+ fz_write_buffer_byte(ctx, buf, 0);
+ css = fz_parse_css(ctx, css, (char*)buf->data);
+ fz_drop_buffer(ctx, buf);
+ }
}
}
}
-#endif
if (tag && !strcmp(tag, "style"))
{
char *s = concat_text(ctx, node);
@@ -869,46 +878,70 @@ static struct rule *load_css(html_document *doc, struct rule *css, fz_xml *root)
fz_free(ctx, s);
}
if (fz_xml_down(node))
- css = load_css(doc, css, fz_xml_down(node));
+ css = html_load_css(ctx, htx, base_uri, css, fz_xml_down(node));
}
return css;
}
void
-html_layout_document(html_document *doc, float page_w, float page_h, float em)
+html_layout(fz_context *ctx, html_context *htx, struct box *box, float w, float h, float em)
+{
+ struct box page_box;
+
+ htx->page_w = w;
+ htx->page_h = h;
+
+ printf("html: laying out text.\n");
+
+ init_box(ctx, &page_box, NULL);
+ page_box.w = w;
+ page_box.h = 0;
+
+ layout_block(ctx, box, &page_box, em, 0, h);
+
+ printf("html: finished.\n");
+}
+
+struct box *
+html_generate(fz_context *ctx, html_context *htx, const char *base_uri, fz_buffer *buf)
{
- struct rule *css = NULL;
- struct box *root_box;
- struct box *page_box;
+ fz_xml *xml;
+ struct rule *css;
+ struct box *box;
struct style style;
- doc->page_w = page_w;
- doc->page_h = page_h;
+ printf("html: parsing XHTML.\n");
+ xml = fz_parse_xml(ctx, buf->data, buf->len, 1);
-printf("html: parsing style sheets.\n");
- css = fz_parse_css(doc->ctx, NULL, default_css);
- css = load_css(doc, css, doc->xml);
+ printf("html: parsing style sheets.\n");
+ css = fz_parse_css(ctx, NULL, default_css);
+ css = html_load_css(ctx, htx, base_uri, css, xml);
// print_rules(css);
+ printf("html: applying styles and generating boxes.\n");
+ box = new_box(ctx, NULL);
+
style.up = NULL;
style.count = 0;
- root_box = new_box(doc->ctx, NULL);
-
- page_box = new_box(doc->ctx, NULL);
- page_box->w = page_w;
- page_box->h = 0;
+ generate_boxes(ctx, htx, base_uri, xml, box, css, &style);
- // TODO: split generate and layout
+ return box;
+}
-printf("html: applying styles and generating boxes.\n");
- generate_boxes(doc, doc->xml, root_box, css, &style);
-printf("html: laying out text.\n");
- layout_block(doc->ctx, root_box, page_box, em, 0, page_h);
-printf("html: finished.\n");
+void html_init(fz_context *ctx, html_context *html, fz_archive *zip)
+{
+ memset(html, 0, sizeof *html);
+ html->zip = zip;
+}
- // print_box(doc->ctx, root_box, 0);
+void html_rebind(html_context *html, fz_context *ctx)
+{
+ fz_rebind_archive(html->zip, ctx);
+}
- doc->box = root_box;
+void html_fini(fz_context *ctx, html_context *html)
+{
+ fz_close_archive(ctx, html->zip);
}