From 9f7ce17525de749758a343c03200cc40cccab056 Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Tue, 25 Oct 2016 16:45:25 +0200 Subject: epub: Add links to XHTML and FB2 formats. --- include/mupdf/html.h | 2 ++ source/html/epub-doc.c | 44 ++--------------------------- source/html/html-doc.c | 40 ++++++++++++++++++++++++++ source/html/html-layout.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 115 insertions(+), 42 deletions(-) diff --git a/include/mupdf/html.h b/include/mupdf/html.h index dd58b814..b6a1cd89 100644 --- a/include/mupdf/html.h +++ b/include/mupdf/html.h @@ -277,6 +277,8 @@ void fz_add_css_font_faces(fz_context *ctx, fz_html_font_set *set, fz_archive *z fz_html *fz_parse_html(fz_context *ctx, fz_html_font_set *htx, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css); void fz_layout_html(fz_context *ctx, fz_html *html, float w, float h, float em); void fz_draw_html(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, fz_html *html, float page_top, float page_bot); + +float fz_find_html_target(fz_context *ctx, fz_html *html, const char *id); fz_link *fz_load_html_links(fz_context *ctx, fz_html *html, int page, int page_h, const char *base_uri); void fz_drop_html(fz_context *ctx, fz_html *html); diff --git a/source/html/epub-doc.c b/source/html/epub-doc.c index f9d9e6aa..7b44107b 100644 --- a/source/html/epub-doc.c +++ b/source/html/epub-doc.c @@ -34,49 +34,11 @@ struct epub_page_s int number; }; -static int -find_anchor_flow(fz_html_flow *flow, const char *anchor, float page_h) -{ - while (flow) - { - if (flow->box->id && !strcmp(anchor, flow->box->id)) - return flow->y / page_h; - flow = flow->next; - } - return -1; -} - -static int -find_anchor_box(fz_html_box *box, const char *anchor, float page_h) -{ - int page; - while (box) - { - if (box->id && !strcmp(anchor, box->id)) - return box->y / page_h; - if (box->type == BOX_FLOW) - { - page = find_anchor_flow(box->flow_head, anchor, page_h); - if (page
>= 0) - return page; - } - else - { - page = find_anchor_box(box->down, anchor, page_h); - if (page >= 0) - return page; - } - box = box->next; - } - return -1; -} - static int epub_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest) { epub_document *doc = (epub_document*)doc_; epub_chapter *ch; - int page = -1; const char *s = strchr(dest, '#'); int n = s ? s - dest : strlen(dest); @@ -90,9 +52,9 @@ epub_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest) if (s) { /* Search for a matching fragment */ - page = find_anchor_box(ch->html->root, s+1, ch->page_h); - if (page >= 0) - return ch->start + page; + float y = fz_find_html_target(ctx, ch->html, s+1); + if (y >= 0) + return ch->start + y / ch->page_h; } return ch->start; } diff --git a/source/html/html-doc.c b/source/html/html-doc.c index 12d2cecd..810b92c1 100644 --- a/source/html/html-doc.c +++ b/source/html/html-doc.c @@ -31,6 +31,22 @@ htdoc_drop_document(fz_context *ctx, fz_document *doc_) fz_drop_html_font_set(ctx, doc->set); } +static int +htdoc_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest) +{ + html_document *doc = (html_document*)doc_; + + const char *s = strchr(dest, '#'); + if (s && s[1] != 0) + { + float y = fz_find_html_target(ctx, doc->html, s+1); + if (y >= 0) + return y / doc->page_h; + } + + return -1; +} + static int htdoc_count_pages(fz_context *ctx, fz_document *doc_) { @@ -89,6 +105,27 @@ htdoc_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, const fz_matrix fz_draw_html(ctx, dev, &local_ctm, doc->html, n * doc->page_h, (n+1) * doc->page_h); } +static fz_link * +htdoc_load_links(fz_context *ctx, fz_page *page_) +{ + html_page *page = (html_page*)page_; + html_document *doc = page->doc; + fz_link *head, *link; + + head = fz_load_html_links(ctx, doc->html, page->number, doc->page_h, ""); + for (link = head; link; link = link->next) + { + link->doc = doc; + + /* Adjust for page margins */ + link->rect.x0 += doc->page_margin[L];
+ link->rect.x1 += doc->page_margin[L]; + link->rect.y0 += doc->page_margin[T]; + link->rect.y1 += doc->page_margin[T]; + } + return head; +} + static fz_page * htdoc_load_page(fz_context *ctx, fz_document *doc_, int number) { @@ -96,6 +133,7 @@ htdoc_load_page(fz_context *ctx, fz_document *doc_, int number) html_page *page = fz_new_page(ctx, sizeof *page); page->super.bound_page = htdoc_bound_page; page->super.run_page_contents = htdoc_run_page; + page->super.load_links = htdoc_load_links; page->super.drop_page = htdoc_drop_page; page->doc = doc; page->number = number; @@ -120,6 +158,7 @@ htdoc_open_document_with_stream(fz_context *ctx, fz_stream *file) doc->super.drop_document = htdoc_drop_document; doc->super.layout = htdoc_layout; + doc->super.resolve_link = htdoc_resolve_link; doc->super.count_pages = htdoc_count_pages; doc->super.load_page = htdoc_load_page; doc->super.lookup_metadata = htdoc_lookup_metadata; @@ -155,6 +194,7 @@ htdoc_open_document(fz_context *ctx, const char *filename) doc = fz_new_document(ctx, html_document); doc->super.drop_document = htdoc_drop_document; doc->super.layout = htdoc_layout; + doc->super.resolve_link = htdoc_resolve_link; doc->super.count_pages = htdoc_count_pages; doc->super.load_page = htdoc_load_page; doc->super.lookup_metadata = htdoc_lookup_metadata; diff --git a/source/html/html-layout.c b/source/html/html-layout.c index fc0874ae..e57aac68 100644 --- a/source/html/html-layout.c +++ b/source/html/html-layout.c @@ -651,6 +651,8 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top, else if (g->is_fb2 && tag[0]=='i' && tag[1]=='m' && tag[2]=='a' && tag[3]=='g' && tag[4]=='e' && tag[5]==0) { const char *src = fz_xml_att(node, "l:href"); + if (!src) + src = fz_xml_att(node, "xlink:href"); if (src && src[0] == '#') { fz_image *img = fz_tree_lookup(ctx, g->images, src+1); @@ -721,7 +723,14 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top, generate_anchor(ctx, box, g); if
(tag[0]=='a' && tag[1]==0) { - href = fz_xml_att(node, "href"); + if (g->is_fb2) + { + href = fz_xml_att(node, "l:href"); + if (!href) + href = fz_xml_att(node, "xlink:href"); + } + else + href = fz_xml_att(node, "href"); if (href) box->href = fz_pool_strdup(ctx, g->pool, href); } @@ -1989,6 +1998,66 @@ fz_load_html_links(fz_context *ctx, fz_html *html, int page, int page_h, const c return load_link_box(ctx, html->root, NULL, page, page_h, dir, file); } +static fz_html_flow * +find_first_content(fz_html_box *box) +{ + while (box) + { + if (box->type == BOX_FLOW) + return box->flow_head; + box = box->down; + } + return NULL; +} + +static float +find_flow_target(fz_html_flow *flow, const char *id) +{ + while (flow) + { + if (flow->box->id && !strcmp(id, flow->box->id)) + return flow->y; + flow = flow->next; + } + return -1; +} + +static float +find_box_target(fz_html_box *box, const char *id) +{ + float y; + while (box) + { + if (box->id && !strcmp(id, box->id)) + { + fz_html_flow *flow = find_first_content(box); + if (flow) + return flow->y; + return box->y; + } + if (box->type == BOX_FLOW) + { + y = find_flow_target(box->flow_head, id); + if (y >= 0) + return y; + } + else + { + y = find_box_target(box->down, id); + if (y >= 0) + return y; + } + box = box->next; + } + return -1; +} + +float +fz_find_html_target(fz_context *ctx, fz_html *html, const char *id) +{ + return find_box_target(html->root, id); +} + static char *concat_text(fz_context *ctx, fz_xml *root) { fz_xml *node; -- cgit v1.2.3