From 189b6cb84cb1ceb3ace8cb4304b43790e682ad20 Mon Sep 17 00:00:00 2001
From: Tor Andersson
Date: Fri, 14 Oct 2016 14:55:35 +0200
Subject: epub: Add hyperlinks.

---
 source/html/epub-doc.c    |  88 ++++++++++++++++++++++--------
 source/html/html-layout.c | 133 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 186 insertions(+), 35 deletions(-)

(limited to 'source')

diff --git a/source/html/epub-doc.c b/source/html/epub-doc.c
index 246e343a..efc4ac18 100644
--- a/source/html/epub-doc.c
+++ b/source/html/epub-doc.c
@@ -50,13 +50,13 @@ find_anchor_flow(fz_html_flow *flow, const char *anchor, float page_h, int *page
 }
 
 static int
-find_anchor(fz_html_box *box, const char *anchor, float page_h, int *page)
+find_anchor_box(fz_html_box *box, const char *anchor, float page_h, int *page)
 {
 	while (box)
 	{
 		if (box->flow_head && find_anchor_flow(box->flow_head, anchor, page_h, page))
 			return 1;
-		if (box->down && find_anchor(box->down, anchor, page_h, page))
+		if (box->down && find_anchor_box(box->down, anchor, page_h, page))
 			return 1;
 		box = box->next;
 	}
@@ -64,35 +64,41 @@ find_anchor(fz_html_box *box, const char *anchor, float page_h, int *page)
 }
 
 static void
-epub_update_link_dests(fz_context *ctx, epub_document *doc, fz_outline *node)
+resolve_link_dest(fz_context *ctx, epub_document *doc, fz_link_dest *ld)
 {
 	epub_chapter *ch;
 
-	while (node)
+	if (ld->kind == FZ_LINK_GOTO)
 	{
-		if (node->dest.kind == FZ_LINK_GOTO)
-		{
-			const char *dest = node->dest.ld.gotor.dest;
-			const char *s = strchr(dest, '#');
-			int n = s ? s - dest : strlen(dest);
-			if (s && s[1] == 0)
-				s = NULL;
+		const char *dest = ld->ld.gotor.dest;
+		const char *s = strchr(dest, '#');
+		int n = s ? s - dest : strlen(dest);
+		if (s && s[1] == 0)
+			s = NULL;
 
-			for (ch = doc->spine; ch; ch = ch->next)
+		for (ch = doc->spine; ch; ch = ch->next)
+		{
+			if (strncmp(ch->path, dest, n) || ch->path[n] != 0)
+				continue;
+			ld->ld.gotor.page = ch->start;
+			if (s)
 			{
-				if (strncmp(ch->path, dest, n) || ch->path[n] != 0)
+				/* Search for a matching fragment */
+				if (find_anchor_box(ch->html->root, s+1, ch->page_h, &ld->ld.gotor.page))
 					continue;
-				node->dest.ld.gotor.page = ch->start;
-				if (s)
-				{
-					/* Search for a matching fragment */
-					if (find_anchor(ch->html->root, s+1, ch->page_h, &node->dest.ld.gotor.page))
-						continue;
-				}
-				break;
 			}
+			break;
 		}
-		epub_update_link_dests(ctx, doc, node->down);
+	}
+}
+
+static void
+epub_update_outline(fz_context *ctx, epub_document *doc, fz_outline *node)
+{
+	while (node)
+	{
+		resolve_link_dest(ctx, doc, &node->dest);
+		epub_update_outline(ctx, doc, node->down);
 		node = node->next;
 	}
 }
@@ -118,7 +124,7 @@ epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
 		count += ceilf(ch->html->root->h / ch->page_h);
 	}
 
-	epub_update_link_dests(ctx, doc, doc->outline);
+	epub_update_outline(ctx, doc, doc->outline);
 }
 
 static int
@@ -187,6 +193,41 @@ epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, const fz_matrix *
 	}
 }
 
+static fz_link *
+epub_load_links(fz_context *ctx, fz_page *page_)
+{
+	epub_page *page = (epub_page*)page_;
+	epub_document *doc = page->doc;
+	epub_chapter *ch;
+	int n = page->number;
+	int count = 0;
+	fz_link *head, *link;
+
+	for (ch = doc->spine; ch; ch = ch->next)
+	{
+		int cn = ceilf(ch->html->root->h / ch->page_h);
+		if (n < count + cn)
+		{
+			head = fz_load_html_links(ctx, ch->html, n - count, ch->page_h, ch->path);
+			for (link = head; link; link = link->next)
+			{
+				/* Adjust for page margins */
+				link->rect.x0 += ch->page_margin[L];
+				link->rect.x1 += ch->page_margin[L];
+				link->rect.y0 += ch->page_margin[T];
+				link->rect.y1 += ch->page_margin[T];
+
+				/* Resolve local links */
+				resolve_link_dest(ctx, doc, &link->dest);
+			}
+			return head;
+		}
+		count += cn;
+	}
+
+	return NULL;
+}
+
 static fz_page *
 epub_load_page(fz_context *ctx, fz_document *doc_, int number)
 {
@@ -194,6 +235,7 @@ epub_load_page(fz_context *ctx, fz_document *doc_, int number)
 	epub_page *page = fz_new_page(ctx, sizeof *page);
 	page->super.bound_page = epub_bound_page;
 	page->super.run_page_contents = epub_run_page;
+	page->super.load_links = epub_load_links;
 	page->super.drop_page = epub_drop_page;
 	page->doc = doc;
 	page->number = number;
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index fb039061..323b0532 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -680,7 +680,7 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top,
 
 			else if (display != DIS_NONE)
 			{
-				const char *dir, *lang, *id;
+				const char *dir, *lang, *id, *href;
 				int child_dir = markup_dir;
 				int child_lang = markup_lang;
 
@@ -716,6 +716,16 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top,
 				else if (display == DIS_INLINE)
 				{
 					insert_inline_box(ctx, box, top, child_dir, g);
+					if (!strcmp(tag, "a"))
+					{
+						id = fz_xml_att(node, "id");
+						if (id)
+							generate_anchor(ctx, box, id, g);
+						href = fz_xml_att(node, "href");
+						if (href)
+							box->a_href = fz_pool_strdup(ctx, g->pool, href);
+					}
+
 				}
 				else
 				{
@@ -723,17 +733,6 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top,
 					insert_box(ctx, box, BOX_BLOCK, top);
 				}
 
-				if (!strcmp(tag, "a"))
-				{
-					id = fz_xml_att(node, "id");
-					if (id)
-					{
-						/* We don't need to create a box here, because since tags are inline
-						 * the DIS_INLINE case * above should already have done it for us. */
-						generate_anchor(ctx, box, id, g);
-					}
-				}
-
 				if (fz_xml_down(node))
 				{
 					int child_counter = list_counter;
@@ -1885,6 +1884,113 @@ fz_draw_html(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, fz_html *htm
 	}
 }
 
+static int is_internal_uri(const char *uri)
+{
+	while (*uri >= 'a' && *uri <= 'z')
+		++uri;
+	if (uri[0] == ':' && uri[1] == '/' && uri[2] == '/')
+		return 0;
+	return 1;
+}
+
+static fz_link *load_link_flow(fz_context *ctx, fz_html_flow *flow, fz_link *head, int page, int page_h, const char *dir, const char *file)
+{
+	fz_link *link;
+	fz_html_flow *next;
+	char path[2048];
+	fz_rect bbox;
+	fz_link_dest dest;
+	char *href;
+	float w;
+
+	while (flow)
+	{
+		href = flow->box->a_href;
+		next = flow->next;
+		if (href && (int)(flow->y / page_h) == page)
+		{
+			/* Coalesce contiguous flow boxes into one link node */
+			w = flow->w;
+			while (next &&
+				next->y == flow->y &&
+				next->h == flow->h &&
+				next->box->a_href &&
+				!strcmp(href, next->box->a_href))
+			{
+				w += next->w;
+				next = next->next;
+			}
+
+			bbox.x0 = flow->x;
+			bbox.y0 = flow->y - page * page_h;
+			bbox.x1 = bbox.x0 + w;
+			bbox.y1 = bbox.y0 + flow->h;
+			if (flow->type != FLOW_IMAGE)
+			{
+				/* flow->y is the baseline, adjust bbox appropriately */
+				bbox.y0 -= 0.8 * flow->h;
+				bbox.y1 -= 0.8 * flow->h;
+			}
+
+			if (is_internal_uri(href))
+			{
+				if (href[0] == '#')
+				{
+					fz_strlcpy(path, file, sizeof path);
+					fz_strlcat(path, href, sizeof path);
+				}
+				else
+				{
+					fz_strlcpy(path, dir, sizeof path);
+					fz_strlcat(path, "/", sizeof path);
+					fz_strlcat(path, href, sizeof path);
+				}
+				fz_urldecode(path);
+				fz_cleanname(path);
+
+				memset(&dest, 0, sizeof dest);
+				dest.kind = FZ_LINK_GOTO;
+				dest.ld.gotor.dest = fz_strdup(ctx, path);
+				dest.ld.gotor.page = 0; /* computed in epub_load_links */
+			}
+			else
+			{
+				memset(&dest, 0, sizeof dest);
+				dest.kind = FZ_LINK_URI;
+				dest.ld.uri.uri = fz_strdup(ctx, href);
+				dest.ld.uri.is_map = 0;
+			}
+
+			link = fz_new_link(ctx, &bbox, dest);
+			link->next = head;
+			head = link;
+		}
+		flow = next;
+	}
+	return head;
+}
+
+static fz_link *load_link_box(fz_context *ctx, fz_html_box *box, fz_link *head, int page, int page_h, const char *dir, const char *file)
+{
+	while (box)
+	{
+		if (box->flow_head)
+			head = load_link_flow(ctx, box->flow_head, head, page, page_h, dir, file);
+		if (box->down)
+			head = load_link_box(ctx, box->down, head, page, page_h, dir, file);
+		box = box->next;
+	}
+	return head;
+}
+
+fz_link *
+fz_load_html_links(fz_context *ctx, fz_html *html, int page, int page_h, const char *file)
+{
+	char dir[2048];
+	fz_dirname(dir, file, sizeof dir);
+	return load_link_box(ctx, html->root, NULL, page, page_h, dir, file);
+}
+
 static char *concat_text(fz_context *ctx, fz_xml *root)
 {
 	fz_xml *node;
@@ -2085,6 +2191,9 @@ fz_print_html_box(fz_context *ctx, fz_html_box *box, int pstyle, int level)
 		if (box->list_item)
 			printf(" list=%d", box->list_item);
 
+		if (box->a_href)
+			printf(" href='%s'", box->a_href);
+
 		if (box->down || box->flow_head)
 			printf(" {\n");
 		else
-- 
cgit v1.2.3