summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2016-10-14 14:55:35 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2016-10-14 14:55:35 +0200
commit 189b6cb84cb1ceb3ace8cb4304b43790e682ad20 (patch)
tree f59b42650110d8817564c604d07e7790abe9f32a
parent 4ef5e67c4165a8fbb4ec96c421f958fc7ec94ca1 (diff)
download mupdf-189b6cb84cb1ceb3ace8cb4304b43790e682ad20.tar.xz
epub: Add hyperlinks.
-rw-r--r-- include/mupdf/html.h 2
-rw-r--r-- source/html/epub-doc.c 88
-rw-r--r-- source/html/html-layout.c 133
3 files changed, 188 insertions, 35 deletions
diff --git a/include/mupdf/html.h b/include/mupdf/html.h
index f9bd4d98..259333a4 100644
--- a/include/mupdf/html.h
+++ b/include/mupdf/html.h
@@ -207,6 +207,7 @@ struct fz_html_box_s
float em;
fz_html_box *up, *down, *last, *next;
fz_html_flow *flow_head, **flow_tail;
+ char *a_href;
fz_css_style style;
};
@@ -276,6 +277,7 @@ void fz_add_css_font_faces(fz_context *ctx, fz_html_font_set *set, fz_archive *z
fz_html *fz_parse_html(fz_context *ctx, fz_html_font_set *htx, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css);
void fz_layout_html(fz_context *ctx, fz_html *html, float w, float h, float em);
void fz_draw_html(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, fz_html *html, float page_top, float page_bot);
+fz_link *fz_load_html_links(fz_context *ctx, fz_html *html, int page, int page_h, const char *base_uri);
void fz_drop_html(fz_context *ctx, fz_html *html);
#endif
diff --git a/source/html/epub-doc.c b/source/html/epub-doc.c
index 246e343a..efc4ac18 100644
--- a/source/html/epub-doc.c
+++ b/source/html/epub-doc.c
@@ -50,13 +50,13 @@ find_anchor_flow(fz_html_flow *flow, const char *anchor, float page_h, int *page
}
static int
-find_anchor(fz_html_box *box, const char *anchor, float page_h, int *page)
+find_anchor_box(fz_html_box *box, const char *anchor, float page_h, int *page)
{
while (box)
{
if (box->flow_head && find_anchor_flow(box->flow_head, anchor, page_h, page))
return 1;
- if (box->down && find_anchor(box->down, anchor, page_h, page))
+ if (box->down && find_anchor_box(box->down, anchor, page_h, page))
return 1;
box = box->next;
}
@@ -64,35 +64,41 @@ find_anchor(fz_html_box *box, const char *anchor, float page_h, int *page)
}
static void
-epub_update_link_dests(fz_context *ctx, epub_document *doc, fz_outline *node)
+resolve_link_dest(fz_context *ctx, epub_document *doc, fz_link_dest *ld)
{
epub_chapter *ch;
- while (node)
+ if (ld->kind == FZ_LINK_GOTO)
{
- if (node->dest.kind == FZ_LINK_GOTO)
- {
- const char *dest = node->dest.ld.gotor.dest;
- const char *s = strchr(dest, '#');
- int n = s ? s - dest : strlen(dest);
- if (s && s[1] == 0)
- s = NULL;
+ const char *dest = ld->ld.gotor.dest;
+ const char *s = strchr(dest, '#');
+ int n = s ? s - dest : strlen(dest);
+ if (s && s[1] == 0)
+ s = NULL;
- for (ch = doc->spine; ch; ch = ch->next)
+ for (ch = doc->spine; ch; ch = ch->next)
+ {
+ if (strncmp(ch->path, dest, n) || ch->path[n] != 0)
+ continue;
+ ld->ld.gotor.page = ch->start;
+ if (s)
{
- if (strncmp(ch->path, dest, n) || ch->path[n] != 0)
+ /* Search for a matching fragment */
+ if (find_anchor_box(ch->html->root, s+1, ch->page_h, &ld->ld.gotor.page))
continue;
- node->dest.ld.gotor.page = ch->start;
- if (s)
- {
- /* Search for a matching fragment */
- if (find_anchor(ch->html->root, s+1, ch->page_h, &node->dest.ld.gotor.page))
- continue;
- }
- break;
}
+ break;
}
- epub_update_link_dests(ctx, doc, node->down);
+ }
+}
+
+static void /* Resolve the link destination of every node in an outline tree. */
+epub_update_outline(fz_context *ctx, epub_document *doc, fz_outline *node)
+{
+while (node) /* walk the siblings at this level; a NULL node ends the walk */
+{
+resolve_link_dest(ctx, doc, &node->dest); /* updates node->dest in place */
+epub_update_outline(ctx, doc, node->down); /* recurse into this node's children (no-op when down is NULL) */
node = node->next;
}
}
@@ -118,7 +124,7 @@ epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
count += ceilf(ch->html->root->h / ch->page_h);
}
- epub_update_link_dests(ctx, doc, doc->outline);
+ epub_update_outline(ctx, doc, doc->outline);
}
static int
@@ -187,6 +193,41 @@ epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, const fz_matrix *
}
}
+static fz_link * /* Build the link list for one page: find the chapter holding the page, load its links, shift them by the margins and resolve their destinations. */
+epub_load_links(fz_context *ctx, fz_page *page_)
+{
+epub_page *page = (epub_page*)page_;
+epub_document *doc = page->doc;
+epub_chapter *ch;
+int n = page->number; /* page number to load, counted across all chapters */
+int count = 0; /* pages contributed by the chapters skipped so far */
+fz_link *head, *link;
+
+for (ch = doc->spine; ch; ch = ch->next)
+{
+int cn = ceilf(ch->html->root->h / ch->page_h); /* pages in this chapter: root box height / page height, rounded up */
+if (n < count + cn) /* page n lies inside this chapter */
+{
+head = fz_load_html_links(ctx, ch->html, n - count, ch->page_h, ch->path); /* n - count is the page index relative to the chapter start */
+for (link = head; link; link = link->next)
+{
+/* Adjust for page margins */
+link->rect.x0 += ch->page_margin[L];
+link->rect.x1 += ch->page_margin[L];
+link->rect.y0 += ch->page_margin[T];
+link->rect.y1 += ch->page_margin[T];
+
+/* Resolve local links */
+resolve_link_dest(ctx, doc, &link->dest); /* only acts on FZ_LINK_GOTO destinations; others are left untouched */
+}
+return head; /* may be NULL when the page carries no links */
+}
+count += cn;
+}
+
+return NULL; /* n is beyond the last page of the last chapter */
+}
+
static fz_page *
epub_load_page(fz_context *ctx, fz_document *doc_, int number)
{
@@ -194,6 +235,7 @@ epub_load_page(fz_context *ctx, fz_document *doc_, int number)
epub_page *page = fz_new_page(ctx, sizeof *page);
page->super.bound_page = epub_bound_page;
page->super.run_page_contents = epub_run_page;
+ page->super.load_links = epub_load_links;
page->super.drop_page = epub_drop_page;
page->doc = doc;
page->number = number;
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index fb039061..323b0532 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -680,7 +680,7 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top,
else if (display != DIS_NONE)
{
- const char *dir, *lang, *id;
+ const char *dir, *lang, *id, *href;
int child_dir = markup_dir;
int child_lang = markup_lang;
@@ -716,6 +716,16 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top,
else if (display == DIS_INLINE)
{
insert_inline_box(ctx, box, top, child_dir, g);
+ if (!strcmp(tag, "a"))
+ {
+ id = fz_xml_att(node, "id");
+ if (id)
+ generate_anchor(ctx, box, id, g);
+ href = fz_xml_att(node, "href");
+ if (href)
+ box->a_href = fz_pool_strdup(ctx, g->pool, href);
+ }
+
}
else
{
@@ -723,17 +733,6 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html_box *top,
insert_box(ctx, box, BOX_BLOCK, top);
}
- if (!strcmp(tag, "a"))
- {
- id = fz_xml_att(node, "id");
- if (id)
- {
- /* We don't need to create a box here, because since <a> tags are inline
- * the DIS_INLINE case * above should already have done it for us. */
- generate_anchor(ctx, box, id, g);
- }
- }
-
if (fz_xml_down(node))
{
int child_counter = list_counter;
@@ -1885,6 +1884,113 @@ fz_draw_html(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, fz_html *htm
}
}
+static int is_internal_uri(const char *uri) /* Return 0 when uri is a run of lower-case letters followed by "://", otherwise 1. */
+{
+while (*uri >= 'a' && *uri <= 'z') /* skip a leading run of lower-case ASCII letters (the candidate scheme name) */
+++uri;
+if (uri[0] == ':' && uri[1] == '/' && uri[2] == '/') /* the && chain stops at the first mismatch, so nothing past the terminating NUL is read */
+return 0;
+return 1; /* NOTE(review): hrefs such as "mailto:..." (no "//") and upper-case schemes such as "HTTP://..." also reach this line and are reported as internal - confirm this is intended */
+}
+
+static fz_link *load_link_flow(fz_context *ctx, fz_html_flow *flow, fz_link *head, int page, int page_h, const char *dir, const char *file) /* Scan a flow list and prepend one fz_link to head for each run of hyperlinked flow nodes on the given page; returns the new list head. */
+{
+fz_link *link;
+fz_html_flow *next;
+char path[2048]; /* scratch buffer for the resolved internal link target */
+fz_rect bbox;
+fz_link_dest dest;
+char *href;
+float w;
+
+while (flow)
+{
+href = flow->box->a_href; /* NULL unless the owning box carries an href */
+next = flow->next;
+if (href && (int)(flow->y / page_h) == page) /* only flows whose y position falls on the requested page */
+{
+/* Coalesce contiguous flow boxes into one link node */
+w = flow->w;
+while (next && /* absorb following nodes that have the same y, the same height and the same href */
+next->y == flow->y &&
+next->h == flow->h &&
+next->box->a_href &&
+!strcmp(href, next->box->a_href))
+{
+w += next->w; /* widths are summed; x positions are not compared, so the nodes are assumed to be horizontally adjacent */
+next = next->next;
+}
+
+bbox.x0 = flow->x;
+bbox.y0 = flow->y - page * page_h; /* convert y to be relative to the top of this page */
+bbox.x1 = bbox.x0 + w;
+bbox.y1 = bbox.y0 + flow->h;
+if (flow->type != FLOW_IMAGE)
+{
+/* flow->y is the baseline, adjust bbox appropriately */
+bbox.y0 -= 0.8 * flow->h; /* shift the box up by 0.8 of the flow height */
+bbox.y1 -= 0.8 * flow->h;
+}
+
+if (is_internal_uri(href))
+{
+if (href[0] == '#') /* fragment-only href: target is the current file plus the fragment */
+{
+fz_strlcpy(path, file, sizeof path);
+fz_strlcat(path, href, sizeof path);
+}
+else /* any other internal href is taken relative to the directory of the current file */
+{
+fz_strlcpy(path, dir, sizeof path);
+fz_strlcat(path, "/", sizeof path);
+fz_strlcat(path, href, sizeof path); /* NOTE(review): results are bounded by sizeof path; over-long targets are presumably truncated without any error - confirm */
+}
+fz_urldecode(path); /* both calls rewrite path in place */
+fz_cleanname(path);
+
+memset(&dest, 0, sizeof dest);
+dest.kind = FZ_LINK_GOTO;
+dest.ld.gotor.dest = fz_strdup(ctx, path); /* destination keeps its own copy because path is a local buffer */
+dest.ld.gotor.page = 0; /* computed in epub_load_links */
+}
+else /* external link: keep the href verbatim as a URI destination */
+{
+memset(&dest, 0, sizeof dest);
+dest.kind = FZ_LINK_URI;
+dest.ld.uri.uri = fz_strdup(ctx, href);
+dest.ld.uri.is_map = 0;
+}
+
+link = fz_new_link(ctx, &bbox, dest);
+link->next = head; /* prepend, so the finished list is in reverse order of discovery */
+head = link;
+}
+flow = next; /* resumes after any nodes absorbed by the coalescing loop */
+}
+return head;
+}
+
+static fz_link *load_link_box(fz_context *ctx, fz_html_box *box, fz_link *head, int page, int page_h, const char *dir, const char *file) /* Walk a box and its following siblings, collecting the links of their flows and of their child boxes onto head; returns the new list head. */
+{
+while (box) /* iterate over box and its next-siblings */
+{
+if (box->flow_head) /* this box has flow content of its own */
+head = load_link_flow(ctx, box->flow_head, head, page, page_h, dir, file);
+if (box->down) /* recurse into the child boxes */
+head = load_link_box(ctx, box->down, head, page, page_h, dir, file);
+box = box->next;
+}
+return head; /* unchanged when nothing in this subtree produced a link */
+}
+
+fz_link * /* Collect the hyperlinks that fall on one page of a laid-out HTML document, starting from an empty list. */
+fz_load_html_links(fz_context *ctx, fz_html *html, int page, int page_h, const char *file) /* NOTE(review): the prototype in html.h names the last parameter base_uri; page_h is int here although epub_load_links passes ch->page_h, which find_anchor_box receives as float - confirm the truncation is intended */
+{
+char dir[2048]; /* receives the directory part of file, used to resolve relative hrefs */
+fz_dirname(dir, file, sizeof dir);
+return load_link_box(ctx, html->root, NULL, page, page_h, dir, file); /* NULL is the empty initial list; the result may also be NULL */
+}
+
static char *concat_text(fz_context *ctx, fz_xml *root)
{
fz_xml *node;
@@ -2085,6 +2191,9 @@ fz_print_html_box(fz_context *ctx, fz_html_box *box, int pstyle, int level)
if (box->list_item)
printf(" list=%d", box->list_item);
+ if (box->a_href)
+ printf(" href='%s'", box->a_href);
+
if (box->down || box->flow_head)
printf(" {\n");
else