summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mupdf/page.h1
-rw-r--r--include/mupdf/syntax.h1
-rw-r--r--mupdf/pdf_parse.c30
-rw-r--r--mupdf/pdf_unicode.c10
4 files changed, 42 insertions, 0 deletions
diff --git a/include/mupdf/page.h b/include/mupdf/page.h
index bf14dca2..4304fa22 100644
--- a/include/mupdf/page.h
+++ b/include/mupdf/page.h
@@ -32,6 +32,7 @@ struct pdf_textchar_s
struct pdf_textline_s
{
+ fz_point height;
int len, cap;
pdf_textchar *text;
pdf_textline *next;
diff --git a/include/mupdf/syntax.h b/include/mupdf/syntax.h
index 54eafa9e..87fc946a 100644
--- a/include/mupdf/syntax.h
+++ b/include/mupdf/syntax.h
@@ -28,6 +28,7 @@ fz_error *pdf_parseindobj(fz_obj **op, fz_file *f, char *buf, int cap, int *oid,
fz_rect pdf_torect(fz_obj *array);
fz_matrix pdf_tomatrix(fz_obj *array);
fz_error *pdf_toutf8(char **dstp, fz_obj *src);
+fz_error *pdf_toucs2(unsigned short **dstp, fz_obj *src);
/*
* Encryption
diff --git a/mupdf/pdf_parse.c b/mupdf/pdf_parse.c
index 1bdd057e..e30bb3a2 100644
--- a/mupdf/pdf_parse.c
+++ b/mupdf/pdf_parse.c
@@ -77,6 +77,36 @@ pdf_toutf8(char **dstp, fz_obj *src)
}
+/*
+ * Convert the string object 'src' into a zero-terminated array of
+ * 16-bit code units, stored in a buffer obtained from fz_malloc and
+ * returned through *dstp.
+ *
+ * If the string starts with the bytes FE FF (big-endian UTF-16 byte
+ * order mark) the remaining bytes are combined pairwise, high byte
+ * first. Otherwise every byte is mapped through the pdf_docencoding
+ * table (presumably PDFDocEncoding -> Unicode; confirm against the
+ * table's definition).
+ *
+ * Returns nil on success, or fz_outofmem if the allocation failed
+ * (in which case *dstp has been set to the null result of fz_malloc).
+ * NOTE(review): the caller presumably owns *dstp and must release it
+ * with the allocator's matching free routine -- confirm.
+ */
fz_error *
+pdf_toucs2(unsigned short **dstp, fz_obj *src)
+{
+	unsigned char *srcptr = fz_tostrbuf(src);
+	unsigned short *dstptr;
+	int srclen = fz_tostrlen(src);
+	int i;
+
+	/* '>= 2' so that a string consisting of only the byte order
+	 * mark yields an empty result instead of being run through
+	 * pdf_docencoding as two ordinary characters. */
+	if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
+	{
+		/* one slot per complete byte pair after the BOM, plus terminator */
+		dstptr = *dstp = fz_malloc(((srclen - 2) / 2 + 1) * sizeof(unsigned short));
+		if (!dstptr)
+			return fz_outofmem;
+		/* 'i + 1 < srclen': a dangling odd byte at the end is ignored.
+		 * Consuming it would read one byte past the source and write
+		 * one code unit more than was allocated. */
+		for (i = 2; i + 1 < srclen; i += 2)
+			*dstptr++ = (srcptr[i] << 8) | srcptr[i+1];
+	}
+
+	else
+	{
+		/* one slot per source byte, plus terminator */
+		dstptr = *dstp = fz_malloc((srclen + 1) * sizeof(unsigned short));
+		if (!dstptr)
+			return fz_outofmem;
+		for (i = 0; i < srclen; i++)
+			*dstptr++ = pdf_docencoding[srcptr[i]];
+	}
+
+	*dstptr = 0;
+	return nil;
+}
+
+fz_error *
pdf_parsearray(fz_obj **op, fz_file *file, char *buf, int cap)
{
fz_error *error = nil;
diff --git a/mupdf/pdf_unicode.c b/mupdf/pdf_unicode.c
index d9103cd2..be57f7a2 100644
--- a/mupdf/pdf_unicode.c
+++ b/mupdf/pdf_unicode.c
@@ -99,6 +99,8 @@ pdf_newtextline(pdf_textline **linep)
line = *linep = fz_malloc(sizeof(pdf_textline));
if (!line)
return fz_outofmem;
+ line->height.x = 0; /* bad default value... */
+ line->height.y = 10;
line->len = 0;
line->cap = 0;
line->text = nil;
@@ -161,6 +163,14 @@ extracttext(pdf_textline **line, fz_node *node, fz_matrix ctm)
int i, g, x, y;
int c;
+ /* get line height */
+ trm = fz_concat(tm, ctm);
+ trm.e = 0;
+ trm.f = 0;
+ p.x = 0;
+ p.y = 1;
+ (*line)->height = fz_transformpoint(trm, p);
+
for (i = 0; i < text->len; i++)
{
g = text->els[i].cid;