diff options
-rw-r--r-- | include/mupdf/page.h | 1 | ||||
-rw-r--r-- | include/mupdf/syntax.h | 1 | ||||
-rw-r--r-- | mupdf/pdf_parse.c | 30 | ||||
-rw-r--r-- | mupdf/pdf_unicode.c | 10 |
4 files changed, 42 insertions, 0 deletions
diff --git a/include/mupdf/page.h b/include/mupdf/page.h
index bf14dca2..4304fa22 100644
--- a/include/mupdf/page.h
+++ b/include/mupdf/page.h
@@ -32,6 +32,7 @@ struct pdf_textchar_s
 
 struct pdf_textline_s
 {
+	fz_point height;
 	int len, cap;
 	pdf_textchar *text;
 	pdf_textline *next;
diff --git a/include/mupdf/syntax.h b/include/mupdf/syntax.h
index 54eafa9e..87fc946a 100644
--- a/include/mupdf/syntax.h
+++ b/include/mupdf/syntax.h
@@ -28,6 +28,7 @@ fz_error *pdf_parseindobj(fz_obj **op, fz_file *f, char *buf, int cap, int *oid,
 fz_rect pdf_torect(fz_obj *array);
 fz_matrix pdf_tomatrix(fz_obj *array);
 fz_error *pdf_toutf8(char **dstp, fz_obj *src);
+fz_error *pdf_toucs2(unsigned short **dstp, fz_obj *src);
 
 /*
  * Encryption
diff --git a/mupdf/pdf_parse.c b/mupdf/pdf_parse.c
index 1bdd057e..e30bb3a2 100644
--- a/mupdf/pdf_parse.c
+++ b/mupdf/pdf_parse.c
@@ -77,6 +77,36 @@ pdf_toutf8(char **dstp, fz_obj *src)
 }
 
 fz_error *
+pdf_toucs2(unsigned short **dstp, fz_obj *src)
+{
+	unsigned char *srcptr = fz_tostrbuf(src);
+	unsigned short *dstptr;
+	int srclen = fz_tostrlen(src);
+	int i;
+
+	if (srclen > 2 && srcptr[0] == 254 && srcptr[1] == 255)
+	{
+		dstptr = *dstp = fz_malloc(((srclen - 2) / 2 + 1) * sizeof(short));
+		if (!dstptr)
+			return fz_outofmem;
+		for (i = 2; i < srclen; i += 2)
+			*dstptr++ = (srcptr[i] << 8) | srcptr[i+1];
+	}
+
+	else
+	{
+		dstptr = *dstp = fz_malloc((srclen + 1) * sizeof(short));
+		if (!dstptr)
+			return fz_outofmem;
+		for (i = 0; i < srclen; i++)
+			*dstptr++ = pdf_docencoding[srcptr[i]];
+	}
+
+	*dstptr = '\0';
+	return nil;
+}
+
+fz_error *
 pdf_parsearray(fz_obj **op, fz_file *file, char *buf, int cap)
 {
 	fz_error *error = nil;
diff --git a/mupdf/pdf_unicode.c b/mupdf/pdf_unicode.c
index d9103cd2..be57f7a2 100644
--- a/mupdf/pdf_unicode.c
+++ b/mupdf/pdf_unicode.c
@@ -99,6 +99,8 @@ pdf_newtextline(pdf_textline **linep)
 	line = *linep = fz_malloc(sizeof(pdf_textline));
 	if (!line)
 		return fz_outofmem;
+	line->height.x = 0; /* bad default value... */
+	line->height.y = 10;
 	line->len = 0;
 	line->cap = 0;
 	line->text = nil;
@@ -161,6 +163,14 @@ extracttext(pdf_textline **line, fz_node *node, fz_matrix ctm)
 	int i, g, x, y;
 	int c;
 
+	/* get line height */
+	trm = fz_concat(tm, ctm);
+	trm.e = 0;
+	trm.f = 0;
+	p.x = 0;
+	p.y = 1;
+	(*line)->height = fz_transformpoint(trm, p);
+
 	for (i = 0; i < text->len; i++)
 	{
 		g = text->els[i].cid;