summaryrefslogtreecommitdiff
path: root/pdf
diff options
context:
space:
mode:
author: Paul Gardiner <paul@glidos.net> 2012-07-18 14:45:28 +0100
committer: Paul Gardiner <paul@glidos.net> 2012-07-18 16:42:02 +0100
commit: 7eb8e51b6b889fa14aae39d282675b838aa43dc9 (patch)
tree: 4bf26eacd64612cbe2626263fb3ca344b53ef04e /pdf
parent: b03aad16426b89728c00e8e9888de7866dde03cc (diff)
download: mupdf-7eb8e51b6b889fa14aae39d282675b838aa43dc9.tar.xz
Update pdf_to_utf8 to handle either a stream or a string.
Also change the first argument from fz_context to pdf_document in each of pdf_to_utf8, pdf_to_utf8_name, pdf_to_ucs2 and pdf_from_ucs2.
Diffstat (limited to 'pdf')
-rw-r--r--pdf/mupdf.h8
-rw-r--r--pdf/pdf_annot.c8
-rw-r--r--pdf/pdf_nametree.c2
-rw-r--r--pdf/pdf_outline.c2
-rw-r--r--pdf/pdf_parse.c109
-rw-r--r--pdf/pdf_xref.c2
6 files changed, 81 insertions, 50 deletions
diff --git a/pdf/mupdf.h b/pdf/mupdf.h
index c2566c02..f3821efd 100644
--- a/pdf/mupdf.h
+++ b/pdf/mupdf.h
@@ -85,10 +85,10 @@ int pdf_fprint_obj(FILE *fp, pdf_obj *obj, int tight);
void pdf_print_obj(pdf_obj *obj);
void pdf_print_ref(pdf_obj *obj);
-char *pdf_to_utf8(fz_context *ctx, pdf_obj *src);
-unsigned short *pdf_to_ucs2(fz_context *ctx, pdf_obj *src); /* sumatrapdf */
-pdf_obj *pdf_to_utf8_name(fz_context *ctx, pdf_obj *src);
-char *pdf_from_ucs2(fz_context *ctx, unsigned short *str);
+char *pdf_to_utf8(pdf_document *xref, pdf_obj *src);
+unsigned short *pdf_to_ucs2(pdf_document *xref, pdf_obj *src); /* sumatrapdf */
+pdf_obj *pdf_to_utf8_name(pdf_document *xref, pdf_obj *src);
+char *pdf_from_ucs2(pdf_document *xref, unsigned short *str);
fz_rect pdf_to_rect(fz_context *ctx, pdf_obj *array);
fz_matrix pdf_to_matrix(fz_context *ctx, pdf_obj *array);
diff --git a/pdf/pdf_annot.c b/pdf/pdf_annot.c
index dc31498c..fc436409 100644
--- a/pdf/pdf_annot.c
+++ b/pdf/pdf_annot.c
@@ -213,7 +213,7 @@ pdf_parse_action(pdf_document *xref, pdf_obj *action)
{
ld.kind = FZ_LINK_URI;
ld.ld.uri.is_map = pdf_to_bool(pdf_dict_gets(action, "IsMap"));
- ld.ld.uri.uri = pdf_to_utf8(ctx, pdf_dict_gets(action, "URI"));
+ ld.ld.uri.uri = pdf_to_utf8(xref, pdf_dict_gets(action, "URI"));
}
else if (!strcmp(pdf_to_name(obj), "Launch"))
{
@@ -221,20 +221,20 @@ pdf_parse_action(pdf_document *xref, pdf_obj *action)
ld.kind = FZ_LINK_LAUNCH;
if (pdf_is_dict(dest))
dest = pdf_dict_gets(dest, "F");
- ld.ld.launch.file_spec = pdf_to_utf8(ctx, dest);
+ ld.ld.launch.file_spec = pdf_to_utf8(xref, dest);
ld.ld.launch.new_window = pdf_to_int(pdf_dict_gets(action, "NewWindow"));
}
else if (!strcmp(pdf_to_name(obj), "Named"))
{
ld.kind = FZ_LINK_NAMED;
- ld.ld.named.named = pdf_to_utf8(ctx, pdf_dict_gets(action, "N"));
+ ld.ld.named.named = pdf_to_utf8(xref, pdf_dict_gets(action, "N"));
}
else if (!strcmp(pdf_to_name(obj), "GoToR"))
{
dest = pdf_dict_gets(action, "D");
ld = pdf_parse_link_dest(xref, dest);
ld.kind = FZ_LINK_GOTOR;
- ld.ld.gotor.file_spec = pdf_to_utf8(ctx, pdf_dict_gets(action, "F"));
+ ld.ld.gotor.file_spec = pdf_to_utf8(xref, pdf_dict_gets(action, "F"));
ld.ld.gotor.new_window = pdf_to_int(pdf_dict_gets(action, "NewWindow"));
}
return ld;
diff --git a/pdf/pdf_nametree.c b/pdf/pdf_nametree.c
index 7d8ac319..25fced52 100644
--- a/pdf/pdf_nametree.c
+++ b/pdf/pdf_nametree.c
@@ -135,7 +135,7 @@ pdf_load_name_tree_imp(pdf_obj *dict, pdf_document *xref, pdf_obj *node)
pdf_obj *val = pdf_array_get(names, i + 1);
if (pdf_is_string(key))
{
- key = pdf_to_utf8_name(ctx, key);
+ key = pdf_to_utf8_name(xref, key);
pdf_dict_put(dict, key, val);
pdf_drop_obj(key);
}
diff --git a/pdf/pdf_outline.c b/pdf/pdf_outline.c
index d4bea75a..48f3853c 100644
--- a/pdf/pdf_outline.c
+++ b/pdf/pdf_outline.c
@@ -29,7 +29,7 @@ pdf_load_outline_imp(pdf_document *xref, pdf_obj *dict)
obj = pdf_dict_gets(dict, "Title");
if (obj)
- node->title = pdf_to_utf8(ctx, obj);
+ node->title = pdf_to_utf8(xref, obj);
if ((obj = pdf_dict_gets(dict, "Dest")))
node->dest = pdf_parse_link_dest(xref, obj);
diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c
index 0ba6b0a4..213c399e 100644
--- a/pdf/pdf_parse.c
+++ b/pdf/pdf_parse.c
@@ -31,60 +31,89 @@ pdf_to_matrix(fz_context *ctx, pdf_obj *array)
/* Convert Unicode/PdfDocEncoding string into utf-8 */
char *
-pdf_to_utf8(fz_context *ctx, pdf_obj *src)
+pdf_to_utf8(pdf_document *xref, pdf_obj *src)
{
- unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src);
+ fz_context *ctx = xref->ctx;
+ fz_buffer *strmbuf = NULL;
+ unsigned char *srcptr;
char *dstptr, *dst;
- int srclen = pdf_to_str_len(src);
+ int srclen;
int dstlen = 0;
int ucs;
int i;
- if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
+ fz_var(strmbuf);
+ fz_try(ctx)
{
- for (i = 2; i + 1 < srclen; i += 2)
+ if (pdf_is_string(src))
{
- ucs = srcptr[i] << 8 | srcptr[i+1];
- dstlen += fz_runelen(ucs);
+ srcptr = (unsigned char *) pdf_to_str_buf(src);
+ srclen = pdf_to_str_len(src);
}
-
- dstptr = dst = fz_malloc(ctx, dstlen + 1);
-
- for (i = 2; i + 1 < srclen; i += 2)
+ else if (pdf_is_stream(xref, pdf_to_num(src), pdf_to_gen(src)))
{
- ucs = srcptr[i] << 8 | srcptr[i+1];
- dstptr += fz_runetochar(dstptr, ucs);
+ strmbuf = pdf_load_stream(xref, pdf_to_num(src), pdf_to_gen(src));
+ srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr);
}
- }
- else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
- {
- for (i = 2; i + 1 < srclen; i += 2)
+ else
{
- ucs = srcptr[i] | srcptr[i+1] << 8;
- dstlen += fz_runelen(ucs);
+ srclen = 0;
}
- dstptr = dst = fz_malloc(ctx, dstlen + 1);
-
- for (i = 2; i + 1 < srclen; i += 2)
+ if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
{
- ucs = srcptr[i] | srcptr[i+1] << 8;
- dstptr += fz_runetochar(dstptr, ucs);
+ for (i = 2; i + 1 < srclen; i += 2)
+ {
+ ucs = srcptr[i] << 8 | srcptr[i+1];
+ dstlen += fz_runelen(ucs);
+ }
+
+ dstptr = dst = fz_malloc(ctx, dstlen + 1);
+
+ for (i = 2; i + 1 < srclen; i += 2)
+ {
+ ucs = srcptr[i] << 8 | srcptr[i+1];
+ dstptr += fz_runetochar(dstptr, ucs);
+ }
}
- }
- else
- {
- for (i = 0; i < srclen; i++)
- dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]);
+ else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
+ {
+ for (i = 2; i + 1 < srclen; i += 2)
+ {
+ ucs = srcptr[i] | srcptr[i+1] << 8;
+ dstlen += fz_runelen(ucs);
+ }
- dstptr = dst = fz_malloc(ctx, dstlen + 1);
+ dstptr = dst = fz_malloc(ctx, dstlen + 1);
- for (i = 0; i < srclen; i++)
+ for (i = 2; i + 1 < srclen; i += 2)
+ {
+ ucs = srcptr[i] | srcptr[i+1] << 8;
+ dstptr += fz_runetochar(dstptr, ucs);
+ }
+ }
+ else
{
- ucs = pdf_doc_encoding[srcptr[i]];
- dstptr += fz_runetochar(dstptr, ucs);
+ for (i = 0; i < srclen; i++)
+ dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]);
+
+ dstptr = dst = fz_malloc(ctx, dstlen + 1);
+
+ for (i = 0; i < srclen; i++)
+ {
+ ucs = pdf_doc_encoding[srcptr[i]];
+ dstptr += fz_runetochar(dstptr, ucs);
+ }
}
}
+ fz_always(ctx)
+ {
+ fz_drop_buffer(ctx, strmbuf);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
*dstptr = '\0';
return dst;
@@ -92,8 +121,9 @@ pdf_to_utf8(fz_context *ctx, pdf_obj *src)
/* Convert Unicode/PdfDocEncoding string into ucs-2 */
unsigned short *
-pdf_to_ucs2(fz_context *ctx, pdf_obj *src)
+pdf_to_ucs2(pdf_document *xref, pdf_obj *src)
{
+ fz_context *ctx = xref->ctx;
unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src);
unsigned short *dstptr, *dst;
int srclen = pdf_to_str_len(src);
@@ -124,8 +154,9 @@ pdf_to_ucs2(fz_context *ctx, pdf_obj *src)
/* Convert UCS-2 string into PdfDocEncoding for authentication */
char *
-pdf_from_ucs2(fz_context *ctx, unsigned short *src)
+pdf_from_ucs2(pdf_document *xref, unsigned short *src)
{
+ fz_context *ctx = xref->ctx;
int i, j, len;
char *docstr;
@@ -162,11 +193,11 @@ pdf_from_ucs2(fz_context *ctx, unsigned short *src)
}
pdf_obj *
-pdf_to_utf8_name(fz_context *ctx, pdf_obj *src)
+pdf_to_utf8_name(pdf_document *xref, pdf_obj *src)
{
- char *buf = pdf_to_utf8(ctx, src);
- pdf_obj *dst = fz_new_name(ctx, buf);
- fz_free(ctx, buf);
+ char *buf = pdf_to_utf8(xref, src);
+ pdf_obj *dst = fz_new_name(xref->ctx, buf);
+ fz_free(xref->ctx, buf);
return dst;
}
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c
index a1a19f22..4a03751c 100644
--- a/pdf/pdf_xref.c
+++ b/pdf/pdf_xref.c
@@ -1222,7 +1222,7 @@ pdf_meta(pdf_document *doc, int key, void *ptr, int size)
}
if (info && ptr && size)
{
- char *utf8 = pdf_to_utf8(doc->ctx, info);
+ char *utf8 = pdf_to_utf8(doc, info);
strncpy(ptr, utf8, size);
((char *)ptr)[size-1] = 0;
fz_free(doc->ctx, utf8);