summary | refs | log | tree | commit | diff
path: root/pdf/pdf_parse.c
diff options
context:
space:
mode:
author: Paul Gardiner <paul@glidos.net> 2012-07-18 14:45:28 +0100
committer: Paul Gardiner <paul@glidos.net> 2012-07-18 16:42:02 +0100
commit: 7eb8e51b6b889fa14aae39d282675b838aa43dc9 (patch)
tree: 4bf26eacd64612cbe2626263fb3ca344b53ef04e /pdf/pdf_parse.c
parent: b03aad16426b89728c00e8e9888de7866dde03cc (diff)
download: mupdf-7eb8e51b6b889fa14aae39d282675b838aa43dc9.tar.xz
Update pdf_to_utf8 to handle either a stream or a string
Also change first argument from fz_context to pdf_document in each of pdf_to_utf8, pdf_to_utf8_name, pdf_to_ucs2 and pdf_to_ucs2_name
Diffstat (limited to 'pdf/pdf_parse.c')
-rw-r--r-- pdf/pdf_parse.c 109
1 files changed, 70 insertions, 39 deletions
diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c
index 0ba6b0a4..213c399e 100644
--- a/pdf/pdf_parse.c
+++ b/pdf/pdf_parse.c
@@ -31,60 +31,89 @@ pdf_to_matrix(fz_context *ctx, pdf_obj *array)
/* Convert Unicode/PdfDocEncoding string into utf-8 */
char *
-pdf_to_utf8(fz_context *ctx, pdf_obj *src)
+pdf_to_utf8(pdf_document *xref, pdf_obj *src)
{
- unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src);
+ fz_context *ctx = xref->ctx;
+ fz_buffer *strmbuf = NULL;
+ unsigned char *srcptr;
char *dstptr, *dst;
- int srclen = pdf_to_str_len(src);
+ int srclen;
int dstlen = 0;
int ucs;
int i;
- if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
+ fz_var(strmbuf);
+ fz_try(ctx)
{
- for (i = 2; i + 1 < srclen; i += 2)
+ if (pdf_is_string(src))
{
- ucs = srcptr[i] << 8 | srcptr[i+1];
- dstlen += fz_runelen(ucs);
+ srcptr = (unsigned char *) pdf_to_str_buf(src);
+ srclen = pdf_to_str_len(src);
}
-
- dstptr = dst = fz_malloc(ctx, dstlen + 1);
-
- for (i = 2; i + 1 < srclen; i += 2)
+ else if (pdf_is_stream(xref, pdf_to_num(src), pdf_to_gen(src)))
{
- ucs = srcptr[i] << 8 | srcptr[i+1];
- dstptr += fz_runetochar(dstptr, ucs);
+ strmbuf = pdf_load_stream(xref, pdf_to_num(src), pdf_to_gen(src));
+ srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr);
}
- }
- else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
- {
- for (i = 2; i + 1 < srclen; i += 2)
+ else
{
- ucs = srcptr[i] | srcptr[i+1] << 8;
- dstlen += fz_runelen(ucs);
+ srclen = 0;
}
- dstptr = dst = fz_malloc(ctx, dstlen + 1);
-
- for (i = 2; i + 1 < srclen; i += 2)
+ if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
{
- ucs = srcptr[i] | srcptr[i+1] << 8;
- dstptr += fz_runetochar(dstptr, ucs);
+ for (i = 2; i + 1 < srclen; i += 2)
+ {
+ ucs = srcptr[i] << 8 | srcptr[i+1];
+ dstlen += fz_runelen(ucs);
+ }
+
+ dstptr = dst = fz_malloc(ctx, dstlen + 1);
+
+ for (i = 2; i + 1 < srclen; i += 2)
+ {
+ ucs = srcptr[i] << 8 | srcptr[i+1];
+ dstptr += fz_runetochar(dstptr, ucs);
+ }
}
- }
- else
- {
- for (i = 0; i < srclen; i++)
- dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]);
+ else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
+ {
+ for (i = 2; i + 1 < srclen; i += 2)
+ {
+ ucs = srcptr[i] | srcptr[i+1] << 8;
+ dstlen += fz_runelen(ucs);
+ }
- dstptr = dst = fz_malloc(ctx, dstlen + 1);
+ dstptr = dst = fz_malloc(ctx, dstlen + 1);
- for (i = 0; i < srclen; i++)
+ for (i = 2; i + 1 < srclen; i += 2)
+ {
+ ucs = srcptr[i] | srcptr[i+1] << 8;
+ dstptr += fz_runetochar(dstptr, ucs);
+ }
+ }
+ else
{
- ucs = pdf_doc_encoding[srcptr[i]];
- dstptr += fz_runetochar(dstptr, ucs);
+ for (i = 0; i < srclen; i++)
+ dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]);
+
+ dstptr = dst = fz_malloc(ctx, dstlen + 1);
+
+ for (i = 0; i < srclen; i++)
+ {
+ ucs = pdf_doc_encoding[srcptr[i]];
+ dstptr += fz_runetochar(dstptr, ucs);
+ }
}
}
+ fz_always(ctx)
+ {
+ fz_drop_buffer(ctx, strmbuf);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
*dstptr = '\0';
return dst;
@@ -92,8 +121,9 @@ pdf_to_utf8(fz_context *ctx, pdf_obj *src)
/* Convert Unicode/PdfDocEncoding string into ucs-2 */
unsigned short *
-pdf_to_ucs2(fz_context *ctx, pdf_obj *src)
+pdf_to_ucs2(pdf_document *xref, pdf_obj *src)
{
+ fz_context *ctx = xref->ctx;
unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src);
unsigned short *dstptr, *dst;
int srclen = pdf_to_str_len(src);
@@ -124,8 +154,9 @@ pdf_to_ucs2(fz_context *ctx, pdf_obj *src)
/* Convert UCS-2 string into PdfDocEncoding for authentication */
char *
-pdf_from_ucs2(fz_context *ctx, unsigned short *src)
+pdf_from_ucs2(pdf_document *xref, unsigned short *src)
{
+ fz_context *ctx = xref->ctx;
int i, j, len;
char *docstr;
@@ -162,11 +193,11 @@ pdf_from_ucs2(fz_context *ctx, unsigned short *src)
}
pdf_obj *
-pdf_to_utf8_name(fz_context *ctx, pdf_obj *src)
+pdf_to_utf8_name(pdf_document *xref, pdf_obj *src)
{
- char *buf = pdf_to_utf8(ctx, src);
- pdf_obj *dst = fz_new_name(ctx, buf);
- fz_free(ctx, buf);
+ char *buf = pdf_to_utf8(xref, src);
+ pdf_obj *dst = fz_new_name(xref->ctx, buf);
+ fz_free(xref->ctx, buf);
return dst;
}