summary | refs | log | tree | commit | diff
path: root/source/pdf/pdf-parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/pdf/pdf-parse.c')
-rw-r--r-- source/pdf/pdf-parse.c 267
1 files changed, 130 insertions, 137 deletions
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index a2fd09db..48761374 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -3,10 +3,10 @@
fz_rect *
pdf_to_rect(fz_context *ctx, pdf_obj *array, fz_rect *r)
{
- float a = pdf_to_real(pdf_array_get(array, 0));
- float b = pdf_to_real(pdf_array_get(array, 1));
- float c = pdf_to_real(pdf_array_get(array, 2));
- float d = pdf_to_real(pdf_array_get(array, 3));
+ float a = pdf_to_real(ctx, pdf_array_get(ctx, array, 0));
+ float b = pdf_to_real(ctx, pdf_array_get(ctx, array, 1));
+ float c = pdf_to_real(ctx, pdf_array_get(ctx, array, 2));
+ float d = pdf_to_real(ctx, pdf_array_get(ctx, array, 3));
r->x0 = fz_min(a, c);
r->y0 = fz_min(b, d);
r->x1 = fz_max(a, c);
@@ -17,20 +17,19 @@ pdf_to_rect(fz_context *ctx, pdf_obj *array, fz_rect *r)
fz_matrix *
pdf_to_matrix(fz_context *ctx, pdf_obj *array, fz_matrix *m)
{
- m->a = pdf_to_real(pdf_array_get(array, 0));
- m->b = pdf_to_real(pdf_array_get(array, 1));
- m->c = pdf_to_real(pdf_array_get(array, 2));
- m->d = pdf_to_real(pdf_array_get(array, 3));
- m->e = pdf_to_real(pdf_array_get(array, 4));
- m->f = pdf_to_real(pdf_array_get(array, 5));
+ m->a = pdf_to_real(ctx, pdf_array_get(ctx, array, 0));
+ m->b = pdf_to_real(ctx, pdf_array_get(ctx, array, 1));
+ m->c = pdf_to_real(ctx, pdf_array_get(ctx, array, 2));
+ m->d = pdf_to_real(ctx, pdf_array_get(ctx, array, 3));
+ m->e = pdf_to_real(ctx, pdf_array_get(ctx, array, 4));
+ m->f = pdf_to_real(ctx, pdf_array_get(ctx, array, 5));
return m;
}
/* Convert Unicode/PdfDocEncoding string into utf-8 */
char *
-pdf_to_utf8(pdf_document *doc, pdf_obj *src)
+pdf_to_utf8(fz_context *ctx, pdf_document *doc, pdf_obj *src)
{
- fz_context *ctx = doc->ctx;
fz_buffer *strmbuf = NULL;
unsigned char *srcptr;
char *dstptr, *dst;
@@ -42,14 +41,14 @@ pdf_to_utf8(pdf_document *doc, pdf_obj *src)
fz_var(strmbuf);
fz_try(ctx)
{
- if (pdf_is_string(src))
+ if (pdf_is_string(ctx, src))
{
- srcptr = (unsigned char *) pdf_to_str_buf(src);
- srclen = pdf_to_str_len(src);
+ srcptr = (unsigned char *) pdf_to_str_buf(ctx, src);
+ srclen = pdf_to_str_len(ctx, src);
}
- else if (pdf_is_stream(doc, pdf_to_num(src), pdf_to_gen(src)))
+ else if (pdf_is_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src)))
{
- strmbuf = pdf_load_stream(doc, pdf_to_num(src), pdf_to_gen(src));
+ strmbuf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src));
srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr);
}
else
@@ -118,12 +117,11 @@ pdf_to_utf8(pdf_document *doc, pdf_obj *src)
/* Convert Unicode/PdfDocEncoding string into ucs-2 */
unsigned short *
-pdf_to_ucs2(pdf_document *doc, pdf_obj *src)
+pdf_to_ucs2(fz_context *ctx, pdf_document *doc, pdf_obj *src)
{
- fz_context *ctx = doc->ctx;
- unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src);
+ unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(ctx, src);
unsigned short *dstptr, *dst;
- int srclen = pdf_to_str_len(src);
+ int srclen = pdf_to_str_len(ctx, src);
int i;
if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
@@ -152,11 +150,11 @@ pdf_to_ucs2(pdf_document *doc, pdf_obj *src)
/* allow to convert to UCS-2 without the need for an fz_context */
/* (buffer must be at least (fz_to_str_len(src) + 1) * 2 bytes in size) */
void
-pdf_to_ucs2_buf(unsigned short *buffer, pdf_obj *src)
+pdf_to_ucs2_buf(fz_context *ctx, unsigned short *buffer, pdf_obj *src)
{
- unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src);
+ unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(ctx, src);
unsigned short *dstptr = buffer;
- int srclen = pdf_to_str_len(src);
+ int srclen = pdf_to_str_len(ctx, src);
int i;
if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
@@ -180,9 +178,8 @@ pdf_to_ucs2_buf(unsigned short *buffer, pdf_obj *src)
/* Convert UCS-2 string into PdfDocEncoding for authentication */
char *
-pdf_from_ucs2(pdf_document *doc, unsigned short *src)
+pdf_from_ucs2(fz_context *ctx, pdf_document *doc, unsigned short *src)
{
- fz_context *ctx = doc->ctx;
int i, j, len;
char *docstr;
@@ -219,48 +216,47 @@ pdf_from_ucs2(pdf_document *doc, unsigned short *src)
}
pdf_obj *
-pdf_to_utf8_name(pdf_document *doc, pdf_obj *src)
+pdf_to_utf8_name(fz_context *ctx, pdf_document *doc, pdf_obj *src)
{
- char *buf = pdf_to_utf8(doc, src);
- pdf_obj *dst = pdf_new_name(doc, buf);
- fz_free(doc->ctx, buf);
+ char *buf = pdf_to_utf8(ctx, doc, src);
+ pdf_obj *dst = pdf_new_name(ctx, doc, buf);
+ fz_free(ctx, buf);
return dst;
}
pdf_obj *
-pdf_parse_array(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
+pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{
pdf_obj *ary = NULL;
pdf_obj *obj = NULL;
int a = 0, b = 0, n = 0;
pdf_token tok;
- fz_context *ctx = file->ctx;
pdf_obj *op = NULL;
fz_var(obj);
- ary = pdf_new_array(doc, 4);
+ ary = pdf_new_array(ctx, doc, 4);
fz_try(ctx)
{
while (1)
{
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
{
if (n > 0)
{
- obj = pdf_new_int(doc, a);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_int(ctx, doc, a);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
}
if (n > 1)
{
- obj = pdf_new_int(doc, b);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_int(ctx, doc, b);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
}
n = 0;
@@ -268,9 +264,9 @@ pdf_parse_array(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
if (tok == PDF_TOK_INT && n == 2)
{
- obj = pdf_new_int(doc, a);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_int(ctx, doc, a);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
a = b;
n --;
@@ -293,61 +289,61 @@ pdf_parse_array(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
case PDF_TOK_R:
if (n != 2)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse indirect reference in array");
- obj = pdf_new_indirect(doc, a, b);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_indirect(ctx, doc, a, b);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
n = 0;
break;
case PDF_TOK_OPEN_ARRAY:
- obj = pdf_parse_array(doc, file, buf);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_parse_array(ctx, doc, file, buf);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
break;
case PDF_TOK_OPEN_DICT:
- obj = pdf_parse_dict(doc, file, buf);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_parse_dict(ctx, doc, file, buf);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
break;
case PDF_TOK_NAME:
- obj = pdf_new_name(doc, buf->scratch);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_name(ctx, doc, buf->scratch);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
break;
case PDF_TOK_REAL:
- obj = pdf_new_real(doc, buf->f);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_real(ctx, doc, buf->f);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
break;
case PDF_TOK_STRING:
- obj = pdf_new_string(doc, buf->scratch, buf->len);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_string(ctx, doc, buf->scratch, buf->len);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
break;
case PDF_TOK_TRUE:
- obj = pdf_new_bool(doc, 1);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_bool(ctx, doc, 1);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
break;
case PDF_TOK_FALSE:
- obj = pdf_new_bool(doc, 0);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_bool(ctx, doc, 0);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
break;
case PDF_TOK_NULL:
- obj = pdf_new_null(doc);
- pdf_array_push(ary, obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_null(ctx, doc);
+ pdf_array_push(ctx, ary, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
break;
@@ -360,24 +356,23 @@ end:
}
fz_catch(ctx)
{
- pdf_drop_obj(obj);
- pdf_drop_obj(ary);
+ pdf_drop_obj(ctx, obj);
+ pdf_drop_obj(ctx, ary);
fz_rethrow_message(ctx, "cannot parse array");
}
return op;
}
pdf_obj *
-pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
+pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{
pdf_obj *dict;
pdf_obj *key = NULL;
pdf_obj *val = NULL;
pdf_token tok;
int a, b;
- fz_context *ctx = file->ctx;
- dict = pdf_new_dict(doc, 8);
+ dict = pdf_new_dict(ctx, doc, 8);
fz_var(key);
fz_var(val);
@@ -386,7 +381,7 @@ pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{
while (1)
{
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
skip:
if (tok == PDF_TOK_CLOSE_DICT)
break;
@@ -398,49 +393,49 @@ pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
if (tok != PDF_TOK_NAME)
fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key in dict");
- key = pdf_new_name(doc, buf->scratch);
+ key = pdf_new_name(ctx, doc, buf->scratch);
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
switch (tok)
{
case PDF_TOK_OPEN_ARRAY:
- val = pdf_parse_array(doc, file, buf);
+ val = pdf_parse_array(ctx, doc, file, buf);
break;
case PDF_TOK_OPEN_DICT:
- val = pdf_parse_dict(doc, file, buf);
+ val = pdf_parse_dict(ctx, doc, file, buf);
break;
- case PDF_TOK_NAME: val = pdf_new_name(doc, buf->scratch); break;
- case PDF_TOK_REAL: val = pdf_new_real(doc, buf->f); break;
- case PDF_TOK_STRING: val = pdf_new_string(doc, buf->scratch, buf->len); break;
- case PDF_TOK_TRUE: val = pdf_new_bool(doc, 1); break;
- case PDF_TOK_FALSE: val = pdf_new_bool(doc, 0); break;
- case PDF_TOK_NULL: val = pdf_new_null(doc); break;
+ case PDF_TOK_NAME: val = pdf_new_name(ctx, doc, buf->scratch); break;
+ case PDF_TOK_REAL: val = pdf_new_real(ctx, doc, buf->f); break;
+ case PDF_TOK_STRING: val = pdf_new_string(ctx, doc, buf->scratch, buf->len); break;
+ case PDF_TOK_TRUE: val = pdf_new_bool(ctx, doc, 1); break;
+ case PDF_TOK_FALSE: val = pdf_new_bool(ctx, doc, 0); break;
+ case PDF_TOK_NULL: val = pdf_new_null(ctx, doc); break;
case PDF_TOK_INT:
/* 64-bit to allow for numbers > INT_MAX and overflow */
a = buf->i;
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
(tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
{
- val = pdf_new_int(doc, a);
- pdf_dict_put(dict, key, val);
- pdf_drop_obj(val);
+ val = pdf_new_int(ctx, doc, a);
+ pdf_dict_put(ctx, dict, key, val);
+ pdf_drop_obj(ctx, val);
val = NULL;
- pdf_drop_obj(key);
+ pdf_drop_obj(ctx, key);
key = NULL;
goto skip;
}
if (tok == PDF_TOK_INT)
{
b = buf->i;
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok == PDF_TOK_R)
{
- val = pdf_new_indirect(doc, a, b);
+ val = pdf_new_indirect(ctx, doc, a, b);
break;
}
}
@@ -450,50 +445,49 @@ pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in dict");
}
- pdf_dict_put(dict, key, val);
- pdf_drop_obj(val);
+ pdf_dict_put(ctx, dict, key, val);
+ pdf_drop_obj(ctx, val);
val = NULL;
- pdf_drop_obj(key);
+ pdf_drop_obj(ctx, key);
key = NULL;
}
}
fz_catch(ctx)
{
- pdf_drop_obj(dict);
- pdf_drop_obj(key);
- pdf_drop_obj(val);
+ pdf_drop_obj(ctx, dict);
+ pdf_drop_obj(ctx, key);
+ pdf_drop_obj(ctx, val);
fz_rethrow_message(ctx, "cannot parse dict");
}
return dict;
}
pdf_obj *
-pdf_parse_stm_obj(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
+pdf_parse_stm_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{
pdf_token tok;
- fz_context *ctx = file->ctx;
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
switch (tok)
{
case PDF_TOK_OPEN_ARRAY:
- return pdf_parse_array(doc, file, buf);
+ return pdf_parse_array(ctx, doc, file, buf);
case PDF_TOK_OPEN_DICT:
- return pdf_parse_dict(doc, file, buf);
- case PDF_TOK_NAME: return pdf_new_name(doc, buf->scratch); break;
- case PDF_TOK_REAL: return pdf_new_real(doc, buf->f); break;
- case PDF_TOK_STRING: return pdf_new_string(doc, buf->scratch, buf->len); break;
- case PDF_TOK_TRUE: return pdf_new_bool(doc, 1); break;
- case PDF_TOK_FALSE: return pdf_new_bool(doc, 0); break;
- case PDF_TOK_NULL: return pdf_new_null(doc); break;
- case PDF_TOK_INT: return pdf_new_int(doc, buf->i); break;
+ return pdf_parse_dict(ctx, doc, file, buf);
+ case PDF_TOK_NAME: return pdf_new_name(ctx, doc, buf->scratch); break;
+ case PDF_TOK_REAL: return pdf_new_real(ctx, doc, buf->f); break;
+ case PDF_TOK_STRING: return pdf_new_string(ctx, doc, buf->scratch, buf->len); break;
+ case PDF_TOK_TRUE: return pdf_new_bool(ctx, doc, 1); break;
+ case PDF_TOK_FALSE: return pdf_new_bool(ctx, doc, 0); break;
+ case PDF_TOK_NULL: return pdf_new_null(ctx, doc); break;
+ case PDF_TOK_INT: return pdf_new_int(ctx, doc, buf->i); break;
default: fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in object stream");
}
}
pdf_obj *
-pdf_parse_ind_obj(pdf_document *doc,
+pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
fz_stream *file, pdf_lexbuf *buf,
int *onum, int *ogen, int *ostmofs, int *try_repair)
{
@@ -501,11 +495,10 @@ pdf_parse_ind_obj(pdf_document *doc,
int num = 0, gen = 0, stm_ofs;
pdf_token tok;
int a, b;
- fz_context *ctx = file->ctx;
fz_var(obj);
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok != PDF_TOK_INT)
{
if (try_repair)
@@ -514,7 +507,7 @@ pdf_parse_ind_obj(pdf_document *doc,
}
num = buf->i;
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok != PDF_TOK_INT)
{
if (try_repair)
@@ -523,7 +516,7 @@ pdf_parse_ind_obj(pdf_document *doc,
}
gen = buf->i;
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok != PDF_TOK_OBJ)
{
if (try_repair)
@@ -531,48 +524,48 @@ pdf_parse_ind_obj(pdf_document *doc,
fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen);
}
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
switch (tok)
{
case PDF_TOK_OPEN_ARRAY:
- obj = pdf_parse_array(doc, file, buf);
+ obj = pdf_parse_array(ctx, doc, file, buf);
break;
case PDF_TOK_OPEN_DICT:
- obj = pdf_parse_dict(doc, file, buf);
+ obj = pdf_parse_dict(ctx, doc, file, buf);
break;
- case PDF_TOK_NAME: obj = pdf_new_name(doc, buf->scratch); break;
- case PDF_TOK_REAL: obj = pdf_new_real(doc, buf->f); break;
- case PDF_TOK_STRING: obj = pdf_new_string(doc, buf->scratch, buf->len); break;
- case PDF_TOK_TRUE: obj = pdf_new_bool(doc, 1); break;
- case PDF_TOK_FALSE: obj = pdf_new_bool(doc, 0); break;
- case PDF_TOK_NULL: obj = pdf_new_null(doc); break;
+ case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break;
+ case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break;
+ case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break;
+ case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break;
+ case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break;
+ case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break;
case PDF_TOK_INT:
a = buf->i;
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
{
- obj = pdf_new_int(doc, a);
+ obj = pdf_new_int(ctx, doc, a);
goto skip;
}
if (tok == PDF_TOK_INT)
{
b = buf->i;
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok == PDF_TOK_R)
{
- obj = pdf_new_indirect(doc, a, b);
+ obj = pdf_new_indirect(ctx, doc, a, b);
break;
}
}
fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen);
case PDF_TOK_ENDOBJ:
- obj = pdf_new_null(doc);
+ obj = pdf_new_null(ctx, doc);
goto skip;
default:
@@ -581,29 +574,29 @@ pdf_parse_ind_obj(pdf_document *doc,
fz_try(ctx)
{
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
}
fz_catch(ctx)
{
- pdf_drop_obj(obj);
+ pdf_drop_obj(ctx, obj);
fz_rethrow_message(ctx, "cannot parse indirect object (%d %d R)", num, gen);
}
skip:
if (tok == PDF_TOK_STREAM)
{
- int c = fz_read_byte(file);
+ int c = fz_read_byte(ctx, file);
while (c == ' ')
- c = fz_read_byte(file);
+ c = fz_read_byte(ctx, file);
if (c == '\r')
{
- c = fz_peek_byte(file);
+ c = fz_peek_byte(ctx, file);
if (c != '\n')
fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
else
- fz_read_byte(file);
+ fz_read_byte(ctx, file);
}
- stm_ofs = fz_tell(file);
+ stm_ofs = fz_tell(ctx, file);
}
else if (tok == PDF_TOK_ENDOBJ)
{