summary | refs | log | tree | commit | diff
path: root/pdf/pdf_parse.c
diff options
context:
space:
mode:
author: Robin Watts <Robin.Watts@artifex.com> 2011-10-04 18:44:19 +0100
committer: Robin Watts <Robin.Watts@artifex.com> 2011-10-04 18:44:19 +0100
commit: d208be26537db558edb70236ae517cea31b7ebab (patch)
tree: 57da95b97e354a53bd4517a42010e90968f007d9 /pdf/pdf_parse.c
parent: ba46cad4b09bb957085900a203206c8fa5868cd4 (diff)
download: mupdf-d208be26537db558edb70236ae517cea31b7ebab.tar.xz
Move to exception handling rather than error passing throughout.
This frees us from passing errors back everywhere, and hence enables us to pass results back as return values. Rather than having to explicitly check for errors everywhere and bubble them, we now allow exception handling to do the work for us; the downside to this is that we no longer emit as much debugging information as we did before (though this could be put back in). For now, the debugging information we have lost has been retained in comments with 'RJW:' at the start. This code needs fuller testing, but is being committed as a work in progress.
Diffstat (limited to 'pdf/pdf_parse.c')
-rw-r--r-- pdf/pdf_parse.c 483
1 files changed, 214 insertions, 269 deletions
diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c
index 5a243289..4e393370 100644
--- a/pdf/pdf_parse.c
+++ b/pdf/pdf_parse.c
@@ -170,147 +170,142 @@ pdf_to_utf8_name(fz_context *ctx, fz_obj *src)
return dst;
}
-fz_error
-pdf_parse_array(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
+fz_obj *
+pdf_parse_array(pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
- fz_error error = fz_okay;
fz_obj *ary = NULL;
fz_obj *obj = NULL;
int a = 0, b = 0, n = 0;
int tok;
int len;
fz_context *ctx = file->ctx;
+ fz_obj *op;
ary = fz_new_array(ctx, 4);
- while (1)
+ fz_try(ctx)
{
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
+ while (1)
{
- fz_drop_obj(ary);
- return fz_error_note(error, "cannot parse array");
- }
+ tok = pdf_lex(file, buf, cap, &len);
- if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
- {
- if (n > 0)
+ if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
+ {
+ if (n > 0)
+ {
+ obj = fz_new_int(ctx, a);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ }
+ if (n > 1)
+ {
+ obj = fz_new_int(ctx, b);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ }
+ n = 0;
+ }
+
+ if (tok == PDF_TOK_INT && n == 2)
{
obj = fz_new_int(ctx, a);
fz_array_push(ary, obj);
fz_drop_obj(obj);
+ a = b;
+ n --;
}
- if (n > 1)
+
+ switch (tok)
{
- obj = fz_new_int(ctx, b);
+ case PDF_TOK_CLOSE_ARRAY:
+ op = ary;
+ goto end;
+
+ case PDF_TOK_INT:
+ if (n == 0)
+ a = atoi(buf);
+ if (n == 1)
+ b = atoi(buf);
+ n ++;
+ break;
+
+ case PDF_TOK_R:
+ if (n != 2)
+ {
+ fz_drop_obj(ary);
+ fz_throw(ctx, "cannot parse indirect reference in array");
+ }
+ obj = fz_new_indirect(ctx, a, b, xref);
fz_array_push(ary, obj);
fz_drop_obj(obj);
- }
- n = 0;
- }
+ n = 0;
+ break;
- if (tok == PDF_TOK_INT && n == 2)
- {
- obj = fz_new_int(ctx, a);
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- a = b;
- n --;
- }
+ case PDF_TOK_OPEN_ARRAY:
+ obj = pdf_parse_array(xref, file, buf, cap);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ break;
- switch (tok)
- {
- case PDF_TOK_CLOSE_ARRAY:
- *op = ary;
- return fz_okay;
-
- case PDF_TOK_INT:
- if (n == 0)
- a = atoi(buf);
- if (n == 1)
- b = atoi(buf);
- n ++;
- break;
-
- case PDF_TOK_R:
- if (n != 2)
- {
- fz_drop_obj(ary);
- return fz_error_make("cannot parse indirect reference in array");
- }
- obj = fz_new_indirect(ctx, a, b, xref);
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- n = 0;
- break;
-
- case PDF_TOK_OPEN_ARRAY:
- error = pdf_parse_array(&obj, xref, file, buf, cap);
- if (error)
- {
- fz_drop_obj(ary);
- return fz_error_note(error, "cannot parse array");
- }
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- break;
+ case PDF_TOK_OPEN_DICT:
+ obj = pdf_parse_dict(xref, file, buf, cap);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ break;
- case PDF_TOK_OPEN_DICT:
- error = pdf_parse_dict(&obj, xref, file, buf, cap);
- if (error)
- {
+ case PDF_TOK_NAME:
+ obj = fz_new_name(ctx, buf);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ break;
+ case PDF_TOK_REAL:
+ obj = fz_new_real(ctx, fz_atof(buf));
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ break;
+ case PDF_TOK_STRING:
+ obj = fz_new_string(ctx, buf, len);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ break;
+ case PDF_TOK_TRUE:
+ obj = fz_new_bool(ctx, 1);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ break;
+ case PDF_TOK_FALSE:
+ obj = fz_new_bool(ctx, 0);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ break;
+ case PDF_TOK_NULL:
+ obj = fz_new_null(ctx);
+ fz_array_push(ary, obj);
+ fz_drop_obj(obj);
+ break;
+
+ default:
fz_drop_obj(ary);
- return fz_error_note(error, "cannot parse array");
+ fz_throw(ctx, "cannot parse token in array");
}
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- break;
-
- case PDF_TOK_NAME:
- obj = fz_new_name(ctx, buf);
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- break;
- case PDF_TOK_REAL:
- obj = fz_new_real(ctx, fz_atof(buf));
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- break;
- case PDF_TOK_STRING:
- obj = fz_new_string(ctx, buf, len);
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- break;
- case PDF_TOK_TRUE:
- obj = fz_new_bool(ctx, 1);
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- break;
- case PDF_TOK_FALSE:
- obj = fz_new_bool(ctx, 0);
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- break;
- case PDF_TOK_NULL:
- obj = fz_new_null(ctx);
- fz_array_push(ary, obj);
- fz_drop_obj(obj);
- break;
-
- default:
- fz_drop_obj(ary);
- return fz_error_make("cannot parse token in array");
}
+end:
+ {}
+ }
+ fz_catch(ctx)
+ {
+ fz_drop_obj(ary);
+ fz_throw(ctx, "cannot parse array");
}
+ return op;
}
-fz_error
-pdf_parse_dict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
+fz_obj *
+pdf_parse_dict(pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
- fz_error error = fz_okay;
- fz_obj *dict = NULL;
- fz_obj *key = NULL;
- fz_obj *val = NULL;
+ fz_obj * volatile dict = NULL;
+ fz_obj * volatile key = NULL;
+ fz_obj * volatile val = NULL;
int tok;
int len;
int a, b;
@@ -318,211 +313,161 @@ pdf_parse_dict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
dict = fz_new_dict(ctx, 8);
- while (1)
+ fz_try(ctx)
{
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
+ while (1)
{
- fz_drop_obj(dict);
- return fz_error_note(error, "cannot parse dict");
- }
+ tok = pdf_lex(file, buf, cap, &len);
+ skip:
+ if (tok == PDF_TOK_CLOSE_DICT)
+ break;
-skip:
- if (tok == PDF_TOK_CLOSE_DICT)
- {
- *op = dict;
- return fz_okay;
- }
+ /* for BI .. ID .. EI in content streams */
+ if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))
+ break;
- /* for BI .. ID .. EI in content streams */
- if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))
- {
- *op = dict;
- return fz_okay;
- }
+ if (tok != PDF_TOK_NAME)
+ fz_throw(ctx, "invalid key in dict");
- if (tok != PDF_TOK_NAME)
- {
- fz_drop_obj(dict);
- return fz_error_make("invalid key in dict");
- }
+ key = fz_new_name(ctx, buf);
- key = fz_new_name(ctx, buf);
+ tok = pdf_lex(file, buf, cap, &len);
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- {
- fz_drop_obj(key);
- fz_drop_obj(dict);
- return fz_error_note(error, "cannot parse dict");
- }
-
- switch (tok)
- {
- case PDF_TOK_OPEN_ARRAY:
- error = pdf_parse_array(&val, xref, file, buf, cap);
- if (error)
+ switch (tok)
{
- fz_drop_obj(key);
- fz_drop_obj(dict);
- return fz_error_note(error, "cannot parse dict");
- }
- break;
+ case PDF_TOK_OPEN_ARRAY:
+ val = pdf_parse_array(xref, file, buf, cap);
+ break;
- case PDF_TOK_OPEN_DICT:
- error = pdf_parse_dict(&val, xref, file, buf, cap);
- if (error)
- {
- fz_drop_obj(key);
- fz_drop_obj(dict);
- return fz_error_note(error, "cannot parse dict");
- }
- break;
-
- case PDF_TOK_NAME: val = fz_new_name(ctx, buf); break;
- case PDF_TOK_REAL: val = fz_new_real(ctx, fz_atof(buf)); break;
- case PDF_TOK_STRING: val = fz_new_string(ctx, buf, len); break;
- case PDF_TOK_TRUE: val = fz_new_bool(ctx, 1); break;
- case PDF_TOK_FALSE: val = fz_new_bool(ctx, 0); break;
- case PDF_TOK_NULL: val = fz_new_null(ctx); break;
-
- case PDF_TOK_INT:
- /* 64-bit to allow for numbers > INT_MAX and overflow */
- a = (int) strtoll(buf, 0, 10);
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- {
- fz_drop_obj(key);
- fz_drop_obj(dict);
- return fz_error_note(error, "cannot parse dict");
- }
- if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
- (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
- {
- val = fz_new_int(ctx, a);
- fz_dict_put(dict, key, val);
- fz_drop_obj(val);
- fz_drop_obj(key);
- goto skip;
- }
- if (tok == PDF_TOK_INT)
- {
- b = atoi(buf);
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
+ case PDF_TOK_OPEN_DICT:
+ val = pdf_parse_dict(xref, file, buf, cap);
+ break;
+
+ case PDF_TOK_NAME: val = fz_new_name(ctx, buf); break;
+ case PDF_TOK_REAL: val = fz_new_real(ctx, fz_atof(buf)); break;
+ case PDF_TOK_STRING: val = fz_new_string(ctx, buf, len); break;
+ case PDF_TOK_TRUE: val = fz_new_bool(ctx, 1); break;
+ case PDF_TOK_FALSE: val = fz_new_bool(ctx, 0); break;
+ case PDF_TOK_NULL: val = fz_new_null(ctx); break;
+
+ case PDF_TOK_INT:
+ /* 64-bit to allow for numbers > INT_MAX and overflow */
+ a = (int) strtoll(buf, 0, 10);
+ tok = pdf_lex(file, buf, cap, &len);
+ if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
+ (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
{
+ val = fz_new_int(ctx, a);
+ fz_dict_put(dict, key, val);
+ fz_drop_obj(val);
fz_drop_obj(key);
- fz_drop_obj(dict);
- return fz_error_note(error, "cannot parse dict");
+ goto skip;
}
- if (tok == PDF_TOK_R)
+ if (tok == PDF_TOK_INT)
{
- val = fz_new_indirect(ctx, a, b, xref);
- break;
+ b = atoi(buf);
+ tok = pdf_lex(file, buf, cap, &len);
+ if (tok == PDF_TOK_R)
+ {
+ val = fz_new_indirect(ctx, a, b, xref);
+ break;
+ }
}
+ fz_throw(ctx, "invalid indirect reference in dict");
+
+ default:
+ fz_drop_obj(key);
+ fz_drop_obj(dict);
+ fz_throw(ctx, "unknown token in dict");
}
- fz_drop_obj(key);
- fz_drop_obj(dict);
- return fz_error_make("invalid indirect reference in dict");
- default:
+ fz_dict_put(dict, key, val);
+ fz_drop_obj(val);
fz_drop_obj(key);
- fz_drop_obj(dict);
- return fz_error_make("unknown token in dict");
+ key = NULL;
}
-
- fz_dict_put(dict, key, val);
- fz_drop_obj(val);
+ }
+ fz_catch(ctx)
+ {
+ fz_drop_obj(dict);
fz_drop_obj(key);
+ fz_throw(ctx, "cannot parse dict");
}
+ return dict;
}
-fz_error
-pdf_parse_stm_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
+fz_obj *
+pdf_parse_stm_obj(pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
- fz_error error;
int tok;
int len;
fz_context *ctx = file->ctx;
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- return fz_error_note(error, "cannot parse token in object stream");
+ tok = pdf_lex(file, buf, cap, &len);
+ /* RJW: "cannot parse token in object stream") */
switch (tok)
{
case PDF_TOK_OPEN_ARRAY:
- error = pdf_parse_array(op, xref, file, buf, cap);
- if (error)
- return fz_error_note(error, "cannot parse object stream");
- break;
+ return pdf_parse_array(xref, file, buf, cap);
+ /* RJW: "cannot parse object stream" */
case PDF_TOK_OPEN_DICT:
- error = pdf_parse_dict(op, xref, file, buf, cap);
- if (error)
- return fz_error_note(error, "cannot parse object stream");
- break;
- case PDF_TOK_NAME: *op = fz_new_name(ctx, buf); break;
- case PDF_TOK_REAL: *op = fz_new_real(ctx, fz_atof(buf)); break;
- case PDF_TOK_STRING: *op = fz_new_string(ctx, buf, len); break;
- case PDF_TOK_TRUE: *op = fz_new_bool(ctx, 1); break;
- case PDF_TOK_FALSE: *op = fz_new_bool(ctx, 0); break;
- case PDF_TOK_NULL: *op = fz_new_null(ctx); break;
- case PDF_TOK_INT: *op = fz_new_int(ctx, atoi(buf)); break;
- default: return fz_error_make("unknown token in object stream");
+ return pdf_parse_dict(xref, file, buf, cap);
+ /* RJW: "cannot parse object stream" */
+ case PDF_TOK_NAME: return fz_new_name(ctx, buf); break;
+ case PDF_TOK_REAL: return fz_new_real(ctx, fz_atof(buf)); break;
+ case PDF_TOK_STRING: return fz_new_string(ctx, buf, len); break;
+ case PDF_TOK_TRUE: return fz_new_bool(ctx, 1); break;
+ case PDF_TOK_FALSE: return fz_new_bool(ctx, 0); break;
+ case PDF_TOK_NULL: return fz_new_null(ctx); break;
+ case PDF_TOK_INT: return fz_new_int(ctx, atoi(buf)); break;
+ default: fz_throw(ctx, "unknown token in object stream");
}
-
- return fz_okay;
+ return NULL; /* Stupid MSVC */
}
-fz_error
-pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref,
+fz_obj *
+pdf_parse_ind_obj(pdf_xref *xref,
fz_stream *file, char *buf, int cap,
int *onum, int *ogen, int *ostmofs)
{
- fz_error error = fz_okay;
- fz_obj *obj = NULL;
+ fz_obj * volatile obj = NULL;
int num = 0, gen = 0, stm_ofs;
int tok;
int len;
int a, b;
fz_context *ctx = file->ctx;
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ tok = pdf_lex(file, buf, cap, &len);
+ /* RJW: cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_INT)
- return fz_error_make("expected object number (%d %d R)", num, gen);
+ fz_throw(ctx, "expected object number (%d %d R)", num, gen);
num = atoi(buf);
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ tok = pdf_lex(file, buf, cap, &len);
+ /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_INT)
- return fz_error_make("expected generation number (%d %d R)", num, gen);
+ fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
gen = atoi(buf);
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ tok = pdf_lex(file, buf, cap, &len);
+ /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_OBJ)
- return fz_error_make("expected 'obj' keyword (%d %d R)", num, gen);
+ fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ tok = pdf_lex(file, buf, cap, &len);
+ /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
switch (tok)
{
case PDF_TOK_OPEN_ARRAY:
- error = pdf_parse_array(&obj, xref, file, buf, cap);
- if (error)
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ obj = pdf_parse_array(xref, file, buf, cap);
+ /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
break;
case PDF_TOK_OPEN_DICT:
- error = pdf_parse_dict(&obj, xref, file, buf, cap);
- if (error)
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ obj = pdf_parse_dict(xref, file, buf, cap);
+ /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
break;
case PDF_TOK_NAME: obj = fz_new_name(ctx, buf); break;
@@ -534,9 +479,8 @@ pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref,
case PDF_TOK_INT:
a = atoi(buf);
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ tok = pdf_lex(file, buf, cap, &len);
+ /* "cannot parse indirect object (%d %d R)", num, gen */
if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
{
obj = fz_new_int(ctx, a);
@@ -545,30 +489,32 @@ pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref,
if (tok == PDF_TOK_INT)
{
b = atoi(buf);
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ tok = pdf_lex(file, buf, cap, &len);
+ /* RJW: "cannot parse indirect object (%d %d R)", num, gen); */
if (tok == PDF_TOK_R)
{
obj = fz_new_indirect(ctx, a, b, xref);
break;
}
}
- return fz_error_make("expected 'R' keyword (%d %d R)", num, gen);
+ fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);
case PDF_TOK_ENDOBJ:
obj = fz_new_null(ctx);
goto skip;
default:
- return fz_error_make("syntax error in object (%d %d R)", num, gen);
+ fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
}
- error = pdf_lex(&tok, file, buf, cap, &len);
- if (error)
+ fz_try(ctx)
+ {
+ tok = pdf_lex(file, buf, cap, &len);
+ }
+ fz_catch(ctx)
{
fz_drop_obj(obj);
- return fz_error_note(error, "cannot parse indirect object (%d %d R)", num, gen);
+ fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
}
skip:
@@ -600,6 +546,5 @@ skip:
if (onum) *onum = num;
if (ogen) *ogen = gen;
if (ostmofs) *ostmofs = stm_ofs;
- *op = obj;
- return fz_okay;
+ return obj;
}