diff options
Diffstat (limited to 'source')
-rw-r--r-- | source/pdf/pdf-lex.c | 63 | ||||
-rw-r--r-- | source/pdf/pdf-repair.c | 35 |
2 files changed, 86 insertions, 12 deletions
diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c
index 1be369eb..b019b7b2 100644
--- a/source/pdf/pdf-lex.c
+++ b/source/pdf/pdf-lex.c
@@ -507,6 +507,69 @@ pdf_lex(fz_stream *f, pdf_lexbuf *buf)
 	}
 }
 
+pdf_token
+pdf_lex_no_string(fz_stream *f, pdf_lexbuf *buf)
+{
+	while (1)
+	{
+		int c = fz_read_byte(f);
+		switch (c)
+		{
+		case EOF:
+			return PDF_TOK_EOF;
+		case IS_WHITE:
+			lex_white(f);
+			break;
+		case '%':
+			lex_comment(f);
+			break;
+		case '/':
+			lex_name(f, buf);
+			return PDF_TOK_NAME;
+		case '(':
+			continue;
+		case ')':
+			continue;
+		case '<':
+			c = fz_read_byte(f);
+			if (c == '<')
+			{
+				return PDF_TOK_OPEN_DICT;
+			}
+			else
+			{
+				continue;
+			}
+		case '>':
+			c = fz_read_byte(f);
+			if (c == '>')
+			{
+				return PDF_TOK_CLOSE_DICT;
+			}
+			if (c == EOF)
+			{
+				return PDF_TOK_EOF;
+			}
+			fz_unread_byte(f);
+			continue;
+		case '[':
+			return PDF_TOK_OPEN_ARRAY;
+		case ']':
+			return PDF_TOK_CLOSE_ARRAY;
+		case '{':
+			return PDF_TOK_OPEN_BRACE;
+		case '}':
+			return PDF_TOK_CLOSE_BRACE;
+		case IS_NUMBER:
+			return lex_number(f, buf, c);
+		default: /* isregular: !isdelim && !iswhite && c != EOF */
+			fz_unread_byte(f);
+			lex_name(f, buf);
+			return pdf_token_from_keyword(buf->scratch);
+		}
+	}
+}
+
 void pdf_print_token(fz_context *ctx, fz_buffer *fzbuf, int tok, pdf_lexbuf *buf)
 {
 	switch (tok)
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c
index bf0e2d83..e7449de8 100644
--- a/source/pdf/pdf-repair.c
+++ b/source/pdf/pdf-repair.c
@@ -311,7 +311,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
 
 			fz_try(ctx)
 			{
-				tok = pdf_lex(doc->file, buf);
+				tok = pdf_lex_no_string(doc->file, buf);
 			}
 			fz_catch(ctx)
 			{
@@ -327,6 +327,12 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
 
 			if (tok == PDF_TOK_INT)
 			{
+				if (buf->i < 0)
+				{
+					num = 0;
+					gen = 0;
+					continue;
+				}
 				numofs = genofs;
 				num = gen;
 				genofs = tmpofs;
@@ -380,7 +386,9 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
 				goto have_next_token;
 			}
 
-			/* trailer dictionary */
+			/* If we find a dictionary it is probably the trailer,
+			 * but could be a stream (or bogus) dictionary caused
+			 * by a corrupt file. */
 			else if (tok == PDF_TOK_OPEN_DICT)
 			{
 				fz_try(ctx)
@@ -390,13 +398,11 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
 				fz_catch(ctx)
 				{
 					fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
-					/* If we haven't seen a root yet, there is nothing
-					 * we can do, but give up. Otherwise, we'll make
-					 * do. */
-					if (!root)
-						fz_rethrow(ctx);
-					fz_warn(ctx, "cannot parse trailer dictionary - ignoring rest of file");
-					break;
+					/* If this was the real trailer dict
+					 * it was broken, in which case we are
+					 * in trouble. Keep going though in
+					 * case this was just a bogus dict. */
+					continue;
 				}
 
 				obj = pdf_dict_gets(dict, "Encrypt");
@@ -431,11 +437,16 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
 				obj = NULL;
 			}
 
-			else if (tok == PDF_TOK_ERROR)
-				fz_read_byte(doc->file);
-
 			else if (tok == PDF_TOK_EOF)
 				break;
+			else
+			{
+				if (tok == PDF_TOK_ERROR)
+					fz_read_byte(doc->file);
+				num = 0;
+				gen = 0;
+			}
+
 		}
 
 		/* make xref reasonable */