summary refs log tree commit diff
path: root/source/pdf/pdf-repair.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/pdf/pdf-repair.c')
-rw-r--r-- source/pdf/pdf-repair.c 248
1 files changed, 122 insertions, 126 deletions
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c
index 3cc05fc5..0c1c504c 100644
--- a/source/pdf/pdf-repair.c
+++ b/source/pdf/pdf-repair.c
@@ -15,12 +15,11 @@ struct entry
};
int
-pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int *tmpofs)
+pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int *tmpofs)
{
+ fz_stream *file = doc->file;
pdf_token tok;
int stm_len;
- fz_stream *file = doc->file;
- fz_context *ctx = file->ctx;
*stmofsp = 0;
if (stmlenp)
@@ -32,7 +31,7 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
* '<int> <int> obj'. We expect the next thing we see to be a
* pdf object. Regardless of the type of thing we meet next
* we only need to fully parse it if it is a dictionary. */
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok == PDF_TOK_OPEN_DICT)
{
@@ -41,7 +40,7 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
/* Send NULL xref so we don't try to resolve references */
fz_try(ctx)
{
- dict = pdf_parse_dict(doc, file, buf);
+ dict = pdf_parse_dict(ctx, doc, file, buf);
}
fz_catch(ctx)
{
@@ -50,45 +49,45 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
if (file->eof)
fz_rethrow_message(ctx, "broken object at EOF ignored");
/* Silently swallow the error */
- dict = pdf_new_dict(doc, 2);
+ dict = pdf_new_dict(ctx, doc, 2);
}
if (encrypt && id)
{
- obj = pdf_dict_gets(dict, "Type");
- if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "XRef"))
+ obj = pdf_dict_gets(ctx, dict, "Type");
+ if (pdf_is_name(ctx, obj) && !strcmp(pdf_to_name(ctx, obj), "XRef"))
{
- obj = pdf_dict_gets(dict, "Encrypt");
+ obj = pdf_dict_gets(ctx, dict, "Encrypt");
if (obj)
{
- pdf_drop_obj(*encrypt);
- *encrypt = pdf_keep_obj(obj);
+ pdf_drop_obj(ctx, *encrypt);
+ *encrypt = pdf_keep_obj(ctx, obj);
}
- obj = pdf_dict_gets(dict, "ID");
+ obj = pdf_dict_gets(ctx, dict, "ID");
if (obj)
{
- pdf_drop_obj(*id);
- *id = pdf_keep_obj(obj);
+ pdf_drop_obj(ctx, *id);
+ *id = pdf_keep_obj(ctx, obj);
}
}
}
- obj = pdf_dict_gets(dict, "Length");
- if (!pdf_is_indirect(obj) && pdf_is_int(obj))
- stm_len = pdf_to_int(obj);
+ obj = pdf_dict_gets(ctx, dict, "Length");
+ if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj))
+ stm_len = pdf_to_int(ctx, obj);
if (doc->file_reading_linearly && page)
{
- obj = pdf_dict_gets(dict, "Type");
- if (!strcmp(pdf_to_name(obj), "Page"))
+ obj = pdf_dict_gets(ctx, dict, "Type");
+ if (!strcmp(pdf_to_name(ctx, obj), "Page"))
{
- pdf_drop_obj(*page);
- *page = pdf_keep_obj(dict);
+ pdf_drop_obj(ctx, *page);
+ *page = pdf_keep_obj(ctx, dict);
}
}
- pdf_drop_obj(dict);
+ pdf_drop_obj(ctx, dict);
}
while ( tok != PDF_TOK_STREAM &&
@@ -97,31 +96,31 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
tok != PDF_TOK_EOF &&
tok != PDF_TOK_INT )
{
- *tmpofs = fz_tell(file);
+ *tmpofs = fz_tell(ctx, file);
if (*tmpofs < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
}
if (tok == PDF_TOK_STREAM)
{
- int c = fz_read_byte(file);
+ int c = fz_read_byte(ctx, file);
if (c == '\r') {
- c = fz_peek_byte(file);
+ c = fz_peek_byte(ctx, file);
if (c == '\n')
- fz_read_byte(file);
+ fz_read_byte(ctx, file);
}
- *stmofsp = fz_tell(file);
+ *stmofsp = fz_tell(ctx, file);
if (*stmofsp < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot seek in file");
if (stm_len > 0)
{
- fz_seek(file, *stmofsp + stm_len, 0);
+ fz_seek(ctx, file, *stmofsp + stm_len, 0);
fz_try(ctx)
{
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
}
fz_catch(ctx)
{
@@ -130,14 +129,14 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
}
if (tok == PDF_TOK_ENDSTREAM)
goto atobjend;
- fz_seek(file, *stmofsp, 0);
+ fz_seek(ctx, file, *stmofsp, 0);
}
- (void)fz_read(file, (unsigned char *) buf->scratch, 9);
+ (void)fz_read(ctx, file, (unsigned char *) buf->scratch, 9);
while (memcmp(buf->scratch, "endstream", 9) != 0)
{
- c = fz_read_byte(file);
+ c = fz_read_byte(ctx, file);
if (c == EOF)
break;
memmove(&buf->scratch[0], &buf->scratch[1], 8);
@@ -145,35 +144,34 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
}
if (stmlenp)
- *stmlenp = fz_tell(file) - *stmofsp - 9;
+ *stmlenp = fz_tell(ctx, file) - *stmofsp - 9;
atobjend:
- *tmpofs = fz_tell(file);
+ *tmpofs = fz_tell(ctx, file);
if (*tmpofs < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
if (tok != PDF_TOK_ENDOBJ)
fz_warn(ctx, "object missing 'endobj' token");
else
{
/* Read another token as we always return the next one */
- *tmpofs = fz_tell(file);
+ *tmpofs = fz_tell(ctx, file);
if (*tmpofs < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
- tok = pdf_lex(file, buf);
+ tok = pdf_lex(ctx, file, buf);
}
}
return tok;
}
static void
-pdf_repair_obj_stm(pdf_document *doc, int num, int gen)
+pdf_repair_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen)
{
pdf_obj *obj;
fz_stream *stm = NULL;
pdf_token tok;
int i, n, count;
- fz_context *ctx = doc->ctx;
pdf_lexbuf buf;
fz_var(stm);
@@ -182,19 +180,19 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen)
fz_try(ctx)
{
- obj = pdf_load_object(doc, num, gen);
+ obj = pdf_load_object(ctx, doc, num, gen);
- count = pdf_to_int(pdf_dict_gets(obj, "N"));
+ count = pdf_to_int(ctx, pdf_dict_gets(ctx, obj, "N"));
- pdf_drop_obj(obj);
+ pdf_drop_obj(ctx, obj);
- stm = pdf_open_stream(doc, num, gen);
+ stm = pdf_open_stream(ctx, doc, num, gen);
for (i = 0; i < count; i++)
{
pdf_xref_entry *entry;
- tok = pdf_lex(stm, &buf);
+ tok = pdf_lex(ctx, stm, &buf);
if (tok != PDF_TOK_INT)
fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen);
@@ -204,29 +202,29 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen)
fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i);
continue;
}
- else if (n >= pdf_xref_len(doc))
+ else if (n >= pdf_xref_len(ctx, doc))
{
fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i);
continue;
}
- entry = pdf_get_populating_xref_entry(doc, n);
+ entry = pdf_get_populating_xref_entry(ctx, doc, n);
entry->ofs = num;
entry->gen = i;
entry->stm_ofs = 0;
- pdf_drop_obj(entry->obj);
+ pdf_drop_obj(ctx, entry->obj);
entry->obj = NULL;
entry->type = 'o';
- tok = pdf_lex(stm, &buf);
+ tok = pdf_lex(ctx, stm, &buf);
if (tok != PDF_TOK_INT)
fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen);
}
}
fz_always(ctx)
{
- fz_drop_stream(stm);
- pdf_lexbuf_fin(&buf);
+ fz_drop_stream(ctx, stm);
+ pdf_lexbuf_fin(ctx, &buf);
}
fz_catch(ctx)
{
@@ -235,7 +233,7 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen)
}
void
-pdf_repair_xref(pdf_document *doc)
+pdf_repair_xref(fz_context *ctx, pdf_document *doc)
{
pdf_obj *dict, *obj = NULL;
pdf_obj *length;
@@ -257,7 +255,6 @@ pdf_repair_xref(pdf_document *doc)
pdf_token tok;
int next;
int i, n, c;
- fz_context *ctx = doc->ctx;
pdf_lexbuf *buf = &doc->lexbuf.base;
fz_var(encrypt);
@@ -268,14 +265,14 @@ pdf_repair_xref(pdf_document *doc)
fz_var(obj);
if (doc->repair_attempted)
- fz_throw(doc->ctx, FZ_ERROR_GENERIC, "Repair failed already - not trying again");
+ fz_throw(ctx, FZ_ERROR_GENERIC, "Repair failed already - not trying again");
doc->repair_attempted = 1;
doc->dirty = 1;
/* Can't support incremental update after repair */
doc->freeze_updates = 1;
- fz_seek(doc->file, 0, 0);
+ fz_seek(ctx, doc->file, 0, 0);
fz_try(ctx)
{
@@ -285,34 +282,34 @@ pdf_repair_xref(pdf_document *doc)
list = fz_malloc_array(ctx, listcap, sizeof(struct entry));
/* look for '%PDF' version marker within first kilobyte of file */
- n = fz_read(doc->file, (unsigned char *)buf->scratch, fz_mini(buf->size, 1024));
+ n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, fz_mini(buf->size, 1024));
- fz_seek(doc->file, 0, 0);
+ fz_seek(ctx, doc->file, 0, 0);
for (i = 0; i < n - 4; i++)
{
if (memcmp(&buf->scratch[i], "%PDF", 4) == 0)
{
- fz_seek(doc->file, i + 8, 0); /* skip "%PDF-X.Y" */
+ fz_seek(ctx, doc->file, i + 8, 0); /* skip "%PDF-X.Y" */
break;
}
}
/* skip comment line after version marker since some generators
* forget to terminate the comment with a newline */
- c = fz_read_byte(doc->file);
+ c = fz_read_byte(ctx, doc->file);
while (c >= 0 && (c == ' ' || c == '%'))
- c = fz_read_byte(doc->file);
- fz_unread_byte(doc->file);
+ c = fz_read_byte(ctx, doc->file);
+ fz_unread_byte(ctx, doc->file);
while (1)
{
- tmpofs = fz_tell(doc->file);
+ tmpofs = fz_tell(ctx, doc->file);
if (tmpofs < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
fz_try(ctx)
{
- tok = pdf_lex_no_string(doc->file, buf);
+ tok = pdf_lex_no_string(ctx, doc->file, buf);
}
fz_catch(ctx)
{
@@ -346,7 +343,7 @@ pdf_repair_xref(pdf_document *doc)
{
stm_len = 0;
stm_ofs = 0;
- tok = pdf_repair_obj(doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs);
+ tok = pdf_repair_obj(ctx, doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs);
}
fz_catch(ctx)
{
@@ -394,7 +391,7 @@ pdf_repair_xref(pdf_document *doc)
{
fz_try(ctx)
{
- dict = pdf_parse_dict(doc, doc->file, buf);
+ dict = pdf_parse_dict(ctx, doc, doc->file, buf);
}
fz_catch(ctx)
{
@@ -406,35 +403,35 @@ pdf_repair_xref(pdf_document *doc)
continue;
}
- obj = pdf_dict_gets(dict, "Encrypt");
+ obj = pdf_dict_gets(ctx, dict, "Encrypt");
if (obj)
{
- pdf_drop_obj(encrypt);
- encrypt = pdf_keep_obj(obj);
+ pdf_drop_obj(ctx, encrypt);
+ encrypt = pdf_keep_obj(ctx, obj);
}
- obj = pdf_dict_gets(dict, "ID");
- if (obj && (!id || !encrypt || pdf_dict_gets(dict, "Encrypt")))
+ obj = pdf_dict_gets(ctx, dict, "ID");
+ if (obj && (!id || !encrypt || pdf_dict_gets(ctx, dict, "Encrypt")))
{
- pdf_drop_obj(id);
- id = pdf_keep_obj(obj);
+ pdf_drop_obj(ctx, id);
+ id = pdf_keep_obj(ctx, obj);
}
- obj = pdf_dict_gets(dict, "Root");
+ obj = pdf_dict_gets(ctx, dict, "Root");
if (obj)
{
- pdf_drop_obj(root);
- root = pdf_keep_obj(obj);
+ pdf_drop_obj(ctx, root);
+ root = pdf_keep_obj(ctx, obj);
}
- obj = pdf_dict_gets(dict, "Info");
+ obj = pdf_dict_gets(ctx, dict, "Info");
if (obj)
{
- pdf_drop_obj(info);
- info = pdf_keep_obj(obj);
+ pdf_drop_obj(ctx, info);
+ info = pdf_keep_obj(ctx, obj);
}
- pdf_drop_obj(dict);
+ pdf_drop_obj(ctx, dict);
obj = NULL;
}
@@ -443,7 +440,7 @@ pdf_repair_xref(pdf_document *doc)
else
{
if (tok == PDF_TOK_ERROR)
- fz_read_byte(doc->file);
+ fz_read_byte(ctx, doc->file);
num = 0;
gen = 0;
}
@@ -458,11 +455,11 @@ pdf_repair_xref(pdf_document *doc)
*/
/* Ensure that the first xref table is a 'solid' one from
* 0 to maxnum. */
- pdf_ensure_solid_xref(doc, maxnum);
+ pdf_ensure_solid_xref(ctx, doc, maxnum);
for (i = 0; i < listlen; i++)
{
- entry = pdf_get_populating_xref_entry(doc, list[i].num);
+ entry = pdf_get_populating_xref_entry(ctx, doc, list[i].num);
entry->type = 'n';
entry->ofs = list[i].ofs;
entry->gen = list[i].gen;
@@ -472,26 +469,26 @@ pdf_repair_xref(pdf_document *doc)
/* correct stream length for unencrypted documents */
if (!encrypt && list[i].stm_len >= 0)
{
- dict = pdf_load_object(doc, list[i].num, list[i].gen);
+ dict = pdf_load_object(ctx, doc, list[i].num, list[i].gen);
- length = pdf_new_int(doc, list[i].stm_len);
- pdf_dict_puts(dict, "Length", length);
- pdf_drop_obj(length);
+ length = pdf_new_int(ctx, doc, list[i].stm_len);
+ pdf_dict_puts(ctx, dict, "Length", length);
+ pdf_drop_obj(ctx, length);
- pdf_drop_obj(dict);
+ pdf_drop_obj(ctx, dict);
}
}
- entry = pdf_get_populating_xref_entry(doc, 0);
+ entry = pdf_get_populating_xref_entry(ctx, doc, 0);
entry->type = 'f';
entry->ofs = 0;
entry->gen = 65535;
entry->stm_ofs = 0;
next = 0;
- for (i = pdf_xref_len(doc) - 1; i >= 0; i--)
+ for (i = pdf_xref_len(ctx, doc) - 1; i >= 0; i--)
{
- entry = pdf_get_populating_xref_entry(doc, i);
+ entry = pdf_get_populating_xref_entry(ctx, doc, i);
if (entry->type == 'f')
{
entry->ofs = next;
@@ -503,57 +500,57 @@ pdf_repair_xref(pdf_document *doc)
/* create a repaired trailer, Root will be added later */
- obj = pdf_new_dict(doc, 5);
+ obj = pdf_new_dict(ctx, doc, 5);
/* During repair there is only a single xref section */
- pdf_set_populating_xref_trailer(doc, obj);
- pdf_drop_obj(obj);
+ pdf_set_populating_xref_trailer(ctx, doc, obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
- obj = pdf_new_int(doc, maxnum + 1);
- pdf_dict_puts(pdf_trailer(doc), "Size", obj);
- pdf_drop_obj(obj);
+ obj = pdf_new_int(ctx, doc, maxnum + 1);
+ pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "Size", obj);
+ pdf_drop_obj(ctx, obj);
obj = NULL;
if (root)
{
- pdf_dict_puts(pdf_trailer(doc), "Root", root);
- pdf_drop_obj(root);
+ pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "Root", root);
+ pdf_drop_obj(ctx, root);
root = NULL;
}
if (info)
{
- pdf_dict_puts(pdf_trailer(doc), "Info", info);
- pdf_drop_obj(info);
+ pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "Info", info);
+ pdf_drop_obj(ctx, info);
info = NULL;
}
if (encrypt)
{
- if (pdf_is_indirect(encrypt))
+ if (pdf_is_indirect(ctx, encrypt))
{
/* create new reference with non-NULL xref pointer */
- obj = pdf_new_indirect(doc, pdf_to_num(encrypt), pdf_to_gen(encrypt));
- pdf_drop_obj(encrypt);
+ obj = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, encrypt), pdf_to_gen(ctx, encrypt));
+ pdf_drop_obj(ctx, encrypt);
encrypt = obj;
obj = NULL;
}
- pdf_dict_puts(pdf_trailer(doc), "Encrypt", encrypt);
- pdf_drop_obj(encrypt);
+ pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "Encrypt", encrypt);
+ pdf_drop_obj(ctx, encrypt);
encrypt = NULL;
}
if (id)
{
- if (pdf_is_indirect(id))
+ if (pdf_is_indirect(ctx, id))
{
/* create new reference with non-NULL xref pointer */
- obj = pdf_new_indirect(doc, pdf_to_num(id), pdf_to_gen(id));
- pdf_drop_obj(id);
+ obj = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, id), pdf_to_gen(ctx, id));
+ pdf_drop_obj(ctx, id);
id = obj;
obj = NULL;
}
- pdf_dict_puts(pdf_trailer(doc), "ID", id);
- pdf_drop_obj(id);
+ pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "ID", id);
+ pdf_drop_obj(ctx, id);
id = NULL;
}
@@ -561,50 +558,49 @@ pdf_repair_xref(pdf_document *doc)
}
fz_catch(ctx)
{
- pdf_drop_obj(encrypt);
- pdf_drop_obj(id);
- pdf_drop_obj(root);
- pdf_drop_obj(obj);
- pdf_drop_obj(info);
+ pdf_drop_obj(ctx, encrypt);
+ pdf_drop_obj(ctx, id);
+ pdf_drop_obj(ctx, root);
+ pdf_drop_obj(ctx, obj);
+ pdf_drop_obj(ctx, info);
fz_free(ctx, list);
fz_rethrow(ctx);
}
}
void
-pdf_repair_obj_stms(pdf_document *doc)
+pdf_repair_obj_stms(fz_context *ctx, pdf_document *doc)
{
- fz_context *ctx = doc->ctx;
pdf_obj *dict;
int i;
- int xref_len = pdf_xref_len(doc);
+ int xref_len = pdf_xref_len(ctx, doc);
for (i = 0; i < xref_len; i++)
{
- pdf_xref_entry *entry = pdf_get_populating_xref_entry(doc, i);
+ pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i);
if (entry->stm_ofs)
{
- dict = pdf_load_object(doc, i, 0);
+ dict = pdf_load_object(ctx, doc, i, 0);
fz_try(ctx)
{
- if (!strcmp(pdf_to_name(pdf_dict_gets(dict, "Type")), "ObjStm"))
- pdf_repair_obj_stm(doc, i, 0);
+ if (!strcmp(pdf_to_name(ctx, pdf_dict_gets(ctx, dict, "Type")), "ObjStm"))
+ pdf_repair_obj_stm(ctx, doc, i, 0);
}
fz_catch(ctx)
{
fz_warn(ctx, "ignoring broken object stream (%d 0 R)", i);
}
- pdf_drop_obj(dict);
+ pdf_drop_obj(ctx, dict);
}
}
/* Ensure that streamed objects reside inside a known non-streamed object */
for (i = 0; i < xref_len; i++)
{
- pdf_xref_entry *entry = pdf_get_populating_xref_entry(doc, i);
+ pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i);
- if (entry->type == 'o' && pdf_get_populating_xref_entry(doc, entry->ofs)->type != 'n')
- fz_throw(doc->ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)", entry->ofs, i);
+ if (entry->type == 'o' && pdf_get_populating_xref_entry(ctx, doc, entry->ofs)->type != 'n')
+ fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)", entry->ofs, i);
}
}