summary | refs | log | tree | commit | diff
path: root/pdf/pdf_repair.c
diff options
context:
space:
mode:
Diffstat (limited to 'pdf/pdf_repair.c')
-rw-r--r-- pdf/pdf_repair.c 183
1 files changed, 92 insertions, 91 deletions
diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c
index 0dc0e132..fda1e6b5 100644
--- a/pdf/pdf_repair.c
+++ b/pdf/pdf_repair.c
@@ -1,5 +1,5 @@
-#include "fitz.h"
-#include "mupdf.h"
+#include "fitz-internal.h"
+#include "mupdf-internal.h"
/* Scan file for objects and reconstruct xref table */
@@ -13,11 +13,10 @@ struct entry
};
static void
-pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
+pdf_repair_obj(fz_stream *file, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id)
{
int tok;
int stm_len;
- int len;
int n;
fz_context *ctx = file->ctx;
@@ -26,16 +25,16 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
stm_len = 0;
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot parse object" */
if (tok == PDF_TOK_OPEN_DICT)
{
- fz_obj *dict, *obj;
+ pdf_obj *dict, *obj;
/* Send NULL xref so we don't try to resolve references */
fz_try(ctx)
{
- dict = pdf_parse_dict(NULL, file, buf, cap);
+ dict = pdf_parse_dict(NULL, file, buf);
}
fz_catch(ctx)
{
@@ -43,34 +42,34 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
if (file->eof)
fz_throw(ctx, "broken object at EOF ignored");
/* Silently swallow the error */
- dict = fz_new_dict(ctx, 2);
+ dict = pdf_new_dict(ctx, 2);
}
- obj = fz_dict_gets(dict, "Type");
- if (fz_is_name(obj) && !strcmp(fz_to_name(obj), "XRef"))
+ obj = pdf_dict_gets(dict, "Type");
+ if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "XRef"))
{
- obj = fz_dict_gets(dict, "Encrypt");
+ obj = pdf_dict_gets(dict, "Encrypt");
if (obj)
{
if (*encrypt)
- fz_drop_obj(*encrypt);
- *encrypt = fz_keep_obj(obj);
+ pdf_drop_obj(*encrypt);
+ *encrypt = pdf_keep_obj(obj);
}
- obj = fz_dict_gets(dict, "ID");
+ obj = pdf_dict_gets(dict, "ID");
if (obj)
{
if (*id)
- fz_drop_obj(*id);
- *id = fz_keep_obj(obj);
+ pdf_drop_obj(*id);
+ *id = pdf_keep_obj(obj);
}
}
- obj = fz_dict_gets(dict, "Length");
- if (!fz_is_indirect(obj) && fz_is_int(obj))
- stm_len = fz_to_int(obj);
+ obj = pdf_dict_gets(dict, "Length");
+ if (!pdf_is_indirect(obj) && pdf_is_int(obj))
+ stm_len = pdf_to_int(obj);
- fz_drop_obj(dict);
+ pdf_drop_obj(dict);
}
while ( tok != PDF_TOK_STREAM &&
@@ -79,13 +78,13 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
tok != PDF_TOK_EOF &&
tok != PDF_TOK_INT )
{
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot scan for endobj or stream token" */
}
if (tok == PDF_TOK_INT)
{
- while (len-- > 0)
+ while (buf->len-- > 0)
fz_unread_byte(file);
}
else if (tok == PDF_TOK_STREAM)
@@ -106,7 +105,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
fz_seek(file, *stmofsp + stm_len, 0);
fz_try(ctx)
{
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
}
fz_catch(ctx)
{
@@ -117,23 +116,23 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
fz_seek(file, *stmofsp, 0);
}
- n = fz_read(file, (unsigned char *) buf, 9);
+ n = fz_read(file, (unsigned char *) buf->scratch, 9);
if (n < 0)
fz_throw(ctx, "cannot read from file");
- while (memcmp(buf, "endstream", 9) != 0)
+ while (memcmp(buf->scratch, "endstream", 9) != 0)
{
c = fz_read_byte(file);
if (c == EOF)
break;
- memmove(buf, buf + 1, 8);
- buf[8] = c;
+ memmove(&buf->scratch[0], &buf->scratch[1], 8);
+ buf->scratch[8] = c;
}
*stmlenp = fz_tell(file) - *stmofsp - 9;
atobjend:
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot scan for endobj token" */
if (tok != PDF_TOK_ENDOBJ)
fz_warn(ctx, "object missing 'endobj' token");
@@ -143,43 +142,45 @@ atobjend:
static void
pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
{
- fz_obj *obj;
+ pdf_obj *obj;
fz_stream *stm = NULL;
int tok;
int i, n, count;
- char buf[256];
fz_context *ctx = xref->ctx;
+ pdf_lexbuf buf;
fz_var(stm);
+ buf.size = PDF_LEXBUF_SMALL;
+
fz_try(ctx)
{
obj = pdf_load_object(xref, num, gen);
- count = fz_to_int(fz_dict_gets(obj, "N"));
+ count = pdf_to_int(pdf_dict_gets(obj, "N"));
- fz_drop_obj(obj);
+ pdf_drop_obj(obj);
stm = pdf_open_stream(xref, num, gen);
for (i = 0; i < count; i++)
{
- tok = pdf_lex(stm, buf, sizeof buf, &n);
+ tok = pdf_lex(stm, &buf);
if (tok != PDF_TOK_INT)
fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
- n = atoi(buf);
+ n = buf.i;
if (n >= xref->len)
pdf_resize_xref(xref, n + 1);
xref->table[n].ofs = num;
xref->table[n].gen = i;
xref->table[n].stm_ofs = 0;
- fz_drop_obj(xref->table[n].obj);
+ pdf_drop_obj(xref->table[n].obj);
xref->table[n].obj = NULL;
xref->table[n].type = 'o';
- tok = pdf_lex(stm, buf, sizeof buf, &n);
+ tok = pdf_lex(stm, &buf);
if (tok != PDF_TOK_INT)
fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
}
@@ -195,15 +196,15 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
}
void
-pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
+pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf)
{
- fz_obj *dict, *obj;
- fz_obj *length;
+ pdf_obj *dict, *obj;
+ pdf_obj *length;
- fz_obj *encrypt = NULL;
- fz_obj *id = NULL;
- fz_obj *root = NULL;
- fz_obj *info = NULL;
+ pdf_obj *encrypt = NULL;
+ pdf_obj *id = NULL;
+ pdf_obj *root = NULL;
+ pdf_obj *info = NULL;
struct entry *list = NULL;
int listlen;
@@ -234,14 +235,14 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
list = fz_malloc_array(ctx, listcap, sizeof(struct entry));
/* look for '%PDF' version marker within first kilobyte of file */
- n = fz_read(xref->file, (unsigned char *)buf, MIN(bufsize, 1024));
+ n = fz_read(xref->file, (unsigned char *)buf->scratch, MIN(buf->size, 1024));
if (n < 0)
fz_throw(ctx, "cannot read from file");
fz_seek(xref->file, 0, 0);
for (i = 0; i < n - 4; i++)
{
- if (memcmp(buf + i, "%PDF", 4) == 0)
+ if (memcmp(&buf->scratch[i], "%PDF", 4) == 0)
{
fz_seek(xref->file, i + 8, 0); /* skip "%PDF-X.Y" */
break;
@@ -263,7 +264,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
fz_try(ctx)
{
- tok = pdf_lex(xref->file, buf, bufsize, &n);
+ tok = pdf_lex(xref->file, buf);
}
fz_catch(ctx)
{
@@ -276,14 +277,14 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
numofs = genofs;
num = gen;
genofs = tmpofs;
- gen = atoi(buf);
+ gen = buf->i;
}
else if (tok == PDF_TOK_OBJ)
{
fz_try(ctx)
{
- pdf_repair_obj(xref->file, buf, bufsize, &stm_ofs, &stm_len, &encrypt, &id);
+ pdf_repair_obj(xref->file, buf, &stm_ofs, &stm_len, &encrypt, &id);
}
fz_catch(ctx)
{
@@ -318,7 +319,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
{
fz_try(ctx)
{
- dict = pdf_parse_dict(xref, xref->file, buf, bufsize);
+ dict = pdf_parse_dict(xref, xref->file, buf);
}
fz_catch(ctx)
{
@@ -331,39 +332,39 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
break;
}
- obj = fz_dict_gets(dict, "Encrypt");
+ obj = pdf_dict_gets(dict, "Encrypt");
if (obj)
{
if (encrypt)
- fz_drop_obj(encrypt);
- encrypt = fz_keep_obj(obj);
+ pdf_drop_obj(encrypt);
+ encrypt = pdf_keep_obj(obj);
}
- obj = fz_dict_gets(dict, "ID");
+ obj = pdf_dict_gets(dict, "ID");
if (obj)
{
if (id)
- fz_drop_obj(id);
- id = fz_keep_obj(obj);
+ pdf_drop_obj(id);
+ id = pdf_keep_obj(obj);
}
- obj = fz_dict_gets(dict, "Root");
+ obj = pdf_dict_gets(dict, "Root");
if (obj)
{
if (root)
- fz_drop_obj(root);
- root = fz_keep_obj(obj);
+ pdf_drop_obj(root);
+ root = pdf_keep_obj(obj);
}
- obj = fz_dict_gets(dict, "Info");
+ obj = pdf_dict_gets(dict, "Info");
if (obj)
{
if (info)
- fz_drop_obj(info);
- info = fz_keep_obj(obj);
+ pdf_drop_obj(info);
+ info = pdf_keep_obj(obj);
}
- fz_drop_obj(dict);
+ pdf_drop_obj(dict);
}
else if (tok == PDF_TOK_ERROR)
@@ -393,11 +394,11 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
fz_lock(ctx, FZ_LOCK_FILE);
/* RJW: "cannot load stream object (%d %d R)", list[i].num, list[i].gen */
- length = fz_new_int(ctx, list[i].stm_len);
- fz_dict_puts(dict, "Length", length);
- fz_drop_obj(length);
+ length = pdf_new_int(ctx, list[i].stm_len);
+ pdf_dict_puts(dict, "Length", length);
+ pdf_drop_obj(length);
- fz_drop_obj(dict);
+ pdf_drop_obj(dict);
}
}
@@ -422,57 +423,57 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
/* create a repaired trailer, Root will be added later */
- xref->trailer = fz_new_dict(ctx, 5);
+ xref->trailer = pdf_new_dict(ctx, 5);
- obj = fz_new_int(ctx, maxnum + 1);
- fz_dict_puts(xref->trailer, "Size", obj);
- fz_drop_obj(obj);
+ obj = pdf_new_int(ctx, maxnum + 1);
+ pdf_dict_puts(xref->trailer, "Size", obj);
+ pdf_drop_obj(obj);
if (root)
{
- fz_dict_puts(xref->trailer, "Root", root);
- fz_drop_obj(root);
+ pdf_dict_puts(xref->trailer, "Root", root);
+ pdf_drop_obj(root);
}
if (info)
{
- fz_dict_puts(xref->trailer, "Info", info);
- fz_drop_obj(info);
+ pdf_dict_puts(xref->trailer, "Info", info);
+ pdf_drop_obj(info);
}
if (encrypt)
{
- if (fz_is_indirect(encrypt))
+ if (pdf_is_indirect(encrypt))
{
/* create new reference with non-NULL xref pointer */
- obj = fz_new_indirect(ctx, fz_to_num(encrypt), fz_to_gen(encrypt), xref);
- fz_drop_obj(encrypt);
+ obj = pdf_new_indirect(ctx, pdf_to_num(encrypt), pdf_to_gen(encrypt), xref);
+ pdf_drop_obj(encrypt);
encrypt = obj;
}
- fz_dict_puts(xref->trailer, "Encrypt", encrypt);
- fz_drop_obj(encrypt);
+ pdf_dict_puts(xref->trailer, "Encrypt", encrypt);
+ pdf_drop_obj(encrypt);
}
if (id)
{
- if (fz_is_indirect(id))
+ if (pdf_is_indirect(id))
{
/* create new reference with non-NULL xref pointer */
- obj = fz_new_indirect(ctx, fz_to_num(id), fz_to_gen(id), xref);
- fz_drop_obj(id);
+ obj = pdf_new_indirect(ctx, pdf_to_num(id), pdf_to_gen(id), xref);
+ pdf_drop_obj(id);
id = obj;
}
- fz_dict_puts(xref->trailer, "ID", id);
- fz_drop_obj(id);
+ pdf_dict_puts(xref->trailer, "ID", id);
+ pdf_drop_obj(id);
}
fz_free(ctx, list);
}
fz_catch(ctx)
{
- if (encrypt) fz_drop_obj(encrypt);
- if (id) fz_drop_obj(id);
- if (root) fz_drop_obj(root);
- if (info) fz_drop_obj(info);
+ if (encrypt) pdf_drop_obj(encrypt);
+ if (id) pdf_drop_obj(id);
+ if (root) pdf_drop_obj(root);
+ if (info) pdf_drop_obj(info);
fz_free(ctx, list);
fz_rethrow(ctx);
}
@@ -481,7 +482,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
void
pdf_repair_obj_stms(pdf_document *xref)
{
- fz_obj *dict;
+ pdf_obj *dict;
int i;
for (i = 0; i < xref->len; i++)
@@ -489,9 +490,9 @@ pdf_repair_obj_stms(pdf_document *xref)
if (xref->table[i].stm_ofs)
{
dict = pdf_load_object(xref, i, 0);
- if (!strcmp(fz_to_name(fz_dict_gets(dict, "Type")), "ObjStm"))
+ if (!strcmp(pdf_to_name(pdf_dict_gets(dict, "Type")), "ObjStm"))
pdf_repair_obj_stm(xref, i, 0);
- fz_drop_obj(dict);
+ pdf_drop_obj(dict);
}
}