summaryrefslogtreecommitdiff
path: root/source/pdf/pdf-xref.c
diff options
context:
space:
mode:
author: Robin Watts <robin.watts@artifex.com> 2015-05-14 17:12:42 +0100
committer: Robin Watts <robin.watts@artifex.com> 2015-05-15 13:06:56 +0100
commit: 7d5ff30c37c9e5b271fdb2b8cb3219068048322e (patch)
tree: 5f60d1d03235f2cff161207e00515c5a4a69ef73 /source/pdf/pdf-xref.c
parent: 250e8a11e1debfbd9c4fc84ad895bf923aac135e (diff)
download: mupdf-7d5ff30c37c9e5b271fdb2b8cb3219068048322e.tar.xz
Support pdf files larger than 2Gig.
If FZ_LARGEFILE is defined when building, MuPDF uses 64-bit offsets for files; this allows us to open streams larger than 2Gig.

The downsides to this are that:
* The xref entries are larger.
* All PDF ints are held as 64-bit things rather than 32-bit things (to cope with /Prev entries, hint stream offsets etc).
* All file positions are stored as 64 bits rather than 32.

The implementation works by detecting FZ_LARGEFILE. Some #ifdeffery in fitz/system.h sets fz_off_t to either int or int64_t as appropriate, and sets defines for fz_fopen, fz_fseek, fz_ftell etc as required. These call the fseeko64 etc functions on linux (and so define _LARGEFILE64_SOURCE) and the explicit 64-bit functions on windows.
Diffstat (limited to 'source/pdf/pdf-xref.c')
-rw-r--r-- source/pdf/pdf-xref.c 95
1 files changed, 50 insertions, 45 deletions
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 0ec3b6d4..bba189a2 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -267,7 +267,7 @@ pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
/* Didn't find the entry in any section. Return the entry from
* the final section. */
doc->xref_index[i] = 0;
- if (i < xref->num_objects)
+ if (xref == NULL || i < xref->num_objects)
{
xref = &doc->xref_sections[0];
for (sub = xref->subsec; sub != NULL; sub = sub->next)
@@ -478,14 +478,14 @@ static void
pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
{
unsigned char buf[1024];
- int t, n;
- int i;
+ int i, n;
+ fz_off_t t;
fz_seek(ctx, doc->file, 0, SEEK_END);
doc->file_size = fz_tell(ctx, doc->file);
- t = fz_maxi(0, doc->file_size - (int)sizeof buf);
+ t = fz_maxo(0, doc->file_size - (fz_off_t)sizeof buf);
fz_seek(ctx, doc->file, t, SEEK_SET);
n = fz_read(ctx, doc->file, buf, sizeof buf);
@@ -522,7 +522,7 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
pdf_token tok;
int c;
int size;
- int ofs;
+ fz_off_t ofs;
pdf_obj *trailer = NULL;
fz_var(trailer);
@@ -601,7 +601,7 @@ pdf_new_ref(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
}
static pdf_xref_entry *
-pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int ofs, int len)
+pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, fz_off_t ofs, int len)
{
pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
pdf_xref_subsec *sub;
@@ -663,11 +663,12 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
{
fz_stream *file = doc->file;
- int ofs, len;
+ fz_off_t ofs;
+ int len;
char *s;
int n;
pdf_token tok;
- int i;
+ fz_off_t i;
int c;
pdf_obj *trailer;
int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf);
@@ -685,7 +686,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
fz_read_line(ctx, file, buf->scratch, buf->size);
s = buf->scratch;
- ofs = fz_atoi(fz_strsep(&s, " "));
+ ofs = fz_atoo(fz_strsep(&s, " "));
len = fz_atoi(fz_strsep(&s, " "));
/* broken pdfs where the section is not on a separate line */
@@ -696,7 +697,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
}
if (ofs < 0)
- fz_throw(ctx, FZ_ERROR_GENERIC, "out of range object num in xref: %d", ofs);
+ fz_throw(ctx, FZ_ERROR_GENERIC, "out of range object num in xref: %d", (int)ofs);
/* broken pdfs where size in trailer undershoots entries in xref sections */
if (ofs + len > xref_len)
@@ -720,11 +721,11 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
while (*s != '\0' && iswhite(*s))
s++;
- entry->ofs = atoi(s);
- entry->gen = atoi(s + 11);
+ entry->ofs = fz_atoo(s);
+ entry->gen = fz_atoi(s + 11);
entry->type = s[17];
if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
- fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], i, entry->gen);
+ fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], (int)i, entry->gen);
}
}
}
@@ -749,7 +750,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
}
static void
-pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
+pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, fz_off_t i0, int i1, int w0, int w1, int w2)
{
pdf_xref_entry *table;
int i, n;
@@ -764,7 +765,7 @@ pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, in
{
pdf_xref_entry *entry = &table[i-i0];
int a = 0;
- int b = 0;
+ fz_off_t b = 0;
int c = 0;
if (fz_is_eof(ctx, stm))
@@ -797,7 +798,8 @@ pdf_read_new_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
pdf_obj *trailer = NULL;
pdf_obj *index = NULL;
pdf_obj *obj = NULL;
- int num, gen, ofs, stm_ofs;
+ int num, gen;
+ fz_off_t ofs, stm_ofs;
int size, w0, w1, w2;
int t;
@@ -883,7 +885,7 @@ pdf_read_new_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
}
static pdf_obj *
-pdf_read_xref(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf)
+pdf_read_xref(fz_context *ctx, pdf_document *doc, fz_off_t ofs, pdf_lexbuf *buf)
{
pdf_obj *trailer;
int c;
@@ -905,7 +907,7 @@ pdf_read_xref(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf)
}
fz_catch(ctx)
{
- fz_rethrow_message(ctx, "cannot read xref (ofs=%d)", ofs);
+ fz_rethrow_message(ctx, "cannot read xref (ofs=%d)", (int)ofs);
}
return trailer;
}
@@ -916,15 +918,15 @@ struct ofs_list_s
{
int max;
int len;
- int *list;
+ fz_off_t *list;
};
-static int
-read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets)
+static fz_off_t
+read_xref_section(fz_context *ctx, pdf_document *doc, fz_off_t ofs, pdf_lexbuf *buf, ofs_list *offsets)
{
pdf_obj *trailer = NULL;
- int xrefstmofs = 0;
- int prevofs = 0;
+ fz_off_t xrefstmofs = 0;
+ fz_off_t prevofs = 0;
fz_var(trailer);
@@ -939,12 +941,12 @@ read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf,
}
if (i < offsets->len)
{
- fz_warn(ctx, "ignoring xref recursion with offset %d", ofs);
+ fz_warn(ctx, "ignoring xref recursion with offset %d", (int)ofs);
break;
}
if (offsets->len == offsets->max)
{
- offsets->list = fz_resize_array(ctx, offsets->list, offsets->max*2, sizeof(int));
+ offsets->list = fz_resize_array(ctx, offsets->list, offsets->max*2, sizeof(*offsets->list));
offsets->max *= 2;
}
offsets->list[offsets->len++] = ofs;
@@ -955,7 +957,7 @@ read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf,
/* FIXME: do we overwrite free entries properly? */
/* FIXME: Does this work properly with progression? */
- xrefstmofs = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_XRefStm));
+ xrefstmofs = pdf_to_offset(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_XRefStm));
if (xrefstmofs)
{
if (xrefstmofs < 0)
@@ -969,7 +971,8 @@ read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf,
pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs, buf));
}
- prevofs = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_Prev));
+ /* FIXME: pdf_to_offset? */
+ prevofs = pdf_to_offset(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_Prev));
if (prevofs < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset for previous xref stream");
}
@@ -979,20 +982,20 @@ read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf,
}
fz_catch(ctx)
{
- fz_rethrow_message(ctx, "cannot read xref at offset %d", ofs);
+ fz_rethrow_message(ctx, "cannot read xref at offset %d", (int)ofs);
}
return prevofs;
}
static void
-pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_previous)
+pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, fz_off_t ofs, pdf_lexbuf *buf, int read_previous)
{
ofs_list list;
list.len = 0;
list.max = 10;
- list.list = fz_malloc_array(ctx, 10, sizeof(int));
+ list.list = fz_malloc_array(ctx, 10, sizeof(*list.list));
fz_try(ctx)
{
while(ofs)
@@ -1084,11 +1087,11 @@ pdf_load_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
if (entry->ofs == 0)
entry->type = 'f';
else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
- fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", entry->ofs, i);
+ fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i);
}
if (entry->type == 'o')
if (entry->ofs <= 0 || entry->ofs >= xref_len || pdf_get_xref_entry(ctx, doc, entry->ofs)->type != 'n')
- fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", entry->ofs, i);
+ fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)entry->ofs, i);
}
}
@@ -1098,7 +1101,8 @@ pdf_load_linear(fz_context *ctx, pdf_document *doc)
pdf_obj *dict = NULL;
pdf_obj *hint = NULL;
pdf_obj *o;
- int num, gen, stmofs, lin, len;
+ int num, gen, lin, len;
+ fz_off_t stmofs;
fz_var(dict);
fz_var(hint);
@@ -1535,10 +1539,10 @@ pdf_print_xref(fz_context *ctx, pdf_document *doc)
{
pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i,
- entry->ofs,
+ (int)entry->ofs,
entry->gen,
entry->type ? entry->type : '-',
- entry->stm_ofs,
+ (int)entry->stm_ofs,
entry->stm_buf);
}
}
@@ -1553,10 +1557,10 @@ pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_lexbu
fz_stream *stm = NULL;
pdf_obj *objstm = NULL;
int *numbuf = NULL;
- int *ofsbuf = NULL;
+ fz_off_t *ofsbuf = NULL;
pdf_obj *obj;
- int first;
+ fz_off_t first;
int count;
int i;
pdf_token tok;
@@ -1579,8 +1583,8 @@ pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_lexbu
if (first < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "first object in object stream resides outside stream");
- numbuf = fz_calloc(ctx, count, sizeof(int));
- ofsbuf = fz_calloc(ctx, count, sizeof(int));
+ numbuf = fz_calloc(ctx, count, sizeof(*numbuf));
+ ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf));
stm = pdf_open_stream(ctx, doc, num, gen);
for (i = 0; i < count; i++)
@@ -1659,13 +1663,13 @@ pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_lexbu
* object loading
*/
static int
-pdf_obj_read(fz_context *ctx, pdf_document *doc, int *offset, int *nump, pdf_obj **page)
+pdf_obj_read(fz_context *ctx, pdf_document *doc, fz_off_t *offset, int *nump, pdf_obj **page)
{
pdf_lexbuf *buf = &doc->lexbuf.base;
- int num, numofs, gen, genofs, stmofs, tmpofs, tok;
+ int num, gen, tok;
+ fz_off_t numofs, genofs, stmofs, tmpofs, newtmpofs;
int xref_len;
pdf_xref_entry *entry;
- int newtmpofs;
numofs = *offset;
fz_seek(ctx, doc->file, numofs, SEEK_SET);
@@ -1824,7 +1828,7 @@ read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
* there. */
int expected = num;
int curr_pos;
- int start, offset;
+ fz_off_t start, offset;
while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
expected--;
@@ -2500,7 +2504,7 @@ static void
pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
{
pdf_lexbuf *buf = &doc->lexbuf.base;
- int curr_pos;
+ fz_off_t curr_pos;
curr_pos = fz_tell(ctx, doc->file);
fz_seek(ctx, doc->file, doc->hint_object_offset, SEEK_SET);
@@ -2509,7 +2513,8 @@ pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
while (1)
{
pdf_obj *page = NULL;
- int tmpofs, num, gen, tok;
+ fz_off_t tmpofs;
+ int num, gen, tok;
tok = pdf_lex(ctx, doc->file, buf);
if (tok != PDF_TOK_INT)