From 7d5ff30c37c9e5b271fdb2b8cb3219068048322e Mon Sep 17 00:00:00 2001
From: Robin Watts <robin.watts@artifex.com>
Date: Thu, 14 May 2015 17:12:42 +0100
Subject: Support pdf files larger than 2Gig.

If FZ_LARGEFILE is defined when building, MuPDF uses 64-bit offsets
for files; this allows us to open streams larger than 2 GiB.

The downsides to this are that:

 * The xref entries are larger.

 * All PDF ints are held as 64-bit values rather than 32-bit values
   (to cope with /Prev entries, hint stream offsets, etc.).

 * All file positions are stored as 64 bits rather than 32.

The implementation works by detecting FZ_LARGEFILE. Some #ifdeffery
in fitz/system.h sets fz_off_t to either int or int64_t as appropriate,
and defines fz_fopen, fz_fseek, fz_ftell, etc. as required.

These call the fseeko64 etc. functions on Linux (which is why we
define _LARGEFILE64_SOURCE) and the explicit 64-bit functions on
Windows.
---
 source/pdf/pdf-interpret.c |  2 +-
 source/pdf/pdf-lex.c       |  2 +-
 source/pdf/pdf-object.c    | 28 +++++++++++++-
 source/pdf/pdf-parse.c     | 23 +++++------
 source/pdf/pdf-pkcs7.c     |  4 +-
 source/pdf/pdf-repair.c    |  8 ++--
 source/pdf/pdf-stream.c    |  6 +--
 source/pdf/pdf-write.c     | 50 ++++++++++++------------
 source/pdf/pdf-xref.c      | 95 ++++++++++++++++++++++++----------------------
 9 files changed, 124 insertions(+), 94 deletions(-)

(limited to 'source/pdf')

diff --git a/source/pdf/pdf-interpret.c b/source/pdf/pdf-interpret.c
index 40aa51ad..e1df2c68 100644
--- a/source/pdf/pdf-interpret.c
+++ b/source/pdf/pdf-interpret.c
@@ -1029,7 +1029,7 @@ pdf_process_stream(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, fz_stream
 						pdf_array_push_drop(ctx, csi->obj, pdf_new_real(ctx, doc, buf->f));
 						break;
 					case PDF_TOK_INT:
-						pdf_array_push_drop(ctx, csi->obj, pdf_new_int(ctx, doc, buf->i));
+						pdf_array_push_drop(ctx, csi->obj, pdf_new_int_offset(ctx, doc, buf->i));
 						break;
 					case PDF_TOK_STRING:
 						pdf_array_push_drop(ctx, csi->obj, pdf_new_string(ctx, doc, buf->scratch, buf->len));
diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c
index a8dfd916..cc5bdd09 100644
--- a/source/pdf/pdf-lex.c
+++ b/source/pdf/pdf-lex.c
@@ -67,7 +67,7 @@ static int
 lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
 {
 	int neg = 0;
-	int i = 0;
+	fz_off_t i = 0;
 	int n;
 	int d;
 	float v;
diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c
index f6c10b63..f0415e98 100644
--- a/source/pdf/pdf-object.c
+++ b/source/pdf/pdf-object.c
@@ -42,7 +42,7 @@ typedef struct pdf_obj_num_s
 	pdf_obj super;
 	union
 	{
-		int i;
+		fz_off_t i;
 		float f;
 	} u;
 } pdf_obj_num;
@@ -119,6 +119,18 @@ pdf_new_int(fz_context *ctx, pdf_document *doc, int i)
 	return &obj->super;
 }
 
+pdf_obj *
+pdf_new_int_offset(fz_context *ctx, pdf_document *doc, fz_off_t i)
+{
+	pdf_obj_num *obj;
+	obj = Memento_label(fz_malloc(ctx, sizeof(pdf_obj_num)), "pdf_obj(offset)");
+	obj->super.refs = 1;
+	obj->super.kind = PDF_INT;
+	obj->super.flags = 0;
+	obj->u.i = i;
+	return &obj->super;
+}
+
 pdf_obj *
 pdf_new_real(fz_context *ctx, pdf_document *doc, float f)
 {
@@ -268,12 +280,24 @@ int pdf_to_int(fz_context *ctx, pdf_obj *obj)
 	if (obj < PDF_OBJ__LIMIT)
 		return 0;
 	if (obj->kind == PDF_INT)
-		return NUM(obj)->u.i;
+		return (int)NUM(obj)->u.i;
 	if (obj->kind == PDF_REAL)
 		return (int)(NUM(obj)->u.f + 0.5f); /* No roundf in MSVC */
 	return 0;
 }
 
+fz_off_t pdf_to_offset(fz_context *ctx, pdf_obj *obj)
+{
+	RESOLVE(obj);
+	if (obj < PDF_OBJ__LIMIT)
+		return 0;
+	if (obj->kind == PDF_INT)
+		return NUM(obj)->u.i;
+	if (obj->kind == PDF_REAL)
+		return (fz_off_t)(NUM(obj)->u.f + 0.5f); /* No roundf in MSVC */
+	return 0;
+}
+
 float pdf_to_real(fz_context *ctx, pdf_obj *obj)
 {
 	RESOLVE(obj);
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 48761374..a5d37b22 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -229,7 +229,7 @@ pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf
 {
 	pdf_obj *ary = NULL;
 	pdf_obj *obj = NULL;
-	int a = 0, b = 0, n = 0;
+	fz_off_t a = 0, b = 0, n = 0;
 	pdf_token tok;
 	pdf_obj *op = NULL;
 
@@ -247,14 +247,14 @@ pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf
 			{
 				if (n > 0)
 				{
-					obj = pdf_new_int(ctx, doc, a);
+					obj = pdf_new_int_offset(ctx, doc, a);
 					pdf_array_push(ctx, ary, obj);
 					pdf_drop_obj(ctx, obj);
 					obj = NULL;
 				}
 				if (n > 1)
 				{
-					obj = pdf_new_int(ctx, doc, b);
+					obj = pdf_new_int_offset(ctx, doc, b);
 					pdf_array_push(ctx, ary, obj);
 					pdf_drop_obj(ctx, obj);
 					obj = NULL;
@@ -264,7 +264,7 @@ pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf
 
 			if (tok == PDF_TOK_INT && n == 2)
 			{
-				obj = pdf_new_int(ctx, doc, a);
+				obj = pdf_new_int_offset(ctx, doc, a);
 				pdf_array_push(ctx, ary, obj);
 				pdf_drop_obj(ctx, obj);
 				obj = NULL;
@@ -370,7 +370,7 @@ pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *
 	pdf_obj *key = NULL;
 	pdf_obj *val = NULL;
 	pdf_token tok;
-	int a, b;
+	fz_off_t a, b;
 
 	dict = pdf_new_dict(ctx, doc, 8);
 
@@ -421,7 +421,7 @@ pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *
 				if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
 					(tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
 				{
-					val = pdf_new_int(ctx, doc, a);
+					val = pdf_new_int_offset(ctx, doc, a);
 					pdf_dict_put(ctx, dict, key, val);
 					pdf_drop_obj(ctx, val);
 					val = NULL;
@@ -481,7 +481,7 @@ pdf_parse_stm_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbu
 	case PDF_TOK_TRUE: return pdf_new_bool(ctx, doc, 1); break;
 	case PDF_TOK_FALSE: return pdf_new_bool(ctx, doc, 0); break;
 	case PDF_TOK_NULL: return pdf_new_null(ctx, doc); break;
-	case PDF_TOK_INT: return pdf_new_int(ctx, doc, buf->i); break;
+	case PDF_TOK_INT: return pdf_new_int_offset(ctx, doc, buf->i); break;
 	default: fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in object stream");
 	}
 }
@@ -489,12 +489,13 @@ pdf_parse_stm_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbu
 pdf_obj *
 pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
 	fz_stream *file, pdf_lexbuf *buf,
-	int *onum, int *ogen, int *ostmofs, int *try_repair)
+	int *onum, int *ogen, fz_off_t *ostmofs, int *try_repair)
 {
 	pdf_obj *obj = NULL;
-	int num = 0, gen = 0, stm_ofs;
+	int num = 0, gen = 0;
+	fz_off_t stm_ofs;
 	pdf_token tok;
-	int a, b;
+	fz_off_t a, b;
 
 	fz_var(obj);
 
@@ -549,7 +550,7 @@ pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
 
 		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
 		{
-			obj = pdf_new_int(ctx, doc, a);
+			obj = pdf_new_int_offset(ctx, doc, a);
 			goto skip;
 		}
 		if (tok == PDF_TOK_INT)
diff --git a/source/pdf/pdf-pkcs7.c b/source/pdf/pdf-pkcs7.c
index 8feb07bb..15609153 100644
--- a/source/pdf/pdf-pkcs7.c
+++ b/source/pdf/pdf-pkcs7.c
@@ -638,11 +638,11 @@ void pdf_write_digest(fz_context *ctx, pdf_document *doc, char *filename, pdf_ob
 		if (p7_len*2 + 2 > digest_length)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "Insufficient space for digest");
 
-		f = fopen(filename, "rb+");
+		f = fz_fopen(filename, "rb+");
 		if (f == NULL)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to write digest");
 
-		fseek(f, digest_offset+1, SEEK_SET);
+		fz_fseek(f, digest_offset+1, SEEK_SET);
 
 		for (i = 0; i < p7_len; i++)
 			fprintf(f, "%02x", p7_ptr[i]);
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c
index bf3a1e9b..65077637 100644
--- a/source/pdf/pdf-repair.c
+++ b/source/pdf/pdf-repair.c
@@ -15,7 +15,7 @@ struct entry
 };
 
 int
-pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int *tmpofs)
+pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, fz_off_t *tmpofs)
 {
 	fz_stream *file = doc->file;
 	pdf_token tok;
@@ -250,8 +250,8 @@ pdf_repair_xref(fz_context *ctx, pdf_document *doc)
 
 	int num = 0;
 	int gen = 0;
-	int tmpofs, numofs = 0, genofs = 0;
-	int stm_len, stm_ofs;
+	fz_off_t tmpofs, stm_ofs, numofs = 0, genofs = 0;
+	int stm_len;
 	pdf_token tok;
 	int next;
 	int i, n, c;
@@ -626,6 +626,6 @@ pdf_repair_obj_stms(fz_context *ctx, pdf_document *doc)
 		pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i);
 
 		if (entry->type == 'o' && pdf_get_populating_xref_entry(ctx, doc, entry->ofs)->type != 'n')
-			fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)", entry->ofs, i);
+			fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)", (int)entry->ofs, i);
 	}
 }
diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c
index b7b85cc3..32440266 100644
--- a/source/pdf/pdf-stream.c
+++ b/source/pdf/pdf-stream.c
@@ -243,7 +243,7 @@ build_filter_chain(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj
  * orig_num and orig_gen are used purely to seed the encryption.
  */
 static fz_stream *
-pdf_open_raw_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *stmobj, int num, int orig_num, int orig_gen, int offset)
+pdf_open_raw_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *stmobj, int num, int orig_num, int orig_gen, fz_off_t offset)
 {
 	int hascrypt;
 	int len;
@@ -273,7 +273,7 @@ pdf_open_raw_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_ob
  * to stream length and decrypting.
  */
 static fz_stream *
-pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *chain, pdf_obj *stmobj, int num, int gen, int offset, fz_compression_params *imparams)
+pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *chain, pdf_obj *stmobj, int num, int gen, fz_off_t offset, fz_compression_params *imparams)
 {
 	pdf_obj *filters;
 	pdf_obj *params;
@@ -410,7 +410,7 @@ pdf_open_stream(fz_context *ctx, pdf_document *doc, int num, int gen)
 }
 
 fz_stream *
-pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_obj *dict, int stm_ofs)
+pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_obj *dict, fz_off_t stm_ofs)
 {
 	if (stm_ofs == 0)
 		fz_throw(ctx, FZ_ERROR_GENERIC, "object is not a stream");
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index 54f0ac00..8660484d 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -57,7 +57,7 @@ struct pdf_write_options_s
 	int do_linear;
 	int do_clean;
 	int *use_list;
-	int *ofs_list;
+	fz_off_t *ofs_list;
 	int *gen_list;
 	int *renumber_map;
 	int continue_on_error;
@@ -66,10 +66,10 @@ struct pdf_write_options_s
 	int *rev_renumber_map;
 	int *rev_gen_list;
 	int start;
-	int first_xref_offset;
-	int main_xref_offset;
-	int first_xref_entry_offset;
-	int file_len;
+	fz_off_t first_xref_offset;
+	fz_off_t main_xref_offset;
+	fz_off_t first_xref_entry_offset;
+	fz_off_t file_len;
 	int hints_shared_offset;
 	int hintstream_len;
 	pdf_obj *linear_l;
@@ -1435,7 +1435,7 @@ linearize(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
 static void
 update_linearization_params(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
 {
-	int offset;
+	fz_off_t offset;
 	pdf_set_int(ctx, opts->linear_l, opts->file_len);
 	/* Primary hint stream offset (of object, not stream!) */
 	pdf_set_int(ctx, opts->linear_h0, opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
@@ -1842,7 +1842,7 @@ static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_options *opt
 	pdf_obj *nobj = NULL;
 
 	fputs("xref\n", opts->out);
-	opts->first_xref_entry_offset = ftell(opts->out);
+	opts->first_xref_entry_offset = fz_ftell(opts->out);
 
 	if (opts->do_incremental)
 	{
@@ -1970,7 +1970,7 @@ static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_option
 		dict = pdf_new_dict(ctx, doc, 6);
 		pdf_update_object(ctx, doc, num, dict);
 
-		opts->first_xref_entry_offset = ftell(opts->out);
+		opts->first_xref_entry_offset = fz_ftell(opts->out);
 
 		to++;
 
@@ -2067,9 +2067,9 @@ static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_option
 }
 
 static void
-padto(FILE *file, int target)
+padto(FILE *file, fz_off_t target)
 {
-	int pos = ftell(file);
+	fz_off_t pos = fz_ftell(file);
 
 	assert(pos <= target);
 	while (pos < target)
@@ -2105,7 +2105,7 @@ dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, int n
 	{
 		if (pass > 0)
 			padto(opts->out, opts->ofs_list[num]);
-		opts->ofs_list[num] = ftell(opts->out);
+		opts->ofs_list[num] = fz_ftell(opts->out);
 		if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
 			writeobject(ctx, doc, opts, num, opts->gen_list[num], 1);
 	}
@@ -2131,7 +2131,7 @@ writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, int pa
 	{
 		/* Write first xref */
 		if (pass == 0)
-			opts->first_xref_offset = ftell(opts->out);
+			opts->first_xref_offset = fz_ftell(opts->out);
 		else
 			padto(opts->out, opts->first_xref_offset);
 		writexref(ctx, doc, opts, opts->start, pdf_xref_len(ctx, doc), 1, opts->main_xref_offset, 0);
@@ -2141,7 +2141,7 @@ writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, int pa
 		dowriteobject(ctx, doc, opts, num, pass);
 	if (opts->do_linear && pass == 1)
 	{
-		int offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
+		fz_off_t offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
 		padto(opts->out, offset);
 	}
 	for (num = 1; num < opts->start; num++)
@@ -2513,19 +2513,19 @@ static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_op
 	{
 		pdf_obj *byte_range;
 
-		f = fopen(filename, "rb+");
+		f = fz_fopen(filename, "rb+");
 		if (!f)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to open %s to complete signatures", filename);
 
-		fseek(f, 0, SEEK_END);
-		flen = ftell(f);
+		fz_fseek(f, 0, SEEK_END);
+		flen = fz_ftell(f);
 
 		/* Locate the byte ranges and contents in the saved file */
 		for (usig = doc->unsaved_sigs; usig; usig = usig->next)
 		{
 			char *bstr, *cstr, *fstr;
 			int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME_V, PDF_NAME_ByteRange, NULL));
-			fseek(f, opts->ofs_list[pnum], SEEK_SET);
+			fz_fseek(f, opts->ofs_list[pnum], SEEK_SET);
 			(void)fread(buf, 1, sizeof(buf), f);
 			buf[sizeof(buf)-1] = 0;
 
@@ -2568,7 +2568,7 @@ static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_op
 		/* Write the byte range to the file */
 		for (usig = doc->unsaved_sigs; usig; usig = usig->next)
 		{
-			fseek(f, usig->byte_range_start, SEEK_SET);
+			fz_fseek(f, usig->byte_range_start, SEEK_SET);
 			fwrite(buf, 1, usig->byte_range_end - usig->byte_range_start, f);
 		}
 
@@ -2633,16 +2633,16 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w
 		/* If no changes, nothing to write */
 		if (!doc->xref_altered)
 			return;
-		opts.out = fopen(filename, "ab");
+		opts.out = fz_fopen(filename, "ab");
 		if (opts.out)
 		{
-			fseek(opts.out, 0, SEEK_END);
+			fz_fseek(opts.out, 0, SEEK_END);
 			fputs("\n", opts.out);
 		}
 	}
 	else
 	{
-		opts.out = fopen(filename, "wb");
+		opts.out = fz_fopen(filename, "wb");
 	}
 
 	if (!opts.out)
@@ -2755,9 +2755,9 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w
 
 		if (opts.do_linear)
 		{
-			opts.main_xref_offset = ftell(opts.out);
+			opts.main_xref_offset = fz_ftell(opts.out);
 			writexref(ctx, doc, &opts, 0, opts.start, 0, 0, opts.first_xref_offset);
-			opts.file_len = ftell(opts.out);
+			opts.file_len = fz_ftell(opts.out);
 
 			make_hint_stream(ctx, doc, &opts);
 			if (opts.do_ascii)
@@ -2768,7 +2768,7 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w
 			opts.file_len += opts.hintstream_len;
 			opts.main_xref_offset += opts.hintstream_len;
 			update_linearization_params(ctx, doc, &opts);
-			fseek(opts.out, 0, 0);
+			fz_fseek(opts.out, 0, 0);
 			writeobjects(ctx, doc, &opts, 1);
 
 			padto(opts.out, opts.main_xref_offset);
@@ -2776,7 +2776,7 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w
 		}
 		else
 		{
-			opts.first_xref_offset = ftell(opts.out);
+			opts.first_xref_offset = fz_ftell(opts.out);
 			if (opts.do_incremental && doc->has_xref_streams)
 				writexrefstream(ctx, doc, &opts, 0, xref_len, 1, 0, opts.first_xref_offset);
 			else
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 0ec3b6d4..bba189a2 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -267,7 +267,7 @@ pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
 	/* Didn't find the entry in any section. Return the entry from
 	 * the final section. */
 	doc->xref_index[i] = 0;
-	if (i < xref->num_objects)
+	if (xref == NULL || i < xref->num_objects)
 	{
 		xref = &doc->xref_sections[0];
 		for (sub = xref->subsec; sub != NULL; sub = sub->next)
@@ -478,14 +478,14 @@ static void
 pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
 {
 	unsigned char buf[1024];
-	int t, n;
-	int i;
+	int i, n;
+	fz_off_t t;
 
 	fz_seek(ctx, doc->file, 0, SEEK_END);
 
 	doc->file_size = fz_tell(ctx, doc->file);
 
-	t = fz_maxi(0, doc->file_size - (int)sizeof buf);
+	t = fz_maxo(0, doc->file_size - (fz_off_t)sizeof buf);
 	fz_seek(ctx, doc->file, t, SEEK_SET);
 
 	n = fz_read(ctx, doc->file, buf, sizeof buf);
@@ -522,7 +522,7 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
 	pdf_token tok;
 	int c;
 	int size;
-	int ofs;
+	fz_off_t ofs;
 	pdf_obj *trailer = NULL;
 
 	fz_var(trailer);
@@ -601,7 +601,7 @@ pdf_new_ref(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
 }
 
 static pdf_xref_entry *
-pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int ofs, int len)
+pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, fz_off_t ofs, int len)
 {
 	pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
 	pdf_xref_subsec *sub;
@@ -663,11 +663,12 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 {
 	fz_stream *file = doc->file;
 
-	int ofs, len;
+	fz_off_t ofs;
+	int len;
 	char *s;
 	int n;
 	pdf_token tok;
-	int i;
+	fz_off_t i;
 	int c;
 	pdf_obj *trailer;
 	int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf);
@@ -685,7 +686,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 
 		fz_read_line(ctx, file, buf->scratch, buf->size);
 		s = buf->scratch;
-		ofs = fz_atoi(fz_strsep(&s, " "));
+		ofs = fz_atoo(fz_strsep(&s, " "));
 		len = fz_atoi(fz_strsep(&s, " "));
 
 		/* broken pdfs where the section is not on a separate line */
@@ -696,7 +697,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 		}
 
 		if (ofs < 0)
-			fz_throw(ctx, FZ_ERROR_GENERIC, "out of range object num in xref: %d", ofs);
+			fz_throw(ctx, FZ_ERROR_GENERIC, "out of range object num in xref: %d", (int)ofs);
 
 		/* broken pdfs where size in trailer undershoots entries in xref sections */
 		if (ofs + len > xref_len)
@@ -720,11 +721,11 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 				while (*s != '\0' && iswhite(*s))
 					s++;
 
-				entry->ofs = atoi(s);
-				entry->gen = atoi(s + 11);
+				entry->ofs = fz_atoo(s);
+				entry->gen = fz_atoi(s + 11);
 				entry->type = s[17];
 				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
-					fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], i, entry->gen);
+					fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], (int)i, entry->gen);
 			}
 		}
 	}
@@ -749,7 +750,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 }
 
 static void
-pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
+pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, fz_off_t i0, int i1, int w0, int w1, int w2)
 {
 	pdf_xref_entry *table;
 	int i, n;
@@ -764,7 +765,7 @@ pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, in
 	{
 		pdf_xref_entry *entry = &table[i-i0];
 		int a = 0;
-		int b = 0;
+		fz_off_t b = 0;
 		int c = 0;
 
 		if (fz_is_eof(ctx, stm))
@@ -797,7 +798,8 @@ pdf_read_new_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 	pdf_obj *trailer = NULL;
 	pdf_obj *index = NULL;
 	pdf_obj *obj = NULL;
-	int num, gen, ofs, stm_ofs;
+	int num, gen;
+	fz_off_t ofs, stm_ofs;
 	int size, w0, w1, w2;
 	int t;
 
@@ -883,7 +885,7 @@ pdf_read_new_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 }
 
 static pdf_obj *
-pdf_read_xref(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf)
+pdf_read_xref(fz_context *ctx, pdf_document *doc, fz_off_t ofs, pdf_lexbuf *buf)
 {
 	pdf_obj *trailer;
 	int c;
@@ -905,7 +907,7 @@ pdf_read_xref(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf)
 	}
 	fz_catch(ctx)
 	{
-		fz_rethrow_message(ctx, "cannot read xref (ofs=%d)", ofs);
+		fz_rethrow_message(ctx, "cannot read xref (ofs=%d)", (int)ofs);
 	}
 	return trailer;
 }
@@ -916,15 +918,15 @@ struct ofs_list_s
 {
 	int max;
 	int len;
-	int *list;
+	fz_off_t *list;
 };
 
-static int
-read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets)
+static fz_off_t
+read_xref_section(fz_context *ctx, pdf_document *doc, fz_off_t ofs, pdf_lexbuf *buf, ofs_list *offsets)
 {
 	pdf_obj *trailer = NULL;
-	int xrefstmofs = 0;
-	int prevofs = 0;
+	fz_off_t xrefstmofs = 0;
+	fz_off_t prevofs = 0;
 
 	fz_var(trailer);
 
@@ -939,12 +941,12 @@ read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf,
 		}
 		if (i < offsets->len)
 		{
-			fz_warn(ctx, "ignoring xref recursion with offset %d", ofs);
+			fz_warn(ctx, "ignoring xref recursion with offset %d", (int)ofs);
 			break;
 		}
 		if (offsets->len == offsets->max)
 		{
-			offsets->list = fz_resize_array(ctx, offsets->list, offsets->max*2, sizeof(int));
+			offsets->list = fz_resize_array(ctx, offsets->list, offsets->max*2, sizeof(*offsets->list));
 			offsets->max *= 2;
 		}
 		offsets->list[offsets->len++] = ofs;
@@ -955,7 +957,7 @@ read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf,
 
 		/* FIXME: do we overwrite free entries properly? */
 		/* FIXME: Does this work properly with progression? */
-		xrefstmofs = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_XRefStm));
+		xrefstmofs = pdf_to_offset(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_XRefStm));
 		if (xrefstmofs)
 		{
 			if (xrefstmofs < 0)
@@ -969,7 +971,8 @@ read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf,
 			pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs, buf));
 		}
 
-		prevofs = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_Prev));
+		/* FIXME: pdf_to_offset? */
+		prevofs = pdf_to_offset(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_Prev));
 		if (prevofs < 0)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset for previous xref stream");
 	}
@@ -979,20 +982,20 @@ read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf,
 	}
 	fz_catch(ctx)
 	{
-		fz_rethrow_message(ctx, "cannot read xref at offset %d", ofs);
+		fz_rethrow_message(ctx, "cannot read xref at offset %d", (int)ofs);
 	}
 
 	return prevofs;
 }
 
 static void
-pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_previous)
+pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, fz_off_t ofs, pdf_lexbuf *buf, int read_previous)
 {
 	ofs_list list;
 
 	list.len = 0;
 	list.max = 10;
-	list.list = fz_malloc_array(ctx, 10, sizeof(int));
+	list.list = fz_malloc_array(ctx, 10, sizeof(*list.list));
 	fz_try(ctx)
 	{
 		while(ofs)
@@ -1084,11 +1087,11 @@ pdf_load_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 			if (entry->ofs == 0)
 				entry->type = 'f';
 			else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
-				fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", entry->ofs, i);
+				fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i);
 		}
 		if (entry->type == 'o')
 			if (entry->ofs <= 0 || entry->ofs >= xref_len || pdf_get_xref_entry(ctx, doc, entry->ofs)->type != 'n')
-				fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", entry->ofs, i);
+				fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)entry->ofs, i);
 	}
 }
 
@@ -1098,7 +1101,8 @@ pdf_load_linear(fz_context *ctx, pdf_document *doc)
 	pdf_obj *dict = NULL;
 	pdf_obj *hint = NULL;
 	pdf_obj *o;
-	int num, gen, stmofs, lin, len;
+	int num, gen, lin, len;
+	fz_off_t stmofs;
 
 	fz_var(dict);
 	fz_var(hint);
@@ -1535,10 +1539,10 @@ pdf_print_xref(fz_context *ctx, pdf_document *doc)
 	{
 		pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
 		printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i,
-			entry->ofs,
+			(int)entry->ofs,
 			entry->gen,
 			entry->type ? entry->type : '-',
-			entry->stm_ofs,
+			(int)entry->stm_ofs,
 			entry->stm_buf);
 	}
 }
@@ -1553,10 +1557,10 @@ pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_lexbu
 	fz_stream *stm = NULL;
 	pdf_obj *objstm = NULL;
 	int *numbuf = NULL;
-	int *ofsbuf = NULL;
+	fz_off_t *ofsbuf = NULL;
 
 	pdf_obj *obj;
-	int first;
+	fz_off_t first;
 	int count;
 	int i;
 	pdf_token tok;
@@ -1579,8 +1583,8 @@ pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_lexbu
 		if (first < 0)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "first object in object stream resides outside stream");
 
-		numbuf = fz_calloc(ctx, count, sizeof(int));
-		ofsbuf = fz_calloc(ctx, count, sizeof(int));
+		numbuf = fz_calloc(ctx, count, sizeof(*numbuf));
+		ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf));
 
 		stm = pdf_open_stream(ctx, doc, num, gen);
 		for (i = 0; i < count; i++)
@@ -1659,13 +1663,13 @@ pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_lexbu
  * object loading
  */
 static int
-pdf_obj_read(fz_context *ctx, pdf_document *doc, int *offset, int *nump, pdf_obj **page)
+pdf_obj_read(fz_context *ctx, pdf_document *doc, fz_off_t *offset, int *nump, pdf_obj **page)
 {
 	pdf_lexbuf *buf = &doc->lexbuf.base;
-	int num, numofs, gen, genofs, stmofs, tmpofs, tok;
+	int num, gen, tok;
+	fz_off_t numofs, genofs, stmofs, tmpofs, newtmpofs;
 	int xref_len;
 	pdf_xref_entry *entry;
-	int newtmpofs;
 
 	numofs = *offset;
 	fz_seek(ctx, doc->file, numofs, SEEK_SET);
@@ -1824,7 +1828,7 @@ read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
 	 * there. */
 	int expected = num;
 	int curr_pos;
-	int start, offset;
+	fz_off_t start, offset;
 
 	while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
 		expected--;
@@ -2500,7 +2504,7 @@ static void
 pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
 {
 	pdf_lexbuf *buf = &doc->lexbuf.base;
-	int curr_pos;
+	fz_off_t curr_pos;
 
 	curr_pos = fz_tell(ctx, doc->file);
 	fz_seek(ctx, doc->file, doc->hint_object_offset, SEEK_SET);
@@ -2509,7 +2513,8 @@ pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
 		while (1)
 		{
 			pdf_obj *page = NULL;
-			int tmpofs, num, gen, tok;
+			fz_off_t tmpofs;
+			int num, gen, tok;
 
 			tok = pdf_lex(ctx, doc->file, buf);
 			if (tok != PDF_TOK_INT)
-- 
cgit v1.2.3