Initial work on progressive loading

We are testing this using a new -p flag to mupdf that sets a bitrate at which data will appear to arrive progressively as time goes on. For example:

    mupdf -p 102400 pdf_reference17.pdf

Details of the scheme used here are presented in docs/progressive.txt.
author: Robin Watts <robin.watts@artifex.com> 2013-07-17 18:42:33 +0100
committer: Robin Watts <robin.watts@artifex.com> 2013-07-19 19:54:27 +0100
commit: f5f7c0e4dd83257f526b158e3998970717852a0e (patch)
tree: c34ee93ab7773e4fbe48506c97fb515c03707e57 /source/pdf
parent: 3c559928d88fccfe17da4953ea1c93ceb42a90cb (diff)
download: mupdf-f5f7c0e4dd83257f526b158e3998970717852a0e.tar.xz
11 files changed, 904 insertions, 68 deletions
diff --git a/source/pdf/js/pdf-js.c b/source/pdf/js/pdf-js.c
index 26c6d6f8..d801716a 100644
--- a/source/pdf/js/pdf-js.c
+++ b/source/pdf/js/pdf-js.c
@@ -492,7 +492,7 @@ static pdf_jsimp_obj *doc_getField(void *jsctx, void *obj, int argc, pdf_jsimp_o
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater ? */
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		fz_warn(ctx, "doc_getField failed: %s", fz_caught_message(ctx));
 		dict = NULL;
 	}
@@ -832,7 +832,7 @@ void pdf_js_load_document_level(pdf_js *js)
 			}
 			fz_catch(ctx)
 			{
-				/* FIXME: TryLater ? */
+				fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 				fz_warn(ctx, "Warning: %s", fz_caught_message(ctx));
 			}
 		}
diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c
index c571a0a1..4c76fce9 100644
--- a/source/pdf/pdf-annot.c
+++ b/source/pdf/pdf-annot.c
@@ -331,6 +331,7 @@ pdf_load_link_annots(pdf_document *doc, pdf_obj *annots, const fz_matrix *page_c
 	n = pdf_array_len(annots);
 	for (i = 0; i < n; i++)
 	{
+		/* FIXME: Move the try/catch out of the loop for performance? */
 		fz_try(doc->ctx)
 		{
 			obj = pdf_array_get(annots, i);
@@ -338,7 +339,7 @@ pdf_load_link_annots(pdf_document *doc, pdf_obj *annots, const fz_matrix *page_c
 		}
 		fz_catch(doc->ctx)
 		{
-			/* FIXME: TryLater */
+			fz_rethrow_if(doc->ctx, FZ_ERROR_TRYLATER);
 			link = NULL;
 		}
 
@@ -594,9 +595,13 @@ pdf_load_annots(pdf_document *doc, pdf_obj *annots, pdf_page *page)
 		}
 		fz_catch(ctx)
 		{
+			if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+			{
+				pdf_free_annot(ctx, head);
+				fz_rethrow(ctx);
+			}
 			keep_annot = 0;
 			fz_warn(ctx, "ignoring broken annotation");
-			/* FIXME: TryLater */
 		}
 		if (!keep_annot)
 		{
@@ -657,8 +662,8 @@ pdf_update_annot(pdf_document *doc, pdf_annot *annot)
 			}
 			fz_catch(ctx)
 			{
+				fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 				fz_warn(ctx, "ignoring broken annotation");
-				/* FIXME: TryLater */
 			}
 		}
 	}
diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c
index 2c18d96a..a5174979 100644
--- a/source/pdf/pdf-font.c
+++ b/source/pdf/pdf-font.c
@@ -397,7 +397,7 @@ pdf_new_font_desc(fz_context *ctx)
  */
 
 static pdf_font_desc *
-pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
+pdf_load_simple_font_by_name(pdf_document *doc, pdf_obj *dict, char *basefont)
 {
 	pdf_obj *descriptor;
 	pdf_obj *encoding;
@@ -410,7 +410,6 @@ pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
 	int symbolic;
 	int kind;
 
-	char *basefont;
 	char *estrings[256];
 	char ebuffer[256][32];
 	int i, k, n;
@@ -422,8 +421,6 @@ pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
 	fz_var(etable);
 	fz_var(has_lock);
 
-	basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));
-
 	/* Load font file */
 	fz_try(ctx)
 	{
@@ -688,7 +685,7 @@ pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater */
+			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 			fz_warn(ctx, "cannot load ToUnicode CMap");
 		}
 
@@ -744,6 +741,79 @@ pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
 	return fontdesc;
 }
 
+static pdf_font_desc *
+pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
+{
+	char *basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));
+
+	return pdf_load_simple_font_by_name(doc, dict, basefont);
+}
+
+static int
+hail_mary_make_hash_key(fz_store_hash *hash, void *key_)
+{
+	hash->u.i.i0 = 0;
+	hash->u.i.i1 = 0;
+	return 1;
+}
+
+static void *
+hail_mary_keep_key(fz_context *ctx, void *key)
+{
+	return key;
+}
+
+static void
+hail_mary_drop_key(fz_context *ctx, void *key)
+{
+}
+
+static int
+hail_mary_cmp_key(void *k0, void *k1)
+{
+	return k0 == k1;
+}
+
+#ifndef NDEBUG
+static void
+hail_mary_debug_key(FILE *out, void *key_)
+{
+	fprintf(out, "hail mary ");
+}
+#endif
+
+static fz_store_type hail_mary_store_type =
+{
+	hail_mary_make_hash_key,
+	hail_mary_keep_key,
+	hail_mary_drop_key,
+	hail_mary_cmp_key,
+#ifndef NDEBUG
+	hail_mary_debug_key
+#endif
+};
+
+pdf_font_desc *
+pdf_load_hail_mary_font(pdf_document *doc)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_font_desc *fontdesc;
+	pdf_font_desc *existing;
+
+	if ((fontdesc = fz_find_item(ctx, pdf_free_font_imp, &hail_mary_store_type, &hail_mary_store_type)))
+	{
+		return fontdesc;
+	}
+
+	/* FIXME: Get someone with a clue about fonts to fix this */
+	fontdesc = pdf_load_simple_font_by_name(doc, NULL, "Helvetica");
+
+	existing = fz_store_item(ctx, &hail_mary_store_type, fontdesc, fontdesc->size, &hail_mary_store_type);
+	assert(existing == NULL);
+
+	return fontdesc;
+}
+
 /*
  * CID Fonts
  */
@@ -1057,7 +1127,7 @@ pdf_load_font_descriptor(pdf_font_desc *fontdesc, pdf_document *doc, pdf_obj *di
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater */
+			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 			fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font");
 			if (origname != fontname && !iscidfont)
 				pdf_load_builtin_font(ctx, fontdesc, fontname);
diff --git a/source/pdf/pdf-interpret.c b/source/pdf/pdf-interpret.c
index cec0d1e5..515a87fa 100644
--- a/source/pdf/pdf-interpret.c
+++ b/source/pdf/pdf-interpret.c
@@ -372,7 +372,7 @@ begin_softmask(pdf_csi * csi, softmask_save *save)
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater */
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		/* FIXME: Ignore error - nasty, but if we throw from
 		 * here the clip stack would be messed up. */
 		if (csi->cookie)
@@ -1188,7 +1188,7 @@ pdf_grestore(pdf_csi *csi)
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater */
+			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 			/* Silently swallow the problem */
 		}
 		clip_depth--;
@@ -1590,6 +1590,31 @@ pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, const fz_ma
 
 }
 
+static pdf_font_desc *
+load_font_or_hail_mary(pdf_csi *csi, pdf_obj *rdb, pdf_obj *font, int depth)
+{
+	pdf_document *doc = csi->doc;
+	fz_context *ctx = doc->ctx;
+	pdf_font_desc *desc;
+
+	fz_try(ctx)
+	{
+		desc = pdf_load_font(doc, rdb, font, depth);
+	}
+	fz_catch(ctx)
+	{
+		if (fz_caught(ctx) != FZ_ERROR_TRYLATER)
+			fz_rethrow(ctx);
+		if (!csi->cookie || !csi->cookie->incomplete_ok)
+			fz_rethrow(ctx);
+		desc = NULL;
+		csi->cookie->incomplete++;
+	}
+	if (desc == NULL)
+		desc = pdf_load_hail_mary_font(doc);
+	return desc;
+}
+
 static void
 pdf_run_extgstate(pdf_csi *csi, pdf_obj *rdb, pdf_obj *extgstate)
 {
@@ -1619,7 +1644,7 @@ pdf_run_extgstate(pdf_csi *csi, pdf_obj *rdb, pdf_obj *extgstate)
 					gstate->font = NULL;
 				}
 
-				gstate->font = pdf_load_font(csi->doc, rdb, font, csi->nested_depth);
+				gstate->font = load_font_or_hail_mary(csi, rdb, font, csi->nested_depth);
 				if (!gstate->font)
 					fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find font in store");
 				gstate->size = pdf_to_real(pdf_array_get(val, 1));
@@ -2191,7 +2216,7 @@ static void pdf_run_Tf(pdf_csi *csi, pdf_obj *rdb)
 	if (!obj)
 		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find font resource: '%s'", csi->name);
 
-	gstate->font = pdf_load_font(csi->doc, rdb, obj, csi->nested_depth);
+	gstate->font = load_font_or_hail_mary(csi, rdb, obj, csi->nested_depth);
 }
 
 static void pdf_run_Tr(pdf_csi *csi)
@@ -2855,10 +2880,21 @@ pdf_run_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, pdf_lexbuf *buf)
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater */
-			/* Swallow the error */
-			if (csi->cookie)
-				csi->cookie->errors++;
+			if (!csi->cookie)
+			{
+				fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+			}
+			else if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+			{
+				if (csi->cookie->incomplete_ok)
+					csi->cookie->incomplete++;
+				else
+					fz_rethrow(ctx);
+			}
+			else
+			{
+				 csi->cookie->errors++;
+			}
 			if (!ignoring_errors)
 			{
 				fz_warn(ctx, "Ignoring errors during rendering");
@@ -2901,7 +2937,7 @@ pdf_run_contents_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file)
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater */
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		fz_warn(ctx, "Content stream parsing error - rendering truncated");
 	}
 	while (csi->gtop > csi->gbot)
@@ -3000,6 +3036,8 @@ static void pdf_run_page_contents_with_usage(pdf_document *doc, pdf_page *page,
 void pdf_run_page_contents(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
 {
 	pdf_run_page_contents_with_usage(doc, page, dev, ctm, "View", cookie);
+	if (page->incomplete & PDF_PAGE_INCOMPLETE_CONTENTS)
+		fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "incomplete rendering");
 }
 
 static void pdf_run_annot_with_usage(pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie)
@@ -3047,6 +3085,8 @@ static void pdf_run_annot_with_usage(pdf_document *doc, pdf_page *page, pdf_anno
 void pdf_run_annot(pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
 {
 	pdf_run_annot_with_usage(doc, page, annot, dev, ctm, "View", cookie);
+	if (page->incomplete & PDF_PAGE_INCOMPLETE_ANNOTS)
+		fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "incomplete rendering");
 }
 
 static void pdf_run_page_annots_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie)
@@ -3080,6 +3120,8 @@ pdf_run_page_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, const
 {
 	pdf_run_page_contents_with_usage(doc, page, dev, ctm, event, cookie);
 	pdf_run_page_annots_with_usage(doc, page, dev, ctm, event, cookie);
+	if (page->incomplete)
+		fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "incomplete rendering");
 }
 
 void
diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c
index 6a77e7b4..708794ee 100644
--- a/source/pdf/pdf-object.c
+++ b/source/pdf/pdf-object.c
@@ -1392,7 +1392,7 @@ pdf_obj *pdf_new_obj_from_str(pdf_document *doc, const char *src)
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater */
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		return NULL;
 	}
 
diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c
index 7de81f00..d4d8015d 100644
--- a/source/pdf/pdf-page.c
+++ b/source/pdf/pdf-page.c
@@ -349,7 +349,14 @@ pdf_load_page(pdf_document *doc, int number)
 	float userunit;
 	fz_matrix mat;
 
-	pageref = pdf_lookup_page_obj(doc, number);
+	if (doc->file_reading_linearly)
+	{
+		pageref = pdf_progressive_advance(doc, number);
+		if (pageref == NULL)
+			fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "page %d not available yet", number);
+	}
+	else
+		pageref = pdf_lookup_page_obj(doc, number);
 	pageobj = pdf_resolve_indirect(pageref);
 
 	page = fz_malloc_struct(ctx, pdf_page);
@@ -361,6 +368,7 @@ pdf_load_page(pdf_document *doc, int number)
 	page->deleted_annots = NULL;
 	page->tmp_annots = NULL;
 	page->me = pdf_keep_obj(pageobj);
+	page->incomplete = 0;
 
 	obj = pdf_dict_gets(pageobj, "UserUnit");
 	if (pdf_is_real(obj))
@@ -409,11 +417,22 @@ pdf_load_page(pdf_document *doc, int number)
 	fz_pre_scale(fz_translate(&mat, -realbox.x0, -realbox.y0), userunit, userunit);
 	fz_concat(&page->ctm, &page->ctm, &mat);
 
-	obj = pdf_dict_gets(pageobj, "Annots");
-	if (obj)
+	fz_try(ctx)
 	{
-		page->links = pdf_load_link_annots(doc, obj, &page->ctm);
-		page->annots = pdf_load_annots(doc, obj, page);
+		obj = pdf_dict_gets(pageobj, "Annots");
+		if (obj)
+		{
+			page->links = pdf_load_link_annots(doc, obj, &page->ctm);
+			page->annots = pdf_load_annots(doc, obj, page);
+		}
+	}
+	fz_catch(ctx)
+	{
+		if (fz_caught(ctx) != FZ_ERROR_TRYLATER)
+			fz_rethrow(ctx);
+		page->incomplete |= PDF_PAGE_INCOMPLETE_ANNOTS;
+		fz_drop_link(ctx, page->links);
+		page->links = NULL;
 	}
 
 	page->duration = pdf_to_real(pdf_dict_gets(pageobj, "Dur"));
@@ -444,8 +463,12 @@ pdf_load_page(pdf_document *doc, int number)
 	}
 	fz_catch(ctx)
 	{
-		pdf_free_page(doc, page);
-		fz_rethrow_message(ctx, "cannot load page %d contents (%d 0 R)", number + 1, pdf_to_num(pageref));
+		if (fz_caught(ctx) != FZ_ERROR_TRYLATER)
+		{
+			pdf_free_page(doc, page);
+			fz_rethrow_message(ctx, "cannot load page %d contents (%d 0 R)", number + 1, pdf_to_num(pageref));
+		}
+		page->incomplete |= PDF_PAGE_INCOMPLETE_CONTENTS;
 	}
 
 	return page;
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c
index 3db32de0..fc603d1d 100644
--- a/source/pdf/pdf-repair.c
+++ b/source/pdf/pdf-repair.c
@@ -14,8 +14,8 @@ struct entry
 	int stm_len;
 };
 
-static int
-pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, int *tmpofs)
+int
+pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int *tmpofs)
 {
 	pdf_token tok;
 	int stm_len;
@@ -24,7 +24,8 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
 	fz_context *ctx = file->ctx;
 
 	*stmofsp = 0;
-	*stmlenp = -1;
+	if (stmlenp)
+		*stmlenp = -1;
 
 	stm_len = 0;
 
@@ -45,7 +46,7 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater */
+			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 			/* Don't let a broken object at EOF overwrite a good one */
 			if (file->eof)
 				fz_rethrow_message(ctx, "broken object at EOF ignored");
@@ -53,21 +54,24 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
 			dict = pdf_new_dict(doc, 2);
 		}
 
-		obj = pdf_dict_gets(dict, "Type");
-		if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "XRef"))
+		if (encrypt && id)
 		{
-			obj = pdf_dict_gets(dict, "Encrypt");
-			if (obj)
+			obj = pdf_dict_gets(dict, "Type");
+			if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "XRef"))
 			{
-				pdf_drop_obj(*encrypt);
-				*encrypt = pdf_keep_obj(obj);
-			}
+				obj = pdf_dict_gets(dict, "Encrypt");
+				if (obj)
+				{
+					pdf_drop_obj(*encrypt);
+					*encrypt = pdf_keep_obj(obj);
+				}
 
-			obj = pdf_dict_gets(dict, "ID");
-			if (obj)
-			{
-				pdf_drop_obj(*id);
-				*id = pdf_keep_obj(obj);
+				obj = pdf_dict_gets(dict, "ID");
+				if (obj)
+				{
+					pdf_drop_obj(*id);
+					*id = pdf_keep_obj(obj);
+				}
 			}
 		}
 
@@ -75,6 +79,16 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
 		if (!pdf_is_indirect(obj) && pdf_is_int(obj))
 			stm_len = pdf_to_int(obj);
 
+		if (doc->file_reading_linearly && page)
+		{
+			obj = pdf_dict_gets(dict, "Type");
+			if (!strcmp(pdf_to_name(obj), "Page"))
+			{
+				pdf_drop_obj(*page);
+				*page = pdf_keep_obj(dict);
+			}
+		}
+
 		pdf_drop_obj(dict);
 	}
 
@@ -112,7 +126,7 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
 			}
 			fz_catch(ctx)
 			{
-				/* FIXME: TryLater */
+				fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 				fz_warn(ctx, "cannot find endstream token, falling back to scanning");
 			}
 			if (tok == PDF_TOK_ENDSTREAM)
@@ -133,7 +147,8 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
 			buf->scratch[8] = c;
 		}
 
-		*stmlenp = fz_tell(file) - *stmofsp - 9;
+		if (stmlenp)
+			*stmlenp = fz_tell(file) - *stmofsp - 9;
 
 atobjend:
 		*tmpofs = fz_tell(file);
@@ -302,7 +317,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
 			}
 			fz_catch(ctx)
 			{
-				/* FIXME: TryLater */
+				fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 				fz_warn(ctx, "ignoring the rest of the file");
 				break;
 			}
@@ -324,11 +339,11 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
 			{
 				fz_try(ctx)
 				{
-					tok = pdf_repair_obj(doc, buf, &stm_ofs, &stm_len, &encrypt, &id, &tmpofs);
+					tok = pdf_repair_obj(doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs);
 				}
 				fz_catch(ctx)
 				{
-					/* FIXME: TryLater */
+					fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 					/* If we haven't seen a root yet, there is nothing
 					 * we can do, but give up. Otherwise, we'll make
 					 * do. */
@@ -374,7 +389,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
 				}
 				fz_catch(ctx)
 				{
-					/* FIXME: TryLater */
+					fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 					/* If we haven't seen a root yet, there is nothing
 					 * we can do, but give up. Otherwise, we'll make
 					 * do. */
diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c
index 9d25915b..a46cdcc7 100644
--- a/source/pdf/pdf-stream.c
+++ b/source/pdf/pdf-stream.c
@@ -536,7 +536,7 @@ pdf_open_object_array(pdf_document *doc, pdf_obj *list)
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater */
+			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 			fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
 			continue;
 		}
diff --git a/source/pdf/pdf-type3.c b/source/pdf/pdf-type3.c
index d4c9b31a..b216b950 100644
--- a/source/pdf/pdf-type3.c
+++ b/source/pdf/pdf-type3.c
@@ -198,7 +198,7 @@ void pdf_load_type3_glyphs(pdf_document *doc, pdf_font_desc *fontdesc, int neste
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater */
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		fz_warn(ctx, "Type3 glyph load failed: %s", fz_caught_message(ctx));
 	}
 }
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index a1b0caa2..a161c763 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -532,7 +532,7 @@ static pdf_obj *sweepref(pdf_document *doc, pdf_write_options *opts, pdf_obj *ob
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater */
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		/* Leave broken */
 	}
 
@@ -1650,7 +1650,7 @@ static void writeobject(pdf_document *doc, pdf_write_options *opts, int num, int
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater ? */
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		if (opts->continue_on_error)
 		{
 			fprintf(opts->out, "%d %d obj\nnull\nendobj\n", num, gen);
@@ -1732,7 +1732,7 @@ static void writeobject(pdf_document *doc, pdf_write_options *opts, int num, int
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater ? */
+			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 			if (opts->continue_on_error)
 			{
 				fprintf(opts->out, "%d %d obj\nnull\nendobj\n", num, gen);
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index a50cafff..33bd6c22 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -1,5 +1,13 @@
 #include "mupdf/pdf.h"
 
+#undef DEBUG_PROGESSIVE_ADVANCE
+
+#ifdef DEBUG_PROGESSIVE_ADVANCE
+#define DEBUGMESS(A) do { fz_warn A; } while (0)
+#else
+#define DEBUGMESS(A) do { } while (0)
+#endif
+
 static inline int iswhite(int ch)
 {
 	return
@@ -699,6 +707,7 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
 		pdf_set_populating_xref_trailer(doc, trailer);
 
 		/* FIXME: do we overwrite free entries properly? */
+		/* FIXME: Does this work properly with progression? */
 		xrefstmofs = pdf_to_int(pdf_dict_gets(trailer, "XRefStm"));
 		if (xrefstmofs)
 		{
@@ -720,7 +729,6 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
 	fz_always(ctx)
 	{
 		pdf_drop_obj(trailer);
-		trailer = NULL;
 	}
 	fz_catch(ctx)
 	{
@@ -731,7 +739,7 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
 }
 
 static void
-pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf)
+pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_previous)
 {
 	fz_context *ctx = doc->ctx;
 	ofs_list list;
@@ -745,6 +753,8 @@ pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf)
 		{
 			pdf_populate_next_xref_level(doc);
 			ofs = read_xref_section(doc, ofs, buf, &list);
+			if (!read_previous)
+				break;
 		}
 	}
 	fz_always(ctx)
@@ -770,11 +780,9 @@ pdf_load_xref(pdf_document *doc, pdf_lexbuf *buf)
 	int xref_len;
 	fz_context *ctx = doc->ctx;
 
-	pdf_load_version(doc);
-
 	pdf_read_start_xref(doc);
 
-	pdf_read_xref_sections(doc, doc->startxref, buf);
+	pdf_read_xref_sections(doc, doc->startxref, buf, 1);
 
 	if (pdf_xref_len(doc) == 0)
 		fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty");
@@ -803,6 +811,61 @@ pdf_load_xref(pdf_document *doc, pdf_lexbuf *buf)
 	}
 }
 
+static void
+pdf_load_linear(pdf_document *doc)
+{
+	pdf_obj *dict = NULL;
+	pdf_obj *hint = NULL;
+	pdf_obj *o;
+	int num, gen, stmofs, lin, len;
+	fz_context *ctx = doc->ctx;
+
+	fz_var(dict);
+	fz_var(hint);
+
+	fz_try(ctx)
+	{
+		pdf_xref_entry *entry;
+
+		dict = pdf_parse_ind_obj(doc, doc->file, &doc->lexbuf.base, &num, &gen, &stmofs);
+		if (!pdf_is_dict(dict))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
+		o = pdf_dict_gets(dict, "Linearized");
+		if (o == NULL)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
+		lin = pdf_to_int(o);
+		if (lin != 1)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Unexpected version of Linearized tag (%d)", lin);
+		len = pdf_to_int(pdf_dict_gets(dict, "L"));
+		if (len != doc->file_length)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "File has been updated since linearization");
+
+		pdf_read_xref_sections(doc, fz_tell(doc->file), &doc->lexbuf.base, 0);
+
+		doc->page_count = pdf_to_int(pdf_dict_gets(dict, "N"));
+		doc->linear_page_refs = fz_resize_array(ctx, doc->linear_page_refs, doc->page_count, sizeof(pdf_obj *));
+		memset(doc->linear_page_refs, 0, doc->page_count * sizeof(pdf_obj*));
+		doc->linear_obj = dict;
+		doc->linear_pos = fz_tell(doc->file);
+		doc->linear_page1_obj_num = pdf_to_int(pdf_dict_gets(dict, "O"));
+		doc->linear_page_refs[0] = pdf_new_indirect(doc, doc->linear_page1_obj_num, 0);
+		doc->linear_page_num = 0;
+		hint = pdf_dict_gets(dict, "H");
+		doc->hint_object_offset = pdf_to_int(pdf_array_get(hint, 0));
+		doc->hint_object_length = pdf_to_int(pdf_array_get(hint, 1));
+
+		entry = pdf_get_populating_xref_entry(doc, 0);
+		entry->type = 'f';
+	}
+	fz_catch(ctx)
+	{
+		pdf_drop_obj(dict);
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Drop back to non linearized reading mode */
+		doc->file_reading_linearly = 0;
+	}
+}
+
 void
 pdf_ocg_set_config(pdf_document *doc, int config)
 {
@@ -982,12 +1045,31 @@ pdf_init_document(pdf_document *doc)
 
 	fz_try(ctx)
 	{
-		pdf_load_xref(doc, &doc->lexbuf.base);
+		pdf_load_version(doc);
+
+		doc->file_length = fz_stream_meta(doc->file, FZ_STREAM_META_LENGTH, 0, NULL);
+		if (doc->file_length < 0)
+			doc->file_length = 0;
+
+		/* Check to see if we should work in progressive mode */
+		if (fz_stream_meta(doc->file, FZ_STREAM_META_PROGRESSIVE, 0, NULL) > 0)
+			doc->file_reading_linearly = 1;
+
+		/* Try to load the linearized file if we are in progressive
+		 * mode. */
+		if (doc->file_reading_linearly)
+			pdf_load_linear(doc);
+
+		/* If we aren't in progressive mode (or the linear load failed
+		 * and has set us back to non-progressive mode), load normally.
+		 */
+		if (!doc->file_reading_linearly)
+			pdf_load_xref(doc, &doc->lexbuf.base);
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater ? */
 		pdf_free_xref_sections(doc);
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		fz_warn(ctx, "trying to repair broken xref");
 		repaired = 1;
 	}
@@ -1027,7 +1109,7 @@ pdf_init_document(pdf_document *doc)
 				}
 				fz_catch(ctx)
 				{
-					/* FIXME: TryLater ? */
+					fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 					fz_warn(ctx, "ignoring broken object (%d 0 R)", i);
 					continue;
 				}
@@ -1066,7 +1148,6 @@ pdf_init_document(pdf_document *doc)
 	{
 		pdf_drop_obj(dict);
 		pdf_drop_obj(nobj);
-		pdf_close_document(doc);
 		fz_rethrow_message(ctx, "cannot open document");
 	}
 
@@ -1076,7 +1157,6 @@ pdf_init_document(pdf_document *doc)
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater ? */
 		fz_warn(ctx, "Ignoring Broken Optional Content");
 	}
 }
@@ -1107,6 +1187,20 @@ pdf_close_document(pdf_document *doc)
 	if (doc->crypt)
 		pdf_free_crypt(ctx, doc->crypt);
 
+	pdf_drop_obj(doc->linear_obj);
+	if (doc->linear_page_refs)
+	{
+		for (i=0; i < doc->page_count; i++)
+		{
+			pdf_drop_obj(doc->linear_page_refs[i]);
+		}
+		fz_free(ctx, doc->linear_page_refs);
+	}
+	fz_free(ctx, doc->hint_page);
+	fz_free(ctx, doc->hint_shared_ref);
+	fz_free(ctx, doc->hint_shared);
+	fz_free(ctx, doc->hint_obj_offsets);
+
 	for (i=0; i < doc->num_type3_fonts; i++)
 	{
 		fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
@@ -1249,6 +1343,239 @@ pdf_load_obj_stm(pdf_document *doc, int num, int gen, pdf_lexbuf *buf)
 /*
  * object loading
  */
+static int
+pdf_obj_read(pdf_document *doc, int *offset, int *nump, pdf_obj **page)
+{
+	int num, numofs, gen, genofs, stmofs, tmpofs, tok;
+	pdf_lexbuf *buf = &doc->lexbuf.base;
+	fz_context *ctx = doc->ctx;
+	int xref_len;
+	pdf_xref_entry *entry;
+	int newtmpofs;
+
+	numofs = *offset;
+	fz_seek(doc->file, numofs, SEEK_SET);
+
+	/* We expect to read 'num' here */
+	tok = pdf_lex(doc->file, buf);
+	genofs = fz_tell(doc->file);
+	if (tok != PDF_TOK_INT)
+	{
+		/* Failed! */
+		DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
+		*offset = genofs;
+		return tok == PDF_TOK_EOF;
+	}
+	*nump = num = buf->i;
+
+	/* We expect to read 'gen' here */
+	tok = pdf_lex(doc->file, buf);
+	tmpofs = fz_tell(doc->file);
+	if (tok != PDF_TOK_INT)
+	{
+		/* Failed! */
+		DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
+		*offset = tmpofs;
+		return tok == PDF_TOK_EOF;
+	}
+	gen = buf->i;
+
+	/* We expect to read 'obj' here */
+	do
+	{
+		tmpofs = fz_tell(doc->file);
+		tok = pdf_lex(doc->file, buf);
+		if (tok == PDF_TOK_OBJ)
+			break;
+		if (tok != PDF_TOK_INT)
+		{
+			DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
+			*offset = fz_tell(doc->file);
+			return tok == PDF_TOK_EOF;
+		}
+		DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
+		*nump = num = gen;
+		numofs = genofs;
+		gen = buf->i;
+		genofs = tmpofs;
+	}
+	while (1);
+
+	/* Now we read the actual object */
+	xref_len = pdf_xref_len(doc);
+
+	/* When we are reading a progressive file, we typically see:
+	 *    File Header
+	 *    obj m (Linearization params)
+	 *    xref #1 (refers to objects m-n)
+	 *    obj m+1
+	 *    ...
+	 *    obj n
+	 *    obj 1
+	 *    ...
+	 *    obj n-1
+	 *    xref #2
+	 *
+	 * The linearisation params are read elsewhere, hence
+	 * whenever we read an object it should just go into the
+	 * previous xref.
+	 */
+	tok = pdf_repair_obj(doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs);
+
+	do /* So we can break out of it */
+	{
+		if (num <= 0 || num >= xref_len)
+		{
+			fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
+			break;
+		}
+		if (gen != 0)
+		{
+			fz_warn(ctx, "Unexpected non zero generation number in linearized file");
+		}
+		entry = pdf_get_populating_xref_entry(doc, num);
+		if (entry->type != 0)
+		{
+			DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
+			break;
+		}
+		if (page && *page)
+		{
+			DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
+			if (!entry->obj)
+				entry->obj = pdf_keep_obj(*page);
+
+			if (doc->linear_page_refs[doc->linear_page_num] == NULL)
+				doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(doc, num, gen);
+		}
+		else
+		{
+			DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
+		}
+		entry->type = 'n';
+		entry->gen = 0;
+		entry->ofs = numofs;
+		entry->stm_ofs = stmofs;
+	}
+	while (0);
+	if (page && *page)
+		doc->linear_page_num++;
+
+	if (tok == PDF_TOK_ENDOBJ)
+	{
+		*offset = fz_tell(doc->file);
+	}
+	else
+	{
+		*offset = newtmpofs;
+	}
+	return 0;
+}
+
+static void
+pdf_load_hinted_page(pdf_document *doc, int pagenum)
+{
+	fz_context *ctx = doc->ctx;
+
+	if (!doc->hints_loaded || !doc->linear_page_refs)
+		return;
+
+	if (doc->linear_page_refs[pagenum])
+		return;
+
+	fz_try(ctx)
+	{
+		int num = doc->hint_page[pagenum].number;
+		pdf_obj *page = pdf_load_object(doc, num, 0);
+		if (!strcmp("Page", pdf_to_name(pdf_dict_gets(page, "Type"))))
+		{
+			/* We have found the page object! */
+			DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
+			doc->linear_page_refs[pagenum] = pdf_new_indirect(doc, num, 0);
+		}
+		pdf_drop_obj(page);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Silently swallow the error and proceed as normal */
+	}
+
+}
+
+static int
+read_hinted_object(pdf_document *doc, int num)
+{
+	/* Try to find the object using our hint table. Find the closest
+	 * object <= the one we want that has a hint and read forward from
+	 * there. */
+	fz_context *ctx = doc->ctx;
+	int expected = num;
+	int curr_pos;
+	int start, offset;
+
+	while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
+		expected--;
+	if (expected != num)
+		DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
+	if (expected == 0)	/* No hints found, just bale */
+		return 0;
+
+
+	curr_pos = fz_tell(doc->file);
+	offset = doc->hint_obj_offsets[expected];
+
+	fz_var(expected);
+
+	fz_try(ctx)
+	{
+		int found;
+
+		/* Try to read forward from there */
+		do
+		{
+			start = offset;
+			DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
+			pdf_obj_read(doc, &offset, &found, 0);
+			DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
+			if (found <= expected)
+			{
+				/* We found the right one (or one earlier than
+				 * we expected). Update the hints. */
+				doc->hint_obj_offsets[expected] = offset;
+				doc->hint_obj_offsets[found] = start;
+				doc->hint_obj_offsets[found+1] = offset;
+				/* Retry with the next one */
+				expected = found+1;
+			}
+			else
+			{
+				/* We found one later than we expected. */
+				doc->hint_obj_offsets[expected] = 0;
+				doc->hint_obj_offsets[found] = start;
+				doc->hint_obj_offsets[found+1] = offset;
+				while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
+					expected--;
+				if (expected == 0)	/* No hints found, just bale */
+					return 0;
+			}
+		}
+		while (found != num);
+	}
+	fz_always(ctx)
+	{
+		fz_seek(doc->file, curr_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* FIXME: Currently we ignore the hint. Perhaps we should
+		 * drop back to non-hinted operation here. */
+		doc->hint_obj_offsets[expected] = 0;
+		fz_rethrow(ctx);
+	}
+	return 1;
+}
 
 void
 pdf_cache_object(pdf_document *doc, int num, int gen)
@@ -1260,6 +1587,7 @@ pdf_cache_object(pdf_document *doc, int num, int gen)
 	if (num < 0 || num >= pdf_xref_len(doc))
 		fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(doc));
 
+object_updated:
 	x = pdf_get_xref_entry(doc, num);
 
 	if (x->obj)
@@ -1309,6 +1637,14 @@ pdf_cache_object(pdf_document *doc, int num, int gen)
 				fz_throw(ctx, FZ_ERROR_GENERIC, "object (%d %d R) was not found in its object stream", num, gen);
 		}
 	}
+	else if (doc->hint_obj_offsets && read_hinted_object(doc, num))
+	{
+		goto object_updated;
+	}
+	else if (doc->file_length && doc->linear_pos < doc->file_length)
+	{
+		fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d %d R) - not loaded yet?", num, gen);
+	}
 	else
 	{
 		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d %d R)", num, gen);
@@ -1368,7 +1704,7 @@ pdf_resolve_indirect(pdf_obj *ref)
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater ? */
+			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 			fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen);
 			return NULL;
 		}
@@ -1591,7 +1927,18 @@ pdf_document *
 pdf_open_document_no_run_with_stream(fz_context *ctx, fz_stream *file)
 {
 	pdf_document *doc = pdf_new_document(ctx, file);
-	pdf_init_document(doc);
+	/* Initialise the freshly created document; on failure close it and rethrow. */
+	fz_var(doc); /* doc is read again inside the catch clause below */
+
+	fz_try(ctx)
+	{
+		pdf_init_document(doc); /* presumably may throw; any error is handled by the catch clause */
+	}
+	fz_catch(ctx)
+	{
+		pdf_close_document(doc); /* initialisation failed: close the document before propagating */
+		fz_rethrow_message(ctx, "cannot load document from stream"); /* rethrow with a caller-facing message */
+	}
 	return doc;
 }
 
@@ -1599,9 +1946,10 @@ pdf_document *
 pdf_open_document_no_run(fz_context *ctx, const char *filename)
 {
 	fz_stream *file = NULL;
-	pdf_document *doc;
+	pdf_document *doc = NULL;
 
 	fz_var(file);
+	fz_var(doc);
 
 	fz_try(ctx)
 	{
@@ -1615,11 +1963,344 @@ pdf_open_document_no_run(fz_context *ctx, const char *filename)
 	}
 	fz_catch(ctx)
 	{
+		pdf_close_document(doc);
 		fz_rethrow_message(ctx, "cannot load document '%s'", filename);
 	}
 	return doc;
 }
 
+/*
+ * Parse the linearization hint stream stored in object (objnum, gennum) and
+ * populate the hint tables on doc: hint_page, hint_shared_ref, hint_shared
+ * and, finally, hint_obj_offsets (object number -> file offset).
+ *
+ * Only the parts of the page offset hint table and the shared object hint
+ * table that are needed to locate objects are decoded; the rest is skipped.
+ *
+ * Offsets read from the stream that lie beyond doc->hint_object_offset are
+ * increased by doc->hint_object_length (the hint tables appear not to count
+ * the hint stream itself).
+ *
+ * On success doc->hints_loaded is set to 1. A FZ_ERROR_TRYLATER raised while
+ * reading is rethrown unchanged so the caller can retry once more data has
+ * arrived. Any other error marks the hints as loaded (so no retry happens),
+ * clears doc->file_reading_linearly and is reported as FZ_ERROR_TRYLATER.
+ *
+ * All values in the hint stream are untrusted: counts and object numbers
+ * are range checked before they are used as array indices.
+ */
+static void
+pdf_load_hints(pdf_document *doc, int objnum, int gennum)
+{
+	fz_stream *stream = NULL;
+	pdf_obj *dict = NULL;
+	fz_context *ctx = doc->ctx;
+
+	fz_var(stream);
+	fz_var(dict);
+
+	fz_try(ctx)
+	{
+		int i, j, least_num_page_objs, page_obj_num_bits;
+		int least_page_len, page_len_num_bits, shared_hint_offset;
+		int least_page_offset, page_offset_num_bits;
+		int least_content_stream_len, content_stream_len_num_bits;
+		int num_shared_obj_num_bits, shared_obj_num_bits;
+		int numerator_bits, denominator_bits, shared;
+		int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
+		int shared_obj_count_total;
+		int least_shared_group_len, shared_group_len_num_bits;
+		int max_object_num = pdf_xref_len(doc);
+
+		stream = pdf_open_stream(doc, objnum, gennum);
+		dict = pdf_get_xref_entry(doc, objnum)->obj;
+		if (dict == NULL || !pdf_is_dict(dict))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint object");
+
+		/* "S" gives the position of the shared object hint table
+		 * within the decoded stream. */
+		shared_hint_offset = pdf_to_int(pdf_dict_gets(dict, "S"));
+
+		/* Malloc the structures (use realloc to cope with the fact we
+		 * may try this several times before enough data is loaded) */
+		doc->hint_page = fz_resize_array(ctx, doc->hint_page, doc->page_count+1, sizeof(*doc->hint_page));
+		memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->page_count+1));
+		doc->hint_obj_offsets = fz_resize_array(ctx, doc->hint_obj_offsets, max_object_num, sizeof(*doc->hint_obj_offsets));
+		memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
+		doc->hint_obj_offsets_max = max_object_num;
+
+		/* Read the page object hints table: Header first */
+		least_num_page_objs = fz_read_bits(stream, 32);
+		/* The following is sometimes a lie, but we read this version,
+		 * as other table values are built from it. In
+		 * pdf_reference17.pdf, this points to 2 objects before the
+		 * first pages page object. */
+		doc->hint_page[0].offset = fz_read_bits(stream, 32);
+		if (doc->hint_page[0].offset > doc->hint_object_offset)
+			doc->hint_page[0].offset += doc->hint_object_length;
+		page_obj_num_bits = fz_read_bits(stream, 16);
+		least_page_len = fz_read_bits(stream, 32);
+		page_len_num_bits = fz_read_bits(stream, 16);
+		/* The next eight header fields are read only to advance the
+		 * stream, apart from the two *_num_bits values used below. */
+		least_page_offset = fz_read_bits(stream, 32);
+		page_offset_num_bits = fz_read_bits(stream, 16);
+		least_content_stream_len = fz_read_bits(stream, 32);
+		content_stream_len_num_bits = fz_read_bits(stream, 16);
+		num_shared_obj_num_bits = fz_read_bits(stream, 16);
+		shared_obj_num_bits = fz_read_bits(stream, 16);
+		numerator_bits = fz_read_bits(stream, 16);
+		denominator_bits = fz_read_bits(stream, 16);
+
+		/* Item 1: Page object numbers */
+		doc->hint_page[0].number = doc->linear_page1_obj_num;
+		/* We don't care about the number of objects in the first page */
+		(void)fz_read_bits(stream, page_obj_num_bits);
+		j = 1;
+		for (i = 1; i < doc->page_count; i++)
+		{
+			int delta_page_objs = fz_read_bits(stream, page_obj_num_bits);
+
+			doc->hint_page[i].number = j;
+			j += least_num_page_objs + delta_page_objs;
+		}
+		doc->hint_page[i].number = j; /* Not a real page object */
+		fz_sync_bits(stream);
+		/* Item 2: Page lengths */
+		j = doc->hint_page[0].offset;
+		for (i = 0; i < doc->page_count; i++)
+		{
+			int delta_page_len = fz_read_bits(stream, page_len_num_bits);
+			int old = j;
+
+			doc->hint_page[i].offset = j;
+			j += least_page_len + delta_page_len;
+			/* Stepping across the hint object: skip over it. */
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		doc->hint_page[i].offset = j;
+		fz_sync_bits(stream);
+		/* Item 3: Shared references */
+		shared = 0;
+		for (i = 0; i < doc->page_count; i++)
+		{
+			int num_shared_objs = fz_read_bits(stream, num_shared_obj_num_bits);
+			doc->hint_page[i].index = shared;
+			shared += num_shared_objs;
+		}
+		doc->hint_page[i].index = shared;
+		doc->hint_shared_ref = fz_resize_array(ctx, doc->hint_shared_ref, shared, sizeof(*doc->hint_shared_ref));
+		memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
+		fz_sync_bits(stream);
+		/* Item 4: Shared references */
+		for (i = 0; i < shared; i++)
+		{
+			int ref = fz_read_bits(stream, shared_obj_num_bits);
+			doc->hint_shared_ref[i] = ref;
+		}
+		/* Skip items 5,6,7 as we don't use them */
+
+		fz_seek(stream, shared_hint_offset, SEEK_SET);
+
+		/* Read the shared object hints table: Header first */
+		shared_obj_num = fz_read_bits(stream, 32);
+		shared_obj_offset = fz_read_bits(stream, 32);
+		if (shared_obj_offset > doc->hint_object_offset)
+			shared_obj_offset += doc->hint_object_length;
+		shared_obj_count_page1 = fz_read_bits(stream, 32);
+		shared_obj_count_total = fz_read_bits(stream, 32);
+		shared_obj_num_bits = fz_read_bits(stream, 16);
+		least_shared_group_len = fz_read_bits(stream, 32);
+		shared_group_len_num_bits = fz_read_bits(stream, 16);
+
+		/* Both counts are used as loop bounds into hint_shared, which
+		 * is sized from the total. Reject values that would make
+		 * those loops run past the end of the array. */
+		if (shared_obj_count_total < 0 ||
+			shared_obj_count_page1 < 0 ||
+			shared_obj_count_page1 > shared_obj_count_total)
+		{
+			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared counts)");
+		}
+
+		/* Sanity check the references in Item 4 above to ensure we
+		 * don't access out of range with malicious files. */
+		for (i = 0; i < shared; i++)
+		{
+			if (doc->hint_shared_ref[i] >= shared_obj_count_total)
+			{
+				fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared refs)");
+			}
+		}
+
+		doc->hint_shared = fz_resize_array(ctx, doc->hint_shared, shared_obj_count_total+1, sizeof(*doc->hint_shared));
+		memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
+
+		/* Item 1: Shared references */
+		j = doc->hint_page[0].offset;
+		for (i = 0; i < shared_obj_count_page1; i++)
+		{
+			int off = fz_read_bits(stream, shared_group_len_num_bits);
+			int old = j;
+			doc->hint_shared[i].offset = j;
+			j += off + least_shared_group_len;
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		/* FIXME: We would have problems recreating the length of the
+		 * last page 1 shared reference group. But we'll never need
+		 * to, so ignore it. */
+		j = shared_obj_offset;
+		for (; i < shared_obj_count_total; i++)
+		{
+			int off = fz_read_bits(stream, shared_group_len_num_bits);
+			int old = j;
+			doc->hint_shared[i].offset = j;
+			j += off + least_shared_group_len;
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		doc->hint_shared[i].offset = j;
+		fz_sync_bits(stream);
+		/* Item 2: Signature flags: read these just so we can skip.
+		 * The number field is borrowed as scratch space for the flag;
+		 * it is overwritten with the real value in Item 4 below. */
+		for (i = 0; i < shared_obj_count_total; i++)
+		{
+			doc->hint_shared[i].number = fz_read_bits(stream, 1);
+		}
+		fz_sync_bits(stream);
+		/* Item 3: Signatures: just skip */
+		for (i = 0; i < shared_obj_count_total; i++)
+		{
+			if (doc->hint_shared[i].number)
+			{
+				(void)fz_read_bits(stream, 128);
+			}
+		}
+		fz_sync_bits(stream);
+		/* Item 4: Shared object object numbers */
+		j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
+		for (i = 0; i < shared_obj_count_page1; i++)
+		{
+			doc->hint_shared[i].number = j;
+			j += fz_read_bits(stream, shared_obj_num_bits) + 1;
+		}
+		j = shared_obj_num;
+		for (; i < shared_obj_count_total; i++)
+		{
+			doc->hint_shared[i].number = j;
+			j += fz_read_bits(stream, shared_obj_num_bits) + 1;
+		}
+		doc->hint_shared[i].number = j;
+
+		/* Now, actually use the data we have gathered. The object
+		 * numbers were accumulated from untrusted stream values, so
+		 * only record offsets for numbers that index inside
+		 * hint_obj_offsets; anything else is silently ignored. */
+		for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
+		{
+			if (doc->hint_shared[i].number >= 0 && doc->hint_shared[i].number < max_object_num)
+			{
+				doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
+			}
+		}
+		for (i = 0; i < doc->page_count; i++)
+		{
+			if (doc->hint_page[i].number >= 0 && doc->hint_page[i].number < max_object_num)
+			{
+				doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
+			}
+		}
+	}
+	fz_always(ctx)
+	{
+		fz_close(stream);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Don't try to load hints again */
+		doc->hints_loaded = 1;
+		/* We won't use the linearized object any more. */
+		doc->file_reading_linearly = 0;
+		/* Any other error becomes a TRYLATER */
+		fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
+	}
+	doc->hints_loaded = 1;
+}
+
+/*
+ * Seek to doc->hint_object_offset and read object headers of the form
+ * "<num> <gen> obj" from the file. For each header found, the object is
+ * passed through pdf_repair_obj (presumably to step over its body) and
+ * then handed to pdf_load_hints. Reading stops at the first token sequence
+ * that is not such a header.
+ *
+ * The file position on entry is restored whether or not an error occurs;
+ * errors are propagated to the caller.
+ */
+static void
+pdf_load_hint_object(pdf_document *doc)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_lexbuf *lexbuf = &doc->lexbuf.base;
+	int saved_pos = fz_tell(doc->file);
+
+	fz_seek(doc->file, doc->hint_object_offset, SEEK_SET);
+	fz_try(ctx)
+	{
+		for (;;)
+		{
+			pdf_obj *page = NULL;
+			int stm_ofs, obj_num, obj_gen;
+
+			/* Object number */
+			if (pdf_lex(doc->file, lexbuf) != PDF_TOK_INT)
+				break;
+			obj_num = lexbuf->i;
+
+			/* Generation number */
+			if (pdf_lex(doc->file, lexbuf) != PDF_TOK_INT)
+				break;
+			obj_gen = lexbuf->i;
+
+			/* The "obj" keyword */
+			if (pdf_lex(doc->file, lexbuf) != PDF_TOK_OBJ)
+				break;
+
+			(void)pdf_repair_obj(doc, lexbuf, &stm_ofs, NULL, NULL, NULL, &page, &stm_ofs);
+			pdf_load_hints(doc, obj_num, obj_gen);
+		}
+	}
+	fz_always(ctx)
+	{
+		/* Put the file pointer back where the caller left it. */
+		fz_seek(doc->file, saved_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
+
+/*
+ * Try to make further progress reading a file that is arriving
+ * progressively, and return the page reference recorded for pagenum in
+ * doc->linear_page_refs.
+ *
+ * Throws FZ_ERROR_GENERIC if pagenum is outside [0, doc->page_count).
+ * If the linear read position has already reached the end of the file the
+ * recorded reference is returned immediately. Otherwise the hint object is
+ * loaded (once only, and only for pages after the first), and objects are
+ * read from doc->linear_pos until end of file, after which the xref is
+ * loaded and the page tree is checked.
+ *
+ * A FZ_ERROR_TRYLATER raised while reading is swallowed if a reference for
+ * pagenum has been recorded by then; otherwise it, like any other error,
+ * is rethrown. The file position on entry is restored before returning or
+ * rethrowing from the read loop.
+ */
+pdf_obj *pdf_progressive_advance(pdf_document *doc, int pagenum)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_lexbuf *buf = &doc->lexbuf.base;
+	int curr_pos;
+	/* Must start out NULL: it is dropped in the catch clause, which has
+	 * to be safe however the try block was left. */
+	pdf_obj *page = NULL;
+
+	/* Validate pagenum before it is passed to any helper or used as an
+	 * index into linear_page_refs. */
+	if (pagenum < 0 || pagenum >= doc->page_count)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->page_count);
+
+	pdf_load_hinted_page(doc, pagenum);
+
+	/* Everything has been read already: nothing more to advance. */
+	if (doc->linear_pos == doc->file_length)
+		return doc->linear_page_refs[pagenum];
+
+	/* Only load hints once, and then only after we have got page 0 */
+	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
+	{
+		/* Found hint object */
+		pdf_load_hint_object(doc);
+	}
+
+	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
+	curr_pos = fz_tell(doc->file);
+
+	fz_var(page);
+
+	fz_try(ctx)
+	{
+		int eof;
+		pdf_obj *catalog;
+		pdf_obj *pages;
+
+		/* Read objects one at a time from the linear position until
+		 * the reader reports end of file. Each object returned
+		 * through page is released straight away. */
+		do
+		{
+			int num;
+			page = NULL;
+			eof = pdf_obj_read(doc, &doc->linear_pos, &num, &page);
+			pdf_drop_obj(page);
+			page = NULL;
+		}
+		while (!eof);
+
+		/* The whole file is now available: switch to the real xref
+		 * and make sure it leads to a page tree. */
+		doc->linear_pos = doc->file_length;
+		pdf_load_xref(doc, buf);
+		catalog = pdf_dict_gets(pdf_trailer(doc), "Root");
+		pages = pdf_dict_gets(catalog, "Pages");
+
+		if (!pdf_is_dict(pages))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree");
+	}
+	fz_always(ctx)
+	{
+		fz_seek(doc->file, curr_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		pdf_drop_obj(page);
+		/* Running out of data is acceptable as long as the requested
+		 * page has been found; everything else is passed on. */
+		if (fz_caught(ctx) != FZ_ERROR_TRYLATER)
+			fz_rethrow(ctx);
+		if (doc->linear_page_refs[pagenum] == NULL)
+		{
+			/* Still not got a page */
+			fz_rethrow(ctx);
+		}
+	}
+
+	return doc->linear_page_refs[pagenum];
+}
+
 pdf_document *pdf_specifics(fz_document *doc)
 {
 	return (pdf_document *)((doc && doc->close == (void *)pdf_close_document) ? doc : NULL);
author	Robin Watts <robin.watts@artifex.com>	2013-07-17 18:42:33 +0100
committer	Robin Watts <robin.watts@artifex.com>	2013-07-19 19:54:27 +0100
commit	f5f7c0e4dd83257f526b158e3998970717852a0e (patch)
tree	c34ee93ab7773e4fbe48506c97fb515c03707e57 /source/pdf
parent	3c559928d88fccfe17da4953ea1c93ceb42a90cb (diff)
download	mupdf-f5f7c0e4dd83257f526b158e3998970717852a0e.tar.xz