diff options
authorRobin Watts <>2013-07-17 18:42:33 +0100
committerRobin Watts <>2013-07-19 19:54:27 +0100
commitf5f7c0e4dd83257f526b158e3998970717852a0e (patch)
parent3c559928d88fccfe17da4953ea1c93ceb42a90cb (diff)
Initial work on progressive loading
We are testing this using a new -p flag to mupdf that sets a bitrate at which data will appear to arrive progressively as time goes on. For example: mupdf -p 102400 pdf_reference17.pdf Details of the scheme used here are presented in docs/progressive.txt
33 files changed, 1600 insertions, 86 deletions
diff --git a/docs/progressive.txt b/docs/progressive.txt
new file mode 100644
index 00000000..e626c26f
--- /dev/null
+++ b/docs/progressive.txt
@@ -0,0 +1,289 @@
+How to do progressive loading with MuPDF.
+What is progressive loading?
+The idea of progressive loading is that as you download a PDF file
+into a browser, you can display the pages as they appear.
+There are 2 mechanisms by which this can be achieved. The first
+relies on the file being "linearized", the second relies on the
+caller of MuPDF having fine control over the http fetch and on
+the server supporting byte-range fetches.
+Progressive download using "linearized" files
+Adobe defines "linearized" PDFs as being ones that have both a
+specific layout of objects and a small amount of extra
+information to help avoid seeking within a file. The stated aim
+is to deliver the first page of a document in advance of the whole
+document downloading, whereupon subsequent pages will become
+available. Adobe also refers to these as "Optimized for fast web
+view" or "Web Optimized".
+MuPDF can actually slightly outperform this by displaying the first
+page quickly, and then providing 'incomplete' renderings of
+subsequent pages, as more and more resources are gradually delivered.
+Essentially the file starts with a slightly modified header, and the
+first object in the file is a special one (the linearization object)
+that a) indicates that the file is linearized, and b) gives some
+useful information (like the number of pages in the file etc).
+This object is then followed by all the objects required for the
+first page, then the "hint stream", then sets of objects for each
+subsequent page in turn, then shared objects required for those
+pages, then various other random things.
+[Yes, really. While page 1 is sent with all the objects that it
+uses, shared or otherwise, subsequent pages do not get shared
+resources until after all the unshared page objects have been
+sent.]
+The Hint Stream, and why we don't use it.
+Adobe intended the Hint Stream to facilitate the display of
+subsequent pages, but Adobe itself has never used it. Consequently you
+can't trust people to write it properly. Consequently no one
+actually uses it. Consequently we should make do without it too.
+So how does MuPDF handle progressive loading?
+MuPDF has made various extensions to its mechanisms for handling
+progressive loading.
+ + Progressive streams
+ At its lowest level MuPDF reads file data from an fz_stream,
+ using the fz_open_document_with_stream call. (fz_open_document
+ is implemented by calling this). We have extended the fz_stream
+ slightly, giving the system a way to ask for meta information
+ (or perform meta operations) on a stream.
+ Using this mechanism MuPDF can query:
+ + whether a stream is progressive or not (i.e. whether the
+ entire stream is accessible immediately)
+ + what the length of a stream should ultimately be (which an
+ http fetcher should know from the Content-Length header).
+ With this information MuPDF can decide whether to use its normal
+ object reading code, or whether to make use of a linearized
+ object. Knowing the length enables us to check it against the length
+ value given in the linearized object - if these differ, the
+ assumption is that an incremental save has taken place, thus the
+ file is no longer linearized.
+ When data is pulled from a progressive stream, if we attempt to
+ read data that is not currently available, the stream should
+ throw an FZ_ERROR_TRYLATER error. This particular error code
+ will be interpreted by the caller as an indication that it
+ should retry the parsing of the current objects at a later time.
+ When a MuPDF call is made on a progressive stream, such as
+ fz_open_document_with_stream, or fz_load_page, the caller should
+ be prepared to handle an FZ_ERROR_TRYLATER error as meaning that
+ more data is required before it can continue. No indication is
+ directly given as to exactly how much more data is required, but
+ as the caller will be implementing the progressive fz_stream
+ that it has passed into MuPDF to start with, it can reasonably
+ be expected to figure out an estimate for itself.
+ + Cookie
+ Once a page has been loaded, if its contents are to be 'run'
+ as normal (using e.g. fz_run_page) any error (such as failing
+ to read a font, or an image, or even a content stream belonging
+ to the page) will result in a rendering that aborts with an
+ FZ_ERROR_TRYLATER error. The caller can catch this and display
+ a placeholder instead.
+ If each page's data was entirely self-contained and sent in
+ sequence this would perhaps be acceptable, with each page
+ appearing one after the other. Unfortunately, the linearization
+ procedure as laid down by Adobe does NOT do this: objects shared
+ between multiple pages (other than the first) are not sent with
+ the pages themselves, but rather AFTER all the pages have been
+ sent.
+ This means that a document that has a title page, then contents
+ that share a font used on pages 2 onwards, will not be able to
+ correctly display page 2 until after the font has arrived in
+ the file, which will not be until all the page data has been
+ sent.
+ To mitigate this, MuPDF provides a way whereby callers
+ can indicate that they are prepared to accept an 'incomplete'
+ rendering of the file (perhaps with missing images, or with
+ substitute fonts).
+ Callers prepared to tolerate such renderings should set the
+ 'incomplete_ok' flag in the cookie, then call fz_run_page etc
+ as normal. If an FZ_ERROR_TRYLATER error is thrown at any point
+ during the page rendering, the error will be swallowed, the
+ 'incomplete' field in the cookie will become non-zero and
+ rendering will continue. When control returns to the caller
+ the caller can check the value of the 'incomplete' field and
+ know that the rendering it received is not authoritative.
+Progressive loading using byte range requests
+If the caller has control over the http fetch, then it is possible
+to use byte range requests to fetch the document 'out of order'.
+This enables non-linearized files to be progressively displayed as
+they download, and fetches complete renderings of pages earlier than
+would otherwise be the case. This process requires no changes within
+MuPDF itself, but rather in the way the progressive stream learns
+from the attempts MuPDF makes to fetch data.
+Consider for example, an attempt to fetch a hypothetical file from
+a server.
+ + The initial http request for the document is sent with a "Range:"
+ header to pull down the first (say) 4k of the file.
+ + As soon as we get the header in from this initial request, we can
+ respond to meta stream operations to give the length, and whether
+ byte requests are accepted.
+ - If the header indicates that byte ranges are acceptable the
+ stream proceeds to go into a loop fetching chunks of the file
+ at a time (not necessarily in-order). Otherwise the server
+ will ignore the Range: header, and just serve the whole file.
+ - If the header indicates a content-length, the stream returns
+ that.
+ + MuPDF can then decide how to proceed based upon these flags and
+ whether the file is linearized or not. (If the file contains a
+ linearized object, and the content length matches, then the file
+ is considered to be linear, otherwise it is not).
+ If the file is linear:
+ - we proceed to read objects out of the file as it downloads.
+ This will provide us the first page and all its resources. It
+ will also enable us to read the hint streams (if present).
+ - Once we have read the hint streams, we unpack (and sanity
+ check) them to give us a map of where in the file each object
+ is predicted to live, and which objects are required for each
+ page. If any of these values are out of range, we treat the
+ file as if there were no hint streams.
+ - If we have hints, any attempt to load a subsequent page will
+ cause MuPDF to attempt to read exactly the objects required.
+ This will cause a sequence of seeks in the fz_stream followed
+ by reads. If the stream does not have the data to satisfy that
+ request yet, the stream code should remember the location that
+ was fetched (and fetch that block in the background so that
+ future retries will succeed) and should raise an
+ FZ_ERROR_TRYLATER error.
+ [Typically therefore when we jump to a page in a linear file
+ on a byte request capable link, we will quickly see a rough
+ rendering, which will improve fairly fast as images and fonts
+ arrive.]
+ - Regardless of whether we have hints or byte requests, on every
+ fz_load_page call MuPDF will attempt to process more of the
+ stream (that is assumed to be being downloaded in the
+ background). As linearized files are guaranteed to have pages
+ in order, pages will gradually become available. In the absence
+ of byte requests and hints however, we have no way of getting
+ resources early, so the renderings for these pages will remain
+ incomplete until much more of the file has arrived.
+ [Typically therefore when we jump to a page in a linear file
+ on a non byte request capable link, we will see a rough
+ rendering for that page as soon as data arrives for it (which
+ will typically take much longer than would be the case with
+ byte range capable downloads), and that will improve much more
+ slowly as images and fonts may not appear until almost the
+ whole file has arrived.]
+ - When the whole file has arrived, then we will attempt to read
+ the outlines for the file.
+ For a non-linearized PDF on a byte request capable stream:
+ - MuPDF will immediately seek to the end of the file to attempt
+ to read the trailer. This will fail with an FZ_ERROR_TRYLATER
+ due to the data not being here yet, but the stream code should
+ remember that this data is required and it should be prioritized
+ in the background fetch process.
+ - Repeated attempts to open the stream should eventually succeed
+ therefore. As MuPDF jumps through the file trying to read first
+ the xrefs, then the page tree objects, then the page contents
+ themselves etc, the background fetching process will be driven
+ by the attempts to read the file in the foreground.
+ [Typically therefore the opening of a non-linearized file will
+ be slower than a linearized one, as the xrefs/page trees for a
+ non-linear file can be 20%+ of the file data. Once past this
+ initial point however, pages and data can be pulled from the
+ file almost as fast as with a linearized file.]
+ For a non-linearized PDF on a non-byte request capable stream:
+ - MuPDF will immediately seek to the end of the file to attempt
+ to read the trailer. This will fail with an FZ_ERROR_TRYLATER
+ due to the data not being here yet. Subsequent retries will
+ continue to fail until the whole file has arrived, whereupon
+ the whole file will be instantly available.
+ [This is the worst case situation - nothing at all can be
+ displayed until the entire file has downloaded.]
+ A typical structure for a fetcher process (see curl-stream.c in
+ mupdf-curl as an example) might therefore look like this:
+ + We consider the file as an (initially empty) buffer which we are
+ filling by making requests. In order to ensure that we make
+ maximum use of our download link, we should ensure that whenever
+ one request finishes, we immediately launch another. Further, to
+ avoid the overheads for the request/response headers being too
+ large, we may want to divide the file into 'chunks', perhaps 4 or 32k
+ in size.
+ + We can then imagine a receiver process that sits there in a loop
+ requesting chunks to fill this buffer. In the absence of
+ any other impetus the receiver should request the next 'chunk'
+ of data from the file that it does not yet have, following the last
+ fill point. Initially we start the fill point at the beginning of
+ the file, but this will move around based on the requests made of
+ the progressive stream.
+ + We attempt to open the file, and MuPDF will read from the progressive
+ stream. It will first read the PDF file header (i.e. it will read from
+ within the area of the file we have already requested). It will then
+ attempt to seek to the end of the file to read the trailer. The
+ stream then has a choice; it can choose to block until such time as
+ data arrives (unlikely to be satisfactory as this blocks all other
+ MuPDF operations), or it can throw an FZ_ERROR_TRYLATER error.
+ + Whichever of these it chooses to do, it knows that the next 'block'
+ of the file that is required will be at the end of the file, so it
+ can set the desired fill pointer in the receiver process to arrange
+ that the next block requested will be that at the end of the file.
+ + When this data arrives, it can either unblock and continue, or
+ retry the MuPDF call that exited with the FZ_ERROR_TRYLATER error.
+ + When the file trailer has been read, MuPDF will then attempt to
+ seek and read the xref information. Again this will cause the http
+ receiver to request that area of the file next.
+ + Accordingly, the file will be read using 'random access', where
+ the stream is in control of either blocking or asking operations
+ to retry later.
diff --git a/include/mupdf/fitz/context.h b/include/mupdf/fitz/context.h
index d7365227..737f163e 100644
--- a/include/mupdf/fitz/context.h
+++ b/include/mupdf/fitz/context.h
@@ -71,11 +71,13 @@ FZ_NORETURN void fz_rethrow_message(fz_context *, const char *, ...) __printfli
void fz_warn(fz_context *ctx, const char *fmt, ...) __printflike(2, 3);
const char *fz_caught_message(fz_context *ctx);
int fz_caught(fz_context *ctx);
+void fz_rethrow_if(fz_context *ctx, int errcode);
diff --git a/include/mupdf/fitz/device.h b/include/mupdf/fitz/device.h
index 7c64c8b8..b39ff4ee 100644
--- a/include/mupdf/fitz/device.h
+++ b/include/mupdf/fitz/device.h
@@ -218,6 +218,16 @@ typedef struct fz_cookie_s fz_cookie;
value of progress to that of progress_max.
errors: count of errors during current rendering.
+ incomplete_ok: If this is set to 1 by the caller, then TRYLATER
+ errors are swallowed as they occur, setting the 'incomplete' flag.
+ Rendering continues as much as possible ignoring errors. The caller
+ is expected to check the 'incomplete' flag at the end to see if the
+ rendering may be considered final or not.
+ incomplete: Initially should be set to 0. Will be set to non-zero
+ if a TRYLATER error is thrown during rendering and the incomplete_ok
+ flag is set.
struct fz_cookie_s
@@ -225,6 +235,8 @@ struct fz_cookie_s
int progress;
int progress_max; /* -1 for unknown */
int errors;
+ int incomplete_ok;
+ int incomplete;
diff --git a/include/mupdf/fitz/stream.h b/include/mupdf/fitz/stream.h
index 8fba3379..1f616eef 100644
--- a/include/mupdf/fitz/stream.h
+++ b/include/mupdf/fitz/stream.h
@@ -28,6 +28,9 @@ typedef struct fz_stream_s fz_stream;
fz_stream *fz_open_file(fz_context *ctx, const char *filename);
+fz_stream *fz_open_fd_progressive(fz_context *ctx, int fd, int bps);
+fz_stream *fz_open_file_progressive(fz_context *ctx, const char *filename, int bps);
fz_open_file_w: Open the named file and wrap it in a stream.
@@ -125,6 +128,14 @@ int fz_read(fz_stream *stm, unsigned char *data, int len);
fz_buffer *fz_read_all(fz_stream *stm, int initial);
+int fz_stream_meta(fz_stream *stm, int key, int size, void *ptr);
struct fz_stream_s
fz_context *ctx;
@@ -139,6 +150,7 @@ struct fz_stream_s
int (*read)(fz_stream *stm, unsigned char *buf, int len);
void (*close)(fz_context *ctx, void *state);
void (*seek)(fz_stream *stm, int offset, int whence);
+ int (*meta)(fz_stream *stm, int key, int size, void *ptr);
unsigned char buf[4096];
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h
index 9a23fd7e..3afabfdb 100644
--- a/include/mupdf/pdf/document.h
+++ b/include/mupdf/pdf/document.h
@@ -168,6 +168,18 @@ void pdf_update_page(pdf_document *doc, pdf_page *page);
int pdf_has_unsaved_changes(pdf_document *doc);
+typedef struct pdf_obj_read_state_s pdf_obj_read_state;
+ int offset;
+ int num;
+ int numofs;
+ int gen;
+ int genofs;
struct pdf_document_s
fz_document super;
@@ -188,6 +200,52 @@ struct pdf_document_s
int freeze_updates;
int page_count;
+ /* State indicating which file parsing method we are using */
+ int file_reading_linearly;
+ int file_length;
+ pdf_obj *linear_obj; /* Linearized object (if used) */
+ pdf_obj **linear_page_refs; /* Page objects for linear loading */
+ int linear_page1_obj_num;
+ /* The state for the pdf_progressive_advance parser */
+ int linear_pos;
+ int linear_page_num;
+ int hint_object_offset;
+ int hint_object_length;
+ int hints_loaded; /* Set to 1 after the hints loading has completed,
+ * whether successful or not! */
+ /* Page n references shared object references:
+ * hint_shared_ref[i]
+ * where
+ * i = s to e-1
+ * s = hint_page[n]->index
+ * e = hint_page[n+1]->index
+ * Shared object reference r accesses objects:
+ * rs to re-1
+ * where
+ * rs = hint_shared[r]->number
+ * re = hint_shared[r]->count + rs
+ * These are guaranteed to lie within the region starting at
+ * hint_shared[r]->offset of length hint_shared[r]->length
+ */
+ struct
+ {
+ int number; /* Page object number */
+ int offset; /* Offset of page object */
+ int index; /* Index into shared hint_shared_ref */
+ } *hint_page;
+ int *hint_shared_ref;
+ struct
+ {
+ int number; /* Object number of first object */
+ int offset; /* Offset of first object */
+ } *hint_shared;
+ int hint_obj_offsets_max;
+ int *hint_obj_offsets;
int resources_localised;
pdf_lexbuf_large lexbuf;
diff --git a/include/mupdf/pdf/font.h b/include/mupdf/pdf/font.h
index 90b0fe01..ce140051 100644
--- a/include/mupdf/pdf/font.h
+++ b/include/mupdf/pdf/font.h
@@ -112,6 +112,7 @@ unsigned char *pdf_lookup_substitute_cjk_font(int ros, int serif, unsigned int *
pdf_font_desc *pdf_load_type3_font(pdf_document *doc, pdf_obj *rdb, pdf_obj *obj);
void pdf_load_type3_glyphs(pdf_document *doc, pdf_font_desc *fontdesc, int nestedDepth);
pdf_font_desc *pdf_load_font(pdf_document *doc, pdf_obj *rdb, pdf_obj *obj, int nestedDepth);
+pdf_font_desc *pdf_load_hail_mary_font(pdf_document *doc);
pdf_font_desc *pdf_new_font_desc(fz_context *ctx);
pdf_font_desc *pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc);
diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h
index c1fffa57..f0a8de0d 100644
--- a/include/mupdf/pdf/page.h
+++ b/include/mupdf/pdf/page.h
@@ -91,6 +91,13 @@ struct pdf_page_s
float duration;
int transition_present;
fz_transition transition;
+ int incomplete;
diff --git a/include/mupdf/pdf/xref.h b/include/mupdf/pdf/xref.h
index 5d4ecde9..de53da7c 100644
--- a/include/mupdf/pdf/xref.h
+++ b/include/mupdf/pdf/xref.h
@@ -82,6 +82,10 @@ void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
void pdf_repair_obj_stms(pdf_document *doc);
pdf_obj *pdf_new_ref(pdf_document *doc, pdf_obj *obj);
+int pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int *tmpofs);
+pdf_obj *pdf_progressive_advance(pdf_document *doc, int pagenum);
void pdf_print_xref(pdf_document *);
diff --git a/platform/win32/libmupdf.vcproj b/platform/win32/libmupdf.vcproj
index ac9e3707..0fdfd672 100644
--- a/platform/win32/libmupdf.vcproj
+++ b/platform/win32/libmupdf.vcproj
@@ -626,6 +626,10 @@
+ RelativePath="..\..\source\fitz\stream-prog.c"
+ >
+ </File>
+ <File
diff --git a/platform/x11/pdfapp.c b/platform/x11/pdfapp.c
index 71561f14..c891558f 100644
--- a/platform/x11/pdfapp.c
+++ b/platform/x11/pdfapp.c
@@ -157,6 +157,11 @@ static void event_cb(pdf_doc_event *event, void *data)
void pdfapp_open(pdfapp_t *app, char *filename, int reload)
+ pdfapp_open_progressive(app, filename, reload, 0);
+void pdfapp_open_progressive(pdfapp_t *app, char *filename, int reload, int bps)
fz_context *ctx = app->ctx;
char *password = "";
@@ -164,7 +169,32 @@ void pdfapp_open(pdfapp_t *app, char *filename, int reload)
pdf_document *idoc;
- app->doc = fz_open_document(ctx, filename);
+ if (bps == 0)
+ {
+ app->doc = fz_open_document(ctx, filename);
+ }
+ else
+ {
+ fz_stream *stream = fz_open_file_progressive(ctx, filename, bps);
+ while (1)
+ {
+ fz_try(ctx)
+ {
+ fz_seek(stream, 0, SEEK_SET);
+ app->doc = fz_open_document_with_stream(ctx, filename, stream);
+ }
+ fz_catch(ctx)
+ {
+ if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+ {
+ pdfapp_warn(app, "not enough data to open yet");
+ continue;
+ }
+ fz_rethrow(ctx);
+ }
+ break;
+ }
+ }
idoc = pdf_specifics(app->doc);
@@ -193,8 +223,41 @@ void pdfapp_open(pdfapp_t *app, char *filename, int reload)
app->doctitle = strrchr(app->doctitle, '/') + 1;
app->doctitle = fz_strdup(ctx, app->doctitle);
- app->pagecount = fz_count_pages(app->doc);
- app->outline = fz_load_outline(app->doc);
+ while (1)
+ {
+ fz_try(ctx)
+ {
+ app->pagecount = fz_count_pages(app->doc);
+ }
+ fz_catch(ctx)
+ {
+ if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+ {
+ pdfapp_warn(app, "not enough data to count pages yet");
+ continue;
+ }
+ fz_rethrow(ctx);
+ }
+ break;
+ }
+ while (1)
+ {
+ fz_try(ctx)
+ {
+ app->outline = fz_load_outline(app->doc);
+ }
+ fz_catch(ctx)
+ {
+ if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+ {
+ pdfapp_warn(app, "not enough data to load outline yet - ignoring");
+ /* FIXME: Set 'outline_deferred' and retry at end? */
+ }
+ else
+ fz_rethrow(ctx);
+ }
+ break;
+ }
@@ -441,6 +504,7 @@ static void pdfapp_loadpage(pdfapp_t *app)
/* Create display lists */
app->page_list = fz_new_display_list(app->ctx);
mdev = fz_new_list_device(app->ctx, app->page_list);
+ cookie.incomplete_ok = 1;
fz_run_page_contents(app->doc, app->page, mdev, &fz_identity, &cookie);
mdev = NULL;
@@ -453,6 +517,10 @@ static void pdfapp_loadpage(pdfapp_t *app)
pdfapp_warn(app, "Errors found on page");
errored = 1;
+ if (cookie.incomplete)
+ {
+ pdfapp_warn(app, "Incomplete page rendering");
+ }
diff --git a/platform/x11/pdfapp.h b/platform/x11/pdfapp.h
index 0b89923e..eabb6ca5 100644
--- a/platform/x11/pdfapp.h
+++ b/platform/x11/pdfapp.h
@@ -130,6 +130,7 @@ struct pdfapp_s
void pdfapp_init(fz_context *ctx, pdfapp_t *app);
void pdfapp_open(pdfapp_t *app, char *filename, int reload);
+void pdfapp_open_progressive(pdfapp_t *app, char *filename, int reload, int bps);
void pdfapp_close(pdfapp_t *app);
int pdfapp_preclose(pdfapp_t *app);
diff --git a/platform/x11/win_main.c b/platform/x11/win_main.c
index a22e7fc0..51dada02 100644
--- a/platform/x11/win_main.c
+++ b/platform/x11/win_main.c
@@ -1143,6 +1143,8 @@ WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShow
MSG msg;
int code;
fz_context *ctx;
+ int arg;
+ int bps = 0;
ctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT);
if (!ctx)
@@ -1157,9 +1159,24 @@ WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShow
- if (argc == 2)
+ arg = 1;
+ while (arg < argc)
- wcscpy(wbuf, argv[1]);
+ if (!wcscmp(argv[arg], L"-p"))
+ {
+ if (arg+1 < argc)
+ bps = _wtoi(argv[++arg]);
+ else
+ bps = 4096;
+ }
+ else
+ break;
+ arg++;
+ }
+ if (arg < argc)
+ {
+ wcscpy(wbuf, argv[arg]);
@@ -1171,7 +1188,10 @@ WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShow
if (code == 0)
winerror(&gapp, "cannot convert filename to utf-8");
- pdfapp_open(&gapp, filename, 0);
+ if (bps)
+ pdfapp_open_progressive(&gapp, filename, 0, bps);
+ else
+ pdfapp_open(&gapp, filename, 0);
while (GetMessage(&msg, NULL, 0, 0))
diff --git a/source/fitz/error.c b/source/fitz/error.c
index fb0dc77e..0da7d29e 100644
--- a/source/fitz/error.c
+++ b/source/fitz/error.c
@@ -180,3 +180,10 @@ void fz_rethrow_message(fz_context *ctx, const char *fmt, ...)
+void fz_rethrow_if(fz_context *ctx, int err)
+ assert(ctx && ctx->error && ctx->error->errcode >= FZ_ERROR_NONE);
+ if (ctx->error->errcode == err)
+ fz_rethrow(ctx);
diff --git a/source/fitz/filter-dct.c b/source/fitz/filter-dct.c
index 1a55e584..189d2aae 100644
--- a/source/fitz/filter-dct.c
+++ b/source/fitz/filter-dct.c
@@ -53,7 +53,7 @@ static boolean fill_input_buffer(j_decompress_ptr cinfo)
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
return 0;
src->next_input_byte = chain->rp;
diff --git a/source/fitz/image.c b/source/fitz/image.c
index 62561762..0aeb449d 100644
--- a/source/fitz/image.c
+++ b/source/fitz/image.c
@@ -157,7 +157,7 @@ fz_decomp_image_from_stream(fz_context *ctx, fz_stream *stm, fz_image *image, in
- /* FIXME: TryLater? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "ignoring error at end of image");
diff --git a/source/fitz/stream-prog.c b/source/fitz/stream-prog.c
new file mode 100644
index 00000000..e35a1ac5
--- /dev/null
+++ b/source/fitz/stream-prog.c
@@ -0,0 +1,181 @@
+#include "mupdf/fitz/stream.h"
+#include "mupdf/fitz/string.h"
+#if defined(_WIN32) && !defined(NDEBUG)
+#include "windows.h"
+static void
+show_progress(int av, int pos)
+ char text[80];
+ sprintf(text, "Have %d, Want %d\n", av, pos);
+ OutputDebugStringA(text);
+#define show_progress(A,B) do {} while (0)
+/* File stream - progressive reading to simulate http download */
+typedef struct prog_state
+ int fd;
+ int length;
+ int available;
+ int bps;
+ clock_t start_time;
+} prog_state;
+static int read_prog(fz_stream *stm, unsigned char *buf, int len)
+ prog_state *ps = (prog_state *)stm->state;
+ int n;
+ /* Simulate more data having arrived */
+ if (ps->available < ps->length)
+ {
+ int av = (int)((float)(clock() - ps->start_time) * ps->bps / (CLOCKS_PER_SEC*8));
+ if (av > ps->length)
+ av = ps->length;
+ ps->available = av;
+ /* Limit any fetches to be within the data we have */
+ if (av < ps->length && len + stm->pos > av)
+ {
+ len = av - stm->pos;
+ if (len <= 0)
+ {
+ show_progress(av, stm->pos);
+ fz_throw(stm->ctx, FZ_ERROR_TRYLATER, "Not enough data yet");
+ }
+ }
+ }
+ n = (len > 0 ? read(ps->fd, buf, len) : 0);
+ if (n < 0)
+ fz_throw(stm->ctx, FZ_ERROR_GENERIC, "read error: %s", strerror(errno));
+ return n;
+static void seek_prog(fz_stream *stm, int offset, int whence)
+ prog_state *ps = (prog_state *)stm->state;
+ int n;
+ /* Simulate more data having arrived */
+ if (ps->available < ps->length)
+ {
+ int av = (int)((float)(clock() - ps->start_time) * ps->bps / (CLOCKS_PER_SEC*8));
+ if (av > ps->length)
+ av = ps->length;
+ ps->available = av;
+ }
+ if (ps->available < ps->length)
+ {
+ if (whence == SEEK_END)
+ {
+ show_progress(ps->available, ps->length);
+ fz_throw(stm->ctx, FZ_ERROR_TRYLATER, "Not enough data to seek to end yet");
+ }
+ }
+ if (whence == SEEK_CUR)
+ {
+ whence = SEEK_SET;
+ offset += stm->pos;
+ if (offset > ps->available)
+ {
+ show_progress(ps->available, offset);
+ fz_throw(stm->ctx, FZ_ERROR_TRYLATER, "Not enough data to seek (relatively) to offset yet");
+ }
+ }
+ if (whence == SEEK_SET)
+ {
+ if (offset > ps->available)
+ {
+ show_progress(ps->available, offset);
+ fz_throw(stm->ctx, FZ_ERROR_TRYLATER, "Not enough data to seek to offset yet");
+ }
+ }
+ n = lseek(ps->fd, offset, whence);
+ if (n < 0)
+ fz_throw(stm->ctx, FZ_ERROR_GENERIC, "cannot lseek: %s", strerror(errno));
+ stm->pos = n;
+ stm->rp = stm->bp;
+ stm->wp = stm->bp;
+static void close_prog(fz_context *ctx, void *state)
+ prog_state *ps = (prog_state *)state;
+ int n = close(ps->fd);
+ if (n < 0)
+ fz_warn(ctx, "close error: %s", strerror(errno));
+ fz_free(ctx, state);
+static int meta_prog(fz_stream *stm, int key, int size, void *ptr)
+ prog_state *ps = (prog_state *)stm->state;
+ switch(key)
+ {
+ return 1;
+ break;
+ return ps->length;
+ }
+ return -1;
+fz_stream *
+fz_open_fd_progressive(fz_context *ctx, int fd, int bps)
+ fz_stream *stm;
+ prog_state *state;
+ state = fz_malloc_struct(ctx, prog_state);
+ state->fd = fd;
+ state->bps = bps;
+ state->start_time = clock();
+ state->available = 0;
+ state->length = lseek(state->fd, 0, SEEK_END);
+ lseek(state->fd, 0, SEEK_SET);
+ fz_try(ctx)
+ {
+ stm = fz_new_stream(ctx, state, read_prog, close_prog);
+ }
+ fz_catch(ctx)
+ {
+ fz_free(ctx, state);
+ fz_rethrow(ctx);
+ }
+ stm->seek = seek_prog;
+ stm->meta = meta_prog;
+ return stm;
+fz_stream *
+fz_open_file_progressive(fz_context *ctx, const char *name, int bps)
+#ifdef _WIN32
+ char *s = (char*)name;
+ wchar_t *wname, *d;
+ int c, fd;
+ d = wname = fz_malloc(ctx, (strlen(name)+1) * sizeof(wchar_t));
+ while (*s) {
+ s += fz_chartorune(&c, s);
+ *d++ = c;
+ }
+ *d = 0;
+ fd = _wopen(wname, O_BINARY | O_RDONLY, 0);
+ fz_free(ctx, wname);
+ int fd = open(name, O_BINARY | O_RDONLY, 0);
+ if (fd == -1)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open %s", name);
+ return fz_open_fd_progressive(ctx, fd, bps);
diff --git a/source/fitz/stream-read.c b/source/fitz/stream-read.c
index ee3d1cad..d433ce32 100644
--- a/source/fitz/stream-read.c
+++ b/source/fitz/stream-read.c
@@ -84,7 +84,7 @@ fz_fill_buffer(fz_stream *stm)
- /* FIXME: TryLater */
+ fz_rethrow_if(stm->ctx, FZ_ERROR_TRYLATER);
fz_warn(stm->ctx, "read error; treating as end of file");
stm->error = 1;
@@ -134,7 +134,11 @@ fz_read_best(fz_stream *stm, int initial, int *truncated)
- /* FIXME: TryLater */
+ if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+ {
+ fz_drop_buffer(ctx, buf);
+ fz_rethrow(ctx);
+ }
if (truncated)
*truncated = 1;
@@ -184,6 +188,7 @@ fz_tell(fz_stream *stm)
fz_seek(fz_stream *stm, int offset, int whence)
+ stm->avail = 0; /* Reset bit reading */
if (stm->seek)
if (whence == 1)
@@ -217,3 +222,10 @@ fz_seek(fz_stream *stm, int offset, int whence)
fz_warn(stm->ctx, "cannot seek");
+int fz_stream_meta(fz_stream *stm, int key, int size, void *ptr)
+ if (!stm || !stm->meta)
+ return -1;
+ return stm->meta(stm, key, size, ptr);
diff --git a/source/pdf/js/pdf-js.c b/source/pdf/js/pdf-js.c
index 26c6d6f8..d801716a 100644
--- a/source/pdf/js/pdf-js.c
+++ b/source/pdf/js/pdf-js.c
@@ -492,7 +492,7 @@ static pdf_jsimp_obj *doc_getField(void *jsctx, void *obj, int argc, pdf_jsimp_o
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "doc_getField failed: %s", fz_caught_message(ctx));
dict = NULL;
@@ -832,7 +832,7 @@ void pdf_js_load_document_level(pdf_js *js)
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "Warning: %s", fz_caught_message(ctx));
diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c
index c571a0a1..4c76fce9 100644
--- a/source/pdf/pdf-annot.c
+++ b/source/pdf/pdf-annot.c
@@ -331,6 +331,7 @@ pdf_load_link_annots(pdf_document *doc, pdf_obj *annots, const fz_matrix *page_c
n = pdf_array_len(annots);
for (i = 0; i < n; i++)
+ /* FIXME: Move the try/catch out of the loop for performance? */
obj = pdf_array_get(annots, i);
@@ -338,7 +339,7 @@ pdf_load_link_annots(pdf_document *doc, pdf_obj *annots, const fz_matrix *page_c
- /* FIXME: TryLater */
+ fz_rethrow_if(doc->ctx, FZ_ERROR_TRYLATER);
link = NULL;
@@ -594,9 +595,13 @@ pdf_load_annots(pdf_document *doc, pdf_obj *annots, pdf_page *page)
+ if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+ {
+ pdf_free_annot(ctx, head);
+ fz_rethrow(ctx);
+ }
keep_annot = 0;
fz_warn(ctx, "ignoring broken annotation");
- /* FIXME: TryLater */
if (!keep_annot)
@@ -657,8 +662,8 @@ pdf_update_annot(pdf_document *doc, pdf_annot *annot)
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "ignoring broken annotation");
- /* FIXME: TryLater */
diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c
index 2c18d96a..a5174979 100644
--- a/source/pdf/pdf-font.c
+++ b/source/pdf/pdf-font.c
@@ -397,7 +397,7 @@ pdf_new_font_desc(fz_context *ctx)
static pdf_font_desc *
-pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
+pdf_load_simple_font_by_name(pdf_document *doc, pdf_obj *dict, char *basefont)
pdf_obj *descriptor;
pdf_obj *encoding;
@@ -410,7 +410,6 @@ pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
int symbolic;
int kind;
- char *basefont;
char *estrings[256];
char ebuffer[256][32];
int i, k, n;
@@ -422,8 +421,6 @@ pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
- basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));
/* Load font file */
@@ -688,7 +685,7 @@ pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "cannot load ToUnicode CMap");
@@ -744,6 +741,79 @@ pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
return fontdesc;
+/*
+ * Load a simple font, taking the font name from the "BaseFont" entry of
+ * the font dictionary and delegating to pdf_load_simple_font_by_name.
+ */
+static pdf_font_desc *
+pdf_load_simple_font(pdf_document *doc, pdf_obj *dict)
+{
+	return pdf_load_simple_font_by_name(doc, dict, pdf_to_name(pdf_dict_gets(dict, "BaseFont")));
+}
+static int
+hail_mary_make_hash_key(fz_store_hash *hash, void *key_) /* key_ is ignored: the hash is always the constant pair (0, 0) */
+ hash->u.i.i0 = 0;
+ hash->u.i.i1 = 0;
+ return 1; /* always reports 1 */
+static void *
+hail_mary_keep_key(fz_context *ctx, void *key) /* hands the key back unchanged */
+ return key;
+static void
+hail_mary_drop_key(fz_context *ctx, void *key) /* no statements are visible for this callback in this hunk */
+static int
+hail_mary_cmp_key(void *k0, void *k1) /* compares the two keys by pointer identity */
+ return k0 == k1;
+#ifndef NDEBUG
+static void
+hail_mary_debug_key(FILE *out, void *key_) /* prints a fixed label; key_ is not inspected */
+ fprintf(out, "hail mary ");
+static fz_store_type hail_mary_store_type = /* callback table; its address is also used as the store key in pdf_load_hail_mary_font */
+ hail_mary_make_hash_key,
+ hail_mary_keep_key,
+ hail_mary_drop_key,
+ hail_mary_cmp_key,
+#ifndef NDEBUG /* the debug callback is only listed when NDEBUG is not defined */
+ hail_mary_debug_key
+/*
+ * Return the shared fallback ("hail mary") font descriptor.
+ *
+ * The store is consulted first, using the address of
+ * hail_mary_store_type as the key. On a miss, a font is loaded by the
+ * name "Helvetica" with no font dictionary and entered into the store
+ * under that same key.
+ */
+pdf_font_desc *
+pdf_load_hail_mary_font(pdf_document *doc)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_font_desc *cached;
+	pdf_font_desc *fallback;
+	pdf_font_desc *existing;
+
+	cached = fz_find_item(ctx, pdf_free_font_imp, &hail_mary_store_type, &hail_mary_store_type);
+	if (cached)
+		return cached;
+
+	/* FIXME: Get someone with a clue about fonts to fix this */
+	fallback = pdf_load_simple_font_by_name(doc, NULL, "Helvetica");
+
+	/* The lookup above missed, so the store is expected to have no entry yet. */
+	existing = fz_store_item(ctx, &hail_mary_store_type, fallback, fallback->size, &hail_mary_store_type);
+	assert(existing == NULL);
+
+	return fallback;
+}
* CID Fonts
@@ -1057,7 +1127,7 @@ pdf_load_font_descriptor(pdf_font_desc *fontdesc, pdf_document *doc, pdf_obj *di
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font");
if (origname != fontname && !iscidfont)
pdf_load_builtin_font(ctx, fontdesc, fontname);
diff --git a/source/pdf/pdf-interpret.c b/source/pdf/pdf-interpret.c
index cec0d1e5..515a87fa 100644
--- a/source/pdf/pdf-interpret.c
+++ b/source/pdf/pdf-interpret.c
@@ -372,7 +372,7 @@ begin_softmask(pdf_csi * csi, softmask_save *save)
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
/* FIXME: Ignore error - nasty, but if we throw from
* here the clip stack would be messed up. */
if (csi->cookie)
@@ -1188,7 +1188,7 @@ pdf_grestore(pdf_csi *csi)
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
/* Silently swallow the problem */
@@ -1590,6 +1590,31 @@ pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, const fz_ma
+/*
+ * Load 'font' with pdf_load_font, substituting the hail mary font when
+ * the load fails with FZ_ERROR_TRYLATER and the cookie permits an
+ * incomplete rendering (cookie->incomplete is then incremented).
+ * Every other failure is rethrown to the caller.
+ */
+static pdf_font_desc *
+load_font_or_hail_mary(pdf_csi *csi, pdf_obj *rdb, pdf_obj *font, int depth)
+{
+	pdf_document *doc = csi->doc;
+	fz_context *ctx = doc->ctx;
+	pdf_font_desc *desc;
+
+	fz_try(ctx)
+	{
+		desc = pdf_load_font(doc, rdb, font, depth);
+	}
+	fz_catch(ctx)
+	{
+		/* Only a "try later" error with an incomplete-tolerant cookie may be papered over. */
+		int may_substitute = fz_caught(ctx) == FZ_ERROR_TRYLATER && csi->cookie && csi->cookie->incomplete_ok;
+
+		if (!may_substitute)
+			fz_rethrow(ctx);
+		csi->cookie->incomplete++;
+		desc = NULL;
+	}
+
+	return desc ? desc : pdf_load_hail_mary_font(doc);
+}
static void
pdf_run_extgstate(pdf_csi *csi, pdf_obj *rdb, pdf_obj *extgstate)
@@ -1619,7 +1644,7 @@ pdf_run_extgstate(pdf_csi *csi, pdf_obj *rdb, pdf_obj *extgstate)
gstate->font = NULL;
- gstate->font = pdf_load_font(csi->doc, rdb, font, csi->nested_depth);
+ gstate->font = load_font_or_hail_mary(csi, rdb, font, csi->nested_depth);
if (!gstate->font)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find font in store");
gstate->size = pdf_to_real(pdf_array_get(val, 1));
@@ -2191,7 +2216,7 @@ static void pdf_run_Tf(pdf_csi *csi, pdf_obj *rdb)
if (!obj)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find font resource: '%s'", csi->name);
- gstate->font = pdf_load_font(csi->doc, rdb, obj, csi->nested_depth);
+ gstate->font = load_font_or_hail_mary(csi, rdb, obj, csi->nested_depth);
static void pdf_run_Tr(pdf_csi *csi)
@@ -2855,10 +2880,21 @@ pdf_run_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, pdf_lexbuf *buf)
- /* FIXME: TryLater */
- /* Swallow the error */
- if (csi->cookie)
- csi->cookie->errors++;
+ if (!csi->cookie)
+ {
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+ }
+ else if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+ {
+ if (csi->cookie->incomplete_ok)
+ csi->cookie->incomplete++;
+ else
+ fz_rethrow(ctx);
+ }
+ else
+ {
+ csi->cookie->errors++;
+ }
if (!ignoring_errors)
fz_warn(ctx, "Ignoring errors during rendering");
@@ -2901,7 +2937,7 @@ pdf_run_contents_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file)
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "Content stream parsing error - rendering truncated");
while (csi->gtop > csi->gbot)
@@ -3000,6 +3036,8 @@ static void pdf_run_page_contents_with_usage(pdf_document *doc, pdf_page *page,
void pdf_run_page_contents(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
pdf_run_page_contents_with_usage(doc, page, dev, ctm, "View", cookie);
+ if (page->incomplete & PDF_PAGE_INCOMPLETE_CONTENTS)
+ fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "incomplete rendering");
static void pdf_run_annot_with_usage(pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie)
@@ -3047,6 +3085,8 @@ static void pdf_run_annot_with_usage(pdf_document *doc, pdf_page *page, pdf_anno
void pdf_run_annot(pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
pdf_run_annot_with_usage(doc, page, annot, dev, ctm, "View", cookie);
+ if (page->incomplete & PDF_PAGE_INCOMPLETE_ANNOTS)
+ fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "incomplete rendering");
static void pdf_run_page_annots_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie)
@@ -3080,6 +3120,8 @@ pdf_run_page_with_usage(pdf_document *doc, pdf_page *page, fz_device *dev, const
pdf_run_page_contents_with_usage(doc, page, dev, ctm, event, cookie);
pdf_run_page_annots_with_usage(doc, page, dev, ctm, event, cookie);
+ if (page->incomplete)
+ fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "incomplete rendering");
diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c
index 6a77e7b4..708794ee 100644
--- a/source/pdf/pdf-object.c
+++ b/source/pdf/pdf-object.c
@@ -1392,7 +1392,7 @@ pdf_obj *pdf_new_obj_from_str(pdf_document *doc, const char *src)
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
return NULL;
diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c
index 7de81f00..d4d8015d 100644
--- a/source/pdf/pdf-page.c
+++ b/source/pdf/pdf-page.c
@@ -349,7 +349,14 @@ pdf_load_page(pdf_document *doc, int number)
float userunit;
fz_matrix mat;
- pageref = pdf_lookup_page_obj(doc, number);
+ if (doc->file_reading_linearly)
+ {
+ pageref = pdf_progressive_advance(doc, number);
+ if (pageref == NULL)
+ fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "page %d not available yet", number);
+ }
+ else
+ pageref = pdf_lookup_page_obj(doc, number);
pageobj = pdf_resolve_indirect(pageref);
page = fz_malloc_struct(ctx, pdf_page);
@@ -361,6 +368,7 @@ pdf_load_page(pdf_document *doc, int number)
page->deleted_annots = NULL;
page->tmp_annots = NULL;
page->me = pdf_keep_obj(pageobj);
+ page->incomplete = 0;
obj = pdf_dict_gets(pageobj, "UserUnit");
if (pdf_is_real(obj))
@@ -409,11 +417,22 @@ pdf_load_page(pdf_document *doc, int number)
fz_pre_scale(fz_translate(&mat, -realbox.x0, -realbox.y0), userunit, userunit);
fz_concat(&page->ctm, &page->ctm, &mat);
- obj = pdf_dict_gets(pageobj, "Annots");
- if (obj)
+ fz_try(ctx)
- page->links = pdf_load_link_annots(doc, obj, &page->ctm);
- page->annots = pdf_load_annots(doc, obj, page);
+ obj = pdf_dict_gets(pageobj, "Annots");
+ if (obj)
+ {
+ page->links = pdf_load_link_annots(doc, obj, &page->ctm);
+ page->annots = pdf_load_annots(doc, obj, page);
+ }
+ }
+ fz_catch(ctx)
+ {
+ if (fz_caught(ctx) != FZ_ERROR_TRYLATER)
+ fz_rethrow(ctx);
+ page->incomplete |= PDF_PAGE_INCOMPLETE_ANNOTS;
+ fz_drop_link(ctx, page->links);
+ page->links = NULL;
page->duration = pdf_to_real(pdf_dict_gets(pageobj, "Dur"));
@@ -444,8 +463,12 @@ pdf_load_page(pdf_document *doc, int number)
- pdf_free_page(doc, page);
- fz_rethrow_message(ctx, "cannot load page %d contents (%d 0 R)", number + 1, pdf_to_num(pageref));
+ if (fz_caught(ctx) != FZ_ERROR_TRYLATER)
+ {
+ pdf_free_page(doc, page);
+ fz_rethrow_message(ctx, "cannot load page %d contents (%d 0 R)", number + 1, pdf_to_num(pageref));
+ }
+ page->incomplete |= PDF_PAGE_INCOMPLETE_CONTENTS;
return page;
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c
index 3db32de0..fc603d1d 100644
--- a/source/pdf/pdf-repair.c
+++ b/source/pdf/pdf-repair.c
@@ -14,8 +14,8 @@ struct entry
int stm_len;
-static int
-pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, int *tmpofs)
+pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int *tmpofs)
pdf_token tok;
int stm_len;
@@ -24,7 +24,8 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
fz_context *ctx = file->ctx;
*stmofsp = 0;
- *stmlenp = -1;
+ if (stmlenp)
+ *stmlenp = -1;
stm_len = 0;
@@ -45,7 +46,7 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
/* Don't let a broken object at EOF overwrite a good one */
if (file->eof)
fz_rethrow_message(ctx, "broken object at EOF ignored");
@@ -53,21 +54,24 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
dict = pdf_new_dict(doc, 2);
- obj = pdf_dict_gets(dict, "Type");
- if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "XRef"))
+ if (encrypt && id)
- obj = pdf_dict_gets(dict, "Encrypt");
- if (obj)
+ obj = pdf_dict_gets(dict, "Type");
+ if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "XRef"))
- pdf_drop_obj(*encrypt);
- *encrypt = pdf_keep_obj(obj);
- }
+ obj = pdf_dict_gets(dict, "Encrypt");
+ if (obj)
+ {
+ pdf_drop_obj(*encrypt);
+ *encrypt = pdf_keep_obj(obj);
+ }
- obj = pdf_dict_gets(dict, "ID");
- if (obj)
- {
- pdf_drop_obj(*id);
- *id = pdf_keep_obj(obj);
+ obj = pdf_dict_gets(dict, "ID");
+ if (obj)
+ {
+ pdf_drop_obj(*id);
+ *id = pdf_keep_obj(obj);
+ }
@@ -75,6 +79,16 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
if (!pdf_is_indirect(obj) && pdf_is_int(obj))
stm_len = pdf_to_int(obj);
+ if (doc->file_reading_linearly && page)
+ {
+ obj = pdf_dict_gets(dict, "Type");
+ if (!strcmp(pdf_to_name(obj), "Page"))
+ {
+ pdf_drop_obj(*page);
+ *page = pdf_keep_obj(dict);
+ }
+ }
@@ -112,7 +126,7 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "cannot find endstream token, falling back to scanning");
@@ -133,7 +147,8 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p
buf->scratch[8] = c;
- *stmlenp = fz_tell(file) - *stmofsp - 9;
+ if (stmlenp)
+ *stmlenp = fz_tell(file) - *stmofsp - 9;
*tmpofs = fz_tell(file);
@@ -302,7 +317,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "ignoring the rest of the file");
@@ -324,11 +339,11 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
- tok = pdf_repair_obj(doc, buf, &stm_ofs, &stm_len, &encrypt, &id, &tmpofs);
+ tok = pdf_repair_obj(doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs);
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
/* If we haven't seen a root yet, there is nothing
* we can do, but give up. Otherwise, we'll make
* do. */
@@ -374,7 +389,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf)
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
/* If we haven't seen a root yet, there is nothing
* we can do, but give up. Otherwise, we'll make
* do. */
diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c
index 9d25915b..a46cdcc7 100644
--- a/source/pdf/pdf-stream.c
+++ b/source/pdf/pdf-stream.c
@@ -536,7 +536,7 @@ pdf_open_object_array(pdf_document *doc, pdf_obj *list)
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
diff --git a/source/pdf/pdf-type3.c b/source/pdf/pdf-type3.c
index d4c9b31a..b216b950 100644
--- a/source/pdf/pdf-type3.c
+++ b/source/pdf/pdf-type3.c
@@ -198,7 +198,7 @@ void pdf_load_type3_glyphs(pdf_document *doc, pdf_font_desc *fontdesc, int neste
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "Type3 glyph load failed: %s", fz_caught_message(ctx));
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index a1b0caa2..a161c763 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -532,7 +532,7 @@ static pdf_obj *sweepref(pdf_document *doc, pdf_write_options *opts, pdf_obj *ob
- /* FIXME: TryLater */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
/* Leave broken */
@@ -1650,7 +1650,7 @@ static void writeobject(pdf_document *doc, pdf_write_options *opts, int num, int
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
if (opts->continue_on_error)
fprintf(opts->out, "%d %d obj\nnull\nendobj\n", num, gen);
@@ -1732,7 +1732,7 @@ static void writeobject(pdf_document *doc, pdf_write_options *opts, int num, int
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
if (opts->continue_on_error)
fprintf(opts->out, "%d %d obj\nnull\nendobj\n", num, gen);
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index a50cafff..33bd6c22 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -1,5 +1,13 @@
#include "mupdf/pdf.h"
+#define DEBUGMESS(A) do { fz_warn A; } while (0)
+#define DEBUGMESS(A) do { } while (0)
static inline int iswhite(int ch)
@@ -699,6 +707,7 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
pdf_set_populating_xref_trailer(doc, trailer);
/* FIXME: do we overwrite free entries properly? */
+ /* FIXME: Does this work properly with progression? */
xrefstmofs = pdf_to_int(pdf_dict_gets(trailer, "XRefStm"));
if (xrefstmofs)
@@ -720,7 +729,6 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
- trailer = NULL;
@@ -731,7 +739,7 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
static void
-pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf)
+pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_previous)
fz_context *ctx = doc->ctx;
ofs_list list;
@@ -745,6 +753,8 @@ pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf)
ofs = read_xref_section(doc, ofs, buf, &list);
+ if (!read_previous)
+ break;
@@ -770,11 +780,9 @@ pdf_load_xref(pdf_document *doc, pdf_lexbuf *buf)
int xref_len;
fz_context *ctx = doc->ctx;
- pdf_load_version(doc);
- pdf_read_xref_sections(doc, doc->startxref, buf);
+ pdf_read_xref_sections(doc, doc->startxref, buf, 1);
if (pdf_xref_len(doc) == 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty");
@@ -803,6 +811,61 @@ pdf_load_xref(pdf_document *doc, pdf_lexbuf *buf)
+/*
+ * Try to set the document up for progressive (linearized) reading.
+ *
+ * Parses the indirect object at the current file position, which must be
+ * a dictionary with /Linearized equal to 1 and /L equal to the file
+ * length, reads the single xref section that follows it, and records
+ * /N, /O and /H in the document.
+ *
+ * FZ_ERROR_TRYLATER is rethrown. Any other failure is swallowed and
+ * doc->file_reading_linearly is cleared, so that the caller falls back
+ * to a normal xref load.
+ */
+static void
+pdf_load_linear(pdf_document *doc)
+{
+	pdf_obj *dict = NULL;
+	pdf_obj *hint = NULL;
+	pdf_obj *o;
+	int num, gen, stmofs, lin, len, page_count;
+	fz_context *ctx = doc->ctx;
+
+	fz_var(dict);
+	fz_var(hint);
+
+	fz_try(ctx)
+	{
+		pdf_xref_entry *entry;
+
+		dict = pdf_parse_ind_obj(doc, doc->file, &doc->lexbuf.base, &num, &gen, &stmofs);
+		if (!pdf_is_dict(dict))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
+		o = pdf_dict_gets(dict, "Linearized");
+		if (o == NULL)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
+		lin = pdf_to_int(o);
+		if (lin != 1)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Unexpected version of Linearized tag (%d)", lin);
+		len = pdf_to_int(pdf_dict_gets(dict, "L"));
+		if (len != doc->file_length)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "File has been updated since linearization");
+
+		pdf_read_xref_sections(doc, fz_tell(doc->file), &doc->lexbuf.base, 0);
+
+		/* Slot 0 of linear_page_refs is written below, so /N must
+		 * describe at least one page before it is used as a size. */
+		page_count = pdf_to_int(pdf_dict_gets(dict, "N"));
+		if (page_count <= 0)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Invalid page count in linearized dictionary (%d)", page_count);
+		doc->page_count = page_count;
+		doc->linear_page_refs = fz_resize_array(ctx, doc->linear_page_refs, doc->page_count, sizeof(pdf_obj *));
+		memset(doc->linear_page_refs, 0, doc->page_count * sizeof(pdf_obj*));
+		doc->linear_obj = dict;
+		doc->linear_pos = fz_tell(doc->file);
+		doc->linear_page1_obj_num = pdf_to_int(pdf_dict_gets(dict, "O"));
+		doc->linear_page_refs[0] = pdf_new_indirect(doc, doc->linear_page1_obj_num, 0);
+		doc->linear_page_num = 0;
+		hint = pdf_dict_gets(dict, "H");
+		doc->hint_object_offset = pdf_to_int(pdf_array_get(hint, 0));
+		doc->hint_object_length = pdf_to_int(pdf_array_get(hint, 1));
+
+		entry = pdf_get_populating_xref_entry(doc, 0);
+		entry->type = 'f';
+	}
+	fz_catch(ctx)
+	{
+		/* dict may already have been stored in doc->linear_obj. Forget
+		 * it there before dropping it, otherwise pdf_close_document
+		 * would drop the same object a second time. */
+		if (doc->linear_obj == dict)
+			doc->linear_obj = NULL;
+		pdf_drop_obj(dict);
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Drop back to non linearized reading mode */
+		doc->file_reading_linearly = 0;
+	}
+}
pdf_ocg_set_config(pdf_document *doc, int config)
@@ -982,12 +1045,31 @@ pdf_init_document(pdf_document *doc)
- pdf_load_xref(doc, &doc->lexbuf.base);
+ pdf_load_version(doc);
+ doc->file_length = fz_stream_meta(doc->file, FZ_STREAM_META_LENGTH, 0, NULL);
+ if (doc->file_length < 0)
+ doc->file_length = 0;
+ /* Check to see if we should work in progressive mode */
+ if (fz_stream_meta(doc->file, FZ_STREAM_META_PROGRESSIVE, 0, NULL) > 0)
+ doc->file_reading_linearly = 1;
+ /* Try to load the linearized file if we are in progressive
+ * mode. */
+ if (doc->file_reading_linearly)
+ pdf_load_linear(doc);
+ /* If we aren't in progressive mode (or the linear load failed
+ * and has set us back to non-progressive mode), load normally.
+ */
+ if (!doc->file_reading_linearly)
+ pdf_load_xref(doc, &doc->lexbuf.base);
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "trying to repair broken xref");
repaired = 1;
@@ -1027,7 +1109,7 @@ pdf_init_document(pdf_document *doc)
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "ignoring broken object (%d 0 R)", i);
@@ -1066,7 +1148,6 @@ pdf_init_document(pdf_document *doc)
- pdf_close_document(doc);
fz_rethrow_message(ctx, "cannot open document");
@@ -1076,7 +1157,6 @@ pdf_init_document(pdf_document *doc)
- /* FIXME: TryLater ? */
fz_warn(ctx, "Ignoring Broken Optional Content");
@@ -1107,6 +1187,20 @@ pdf_close_document(pdf_document *doc)
if (doc->crypt)
pdf_free_crypt(ctx, doc->crypt);
+ pdf_drop_obj(doc->linear_obj);
+ if (doc->linear_page_refs)
+ {
+ for (i=0; i < doc->page_count; i++)
+ {
+ pdf_drop_obj(doc->linear_page_refs[i]);
+ }
+ fz_free(ctx, doc->linear_page_refs);
+ }
+ fz_free(ctx, doc->hint_page);
+ fz_free(ctx, doc->hint_shared_ref);
+ fz_free(ctx, doc->hint_shared);
+ fz_free(ctx, doc->hint_obj_offsets);
for (i=0; i < doc->num_type3_fonts; i++)
fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
@@ -1249,6 +1343,239 @@ pdf_load_obj_stm(pdf_document *doc, int num, int gen, pdf_lexbuf *buf)
* object loading
+static int /* early exits return (tok == PDF_TOK_EOF); the normal path returns 0 */
+pdf_obj_read(pdf_document *doc, int *offset, int *nump, pdf_obj **page) /* reads one "num gen obj" at *offset, records it in the xref, advances *offset */
+ int num, numofs, gen, genofs, stmofs, tmpofs, tok;
+ pdf_lexbuf *buf = &doc->lexbuf.base;
+ fz_context *ctx = doc->ctx;
+ int xref_len;
+ pdf_xref_entry *entry;
+ int newtmpofs;
+ numofs = *offset;
+ fz_seek(doc->file, numofs, SEEK_SET);
+ /* We expect to read 'num' here */
+ tok = pdf_lex(doc->file, buf);
+ genofs = fz_tell(doc->file);
+ if (tok != PDF_TOK_INT)
+ {
+ /* Failed! */
+ DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
+ *offset = genofs; /* resume after the unexpected token */
+ return tok == PDF_TOK_EOF; /* NOTE(review): *nump has not been written on this path */
+ }
+ *nump = num = buf->i;
+ /* We expect to read 'gen' here */
+ tok = pdf_lex(doc->file, buf);
+ tmpofs = fz_tell(doc->file);
+ if (tok != PDF_TOK_INT)
+ {
+ /* Failed! */
+ DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
+ *offset = tmpofs;
+ return tok == PDF_TOK_EOF;
+ }
+ gen = buf->i;
+ /* We expect to read 'obj' here */
+ do
+ {
+ tmpofs = fz_tell(doc->file);
+ tok = pdf_lex(doc->file, buf);
+ if (tok == PDF_TOK_OBJ)
+ break;
+ if (tok != PDF_TOK_INT)
+ {
+ DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
+ *offset = fz_tell(doc->file);
+ return tok == PDF_TOK_EOF;
+ }
+ DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
+ *nump = num = gen; /* another int arrived: slide the window, the old gen becomes num */
+ numofs = genofs;
+ gen = buf->i; /* and the int just read becomes gen */
+ genofs = tmpofs;
+ }
+ while (1);
+ /* Now we read the actual object */
+ xref_len = pdf_xref_len(doc);
+ /* When we are reading a progressive file, we typically see:
+ * File Header
+ * obj m (Linearization params)
+ * xref #1 (refers to objects m-n)
+ * obj m+1
+ * ...
+ * obj n
+ * obj 1
+ * ...
+ * obj n-1
+ * xref #2
+ *
+ * The linearisation params are read elsewhere, hence
+ * whenever we read an object it should just go into the
+ * previous xref.
+ */
+ tok = pdf_repair_obj(doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs); /* stmlenp, encrypt and id are not wanted here */
+ do /* So we can break out of it */
+ {
+ if (num <= 0 || num >= xref_len)
+ {
+ fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
+ break;
+ }
+ if (gen != 0)
+ {
+ fz_warn(ctx, "Unexpected non zero generation number in linearized file");
+ }
+ entry = pdf_get_populating_xref_entry(doc, num);
+ if (entry->type != 0) /* the entry already has a type: leave it alone */
+ {
+ DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
+ break;
+ }
+ if (page && *page)
+ {
+ DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
+ if (!entry->obj)
+ entry->obj = pdf_keep_obj(*page);
+ if (doc->linear_page_refs[doc->linear_page_num] == NULL) /* NOTE(review): no bound on linear_page_num is checked in this function - confirm the caller guarantees it */
+ doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(doc, num, gen);
+ }
+ else
+ {
+ DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
+ }
+ entry->type = 'n';
+ entry->gen = 0; /* stored as 0 regardless of the gen value that was read */
+ entry->ofs = numofs;
+ entry->stm_ofs = stmofs;
+ }
+ while (0);
+ if (page && *page)
+ doc->linear_page_num++; /* counted even when the block above bailed out via break */
+ if (tok == PDF_TOK_ENDOBJ)
+ {
+ *offset = fz_tell(doc->file);
+ }
+ else
+ {
+ *offset = newtmpofs; /* otherwise resume from the offset pdf_repair_obj reported */
+ }
+ return 0;
+/*
+ * If the hints are loaded and no reference is yet known for page
+ * 'pagenum', load the object that the page hint table names for it and,
+ * when that object is a /Type /Page dictionary, record an indirect
+ * reference to it in doc->linear_page_refs[pagenum].
+ *
+ * FZ_ERROR_TRYLATER is rethrown; any other error is silently ignored.
+ */
+static void
+pdf_load_hinted_page(pdf_document *doc, int pagenum)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_obj *page = NULL;
+
+	if (!doc->hints_loaded || !doc->linear_page_refs)
+		return;
+
+	if (doc->linear_page_refs[pagenum])
+		return;
+
+	fz_var(page);
+
+	fz_try(ctx)
+	{
+		int num = doc->hint_page[pagenum].number;
+		page = pdf_load_object(doc, num, 0);
+		if (!strcmp("Page", pdf_to_name(pdf_dict_gets(page, "Type"))))
+		{
+			/* We have found the page object! */
+			DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
+			doc->linear_page_refs[pagenum] = pdf_new_indirect(doc, num, 0);
+		}
+	}
+	fz_always(ctx)
+	{
+		/* Release the loaded object on every path, including when
+		 * creating the indirect reference throws. */
+		pdf_drop_obj(page);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Silently swallow the error and proceed as normal */
+	}
+}
+/*
+ * Try to find object 'num' using our hint table. Find the closest
+ * object <= the one we want that has a hint and read forward from
+ * there, refining the hint offsets as objects are encountered.
+ *
+ * Returns 0 when no usable hint exists (or the hints run out during the
+ * scan) and 1 otherwise. The file position is restored before returning
+ * or rethrowing.
+ */
+static int
+read_hinted_object(pdf_document *doc, int num)
+{
+	fz_context *ctx = doc->ctx;
+	int expected = num;
+	int curr_pos;
+	int start, offset;
+
+	while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
+		expected--;
+	if (expected != num)
+		DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", num, expected));
+	if (expected == 0)	/* No hints found, just bail */
+		return 0;
+
+	curr_pos = fz_tell(doc->file);
+	offset = doc->hint_obj_offsets[expected];
+
+	fz_var(expected);
+	fz_try(ctx)
+	{
+		/* NOTE(review): pdf_obj_read can return before writing
+		 * 'found' (when the first token is not an int), and its
+		 * return value is not examined here. */
+		int found;
+
+		/* Try to read forward from there */
+		do
+		{
+			start = offset;
+			DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
+			pdf_obj_read(doc, &offset, &found, 0);
+			DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
+			if (found <= expected)
+			{
+				/* We found the right one (or one earlier than
+				 * we expected). Update the hints. */
+				doc->hint_obj_offsets[expected] = offset;
+				doc->hint_obj_offsets[found] = start;
+				doc->hint_obj_offsets[found+1] = offset;
+				/* Retry with the next one */
+				expected = found+1;
+			}
+			else
+			{
+				/* We found one later than we expected. */
+				doc->hint_obj_offsets[expected] = 0;
+				doc->hint_obj_offsets[found] = start;
+				doc->hint_obj_offsets[found+1] = offset;
+				while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
+					expected--;
+				/* No hints found, give up. We must leave the
+				 * loop rather than return: returning from
+				 * inside fz_try would skip the fz_always
+				 * block and unbalance the exception stack. */
+				if (expected == 0)
+					break;
+			}
+		}
+		while (found != num);
+	}
+	fz_always(ctx)
+	{
+		fz_seek(doc->file, curr_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* FIXME: Currently we ignore the hint. Perhaps we should
+		 * drop back to non-hinted operation here. */
+		doc->hint_obj_offsets[expected] = 0;
+		fz_rethrow(ctx);
+	}
+	return expected != 0;
+}
pdf_cache_object(pdf_document *doc, int num, int gen)
@@ -1260,6 +1587,7 @@ pdf_cache_object(pdf_document *doc, int num, int gen)
if (num < 0 || num >= pdf_xref_len(doc))
fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(doc));
x = pdf_get_xref_entry(doc, num);
if (x->obj)
@@ -1309,6 +1637,14 @@ pdf_cache_object(pdf_document *doc, int num, int gen)
fz_throw(ctx, FZ_ERROR_GENERIC, "object (%d %d R) was not found in its object stream", num, gen);
+ else if (doc->hint_obj_offsets && read_hinted_object(doc, num))
+ {
+ goto object_updated;
+ }
+ else if (doc->file_length && doc->linear_pos < doc->file_length)
+ {
+ fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d %d R) - not loaded yet?", num, gen);
+ }
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d %d R)", num, gen);
@@ -1368,7 +1704,7 @@ pdf_resolve_indirect(pdf_obj *ref)
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen);
return NULL;
@@ -1591,7 +1927,18 @@ pdf_document *
pdf_open_document_no_run_with_stream(fz_context *ctx, fz_stream *file)
pdf_document *doc = pdf_new_document(ctx, file);
- pdf_init_document(doc);
+ fz_var(doc);
+ fz_try(ctx)
+ {
+ pdf_init_document(doc);
+ }
+ fz_catch(ctx)
+ {
+ pdf_close_document(doc);
+ fz_rethrow_message(ctx, "cannot load document from stream");
+ }
return doc;
@@ -1599,9 +1946,10 @@ pdf_document *
pdf_open_document_no_run(fz_context *ctx, const char *filename)
fz_stream *file = NULL;
- pdf_document *doc;
+ pdf_document *doc = NULL;
+ fz_var(doc);
@@ -1615,11 +1963,344 @@ pdf_open_document_no_run(fz_context *ctx, const char *filename)
+ pdf_close_document(doc);
fz_rethrow_message(ctx, "cannot load document '%s'", filename);
return doc;
+/*
+ * Parse the linearization hint stream stored in object (objnum, gennum)
+ * and fill in doc->hint_page, doc->hint_shared_ref, doc->hint_shared and
+ * doc->hint_obj_offsets (object number -> file offset).
+ *
+ * All values in the hint stream come from the file and are therefore
+ * untrusted: counts are validated before being used as loop bounds, and
+ * object numbers are range checked before being used as array indices.
+ *
+ * Throws FZ_ERROR_TRYLATER if the data is not yet available. Any other
+ * failure disables further hint loading and linear reading, and is then
+ * also reported to the caller as FZ_ERROR_TRYLATER.
+ */
+static void
+pdf_load_hints(pdf_document *doc, int objnum, int gennum)
+{
+	fz_stream *stream = NULL;
+	pdf_obj *dict;
+	fz_context *ctx = doc->ctx;
+
+	fz_var(stream);
+	fz_var(dict);
+
+	fz_try(ctx)
+	{
+		int i, j, least_num_page_objs, page_obj_num_bits;
+		int least_page_len, page_len_num_bits, shared_hint_offset;
+		int least_page_offset, page_offset_num_bits;
+		int least_content_stream_len, content_stream_len_num_bits;
+		int num_shared_obj_num_bits, shared_obj_num_bits;
+		int numerator_bits, denominator_bits, shared;
+		int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
+		int shared_obj_count_total;
+		int least_shared_group_len, shared_group_len_num_bits;
+		int max_object_num = pdf_xref_len(doc);
+
+		stream = pdf_open_stream(doc, objnum, gennum);
+		dict = pdf_get_xref_entry(doc, objnum)->obj;
+		if (dict == NULL || !pdf_is_dict(dict))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint object");
+
+		/* "S" gives the position of the shared object hint table
+		 * within the stream. */
+		shared_hint_offset = pdf_to_int(pdf_dict_gets(dict, "S"));
+
+		/* Malloc the structures (use realloc to cope with the fact we
+		 * may try this several times before enough data is loaded) */
+		doc->hint_page = fz_resize_array(ctx, doc->hint_page, doc->page_count+1, sizeof(*doc->hint_page));
+		memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->page_count+1));
+		doc->hint_obj_offsets = fz_resize_array(ctx, doc->hint_obj_offsets, max_object_num, sizeof(*doc->hint_obj_offsets));
+		memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
+		doc->hint_obj_offsets_max = max_object_num;
+
+		/* Read the page object hints table: Header first */
+		least_num_page_objs = fz_read_bits(stream, 32);
+		/* The following is sometimes a lie, but we read this version,
+		 * as other table values are built from it. In
+		 * pdf_reference17.pdf, this points to 2 objects before the
+		 * first pages page object. */
+		doc->hint_page[0].offset = fz_read_bits(stream, 32);
+		/* Offsets in the hint tables do not account for the hint
+		 * object itself, so anything beyond it is shifted by its
+		 * length. */
+		if (doc->hint_page[0].offset > doc->hint_object_offset)
+			doc->hint_page[0].offset += doc->hint_object_length;
+		page_obj_num_bits = fz_read_bits(stream, 16);
+		least_page_len = fz_read_bits(stream, 32);
+		page_len_num_bits = fz_read_bits(stream, 16);
+		/* The next four header fields are read only to advance the
+		 * stream; their values are not used below. */
+		least_page_offset = fz_read_bits(stream, 32);
+		page_offset_num_bits = fz_read_bits(stream, 16);
+		least_content_stream_len = fz_read_bits(stream, 32);
+		content_stream_len_num_bits = fz_read_bits(stream, 16);
+		num_shared_obj_num_bits = fz_read_bits(stream, 16);
+		shared_obj_num_bits = fz_read_bits(stream, 16);
+		/* Likewise read only to advance the stream. */
+		numerator_bits = fz_read_bits(stream, 16);
+		denominator_bits = fz_read_bits(stream, 16);
+
+		/* Item 1: Page object numbers */
+		doc->hint_page[0].number = doc->linear_page1_obj_num;
+		/* We don't care about the number of objects in the first page */
+		(void)fz_read_bits(stream, page_obj_num_bits);
+		j = 1;
+		for (i = 1; i < doc->page_count; i++)
+		{
+			int delta_page_objs = fz_read_bits(stream, page_obj_num_bits);
+
+			doc->hint_page[i].number = j;
+			j += least_num_page_objs + delta_page_objs;
+		}
+		doc->hint_page[i].number = j; /* Not a real page object */
+		fz_sync_bits(stream);
+
+		/* Item 2: Page lengths */
+		j = doc->hint_page[0].offset;
+		for (i = 0; i < doc->page_count; i++)
+		{
+			int delta_page_len = fz_read_bits(stream, page_len_num_bits);
+			int old = j;
+
+			doc->hint_page[i].offset = j;
+			j += least_page_len + delta_page_len;
+			/* Step over the hint object if this page straddles it. */
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		doc->hint_page[i].offset = j;
+		fz_sync_bits(stream);
+
+		/* Item 3: Number of shared objects referenced from each page.
+		 * hint_page[i].index is the running total, i.e. the first
+		 * slot in hint_shared_ref belonging to page i. */
+		shared = 0;
+		for (i = 0; i < doc->page_count; i++)
+		{
+			int num_shared_objs = fz_read_bits(stream, num_shared_obj_num_bits);
+
+			doc->hint_page[i].index = shared;
+			shared += num_shared_objs;
+		}
+		doc->hint_page[i].index = shared;
+		doc->hint_shared_ref = fz_resize_array(ctx, doc->hint_shared_ref, shared, sizeof(*doc->hint_shared_ref));
+		memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
+		fz_sync_bits(stream);
+
+		/* Item 4: Shared object identifiers */
+		for (i = 0; i < shared; i++)
+		{
+			int ref = fz_read_bits(stream, shared_obj_num_bits);
+
+			doc->hint_shared_ref[i] = ref;
+		}
+
+		/* Skip items 5,6,7 as we don't use them */
+		fz_seek(stream, shared_hint_offset, SEEK_SET);
+
+		/* Read the shared object hints table: Header first */
+		shared_obj_num = fz_read_bits(stream, 32);
+		shared_obj_offset = fz_read_bits(stream, 32);
+		if (shared_obj_offset > doc->hint_object_offset)
+			shared_obj_offset += doc->hint_object_length;
+		shared_obj_count_page1 = fz_read_bits(stream, 32);
+		shared_obj_count_total = fz_read_bits(stream, 32);
+		shared_obj_num_bits = fz_read_bits(stream, 16);
+		least_shared_group_len = fz_read_bits(stream, 32);
+		shared_group_len_num_bits = fz_read_bits(stream, 16);
+
+		/* The counts size the hint_shared array and bound the loops
+		 * that fill it, so reject values that would make those loops
+		 * run past the allocation. */
+		if (shared_obj_count_page1 < 0 || shared_obj_count_total < 0 ||
+			shared_obj_count_page1 > shared_obj_count_total)
+		{
+			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared object counts)");
+		}
+
+		/* Sanity check the references in Item 4 above to ensure we
+		 * don't access out of range with malicious files. */
+		for (i = 0; i < shared; i++)
+		{
+			if (doc->hint_shared_ref[i] < 0 || doc->hint_shared_ref[i] >= shared_obj_count_total)
+			{
+				fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared refs)");
+			}
+		}
+
+		doc->hint_shared = fz_resize_array(ctx, doc->hint_shared, shared_obj_count_total+1, sizeof(*doc->hint_shared));
+		memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
+
+		/* Item 1: Shared object group lengths (first page groups
+		 * first, laid out from the start of page 0) */
+		j = doc->hint_page[0].offset;
+		for (i = 0; i < shared_obj_count_page1; i++)
+		{
+			int off = fz_read_bits(stream, shared_group_len_num_bits);
+			int old = j;
+
+			doc->hint_shared[i].offset = j;
+			j += off + least_shared_group_len;
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		/* FIXME: We would have problems recreating the length of the
+		 * last page 1 shared reference group. But we'll never need
+		 * to, so ignore it. */
+		j = shared_obj_offset;
+		for (; i < shared_obj_count_total; i++)
+		{
+			int off = fz_read_bits(stream, shared_group_len_num_bits);
+			int old = j;
+
+			doc->hint_shared[i].offset = j;
+			j += off + least_shared_group_len;
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		doc->hint_shared[i].offset = j;
+		fz_sync_bits(stream);
+
+		/* Item 2: Signature flags: read these just so we can skip.
+		 * The flag is parked in .number, which is overwritten with
+		 * the real object number in Item 4 below. */
+		for (i = 0; i < shared_obj_count_total; i++)
+		{
+			doc->hint_shared[i].number = fz_read_bits(stream, 1);
+		}
+		fz_sync_bits(stream);
+
+		/* Item 3: Signatures: just skip */
+		for (i = 0; i < shared_obj_count_total; i++)
+		{
+			if (doc->hint_shared[i].number)
+			{
+				(void)fz_read_bits(stream, 128);
+			}
+		}
+		fz_sync_bits(stream);
+
+		/* Item 4: Shared object object numbers (each entry is the
+		 * number of objects in the group, minus one) */
+		j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
+		for (i = 0; i < shared_obj_count_page1; i++)
+		{
+			doc->hint_shared[i].number = j;
+			j += fz_read_bits(stream, shared_obj_num_bits) + 1;
+		}
+		j = shared_obj_num;
+		for (; i < shared_obj_count_total; i++)
+		{
+			doc->hint_shared[i].number = j;
+			j += fz_read_bits(stream, shared_obj_num_bits) + 1;
+		}
+		doc->hint_shared[i].number = j;
+
+		/* Now, actually use the data we have gathered. The object
+		 * numbers were rebuilt from deltas in the file, so ignore any
+		 * that fall outside the hint_obj_offsets table rather than
+		 * writing out of bounds. */
+		for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
+		{
+			int num = doc->hint_shared[i].number;
+
+			if (num >= 0 && num < max_object_num)
+				doc->hint_obj_offsets[num] = doc->hint_shared[i].offset;
+		}
+		for (i = 0; i < doc->page_count; i++)
+		{
+			int num = doc->hint_page[i].number;
+
+			if (num >= 0 && num < max_object_num)
+				doc->hint_obj_offsets[num] = doc->hint_page[i].offset;
+		}
+	}
+	fz_always(ctx)
+	{
+		fz_close(stream);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Don't try to load hints again */
+		doc->hints_loaded = 1;
+		/* We won't use the linearized object any more. */
+		doc->file_reading_linearly = 0;
+		/* Any other error becomes a TRYLATER */
+		fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
+	}
+	doc->hints_loaded = 1;
+}
+/*
+ * Seek to the recorded hint object offset, lex "<num> <gen> obj"
+ * headers from there and hand each object found to pdf_load_hints.
+ * Scanning stops at the first token that does not fit that pattern.
+ * The file position on entry is restored on every exit path from the
+ * try block; errors are passed on to the caller unchanged.
+ */
+static void
+pdf_load_hint_object(pdf_document *doc)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_lexbuf *lexbuf = &doc->lexbuf.base;
+	int saved_pos = fz_tell(doc->file);
+
+	fz_seek(doc->file, doc->hint_object_offset, SEEK_SET);
+	fz_try(ctx)
+	{
+		for (;;)
+		{
+			pdf_obj *page = NULL;
+			int tmpofs, num, gen;
+
+			/* Object number */
+			if (pdf_lex(doc->file, lexbuf) != PDF_TOK_INT)
+				break;
+			num = lexbuf->i;
+
+			/* Generation number */
+			if (pdf_lex(doc->file, lexbuf) != PDF_TOK_INT)
+				break;
+			gen = lexbuf->i;
+
+			/* "obj" keyword */
+			if (pdf_lex(doc->file, lexbuf) != PDF_TOK_OBJ)
+				break;
+
+			/* Result deliberately ignored; the call consumes the
+			 * object body from the file before the hints are
+			 * loaded by object number. */
+			(void)pdf_repair_obj(doc, lexbuf, &tmpofs, NULL, NULL, NULL, &page, &tmpofs);
+			pdf_load_hints(doc, num, gen);
+		}
+	}
+	fz_always(ctx)
+	{
+		fz_seek(doc->file, saved_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
+/*
+ * Try to make page 'pagenum' available while the file is still
+ * arriving, by reading further objects from doc->linear_pos onwards.
+ *
+ * Returns doc->linear_page_refs[pagenum]. Throws FZ_ERROR_GENERIC if
+ * pagenum is out of range, and rethrows FZ_ERROR_TRYLATER if the data
+ * ran out before a reference for the requested page was recorded. Any
+ * other error is rethrown unchanged.
+ */
+pdf_obj *pdf_progressive_advance(pdf_document *doc, int pagenum)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_lexbuf *buf = &doc->lexbuf.base;
+	int curr_pos;
+	/* Initialised so the catch block never drops an indeterminate
+	 * pointer, whatever point the try block was left at. */
+	pdf_obj *page = NULL;
+
+	/* Validate the page number before it is handed to any helper or
+	 * used as an index into doc->linear_page_refs. */
+	if (pagenum < 0 || pagenum >= doc->page_count)
+		fz_throw(doc->ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->page_count);
+
+	pdf_load_hinted_page(doc, pagenum);
+
+	/* Everything has been read already: nothing left to advance over. */
+	if (doc->linear_pos == doc->file_length)
+		return doc->linear_page_refs[pagenum];
+
+	/* Only load hints once, and then only after we have got page 0 */
+	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
+	{
+		/* Found hint object */
+		pdf_load_hint_object(doc);
+	}
+
+	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
+	curr_pos = fz_tell(doc->file);
+
+	fz_var(page);
+
+	fz_try(ctx)
+	{
+		int eof;
+
+		/* Read objects one at a time until pdf_obj_read reports the
+		 * end of the file, or throws (e.g. FZ_ERROR_TRYLATER). */
+		do
+		{
+			int num;
+
+			page = NULL;
+			eof = pdf_obj_read(doc, &doc->linear_pos, &num, &page);
+			pdf_drop_obj(page);
+			page = NULL;
+		}
+		while (!eof);
+
+		/* Whole file read: load the xref and check that the page
+		 * tree is reachable from the trailer. */
+		{
+			pdf_obj *catalog;
+			pdf_obj *pages;
+
+			doc->linear_pos = doc->file_length;
+			pdf_load_xref(doc, buf);
+			catalog = pdf_dict_gets(pdf_trailer(doc), "Root");
+			pages = pdf_dict_gets(catalog, "Pages");
+
+			if (!pdf_is_dict(pages))
+				fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree");
+			/* NOTE(review): there is no enclosing loop here, so
+			 * this presumably relies on fz_try expanding to a loop
+			 * construct and leaves the try body - confirm. */
+			break;
+		}
+	}
+	fz_always(ctx)
+	{
+		fz_seek(doc->file, curr_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		pdf_drop_obj(page);
+		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+		{
+			/* Running out of data is only an error for the caller
+			 * if the requested page has still not been found. */
+			if (doc->linear_page_refs[pagenum] == NULL)
+			{
+				/* Still not got a page */
+				fz_rethrow(ctx);
+			}
+		}
+		else
+			fz_rethrow(ctx);
+	}
+
+	return doc->linear_page_refs[pagenum];
+}
pdf_document *pdf_specifics(fz_document *doc)
return (pdf_document *)((doc && doc->close == (void *)pdf_close_document) ? doc : NULL); /* yields doc cast to pdf_document only when doc is non-NULL and its close handler is pdf_close_document; otherwise NULL */
diff --git a/source/xps/xps-doc.c b/source/xps/xps-doc.c
index 9953cb37..097392e8 100644
--- a/source/xps/xps-doc.c
+++ b/source/xps/xps-doc.c
@@ -414,7 +414,7 @@ xps_read_page_list(xps_document *doc)
- /* FIXME: TryLater ? */
+ fz_rethrow_if(doc->ctx, FZ_ERROR_TRYLATER);
fz_warn(doc->ctx, "cannot process FixedDocument rels part");
xps_read_and_process_metadata_part(doc, fixdoc->name, fixdoc);
@@ -447,7 +447,7 @@ xps_load_fixed_page(xps_document *doc, xps_page *page)
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
root = NULL;
if (!root)
diff --git a/source/xps/xps-glyphs.c b/source/xps/xps-glyphs.c
index 72f37ca1..52cc0042 100644
--- a/source/xps/xps-glyphs.c
+++ b/source/xps/xps-glyphs.c
@@ -510,7 +510,7 @@ xps_parse_glyphs(xps_document *doc, const fz_matrix *ctm,
- /* FIXME: TryLater ? */
+ fz_rethrow_if(doc->ctx, FZ_ERROR_TRYLATER);
fz_warn(doc->ctx, "cannot find font resource part '%s'", partname);
@@ -527,7 +527,7 @@ xps_parse_glyphs(xps_document *doc, const fz_matrix *ctm,
- /* FIXME: TryLater ? */
+ fz_rethrow_if(doc->ctx, FZ_ERROR_TRYLATER);
fz_warn(doc->ctx, "cannot load font resource '%s'", partname);
xps_free_part(doc, part);
diff --git a/source/xps/xps-image.c b/source/xps/xps-image.c
index eb0d876f..cff7860d 100644
--- a/source/xps/xps-image.c
+++ b/source/xps/xps-image.c
@@ -102,7 +102,7 @@ xps_parse_image_brush(xps_document *doc, const fz_matrix *ctm, const fz_rect *ar
- /* FIXME: TryLater ? */
+ fz_rethrow_if(doc->ctx, FZ_ERROR_TRYLATER);
fz_warn(doc->ctx, "cannot find image source");
@@ -117,7 +117,7 @@ xps_parse_image_brush(xps_document *doc, const fz_matrix *ctm, const fz_rect *ar
- /* FIXME: TryLater ? */
+ fz_rethrow_if(doc->ctx, FZ_ERROR_TRYLATER);
fz_warn(doc->ctx, "cannot decode image resource");
diff --git a/source/xps/xps-outline.c b/source/xps/xps-outline.c
index b87460a4..d8b573a9 100644
--- a/source/xps/xps-outline.c
+++ b/source/xps/xps-outline.c
@@ -131,7 +131,7 @@ xps_load_outline(xps_document *doc)
- /* FIXME: TryLater ? */
+ fz_rethrow_if(doc->ctx, FZ_ERROR_TRYLATER);
outline = NULL;
if (!outline)
diff --git a/source/xps/xps-resource.c b/source/xps/xps-resource.c
index 5c927e1d..ef699619 100644
--- a/source/xps/xps-resource.c
+++ b/source/xps/xps-resource.c
@@ -75,7 +75,7 @@ xps_parse_remote_resource_dictionary(xps_document *doc, char *base_uri, char *so
- /* FIXME: TryLater ? */
+ fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
xml = NULL;