diff options
Diffstat (limited to 'source/pdf/pdf-xref.c')
-rw-r--r-- | source/pdf/pdf-xref.c | 707 |
1 files changed, 694 insertions, 13 deletions
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index a50cafff..33bd6c22 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -1,5 +1,15 @@
 #include "mupdf/pdf.h"
 
+/* Change the #undef below to a #define to send progressive loading trace
+ * messages to fz_warn. DEBUGMESS takes a parenthesised argument list. */
+#undef DEBUG_PROGRESSIVE_ADVANCE
+
+#ifdef DEBUG_PROGRESSIVE_ADVANCE
+#define DEBUGMESS(A) do { fz_warn A; } while (0)
+#else
+#define DEBUGMESS(A) do { } while (0)
+#endif
+
 static inline int iswhite(int ch)
 {
 	return
@@ -699,6 +707,7 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
 		pdf_set_populating_xref_trailer(doc, trailer);
 
 		/* FIXME: do we overwrite free entries properly? */
+		/* FIXME: Does this work properly with progression? */
 		xrefstmofs = pdf_to_int(pdf_dict_gets(trailer, "XRefStm"));
 		if (xrefstmofs)
 		{
@@ -720,7 +729,6 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
 	fz_always(ctx)
 	{
 		pdf_drop_obj(trailer);
-		trailer = NULL;
 	}
 	fz_catch(ctx)
 	{
@@ -731,7 +739,7 @@ read_xref_section(pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets
 }
 
 static void
-pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf)
+pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_previous)
 {
 	fz_context *ctx = doc->ctx;
 	ofs_list list;
@@ -745,6 +753,8 @@ pdf_read_xref_sections(pdf_document *doc, int ofs, pdf_lexbuf *buf)
 		{
 			pdf_populate_next_xref_level(doc);
 			ofs = read_xref_section(doc, ofs, buf, &list);
+			if (!read_previous)
+				break;
 		}
 	}
 	fz_always(ctx)
@@ -770,11 +780,9 @@ pdf_load_xref(pdf_document *doc, pdf_lexbuf *buf)
 	int xref_len;
 	fz_context *ctx = doc->ctx;
 
-	pdf_load_version(doc);
-
 	pdf_read_start_xref(doc);
 
-	pdf_read_xref_sections(doc, doc->startxref, buf);
+	pdf_read_xref_sections(doc, doc->startxref, buf, 1);
 
 	if (pdf_xref_len(doc) == 0)
 		fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty");
@@ -803,6 +811,85 @@ pdf_load_xref(pdf_document *doc, pdf_lexbuf *buf)
 	}
 }
 
+/*
+	Attempt to start reading the file in linearized (progressive) mode.
+
+	Parses an indirect object from doc->file as the linearization
+	dictionary, checks that "Linearized" is 1 and that "L" matches
+	doc->file_length, reads just the first xref section, and records the
+	page count ("N"), first page object number ("O") and hint stream
+	offset and length ("H") on doc.
+
+	FZ_ERROR_TRYLATER is rethrown. Any other error is swallowed and
+	doc->file_reading_linearly is cleared so that the caller can fall
+	back to a normal load.
+*/
+static void
+pdf_load_linear(pdf_document *doc)
+{
+	pdf_obj *dict = NULL;
+	pdf_obj *hint = NULL;
+	pdf_obj *o;
+	int num, gen, stmofs, lin, len, page_count;
+	fz_context *ctx = doc->ctx;
+
+	fz_var(dict);
+	fz_var(hint);
+
+	fz_try(ctx)
+	{
+		pdf_xref_entry *entry;
+
+		dict = pdf_parse_ind_obj(doc, doc->file, &doc->lexbuf.base, &num, &gen, &stmofs);
+		if (!pdf_is_dict(dict))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
+		o = pdf_dict_gets(dict, "Linearized");
+		if (o == NULL)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
+		lin = pdf_to_int(o);
+		if (lin != 1)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Unexpected version of Linearized tag (%d)", lin);
+		len = pdf_to_int(pdf_dict_gets(dict, "L"));
+		if (len != doc->file_length)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "File has been updated since linearization");
+
+		/* linear_page_refs[0] is written below, so the array must hold
+		 * at least one entry. */
+		page_count = pdf_to_int(pdf_dict_gets(dict, "N"));
+		if (page_count <= 0)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "Invalid page count in linearized dictionary (%d)", page_count);
+
+		pdf_read_xref_sections(doc, fz_tell(doc->file), &doc->lexbuf.base, 0);
+
+		doc->page_count = page_count;
+		doc->linear_page_refs = fz_resize_array(ctx, doc->linear_page_refs, doc->page_count, sizeof(pdf_obj *));
+		memset(doc->linear_page_refs, 0, doc->page_count * sizeof(pdf_obj*));
+		doc->linear_obj = dict;
+		doc->linear_pos = fz_tell(doc->file);
+		doc->linear_page1_obj_num = pdf_to_int(pdf_dict_gets(dict, "O"));
+		doc->linear_page_refs[0] = pdf_new_indirect(doc, doc->linear_page1_obj_num, 0);
+		doc->linear_page_num = 0;
+		hint = pdf_dict_gets(dict, "H");
+		doc->hint_object_offset = pdf_to_int(pdf_array_get(hint, 0));
+		doc->hint_object_length = pdf_to_int(pdf_array_get(hint, 1));
+
+		entry = pdf_get_populating_xref_entry(doc, 0);
+		entry->type = 'f';
+	}
+	fz_catch(ctx)
+	{
+		/* dict may already have been stored in doc->linear_obj; clear
+		 * that before dropping it so that pdf_close_document does not
+		 * drop the same reference a second time. */
+		if (doc->linear_obj == dict)
+			doc->linear_obj = NULL;
+		pdf_drop_obj(dict);
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Drop back to non linearized reading mode */
+		doc->file_reading_linearly = 0;
+	}
+}
+
 void
 pdf_ocg_set_config(pdf_document *doc, int config)
 {
@@ -982,12 +1045,31 @@ pdf_init_document(pdf_document *doc)
 
 	fz_try(ctx)
 	{
-		pdf_load_xref(doc, &doc->lexbuf.base);
+		pdf_load_version(doc);
+
+		doc->file_length = fz_stream_meta(doc->file, FZ_STREAM_META_LENGTH, 0, NULL);
+		if (doc->file_length < 0)
+			doc->file_length = 0;
+
+		/* Check to see if we should work in progressive mode */
+		if (fz_stream_meta(doc->file, FZ_STREAM_META_PROGRESSIVE, 0, NULL) > 0)
+			doc->file_reading_linearly = 1;
+
+		/* Try to load the linearized file if we are in progressive
+		 * mode. */
+		if (doc->file_reading_linearly)
+			pdf_load_linear(doc);
+
+		/* If we aren't in progressive mode (or the linear load failed
+		 * and has set us back to non-progressive mode), load normally.
+		 */
+		if (!doc->file_reading_linearly)
+			pdf_load_xref(doc, &doc->lexbuf.base);
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater ? */
 		pdf_free_xref_sections(doc);
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 		fz_warn(ctx, "trying to repair broken xref");
 		repaired = 1;
 	}
@@ -1027,7 +1109,7 @@ pdf_init_document(pdf_document *doc)
 				}
 				fz_catch(ctx)
 				{
-					/* FIXME: TryLater ? */
+					fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 					fz_warn(ctx, "ignoring broken object (%d 0 R)", i);
 					continue;
 				}
@@ -1066,7 +1148,6 @@ pdf_init_document(pdf_document *doc)
 	{
 		pdf_drop_obj(dict);
 		pdf_drop_obj(nobj);
-		pdf_close_document(doc);
 		fz_rethrow_message(ctx, "cannot open document");
 	}
 
@@ -1076,7 +1157,6 @@ pdf_init_document(pdf_document *doc)
 	}
 	fz_catch(ctx)
 	{
-		/* FIXME: TryLater ?
*/
 		fz_warn(ctx, "Ignoring Broken Optional Content");
 	}
 }
@@ -1107,6 +1187,20 @@ pdf_close_document(pdf_document *doc)
 	if (doc->crypt)
 		pdf_free_crypt(ctx, doc->crypt);
 
+	pdf_drop_obj(doc->linear_obj);
+	if (doc->linear_page_refs)
+	{
+		for (i=0; i < doc->page_count; i++)
+		{
+			pdf_drop_obj(doc->linear_page_refs[i]);
+		}
+		fz_free(ctx, doc->linear_page_refs);
+	}
+	fz_free(ctx, doc->hint_page);
+	fz_free(ctx, doc->hint_shared_ref);
+	fz_free(ctx, doc->hint_shared);
+	fz_free(ctx, doc->hint_obj_offsets);
+
 	for (i=0; i < doc->num_type3_fonts; i++)
 	{
 		fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
@@ -1249,6 +1343,321 @@ pdf_load_obj_stm(pdf_document *doc, int num, int gen, pdf_lexbuf *buf)
 /*
  * object loading
  */
+/*
+	Read one object starting at file position *offset and, if its number
+	is in range and not already known, record it in the xref section
+	currently being populated.
+
+	offset: On entry, where to start reading. On exit, where to resume.
+
+	nump: Receives the object number, but only once a complete
+	"num gen obj" header has been read.
+
+	page: Passed on to pdf_repair_obj; may be NULL. If it comes back
+	non-NULL the object is counted as the next page, and the reference
+	belongs to the caller.
+
+	Returns non-zero if PDF_TOK_EOF was hit before an object header
+	could be read, 0 otherwise.
+*/
+static int
+pdf_obj_read(pdf_document *doc, int *offset, int *nump, pdf_obj **page)
+{
+	int num, numofs, gen, genofs, stmofs, tmpofs, tok;
+	pdf_lexbuf *buf = &doc->lexbuf.base;
+	fz_context *ctx = doc->ctx;
+	int xref_len;
+	pdf_xref_entry *entry;
+	int newtmpofs;
+
+	numofs = *offset;
+	fz_seek(doc->file, numofs, SEEK_SET);
+
+	/* We expect to read 'num' here */
+	tok = pdf_lex(doc->file, buf);
+	genofs = fz_tell(doc->file);
+	if (tok != PDF_TOK_INT)
+	{
+		/* Failed! */
+		DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
+		*offset = genofs;
+		return tok == PDF_TOK_EOF;
+	}
+	num = buf->i;
+
+	/* We expect to read 'gen' here */
+	tok = pdf_lex(doc->file, buf);
+	tmpofs = fz_tell(doc->file);
+	if (tok != PDF_TOK_INT)
+	{
+		/* Failed! */
+		DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
+		*offset = tmpofs;
+		return tok == PDF_TOK_EOF;
+	}
+	gen = buf->i;
+
+	/* We expect to read 'obj' here */
+	do
+	{
+		tmpofs = fz_tell(doc->file);
+		tok = pdf_lex(doc->file, buf);
+		if (tok == PDF_TOK_OBJ)
+			break;
+		if (tok != PDF_TOK_INT)
+		{
+			DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
+			*offset = fz_tell(doc->file);
+			return tok == PDF_TOK_EOF;
+		}
+		DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
+		num = gen;
+		numofs = genofs;
+		gen = buf->i;
+		genofs = tmpofs;
+	}
+	while (1);
+
+	/* Only report an object number once a complete "num gen obj" header
+	 * has been read; the early returns above leave *nump untouched. */
+	*nump = num;
+
+	/* Now we read the actual object */
+	xref_len = pdf_xref_len(doc);
+
+	/* When we are reading a progressive file, we typically see:
+	 *    File Header
+	 *    obj m    (Linearization params)
+	 *    xref #1  (refers to objects m-n)
+	 *    obj m+1
+	 *    ...
+	 *    obj n
+	 *    obj 1
+	 *    ...
+	 *    obj n-1
+	 *    xref #2
+	 *
+	 * The linearisation params are read elsewhere, hence
+	 * whenever we read an object it should just go into the
+	 * previous xref.
+	 */
+	tok = pdf_repair_obj(doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs);
+
+	do /* So we can break out of it */
+	{
+		if (num <= 0 || num >= xref_len)
+		{
+			fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
+			break;
+		}
+		if (gen != 0)
+		{
+			fz_warn(ctx, "Unexpected non zero generation number in linearized file");
+		}
+		entry = pdf_get_populating_xref_entry(doc, num);
+		if (entry->type != 0)
+		{
+			DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
+			break;
+		}
+		if (page && *page)
+		{
+			DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
+			if (!entry->obj)
+				entry->obj = pdf_keep_obj(*page);
+
+			/* A file may contain more page objects than its linearization
+			 * dictionary declared; never index past the page_count entries
+			 * allocated for linear_page_refs. */
+			if (doc->linear_page_refs && doc->linear_page_num < doc->page_count && doc->linear_page_refs[doc->linear_page_num] == NULL)
+				doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(doc, num, gen);
+		}
+		else
+		{
+			DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
+		}
+		entry->type = 'n';
+		entry->gen = 0;
+		entry->ofs = numofs;
+		entry->stm_ofs = stmofs;
+	}
+	while (0);
+	if (page && *page)
+		doc->linear_page_num++;
+
+	if (tok == PDF_TOK_ENDOBJ)
+	{
+		*offset = fz_tell(doc->file);
+	}
+	else
+	{
+		*offset = newtmpofs;
+	}
+	return 0;
+}
+
+/*
+	If the hints are loaded and no reference is held yet for page
+	pagenum, load the object that the hints name for that page and, if
+	its Type is "Page", record an indirect reference to it in
+	doc->linear_page_refs[pagenum].
+
+	FZ_ERROR_TRYLATER is rethrown; any other error is swallowed.
+*/
+static void
+pdf_load_hinted_page(pdf_document *doc, int pagenum)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_obj *page = NULL;
+
+	/* hints_loaded is also set when loading the hints failed, in which
+	 * case hint_page may never have been allocated. */
+	if (!doc->hints_loaded || !doc->linear_page_refs || !doc->hint_page)
+		return;
+
+	/* Both per-page arrays are indexed by pagenum below. */
+	if (pagenum < 0 || pagenum >= doc->page_count)
+		return;
+
+	if (doc->linear_page_refs[pagenum])
+		return;
+
+	fz_var(page);
+
+	fz_try(ctx)
+	{
+		int num = doc->hint_page[pagenum].number;
+
+		page = pdf_load_object(doc, num, 0);
+		if (!strcmp("Page", pdf_to_name(pdf_dict_gets(page, "Type"))))
+		{
+			/* We have found the page object! */
+			DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
+			doc->linear_page_refs[pagenum] = pdf_new_indirect(doc, num, 0);
+		}
+	}
+	fz_always(ctx)
+	{
+		/* Release our reference even if pdf_new_indirect threw. */
+		pdf_drop_obj(page);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Silently swallow the error and proceed as normal */
+	}
+
+}
+
+/*
+	Use the hint table to find object num: start at the closest hinted
+	object at or below num and read objects forward from there (each is
+	recorded in the xref by pdf_obj_read), refining the hints on the way.
+	The file position is restored before returning or rethrowing.
+
+	Returns 1 once object num has been read, 0 if there is no usable
+	hint or the end of the file is reached first.
+*/
+static int
+read_hinted_object(pdf_document *doc, int num)
+{
+	/* Try to find the object using our hint table. Find the closest
+	 * object <= the one we want that has a hint and read forward from
+	 * there. */
+	fz_context *ctx = doc->ctx;
+	int expected = num;
+	int result = 1;
+	int curr_pos;
+	int start, offset;
+
+	/* The hint table was sized when the hints were loaded; never index
+	 * outside it. */
+	if (num < 0 || num >= doc->hint_obj_offsets_max)
+		return 0;
+
+	while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
+		expected--;
+	if (expected != num)
+		DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", num, expected));
+	if (expected == 0)	/* No hints found, just bale */
+		return 0;
+
+
+	curr_pos = fz_tell(doc->file);
+	offset = doc->hint_obj_offsets[expected];
+
+	fz_var(expected);
+	fz_var(result);
+
+	fz_try(ctx)
+	{
+		int found;
+
+		/* Try to read forward from there */
+		do
+		{
+			start = offset;
+			/* pdf_obj_read leaves found untouched unless it read a
+			 * complete object header. */
+			found = -1;
+			DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
+			if (pdf_obj_read(doc, &offset, &found, 0))
+			{
+				/* End of file: stop rather than rescanning it forever. */
+				result = 0;
+				break;
+			}
+			DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
+			/* Nothing usable was read (or its number does not fit the
+			 * hint table); carry on from the advanced offset. */
+			if (found < 0 || found >= doc->hint_obj_offsets_max)
+				continue;
+			if (found <= expected)
+			{
+				/* We found the right one (or one earlier than
+				 * we expected). Update the hints. */
+				doc->hint_obj_offsets[expected] = offset;
+				doc->hint_obj_offsets[found] = start;
+				if (found+1 < doc->hint_obj_offsets_max)
+					doc->hint_obj_offsets[found+1] = offset;
+				/* Retry with the next one */
+				expected = found+1;
+			}
+			else
+			{
+				/* We found one later than we expected. */
+				doc->hint_obj_offsets[expected] = 0;
+				doc->hint_obj_offsets[found] = start;
+				if (found+1 < doc->hint_obj_offsets_max)
+					doc->hint_obj_offsets[found+1] = offset;
+				while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
+					expected--;
+				if (expected == 0)	/* No hints found, just bale */
+				{
+					/* Leave the loop instead of returning from inside
+					 * fz_try, so that the fz_always block still runs. */
+					result = 0;
+					break;
+				}
+			}
+		}
+		while (found != num);
+	}
+	fz_always(ctx)
+	{
+		fz_seek(doc->file, curr_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* FIXME: Currently we ignore the hint. Perhaps we should
+		 * drop back to non-hinted operation here. */
+		if (expected < doc->hint_obj_offsets_max)
+			doc->hint_obj_offsets[expected] = 0;
+		fz_rethrow(ctx);
+	}
+	return result;
+}
 
 void
 pdf_cache_object(pdf_document *doc, int num, int gen)
@@ -1260,6 +1587,7 @@ pdf_cache_object(pdf_document *doc, int num, int gen)
 	if (num < 0 || num >= pdf_xref_len(doc))
 		fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(doc));
 
+object_updated:
 	x = pdf_get_xref_entry(doc, num);
 
 	if (x->obj)
@@ -1309,6 +1637,14 @@ pdf_cache_object(pdf_document *doc, int num, int gen)
 				fz_throw(ctx, FZ_ERROR_GENERIC, "object (%d %d R) was not found in its object stream", num, gen);
 		}
 	}
+	else if (doc->hint_obj_offsets && read_hinted_object(doc, num))
+	{
+		goto object_updated;
+	}
+	else if (doc->file_length && doc->linear_pos < doc->file_length)
+	{
+		fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d %d R) - not loaded yet?", num, gen);
+	}
 	else
 	{
 		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d %d R)", num, gen);
@@ -1368,7 +1704,7 @@ pdf_resolve_indirect(pdf_obj *ref)
 		}
 		fz_catch(ctx)
 		{
-			/* FIXME: TryLater ?
*/
+			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
 			fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen);
 			return NULL;
 		}
@@ -1591,7 +1927,18 @@ pdf_document *
 pdf_open_document_no_run_with_stream(fz_context *ctx, fz_stream *file)
 {
 	pdf_document *doc = pdf_new_document(ctx, file);
-	pdf_init_document(doc);
+
+	fz_var(doc);
+
+	fz_try(ctx)
+	{
+		pdf_init_document(doc);
+	}
+	fz_catch(ctx)
+	{
+		pdf_close_document(doc);
+		fz_rethrow_message(ctx, "cannot load document from stream");
+	}
 	return doc;
 }
 
@@ -1599,9 +1946,10 @@ pdf_document *
 pdf_open_document_no_run(fz_context *ctx, const char *filename)
 {
 	fz_stream *file = NULL;
-	pdf_document *doc;
+	pdf_document *doc = NULL;
 
 	fz_var(file);
+	fz_var(doc);
 
 	fz_try(ctx)
 	{
@@ -1615,11 +1963,391 @@ pdf_open_document_no_run(fz_context *ctx, const char *filename)
 	}
 	fz_catch(ctx)
 	{
+		pdf_close_document(doc);
 		fz_rethrow_message(ctx, "cannot load document '%s'", filename);
 	}
 	return doc;
 }
 
+/*
+	Read the hint stream in object (objnum gennum) and build from it the
+	per-page table (doc->hint_page), the shared object tables
+	(doc->hint_shared_ref, doc->hint_shared) and the table of known
+	object offsets (doc->hint_obj_offsets).
+
+	FZ_ERROR_TRYLATER is rethrown as is. Any other error marks the hints
+	as loaded so that no retry is made, clears
+	doc->file_reading_linearly and is rethrown as FZ_ERROR_TRYLATER.
+*/
+static void
+pdf_load_hints(pdf_document *doc, int objnum, int gennum)
+{
+	fz_stream *stream = NULL;
+	pdf_obj *dict;
+	fz_context *ctx = doc->ctx;
+
+	fz_var(stream);
+	fz_var(dict);
+
+	fz_try(ctx)
+	{
+		int i, j, least_num_page_objs, page_obj_num_bits;
+		int least_page_len, page_len_num_bits, shared_hint_offset;
+		int least_page_offset, page_offset_num_bits;
+		int least_content_stream_len, content_stream_len_num_bits;
+		int num_shared_obj_num_bits, shared_obj_num_bits;
+		int numerator_bits, denominator_bits, shared;
+		int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
+		int shared_obj_count_total;
+		int least_shared_group_len, shared_group_len_num_bits;
+		int max_object_num = pdf_xref_len(doc);
+
+		stream = pdf_open_stream(doc, objnum, gennum);
+		dict = pdf_get_xref_entry(doc, objnum)->obj;
+		if (dict == NULL || !pdf_is_dict(dict))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint object");
+
+		shared_hint_offset = pdf_to_int(pdf_dict_gets(dict, "S"));
+
+		/* The page tables below hold page_count+1 entries and index 0 is
+		 * always written, so there must be at least one page. */
+		if (doc->page_count <= 0)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint object");
+
+		/* Malloc the structures (use realloc to cope with the fact we
+		 * may try this several times before enough data is loaded) */
+		doc->hint_page = fz_resize_array(ctx, doc->hint_page, doc->page_count+1, sizeof(*doc->hint_page));
+		memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->page_count+1));
+		doc->hint_obj_offsets = fz_resize_array(ctx, doc->hint_obj_offsets, max_object_num, sizeof(*doc->hint_obj_offsets));
+		memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
+		doc->hint_obj_offsets_max = max_object_num;
+
+		/* Read the page object hints table: Header first */
+		least_num_page_objs = fz_read_bits(stream, 32);
+		/* The following is sometimes a lie, but we read this version,
+		 * as other table values are built from it. In
+		 * pdf_reference17.pdf, this points to 2 objects before the
+		 * first pages page object. */
+		doc->hint_page[0].offset = fz_read_bits(stream, 32);
+		if (doc->hint_page[0].offset > doc->hint_object_offset)
+			doc->hint_page[0].offset += doc->hint_object_length;
+		page_obj_num_bits = fz_read_bits(stream, 16);
+		least_page_len = fz_read_bits(stream, 32);
+		page_len_num_bits = fz_read_bits(stream, 16);
+		least_page_offset = fz_read_bits(stream, 32);
+		page_offset_num_bits = fz_read_bits(stream, 16);
+		least_content_stream_len = fz_read_bits(stream, 32);
+		content_stream_len_num_bits = fz_read_bits(stream, 16);
+		num_shared_obj_num_bits = fz_read_bits(stream, 16);
+		shared_obj_num_bits = fz_read_bits(stream, 16);
+		numerator_bits = fz_read_bits(stream, 16);
+		denominator_bits = fz_read_bits(stream, 16);
+
+		/* Item 1: Page object numbers */
+		doc->hint_page[0].number = doc->linear_page1_obj_num;
+		/* We don't care about the number of objects in the first page */
+		(void)fz_read_bits(stream, page_obj_num_bits);
+		j = 1;
+		for (i = 1; i < doc->page_count; i++)
+		{
+			int delta_page_objs = fz_read_bits(stream, page_obj_num_bits);
+
+			doc->hint_page[i].number = j;
+			j += least_num_page_objs + delta_page_objs;
+		}
+		doc->hint_page[i].number = j; /* Not a real page object */
+		fz_sync_bits(stream);
+		/* Item 2: Page lengths */
+		j = doc->hint_page[0].offset;
+		for (i = 0; i < doc->page_count; i++)
+		{
+			int delta_page_len = fz_read_bits(stream, page_len_num_bits);
+			int old = j;
+
+			doc->hint_page[i].offset = j;
+			j += least_page_len + delta_page_len;
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		doc->hint_page[i].offset = j;
+		fz_sync_bits(stream);
+		/* Item 3: Shared references */
+		shared = 0;
+		for (i = 0; i < doc->page_count; i++)
+		{
+			int num_shared_objs = fz_read_bits(stream, num_shared_obj_num_bits);
+			doc->hint_page[i].index = shared;
+			shared += num_shared_objs;
+		}
+		doc->hint_page[i].index = shared;
+		doc->hint_shared_ref = fz_resize_array(ctx, doc->hint_shared_ref, shared, sizeof(*doc->hint_shared_ref));
+		memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
+		fz_sync_bits(stream);
+		/* Item 4: Shared references */
+		for (i = 0; i < shared; i++)
+		{
+			int ref = fz_read_bits(stream, shared_obj_num_bits);
+			doc->hint_shared_ref[i] = ref;
+		}
+		/* Skip items 5,6,7 as we don't use them */
+
+		fz_seek(stream, shared_hint_offset, SEEK_SET);
+
+		/* Read the shared object hints table: Header first */
+		shared_obj_num = fz_read_bits(stream, 32);
+		shared_obj_offset = fz_read_bits(stream, 32);
+		if (shared_obj_offset > doc->hint_object_offset)
+			shared_obj_offset += doc->hint_object_length;
+		shared_obj_count_page1 = fz_read_bits(stream, 32);
+		shared_obj_count_total = fz_read_bits(stream, 32);
+		shared_obj_num_bits = fz_read_bits(stream, 16);
+		least_shared_group_len = fz_read_bits(stream, 32);
+		shared_group_len_num_bits = fz_read_bits(stream, 16);
+
+		/* hint_shared is sized from shared_obj_count_total and its first
+		 * shared_obj_count_page1 entries are filled separately, so both
+		 * counts must be consistent before they are used as bounds. */
+		if (shared_obj_count_total < 0 || shared_obj_count_page1 < 0 || shared_obj_count_page1 > shared_obj_count_total)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared object counts)");
+
+		/* Sanity check the references in Item 4 above to ensure we
+		 * don't access out of range with malicious files. */
+		for (i = 0; i < shared; i++)
+		{
+			if (doc->hint_shared_ref[i] >= shared_obj_count_total)
+			{
+				fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared refs)");
+			}
+		}
+
+		doc->hint_shared = fz_resize_array(ctx, doc->hint_shared, shared_obj_count_total+1, sizeof(*doc->hint_shared));
+		memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
+
+		/* Item 1: Shared references */
+		j = doc->hint_page[0].offset;
+		for (i = 0; i < shared_obj_count_page1; i++)
+		{
+			int off = fz_read_bits(stream, shared_group_len_num_bits);
+			int old = j;
+			doc->hint_shared[i].offset = j;
+			j += off + least_shared_group_len;
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		/* FIXME: We would have problems recreating the length of the
+		 * last page 1 shared reference group. But we'll never need
+		 * to, so ignore it. */
+		j = shared_obj_offset;
+		for (; i < shared_obj_count_total; i++)
+		{
+			int off = fz_read_bits(stream, shared_group_len_num_bits);
+			int old = j;
+			doc->hint_shared[i].offset = j;
+			j += off + least_shared_group_len;
+			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
+				j += doc->hint_object_length;
+		}
+		doc->hint_shared[i].offset = j;
+		fz_sync_bits(stream);
+		/* Item 2: Signature flags: read these just so we can skip */
+		for (i = 0; i < shared_obj_count_total; i++)
+		{
+			doc->hint_shared[i].number = fz_read_bits(stream, 1);
+		}
+		fz_sync_bits(stream);
+		/* Item 3: Signatures: just skip */
+		for (i = 0; i < shared_obj_count_total; i++)
+		{
+			if (doc->hint_shared[i].number)
+			{
+				(void)fz_read_bits(stream, 128);
+			}
+		}
+		fz_sync_bits(stream);
+		/* Item 4: Shared object object numbers */
+		j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
+		for (i = 0; i < shared_obj_count_page1; i++)
+		{
+			doc->hint_shared[i].number = j;
+			j += fz_read_bits(stream, shared_obj_num_bits) + 1;
+		}
+		j = shared_obj_num;
+		for (; i < shared_obj_count_total; i++)
+		{
+			doc->hint_shared[i].number = j;
+			j += fz_read_bits(stream, shared_obj_num_bits) + 1;
+		}
+		doc->hint_shared[i].number = j;
+
+		/* Now, actually use the data we have gathered. Object numbers
+		 * come from the file, so ignore any that fall outside the table. */
+		for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
+		{
+			int n = doc->hint_shared[i].number;
+			if (n >= 0 && n < max_object_num)
+				doc->hint_obj_offsets[n] = doc->hint_shared[i].offset;
+		}
+		for (i = 0; i < doc->page_count; i++)
+		{
+			int n = doc->hint_page[i].number;
+			if (n >= 0 && n < max_object_num)
+				doc->hint_obj_offsets[n] = doc->hint_page[i].offset;
+		}
+	}
+	fz_always(ctx)
+	{
+		fz_close(stream);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+		/* Don't try to load hints again */
+		doc->hints_loaded = 1;
+		/* We won't use the linearized object any more. */
+		doc->file_reading_linearly = 0;
+		/* Any other error becomes a TRYLATER */
+		fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
+	}
+	doc->hints_loaded = 1;
+}
+
+/*
+	Seek to doc->hint_object_offset, read the object(s) found there and
+	pass each one to pdf_load_hints. The file position is restored
+	before returning or rethrowing.
+*/
+static void
+pdf_load_hint_object(pdf_document *doc)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_lexbuf *buf = &doc->lexbuf.base;
+	int curr_pos;
+
+	curr_pos = fz_tell(doc->file);
+	fz_seek(doc->file, doc->hint_object_offset, SEEK_SET);
+	fz_try(ctx)
+	{
+		/* NOTE(review): the loop only stops on a token that does not
+		 * start an object, so any objects following the hint stream are
+		 * also passed to pdf_load_hints - confirm this is intended. */
+		while (1)
+		{
+			pdf_obj *page = NULL;
+			int tmpofs, num, gen, tok;
+
+			tok = pdf_lex(doc->file, buf);
+			if (tok != PDF_TOK_INT)
+				break;
+			num = buf->i;
+			tok = pdf_lex(doc->file, buf);
+			if (tok != PDF_TOK_INT)
+				break;
+			gen = buf->i;
+			tok = pdf_lex(doc->file, buf);
+			if (tok != PDF_TOK_OBJ)
+				break;
+			(void)pdf_repair_obj(doc, buf, &tmpofs, NULL, NULL, NULL, &page, &tmpofs);
+			/* pdf_repair_obj may hand back a reference through page; it
+			 * is not needed here, so release it rather than leak it. */
+			pdf_drop_obj(page);
+			pdf_load_hints(doc, num, gen);
+		}
+	}
+	fz_always(ctx)
+	{
+		fz_seek(doc->file, curr_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+}
+
+/*
+	Read on through a progressively loaded file from doc->linear_pos,
+	recording each object found, and return the reference held in
+	doc->linear_page_refs for page pagenum (which may be NULL).
+
+	Throws FZ_ERROR_GENERIC if pagenum is out of range. A
+	FZ_ERROR_TRYLATER raised while reading is rethrown only if no
+	reference for pagenum is held yet; other errors are always rethrown.
+*/
+pdf_obj *pdf_progressive_advance(pdf_document *doc, int pagenum)
+{
+	fz_context *ctx = doc->ctx;
+	pdf_lexbuf *buf = &doc->lexbuf.base;
+	int curr_pos;
+	pdf_obj *page = NULL;
+
+	/* Validate pagenum before anything indexes the per-page arrays. */
+	if (pagenum < 0 || pagenum >= doc->page_count)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->page_count);
+
+	pdf_load_hinted_page(doc, pagenum);
+
+	if (doc->linear_pos == doc->file_length)
+		return doc->linear_page_refs[pagenum];
+
+	/* Only load hints once, and then only after we have got page 0 */
+	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
+	{
+		/* Found hint object */
+		pdf_load_hint_object(doc);
+	}
+
+	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
+	curr_pos = fz_tell(doc->file);
+
+	fz_var(page);
+
+	fz_try(ctx)
+	{
+		int eof;
+		do
+		{
+			int num;
+			page = NULL;
+			eof = pdf_obj_read(doc, &doc->linear_pos, &num, &page);
+			pdf_drop_obj(page);
+			page = NULL;
+		}
+		while (!eof);
+
+		{
+			pdf_obj *catalog;
+			pdf_obj *pages;
+			doc->linear_pos = doc->file_length;
+			pdf_load_xref(doc, buf);
+			catalog = pdf_dict_gets(pdf_trailer(doc), "Root");
+			pages = pdf_dict_gets(catalog, "Pages");
+
+			if (!pdf_is_dict(pages))
+				fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree");
+			break;
+		}
+	}
+	fz_always(ctx)
+	{
+		fz_seek(doc->file, curr_pos, SEEK_SET);
+	}
+	fz_catch(ctx)
+	{
+		pdf_drop_obj(page);
+		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
+		{
+			if (doc->linear_page_refs[pagenum] == NULL)
+			{
+				/* Still not got a page */
+				fz_rethrow(ctx);
+			}
+		}
+		else
+			fz_rethrow(ctx);
+	}
+
+	return doc->linear_page_refs[pagenum];
+}
+
 pdf_document *pdf_specifics(fz_document *doc)
 {
 	return (pdf_document *)((doc && doc->close == (void *)pdf_close_document) ? doc : NULL); |