summaryrefslogtreecommitdiff
path: root/pdf
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2012-02-08 11:13:46 +0000
committerRobin Watts <robin.watts@artifex.com>2012-02-08 20:07:47 +0000
commitc07d0087384a45db43b6efd2f6808b31d2e60c59 (patch)
tree3685be21c279af4c99c85be65ba387893429f82e /pdf
parent3e4cd0765ca1080d2b23c83076cc248310b5b2a2 (diff)
downloadmupdf-c07d0087384a45db43b6efd2f6808b31d2e60c59.tar.xz
Lock reworking.
This is a significant change to the use of locks in MuPDF. Previously, the user had the option of passing us lock/unlock functions for a single mutex as part of the allocation struct. Now we remove these entries from the allocation struct, and make a separate 'locks' struct. This enables people to use fz_alloc_default with locking. If multithreaded operation is required, then the user is required to create FZ_LOCK_MAX mutexes, which will be locked or unlocked by MuPDF calling the lock/unlock functions within the new fz_locks_context structure passed in at context creation. These mutexes are not required to be recursive (they may be, but MuPDF should never call them in this way). MuPDF avoids deadlocks by imposing a locking ordering on itself; a thread will never take lock n, if it already holds any lock i for which 0 <= i <= n. Currently, there are 4 locks used within MuPDF. Lock 0: The alloc lock; taken around all calls to user supplied (or default) allocation functions. Also taken around all accesses to the refs field of storable items. Lock 1: The store lock; taken whenever the store data structures (specifically the linked list pointers) are accessed. Lock 2: The file lock; taken whenever a thread is accessing the raw file. We use the debugging macros to insist that this is held whenever we do a file based seek or read. We also insist that this is never held when we resolve an indirect reference, as this can have the effect of moving the file pointer. Lock 3: The glyphcache lock; taken whenever a thread calls freetype, or accesses the glyphcache data structures. This introduces some complexities w.r.t type3 fonts. Locking can be hugely problematic, so to ease our minds as to the correctness of this code, we introduce some debugging macros. These compile away to nothing unless FITZ_DEBUG_LOCKING is defined. fz_assert_lock_held(ctx, lock) checks that we hold lock. fz_assert_lock_not_held(ctx, lock) checks that we do not hold lock. 
In addition fz_lock_debug_lock and fz_lock_debug_unlock are used on every fz_lock/fz_unlock to check the validity of the operation we are performing - in particular they check that we do/do not already hold the lock we are trying to take/drop, and that by taking this lock we are not violating our defined locking order. The RESOLVE macro (used throughout the code to check whether we need to resolve an indirect reference) calls fz_assert_lock_not_held to ensure that we aren't about to resolve an indirect reference (and hence move the stream pointer) when the file is locked. In order to implement the file locking properly, pdf_open_stream (and friends) now lock the file as a side effect (because they fz_seek to the start of the stream). The lock is automatically dropped on an fz_close of such streams. Previously, the glyph cache was created in a context when it was first required; this presents problems as it can be shared between several contexts or not, depending on whether it is created before the contexts are cloned. We now always create it at startup, so it is always shared. This means that we need reference counting for the glyph caches. Added here. In fz_render_glyph, we take the glyph cache lock, and check to see whether the glyph is in the cache. If it is, we bump the refcount, drop the lock and return the cached character. If it is not, we need to render the character. For freetype based fonts we keep the lock throughout the rendering process, thus ensuring that freetype is only called in a single threaded manner. For type3 fonts, however, we need to invoke the interpreter again to render the glyph streams. This can require reentrance to this routine. We therefore drop the glyph cache lock, call the interpreter to render us our pixmap, and take the lock again. This dropping and retaking of the lock introduces a possible race condition; 2 threads may try to render the same character at the same time.
We therefore modify our hash table insert routines to behave differently if they come to insert an entry only to find that an entry with the same key is already there. We spot this case; if we have just rendered a type3 glyph and when we try to insert it into the cache discover that someone has beaten us to it, we just discard our entry and use the cached one. Hopefully this will seldom be a problem in practice; to solve it properly would require greater complexity (probably involving spotting that another thread is already working on the desired rendering, and sleeping on a semaphore until it completes).
Diffstat (limited to 'pdf')
-rw-r--r--pdf/pdf_cmap_load.c1
-rw-r--r--pdf/pdf_function.c9
-rw-r--r--pdf/pdf_interpret.c2
-rw-r--r--pdf/pdf_repair.c10
-rw-r--r--pdf/pdf_shade.c44
-rw-r--r--pdf/pdf_stream.c15
-rw-r--r--pdf/pdf_xref.c31
7 files changed, 75 insertions, 37 deletions
diff --git a/pdf/pdf_cmap_load.c b/pdf/pdf_cmap_load.c
index fcf30ad0..3257516c 100644
--- a/pdf/pdf_cmap_load.c
+++ b/pdf/pdf_cmap_load.c
@@ -38,7 +38,6 @@ pdf_load_embedded_cmap(pdf_document *xref, fz_obj *stmobj)
fz_try(ctx)
{
-
file = pdf_open_stream(xref, fz_to_num(stmobj), fz_to_gen(stmobj));
phase = 1;
cmap = pdf_load_cmap(ctx, file);
diff --git a/pdf/pdf_function.c b/pdf/pdf_function.c
index ff602021..23acc4db 100644
--- a/pdf/pdf_function.c
+++ b/pdf/pdf_function.c
@@ -836,8 +836,10 @@ load_postscript_func(pdf_function *func, pdf_document *xref, fz_obj *dict, int n
int tok;
int len;
fz_context *ctx = xref->ctx;
+ int locked = 0;
fz_var(stream);
+ fz_var(locked);
fz_try(ctx)
{
@@ -856,15 +858,16 @@ load_postscript_func(pdf_function *func, pdf_document *xref, fz_obj *dict, int n
codeptr = 0;
parse_code(func, stream, &codeptr);
}
- fz_catch(ctx)
+ fz_always(ctx)
{
fz_close(stream);
+ }
+ fz_catch(ctx)
+ {
fz_throw(ctx, "cannot parse calculator function (%d %d R)", num, gen);
}
func->size += func->u.p.cap * sizeof(psobj);
-
- fz_close(stream);
}
static void
diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c
index b5938829..ea7f7692 100644
--- a/pdf/pdf_interpret.c
+++ b/pdf/pdf_interpret.c
@@ -942,6 +942,7 @@ copy_state(fz_context *ctx, pdf_gstate *gs, pdf_gstate *old)
pdf_keep_xobject(ctx, gs->softmask);
}
+
static pdf_csi *
pdf_new_csi(pdf_document *xref, fz_device *dev, fz_matrix ctm, char *event, fz_cookie *cookie, pdf_gstate *gstate)
{
@@ -2517,6 +2518,7 @@ pdf_run_keyword(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf)
fz_warn(ctx, "unknown keyword: '%s'", buf);
break;
}
+ fz_assert_lock_not_held(ctx, FZ_LOCK_FILE);
}
static void
diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c
index 15886a8f..0dc0e132 100644
--- a/pdf/pdf_repair.c
+++ b/pdf/pdf_repair.c
@@ -67,7 +67,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
}
obj = fz_dict_gets(dict, "Length");
- if (fz_is_int(obj))
+ if (!fz_is_indirect(obj) && fz_is_int(obj))
stm_len = fz_to_int(obj);
fz_drop_obj(dict);
@@ -184,12 +184,14 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
}
}
- fz_catch(ctx)
+ fz_always(ctx)
{
fz_close(stm);
+ }
+ fz_catch(ctx)
+ {
fz_throw(ctx, "cannot load object stream object (%d %d R)", num, gen);
}
- fz_close(stm);
}
void
@@ -386,7 +388,9 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
/* corrected stream length */
if (list[i].stm_len >= 0)
{
+ fz_unlock(ctx, FZ_LOCK_FILE);
dict = pdf_load_object(xref, list[i].num, list[i].gen);
+ fz_lock(ctx, FZ_LOCK_FILE);
/* RJW: "cannot load stream object (%d %d R)", list[i].num, list[i].gen */
length = fz_new_int(ctx, list[i].stm_len);
diff --git a/pdf/pdf_shade.c b/pdf/pdf_shade.c
index 15f5de6b..fb5dd72c 100644
--- a/pdf/pdf_shade.c
+++ b/pdf/pdf_shade.c
@@ -604,7 +604,7 @@ pdf_load_mesh_params(pdf_document *xref, fz_obj *dict, struct mesh_params *p)
static void
pdf_load_type4_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
- int funcs, pdf_function **func, fz_stream *stream)
+ int funcs, pdf_function **func)
{
fz_context *ctx = xref->ctx;
struct mesh_params p;
@@ -612,6 +612,7 @@ pdf_load_type4_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
int ncomp;
int flag;
int i;
+ fz_stream *stream;
pdf_load_mesh_params(xref, dict, &p);
@@ -623,6 +624,8 @@ pdf_load_type4_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
else
ncomp = shade->colorspace->n;
+ stream = pdf_open_stream(xref, fz_to_num(dict), fz_to_gen(dict));
+
while (!fz_is_eof_bits(stream))
{
flag = fz_read_bits(stream, p.bpflag);
@@ -665,11 +668,12 @@ pdf_load_type4_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
break;
}
}
+ fz_close(stream);
}
static void
pdf_load_type5_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
- int funcs, pdf_function **func, fz_stream *stream)
+ int funcs, pdf_function **func)
{
fz_context *ctx = xref->ctx;
struct mesh_params p;
@@ -677,6 +681,7 @@ pdf_load_type5_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
int first;
int ncomp;
int i, k;
+ fz_stream *stream;
pdf_load_mesh_params(xref, dict, &p);
@@ -692,6 +697,8 @@ pdf_load_type5_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
buf = fz_malloc_array(ctx, p.vprow, sizeof(struct vertex));
first = 1;
+ stream = pdf_open_stream(xref, fz_to_num(dict), fz_to_gen(dict));
+
while (!fz_is_eof_bits(stream))
{
for (i = 0; i < p.vprow; i++)
@@ -713,13 +720,14 @@ pdf_load_type5_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
fz_free(ctx, ref);
fz_free(ctx, buf);
+ fz_close(stream);
}
/* Type 6 & 7 -- Patch mesh shadings */
static void
pdf_load_type6_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
- int funcs, pdf_function **func, fz_stream *stream)
+ int funcs, pdf_function **func)
{
fz_context *ctx = xref->ctx;
struct mesh_params p;
@@ -728,6 +736,7 @@ pdf_load_type6_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
fz_point prevp[12];
int ncomp;
int i, k;
+ fz_stream *stream;
pdf_load_mesh_params(xref, dict, &p);
@@ -741,6 +750,8 @@ pdf_load_type6_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
hasprevpatch = 0;
+ stream = pdf_open_stream(xref, fz_to_num(dict), fz_to_gen(dict));
+
while (!fz_is_eof_bits(stream))
{
float c[4][FZ_MAX_COLORS];
@@ -834,11 +845,12 @@ pdf_load_type6_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
hasprevpatch = 1;
}
}
+ fz_close(stream);
}
static void
pdf_load_type7_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
- int funcs, pdf_function **func, fz_stream *stream)
+ int funcs, pdf_function **func)
{
fz_context *ctx = xref->ctx;
struct mesh_params p;
@@ -847,6 +859,7 @@ pdf_load_type7_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
fz_point prevp[16];
int ncomp;
int i, k;
+ fz_stream *stream;
pdf_load_mesh_params(xref, dict, &p);
@@ -860,6 +873,8 @@ pdf_load_type7_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
hasprevpatch = 0;
+ stream = pdf_open_stream(xref, fz_to_num(dict), fz_to_gen(dict));
+
while (!fz_is_eof_bits(stream))
{
float c[4][FZ_MAX_COLORS];
@@ -953,6 +968,7 @@ pdf_load_type7_shade(fz_shade *shade, pdf_document *xref, fz_obj *dict,
hasprevpatch = 1;
}
}
+ fz_close(stream);
}
/* Load all of the shading dictionary parameters, then switch on the shading type. */
@@ -962,7 +978,6 @@ pdf_load_shading_dict(pdf_document *xref, fz_obj *dict, fz_matrix transform)
{
fz_shade *shade = NULL;
pdf_function *func[FZ_MAX_COLORS] = { NULL };
- fz_stream *stream = NULL;
fz_obj *obj;
int funcs = 0;
int type = 0;
@@ -972,7 +987,6 @@ pdf_load_shading_dict(pdf_document *xref, fz_obj *dict, fz_matrix transform)
fz_var(shade);
fz_var(func);
fz_var(funcs);
- fz_var(stream);
fz_var(type);
fz_try(ctx)
@@ -1041,35 +1055,25 @@ pdf_load_shading_dict(pdf_document *xref, fz_obj *dict, fz_matrix transform)
}
}
- if (type >= 4 && type <= 7)
- {
- stream = pdf_open_stream(xref, fz_to_num(dict), fz_to_gen(dict));
- /* RJW: "cannot open shading stream (%d %d R)", fz_to_num(dict), fz_to_gen(dict) */
- }
-
switch (type)
{
case 1: pdf_load_function_based_shading(shade, xref, dict, func[0]); break;
case 2: pdf_load_axial_shading(shade, xref, dict, funcs, func); break;
case 3: pdf_load_radial_shading(shade, xref, dict, funcs, func); break;
- case 4: pdf_load_type4_shade(shade, xref, dict, funcs, func, stream); break;
- case 5: pdf_load_type5_shade(shade, xref, dict, funcs, func, stream); break;
- case 6: pdf_load_type6_shade(shade, xref, dict, funcs, func, stream); break;
- case 7: pdf_load_type7_shade(shade, xref, dict, funcs, func, stream); break;
+ case 4: pdf_load_type4_shade(shade, xref, dict, funcs, func); break;
+ case 5: pdf_load_type5_shade(shade, xref, dict, funcs, func); break;
+ case 6: pdf_load_type6_shade(shade, xref, dict, funcs, func); break;
+ case 7: pdf_load_type7_shade(shade, xref, dict, funcs, func); break;
default:
fz_throw(ctx, "unknown shading type: %d", type);
}
- if (stream)
- fz_close(stream);
for (i = 0; i < funcs; i++)
if (func[i])
pdf_drop_function(ctx, func[i]);
}
fz_catch(ctx)
{
- if (stream)
- fz_close(stream);
for (i = 0; i < funcs; i++)
if (func[i])
pdf_drop_function(ctx, func[i]);
diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c
index 3db8eb1c..a01bab55 100644
--- a/pdf/pdf_stream.c
+++ b/pdf/pdf_stream.c
@@ -225,6 +225,7 @@ pdf_open_filter(fz_stream *chain, pdf_document *xref, fz_obj *stmobj, int num, i
else if (fz_array_len(filters) > 0)
chain = build_filter_chain(chain, xref, filters, params, num, gen);
+ fz_lock_stream(chain);
return chain;
}
@@ -276,6 +277,7 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen)
fz_throw(xref->ctx, "object is not a stream");
stm = pdf_open_raw_filter(xref->file, xref, x->obj, num, gen);
+ fz_lock_stream(stm);
fz_seek(xref->file, x->stm_ofs, 0);
return stm;
}
@@ -316,6 +318,7 @@ pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, fz_obj *dict,
fz_throw(xref->ctx, "object is not a stream");
stm = pdf_open_filter(xref->file, xref, dict, num, gen);
+ fz_lock_stream(stm);
fz_seek(xref->file, stm_ofs, 0);
return stm;
}
@@ -378,9 +381,6 @@ pdf_load_stream(pdf_document *xref, int num, int gen)
fz_var(buf);
- stm = pdf_open_stream(xref, num, gen);
- /* RJW: "cannot open stream (%d %d R)", num, gen */
-
dict = pdf_load_object(xref, num, gen);
/* RJW: "cannot load stream dictionary (%d %d R)", num, gen */
@@ -393,16 +393,21 @@ pdf_load_stream(pdf_document *xref, int num, int gen)
fz_drop_obj(dict);
+ stm = pdf_open_stream(xref, num, gen);
+ /* RJW: "cannot open stream (%d %d R)", num, gen */
+
fz_try(ctx)
{
buf = fz_read_all(stm, len);
}
- fz_catch(ctx)
+ fz_always(ctx)
{
fz_close(stm);
+ }
+ fz_catch(ctx)
+ {
fz_throw(ctx, "cannot read raw stream (%d %d R)", num, gen);
}
- fz_close(stm);
return buf;
}
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c
index 210c2f44..41a7ba3a 100644
--- a/pdf/pdf_xref.c
+++ b/pdf/pdf_xref.c
@@ -293,6 +293,8 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in
}
}
+/* Entered with file locked. Drops the lock in the middle, but then picks
+ * it up again before exiting. */
static fz_obj *
pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
{
@@ -319,6 +321,7 @@ pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
fz_try(ctx)
{
+ fz_unlock(ctx, FZ_LOCK_FILE);
obj = fz_dict_gets(trailer, "Size");
if (!obj)
fz_throw(ctx, "xref stream missing Size entry (%d %d R)", num, gen);
@@ -368,10 +371,12 @@ pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
fz_drop_obj(index);
fz_rethrow(ctx);
}
+ fz_lock(ctx, FZ_LOCK_FILE);
return trailer;
}
+/* File is locked on entry, and exit (but may be dropped in the middle) */
static fz_obj *
pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap)
{
@@ -649,10 +654,12 @@ pdf_open_document_with_stream(fz_stream *file)
fz_obj *obj;
fz_obj *nobj = NULL;
int i, repaired = 0;
+ int locked;
fz_context *ctx = file->ctx;
fz_var(dict);
fz_var(nobj);
+ fz_var(locked);
/* install pdf specific callback */
fz_resolve_indirect = pdf_resolve_indirect;
@@ -663,6 +670,9 @@ pdf_open_document_with_stream(fz_stream *file)
xref->file = fz_keep_stream(file);
xref->ctx = ctx;
+ fz_lock(ctx, FZ_LOCK_FILE);
+ locked = 1;
+
fz_try(ctx)
{
pdf_load_xref(xref, xref->scratch, sizeof xref->scratch);
@@ -691,6 +701,9 @@ pdf_open_document_with_stream(fz_stream *file)
if (repaired)
pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch);
+ fz_unlock(ctx, FZ_LOCK_FILE);
+ locked = 0;
+
encrypt = fz_dict_gets(xref->trailer, "Encrypt");
id = fz_dict_gets(xref->trailer, "ID");
if (fz_is_dict(encrypt))
@@ -749,6 +762,11 @@ pdf_open_document_with_stream(fz_stream *file)
}
}
}
+ fz_always(ctx)
+ {
+ if (locked)
+ fz_unlock(ctx, FZ_LOCK_FILE);
+ }
fz_catch(ctx)
{
fz_drop_obj(dict);
@@ -910,18 +928,17 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
}
}
}
- fz_catch(ctx)
+ fz_always(ctx)
{
fz_close(stm);
fz_free(xref->ctx, ofsbuf);
fz_free(xref->ctx, numbuf);
fz_drop_obj(objstm);
+ }
+ fz_catch(ctx)
+ {
fz_throw(ctx, "cannot open object stream (%d %d R)", num, gen);
}
- fz_close(stm);
- fz_free(xref->ctx, ofsbuf);
- fz_free(xref->ctx, numbuf);
- fz_drop_obj(objstm);
}
/*
@@ -950,6 +967,7 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
}
else if (x->type == 'n')
{
+ fz_lock(ctx, FZ_LOCK_FILE);
fz_seek(xref->file, x->ofs, 0);
fz_try(ctx)
@@ -959,6 +977,7 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
}
fz_catch(ctx)
{
+ fz_unlock(ctx, FZ_LOCK_FILE);
fz_throw(ctx, "cannot parse object (%d %d R)", num, gen);
}
@@ -966,11 +985,13 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
{
fz_drop_obj(x->obj);
x->obj = NULL;
+ fz_unlock(ctx, FZ_LOCK_FILE);
fz_throw(ctx, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
}
if (xref->crypt)
pdf_crypt_obj(ctx, xref->crypt, x->obj, num, gen);
+ fz_unlock(ctx, FZ_LOCK_FILE);
}
else if (x->type == 'o')
{