From 59bd2f5bfc486b107c4bd689bd65ea7d23e2fad0 Mon Sep 17 00:00:00 2001
From: Tor Andersson <tor@ghostscript.com>
Date: Wed, 6 Oct 2004 11:36:25 +0200
Subject: hashtable and object store memory bugs

---
 base/hash.c         | 28 +++++++++++++++------------
 include/fitz/hash.h |  4 ++--
 mupdf/save.c        |  5 ++++-
 mupdf/xref.c        | 56 +++++++++++++++++++++++++++++++++++++++++++++--------
 test/pdfclean.c     | 20 +++++++++++++++++++
 test/pdfdebug.c     | 18 +++++++++++++++--
 6 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/base/hash.c b/base/hash.c
index 24d49ab5..962b92e1 100644
--- a/base/hash.c
+++ b/base/hash.c
@@ -23,9 +23,9 @@ struct fz_hashentry_s
 
 struct fz_hashtable_s
 {
-	unsigned keylen;
-	unsigned size;
-	unsigned load;
+	int keylen;	/* key length handed to hash() */
+	int size;	/* slot count of ents; insert positions are taken modulo size */
+	int load;	/* fill level (presumably used slots); insert resizes once load > size * 8 / 10 */
 	fz_hashentry *ents;
 };
 
@@ -80,13 +80,13 @@ fz_hashlen(fz_hashtable *table)
 }
 
 void *
-fz_gethashkey(fz_hashtable *table, int idx)
+fz_hashgetkey(fz_hashtable *table, int idx)	/* key held in slot idx; idx is not range-checked */
 {
 	return table->ents[idx].key;
 }
 
 void *
-fz_gethashval(fz_hashtable *table, int idx)
+fz_hashgetval(fz_hashtable *table, int idx)	/* value held in slot idx; idx is not range-checked */
 {
 	return table->ents[idx].val;
 }
@@ -104,9 +104,9 @@ fz_resizehash(fz_hashtable *table, int newsize)
 	fz_error *error;
 	fz_hashentry *newents;
 	fz_hashentry *oldents;
-	unsigned oldload;
-	unsigned oldsize;
-	unsigned i;
+	int oldload;
+	int oldsize;
+	int i;
 
 	oldsize = table->size;
 	oldload = table->load;
@@ -115,7 +115,7 @@ fz_resizehash(fz_hashtable *table, int newsize)
 	if (newsize < oldload * 8 / 10)
 		return fz_throw("rangecheck: resize hash too small");
 
-	newents = fz_realloc(table->ents, sizeof(fz_hashentry) * newsize);
+	newents = fz_malloc(sizeof(fz_hashentry) * newsize);
 	if (!newents)
 		return fz_outofmem;
 
@@ -167,9 +167,9 @@ fz_error *
 fz_hashinsert(fz_hashtable *table, void *key, void *val)
 {
 	fz_error *error;
-	fz_hashentry *ents = table->ents;
-	unsigned size = table->size;
-	unsigned pos = hash(key, table->keylen) % size;
+	fz_hashentry *ents;
+	unsigned size;
+	unsigned pos;
 
 	if (table->load > table->size * 8 / 10)
 	{
@@ -178,6 +178,10 @@ fz_hashinsert(fz_hashtable *table, void *key, void *val)
 			return error;
 	}
 
+	ents = table->ents;
+	size = table->size;
+	pos = hash(key, table->keylen) % size;
+
 	while (1)
 	{
 		if (!ents[pos].val)
diff --git a/include/fitz/hash.h b/include/fitz/hash.h
index 4229a02c..731e5fab 100644
--- a/include/fitz/hash.h
+++ b/include/fitz/hash.h
@@ -10,6 +10,6 @@ fz_error *fz_hashinsert(fz_hashtable *table, void *key, void *val);
 fz_error *fz_hashremove(fz_hashtable *table, void *key);
 
 int fz_hashlen(fz_hashtable *table);
-void *fz_gethashkey(fz_hashtable *table, int idx);
-void *fz_gethashval(fz_hashtable *table, int idx);
+void *fz_hashgetkey(fz_hashtable *table, int idx);
+void *fz_hashgetval(fz_hashtable *table, int idx);
 
diff --git a/mupdf/save.c b/mupdf/save.c
index 36695dd4..5d370286 100644
--- a/mupdf/save.c
+++ b/mupdf/save.c
@@ -336,12 +336,15 @@ pdf_savepdf(pdf_xref *xref, char *path, pdf_crypt *encrypt)
 
 	for (oid = 0; oid < xref->size; oid++)
 	{
+		int gid = xref->table[oid].gen;
 		int type = xref->table[oid].type;
+		if (type == 'o')
+			gid = 0;
 		if (type == 'a' || type == 'o')
 			type = 'n';
 		if (type == 'd')
 			type = 'f';
-		fz_print(out, "%010d %05d %c \n", ofsbuf[oid], xref->table[oid].gen, type);
+		fz_print(out, "%010d %05d %c \n", ofsbuf[oid], gid, type);
 	}
 
 	fz_print(out, "\n");
diff --git a/mupdf/xref.c b/mupdf/xref.c
index 08ab8621..a53f06e5 100644
--- a/mupdf/xref.c
+++ b/mupdf/xref.c
@@ -84,8 +84,8 @@ pdf_closexref(pdf_xref *xref)
 	{
 		for (i = 0; i < fz_hashlen(xref->store); i++)
 		{
-			key = fz_gethashkey(xref->store, i);
-			val = fz_gethashval(xref->store, i);
+			key = fz_hashgetkey(xref->store, i);
+			val = fz_hashgetval(xref->store, i);
 			if (val && key[2] == 0)
 				fz_dropobj((fz_obj*)val);
 			if (val && key[2] == 1)
@@ -125,6 +125,30 @@ pdf_debugxref(pdf_xref *xref)
  * object and stream store (cached from objstm and saved for mutation)
  */
 
+/* Print each occupied store slot; key is {oid, gid, kind}: kind 0 = object, 1 = stream. */
+void
+pdf_debugstore(fz_hashtable *store)
+{
+	int i;
+
+	printf("object store (%d)\n", fz_hashlen(store));
+	for (i = 0; i < fz_hashlen(store); i++)
+	{
+		int *key = fz_hashgetkey(store, i);
+		void *val = fz_hashgetval(store, i);
+		if (!val)
+			continue;	/* slot holds no value */
+		printf("slot %d: %d %d ", i, key[0], key[1]);
+		if (key[2] == 0) {
+			printf("obj[%d] ", ((fz_obj*)val)->refcount);
+			fz_debugobj(val);
+		}
+		if (key[2] == 1)	/* cast: wp - rp is presumably a pointer difference, %d needs an int */
+			printf("stream %d", (int)(((fz_buffer*)val)->wp - ((fz_buffer*)val)->rp));
+		printf("\n");
+	}
+}
+
 fz_obj *
 pdf_findstoredobject(fz_hashtable *store, int oid, int gid)
 {
@@ -177,20 +201,32 @@ fz_error *
 pdf_storeobject(fz_hashtable *store, int oid, int gid, fz_obj *obj)
 {
 	int key[3];	
+	fz_obj *old;	/* object being replaced under {oid, gid, 0}, if any */
 	key[0] = oid;
 	key[1] = gid;
 	key[2] = 0;
-	return fz_hashinsert(store, key, obj);
+	fz_keepobj(obj);	/* take the store's reference first: obj may be the object already stored */
+	if ((old = fz_hashfind(store, key)) != nil) {
+		fz_hashremove(store, key);
+		fz_dropobj(old);
+	}
+	return fz_hashinsert(store, key, obj);
 }
 
 fz_error *
-pdf_storestream(fz_hashtable *store, int oid, int gid, fz_buffer *buf)
+pdf_storestream(fz_hashtable *store, int oid, int gid, fz_buffer *stm)
 {
 	int key[3];	
+	fz_buffer *old;	/* buffer being replaced under {oid, gid, 1}, if any */
 	key[0] = oid;
 	key[1] = gid;
 	key[2] = 1;
-	return fz_hashinsert(store, key, buf);
+	old = fz_hashfind(store, key);
+	if (old)
+		fz_hashremove(store, key);
+	if (old && old != stm)	/* re-storing the same buffer must not free it */
+		fz_freebuffer(old);
+	return fz_hashinsert(store, key, stm);
 }
 
 /*
@@ -326,9 +362,9 @@ pdf_saveobject(pdf_xref *xref, int oid, int gid, fz_obj *obj)
 }
 
 fz_error *
-pdf_savestream(pdf_xref *xref, int oid, int gid, fz_buffer *buf)
+pdf_savestream(pdf_xref *xref, int oid, int gid, fz_buffer *stm)
 {
-	return pdf_storestream(xref->store, oid, gid, buf);
+	return pdf_storestream(xref->store, oid, gid, stm);	/* thin wrapper: stores stm in xref->store */
 }
 
 fz_error *
@@ -380,8 +416,10 @@ pdf_loadobject0(fz_obj **objp, pdf_xref *xref, int oid, int gid, int *stmofs)
 	else if (x->type == 'o')
 	{
 		*objp = pdf_findstoredobject(xref->store, oid, gid);
-		if (*objp)
+		if (*objp) {
+			fz_keepobj(*objp);
 			return nil;
+		}
 
 		error = pdf_readobjstm(xref, x->ofs, 0, buf, sizeof buf);
 		if (error)
@@ -390,6 +428,7 @@ pdf_loadobject0(fz_obj **objp, pdf_xref *xref, int oid, int gid, int *stmofs)
 		*objp = pdf_findstoredobject(xref->store, oid, gid);
 		if (!*objp)
 			return fz_throw("rangecheck: could not find object");
+		fz_keepobj(*objp);
 	}
 
 	else if (x->type == 'a')
@@ -397,6 +436,7 @@ pdf_loadobject0(fz_obj **objp, pdf_xref *xref, int oid, int gid, int *stmofs)
 		*objp = pdf_findstoredobject(xref->store, oid, gid);
 		if (!*objp)
 			return fz_throw("rangecheck: could not find object");
+		fz_keepobj(*objp);
 	}
 
 	else
diff --git a/test/pdfclean.c b/test/pdfclean.c
index c6af657b..69c51590 100644
--- a/test/pdfclean.c
+++ b/test/pdfclean.c
@@ -18,6 +18,23 @@ void usage()
 	exit(1);
 }
 
+/* Load every xref entry of type 'o' once; any error is handed to fz_abort. */
+void preloadobjstms(pdf_xref *xref)
+{
+	fz_error *error;
+	fz_obj *obj;
+	int oid;
+
+	for (oid = 0; oid < xref->size; oid++)
+	{
+		if (xref->table[oid].type != 'o')
+			continue;
+		error = pdf_loadobject0(&obj, xref, oid, 0, nil);
+		if (error) fz_abort(error);
+		fz_dropobj(obj);	/* the loaded object itself is not used here */
+	}
+}
+
 void expandstreams(pdf_xref *xref)
 {
 	fz_error *error;
@@ -140,7 +157,10 @@ int main(int argc, char **argv)
 		expandstreams(xref);
 
 	if (dogc)
+	{
+		preloadobjstms(xref);
 		pdf_garbagecollect(xref);
+	}
 
 	error = pdf_savepdf(xref, outfile, encrypt);
 	if (error)
diff --git a/test/pdfdebug.c b/test/pdfdebug.c
index ddc10dff..1009849f 100644
--- a/test/pdfdebug.c
+++ b/test/pdfdebug.c
@@ -5,10 +5,11 @@ static char *password = "";
 static int dodecode = 0;
 static int dorepair = 0;
 static int doprintxref = 0;
+static int dosave = 0;
 
 void usage()
 {
-	fprintf(stderr, "usage: pdfdebug [-drx] [-u password] file.pdf [oid ...]\n");
+	fprintf(stderr, "usage: pdfdebug [-drxs] [-u password] file.pdf [oid ...]\n");
 	exit(1);
 }
 
@@ -43,9 +44,13 @@ void printsafe(unsigned char *buf, int n)
 
 void decodestream(pdf_xref *xref, fz_obj *stream, int oid, int gid, int ofs)
 {
+	FILE *copy;
 	fz_error *error;
 	unsigned char buf[512];
 
+	if (dosave)
+		copy = fopen("/tmp/dump.stm", "wb");
+
 	safecol = 0;
 
 	error = pdf_openstream0(xref, stream, oid, gid, ofs);
@@ -59,8 +64,14 @@ void decodestream(pdf_xref *xref, fz_obj *stream, int oid, int gid, int ofs)
 		if (n < 0)
 			fz_abort(fz_ferror(xref->file));
 		printsafe(buf, n);
+
+		if (dosave)
+			fwrite(buf, 1, n, copy);
 	}
 
+	if (dosave)
+		fclose(copy);
+
 	pdf_closestream(xref);
 }
 
@@ -133,10 +144,13 @@ int main(int argc, char **argv)
 	pdf_xref *xref;
 	int c;
 
-	while ((c = getopt(argc, argv, "drxopu:")) != -1)
+	while ((c = getopt(argc, argv, "drxsopu:")) != -1)
 	{
 		switch (c)
 		{
+		case 's':
+			dodecode ++;
+			dosave ++;
 		case 'd':
 			dodecode ++;
 			break;
-- 
cgit v1.2.3