rename and shuffle -- part 2

author: Tor Andersson <tor@ghostscript.com> 2005-03-30 10:45:21 +0200
committer: Tor Andersson <tor@ghostscript.com> 2005-03-30 10:45:21 +0200
commit: 5f4d61903ee8fc514ed7e23eac4d5ac6409ff760 (patch)
tree: a824aa883d9d5df072c17ec0a2ac4a2b5074c2c0 /mupdf/pdf_open.c
parent: ee154f16bd09a43359967f7e7b86c3677c09461d (diff)
download: mupdf-5f4d61903ee8fc514ed7e23eac4d5ac6409ff760.tar.xz
1 files changed, 537 insertions, 0 deletions
diff --git a/mupdf/pdf_open.c b/mupdf/pdf_open.c
new file mode 100644
index 00000000..34caa862
--- /dev/null
+++ b/mupdf/pdf_open.c
@@ -0,0 +1,537 @@
+#include <fitz.h>
+#include <mupdf.h>
+
+/*
+ * Return 1 when ch is one of the six byte values treated as white space
+ * here (NUL, tab, line feed, form feed, carriage return, space), else 0.
+ */
+static inline int iswhite(int ch)
+{
+	switch (ch)
+	{
+	case '\000':
+	case '\011':
+	case '\012':
+	case '\014':
+	case '\015':
+	case '\040':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * magic version tag and startxref
+ */
+
+/*
+ * Read the first line of the file, check that it starts with the
+ * "%PDF-" marker and store the number that follows in xref->version.
+ *
+ * Returns nil on success, the file error if seeking or reading fails,
+ * or a syntax error when the marker is missing.
+ */
+static fz_error *
+loadversion(pdf_xref *xref)
+{
+	/* zero-filled so a short or empty first line never compares garbage */
+	char buf[20] = { 0 };
+	int n;
+
+	n = fz_seek(xref->file, 0, 0);
+	if (n < 0)
+		return fz_ferror(xref->file);
+
+	n = fz_readline(xref->file, buf, sizeof buf);
+	if (n < 0)
+		return fz_ferror(xref->file);
+
+	if (memcmp(buf, "%PDF-", 5) != 0)
+		return fz_throw("syntaxerror: corrupt version marker");
+
+	xref->version = atof(buf + 5);
+
+	pdf_logxref("version %g\n", xref->version);
+
+	return nil;
+}
+
+/*
+ * Locate the last "startxref" keyword in the final 1024 bytes of the
+ * file and store the integer that follows it in xref->startxref.
+ *
+ * Returns nil on success, the file error if seeking or reading fails,
+ * or a syntax error when the keyword is not found in that window.
+ */
+static fz_error *
+readstartxref(pdf_xref *xref)
+{
+	/* one spare byte so the tail can be NUL-terminated for atoi */
+	char buf[1024 + 1];
+	int cap = (int)sizeof buf - 1;
+	int t, n;
+	int i;
+
+	t = fz_seek(xref->file, 0, 2);
+	if (t == -1)
+		return fz_ferror(xref->file);
+
+	t = fz_seek(xref->file, MAX(0, t - cap), 0);
+	if (t == -1)
+		return fz_ferror(xref->file);
+
+	n = fz_read(xref->file, buf, cap);
+	if (n < 0)
+		return fz_ferror(xref->file);
+
+	buf[n] = '\0';
+
+	/* scan backwards so the last occurrence in the file wins */
+	for (i = n - 9; i >= 0; i--)
+	{
+		if (memcmp(buf + i, "startxref", 9) == 0)
+		{
+			i += 9;
+			/* test the bound first: buf[i] must not be read at i == n */
+			while (i < n && iswhite(buf[i]))
+				i ++;
+			xref->startxref = atoi(buf + i);
+			return nil;
+		}
+	}
+
+	return fz_throw("syntaxerror: could not find startxref");
+}
+
+/*
+ * trailer dictionary
+ */
+
+/*
+ * Read the trailer dictionary that follows an old-style "xref" table.
+ *
+ * The table entries themselves are not parsed here: each subsection
+ * header gives an entry count, and the entries are skipped by seeking
+ * 20 bytes per entry. The dictionary after the "trailer" keyword is
+ * parsed into xref->trailer.
+ *
+ * buf/cap is scratch space for line and token reading.
+ * Returns nil on success, a file error or a syntax/range error otherwise.
+ */
+static fz_error *
+readoldtrailer(pdf_xref *xref, char *buf, int cap)
+{
+	char *tok;
+	char *s;
+	int len;
+	int n;
+	int t;
+	int c;
+
+	pdf_logxref("load old xref format trailer\n");
+
+	n = fz_readline(xref->file, buf, cap);
+	if (n < 0) return fz_ferror(xref->file);
+	if (strcmp(buf, "xref") != 0)
+		return fz_throw("syntaxerror: missing xref");
+
+	while (1)
+	{
+		/* a subsection header starts with a digit; anything else ends the table */
+		c = fz_peekbyte(xref->file);
+		if (!(c >= '0' && c <= '9'))
+			break;
+
+		n = fz_readline(xref->file, buf, cap);
+		if (n < 0) return fz_ferror(xref->file);
+
+		s = buf;
+		strsep(&s, " ");	/* first object number: not needed to skip entries */
+		tok = strsep(&s, " ");
+		if (!tok)
+			return fz_throw("syntaxerror: malformed xref subsection header");
+		len = atoi(tok);
+		if (len < 0)
+			return fz_throw("rangecheck: negative xref subsection length");
+
+		/* broken pdfs where the section is not on a separate line */
+		/* NOTE(review): rewinds over the unread rest of the line; the +2 */
+		/* presumably accounts for the line terminator -- confirm */
+		if (s && *s != '\0')
+			fz_seek(xref->file, -(n + buf - s + 2), 1);
+
+		t = fz_tell(xref->file);
+		if (t < 0) return fz_ferror(xref->file);
+
+		/* every entry is seeked over as a fixed 20 bytes */
+		n = fz_seek(xref->file, t + 20 * len, 0);
+		if (n < 0) return fz_ferror(xref->file);
+	}
+
+	t = pdf_lex(xref->file, buf, cap, &n);
+	if (t != PDF_TTRAILER)
+		return fz_throw("syntaxerror: expected trailer");
+
+	t = pdf_lex(xref->file, buf, cap, &n);
+	if (t != PDF_TODICT)
+		return fz_throw("syntaxerror: expected trailer dictionary");
+
+	return pdf_parsedict(&xref->trailer, xref->file, buf, cap);
+}
+
+/*
+ * Parse the indirect object at the current file position into
+ * xref->trailer. The object number, generation and stream offset
+ * outputs of the parser are not requested (passed as nil).
+ */
+static fz_error *
+readnewtrailer(pdf_xref *xref, char *buf, int cap)
+{
+	fz_error *error;
+
+	pdf_logxref("load new xref format trailer\n");
+
+	error = pdf_parseindobj(&xref->trailer, xref->file, buf, cap, nil, nil, nil);
+	return error;
+}
+
+/*
+ * Seek to xref->startxref and load the trailer found there.
+ * A leading 'x' selects the old table format, a leading digit the
+ * new indirect-object format; any other byte is a syntax error.
+ */
+static fz_error *
+readtrailer(pdf_xref *xref, char *buf, int cap)
+{
+	int first;
+
+	if (fz_seek(xref->file, xref->startxref, 0) < 0)
+		return fz_ferror(xref->file);
+
+	first = fz_peekbyte(xref->file);
+
+	if (first == 'x')
+		return readoldtrailer(xref, buf, cap);
+
+	if (first >= '0' && first <= '9')
+		return readnewtrailer(xref, buf, cap);
+
+	return fz_throw("syntaxerror: could not find xref");
+}
+
+/*
+ * xref tables
+ */
+
+/*
+ * Load one old-style "xref" table at the current file position into
+ * xref->table and parse the trailer dictionary that follows it.
+ *
+ * Each subsection header gives a first object number and an entry
+ * count; each entry is a fixed 20-byte record. Table slots whose type
+ * is already set are left untouched.
+ *
+ * trailerp receives the parsed trailer dictionary.
+ * buf/cap is scratch space; cap must be at least 21.
+ * Returns nil on success, a file error or a syntax/range error otherwise.
+ */
+static fz_error *
+readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
+{
+	int ofs, len;
+	char *tok;
+	char *s;
+	int n;
+	int t;
+	int i;
+	int c;
+
+	pdf_logxref("load old xref format\n");
+
+	/* room for one 20-byte record plus its terminator */
+	if (cap < 21)
+		return fz_throw("rangecheck: xref buffer too small");
+
+	n = fz_readline(xref->file, buf, cap);
+	if (n < 0) return fz_ferror(xref->file);
+	if (strcmp(buf, "xref") != 0)
+		return fz_throw("syntaxerror: expected xref");
+
+	while (1)
+	{
+		/* a subsection header starts with a digit; anything else ends the table */
+		c = fz_peekbyte(xref->file);
+		if (!(c >= '0' && c <= '9'))
+			break;
+
+		n = fz_readline(xref->file, buf, cap);
+		if (n < 0) return fz_ferror(xref->file);
+
+		s = buf;
+		tok = strsep(&s, " ");
+		ofs = atoi(tok);
+		tok = strsep(&s, " ");
+		if (!tok)
+			return fz_throw("syntaxerror: malformed xref subsection header");
+		len = atoi(tok);
+
+		/* the subsection must fit inside the allocated table */
+		if (ofs < 0 || len < 0 || ofs > xref->len || len > xref->len - ofs)
+			return fz_throw("rangecheck: xref subsection out of range");
+
+		/* broken pdfs where the section is not on a separate line */
+		if (s && *s != '\0')
+		{
+			fz_warn("syntaxerror: broken xref section");
+			fz_seek(xref->file, -(n + buf - s + 2), 1);
+		}
+
+		for (i = 0; i < len; i++)
+		{
+			n = fz_read(xref->file, buf, 20);
+			if (n < 0) return fz_ferror(xref->file);
+			if (n != 20) return fz_throw("syntaxerror: truncated xref table");
+
+			/* keep atoi from running past the record into stale data */
+			buf[20] = '\0';
+
+			if (!xref->table[ofs + i].type)
+			{
+				/* offset at column 0, generation at column 11, type letter at column 17 */
+				s = buf;
+				xref->table[ofs + i].ofs = atoi(s);
+				xref->table[ofs + i].gen = atoi(s + 11);
+				xref->table[ofs + i].type = s[17];
+			}
+		}
+	}
+
+	t = pdf_lex(xref->file, buf, cap, &n);
+	if (t != PDF_TTRAILER)
+		return fz_throw("syntaxerror: expected trailer");
+	t = pdf_lex(xref->file, buf, cap, &n);
+	if (t != PDF_TODICT)
+		return fz_throw("syntaxerror: expected trailer dictionary");
+
+	return pdf_parsedict(trailerp, xref->file, buf, cap);
+}
+
+/*
+ * Load one cross-reference stream at the current file position.
+ *
+ * The stream object is parsed, registered in xref->table under its own
+ * object number, and its Size, W and optional Index entries are read.
+ * The stream data is then decoded into table slots i0 .. i0+i1-1, each
+ * entry being three big-endian fields of w0, w1 and w2 bytes. Slots
+ * whose type is already set are left untouched.
+ *
+ * On success *trailerp receives the stream dictionary object; on
+ * failure the object is dropped and the error returned.
+ *
+ * NOTE(review): only the first pair of the Index array is used.
+ */
+static fz_error *
+readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
+{
+	fz_error *error;
+	fz_obj *trailer;
+	fz_obj *obj;
+	int oid, gen, stmofs;
+	int size, w0, w1, w2, i0, i1;
+	int i, n;
+
+	pdf_logxref("load new xref format\n");
+
+	error = pdf_parseindobj(&trailer, xref->file, buf, cap, &oid, &gen, &stmofs);
+	if (error)
+		return error;
+
+	if (oid < 0 || oid >= xref->len) {
+		error = fz_throw("rangecheck: object id out of range");
+		goto cleanup;
+	}
+
+	xref->table[oid].type = 'n';
+	xref->table[oid].gen = gen;
+	xref->table[oid].obj = fz_keepobj(trailer);
+	xref->table[oid].stmofs = stmofs;
+
+	obj = fz_dictgets(trailer, "Size");
+	if (!obj) {
+		error = fz_throw("syntaxerror: xref stream missing Size entry");
+		goto cleanup;
+	}
+	size = fz_toint(obj);
+
+	obj = fz_dictgets(trailer, "W");
+	if (!obj) {
+		error = fz_throw("syntaxerror: xref stream missing W entry");
+		goto cleanup;
+	}
+	w0 = fz_toint(fz_arrayget(obj, 0));
+	w1 = fz_toint(fz_arrayget(obj, 1));
+	w2 = fz_toint(fz_arrayget(obj, 2));
+
+	obj = fz_dictgets(trailer, "Index");
+	if (obj) {
+		i0 = fz_toint(fz_arrayget(obj, 0));
+		i1 = fz_toint(fz_arrayget(obj, 1));
+	}
+	else {
+		i0 = 0;
+		i1 = size;
+	}
+
+	/* i1 is a count, so the whole range i0 .. i0+i1-1 must fit the table */
+	if (i0 < 0 || i1 < 0 || i0 > xref->len || i1 > xref->len - i0) {
+		error = fz_throw("syntaxerror: xref stream has too many entries");
+		goto cleanup;
+	}
+
+	error = pdf_openstream(xref, oid, gen);
+	if (error)
+		goto cleanup;
+
+	for (i = i0; i < i0 + i1; i++)
+	{
+		int a = 0;
+		int b = 0;
+		int c = 0;
+
+		if (fz_peekbyte(xref->stream) == EOF)
+		{
+			error = fz_ferror(xref->stream);
+			if (!error)
+				error = fz_throw("syntaxerror: truncated xref stream");
+			pdf_closestream(xref);
+			goto cleanup;
+		}
+
+		/* a zero-width field reads no bytes and keeps its initial 0 */
+		for (n = 0; n < w0; n++)
+			a = (a << 8) + fz_readbyte(xref->stream);
+		for (n = 0; n < w1; n++)
+			b = (b << 8) + fz_readbyte(xref->stream);
+		for (n = 0; n < w2; n++)
+			c = (c << 8) + fz_readbyte(xref->stream);
+
+		if (!xref->table[i].type)
+		{
+			/* an absent type field means type 1 */
+			int t = w0 ? a : 1;
+			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
+			/* second field -> ofs, third field -> gen; each is gated */
+			/* only by its own width, which the reads above already do */
+			xref->table[i].ofs = b;
+			xref->table[i].gen = c;
+		}
+	}
+
+	pdf_closestream(xref);
+
+	*trailerp = trailer;
+
+	return nil;
+
+cleanup:
+	fz_dropobj(trailer);
+	return error;
+}
+
+/*
+ * Seek to ofs and load the cross-reference section found there.
+ * A leading 'x' selects the old table format, a leading digit the
+ * new stream format; any other byte is a syntax error.
+ * trailerp receives the trailer object produced by the chosen loader.
+ */
+static fz_error *
+readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
+{
+	int first;
+
+	if (fz_seek(xref->file, ofs, 0) < 0)
+		return fz_ferror(xref->file);
+
+	first = fz_peekbyte(xref->file);
+
+	if (first == 'x')
+		return readoldxref(trailerp, xref, buf, cap);
+
+	if (first >= '0' && first <= '9')
+		return readnewxref(trailerp, xref, buf, cap);
+
+	return fz_throw("syntaxerror: expected xref");
+}
+
+/*
+ * Load the cross-reference section at ofs, then recurse into the
+ * sections named by its trailer: first XrefStm, then Prev, when present.
+ * The trailer of this section is dropped before returning, whether or
+ * not an error occurred.
+ *
+ * NOTE(review): nothing here bounds the recursion, so a Prev or XrefStm
+ * chain that loops back on itself would presumably never terminate.
+ */
+static fz_error *
+readxrefsections(pdf_xref *xref, int ofs, char *buf, int cap)
+{
+	fz_error *error;
+	fz_obj *trailer;
+	fz_obj *link;
+
+	error = readxref(&trailer, xref, ofs, buf, cap);
+	if (error)
+		return error;
+
+	/* FIXME: do we overwrite free entries properly? */
+	link = fz_dictgets(trailer, "XrefStm");
+	if (link)
+	{
+		pdf_logxref("load xrefstm\n");
+		error = readxrefsections(xref, fz_toint(link), buf, cap);
+	}
+
+	if (!error)
+	{
+		link = fz_dictgets(trailer, "Prev");
+		if (link)
+		{
+			pdf_logxref("load prev\n");
+			error = readxrefsections(xref, fz_toint(link), buf, cap);
+		}
+	}
+
+	/* single exit: error is nil here when every step succeeded */
+	fz_dropobj(trailer);
+	return error;
+}
+
+/*
+ * compressed object streams
+ */
+
+/*
+ * Load every object stored in the object stream (oid, gen).
+ *
+ * The stream dictionary supplies N (object count) and First (offset of
+ * the first object). The stream starts with N pairs of integers, an
+ * object number followed by an offset; the objects are then parsed one
+ * after another starting at First and stored in xref->table, replacing
+ * any object already cached in that slot.
+ *
+ * buf/cap is scratch space for the lexer and parser.
+ * Returns nil on success; on failure everything acquired here is
+ * released and the error returned.
+ */
+fz_error *
+pdf_loadobjstm(pdf_xref *xref, int oid, int gen, char *buf, int cap)
+{
+	fz_error *error;
+	fz_obj *objstm;
+	int *oidbuf;
+	int *ofsbuf;
+
+	fz_obj *obj;
+	int first;
+	int count;
+	int i, n, t;
+
+	pdf_logxref("loadobjstm %d %d\n", oid, gen);
+
+	error = pdf_loadobject(&objstm, xref, oid, gen);
+	if (error)
+		return error;
+
+	count = fz_toint(fz_dictgets(objstm, "N"));
+	first = fz_toint(fz_dictgets(objstm, "First"));
+
+	pdf_logxref("  count %d\n", count);
+
+	/* a negative count would turn into a huge allocation size */
+	if (count < 0)
+	{
+		error = fz_throw("rangecheck: object stream count out of range");
+		goto cleanup1;
+	}
+
+	oidbuf = fz_malloc(count * sizeof(int));
+	if (!oidbuf) { error = fz_outofmem; goto cleanup1; }
+
+	ofsbuf = fz_malloc(count * sizeof(int));
+	if (!ofsbuf) { error = fz_outofmem; goto cleanup2; }
+
+	error = pdf_openstream(xref, oid, gen);
+	if (error)
+		goto cleanup3;
+
+	/* header: count pairs of (object number, offset) */
+	for (i = 0; i < count; i++)
+	{
+		t = pdf_lex(xref->stream, buf, cap, &n);
+		if (t != PDF_TINT)
+		{
+			error = fz_throw("syntaxerror: corrupt object stream");
+			goto cleanup4;
+		}
+		oidbuf[i] = atoi(buf);
+
+		t = pdf_lex(xref->stream, buf, cap, &n);
+		if (t != PDF_TINT)
+		{
+			error = fz_throw("syntaxerror: corrupt object stream");
+			goto cleanup4;
+		}
+		ofsbuf[i] = atoi(buf);
+	}
+
+	n = fz_seek(xref->stream, first, 0);
+	if (n < 0)
+	{
+		error = fz_ferror(xref->stream);
+		goto cleanup4;
+	}
+
+	for (i = 0; i < count; i++)
+	{
+		/* FIXME: seek to first + ofsbuf[i] */
+
+		error = pdf_parsestmobj(&obj, xref->stream, buf, cap);
+		if (error)
+			goto cleanup4;
+
+		if (oidbuf[i] < 1 || oidbuf[i] >= xref->len)
+		{
+			/* the parsed object has no slot to live in; release it */
+			fz_dropobj(obj);
+			error = fz_throw("rangecheck: object number out of range");
+			goto cleanup4;
+		}
+
+		if (xref->table[oidbuf[i]].obj)
+			fz_dropobj(xref->table[oidbuf[i]].obj);
+		xref->table[oidbuf[i]].obj = obj;
+	}
+
+	pdf_closestream(xref);
+	fz_free(ofsbuf);
+	fz_free(oidbuf);
+	fz_dropobj(objstm);
+	return nil;
+
+cleanup4:
+	pdf_closestream(xref);
+cleanup3:
+	fz_free(ofsbuf);
+cleanup2:
+	fz_free(oidbuf);
+cleanup1:
+	fz_dropobj(objstm);
+	return error;
+}
+
+/*
+ * open and load xref tables from pdf
+ */
+
+/*
+ * Open filename and load its cross-reference information into xref.
+ *
+ * Steps: open the file, read the version marker, find startxref, read
+ * the trailer, allocate and clear a table with Size entries, then load
+ * all cross-reference sections starting at startxref.
+ *
+ * xref->table must be nil on entry.
+ * Returns nil on success or the first error encountered.
+ *
+ * NOTE(review): the error paths here do not release xref->file,
+ * xref->trailer or xref->table; presumably the caller cleans up the
+ * xref on failure -- confirm.
+ */
+fz_error *
+pdf_loadxref(pdf_xref *xref, char *filename)
+{
+	fz_error *error;
+	fz_obj *size;
+	int len;
+	int i;
+
+	char buf[65536];	/* yeowch! */
+
+	pdf_logxref("loadxref '%s' %p\n", filename, xref);
+
+	error = fz_openfile(&xref->file, filename, FZ_READ);
+	if (error)
+		return error;
+
+	error = loadversion(xref);
+	if (error)
+		return error;
+
+	error = readstartxref(xref);
+	if (error)
+		return error;
+
+	error = readtrailer(xref, buf, sizeof buf);
+	if (error)
+		return error;
+
+	size = fz_dictgets(xref->trailer, "Size");
+	if (!size)
+		return fz_throw("syntaxerror: trailer missing Size entry");
+
+	len = fz_toint(size);
+
+	pdf_logxref("  size %d\n", len);
+
+	/* Size comes from the file: reject values that cannot size a table */
+	if (len < 0 || (size_t)len > (size_t)-1 / sizeof(pdf_xrefentry))
+		return fz_throw("rangecheck: trailer Size out of range");
+
+	assert(xref->table == nil);
+
+	xref->cap = len;
+	xref->len = len;
+	xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry));
+	if (!xref->table)
+		return fz_outofmem;
+
+	/* start with every slot empty; type 0 marks "not loaded yet" for the readers */
+	for (i = 0; i < xref->len; i++)
+	{
+		xref->table[i].ofs = 0;
+		xref->table[i].gen = 0;
+		xref->table[i].type = 0;
+		xref->table[i].mark = 0;
+		xref->table[i].stmbuf = nil;
+		xref->table[i].stmofs = 0;
+		xref->table[i].obj = nil;
+	}
+
+	error = readxrefsections(xref, xref->startxref, buf, sizeof buf);
+	if (error)
+		return error;
+
+	return nil;
+}
+
author	Tor Andersson <tor@ghostscript.com>	2005-03-30 10:45:21 +0200
committer	Tor Andersson <tor@ghostscript.com>	2005-03-30 10:45:21 +0200
commit	5f4d61903ee8fc514ed7e23eac4d5ac6409ff760 (patch)
tree	a824aa883d9d5df072c17ec0a2ac4a2b5074c2c0 /mupdf/pdf_open.c
parent	ee154f16bd09a43359967f7e7b86c3677c09461d (diff)
download	mupdf-5f4d61903ee8fc514ed7e23eac4d5ac6409ff760.tar.xz