summary | refs | log | tree | commit | diff
path: root/mupdf/pdf_open.c
diff options
context:
space:
mode:
author: Tor Andersson <tor@ghostscript.com> 2005-03-30 10:45:21 +0200
committer: Tor Andersson <tor@ghostscript.com> 2005-03-30 10:45:21 +0200
commit: 5f4d61903ee8fc514ed7e23eac4d5ac6409ff760 (patch)
tree: a824aa883d9d5df072c17ec0a2ac4a2b5074c2c0 /mupdf/pdf_open.c
parent: ee154f16bd09a43359967f7e7b86c3677c09461d (diff)
download: mupdf-5f4d61903ee8fc514ed7e23eac4d5ac6409ff760.tar.xz
rename and shuffle -- part 2
Diffstat (limited to 'mupdf/pdf_open.c')
-rw-r--r-- mupdf/pdf_open.c 537
1 files changed, 537 insertions, 0 deletions
diff --git a/mupdf/pdf_open.c b/mupdf/pdf_open.c
new file mode 100644
index 00000000..34caa862
--- /dev/null
+++ b/mupdf/pdf_open.c
@@ -0,0 +1,537 @@
+#include <fitz.h>
+#include <mupdf.h>
+
+/*
+ * Return 1 when ch is one of the six byte values this parser skips as
+ * whitespace (NUL, tab, line feed, form feed, carriage return, space),
+ * and 0 for every other value.
+ */
+static inline int iswhite(int ch)
+{
+	switch (ch)
+	{
+	case '\000':
+	case '\011':
+	case '\012':
+	case '\014':
+	case '\015':
+	case '\040':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * magic version tag and startxref
+ */
+
+/*
+ * Read the first line of the file, check that it starts with "%PDF-" and
+ * store the number that follows in xref->version.
+ *
+ * Returns nil on success, the file's error when seeking or reading fails,
+ * or a syntax error when the marker is absent.
+ */
+static fz_error *
+loadversion(pdf_xref *xref)
+{
+	char buf[20];
+	int n;
+
+	n = fz_seek(xref->file, 0, 0);
+	if (n < 0)
+		return fz_ferror(xref->file);
+
+	/* a failed read would leave buf unset, so check before looking at it */
+	n = fz_readline(xref->file, buf, sizeof buf);
+	if (n < 0)
+		return fz_ferror(xref->file);
+
+	/*
+	 * strncmp stops at the terminator of a short line, whereas memcmp
+	 * would always inspect five bytes. This relies on fz_readline
+	 * terminating buf, which the atof call below needs as well.
+	 */
+	if (strncmp(buf, "%PDF-", 5) != 0)
+		return fz_throw("syntaxerror: corrupt version marker");
+
+	xref->version = atof(buf + 5);
+
+	pdf_logxref("version %g\n", xref->version);
+
+	return nil;
+}
+
+/*
+ * Find the last "startxref" keyword in the final 1024 bytes of the file
+ * and store the integer that follows it in xref->startxref.
+ *
+ * Returns nil on success, the file's error when seeking or reading fails,
+ * or a syntax error when the keyword is not found.
+ */
+static fz_error *
+readstartxref(pdf_xref *xref)
+{
+	char buf[1024 + 1];	/* one spare byte for the terminator */
+	int window = (int)sizeof buf - 1;
+	int t, n;
+	int i;
+
+	/* the position returned by seeking to the end is used as the file size */
+	t = fz_seek(xref->file, 0, 2);
+	if (t == -1)
+		return fz_ferror(xref->file);
+
+	t = fz_seek(xref->file, MAX(0, t - window), 0);
+	if (t == -1)
+		return fz_ferror(xref->file);
+
+	n = fz_read(xref->file, buf, window);
+	if (n < 0)
+		return fz_ferror(xref->file);
+
+	/* terminate the data so atoi below cannot run past what was read */
+	buf[n] = '\0';
+
+	/* scan backwards so that the last occurrence wins */
+	for (i = n - 9; i >= 0; i--)
+	{
+		if (memcmp(buf + i, "startxref", 9) == 0)
+		{
+			i += 9;
+			/*
+			 * Test the bound before the byte: iswhite() accepts NUL,
+			 * so the terminator alone would not stop this loop.
+			 */
+			while (i < n && iswhite(buf[i]))
+				i ++;
+			xref->startxref = atoi(buf + i);
+			return nil;
+		}
+	}
+
+	return fz_throw("syntaxerror: could not find startxref");
+}
+
+/*
+ * trailer dictionary
+ */
+
+/*
+ * Starting at an "xref" keyword, skip over every subsection of the table
+ * without decoding its entries, then parse the dictionary that follows
+ * the "trailer" keyword into xref->trailer.
+ *
+ * buf/cap is scratch space for reading lines and tokens.
+ *
+ * Returns nil on success, the file's error when an I/O call fails, or a
+ * syntax error when the expected keywords or subsection headers are
+ * missing or malformed.
+ */
+static fz_error *
+readoldtrailer(pdf_xref *xref, char *buf, int cap)
+{
+	int len;
+	char *s;
+	char *tok;
+	int n;
+	int t;
+	int c;
+
+	pdf_logxref("load old xref format trailer\n");
+
+	n = fz_readline(xref->file, buf, cap);
+	if (n < 0) return fz_ferror(xref->file);
+	if (strcmp(buf, "xref") != 0)
+		return fz_throw("syntaxerror: missing xref");
+
+	while (1)
+	{
+		/* subsection headers start with a digit; anything else ends the table */
+		c = fz_peekbyte(xref->file);
+		if (!(c >= '0' && c <= '9'))
+			break;
+
+		n = fz_readline(xref->file, buf, cap);
+		if (n < 0) return fz_ferror(xref->file);
+
+		/* header is "<first> <count>"; only the count matters when skipping */
+		s = buf;
+		strsep(&s, " ");
+		tok = strsep(&s, " ");
+		/* no second field means tok is NULL, which atoi must never see */
+		if (!tok)
+			return fz_throw("syntaxerror: malformed xref subsection header");
+		len = atoi(tok);
+		/* a negative count would make the skip below seek backwards */
+		if (len < 0)
+			return fz_throw("syntaxerror: malformed xref subsection header");
+
+		/* broken pdfs where the section is not on a separate line */
+		if (s && *s != '\0')
+		{
+			t = fz_seek(xref->file, -(n + buf - s + 2), 1);
+			if (t < 0) return fz_ferror(xref->file);
+		}
+
+		t = fz_tell(xref->file);
+		if (t < 0) return fz_ferror(xref->file);
+
+		/* each entry occupies 20 bytes */
+		n = fz_seek(xref->file, t + 20 * len, 0);
+		if (n < 0) return fz_ferror(xref->file);
+	}
+
+	t = pdf_lex(xref->file, buf, cap, &n);
+	if (t != PDF_TTRAILER)
+		return fz_throw("syntaxerror: expected trailer");
+
+	t = pdf_lex(xref->file, buf, cap, &n);
+	if (t != PDF_TODICT)
+		return fz_throw("syntaxerror: expected trailer dictionary");
+
+	return pdf_parsedict(&xref->trailer, xref->file, buf, cap);
+}
+
+/*
+ * Parse the indirect object at the current file position and store it in
+ * xref->trailer. The object number, generation and stream offset outputs
+ * of the parser are not requested.
+ *
+ * Returns whatever pdf_parseindobj returns.
+ */
+static fz_error *
+readnewtrailer(pdf_xref *xref, char *buf, int cap)
+{
+	fz_error *error;
+
+	pdf_logxref("load new xref format trailer\n");
+
+	error = pdf_parseindobj(&xref->trailer, xref->file, buf, cap, nil, nil, nil);
+	return error;
+}
+
+/*
+ * Seek to xref->startxref and load the trailer found there. The first
+ * byte selects the reader: 'x' uses readoldtrailer, a digit uses
+ * readnewtrailer.
+ *
+ * Returns the file's error when the seek fails, a syntax error when the
+ * first byte is neither 'x' nor a digit, otherwise the reader's result.
+ */
+static fz_error *
+readtrailer(pdf_xref *xref, char *buf, int cap)
+{
+	int first;
+
+	if (fz_seek(xref->file, xref->startxref, 0) < 0)
+		return fz_ferror(xref->file);
+
+	first = fz_peekbyte(xref->file);
+
+	if (first == 'x')
+		return readoldtrailer(xref, buf, cap);
+
+	if (first < '0' || first > '9')
+		return fz_throw("syntaxerror: could not find xref");
+
+	return readnewtrailer(xref, buf, cap);
+}
+
+/*
+ * xref tables
+ */
+
+/*
+ * Load one "xref" table into xref->table and parse the trailer dictionary
+ * that follows it into *trailerp.
+ *
+ * Entries whose type is already set are left untouched, so whichever
+ * section is loaded first wins. Subsections that would fall outside
+ * xref->len are rejected instead of being written past the table.
+ *
+ * buf/cap is scratch space for reading lines, entries and tokens.
+ *
+ * Returns the result of pdf_parsedict on success, the file's error when
+ * an I/O call fails, or a syntax/range error for malformed input.
+ */
+static fz_error *
+readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
+{
+	int ofs, len;
+	char *s;
+	char *tok;
+	int n;
+	int t;
+	int i;
+	int c;
+
+	pdf_logxref("load old xref format\n");
+
+	n = fz_readline(xref->file, buf, cap);
+	if (n < 0) return fz_ferror(xref->file);
+	if (strcmp(buf, "xref") != 0)
+		return fz_throw("syntaxerror: expected xref");
+
+	while (1)
+	{
+		/* subsection headers start with a digit; anything else ends the table */
+		c = fz_peekbyte(xref->file);
+		if (!(c >= '0' && c <= '9'))
+			break;
+
+		n = fz_readline(xref->file, buf, cap);
+		if (n < 0) return fz_ferror(xref->file);
+
+		/* header is "<first> <count>" */
+		s = buf;
+		ofs = atoi(strsep(&s, " "));
+		tok = strsep(&s, " ");
+		/* no second field means tok is NULL, which atoi must never see */
+		if (!tok)
+			return fz_throw("syntaxerror: malformed xref subsection header");
+		len = atoi(tok);
+
+		/* both numbers come from the file; keep the writes inside the table */
+		if (ofs < 0 || len < 0 || len > xref->len - ofs)
+			return fz_throw("rangecheck: xref subsection out of range");
+
+		/* broken pdfs where the section is not on a separate line */
+		if (s && *s != '\0')
+		{
+			fz_warn("syntaxerror: broken xref section");
+			t = fz_seek(xref->file, -(n + buf - s + 2), 1);
+			if (t < 0) return fz_ferror(xref->file);
+		}
+
+		for (i = 0; i < len; i++)
+		{
+			/* each entry is a fixed 20 byte record */
+			n = fz_read(xref->file, buf, 20);
+			if (n < 0) return fz_ferror(xref->file);
+			if (n != 20) return fz_throw("syntaxerror: truncated xref table");
+			if (!xref->table[ofs + i].type)
+			{
+				/* offset at byte 0, generation at byte 11, type at byte 17 */
+				s = buf;
+				xref->table[ofs + i].ofs = atoi(s);
+				xref->table[ofs + i].gen = atoi(s + 11);
+				xref->table[ofs + i].type = s[17];
+			}
+		}
+	}
+
+	t = pdf_lex(xref->file, buf, cap, &n);
+	if (t != PDF_TTRAILER)
+		return fz_throw("syntaxerror: expected trailer");
+	t = pdf_lex(xref->file, buf, cap, &n);
+	if (t != PDF_TODICT)
+		return fz_throw("syntaxerror: expected trailer dictionary");
+
+	return pdf_parsedict(trailerp, xref->file, buf, cap);
+}
+
+/*
+ * Load one cross reference stream: parse the stream object at the current
+ * file position, record it in its own table slot, then decode its entries
+ * into xref->table.
+ *
+ * Each entry is three big-endian fields whose byte widths come from the
+ * "W" array. A zero-width first field means type 1; zero-width second and
+ * third fields mean 0. Entries whose type is already set are left
+ * untouched. Only the first (start, count) pair of "Index" is read.
+ *
+ * On success *trailerp receives the stream dictionary and nil is
+ * returned; the table slot holds its own kept reference. On failure the
+ * local reference is dropped and the error is returned.
+ */
+static fz_error *
+readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
+{
+	fz_error *error;
+	fz_obj *trailer;
+	fz_obj *obj;
+	int oid, gen, stmofs;
+	int size, w0, w1, w2, i0, i1;
+	int i, n;
+
+	pdf_logxref("load new xref format\n");
+
+	error = pdf_parseindobj(&trailer, xref->file, buf, cap, &oid, &gen, &stmofs);
+	if (error)
+		return error;
+
+	if (oid < 0 || oid >= xref->len) {
+		error = fz_throw("rangecheck: object id out of range");
+		goto cleanup;
+	}
+
+	/* register the stream itself so pdf_openstream can find it below */
+	xref->table[oid].type = 'n';
+	xref->table[oid].gen = gen;
+	xref->table[oid].obj = fz_keepobj(trailer);
+	xref->table[oid].stmofs = stmofs;
+
+	obj = fz_dictgets(trailer, "Size");
+	if (!obj) {
+		error = fz_throw("syntaxerror: xref stream missing Size entry");
+		goto cleanup;
+	}
+	size = fz_toint(obj);
+
+	obj = fz_dictgets(trailer, "W");
+	if (!obj) {
+		error = fz_throw("syntaxerror: xref stream missing W entry");
+		goto cleanup;
+	}
+	w0 = fz_toint(fz_arrayget(obj, 0));
+	w1 = fz_toint(fz_arrayget(obj, 1));
+	w2 = fz_toint(fz_arrayget(obj, 2));
+
+	obj = fz_dictgets(trailer, "Index");
+	if (obj) {
+		i0 = fz_toint(fz_arrayget(obj, 0));
+		i1 = fz_toint(fz_arrayget(obj, 1));
+	}
+	else {
+		i0 = 0;
+		i1 = size;
+	}
+
+	/*
+	 * The loop below writes entries i0 .. i0 + i1 - 1, so the end of that
+	 * range is what has to fit in the table, not the count alone.
+	 */
+	if (i0 < 0 || i1 > xref->len - i0) {
+		error = fz_throw("syntaxerror: xref stream has too many entries");
+		goto cleanup;
+	}
+
+	error = pdf_openstream(xref, oid, gen);
+	if (error)
+		goto cleanup;
+
+	for (i = i0; i < i0 + i1; i++)
+	{
+		int a = 0;
+		int b = 0;
+		int c = 0;
+
+		if (fz_peekbyte(xref->stream) == EOF)
+		{
+			error = fz_ferror(xref->stream);
+			if (!error)
+				error = fz_throw("syntaxerror: truncated xref stream");
+			pdf_closestream(xref);
+			goto cleanup;
+		}
+
+		for (n = 0; n < w0; n++)
+			a = (a << 8) + fz_readbyte(xref->stream);
+		for (n = 0; n < w1; n++)
+			b = (b << 8) + fz_readbyte(xref->stream);
+		for (n = 0; n < w2; n++)
+			c = (c << 8) + fz_readbyte(xref->stream);
+
+		if (!xref->table[i].type)
+		{
+			int t = w0 ? a : 1;
+			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
+			/* b was read with width w1 and c with width w2 */
+			xref->table[i].ofs = w1 ? b : 0;
+			xref->table[i].gen = w2 ? c : 0;
+		}
+	}
+
+	pdf_closestream(xref);
+
+	*trailerp = trailer;
+
+	return nil;
+
+cleanup:
+	fz_dropobj(trailer);
+	return error;
+}
+
+/*
+ * Seek to ofs and load the cross reference section found there, handing
+ * its trailer back through trailerp. The first byte selects the reader:
+ * 'x' uses readoldxref, a digit uses readnewxref.
+ *
+ * Returns the file's error when the seek fails, a syntax error when the
+ * first byte is neither 'x' nor a digit, otherwise the reader's result.
+ */
+static fz_error *
+readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
+{
+	int first;
+
+	if (fz_seek(xref->file, ofs, 0) < 0)
+		return fz_ferror(xref->file);
+
+	first = fz_peekbyte(xref->file);
+
+	if (first == 'x')
+		return readoldxref(trailerp, xref, buf, cap);
+
+	if (first < '0' || first > '9')
+		return fz_throw("syntaxerror: expected xref");
+
+	return readnewxref(trailerp, xref, buf, cap);
+}
+
+/*
+ * Load the cross reference section at ofs, then recurse into the sections
+ * named by its trailer: first "XrefStm", then "Prev". The first failure
+ * stops the walk.
+ *
+ * The trailer returned by readxref is dropped before returning, whether
+ * or not the walk succeeded.
+ *
+ * NOTE(review): nothing here bounds the recursion, so a chain of trailers
+ * that points back at itself would presumably never terminate.
+ */
+static fz_error *
+readxrefsections(pdf_xref *xref, int ofs, char *buf, int cap)
+{
+	fz_error *error;
+	fz_obj *trailer;
+	fz_obj *link;
+
+	error = readxref(&trailer, xref, ofs, buf, cap);
+	if (error)
+		return error;
+
+	/* FIXME: do we overwrite free entries properly? */
+	link = fz_dictgets(trailer, "XrefStm");
+	if (link)
+	{
+		pdf_logxref("load xrefstm\n");
+		error = readxrefsections(xref, fz_toint(link), buf, cap);
+	}
+
+	if (!error)
+	{
+		link = fz_dictgets(trailer, "Prev");
+		if (link)
+		{
+			pdf_logxref("load prev\n");
+			error = readxrefsections(xref, fz_toint(link), buf, cap);
+		}
+	}
+
+	/* single exit: error is still nil here when every step succeeded */
+	fz_dropobj(trailer);
+	return error;
+}
+
+/*
+ * compressed object streams
+ */
+
+/*
+ * Load every object stored in the object stream (oid, gen) and place each
+ * one in its slot of xref->table, replacing any object already there.
+ *
+ * The stream begins with "N" pairs of integers (object number, offset);
+ * the objects themselves start at byte "First" and are parsed one after
+ * another. The offsets are collected but not used yet (see the FIXME).
+ *
+ * buf/cap is scratch space for the lexer and parser.
+ *
+ * Returns nil on success. On failure everything acquired here is
+ * released and the error is returned; objects already stored in the
+ * table stay there.
+ */
+fz_error *
+pdf_loadobjstm(pdf_xref *xref, int oid, int gen, char *buf, int cap)
+{
+	fz_error *error;
+	fz_obj *objstm;
+	int *oidbuf;
+	int *ofsbuf;
+
+	fz_obj *obj;
+	int first;
+	int count;
+	int i, n, t;
+
+	pdf_logxref("loadobjstm %d %d\n", oid, gen);
+
+	error = pdf_loadobject(&objstm, xref, oid, gen);
+	if (error)
+		return error;
+
+	count = fz_toint(fz_dictgets(objstm, "N"));
+	first = fz_toint(fz_dictgets(objstm, "First"));
+
+	pdf_logxref(" count %d\n", count);
+
+	/* the count comes from the file; a negative one must not reach malloc */
+	if (count < 0)
+	{
+		error = fz_throw("rangecheck: negative object count in object stream");
+		goto cleanup1;
+	}
+
+	oidbuf = fz_malloc(count * sizeof(int));
+	if (!oidbuf) { error = fz_outofmem; goto cleanup1; }
+
+	ofsbuf = fz_malloc(count * sizeof(int));
+	if (!ofsbuf) { error = fz_outofmem; goto cleanup2; }
+
+	error = pdf_openstream(xref, oid, gen);
+	if (error)
+		goto cleanup3;
+
+	/* index: one (object number, offset) pair per object */
+	for (i = 0; i < count; i++)
+	{
+		t = pdf_lex(xref->stream, buf, cap, &n);
+		if (t != PDF_TINT)
+		{
+			error = fz_throw("syntaxerror: corrupt object stream");
+			goto cleanup4;
+		}
+		oidbuf[i] = atoi(buf);
+
+		t = pdf_lex(xref->stream, buf, cap, &n);
+		if (t != PDF_TINT)
+		{
+			error = fz_throw("syntaxerror: corrupt object stream");
+			goto cleanup4;
+		}
+		ofsbuf[i] = atoi(buf);
+	}
+
+	n = fz_seek(xref->stream, first, 0);
+	if (n < 0)
+	{
+		error = fz_ferror(xref->stream);
+		goto cleanup4;
+	}
+
+	for (i = 0; i < count; i++)
+	{
+		/* FIXME: seek to first + ofsbuf[i] */
+
+		error = pdf_parsestmobj(&obj, xref->stream, buf, cap);
+		if (error)
+			goto cleanup4;
+
+		if (oidbuf[i] < 1 || oidbuf[i] >= xref->len)
+		{
+			/* the parsed object has no owner yet, so release it here */
+			fz_dropobj(obj);
+			error = fz_throw("rangecheck: object number out of range");
+			goto cleanup4;
+		}
+
+		/* the table takes over the reference returned by the parser */
+		if (xref->table[oidbuf[i]].obj)
+			fz_dropobj(xref->table[oidbuf[i]].obj);
+		xref->table[oidbuf[i]].obj = obj;
+	}
+
+	pdf_closestream(xref);
+	fz_free(ofsbuf);
+	fz_free(oidbuf);
+	fz_dropobj(objstm);
+	return nil;
+
+cleanup4:
+	pdf_closestream(xref);
+cleanup3:
+	fz_free(ofsbuf);
+cleanup2:
+	fz_free(oidbuf);
+cleanup1:
+	fz_dropobj(objstm);
+	return error;
+}
+
+/*
+ * open and load xref tables from pdf
+ */
+
+/*
+ * Open filename and build the cross reference table of xref from it:
+ * check the version marker, locate startxref, load the trailer, allocate
+ * and clear a table of "Size" entries, then load every cross reference
+ * section reachable from startxref.
+ *
+ * xref->table must be nil on entry (asserted).
+ *
+ * Returns nil on success, otherwise the first error encountered.
+ */
+fz_error *
+pdf_loadxref(pdf_xref *xref, char *filename)
+{
+	char buf[65536]; /* yeowch! */
+	fz_error *error;
+	fz_obj *size;
+	pdf_xrefentry *entry;
+	int i;
+
+	pdf_logxref("loadxref '%s' %p\n", filename, xref);
+
+	/* each step runs only when every earlier one succeeded */
+	error = fz_openfile(&xref->file, filename, FZ_READ);
+	if (!error)
+		error = loadversion(xref);
+	if (!error)
+		error = readstartxref(xref);
+	if (!error)
+		error = readtrailer(xref, buf, sizeof buf);
+	if (error)
+		return error;
+
+	size = fz_dictgets(xref->trailer, "Size");
+	if (!size)
+		return fz_throw("syntaxerror: trailer missing Size entry");
+
+	pdf_logxref(" size %d\n", fz_toint(size));
+
+	assert(xref->table == nil);
+
+	xref->cap = fz_toint(size);
+	xref->len = fz_toint(size);
+	xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry));
+	if (!xref->table)
+		return fz_outofmem;
+
+	/* a zero type marks a slot that no section has filled in yet */
+	for (i = 0; i < xref->len; i++)
+	{
+		entry = &xref->table[i];
+		entry->ofs = 0;
+		entry->gen = 0;
+		entry->type = 0;
+		entry->mark = 0;
+		entry->stmbuf = nil;
+		entry->stmofs = 0;
+		entry->obj = nil;
+	}
+
+	return readxrefsections(xref, xref->startxref, buf, sizeof buf);
+}
+