diff options
author | Tor Andersson <tor@ghostscript.com> | 2005-03-30 10:45:21 +0200 |
---|---|---|
committer | Tor Andersson <tor@ghostscript.com> | 2005-03-30 10:45:21 +0200 |
commit | 5f4d61903ee8fc514ed7e23eac4d5ac6409ff760 (patch) | |
tree | a824aa883d9d5df072c17ec0a2ac4a2b5074c2c0 /mupdf/pdf_open.c | |
parent | ee154f16bd09a43359967f7e7b86c3677c09461d (diff) | |
download | mupdf-5f4d61903ee8fc514ed7e23eac4d5ac6409ff760.tar.xz |
rename and shuffle -- part 2
Diffstat (limited to 'mupdf/pdf_open.c')
-rw-r--r-- | mupdf/pdf_open.c | 537 |
1 files changed, 537 insertions, 0 deletions
diff --git a/mupdf/pdf_open.c b/mupdf/pdf_open.c new file mode 100644 index 00000000..34caa862 --- /dev/null +++ b/mupdf/pdf_open.c @@ -0,0 +1,537 @@ +#include <fitz.h> +#include <mupdf.h> + +static inline int iswhite(int ch) +{ + return ch == '\000' || ch == '\011' || ch == '\012' || + ch == '\014' || ch == '\015' || ch == '\040'; +} + +/* + * magic version tag and startxref + */ + +static fz_error * +loadversion(pdf_xref *xref) +{ + char buf[20]; + int n; + + n = fz_seek(xref->file, 0, 0); + if (n < 0) + return fz_ferror(xref->file); + + fz_readline(xref->file, buf, sizeof buf); + if (memcmp(buf, "%PDF-", 5) != 0) + return fz_throw("syntaxerror: corrupt version marker"); + + xref->version = atof(buf + 5); + + pdf_logxref("version %g\n", xref->version); + + return nil; +} + +static fz_error * +readstartxref(pdf_xref *xref) +{ + char buf[1024]; + int t, n; + int i; + + t = fz_seek(xref->file, 0, 2); + if (t == -1) + return fz_ferror(xref->file); + + t = fz_seek(xref->file, MAX(0, t - ((int)sizeof buf)), 0); + if (t == -1) + return fz_ferror(xref->file); + + n = fz_read(xref->file, buf, sizeof buf); + if (n == -1) + return fz_ferror(xref->file); + + for (i = n - 9; i >= 0; i--) + { + if (memcmp(buf + i, "startxref", 9) == 0) + { + i += 9; + while (iswhite(buf[i]) && i < n) + i ++; + xref->startxref = atoi(buf + i); + return nil; + } + } + + return fz_throw("syntaxerror: could not find startxref"); +} + +/* + * trailer dictionary + */ + +static fz_error * +readoldtrailer(pdf_xref *xref, char *buf, int cap) +{ + int ofs, len; + char *s; + int n; + int t; + int c; + + pdf_logxref("load old xref format trailer\n"); + + fz_readline(xref->file, buf, cap); + if (strcmp(buf, "xref") != 0) + return fz_throw("syntaxerror: missing xref"); + + while (1) + { + c = fz_peekbyte(xref->file); + if (!(c >= '0' && c <= '9')) + break; + + n = fz_readline(xref->file, buf, cap); + if (n < 0) return fz_ferror(xref->file); + + s = buf; + ofs = atoi(strsep(&s, " ")); + len = atoi(strsep(&s, " ")); + + /* broken pdfs where the section is not on a separate line */ + if (s && *s != '\0') + fz_seek(xref->file, -(n + buf - s + 2), 1); + + t = fz_tell(xref->file); + if (t < 0) return fz_ferror(xref->file); + + n = fz_seek(xref->file, t + 20 * len, 0); + if (n < 0) return fz_ferror(xref->file); + } + + t = pdf_lex(xref->file, buf, cap, &n); + if (t != PDF_TTRAILER) + return fz_throw("syntaxerror: expected trailer"); + + t = pdf_lex(xref->file, buf, cap, &n); + if (t != PDF_TODICT) + return fz_throw("syntaxerror: expected trailer dictionary"); + + return pdf_parsedict(&xref->trailer, xref->file, buf, cap); +} + +static fz_error * +readnewtrailer(pdf_xref *xref, char *buf, int cap) +{ + pdf_logxref("load new xref format trailer\n"); + return pdf_parseindobj(&xref->trailer, xref->file, buf, cap, nil, nil, nil); +} + +static fz_error * +readtrailer(pdf_xref *xref, char *buf, int cap) +{ + int n; + int c; + + n = fz_seek(xref->file, xref->startxref, 0); + if (n < 0) + return fz_ferror(xref->file); + + c = fz_peekbyte(xref->file); + if (c == 'x') + return readoldtrailer(xref, buf, cap); + else if (c >= '0' && c <= '9') + return readnewtrailer(xref, buf, cap); + + return fz_throw("syntaxerror: could not find xref"); +} + +/* + * xref tables + */ + +static fz_error * +readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) +{ + int ofs, len; + char *s; + int n; + int t; + int i; + int c; + + pdf_logxref("load old xref format\n"); + + fz_readline(xref->file, buf, cap); + if (strcmp(buf, "xref") != 0) + return fz_throw("syntaxerror: expected xref"); + + while (1) + { + c = fz_peekbyte(xref->file); + if (!(c >= '0' && c <= '9')) + break; + + n = fz_readline(xref->file, buf, cap); + if (n < 0) return fz_ferror(xref->file); + + s = buf; + ofs = atoi(strsep(&s, " ")); + len = atoi(strsep(&s, " ")); + + /* broken pdfs where the section is not on a separate line */ + if (s && *s != '\0') + { + fz_warn("syntaxerror: broken xref section"); + fz_seek(xref->file, -(n + buf - s + 2), 1); + } + + for (i = 0; i < len; i++) + { + n = fz_read(xref->file, buf, 20); + if (n < 0) return fz_ferror(xref->file); + if (n != 20) return fz_throw("syntaxerror: truncated xref table"); + if (!xref->table[ofs + i].type) + { + s = buf; + xref->table[ofs + i].ofs = atoi(s); + xref->table[ofs + i].gen = atoi(s + 11); + xref->table[ofs + i].type = s[17]; + } + } + } + + t = pdf_lex(xref->file, buf, cap, &n); + if (t != PDF_TTRAILER) + return fz_throw("syntaxerror: expected trailer"); + t = pdf_lex(xref->file, buf, cap, &n); + if (t != PDF_TODICT) + return fz_throw("syntaxerror: expected trailer dictionary"); + + return pdf_parsedict(trailerp, xref->file, buf, cap); +} + +static fz_error * +readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) +{ + fz_error *error; + fz_obj *trailer; + fz_obj *obj; + int oid, gen, stmofs; + int size, w0, w1, w2, i0, i1; + int i, n; + + pdf_logxref("load new xref format\n"); + + error = pdf_parseindobj(&trailer, xref->file, buf, cap, &oid, &gen, &stmofs); + if (error) + return error; + + if (oid < 0 || oid >= xref->len) { + error = fz_throw("rangecheck: object id out of range"); + goto cleanup; + } + + xref->table[oid].type = 'n'; + xref->table[oid].gen = gen; + xref->table[oid].obj = fz_keepobj(trailer); + xref->table[oid].stmofs = stmofs; + + obj = fz_dictgets(trailer, "Size"); + if (!obj) { + error = fz_throw("syntaxerror: xref stream missing Size entry"); + goto cleanup; + } + size = fz_toint(obj); + + obj = fz_dictgets(trailer, "W"); + if (!obj) { + error = fz_throw("syntaxerror: xref stream missing W entry"); + goto cleanup; + } + w0 = fz_toint(fz_arrayget(obj, 0)); + w1 = fz_toint(fz_arrayget(obj, 1)); + w2 = fz_toint(fz_arrayget(obj, 2)); + + obj = fz_dictgets(trailer, "Index"); + if (obj) { + i0 = fz_toint(fz_arrayget(obj, 0)); + i1 = fz_toint(fz_arrayget(obj, 1)); + } + else { + i0 = 0; + i1 = size; + } + + if (i0 < 0 || i1 > xref->len) { + error = fz_throw("syntaxerror: xref stream has too many entries"); + goto cleanup; + } + + error = pdf_openstream(xref, oid, gen); + if (error) + goto cleanup; + + for (i = i0; i < i0 + i1; i++) + { + int a = 0; + int b = 0; + int c = 0; + + if (fz_peekbyte(xref->stream) == EOF) + { + error = fz_ferror(xref->stream); + if (!error) + error = fz_throw("syntaxerror: truncated xref stream"); + pdf_closestream(xref); + goto cleanup; + } + + for (n = 0; n < w0; n++) + a = (a << 8) + fz_readbyte(xref->stream); + for (n = 0; n < w1; n++) + b = (b << 8) + fz_readbyte(xref->stream); + for (n = 0; n < w2; n++) + c = (c << 8) + fz_readbyte(xref->stream); + + if (!xref->table[i].type) + { + int t = w0 ? a : 1; + xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; + xref->table[i].ofs = w2 ? b : 0; + xref->table[i].gen = w1 ? c : 0; + } + } + + pdf_closestream(xref); + + *trailerp = trailer; + + return nil; + +cleanup: + fz_dropobj(trailer); + return error; +} + +static fz_error * +readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap) +{ + int n; + int c; + + n = fz_seek(xref->file, ofs, 0); + if (n < 0) + return fz_ferror(xref->file); + + c = fz_peekbyte(xref->file); + if (c == 'x') + return readoldxref(trailerp, xref, buf, cap); + else if (c >= '0' && c <= '9') + return readnewxref(trailerp, xref, buf, cap); + + return fz_throw("syntaxerror: expected xref"); +} + +static fz_error * +readxrefsections(pdf_xref *xref, int ofs, char *buf, int cap) +{ + fz_error *error; + fz_obj *trailer; + fz_obj *prev; + fz_obj *xrefstm; + + error = readxref(&trailer, xref, ofs, buf, cap); + if (error) + return error; + + /* FIXME: do we overwrite free entries properly? */ + xrefstm = fz_dictgets(trailer, "XrefStm"); + if (xrefstm) + { + pdf_logxref("load xrefstm\n"); + error = readxrefsections(xref, fz_toint(xrefstm), buf, cap); + if (error) + goto cleanup; + } + + prev = fz_dictgets(trailer, "Prev"); + if (prev) + { + pdf_logxref("load prev\n"); + error = readxrefsections(xref, fz_toint(prev), buf, cap); + if (error) + goto cleanup; + } + + fz_dropobj(trailer); + return nil; + +cleanup: + fz_dropobj(trailer); + return error; +} + +/* + * compressed object streams + */ + +fz_error * +pdf_loadobjstm(pdf_xref *xref, int oid, int gen, char *buf, int cap) +{ + fz_error *error; + fz_obj *objstm; + int *oidbuf; + int *ofsbuf; + + fz_obj *obj; + int first; + int count; + int i, n, t; + + pdf_logxref("loadobjstm %d %d\n", oid, gen); + + error = pdf_loadobject(&objstm, xref, oid, gen); + if (error) + return error; + + count = fz_toint(fz_dictgets(objstm, "N")); + first = fz_toint(fz_dictgets(objstm, "First")); + + pdf_logxref(" count %d\n", count); + + oidbuf = fz_malloc(count * sizeof(int)); + if (!oidbuf) { error = fz_outofmem; goto cleanup1; } + + ofsbuf = fz_malloc(count * sizeof(int)); + if (!ofsbuf) { error = fz_outofmem; goto cleanup2; } + + error = pdf_openstream(xref, oid, gen); + if (error) + goto cleanup3; + + for (i = 0; i < count; i++) + { + t = pdf_lex(xref->stream, buf, cap, &n); + if (t != PDF_TINT) + { + error = fz_throw("syntaxerror: corrupt object stream"); + goto cleanup4; + } + oidbuf[i] = atoi(buf); + + t = pdf_lex(xref->stream, buf, cap, &n); + if (t != PDF_TINT) + { + error = fz_throw("syntaxerror: corrupt object stream"); + goto cleanup4; + } + ofsbuf[i] = atoi(buf); + } + + n = fz_seek(xref->stream, first, 0); + if (n < 0) + { + error = fz_ferror(xref->stream); + goto cleanup4; + } + + for (i = 0; i < count; i++) + { + /* FIXME: seek to first + ofsbuf[i] */ + + error = pdf_parsestmobj(&obj, xref->stream, buf, cap); + if (error) + goto cleanup4; + + if (oidbuf[i] < 1 || oidbuf[i] >= xref->len) + { + error = fz_throw("rangecheck: object number out of range"); + goto cleanup4; + } + + if (xref->table[oidbuf[i]].obj) + fz_dropobj(xref->table[oidbuf[i]].obj); + xref->table[oidbuf[i]].obj = obj; + } + + pdf_closestream(xref); + fz_free(ofsbuf); + fz_free(oidbuf); + fz_dropobj(objstm); + return nil; + +cleanup4: + pdf_closestream(xref); +cleanup3: + fz_free(ofsbuf); +cleanup2: + fz_free(oidbuf); +cleanup1: + fz_dropobj(objstm); + return error; +} + +/* + * open and load xref tables from pdf + */ + +fz_error * +pdf_loadxref(pdf_xref *xref, char *filename) +{ + fz_error *error; + fz_obj *size; + int i; + + char buf[65536]; /* yeowch! */ + + pdf_logxref("loadxref '%s' %p\n", filename, xref); + + error = fz_openfile(&xref->file, filename, FZ_READ); + if (error) + return error; + + error = loadversion(xref); + if (error) + return error; + + error = readstartxref(xref); + if (error) + return error; + + error = readtrailer(xref, buf, sizeof buf); + if (error) + return error; + + size = fz_dictgets(xref->trailer, "Size"); + if (!size) + return fz_throw("syntaxerror: trailer missing Size entry"); + + pdf_logxref(" size %d\n", fz_toint(size)); + + assert(xref->table == nil); + + xref->cap = fz_toint(size); + xref->len = fz_toint(size); + xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry)); + if (!xref->table) + return fz_outofmem; + + for (i = 0; i < xref->len; i++) + { + xref->table[i].ofs = 0; + xref->table[i].gen = 0; + xref->table[i].type = 0; + xref->table[i].mark = 0; + xref->table[i].stmbuf = nil; + xref->table[i].stmofs = 0; + xref->table[i].obj = nil; + } + + error = readxrefsections(xref, xref->startxref, buf, sizeof buf); + if (error) + return error; + + return nil; +} + |