summaryrefslogtreecommitdiff
path: root/pdf/pdf_page.c
diff options
context:
space:
mode:
Diffstat (limited to 'pdf/pdf_page.c')
-rw-r--r--pdf/pdf_page.c246
1 files changed, 246 insertions, 0 deletions
diff --git a/pdf/pdf_page.c b/pdf/pdf_page.c
new file mode 100644
index 00000000..869f7a5c
--- /dev/null
+++ b/pdf/pdf_page.c
@@ -0,0 +1,246 @@
+#include "fitz.h"
+#include "mupdf.h"
+
+/* we need to combine all sub-streams into one for the content stream interpreter */
+
+static fz_error
+pdf_loadpagecontentsarray(fz_buffer **bigbufp, pdf_xref *xref, fz_obj *list)
+{
+ fz_error error;
+ fz_buffer *big;
+ fz_buffer *one;
+ int i;
+
+ pdf_logpage("multiple content streams: %d\n", fz_arraylen(list));
+
+ /* TODO: openstream, read, close into big buffer at once */
+
+ big = fz_newbuffer(32 * 1024);
+
+ for (i = 0; i < fz_arraylen(list); i++)
+ {
+ fz_obj *stm = fz_arrayget(list, i);
+ error = pdf_loadstream(&one, xref, fz_tonum(stm), fz_togen(stm));
+ if (error)
+ {
+ fz_dropbuffer(big);
+ return fz_rethrow(error, "cannot load content stream part %d/%d (%d %d R)", i + 1, fz_arraylen(list), fz_tonum(stm), fz_togen(stm));
+ }
+
+ if (big->len + one->len + 1 > big->cap)
+ fz_resizebuffer(big, big->len + one->len + 1);
+ memcpy(big->data + big->len, one->data, one->len);
+ big->data[big->len + one->len] = ' ';
+ big->len += one->len + 1;
+
+ fz_dropbuffer(one);
+ }
+
+ *bigbufp = big;
+ return fz_okay;
+}
+
+static fz_error
+pdf_loadpagecontents(fz_buffer **bufp, pdf_xref *xref, fz_obj *obj)
+{
+ fz_error error;
+
+ if (fz_isarray(obj))
+ {
+ error = pdf_loadpagecontentsarray(bufp, xref, obj);
+ if (error)
+ return fz_rethrow(error, "cannot load content stream array (%d 0 R)", fz_tonum(obj));
+ }
+ else if (pdf_isstream(xref, fz_tonum(obj), fz_togen(obj)))
+ {
+ error = pdf_loadstream(bufp, xref, fz_tonum(obj), fz_togen(obj));
+ if (error)
+ return fz_rethrow(error, "cannot load content stream (%d 0 R)", fz_tonum(obj));
+ }
+ else
+ {
+ fz_warn("page contents missing, leaving page blank");
+ *bufp = fz_newbuffer(0);
+ }
+
+ return fz_okay;
+}
+
+/* We need to know whether to install a page-level transparency group */
+
+static int pdf_resourcesuseblending(fz_obj *rdb);
+
+static int
+pdf_extgstateusesblending(fz_obj *dict)
+{
+ fz_obj *obj;
+
+ obj = fz_dictgets(dict, "BM");
+ if (fz_isname(obj) && strcmp(fz_toname(obj), "Normal"))
+ return 1;
+
+ return 0;
+}
+
+static int
+pdf_patternusesblending(fz_obj *dict)
+{
+ fz_obj *obj;
+
+ obj = fz_dictgets(dict, "Resources");
+ if (fz_isdict(obj) && pdf_resourcesuseblending(obj))
+ return 1;
+
+ obj = fz_dictgets(dict, "ExtGState");
+ if (fz_isdict(obj) && pdf_extgstateusesblending(obj))
+ return 1;
+
+ return 0;
+}
+
+static int
+pdf_xobjectusesblending(fz_obj *dict)
+{
+ fz_obj *obj;
+
+ obj = fz_dictgets(dict, "Resources");
+ if (fz_isdict(obj) && pdf_resourcesuseblending(obj))
+ return 1;
+
+ return 0;
+}
+
+static int
+pdf_resourcesuseblending(fz_obj *rdb)
+{
+ fz_obj *dict;
+ fz_obj *tmp;
+ int i;
+
+ /* stop on cyclic resource dependencies */
+ if (fz_dictgets(rdb, ".useBM"))
+ return fz_tobool(fz_dictgets(rdb, ".useBM"));
+
+ tmp = fz_newbool(0);
+ fz_dictputs(rdb, ".useBM", tmp);
+ fz_dropobj(tmp);
+
+ dict = fz_dictgets(rdb, "ExtGState");
+ for (i = 0; i < fz_dictlen(dict); i++)
+ if (pdf_extgstateusesblending(fz_dictgetval(dict, i)))
+ goto found;
+
+ dict = fz_dictgets(rdb, "Pattern");
+ for (i = 0; i < fz_dictlen(dict); i++)
+ if (pdf_patternusesblending(fz_dictgetval(dict, i)))
+ goto found;
+
+ dict = fz_dictgets(rdb, "XObject");
+ for (i = 0; i < fz_dictlen(dict); i++)
+ if (pdf_xobjectusesblending(fz_dictgetval(dict, i)))
+ goto found;
+
+ return 0;
+
+found:
+ tmp = fz_newbool(1);
+ fz_dictputs(rdb, ".useBM", tmp);
+ fz_dropobj(tmp);
+ return 1;
+}
+
+fz_error
+pdf_loadpage(pdf_page **pagep, pdf_xref *xref, fz_obj *dict)
+{
+ fz_error error;
+ pdf_page *page;
+ fz_obj *obj;
+ fz_bbox bbox;
+
+ pdf_logpage("load page {\n");
+
+ // TODO: move this to a more appropriate place
+ /* Ensure that we have a store for resource objects */
+ if (!xref->store)
+ xref->store = pdf_newstore();
+
+ page = fz_malloc(sizeof(pdf_page));
+ page->resources = nil;
+ page->contents = nil;
+ page->transparency = 0;
+ page->links = nil;
+ page->annots = nil;
+
+ obj = fz_dictgets(dict, "MediaBox");
+ bbox = fz_roundrect(pdf_torect(obj));
+ if (fz_isemptyrect(pdf_torect(obj)))
+ {
+ fz_warn("cannot find page bounds, guessing page bounds.");
+ bbox.x0 = 0;
+ bbox.y0 = 0;
+ bbox.x1 = 612;
+ bbox.y1 = 792;
+ }
+
+ obj = fz_dictgets(dict, "CropBox");
+ if (fz_isarray(obj))
+ {
+ fz_bbox cropbox = fz_roundrect(pdf_torect(obj));
+ bbox = fz_intersectbbox(bbox, cropbox);
+ }
+
+ page->mediabox.x0 = MIN(bbox.x0, bbox.x1);
+ page->mediabox.y0 = MIN(bbox.y0, bbox.y1);
+ page->mediabox.x1 = MAX(bbox.x0, bbox.x1);
+ page->mediabox.y1 = MAX(bbox.y0, bbox.y1);
+
+ if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1)
+ return fz_throw("invalid page size");
+
+ page->rotate = fz_toint(fz_dictgets(dict, "Rotate"));
+
+ pdf_logpage("bbox [%d %d %d %d]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1);
+ pdf_logpage("rotate %d\n", page->rotate);
+
+ obj = fz_dictgets(dict, "Annots");
+ if (obj)
+ {
+ pdf_loadlinks(&page->links, xref, obj);
+ pdf_loadannots(&page->annots, xref, obj);
+ }
+
+ page->resources = fz_dictgets(dict, "Resources");
+ if (page->resources)
+ fz_keepobj(page->resources);
+
+ obj = fz_dictgets(dict, "Contents");
+ error = pdf_loadpagecontents(&page->contents, xref, obj);
+ if (error)
+ {
+ pdf_freepage(page);
+ return fz_rethrow(error, "cannot load page contents (%d %d R)", fz_tonum(obj), fz_togen(obj));
+ }
+
+ if (page->resources && pdf_resourcesuseblending(page->resources))
+ page->transparency = 1;
+
+ pdf_logpage("} %p\n", page);
+
+ *pagep = page;
+ return fz_okay;
+}
+
+void
+pdf_freepage(pdf_page *page)
+{
+ pdf_logpage("drop page %p\n", page);
+ if (page->resources)
+ fz_dropobj(page->resources);
+ if (page->contents)
+ fz_dropbuffer(page->contents);
+ if (page->links)
+ pdf_freelink(page->links);
+ if (page->annots)
+ pdf_freeannot(page->annots);
+ fz_free(page);
+}