diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2013-06-19 15:29:44 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2013-06-20 16:45:35 +0200 |
commit | 0a927854a10e1e6b9770a81e2e1d9f3093631757 (patch) | |
tree | 3d65d820d9fdba2d0d394d99c36290c851b78ca0 /source/pdf | |
parent | 1ae8f19179c5f0f8c6352b3c7855465325d5449a (diff) | |
download | mupdf-0a927854a10e1e6b9770a81e2e1d9f3093631757.tar.xz |
Rearrange source files.
Diffstat (limited to 'source/pdf')
44 files changed, 29190 insertions, 0 deletions
diff --git a/source/pdf/js/pdf-js-none.c b/source/pdf/js/pdf-js-none.c new file mode 100644 index 00000000..00cc5b54 --- /dev/null +++ b/source/pdf/js/pdf-js-none.c @@ -0,0 +1,36 @@ +#include "mupdf/pdf.h" + +pdf_js *pdf_new_js(pdf_document *doc) +{ + return NULL; +} + +void pdf_js_load_document_level(pdf_js *js) +{ +} + +void pdf_drop_js(pdf_js *js) +{ +} + +void pdf_js_setup_event(pdf_js *js, pdf_js_event *e) +{ +} + +pdf_js_event *pdf_js_get_event(pdf_js *js) +{ + return NULL; +} + +void pdf_js_execute(pdf_js *js, char *code) +{ +} + +void pdf_js_execute_count(pdf_js *js, char *code, int count) +{ +} + +int pdf_js_supported(void) +{ + return 0; +} diff --git a/source/pdf/js/pdf-js.c b/source/pdf/js/pdf-js.c new file mode 100644 index 00000000..4a2313f1 --- /dev/null +++ b/source/pdf/js/pdf-js.c @@ -0,0 +1,919 @@ +#include "mupdf/pdf.h" + +struct pdf_js_s +{ + pdf_document *doc; + pdf_obj *form; + pdf_js_event event; + pdf_jsimp *imp; + pdf_jsimp_type *doctype; + pdf_jsimp_type *eventtype; + pdf_jsimp_type *fieldtype; + pdf_jsimp_type *apptype; +}; + +static pdf_jsimp_obj *app_alert(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + pdf_jsimp_obj *cMsg_obj = NULL; + pdf_jsimp_obj *nIcon_obj = NULL; + pdf_jsimp_obj *nType_obj = NULL; + pdf_jsimp_obj *cTitle_obj = NULL; + pdf_jsimp_obj *nButton_obj = NULL; + pdf_alert_event event; + int arg_is_obj = 0; + + if (argc < 1 || argc > 6) + return NULL; + + event.message = ""; + event.icon_type = PDF_ALERT_ICON_ERROR; + event.button_group_type = PDF_ALERT_BUTTON_GROUP_OK; + event.title = "MuPDF"; + event.check_box_message = NULL; + event.button_pressed = 0; + + fz_var(cMsg_obj); + fz_var(nIcon_obj); + fz_var(nType_obj); + fz_var(cTitle_obj); + fz_try(ctx) + { + arg_is_obj = (argc == 1 && pdf_jsimp_to_type(js->imp, args[0]) != JS_TYPE_STRING); + if (arg_is_obj) + { + cMsg_obj = pdf_jsimp_property(js->imp, args[0], "cMsg"); + nIcon_obj = 
pdf_jsimp_property(js->imp, args[0], "nIcon"); + nType_obj = pdf_jsimp_property(js->imp, args[0], "nType"); + cTitle_obj = pdf_jsimp_property(js->imp, args[0], "cTitle"); + } + else + { + switch (argc) + { + case 6: + case 5: + case 4: + cTitle_obj = args[3]; + case 3: + nType_obj = args[2]; + case 2: + nIcon_obj = args[1]; + case 1: + cMsg_obj = args[0]; + } + } + + if (cMsg_obj) + event.message = pdf_jsimp_to_string(js->imp, cMsg_obj); + + if (nIcon_obj) + event.icon_type = (int)pdf_jsimp_to_number(js->imp, nIcon_obj); + + if (nType_obj) + event.button_group_type = (int)pdf_jsimp_to_number(js->imp, nType_obj); + + if (cTitle_obj) + event.title = pdf_jsimp_to_string(js->imp, cTitle_obj); + + pdf_event_issue_alert(js->doc, &event); + nButton_obj = pdf_jsimp_from_number(js->imp, (double)event.button_pressed); + } + fz_always(ctx) + { + if (arg_is_obj) + { + pdf_jsimp_drop_obj(js->imp, cMsg_obj); + pdf_jsimp_drop_obj(js->imp, nIcon_obj); + pdf_jsimp_drop_obj(js->imp, nType_obj); + pdf_jsimp_drop_obj(js->imp, cTitle_obj); + } + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + return nButton_obj; +} + +static pdf_jsimp_obj *app_execDialog(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + + pdf_event_issue_exec_dialog(js->doc); + + return NULL; +} + +static pdf_jsimp_obj *app_execMenuItem(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + + if (argc == 1) + pdf_event_issue_exec_menu_item(js->doc, pdf_jsimp_to_string(js->imp, args[0])); + + return NULL; +} + +static pdf_jsimp_obj *app_launchURL(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + char *cUrl; + int bNewFrame = 0; + + switch (argc) + { + default: + return NULL; + case 2: + bNewFrame = (int)pdf_jsimp_to_number(js->imp, args[1]); + case 1: + cUrl = pdf_jsimp_to_string(js->imp, args[0]); + } + + pdf_event_issue_launch_url(js->doc, cUrl, bNewFrame); + + return NULL; +} + +static 
pdf_obj *load_color(fz_context *ctx, pdf_jsimp *imp, pdf_jsimp_obj *val) +{ + pdf_obj *col = NULL; + pdf_obj *comp = NULL; + pdf_jsimp_obj *jscomp = NULL; + int i; + int n; + + n = pdf_jsimp_array_len(imp, val); + + /* The only legitimate color expressed as an array of length 1 + * is [T], meaning transparent. Return a NULL object to represent + * transparent */ + if (n <= 1) + return NULL; + + col = pdf_new_array(ctx, n-1); + + fz_var(comp); + fz_var(jscomp); + fz_try(ctx) + { + for (i = 0; i < n-1; i++) + { + jscomp = pdf_jsimp_array_item(imp, val, i+1); + comp = pdf_new_real(ctx, pdf_jsimp_to_number(imp, jscomp)); + pdf_array_push(col, comp); + pdf_jsimp_drop_obj(imp, jscomp); + jscomp = NULL; + pdf_drop_obj(comp); + comp = NULL; + } + } + fz_catch(ctx) + { + pdf_jsimp_drop_obj(imp, jscomp); + pdf_drop_obj(comp); + pdf_drop_obj(col); + fz_rethrow(ctx); + } + + return col; +} + +static pdf_jsimp_obj *field_buttonSetCaption(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + pdf_obj *field = (pdf_obj *)obj; + char *name; + + if (argc != 1) + return NULL; + + name = pdf_jsimp_to_string(js->imp, args[0]); + pdf_field_set_button_caption(js->doc, field, name); + + return NULL; +} + +static pdf_jsimp_obj *field_getName(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + pdf_obj *field = (pdf_obj *)obj; + char *name; + pdf_jsimp_obj *oname = NULL; + + if (field == NULL) + return NULL; + + name = pdf_field_name(js->doc, field); + fz_try(ctx) + { + oname = pdf_jsimp_from_string(js->imp, name); + } + fz_always(ctx) + { + fz_free(ctx, name); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + return oname; +} + +static void field_setName(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_warn(js->doc->ctx, "Unexpected call to field_setName"); +} + +static pdf_jsimp_obj *field_getDisplay(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + pdf_obj 
*field = (pdf_obj *)obj; + + return field ? pdf_jsimp_from_number(js->imp, (double)pdf_field_display(js->doc, field)) : NULL; +} + +static void field_setDisplay(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + pdf_obj *field = (pdf_obj *)obj; + + if (field) + pdf_field_set_display(js->doc, field, (int)pdf_jsimp_to_number(js->imp, val)); +} + +static pdf_jsimp_obj *field_getFillColor(void *jsctx, void *obj) +{ + return NULL; +} + +static void field_setFillColor(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + pdf_obj *field = (pdf_obj *)obj; + pdf_obj *col; + + if (!field) + return; + + col = load_color(js->doc->ctx, js->imp, val); + fz_try(ctx) + { + pdf_field_set_fill_color(js->doc, field, col); + } + fz_always(ctx) + { + pdf_drop_obj(col); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static pdf_jsimp_obj *field_getTextColor(void *jsctx, void *obj) +{ + return NULL; +} + +static void field_setTextColor(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + pdf_obj *field = (pdf_obj *)obj; + pdf_obj *col; + + if (!field) + return; + + col = load_color(js->doc->ctx, js->imp, val); + fz_try(ctx) + { + pdf_field_set_text_color(js->doc, field, col); + } + fz_always(ctx) + { + pdf_drop_obj(col); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static pdf_jsimp_obj *field_getBorderStyle(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + pdf_obj *field = (pdf_obj *)obj; + + return field ? 
pdf_jsimp_from_string(js->imp, pdf_field_border_style(js->doc, field)) : NULL; +} + +static void field_setBorderStyle(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + pdf_obj *field = (pdf_obj *)obj; + + if (field) + pdf_field_set_border_style(js->doc, field, pdf_jsimp_to_string(js->imp, val)); +} + +static pdf_jsimp_obj *field_getValue(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + pdf_obj *field = (pdf_obj *)obj; + char *fval; + + if (!field) + return NULL; + + fval = pdf_field_value(js->doc, field); + return pdf_jsimp_from_string(js->imp, fval?fval:""); +} + +static void field_setValue(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + pdf_obj *field = (pdf_obj *)obj; + + if (field) + (void)pdf_field_set_value(js->doc, field, pdf_jsimp_to_string(js->imp, val)); +} + +static pdf_jsimp_obj *event_getTarget(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + + return pdf_jsimp_new_obj(js->imp, js->fieldtype, js->event.target); +} + +static void event_setTarget(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_warn(js->doc->ctx, "Unexpected call to event_setTarget"); +} + +static pdf_jsimp_obj *event_getValue(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + char *v = js->event.value; + + return pdf_jsimp_from_string(js->imp, v?v:""); +} + +static void event_setValue(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + fz_free(ctx, js->event.value); + js->event.value = NULL; + js->event.value = fz_strdup(ctx, pdf_jsimp_to_string(js->imp, val)); +} + +static pdf_jsimp_obj *event_getWillCommit(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + + return pdf_jsimp_from_number(js->imp, 1.0); +} + +static void event_setWillCommit(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_warn(js->doc->ctx, "Unexpected call to event_setWillCommit"); 
+} + +static pdf_jsimp_obj *event_getRC(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + + return pdf_jsimp_from_number(js->imp, (double)js->event.rc); +} + +static void event_setRC(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + + js->event.rc = (int)pdf_jsimp_to_number(js->imp, val); +} + +static pdf_jsimp_obj *doc_getEvent(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + + return pdf_jsimp_new_obj(js->imp, js->eventtype, &js->event); +} + +static void doc_setEvent(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_warn(js->doc->ctx, "Unexpected call to doc_setEvent"); +} + +static pdf_jsimp_obj *doc_getApp(void *jsctx, void *obj) +{ + pdf_js *js = (pdf_js *)jsctx; + + return pdf_jsimp_new_obj(js->imp, js->apptype, NULL); +} + +static void doc_setApp(void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_warn(js->doc->ctx, "Unexpected call to doc_setApp"); +} + +static char *utf8_to_pdf(fz_context *ctx, char *utf8) +{ + char *pdf = fz_malloc(ctx, strlen(utf8)+1); + int i = 0; + unsigned char c; + + while ((c = *utf8) != 0) + { + if ((c & 0x80) == 0 && pdf_doc_encoding[c] == c) + { + pdf[i++] = c; + utf8++ ; + } + else + { + int rune; + int j; + + utf8 += fz_chartorune(&rune, utf8); + + for (j = 0; j < sizeof(pdf_doc_encoding) && pdf_doc_encoding[j] != rune; j++) + ; + + if (j < sizeof(pdf_doc_encoding)) + pdf[i++] = j; + } + } + + pdf[i] = 0; + + return pdf; +} + +static pdf_jsimp_obj *doc_getField(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + pdf_obj *dict = NULL; + char *utf8; + char *name = NULL; + + if (argc != 1) + return NULL; + + fz_var(dict); + fz_var(name); + fz_try(ctx) + { + utf8 = pdf_jsimp_to_string(js->imp, args[0]); + + if (utf8) + { + name = utf8_to_pdf(ctx, utf8); + dict = pdf_lookup_field(js->form, name); + } + } + fz_always(ctx) + { + fz_free(ctx, 
name); + } + fz_catch(ctx) + { + /* FIXME: TryLater ? */ + fz_warn(ctx, "doc_getField failed: %s", fz_caught_message(ctx)); + dict = NULL; + } + + return dict ? pdf_jsimp_new_obj(js->imp, js->fieldtype, dict) : NULL; +} + +static void reset_field(pdf_js *js, pdf_jsimp_obj *item) +{ + fz_context *ctx = js->doc->ctx; + char *name = NULL; + char *utf8 = pdf_jsimp_to_string(js->imp, item); + + if (utf8) + { + pdf_obj *field; + + fz_var(name); + fz_try(ctx) + { + name = utf8_to_pdf(ctx, utf8); + field = pdf_lookup_field(js->form, name); + if (field) + pdf_field_reset(js->doc, field); + } + fz_always(ctx) + { + fz_free(ctx, name); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } +} + +static pdf_jsimp_obj *doc_resetForm(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + pdf_jsimp_obj *arr = NULL; + pdf_jsimp_obj *elem = NULL; + + switch (argc) + { + case 0: + break; + case 1: + switch (pdf_jsimp_to_type(js->imp, args[0])) + { + case JS_TYPE_NULL: + break; + case JS_TYPE_ARRAY: + arr = args[0]; + break; + case JS_TYPE_STRING: + elem = args[0]; + break; + default: + return NULL; + } + break; + default: + return NULL; + } + + fz_try(ctx) + { + if(arr) + { + /* An array of fields has been passed in. Call + * pdf_reset_field on each */ + int i, n = pdf_jsimp_array_len(js->imp, arr); + + for (i = 0; i < n; i++) + { + pdf_jsimp_obj *item = pdf_jsimp_array_item(js->imp, arr, i); + + if (item) + reset_field(js, item); + + } + } + else if (elem) + { + reset_field(js, elem); + } + else + { + /* No argument or null passed in means reset all. 
*/ + int i, n = pdf_array_len(js->form); + + for (i = 0; i < n; i++) + pdf_field_reset(js->doc, pdf_array_get(js->form, i)); + } + } + fz_catch(ctx) + { + fz_warn(ctx, "doc_resetForm failed: %s", fz_caught_message(ctx)); + } + + return NULL; +} + +static pdf_jsimp_obj *doc_print(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + + pdf_event_issue_print(js->doc); + + return NULL; +} + +static pdf_jsimp_obj *doc_mailDoc(void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + pdf_js *js = (pdf_js *)jsctx; + fz_context *ctx = js->doc->ctx; + pdf_jsimp_obj *bUI_obj = NULL; + pdf_jsimp_obj *cTo_obj = NULL; + pdf_jsimp_obj *cCc_obj = NULL; + pdf_jsimp_obj *cBcc_obj = NULL; + pdf_jsimp_obj *cSubject_obj = NULL; + pdf_jsimp_obj *cMessage_obj = NULL; + pdf_mail_doc_event event; + int arg_is_obj = 0; + + if (argc < 1 || argc > 6) + return NULL; + + event.ask_user = 1; + event.to = ""; + event.cc = ""; + event.bcc = ""; + event.subject = ""; + event.message = ""; + + fz_var(bUI_obj); + fz_var(cTo_obj); + fz_var(cCc_obj); + fz_var(cBcc_obj); + fz_var(cSubject_obj); + fz_var(cMessage_obj); + fz_try(ctx) + { + arg_is_obj = (argc == 1 && pdf_jsimp_to_type(js->imp, args[0]) != JS_TYPE_BOOLEAN); + if (arg_is_obj) + { + bUI_obj = pdf_jsimp_property(js->imp, args[0], "bUI"); + cTo_obj = pdf_jsimp_property(js->imp, args[0], "cTo"); + cCc_obj = pdf_jsimp_property(js->imp, args[0], "cCc"); + cBcc_obj = pdf_jsimp_property(js->imp, args[0], "cBcc"); + cSubject_obj = pdf_jsimp_property(js->imp, args[0], "cSubject"); + cMessage_obj = pdf_jsimp_property(js->imp, args[0], "cMessage"); + } + else + { + switch (argc) + { + case 6: + cMessage_obj = args[5]; + case 5: + cSubject_obj = args[4]; + case 4: + cBcc_obj = args[3]; + case 3: + cCc_obj = args[2]; + case 2: + cTo_obj = args[1]; + case 1: + bUI_obj = args[0]; + } + } + + if (bUI_obj) + event.ask_user = (int)pdf_jsimp_to_number(js->imp, bUI_obj); + + if (cTo_obj) + event.to = 
pdf_jsimp_to_string(js->imp, cTo_obj); + + if (cCc_obj) + event.cc = pdf_jsimp_to_string(js->imp, cCc_obj); + + if (cBcc_obj) + event.bcc = pdf_jsimp_to_string(js->imp, cBcc_obj); + + if (cSubject_obj) + event.subject = pdf_jsimp_to_string(js->imp, cSubject_obj); + + if (cMessage_obj) + event.message = pdf_jsimp_to_string(js->imp, cMessage_obj); + + pdf_event_issue_mail_doc(js->doc, &event); + } + fz_always(ctx) + { + if (arg_is_obj) + { + pdf_jsimp_drop_obj(js->imp, bUI_obj); + pdf_jsimp_drop_obj(js->imp, cTo_obj); + pdf_jsimp_drop_obj(js->imp, cCc_obj); + pdf_jsimp_drop_obj(js->imp, cBcc_obj); + pdf_jsimp_drop_obj(js->imp, cSubject_obj); + pdf_jsimp_drop_obj(js->imp, cMessage_obj); + } + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + return NULL; +} + +static void declare_dom(pdf_js *js) +{ + pdf_jsimp *imp = js->imp; + + /* Create the document type */ + js->doctype = pdf_jsimp_new_type(imp, NULL); + pdf_jsimp_addmethod(imp, js->doctype, "getField", doc_getField); + pdf_jsimp_addmethod(imp, js->doctype, "resetForm", doc_resetForm); + pdf_jsimp_addmethod(imp, js->doctype, "print", doc_print); + pdf_jsimp_addmethod(imp, js->doctype, "mailDoc", doc_mailDoc); + pdf_jsimp_addproperty(imp, js->doctype, "event", doc_getEvent, doc_setEvent); + pdf_jsimp_addproperty(imp, js->doctype, "app", doc_getApp, doc_setApp); + + /* Create the event type */ + js->eventtype = pdf_jsimp_new_type(imp, NULL); + pdf_jsimp_addproperty(imp, js->eventtype, "target", event_getTarget, event_setTarget); + pdf_jsimp_addproperty(imp, js->eventtype, "value", event_getValue, event_setValue); + pdf_jsimp_addproperty(imp, js->eventtype, "willCommit", event_getWillCommit, event_setWillCommit); + pdf_jsimp_addproperty(imp, js->eventtype, "rc", event_getRC, event_setRC); + + /* Create the field type */ + js->fieldtype = pdf_jsimp_new_type(imp, NULL); + pdf_jsimp_addproperty(imp, js->fieldtype, "value", field_getValue, field_setValue); + pdf_jsimp_addproperty(imp, js->fieldtype, "borderStyle", 
field_getBorderStyle, field_setBorderStyle); + pdf_jsimp_addproperty(imp, js->fieldtype, "textColor", field_getTextColor, field_setTextColor); + pdf_jsimp_addproperty(imp, js->fieldtype, "fillColor", field_getFillColor, field_setFillColor); + pdf_jsimp_addproperty(imp, js->fieldtype, "display", field_getDisplay, field_setDisplay); + pdf_jsimp_addproperty(imp, js->fieldtype, "name", field_getName, field_setName); + pdf_jsimp_addmethod(imp, js->fieldtype, "buttonSetCaption", field_buttonSetCaption); + + /* Create the app type */ + js->apptype = pdf_jsimp_new_type(imp, NULL); + pdf_jsimp_addmethod(imp, js->apptype, "alert", app_alert); + pdf_jsimp_addmethod(imp, js->apptype, "execDialog", app_execDialog); + pdf_jsimp_addmethod(imp, js->apptype, "execMenuItem", app_execMenuItem); + pdf_jsimp_addmethod(imp, js->apptype, "launchURL", app_launchURL); + + /* Create the document object and tell the engine to use */ + pdf_jsimp_set_global_type(js->imp, js->doctype); +} + +static void preload_helpers(pdf_js *js) +{ + /* When testing on the cluster, redefine the Date object + * to use a fixed date */ +#ifdef CLUSTER + pdf_jsimp_execute(js->imp, +"var MuPDFOldDate = Date\n" +"Date = function() { return new MuPDFOldDate(1979,5,15); }\n" + ); +#endif + + pdf_jsimp_execute(js->imp, +#include "gen_js_util.h" + ); +} + +pdf_js *pdf_new_js(pdf_document *doc) +{ + fz_context *ctx = doc->ctx; + pdf_js *js = NULL; + + fz_var(js); + fz_try(ctx) + { + pdf_obj *root, *acroform; + + js = fz_malloc_struct(ctx, pdf_js); + js->doc = doc; + + /* Find the form array */ + root = pdf_dict_gets(pdf_trailer(doc), "Root"); + acroform = pdf_dict_gets(root, "AcroForm"); + js->form = pdf_dict_gets(acroform, "Fields"); + + /* Initialise the javascript engine, passing the main context + * for use in memory allocation and exception handling. Also + * pass our js context, for it to pass back to us. 
*/ + js->imp = pdf_new_jsimp(ctx, js); + declare_dom(js); + + preload_helpers(js); + } + fz_catch(ctx) + { + pdf_drop_js(js); + js = NULL; + } + + return js; +} + +void pdf_js_load_document_level(pdf_js *js) +{ + pdf_document *doc = js->doc; + fz_context *ctx = doc->ctx; + pdf_obj *javascript = NULL; + char *codebuf = NULL; + + fz_var(javascript); + fz_var(codebuf); + fz_try(ctx) + { + int len, i; + + javascript = pdf_load_name_tree(doc, "JavaScript"); + len = pdf_dict_len(javascript); + + for (i = 0; i < len; i++) + { + pdf_obj *fragment = pdf_dict_get_val(javascript, i); + pdf_obj *code = pdf_dict_gets(fragment, "JS"); + + fz_var(codebuf); + fz_try(ctx) + { + codebuf = pdf_to_utf8(doc, code); + pdf_jsimp_execute(js->imp, codebuf); + } + fz_always(ctx) + { + fz_free(ctx, codebuf); + codebuf = NULL; + } + fz_catch(ctx) + { + /* FIXME: TryLater ? */ + fz_warn(ctx, "Warning: %s", fz_caught_message(ctx)); + } + } + } + fz_always(ctx) + { + pdf_drop_obj(javascript); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void pdf_drop_js(pdf_js *js) +{ + if (js) + { + fz_context *ctx = js->doc->ctx; + fz_free(ctx, js->event.value); + pdf_jsimp_drop_type(js->imp, js->apptype); + pdf_jsimp_drop_type(js->imp, js->fieldtype); + pdf_jsimp_drop_type(js->imp, js->doctype); + pdf_drop_jsimp(js->imp); + fz_free(ctx, js); + } +} + +void pdf_js_setup_event(pdf_js *js, pdf_js_event *e) +{ + if (js) + { + fz_context *ctx = js->doc->ctx; + char *ev = e->value ? e->value : ""; + char *v = fz_strdup(ctx, ev); + + fz_free(ctx, js->event.value); + js->event.value = v; + + js->event.target = e->target; + js->event.rc = 1; + } +} + +pdf_js_event *pdf_js_get_event(pdf_js *js) +{ + return js ? 
&js->event : NULL; +} + +void pdf_js_execute(pdf_js *js, char *code) +{ + if (js) + { + fz_context *ctx = js->doc->ctx; + fz_try(ctx) + { + pdf_jsimp_execute(js->imp, code); + } + fz_catch(ctx) + { + } + } +} + +void pdf_js_execute_count(pdf_js *js, char *code, int count) +{ + if (js) + { + fz_context *ctx = js->doc->ctx; + fz_try(ctx) + { + pdf_jsimp_execute_count(js->imp, code, count); + } + fz_catch(ctx) + { + } + } +} + +int pdf_js_supported(void) +{ + return 1; +} diff --git a/source/pdf/js/pdf-jsimp-cpp.c b/source/pdf/js/pdf-jsimp-cpp.c new file mode 100644 index 00000000..7e8031a6 --- /dev/null +++ b/source/pdf/js/pdf-jsimp-cpp.c @@ -0,0 +1,225 @@ +/* This file contains wrapper functions for pdf_jsimp functions implemented + * in C++, from which calls to fz_throw aren't safe. The C++ versions + * return errors explicitly, and these wrappers then throw them. */ + +#include "mupdf/pdf.h" +#include "pdf-jsimp-cpp.h" + +pdf_jsimp *pdf_new_jsimp(fz_context *ctx, void *jsctx) +{ + pdf_jsimp *jsi = NULL; + const char *err = pdf_new_jsimp_cpp(ctx, jsctx, &jsi); + if (err != NULL) + fz_throw(ctx, FZ_ERROR_GENERIC, "%s", err); + + return jsi; +} + +void pdf_drop_jsimp(pdf_jsimp *imp) +{ + if (imp) + { + fz_context *ctx = pdf_jsimp_ctx_cpp(imp); + const char *err = pdf_drop_jsimp_cpp(imp); + if (err != NULL) + fz_warn(ctx, "%s", err); + } +} + +pdf_jsimp_type *pdf_jsimp_new_type(pdf_jsimp *imp, pdf_jsimp_dtr *dtr) +{ + pdf_jsimp_type *type = NULL; + const char *err = pdf_jsimp_new_type_cpp(imp, dtr, &type); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return type; +} + +void pdf_jsimp_drop_type(pdf_jsimp *imp, pdf_jsimp_type *type) +{ + const char *err = pdf_jsimp_drop_type_cpp(imp, type); + if (err != NULL) + fz_warn(pdf_jsimp_ctx_cpp(imp), "%s", err); +} + +void pdf_jsimp_addmethod(pdf_jsimp *imp, pdf_jsimp_type *type, char *name, pdf_jsimp_method *meth) +{ + const char *err = pdf_jsimp_addmethod_cpp(imp, type, name, meth); 
+ if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); +} + +void pdf_jsimp_addproperty(pdf_jsimp *imp, pdf_jsimp_type *type, char *name, pdf_jsimp_getter *get, pdf_jsimp_setter *set) +{ + const char *err = pdf_jsimp_addproperty_cpp(imp, type, name, get, set); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); +} + +void pdf_jsimp_set_global_type(pdf_jsimp *imp, pdf_jsimp_type *type) +{ + const char *err = pdf_jsimp_set_global_type_cpp(imp, type); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); +} + +pdf_jsimp_obj *pdf_jsimp_new_obj(pdf_jsimp *imp, pdf_jsimp_type *type, void *natobj) +{ + pdf_jsimp_obj *obj = NULL; + const char *err = pdf_jsimp_new_obj_cpp(imp, type, natobj, &obj); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return obj; +} + +void pdf_jsimp_drop_obj(pdf_jsimp *imp, pdf_jsimp_obj *obj) +{ + const char *err = pdf_jsimp_drop_obj_cpp(imp, obj); + if (err != NULL) + fz_warn(pdf_jsimp_ctx_cpp(imp), "%s", err); +} + +int pdf_jsimp_to_type(pdf_jsimp *imp, pdf_jsimp_obj *obj) +{ + int type = 0; + const char *err = pdf_jsimp_to_type_cpp(imp, obj, &type); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return type; +} + +pdf_jsimp_obj *pdf_jsimp_from_string(pdf_jsimp *imp, char *str) +{ + pdf_jsimp_obj *obj = NULL; + const char *err = pdf_jsimp_from_string_cpp(imp, str, &obj); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return obj; +} + +char *pdf_jsimp_to_string(pdf_jsimp *imp, pdf_jsimp_obj *obj) +{ + char *str = NULL; + const char *err = pdf_jsimp_to_string_cpp(imp, obj, &str); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return str; +} + +pdf_jsimp_obj *pdf_jsimp_from_number(pdf_jsimp *imp, double num) +{ + pdf_jsimp_obj *obj = NULL; + const char *err = pdf_jsimp_from_number_cpp(imp, num, 
&obj); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return obj; +} + +double pdf_jsimp_to_number(pdf_jsimp *imp, pdf_jsimp_obj *obj) +{ + double num; + const char *err = pdf_jsimp_to_number_cpp(imp, obj, &num); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return num; +} + +int pdf_jsimp_array_len(pdf_jsimp *imp, pdf_jsimp_obj *obj) +{ + int len = 0; + const char *err = pdf_jsimp_array_len_cpp(imp, obj, &len); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return len; +} + +pdf_jsimp_obj *pdf_jsimp_array_item(pdf_jsimp *imp, pdf_jsimp_obj *obj, int i) +{ + pdf_jsimp_obj *item = NULL; + const char *err = pdf_jsimp_array_item_cpp(imp, obj, i, &item); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return item; +} + +pdf_jsimp_obj *pdf_jsimp_property(pdf_jsimp *imp, pdf_jsimp_obj *obj, char *prop) +{ + pdf_jsimp_obj *pobj = NULL; + const char *err = pdf_jsimp_property_cpp(imp, obj, prop, &pobj); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); + + return pobj; +} + +void pdf_jsimp_execute(pdf_jsimp *imp, char *code) +{ + const char *err = pdf_jsimp_execute_cpp(imp, code); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); +} + +void pdf_jsimp_execute_count(pdf_jsimp *imp, char *code, int count) +{ + const char *err = pdf_jsimp_execute_count_cpp(imp, code, count); + if (err != NULL) + fz_throw(pdf_jsimp_ctx_cpp(imp), FZ_ERROR_GENERIC, "%s", err); +} +pdf_jsimp_obj *pdf_jsimp_call_method(pdf_jsimp *imp, pdf_jsimp_method *meth, void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]) +{ + fz_context *ctx = pdf_jsimp_ctx_cpp(imp); + pdf_jsimp_obj *res; + + fz_try(ctx) + { + res = meth(jsctx, obj, argc, args); + } + fz_catch(ctx) + { + res = NULL; + fz_warn(ctx, "%s", fz_caught_message(ctx)); + } + + return res; +} + +pdf_jsimp_obj 
*pdf_jsimp_call_getter(pdf_jsimp *imp, pdf_jsimp_getter *get, void *jsctx, void *obj) +{ + fz_context *ctx = pdf_jsimp_ctx_cpp(imp); + pdf_jsimp_obj *res; + + fz_try(ctx) + { + res = get(jsctx, obj); + } + fz_catch(ctx) + { + res = NULL; + fz_warn(ctx, "%s", fz_caught_message(ctx)); + } + + return res; +} + +void pdf_jsimp_call_setter(pdf_jsimp *imp, pdf_jsimp_setter *set, void *jsctx, void *obj, pdf_jsimp_obj *val) +{ + fz_context *ctx = pdf_jsimp_ctx_cpp(imp); + + fz_try(ctx) + { + set(jsctx, obj, val); + } + fz_catch(ctx) + { + fz_warn(ctx, "%s", fz_caught_message(ctx)); + } +} diff --git a/source/pdf/js/pdf-jsimp-cpp.h b/source/pdf/js/pdf-jsimp-cpp.h new file mode 100644 index 00000000..83af1d23 --- /dev/null +++ b/source/pdf/js/pdf-jsimp-cpp.h @@ -0,0 +1,29 @@ +/* C++ version of the pdf_jsimp api. C++ cannot safely call fz_throw, + * so C++ implementations return explicit errors in char * form. */ + +fz_context *pdf_jsimp_ctx_cpp(pdf_jsimp *imp); +const char *pdf_new_jsimp_cpp(fz_context *ctx, void *jsctx, pdf_jsimp **imp); +const char *pdf_drop_jsimp_cpp(pdf_jsimp *imp); +const char *pdf_jsimp_new_type_cpp(pdf_jsimp *imp, pdf_jsimp_dtr *dtr, pdf_jsimp_type **type); +const char *pdf_jsimp_drop_type_cpp(pdf_jsimp *imp, pdf_jsimp_type *type); +const char *pdf_jsimp_addmethod_cpp(pdf_jsimp *imp, pdf_jsimp_type *type, char *name, pdf_jsimp_method *meth); +const char *pdf_jsimp_addproperty_cpp(pdf_jsimp *imp, pdf_jsimp_type *type, char *name, pdf_jsimp_getter *get, pdf_jsimp_setter *set); +const char *pdf_jsimp_set_global_type_cpp(pdf_jsimp *imp, pdf_jsimp_type *type); +const char *pdf_jsimp_new_obj_cpp(pdf_jsimp *imp, pdf_jsimp_type *type, void *natobj, pdf_jsimp_obj **obj); +const char *pdf_jsimp_drop_obj_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj); +const char *pdf_jsimp_to_type_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, int *type); +const char *pdf_jsimp_from_string_cpp(pdf_jsimp *imp, char *str, pdf_jsimp_obj **obj); +const char *pdf_jsimp_to_string_cpp(pdf_jsimp 
*imp, pdf_jsimp_obj *obj, char **str); +const char *pdf_jsimp_from_number_cpp(pdf_jsimp *imp, double num, pdf_jsimp_obj **obj); +const char *pdf_jsimp_to_number_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, double *num); +const char *pdf_jsimp_array_len_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, int *len); +const char *pdf_jsimp_array_item_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, int i, pdf_jsimp_obj **item); +const char *pdf_jsimp_property_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, char *prop, pdf_jsimp_obj **pobj); +const char *pdf_jsimp_execute_cpp(pdf_jsimp *imp, char *code); +const char *pdf_jsimp_execute_count_cpp(pdf_jsimp *imp, char *code, int count); + +/* Also when calling back into mupdf, all exceptions must be caught. The functions bellow + * wrap these calls */ +pdf_jsimp_obj *pdf_jsimp_call_method(pdf_jsimp *imp, pdf_jsimp_method *meth, void *jsctx, void *obj, int argc, pdf_jsimp_obj *args[]); +pdf_jsimp_obj *pdf_jsimp_call_getter(pdf_jsimp *imp, pdf_jsimp_getter *get, void *jsctx, void *obj); +void pdf_jsimp_call_setter(pdf_jsimp *imp, pdf_jsimp_setter *set, void *jsctx, void *obj, pdf_jsimp_obj *val); diff --git a/source/pdf/js/pdf-jsimp-v8.cpp b/source/pdf/js/pdf-jsimp-v8.cpp new file mode 100644 index 00000000..f3894b6b --- /dev/null +++ b/source/pdf/js/pdf-jsimp-v8.cpp @@ -0,0 +1,476 @@ +/* + * This is a dummy JavaScript engine. It cheats by recognising the specific + * strings in calc.pdf, and hence will work only for that file. It is for + * testing only. + */ + +extern "C" { +#include "mupdf/fitz.h" +#include "mupdf/pdf.h" +#include "pdf-jsimp-cpp.h" +} + +#include <vector> +#include <set> +#include <v8.h> + +using namespace v8; +using namespace std; + +struct PDFJSImp; + +/* Object we pass to FunctionTemplate::New, which v8 passes back to us in + * callMethod, allowing us to call our client's, passed-in method. 
*/ +struct PDFJSImpMethod +{ + PDFJSImp *imp; + pdf_jsimp_method *meth; + + PDFJSImpMethod(PDFJSImp *imp, pdf_jsimp_method *meth) : imp(imp), meth(meth) {} +}; + +/* Object we pass to ObjectTemplate::SetAccessor, which v8 passes back to us in + * setProp and getProp, allowing us to call our client's, passed-in set/get methods. */ +struct PDFJSImpProperty +{ + PDFJSImp *imp; + pdf_jsimp_getter *get; + pdf_jsimp_setter *set; + + PDFJSImpProperty(PDFJSImp *imp, pdf_jsimp_getter *get, pdf_jsimp_setter *set) : imp(imp), get(get), set(set) {} +}; + +/* Internal representation of the pdf_jsimp_type object */ +struct PDFJSImpType +{ + PDFJSImp *imp; + Persistent<ObjectTemplate> templ; + pdf_jsimp_dtr *dtr; + vector<PDFJSImpMethod *> methods; + vector<PDFJSImpProperty *> properties; + + PDFJSImpType(PDFJSImp *imp, pdf_jsimp_dtr *dtr): imp(imp), dtr(dtr) + { + HandleScope scope; + templ = Persistent<ObjectTemplate>::New(ObjectTemplate::New()); + templ->SetInternalFieldCount(1); + } + + ~PDFJSImpType() + { + vector<PDFJSImpMethod *>::iterator mit; + for (mit = methods.begin(); mit < methods.end(); mit++) + delete *mit; + + vector<PDFJSImpProperty *>::iterator pit; + for (pit = properties.begin(); pit < properties.end(); pit++) + delete *pit; + + templ.Dispose(); + } +}; + +/* Info via which we destroy the client side part of objects that + * v8 garbage collects */ +struct PDFJSImpGCObj +{ + Persistent<Object> pobj; + PDFJSImpType *type; + + PDFJSImpGCObj(Handle<Object> obj, PDFJSImpType *type): type(type) + { + pobj = Persistent<Object>::New(obj); + } + + ~PDFJSImpGCObj() + { + pobj.Dispose(); + } +}; + +/* Internal representation of the pdf_jsimp object */ +struct PDFJSImp +{ + fz_context *ctx; + void *jsctx; + Persistent<Context> context; + vector<PDFJSImpType *> types; + set<PDFJSImpGCObj *> gclist; + + PDFJSImp(fz_context *ctx, void *jsctx) : ctx(ctx), jsctx(jsctx) + { + HandleScope scope; + context = Persistent<Context>::New(Context::New()); + } + + ~PDFJSImp() + { + 
HandleScope scope; + /* Tell v8 our context will not be used again */ + context.Dispose(); + + /* Unlink and destroy all the objects that v8 has yet to gc */ + set<PDFJSImpGCObj *>::iterator oit; + for (oit = gclist.begin(); oit != gclist.end(); oit++) + { + (*oit)->pobj.ClearWeak(); /* So that gcCallback wont get called */ + PDFJSImpType *vType = (*oit)->type; + Local<External> owrap = Local<External>::Cast((*oit)->pobj->GetInternalField(0)); + vType->dtr(vType->imp->jsctx, owrap->Value()); + delete *oit; + } + + vector<PDFJSImpType *>::iterator it; + for (it = types.begin(); it < types.end(); it++) + delete *it; + } +}; + +/* Internal representation of the pdf_jsimp_obj object */ +class PDFJSImpObject +{ + Persistent<Value> pobj; + String::Utf8Value *utf8; + +public: + PDFJSImpObject(Handle<Value> obj): utf8(NULL) + { + pobj = Persistent<Value>::New(obj); + } + + PDFJSImpObject(const char *str): utf8(NULL) + { + pobj = Persistent<Value>::New(String::New(str)); + } + + PDFJSImpObject(double num): utf8(NULL) + { + pobj = Persistent<Value>::New(Number::New(num)); + } + + ~PDFJSImpObject() + { + delete utf8; + pobj.Dispose(); + } + + int type() + { + if (pobj->IsNull()) + return JS_TYPE_NULL; + else if (pobj->IsString() || pobj->IsStringObject()) + return JS_TYPE_STRING; + else if (pobj->IsNumber() || pobj->IsNumberObject()) + return JS_TYPE_NUMBER; + else if (pobj->IsArray()) + return JS_TYPE_ARRAY; + else if (pobj->IsBoolean() || pobj->IsBooleanObject()) + return JS_TYPE_BOOLEAN; + else + return JS_TYPE_UNKNOWN; + } + + char *toString() + { + delete utf8; + utf8 = new String::Utf8Value(pobj); + return **utf8; + } + + double toNumber() + { + return pobj->NumberValue(); + } + + Handle<Value> toValue() + { + return pobj; + } +}; + +extern "C" fz_context *pdf_jsimp_ctx_cpp(pdf_jsimp *imp) +{ + return reinterpret_cast<PDFJSImp *>(imp)->ctx; +} + +extern "C" const char *pdf_new_jsimp_cpp(fz_context *ctx, void *jsctx, pdf_jsimp **imp) +{ + Locker lock; + *imp = 
reinterpret_cast<pdf_jsimp *>(new PDFJSImp(ctx, jsctx)); + + return NULL; +} + +extern "C" const char *pdf_drop_jsimp_cpp(pdf_jsimp *imp) +{ + Locker lock; + delete reinterpret_cast<PDFJSImp *>(imp); + return NULL; +} + +extern "C" const char *pdf_jsimp_new_type_cpp(pdf_jsimp *imp, pdf_jsimp_dtr *dtr, pdf_jsimp_type **type) +{ + Locker lock; + PDFJSImp *vImp = reinterpret_cast<PDFJSImp *>(imp); + PDFJSImpType *vType = new PDFJSImpType(vImp, dtr); + vImp->types.push_back(vType); + *type = reinterpret_cast<pdf_jsimp_type *>(vType); + return NULL; +} + +extern "C" const char *pdf_jsimp_drop_type_cpp(pdf_jsimp *imp, pdf_jsimp_type *type) +{ + /* Types are recorded and destroyed as part of PDFJSImp */ + return NULL; +} + +static Handle<Value> callMethod(const Arguments &args) +{ + HandleScope scope; + Local<External> mwrap = Local<External>::Cast(args.Data()); + PDFJSImpMethod *m = (PDFJSImpMethod *)mwrap->Value(); + + Local<Object> self = args.Holder(); + Local<External> owrap; + void *nself = NULL; + if (self->InternalFieldCount() > 0) + { + owrap = Local<External>::Cast(self->GetInternalField(0)); + nself = owrap->Value(); + } + + int c = args.Length(); + PDFJSImpObject **native_args = new PDFJSImpObject*[c]; + for (int i = 0; i < c; i++) + native_args[i] = new PDFJSImpObject(args[i]); + + PDFJSImpObject *obj = reinterpret_cast<PDFJSImpObject *>(pdf_jsimp_call_method(reinterpret_cast<pdf_jsimp *>(m->imp), m->meth, m->imp->jsctx, nself, c, reinterpret_cast<pdf_jsimp_obj **>(native_args))); + Handle<Value> val; + if (obj) + val = obj->toValue(); + delete obj; + + for (int i = 0; i < c; i++) + delete native_args[i]; + + /* native_args was allocated with new[], so it must be released with delete[] */ + delete[] native_args; + + return scope.Close(val); +} + +extern "C" const char *pdf_jsimp_addmethod_cpp(pdf_jsimp *imp, pdf_jsimp_type *type, char *name, pdf_jsimp_method *meth) +{ + Locker lock; + PDFJSImpType *vType = reinterpret_cast<PDFJSImpType *>(type); + HandleScope scope; + + PDFJSImpMethod *pmeth = new PDFJSImpMethod(vType->imp, meth); + 
vType->templ->Set(String::New(name), FunctionTemplate::New(callMethod, External::New(pmeth))); + vType->methods.push_back(pmeth); + return NULL; +} + +static Handle<Value> getProp(Local<String> property, const AccessorInfo &info) +{ + HandleScope scope; + Local<External> pwrap = Local<External>::Cast(info.Data()); + PDFJSImpProperty *p = reinterpret_cast<PDFJSImpProperty *>(pwrap->Value()); + + Local<Object> self = info.Holder(); + Local<External> owrap; + void *nself = NULL; + if (self->InternalFieldCount() > 0) + { + Local<Value> val = self->GetInternalField(0); + if (val->IsExternal()) + { + owrap = Local<External>::Cast(val); + nself = owrap->Value(); + } + } + + PDFJSImpObject *obj = reinterpret_cast<PDFJSImpObject *>(pdf_jsimp_call_getter(reinterpret_cast<pdf_jsimp *>(p->imp), p->get, p->imp->jsctx, nself)); + Handle<Value> val; + if (obj) + val = obj->toValue(); + delete obj; + return scope.Close(val); +} + +static void setProp(Local<String> property, Local<Value> value, const AccessorInfo &info) +{ + HandleScope scope; + Local<External> wrap = Local<External>::Cast(info.Data()); + PDFJSImpProperty *p = reinterpret_cast<PDFJSImpProperty *>(wrap->Value()); + + Local<Object> self = info.Holder(); + Local<External> owrap; + void *nself = NULL; + if (self->InternalFieldCount() > 0) + { + owrap = Local<External>::Cast(self->GetInternalField(0)); + nself = owrap->Value(); + } + + PDFJSImpObject *obj = new PDFJSImpObject(value); + + pdf_jsimp_call_setter(reinterpret_cast<pdf_jsimp *>(p->imp), p->set, p->imp->jsctx, nself, reinterpret_cast<pdf_jsimp_obj *>(obj)); + delete obj; +} + +extern "C" const char *pdf_jsimp_addproperty_cpp(pdf_jsimp *imp, pdf_jsimp_type *type, char *name, pdf_jsimp_getter *get, pdf_jsimp_setter *set) +{ + Locker lock; + PDFJSImpType *vType = reinterpret_cast<PDFJSImpType *>(type); + HandleScope scope; + + PDFJSImpProperty *prop = new PDFJSImpProperty(vType->imp, get, set); + vType->templ->SetAccessor(String::New(name), getProp, setProp, 
External::New(prop)); + vType->properties.push_back(prop); + return NULL; +} + +extern "C" const char *pdf_jsimp_set_global_type_cpp(pdf_jsimp *imp, pdf_jsimp_type *type) +{ + Locker lock; + PDFJSImp *vImp = reinterpret_cast<PDFJSImp *>(imp); + PDFJSImpType *vType = reinterpret_cast<PDFJSImpType *>(type); + HandleScope scope; + + vImp->context = Persistent<Context>::New(Context::New(NULL, vType->templ)); + return NULL; +} + +static void gcCallback(Persistent<Value> val, void *parm) +{ + PDFJSImpGCObj *gco = reinterpret_cast<PDFJSImpGCObj *>(parm); + PDFJSImpType *vType = gco->type; + HandleScope scope; + Persistent<Object> obj = Persistent<Object>::Cast(val); + + Local<External> owrap = Local<External>::Cast(obj->GetInternalField(0)); + vType->dtr(vType->imp->jsctx, owrap->Value()); + vType->imp->gclist.erase(gco); + delete gco; /* Disposes of the persistent handle */ +} + +extern "C" const char *pdf_jsimp_new_obj_cpp(pdf_jsimp *imp, pdf_jsimp_type *type, void *natobj, pdf_jsimp_obj **robj) +{ + Locker lock; + PDFJSImpType *vType = reinterpret_cast<PDFJSImpType *>(type); + HandleScope scope; + Local<Object> obj = vType->templ->NewInstance(); + obj->SetInternalField(0, External::New(natobj)); + + /* Arrange for destructor to be called on the client-side object + * when the v8 object is garbage collected */ + if (vType->dtr) + { + /* Wrap obj in a PDFJSImpGCObj, which takes a persistent handle to + * obj, and stores its type with it. 
The persistent handle tells v8 + * it cannot just destroy obj leaving the client-side object hanging */ + PDFJSImpGCObj *gco = new PDFJSImpGCObj(obj, vType); + /* Keep the wrapped object in a list, so that we can take back control + * of destroying client-side objects when shutting down this context */ + vType->imp->gclist.insert(gco); + /* Tell v8 that it can destroy the persistent handle to obj when it has + * no further need for it, but it must inform us via gcCallback */ + gco->pobj.MakeWeak(gco, gcCallback); + } + + *robj = reinterpret_cast<pdf_jsimp_obj *>(new PDFJSImpObject(obj)); + return NULL; +} + +extern "C" const char *pdf_jsimp_drop_obj_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj) +{ + Locker lock; + delete reinterpret_cast<PDFJSImpObject *>(obj); + return NULL; +} + +extern "C" const char *pdf_jsimp_to_type_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, int *type) +{ + Locker lock; + *type = reinterpret_cast<PDFJSImpObject *>(obj)->type(); + return NULL; +} + +extern "C" const char *pdf_jsimp_from_string_cpp(pdf_jsimp *imp, char *str, pdf_jsimp_obj **obj) +{ + Locker lock; + *obj = reinterpret_cast<pdf_jsimp_obj *>(new PDFJSImpObject(str)); + return NULL; +} + +extern "C" const char *pdf_jsimp_to_string_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, char **str) +{ + Locker lock; + *str = reinterpret_cast<PDFJSImpObject *>(obj)->toString(); + return NULL; +} + +extern "C" const char *pdf_jsimp_from_number_cpp(pdf_jsimp *imp, double num, pdf_jsimp_obj **obj) +{ + Locker lock; + *obj = reinterpret_cast<pdf_jsimp_obj *>(new PDFJSImpObject(num)); + return NULL; +} + +extern "C" const char *pdf_jsimp_to_number_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, double *num) +{ + Locker lock; + *num = reinterpret_cast<PDFJSImpObject *>(obj)->toNumber(); + return NULL; +} + +extern "C" const char *pdf_jsimp_array_len_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, int *len) +{ + Locker lock; + Local<Object> jsobj = reinterpret_cast<PDFJSImpObject *>(obj)->toValue()->ToObject(); + Local<Array> arr 
= Local<Array>::Cast(jsobj); + *len = arr->Length(); + return NULL; +} + +extern "C" const char *pdf_jsimp_array_item_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, int i, pdf_jsimp_obj **item) +{ + Locker lock; + Local<Object> jsobj = reinterpret_cast<PDFJSImpObject *>(obj)->toValue()->ToObject(); + *item = reinterpret_cast<pdf_jsimp_obj *>(new PDFJSImpObject(jsobj->Get(Number::New(i)))); + return NULL; +} + +extern "C" const char *pdf_jsimp_property_cpp(pdf_jsimp *imp, pdf_jsimp_obj *obj, char *prop, pdf_jsimp_obj **pobj) +{ + Locker lock; + Local<Object> jsobj = reinterpret_cast<PDFJSImpObject *>(obj)->toValue()->ToObject(); + *pobj = reinterpret_cast<pdf_jsimp_obj *>(new PDFJSImpObject(jsobj->Get(String::New(prop)))); + return NULL; +} + +extern "C" const char *pdf_jsimp_execute_cpp(pdf_jsimp *imp, char *code) +{ + Locker lock; + PDFJSImp *vImp = reinterpret_cast<PDFJSImp *>(imp); + HandleScope scope; + Context::Scope context_scope(vImp->context); + Handle<Script> script = Script::Compile(String::New(code)); + if (script.IsEmpty()) + return "compile failed in pdf_jsimp_execute"; + script->Run(); + return NULL; +} + +extern "C" const char *pdf_jsimp_execute_count_cpp(pdf_jsimp *imp, char *code, int count) +{ + Locker lock; + PDFJSImp *vImp = reinterpret_cast<PDFJSImp *>(imp); + HandleScope scope; + Context::Scope context_scope(vImp->context); + Handle<Script> script = Script::Compile(String::New(code, count)); + if (script.IsEmpty()) + return "compile failed in pdf_jsimp_execute_count"; + script->Run(); + return NULL; +} diff --git a/source/pdf/js/pdf-util.js b/source/pdf/js/pdf-util.js new file mode 100644 index 00000000..06f4874b --- /dev/null +++ b/source/pdf/js/pdf-util.js @@ -0,0 +1,875 @@ +var MuPDF = new Array(); + +MuPDF.monthName = ['January','February','March','April','May','June','July','August','September','October','November','December']; +MuPDF.dayName = ['Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday']; + +MuPDF.shortMonthName = 
new Array(); + +for (var i = 0; i < MuPDF.monthName.length; i++) + MuPDF.shortMonthName.push(MuPDF.monthName[i].substr(0,3)); + +MuPDF.monthPattern = new RegExp(); +MuPDF.monthPattern.compile('('+MuPDF.shortMonthName.join('|')+')'); + +MuPDF.padZeros = function(num, places) +{ + var s = num.toString(); + + if (s.length < places) + s = new Array(places-s.length+1).join('0') + s; + + return s; +} + +MuPDF.convertCase = function(str,cmd) +{ + switch (cmd) + { + case '>': return str.toUpperCase(); + case '<': return str.toLowerCase(); + default: return str; + } +} + +/* display must be kept in sync with an enum in pdf_form.c */ +var display = new Array(); +display.visible = 0; +display.hidden = 1; +display.noPrint = 2; +display.noView = 3; +var border = new Array(); +border.s = "Solid"; +border.d = "Dashed"; +border.b = "Beveled"; +border.i = "Inset"; +border.u = "Underline"; +var color = new Array(); +color.transparent = [ "T" ]; +color.black = [ "G", 0]; +color.white = [ "G", 1]; +color.red = [ "RGB", 1,0,0 ]; +color.green = [ "RGB", 0,1,0 ]; +color.blue = [ "RGB", 0,0,1 ]; +color.cyan = [ "CMYK", 1,0,0,0 ]; +color.magenta = [ "CMYK", 0,1,0,0 ]; +color.yellow = [ "CMYK", 0,0,1,0 ]; +color.dkGray = [ "G", 0.25]; +color.gray = [ "G", 0.5]; +color.ltGray = [ "G", 0.75]; + +var util = new Array(); + +util.printd = function(fmt, d) +{ + var regexp = /(m+|d+|y+|H+|h+|M+|s+|t+|[^mdyHhMst]+)/g; + var res = ''; + + if (!d) + return null; + + var tokens = fmt.match(regexp); + var length = tokens ? 
tokens.length : 0; + + for (var i = 0; i < length; i++) + { + switch(tokens[i]) + { + case 'mmmm': res += MuPDF.monthName[d.getMonth()]; break; + case 'mmm': res += MuPDF.monthName[d.getMonth()].substr(0,3); break; + case 'mm': res += MuPDF.padZeros(d.getMonth()+1, 2); break; + case 'm': res += d.getMonth()+1; break; + case 'dddd': res += MuPDF.dayName[d.getDay()]; break; + case 'ddd': res += MuPDF.dayName[d.getDay()].substr(0,3); break; + case 'dd': res += MuPDF.padZeros(d.getDate(), 2); break; + case 'd': res += d.getDate(); break; + case 'yyyy': res += d.getFullYear(); break; + case 'yy': res += d.getFullYear()%100; break; + case 'HH': res += MuPDF.padZeros(d.getHours(), 2); break; + case 'H': res += d.getHours(); break; + case 'hh': res += MuPDF.padZeros((d.getHours()+11)%12+1, 2); break; + case 'h': res += (d.getHours()+11)%12+1; break; + case 'MM': res += MuPDF.padZeros(d.getMinutes(), 2); break; + case 'M': res += d.getMinutes(); break; + case 'ss': res += MuPDF.padZeros(d.getSeconds(), 2); break; + case 's': res += d.getSeconds(); break; + case 'tt': res += d.getHours() < 12 ? 'am' : 'pm'; break; + case 't': res += d.getHours() < 12 ? 'a' : 'p'; break; + default: res += tokens[i]; + } + } + + return res; +} + +util.printx = function(fmt, val) +{ + var cs = '='; + var res = ''; + var i = 0; + var m; + var length = fmt ? 
fmt.length : 0; + + while (i < length) + { + switch (fmt.charAt(i)) + { + case '\\': + i++; + if (i >= length) return res; + res += fmt.charAt(i); + break; + + case 'X': + m = val.match(/\w/); + if (!m) return res; + res += MuPDF.convertCase(m[0],cs); + val = val.replace(/^\W*\w/,''); + break; + + case 'A': + m = val.match(/[A-z]/); + if (!m) return res; + res += MuPDF.convertCase(m[0],cs); + val = val.replace(/^[^A-z]*[A-z]/,''); + break; + + case '9': + m = val.match(/\d/); + if (!m) return res; + res += m[0]; + val = val.replace(/^\D*\d/,''); + break; + + case '*': + res += val; + val = ''; + break; + + case '?': + if (!val) return res; + res += MuPDF.convertCase(val.charAt(0),cs); + val = val.substr(1); + break; + + case '=': + case '>': + case '<': + cs = fmt.charAt(i); + break; + + default: + res += MuPDF.convertCase(fmt.charAt(i),cs); + break; + } + + i++; + } + + return res; +} + +util.printf = function() +{ + if (arguments.length < 1) + return ""; + + var res = ""; + var arg_i = 1; + var regexp = /%[^dfsx]*[dfsx]|[^%]*/g; + var tokens = arguments[0].match(regexp); + var length = tokens ? tokens.length : 0; + + for (var i = 0; i < length; i++) + { + var tok = tokens[i]; + if (tok.match(/^%/)) + { + if (arg_i < arguments.length) + { + var val = arguments[arg_i++]; + var fval = ''; + var neg = false; + var decsep_re = /^,[0123]/; + var flags_re = /^[+ 0#]+/; + var width_re = /^\d+/; + var prec_re = /^\.\d+/; + var conv_re = /^[dfsx]/; + + tok = tok.replace(/^%/, ""); + var decsep = tok.match(decsep_re); + if (decsep) decsep = decsep[0]; + tok = tok.replace(decsep_re, ""); + var flags = tok.match(flags_re); + if (flags) flags = flags[0]; + tok = tok.replace(flags_re, ""); + var width = tok.match(width_re); + if (width) width = width[0]; + tok = tok.replace(width_re, ""); + var prec = tok.match(prec_re); + if (prec) prec = prec[0]; + tok = tok.replace(prec_re, ""); + var conv = tok.match(conv_re); + if (conv) conv = conv[0]; + + prec = prec ? 
Number(prec.replace(/^\./, '')) : 0; + var poschar = (flags && flags.match(/[+ ]/)) ? flags.match(/[+ ]/)[0] : ''; + var pad = (flags && flags.match(/0/)) ? '0' : ' '; + + var point = '.'; + var thou = ''; + + if (decsep) + { + switch(decsep) + { + case ',0': thou = ','; break; + case ',1': break; + case ',2': thou = '.'; point = ','; break; + case ',3': point = ','; break; + } + } + + switch(conv) + { + case 'x': + val = Math.floor(val); + neg = (val < 0); + if (neg) + val = -val; + + // Convert to hex + while (val) + { + fval = '0123456789ABCDEF'.charAt(val%16) + fval; + val = Math.floor(val/16); + } + + if (neg) + fval = '-' + fval; + else + fval = poschar + fval; + break; + + case 'd': + fval = String(Math.floor(val)); + break; + + case 's': + // Always pad strings with space + pad = ' '; + fval = String(val); + break; + + case 'f': + fval = String(val); + + if (prec) + { + var frac = fval.match(/\.\d+/); + if (frac) + { + frac = frac[0]; + // Matched string includes the dot, so make it + // prec+1 in length + if (frac.length > prec+1) + frac = frac.substr(0, prec+1); + else if(frac.length < prec+1) + frac += new Array(prec+1-frac.length+1).join('0'); + + fval = fval.replace(/\.\d+/, frac); + } + } + break; + } + + if (conv.match(/[fd]/)) + { + if (fval >= 0) + fval = poschar + fval; + + if (point != '.') + fval.replace(/\./, point); + + if (thou) + { + var intpart = fval.match(/\d+/)[0]; + intpart = new Array(2-(intpart.length+2)%3+1).join('0') + intpart; + intpart = intpart.match(/.../g).join(thou).replace(/^0*[,.]?/,''); + fval = fval.replace(/\d+/, intpart); + } + } + + if (width && fval.length < width) + fval = new Array(width - fval.length + 1).join(pad) + fval; + + res += fval; + } + } + else + { + res += tok; + } + } + + return res; +} + +function AFMergeChange(event) +{ + return event.value; +} + +function AFMakeNumber(str) +{ + var nums = str.match(/\d+/g); + + if (!nums) + return null; + + var res = nums.join('.'); + + if (str.match(/^[^0-9]*\./)) + 
res = '0.'+res; + + return res * (str.match(/-/) ? -1.0 : 1.0); +} + +function AFExtractTime(dt) +{ + var ampm = dt.match(/(am|pm)/); + dt = dt.replace(/(am|pm)/, ''); + var t = dt.match(/\d{1,2}:\d{1,2}:\d{1,2}/); + dt = dt.replace(/\d{1,2}:\d{1,2}:\d{1,2}/, ''); + if (!t) + { + t = dt.match(/\d{1,2}:\d{1,2}/); + dt = dt.replace(/\d{1,2}:\d{1,2}/, ''); + } + + return [dt, t?t[0]+(ampm?ampm[0]:''):'']; +} + +function AFParseDateOrder(fmt) +{ + var order = ''; + + // Ensure all present with those not added in default order + fmt += "mdy"; + + for (var i = 0; i < fmt.length; i++) + { + var c = fmt.charAt(i); + + if ('ymd'.indexOf(c) != -1 && order.indexOf(c) == -1) + order += c; + } + + return order; +} + +function AFMatchMonth(d) +{ + var m = d.match(MuPDF.monthPattern); + + return m ? MuPDF.shortMonthName.indexOf(m[0]) : null; +} + +function AFParseTime(str, d) +{ + if (!str) + return d; + + if (!d) + d = new Date(); + + var ampm = str.match(/(am|pm)/); + var nums = str.match(/\d+/g); + var hour, min, sec; + + if (!nums) + return null; + + sec = 0; + + switch (nums.length) + { + case 3: + sec = nums[2]; + case 2: + hour = nums[0]; + min = nums[1]; + break; + + default: + return null; + } + + if (ampm == 'am' && hour < 12) + hour = 12 + hour; + + if (ampm == 'pm' && hour >= 12) + hour = 0 + hour - 12; + + d.setHours(hour, min, sec); + + if (d.getHours() != hour || d.getMinutes() != min || d.getSeconds() != sec) + return null; + + return d; +} + +function AFParseDateEx(d, fmt) +{ + var dt = AFExtractTime(d); + var nums = dt[0].match(/\d+/g); + var order = AFParseDateOrder(fmt); + var text_month = AFMatchMonth(dt[0]); + var dout = new Date(); + var year = dout.getFullYear(); + var month = dout.getMonth(); + var date = dout.getDate(); + + dout.setHours(12,0,0); + + if (!nums || nums.length < 1 || nums.length > 3) + return null; + + if (nums.length < 3 && text_month) + { + // Use the text month rather than one of the numbers + month = text_month; + order = 
order.replace('m',''); + } + + order = order.substr(0, nums.length); + + // If year and month specified but not date then use the 1st + if (order == "ym" || (order == "y" && text_month)) + date = 1; + + for (var i = 0; i < nums.length; i++) + { + switch (order.charAt(i)) + { + case 'y': year = nums[i]; break; + case 'm': month = nums[i] - 1; break; + case 'd': date = nums[i]; break; + } + } + + if (year < 100) + { + if (fmt.search("yyyy") != -1) + return null; + + if (year >= 50) + year = 1900 + year; + else if (year >= 0) + year = 2000 + year; + } + + dout.setFullYear(year, month, date); + + if (dout.getFullYear() != year || dout.getMonth() != month || dout.getDate() != date) + return null; + + return AFParseTime(dt[1], dout); +} + +function AFDate_KeystrokeEx(fmt) +{ + if (event.willCommit && !AFParseDateEx(event.value, fmt)) + { + app.alert("Invalid date/time. please ensure that the date/time exists. Field [ "+event.target.name+" ] should match format "+fmt); + event.rc = false; + } +} + +function AFDate_Keystroke(index) +{ + var formats = ['m/d','m/d/yy','mm/dd/yy','mm/yy','d-mmm','d-mmm-yy','dd-mm-yy','yy-mm-dd', + 'mmm-yy','mmmm-yy','mmm d, yyyy','mmmm d, yyyy','m/d/yy h:MM tt','m/d/yy HH:MM']; + AFDate_KeystrokeEx(formats[index]); +} + +function AFDate_FormatEx(fmt) +{ + var d = AFParseDateEx(event.value, fmt); + + event.value = d ? util.printd(fmt, d) : ""; +} + +function AFDate_Format(index) +{ + var formats = ['m/d','m/d/yy','mm/dd/yy','mm/yy','d-mmm','d-mmm-yy','dd-mm-yy','yy-mm-dd', + 'mmm-yy','mmmm-yy','mmm d, yyyy','mmmm d, yyyy','m/d/yy h:MM tt','m/d/yy HH:MM']; + AFDate_FormatEx(formats[index]); +} + +function AFTime_Keystroke(index) +{ + if (event.willCommit && !AFParseTime(event.value, null)) + { + app.alert("The value entered does not match the format of the field [ "+event.target.name+" ]"); + event.rc = false; + } +} + +function AFTime_FormatEx(fmt) +{ + var d = AFParseTime(event.value, null); + + event.value = d ? 
util.printd(fmt, d) : ''; +} + +function AFTime_Format(index) +{ + var formats = ['HH:MM','h:MM tt','HH:MM:ss','h:MM:ss tt']; + + AFTime_FormatEx(formats[index]); +} + +function AFSpecial_KeystrokeEx(fmt) +{ + var cs = '='; + var val = event.value; + var res = ''; + var i = 0; + var m; + var length = fmt ? fmt.length : 0; + + while (i < length) + { + switch (fmt.charAt(i)) + { + case '\\': + i++; + if (i >= length) + break; + res += fmt.charAt(i); + if (val && val.charAt(0) == fmt.charAt(i)) + val = val.substr(1); + break; + + case 'X': + m = val.match(/^\w/); + if (!m) + { + event.rc = false; + break; + } + res += MuPDF.convertCase(m[0],cs); + val = val.substr(1); + break; + + case 'A': + m = val.match(/^[A-z]/); + if (!m) + { + event.rc = false; + break; + } + res += MuPDF.convertCase(m[0],cs); + val = val.substr(1); + break; + + case '9': + m = val.match(/^\d/); + if (!m) + { + event.rc = false; + break; + } + res += m[0]; + val = val.substr(1); + break; + + case '*': + res += val; + val = ''; + break; + + case '?': + if (!val) + { + event.rc = false; + break; + } + res += MuPDF.convertCase(val.charAt(0),cs); + val = val.substr(1); + break; + + case '=': + case '>': + case '<': + cs = fmt.charAt(i); + break; + + default: + res += fmt.charAt(i); + if (val && val.charAt(0) == fmt.charAt(i)) + val = val.substr(1); + break; + } + + i++; + } + + if (event.rc) + event.value = res; + else if (event.willCommit) + app.alert("The value entered does not match the format of the field [ "+event.target.name+" ] should be "+fmt); +} + +function AFSpecial_Keystroke(index) +{ + if (event.willCommit) + { + switch (index) + { + case 0: + if (!event.value.match(/^\d{5}$/)) + event.rc = false; + break; + case 1: + if (!event.value.match(/^\d{5}[-. ]?\d{4}$/)) + event.rc = false; + break; + case 2: + if (!event.value.match(/^((\(\d{3}\)|\d{3})[-. ]?)?\d{3}[-. ]?\d{4}$/)) + event.rc = false; + break; + case 3: + if (!event.value.match(/^\d{3}[-. ]?\d{2}[-. 
]?\d{4}$/)) + event.rc = false; + break; + } + + if (!event.rc) + app.alert("The value entered does not match the format of the field [ "+event.target.name+" ]"); + } +} + +function AFSpecial_Format(index) +{ + var res; + + switch (index) + { + case 0: + res = util.printx('99999', event.value); + break; + case 1: + res = util.printx('99999-9999', event.value); + break; + case 2: + res = util.printx('9999999999', event.value); + res = util.printx(res.length >= 10 ? '(999) 999-9999' : '999-9999', event.value); + break; + case 3: + res = util.printx('999-99-9999', event.value); + break; + } + + event.value = res ? res : ''; +} + +function AFNumber_Keystroke(nDec, sepStyle, negStyle, currStyle, strCurrency, bCurrencyPrepend) +{ + if (sepStyle & 2) + { + if (!event.value.match(/^[+-]?\d*[,.]?\d*$/)) + event.rc = false; + } + else + { + if (!event.value.match(/^[+-]?\d*\.?\d*$/)) + event.rc = false; + } + + if (event.willCommit) + { + if (!event.value.match(/\d/)) + event.rc = false; + + if (!event.rc) + app.alert("The value entered does not match the format of the field [ "+event.target.name+" ]"); + } +} + +function AFNumber_Format(nDec,sepStyle,negStyle,currStyle,strCurrency,bCurrencyPrepend) +{ + var val = event.value; + var fracpart; + var intpart; + var point = sepStyle&2 ? ',' : '.'; + var separator = sepStyle&2 ? '.' 
: ','; + + if (/^\D*\./.test(val)) + val = '0'+val; + + var groups = val.match(/\d+/g); + + if (!groups) + return; + + switch (groups.length) + { + case 0: + return; + case 1: + fracpart = ''; + intpart = groups[0]; + break; + default: + fracpart = groups.pop(); + intpart = groups.join(''); + break; + } + + // Remove leading zeros + intpart = intpart.replace(/^0*/,''); + if (!intpart) + intpart = '0'; + + if ((sepStyle & 1) == 0) + { + // Add the thousands sepearators: pad to length multiple of 3 with zeros, + // split into 3s, join with separator, and remove the leading zeros + intpart = new Array(2-(intpart.length+2)%3+1).join('0') + intpart; + intpart = intpart.match(/.../g).join(separator).replace(/^0*/,''); + } + + if (!intpart) + intpart = '0'; + + // Adjust fractional part to correct number of decimal places + fracpart += new Array(nDec+1).join('0'); + fracpart = fracpart.substr(0,nDec); + + if (fracpart) + intpart += point+fracpart; + + if (bCurrencyPrepend) + intpart = strCurrency+intpart; + else + intpart += strCurrency; + + if (/-/.test(val)) + { + switch (negStyle) + { + case 0: + intpart = '-'+intpart; + break; + case 1: + break; + case 2: + case 3: + intpart = '('+intpart+')'; + break; + } + } + + if (negStyle&1) + event.target.textColor = /-/.test(val) ? 
color.red : color.black; + + event.value = intpart; +} + +function AFPercent_Keystroke(nDec, sepStyle) +{ + AFNumber_Keystroke(nDec, sepStyle, 0, 0, "", true); +} + +function AFPercent_Format(nDec, sepStyle) +{ + var val = AFMakeNumber(event.value); + + if (!val) + { + event.value = ''; + return; + } + + event.value = (val * 100) + ''; + + AFNumber_Format(nDec, sepStyle, 0, 0, "%", false); +} + +function AFSimple_Calculate(op, list) +{ + var res; + + switch (op) + { + case 'SUM': + res = 0; + break; + case 'PRD': + res = 1; + break; + case 'AVG': + res = 0; + break; + } + + if (typeof list == 'string') + list = list.split(/ *, */); + + for (var i = 0; i < list.length; i++) + { + var field = getField(list[i]); + var value = Number(field.value); + + switch (op) + { + case 'SUM': + res += value; + break; + case 'PRD': + res *= value; + break; + case 'AVG': + res += value; + break; + case 'MIN': + if (i == 0 || value < res) + res = value; + break; + case 'MAX': + if (i == 0 || value > res) + res = value; + break; + } + } + + if (op == 'AVG') + res /= list.length; + + event.value = res; +} + +function AFRange_Validate(lowerCheck, lowerLimit, upperCheck, upperLimit) +{ + if (upperCheck && event.value > upperLimit) + { + event.rc = false; + } + + if (lowerCheck && event.value < lowerLimit) + { + event.rc = false; + } + + + if (!event.rc) + { + if (lowerCheck && upperCheck) + app.alert(util.printf("Invalid value: must be greater than or equal to %s and less than or equal to %s", lowerLimit, upperLimit)); + else if (lowerCheck) + app.alert(util.printf("Invalid value: must be greater than or equal to %s", lowerLimit)); + else + app.alert(util.printf("Invalid value: must be less than or equal to %s", upperLimit)); + } +} diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c new file mode 100644 index 00000000..c50bbba2 --- /dev/null +++ b/source/pdf/pdf-annot.c @@ -0,0 +1,1200 @@ +#include "mupdf/pdf.h" + +#define SMALL_FLOAT (0.00001) + +static pdf_obj * 
+resolve_dest_rec(pdf_document *xref, pdf_obj *dest, int depth) +{ + if (depth > 10) /* Arbitrary to avoid infinite recursion */ + return NULL; + + if (pdf_is_name(dest) || pdf_is_string(dest)) + { + dest = pdf_lookup_dest(xref, dest); + return resolve_dest_rec(xref, dest, depth+1); + } + + else if (pdf_is_array(dest)) + { + return dest; + } + + else if (pdf_is_dict(dest)) + { + dest = pdf_dict_gets(dest, "D"); + return resolve_dest_rec(xref, dest, depth+1); + } + + else if (pdf_is_indirect(dest)) + return dest; + + return NULL; +} + +static pdf_obj * +resolve_dest(pdf_document *xref, pdf_obj *dest) +{ + return resolve_dest_rec(xref, dest, 0); +} + +fz_link_dest +pdf_parse_link_dest(pdf_document *xref, pdf_obj *dest) +{ + fz_link_dest ld; + pdf_obj *obj; + + int l_from_2 = 0; + int b_from_3 = 0; + int r_from_4 = 0; + int t_from_5 = 0; + int t_from_3 = 0; + int t_from_2 = 0; + int z_from_4 = 0; + + dest = resolve_dest(xref, dest); + if (dest == NULL || !pdf_is_array(dest)) + { + ld.kind = FZ_LINK_NONE; + return ld; + } + obj = pdf_array_get(dest, 0); + if (pdf_is_int(obj)) + ld.ld.gotor.page = pdf_to_int(obj); + else + ld.ld.gotor.page = pdf_lookup_page_number(xref, obj); + + ld.kind = FZ_LINK_GOTO; + ld.ld.gotor.flags = 0; + ld.ld.gotor.lt.x = 0; + ld.ld.gotor.lt.y = 0; + ld.ld.gotor.rb.x = 0; + ld.ld.gotor.rb.y = 0; + ld.ld.gotor.file_spec = NULL; + ld.ld.gotor.new_window = 0; + + obj = pdf_array_get(dest, 1); + if (!pdf_is_name(obj)) + return ld; + + if (!strcmp("XYZ", pdf_to_name(obj))) + { + l_from_2 = t_from_3 = z_from_4 = 1; + ld.ld.gotor.flags |= fz_link_flag_r_is_zoom; + } + else if ((!strcmp("Fit", pdf_to_name(obj))) || (!strcmp("FitB", pdf_to_name(obj)))) + { + ld.ld.gotor.flags |= fz_link_flag_fit_h; + ld.ld.gotor.flags |= fz_link_flag_fit_v; + } + else if ((!strcmp("FitH", pdf_to_name(obj))) || (!strcmp("FitBH", pdf_to_name(obj)))) + { + t_from_2 = 1; + ld.ld.gotor.flags |= fz_link_flag_fit_h; + } + else if ((!strcmp("FitV", pdf_to_name(obj))) || 
(!strcmp("FitBV", pdf_to_name(obj)))) + { + l_from_2 = 1; + ld.ld.gotor.flags |= fz_link_flag_fit_v; + } + else if (!strcmp("FitR", pdf_to_name(obj))) + { + l_from_2 = b_from_3 = r_from_4 = t_from_5 = 1; + ld.ld.gotor.flags |= fz_link_flag_fit_h; + ld.ld.gotor.flags |= fz_link_flag_fit_v; + } + + if (l_from_2) + { + obj = pdf_array_get(dest, 2); + if (pdf_is_int(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_l_valid; + ld.ld.gotor.lt.x = pdf_to_int(obj); + } + else if (pdf_is_real(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_l_valid; + ld.ld.gotor.lt.x = pdf_to_real(obj); + } + } + if (b_from_3) + { + obj = pdf_array_get(dest, 3); + if (pdf_is_int(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_b_valid; + ld.ld.gotor.rb.y = pdf_to_int(obj); + } + else if (pdf_is_real(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_b_valid; + ld.ld.gotor.rb.y = pdf_to_real(obj); + } + } + if (r_from_4) + { + obj = pdf_array_get(dest, 4); + if (pdf_is_int(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_r_valid; + ld.ld.gotor.rb.x = pdf_to_int(obj); + } + else if (pdf_is_real(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_r_valid; + ld.ld.gotor.rb.x = pdf_to_real(obj); + } + } + if (t_from_5 || t_from_3 || t_from_2) + { + if (t_from_5) + obj = pdf_array_get(dest, 5); + else if (t_from_3) + obj = pdf_array_get(dest, 3); + else + obj = pdf_array_get(dest, 2); + if (pdf_is_int(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_t_valid; + ld.ld.gotor.lt.y = pdf_to_int(obj); + } + else if (pdf_is_real(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_t_valid; + ld.ld.gotor.lt.y = pdf_to_real(obj); + } + } + if (z_from_4) + { + obj = pdf_array_get(dest, 4); + if (pdf_is_int(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_r_valid; + ld.ld.gotor.rb.x = pdf_to_int(obj); + } + else if (pdf_is_real(obj)) + { + ld.ld.gotor.flags |= fz_link_flag_r_valid; + ld.ld.gotor.rb.x = pdf_to_real(obj); + } + } + + /* Duplicate the values out for the sake of stupid clients */ + if ((ld.ld.gotor.flags & (fz_link_flag_l_valid | 
fz_link_flag_r_valid)) == fz_link_flag_l_valid) + ld.ld.gotor.rb.x = ld.ld.gotor.lt.x; + if ((ld.ld.gotor.flags & (fz_link_flag_l_valid | fz_link_flag_r_valid | fz_link_flag_r_is_zoom)) == fz_link_flag_r_valid) + ld.ld.gotor.lt.x = ld.ld.gotor.rb.x; + if ((ld.ld.gotor.flags & (fz_link_flag_t_valid | fz_link_flag_b_valid)) == fz_link_flag_t_valid) + ld.ld.gotor.rb.y = ld.ld.gotor.lt.y; + if ((ld.ld.gotor.flags & (fz_link_flag_t_valid | fz_link_flag_b_valid)) == fz_link_flag_b_valid) + ld.ld.gotor.lt.y = ld.ld.gotor.rb.y; + + return ld; +} + +static char * +pdf_parse_file_spec(pdf_document *xref, pdf_obj *file_spec) +{ + fz_context *ctx = xref->ctx; + pdf_obj *filename; + + if (pdf_is_string(file_spec)) + return pdf_to_utf8(xref, file_spec); + + if (pdf_is_dict(file_spec)) { + filename = pdf_dict_gets(file_spec, "UF"); + if (!filename) + filename = pdf_dict_gets(file_spec, "F"); + if (!filename) + filename = pdf_dict_gets(file_spec, "Unix"); + if (!filename) + filename = pdf_dict_gets(file_spec, "Mac"); + if (!filename) + filename = pdf_dict_gets(file_spec, "DOS"); + + return pdf_to_utf8(xref, filename); + } + + fz_warn(ctx, "cannot parse file specification"); + return NULL; +} + +fz_link_dest +pdf_parse_action(pdf_document *xref, pdf_obj *action) +{ + fz_link_dest ld; + pdf_obj *obj, *dest; + fz_context *ctx = xref->ctx; + + UNUSED(ctx); + + ld.kind = FZ_LINK_NONE; + + if (!action) + return ld; + + obj = pdf_dict_gets(action, "S"); + if (!strcmp(pdf_to_name(obj), "GoTo")) + { + dest = pdf_dict_gets(action, "D"); + ld = pdf_parse_link_dest(xref, dest); + } + else if (!strcmp(pdf_to_name(obj), "URI")) + { + ld.kind = FZ_LINK_URI; + ld.ld.uri.is_map = pdf_to_bool(pdf_dict_gets(action, "IsMap")); + ld.ld.uri.uri = pdf_to_utf8(xref, pdf_dict_gets(action, "URI")); + } + else if (!strcmp(pdf_to_name(obj), "Launch")) + { + ld.kind = FZ_LINK_LAUNCH; + dest = pdf_dict_gets(action, "F"); + ld.ld.launch.file_spec = pdf_parse_file_spec(xref, dest); + ld.ld.launch.new_window = 
pdf_to_int(pdf_dict_gets(action, "NewWindow")); + } + else if (!strcmp(pdf_to_name(obj), "Named")) + { + ld.kind = FZ_LINK_NAMED; + ld.ld.named.named = pdf_to_utf8(xref, pdf_dict_gets(action, "N")); + } + else if (!strcmp(pdf_to_name(obj), "GoToR")) + { + dest = pdf_dict_gets(action, "D"); + ld = pdf_parse_link_dest(xref, dest); + ld.kind = FZ_LINK_GOTOR; + dest = pdf_dict_gets(action, "F"); + ld.ld.gotor.file_spec = pdf_parse_file_spec(xref, dest); + ld.ld.gotor.new_window = pdf_to_int(pdf_dict_gets(action, "NewWindow")); + } + return ld; +} + +static fz_link * +pdf_load_link(pdf_document *xref, pdf_obj *dict, const fz_matrix *page_ctm) +{ + pdf_obj *dest = NULL; + pdf_obj *action; + pdf_obj *obj; + fz_rect bbox; + fz_context *ctx = xref->ctx; + fz_link_dest ld; + + obj = pdf_dict_gets(dict, "Rect"); + if (obj) + pdf_to_rect(ctx, obj, &bbox); + else + bbox = fz_empty_rect; + + fz_transform_rect(&bbox, page_ctm); + + obj = pdf_dict_gets(dict, "Dest"); + if (obj) + { + dest = resolve_dest(xref, obj); + ld = pdf_parse_link_dest(xref, dest); + } + else + { + action = pdf_dict_gets(dict, "A"); + /* fall back to additional action button's down/up action */ + if (!action) + action = pdf_dict_getsa(pdf_dict_gets(dict, "AA"), "U", "D"); + + ld = pdf_parse_action(xref, action); + } + if (ld.kind == FZ_LINK_NONE) + return NULL; + return fz_new_link(ctx, &bbox, ld); +} + +fz_link * +pdf_load_link_annots(pdf_document *xref, pdf_obj *annots, const fz_matrix *page_ctm) +{ + fz_link *link, *head, *tail; + pdf_obj *obj; + int i, n; + + head = tail = NULL; + link = NULL; + + n = pdf_array_len(annots); + for (i = 0; i < n; i++) + { + fz_try(xref->ctx) + { + obj = pdf_array_get(annots, i); + link = pdf_load_link(xref, obj, page_ctm); + } + fz_catch(xref->ctx) + { + /* FIXME: TryLater */ + link = NULL; + } + + if (link) + { + if (!head) + head = tail = link; + else + { + tail->next = link; + tail = link; + } + } + } + + return head; +} + +void +pdf_free_annot(fz_context *ctx, 
pdf_annot *annot) +{ + pdf_annot *next; + + while (annot) + { + next = annot->next; + if (annot->ap) + pdf_drop_xobject(ctx, annot->ap); + pdf_drop_obj(annot->obj); + fz_free(ctx, annot); + annot = next; + } +} + +static void +pdf_transform_annot(pdf_annot *annot) +{ + fz_rect bbox = annot->ap->bbox; + fz_rect rect = annot->rect; + float w, h, x, y; + + fz_transform_rect(&bbox, &annot->ap->matrix); + if (bbox.x1 == bbox.x0) + w = 0; + else + w = (rect.x1 - rect.x0) / (bbox.x1 - bbox.x0); + if (bbox.y1 == bbox.y0) + h = 0; + else + h = (rect.y1 - rect.y0) / (bbox.y1 - bbox.y0); + x = rect.x0 - bbox.x0; + y = rect.y0 - bbox.y0; + + fz_pre_scale(fz_translate(&annot->matrix, x, y), w, h); +} + +fz_annot_type pdf_annot_obj_type(pdf_obj *obj) +{ + char *subtype = pdf_to_name(pdf_dict_gets(obj, "Subtype")); + if (!strcmp(subtype, "Text")) + return FZ_ANNOT_TEXT; + else if (!strcmp(subtype, "Link")) + return FZ_ANNOT_LINK; + else if (!strcmp(subtype, "FreeText")) + return FZ_ANNOT_FREETEXT; + else if (!strcmp(subtype, "Line")) + return FZ_ANNOT_LINE; + else if (!strcmp(subtype, "Square")) + return FZ_ANNOT_SQUARE; + else if (!strcmp(subtype, "Circle")) + return FZ_ANNOT_CIRCLE; + else if (!strcmp(subtype, "Polygon")) + return FZ_ANNOT_POLYGON; + else if (!strcmp(subtype, "PolyLine")) + return FZ_ANNOT_POLYLINE; + else if (!strcmp(subtype, "Highlight")) + return FZ_ANNOT_HIGHLIGHT; + else if (!strcmp(subtype, "Underline")) + return FZ_ANNOT_UNDERLINE; + else if (!strcmp(subtype, "Squiggly")) + return FZ_ANNOT_SQUIGGLY; + else if (!strcmp(subtype, "StrikeOut")) + return FZ_ANNOT_STRIKEOUT; + else if (!strcmp(subtype, "Stamp")) + return FZ_ANNOT_STAMP; + else if (!strcmp(subtype, "Caret")) + return FZ_ANNOT_CARET; + else if (!strcmp(subtype, "Ink")) + return FZ_ANNOT_INK; + else if (!strcmp(subtype, "Popup")) + return FZ_ANNOT_POPUP; + else if (!strcmp(subtype, "FileAttachment")) + return FZ_ANNOT_FILEATTACHMENT; + else if (!strcmp(subtype, "Sound")) + return FZ_ANNOT_SOUND; 
+ else if (!strcmp(subtype, "Movie")) + return FZ_ANNOT_MOVIE; + else if (!strcmp(subtype, "Widget")) + return FZ_ANNOT_WIDGET; + else if (!strcmp(subtype, "Screen")) + return FZ_ANNOT_SCREEN; + else if (!strcmp(subtype, "PrinterMark")) + return FZ_ANNOT_PRINTERMARK; + else if (!strcmp(subtype, "TrapNet")) + return FZ_ANNOT_TRAPNET; + else if (!strcmp(subtype, "Watermark")) + return FZ_ANNOT_WATERMARK; + else if (!strcmp(subtype, "3D")) + return FZ_ANNOT_3D; + else + return -1; +} + +static const char *annot_type_str(fz_annot_type type) +{ + switch (type) + { + case FZ_ANNOT_TEXT: return "Text"; + case FZ_ANNOT_LINK: return "Link"; + case FZ_ANNOT_FREETEXT: return "FreeText"; + case FZ_ANNOT_LINE: return "Line"; + case FZ_ANNOT_SQUARE: return "Square"; + case FZ_ANNOT_CIRCLE: return "Circle"; + case FZ_ANNOT_POLYGON: return "Polygon"; + case FZ_ANNOT_POLYLINE: return "PolyLine"; + case FZ_ANNOT_HIGHLIGHT: return "Highlight"; + case FZ_ANNOT_UNDERLINE: return "Underline"; + case FZ_ANNOT_SQUIGGLY: return "Squiggly"; + case FZ_ANNOT_STRIKEOUT: return "StrikeOut"; + case FZ_ANNOT_STAMP: return "Stamp"; + case FZ_ANNOT_CARET: return "Caret"; + case FZ_ANNOT_INK: return "Ink"; + case FZ_ANNOT_POPUP: return "Popup"; + case FZ_ANNOT_FILEATTACHMENT: return "FileAttachment"; + case FZ_ANNOT_SOUND: return "Sound"; + case FZ_ANNOT_MOVIE: return "Movie"; + case FZ_ANNOT_WIDGET: return "Widget"; + case FZ_ANNOT_SCREEN: return "Screen"; + case FZ_ANNOT_PRINTERMARK: return "PrinterMark"; + case FZ_ANNOT_TRAPNET: return "TrapNet"; + case FZ_ANNOT_WATERMARK: return "Watermark"; + case FZ_ANNOT_3D: return "3D"; + default: return ""; + } +} + +pdf_annot * +pdf_load_annots(pdf_document *xref, pdf_obj *annots, pdf_page *page) +{ + pdf_annot *annot, *head, *tail; + pdf_obj *obj, *ap, *as, *n, *rect; + int i, len, is_dict; + fz_context *ctx = xref->ctx; + + fz_var(annot); + + head = tail = NULL; + + len = pdf_array_len(annots); + for (i = 0; i < len; i++) + { + fz_try(ctx) + { + obj = 
pdf_array_get(annots, i); + + if (xref->update_appearance) + xref->update_appearance(xref, obj); + + rect = pdf_dict_gets(obj, "Rect"); + ap = pdf_dict_gets(obj, "AP"); + as = pdf_dict_gets(obj, "AS"); + is_dict = pdf_is_dict(ap); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + ap = NULL; + is_dict = 0; + } + + if (!is_dict) + continue; + + annot = NULL; + fz_try(ctx) + { + pdf_hotspot *hp = &xref->hotspot; + + n = NULL; + + if (hp->num == pdf_to_num(obj) + && hp->gen == pdf_to_gen(obj) + && (hp->state & HOTSPOT_POINTER_DOWN)) + { + n = pdf_dict_gets(ap, "D"); /* down state */ + } + + if (n == NULL) + n = pdf_dict_gets(ap, "N"); /* normal state */ + + /* lookup current state in sub-dictionary */ + if (!pdf_is_stream(xref, pdf_to_num(n), pdf_to_gen(n))) + n = pdf_dict_get(n, as); + + annot = fz_malloc_struct(ctx, pdf_annot); + annot->page = page; + annot->obj = pdf_keep_obj(obj); + pdf_to_rect(ctx, rect, &annot->rect); + annot->pagerect = annot->rect; + fz_transform_rect(&annot->pagerect, &page->ctm); + annot->ap = NULL; + annot->annot_type = pdf_annot_obj_type(obj); + annot->widget_type = annot->annot_type == FZ_ANNOT_WIDGET ? 
pdf_field_type(xref, obj) : PDF_WIDGET_TYPE_NOT_WIDGET; + + if (pdf_is_stream(xref, pdf_to_num(n), pdf_to_gen(n))) + { + annot->ap = pdf_load_xobject(xref, n); + pdf_transform_annot(annot); + annot->ap_iteration = annot->ap->iteration; + } + + annot->next = NULL; + + if (obj == xref->focus_obj) + xref->focus = annot; + + if (!head) + head = tail = annot; + else + { + tail->next = annot; + tail = annot; + } + } + fz_catch(ctx) + { + pdf_free_annot(ctx, annot); + fz_warn(ctx, "ignoring broken annotation"); + /* FIXME: TryLater */ + } + } + + return head; +} + +void +pdf_update_annot(pdf_document *xref, pdf_annot *annot) +{ + pdf_obj *obj, *ap, *as, *n; + fz_context *ctx = xref->ctx; + + obj = annot->obj; + + if (xref->update_appearance) + xref->update_appearance(xref, obj); + + ap = pdf_dict_gets(obj, "AP"); + as = pdf_dict_gets(obj, "AS"); + + if (pdf_is_dict(ap)) + { + pdf_hotspot *hp = &xref->hotspot; + + n = NULL; + + if (hp->num == pdf_to_num(obj) + && hp->gen == pdf_to_gen(obj) + && (hp->state & HOTSPOT_POINTER_DOWN)) + { + n = pdf_dict_gets(ap, "D"); /* down state */ + } + + if (n == NULL) + n = pdf_dict_gets(ap, "N"); /* normal state */ + + /* lookup current state in sub-dictionary */ + if (!pdf_is_stream(xref, pdf_to_num(n), pdf_to_gen(n))) + n = pdf_dict_get(n, as); + + pdf_drop_xobject(ctx, annot->ap); + annot->ap = NULL; + + if (pdf_is_stream(xref, pdf_to_num(n), pdf_to_gen(n))) + { + fz_try(ctx) + { + annot->ap = pdf_load_xobject(xref, n); + pdf_transform_annot(annot); + annot->ap_iteration = annot->ap->iteration; + } + fz_catch(ctx) + { + fz_warn(ctx, "ignoring broken annotation"); + /* FIXME: TryLater */ + } + } + } +} + +pdf_annot * +pdf_first_annot(pdf_document *doc, pdf_page *page) +{ + return page ? page->annots : NULL; +} + +pdf_annot * +pdf_next_annot(pdf_document *doc, pdf_annot *annot) +{ + return annot ? 
annot->next : NULL; +} + +fz_rect * +pdf_bound_annot(pdf_document *doc, pdf_annot *annot, fz_rect *rect) +{ + if (rect == NULL) + return NULL; + + if (annot) + *rect = annot->pagerect; + else + *rect = fz_empty_rect; + return rect; +} + +fz_annot_type +pdf_annot_type(pdf_annot *annot) +{ + return annot->annot_type; +} + +pdf_annot * +pdf_create_annot(pdf_document *doc, pdf_page *page, fz_annot_type type) +{ + fz_context *ctx = doc->ctx; + pdf_annot *annot = NULL; + pdf_obj *annot_obj = pdf_new_dict(ctx, 0); + pdf_obj *ind_obj = NULL; + + fz_var(annot); + fz_var(ind_obj); + fz_try(ctx) + { + int ind_obj_num; + fz_rect rect = {0.0, 0.0, 0.0, 0.0}; + const char *type_str = annot_type_str(type); + pdf_obj *annot_arr = pdf_dict_gets(page->me, "Annots"); + if (annot_arr == NULL) + { + annot_arr = pdf_new_array(ctx, 0); + pdf_dict_puts_drop(page->me, "Annots", annot_arr); + } + + pdf_dict_puts_drop(annot_obj, "Type", pdf_new_name(ctx, "Annot")); + + pdf_dict_puts_drop(annot_obj, "Subtype", pdf_new_name(ctx, type_str)); + pdf_dict_puts_drop(annot_obj, "Rect", pdf_new_rect(ctx, &rect)); + + annot = fz_malloc_struct(ctx, pdf_annot); + annot->page = page; + annot->obj = pdf_keep_obj(annot_obj); + annot->rect = rect; + annot->pagerect = rect; + annot->ap = NULL; + annot->widget_type = PDF_WIDGET_TYPE_NOT_WIDGET; + annot->annot_type = type; + + /* + Both annotation object and annotation structure are now created. + Insert the object in the hierarchy and the structure in the + page's array. 
+ */ + ind_obj_num = pdf_create_object(doc); + pdf_update_object(doc, ind_obj_num, annot_obj); + ind_obj = pdf_new_indirect(ctx, ind_obj_num, 0, doc); + pdf_array_push(annot_arr, ind_obj); + + /* + Linking must be done after any call that might throw because + pdf_free_annot below actually frees a list + */ + annot->next = page->annots; + page->annots = annot; + + doc->dirty = 1; + } + fz_always(ctx) + { + pdf_drop_obj(annot_obj); + pdf_drop_obj(ind_obj); + } + fz_catch(ctx) + { + pdf_free_annot(ctx, annot); + fz_rethrow(ctx); + } + + return annot; +} + +void +pdf_delete_annot(pdf_document *doc, pdf_page *page, pdf_annot *annot) +{ + fz_context *ctx = doc->ctx; + pdf_annot **annotptr; + pdf_obj *old_annot_arr; + pdf_obj *annot_arr; + + if (annot == NULL) + return; + + /* Remove annot from page's list */ + for (annotptr = &page->annots; *annotptr; annotptr = &(*annotptr)->next) + { + if (*annotptr == annot) + break; + } + + /* Check the passed annotation was of this page */ + if (*annotptr == NULL) + return; + + *annotptr = annot->next; + + /* Stick it in the deleted list */ + annot->next = page->deleted_annots; + page->deleted_annots = annot; + + pdf_drop_xobject(ctx, annot->ap); + annot->ap = NULL; + + /* Recreate the "Annots" array with this annot removed */ + old_annot_arr = pdf_dict_gets(page->me, "Annots"); + + if (old_annot_arr) + { + int i, n = pdf_array_len(old_annot_arr); + annot_arr = pdf_new_array(ctx, n?(n-1):0); + + fz_try(ctx) + { + for (i = 0; i < n; i++) + { + pdf_obj *obj = pdf_array_get(old_annot_arr, i); + + if (obj != annot->obj) + pdf_array_push(annot_arr, obj); + } + + /* + Overwrite "Annots" in the page dictionary, which has the + side-effect of releasing the last reference to old_annot_arr + */ + pdf_dict_puts(page->me, "Annots", annot_arr); + } + fz_always(ctx) + { + pdf_drop_obj(annot_arr); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + + pdf_drop_obj(annot->obj); + annot->obj = NULL; + doc->dirty = 1; +} + +static fz_colorspace 
*pdf_to_color(pdf_document *doc, pdf_obj *col, float color[4]) +{ + fz_colorspace *cs; + int i, ncol = pdf_array_len(col); + + switch (ncol) + { + case 1: cs = fz_device_gray(doc->ctx); break; + case 3: cs = fz_device_rgb(doc->ctx); break; + case 4: cs = fz_device_cmyk(doc->ctx); break; + default: return NULL; + } + + for (i = 0; i < ncol; i++) + color[i] = pdf_to_real(pdf_array_get(col, i)); + + return cs; +} + +static fz_point * +quadpoints(pdf_document *doc, pdf_obj *annot, int *nout) +{ + fz_context *ctx = doc->ctx; + pdf_obj *quad = pdf_dict_gets(annot, "QuadPoints"); + fz_point *qp = NULL; + int i, n; + + if (!quad) + return NULL; + + n = pdf_array_len(quad); + + if (n%8 != 0) + return NULL; + + fz_var(qp); + fz_try(ctx) + { + qp = fz_malloc_array(ctx, n/2, sizeof(fz_point)); + + for (i = 0; i < n; i += 2) + { + qp[i/2].x = pdf_to_real(pdf_array_get(quad, i)); + qp[i/2].y = pdf_to_real(pdf_array_get(quad, i+1)); + } + } + fz_catch(ctx) + { + fz_free(ctx, qp); + fz_rethrow(ctx); + } + + *nout = n/2; + + return qp; +} + +void +pdf_set_markup_annot_quadpoints(pdf_document *doc, pdf_annot *annot, fz_point *qp, int n) +{ + fz_context *ctx = doc->ctx; + fz_matrix ctm; + pdf_obj *arr = pdf_new_array(ctx, n*2); + int i; + + fz_invert_matrix(&ctm, &annot->page->ctm); + + pdf_dict_puts_drop(annot->obj, "QuadPoints", arr); + + for (i = 0; i < n; i++) + { + fz_point pt = qp[i]; + pdf_obj *r; + + fz_transform_point(&pt, &ctm); + r = pdf_new_real(ctx, pt.x); + pdf_array_push_drop(arr, r); + r = pdf_new_real(ctx, pt.y); + pdf_array_push_drop(arr, r); + } +} + +static void update_rect(fz_context *ctx, pdf_annot *annot) +{ + pdf_to_rect(ctx, pdf_dict_gets(annot->obj, "Rect"), &annot->rect); + annot->pagerect = annot->rect; + fz_transform_rect(&annot->pagerect, &annot->page->ctm); +} + +void +pdf_set_ink_annot_list(pdf_document *doc, pdf_annot *annot, fz_point *pts, int *counts, int ncount, float color[3], float thickness) +{ + fz_context *ctx = doc->ctx; + fz_matrix ctm; + 
pdf_obj *list = pdf_new_array(ctx, ncount); + pdf_obj *bs, *col; + /* Start from the empty rect so the bounds are defined even when no points are supplied */ + fz_rect rect = fz_empty_rect; + int i, k = 0; + + fz_invert_matrix(&ctm, &annot->page->ctm); + + pdf_dict_puts_drop(annot->obj, "InkList", list); + + for (i = 0; i < ncount; i++) + { + int j; + pdf_obj *arc = pdf_new_array(ctx, counts[i]); + + pdf_array_push_drop(list, arc); + + for (j = 0; j < counts[i]; j++) + { + fz_point pt = pts[k]; + + fz_transform_point(&pt, &ctm); + + /* k counts points across all arcs, so the bounds are seeded by the first point even when counts[0] is 0 */ + if (k == 0) + { + rect.x0 = rect.x1 = pt.x; + rect.y0 = rect.y1 = pt.y; + } + else + { + fz_include_point_in_rect(&rect, &pt); + } + + pdf_array_push_drop(arc, pdf_new_real(ctx, pt.x)); + pdf_array_push_drop(arc, pdf_new_real(ctx, pt.y)); + k++; + } + } + + fz_expand_rect(&rect, thickness); + pdf_dict_puts_drop(annot->obj, "Rect", pdf_new_rect(ctx, &rect)); + update_rect(ctx, annot); + + bs = pdf_new_dict(ctx, 1); + pdf_dict_puts_drop(annot->obj, "BS", bs); + pdf_dict_puts_drop(bs, "W", pdf_new_real(ctx, thickness)); + + col = pdf_new_array(ctx, 3); + pdf_dict_puts_drop(annot->obj, "C", col); + for (i = 0; i < 3; i++) + pdf_array_push_drop(col, pdf_new_real(ctx, color[i])); +} + +/* + * Set the normal appearance (AP/N) of annotation object obj from disp_list. + * rect is mapped through the inverse of page_ctm and stored as the annotation "Rect". + * An existing AP/N stream is reused; otherwise a new xobject is created and stored at AP/N. + * Errors are rethrown after the device has been freed. + */ +void +pdf_set_annot_obj_appearance(pdf_document *doc, pdf_obj *obj, const fz_matrix *page_ctm, fz_rect *rect, fz_display_list *disp_list) +{ + fz_context *ctx = doc->ctx; + fz_matrix ctm; + fz_matrix mat = fz_identity; + fz_device *dev = NULL; + pdf_xobject *xobj = NULL; + + fz_invert_matrix(&ctm, page_ctm); + + fz_var(dev); + fz_try(ctx) + { + pdf_obj *ap_obj; + fz_rect trect = *rect; + + fz_transform_rect(&trect, &ctm); + + pdf_dict_puts_drop(obj, "Rect", pdf_new_rect(ctx, &trect)); + + /* See if there is a current normal appearance */ + ap_obj = pdf_dict_getp(obj, "AP/N"); + /* The stream test must look at the appearance object, not at the annotation itself */ + if (!pdf_is_stream(doc, pdf_to_num(ap_obj), pdf_to_gen(ap_obj))) + ap_obj = NULL; + + if (ap_obj == NULL) + { + ap_obj = pdf_new_xobject(doc, &trect, &mat); + pdf_dict_putp_drop(obj, "AP/N", ap_obj); + } + else + { + /* A form xobject carries its bounds under "BBox"; "Rect" is the annotation key */ + pdf_dict_puts_drop(ap_obj, "BBox", pdf_new_rect(ctx, &trect)); +
pdf_dict_puts_drop(ap_obj, "Matrix", pdf_new_matrix(ctx, &mat)); + } + + dev = pdf_new_pdf_device(doc, ap_obj, pdf_dict_gets(ap_obj, "Resources"), &mat); + fz_run_display_list(disp_list, dev, &ctm, &fz_infinite_rect, NULL); + fz_free_device(dev); + /* Already freed: clear it so the catch block cannot free it a second time */ + dev = NULL; + + /* Mark the appearance as changed - required for partial update */ + xobj = pdf_load_xobject(doc, ap_obj); + if (xobj) + { + /* Keep the loaded xobject in step with the BBox and Matrix written above */ + xobj->bbox = trect; + xobj->matrix = mat; + xobj->iteration++; + pdf_drop_xobject(ctx, xobj); + } + + doc->dirty = 1; + } + fz_catch(ctx) + { + fz_free_device(dev); + fz_rethrow(ctx); + } +} + +void +pdf_set_annot_appearance(pdf_document *doc, pdf_annot *annot, fz_rect *rect, fz_display_list *disp_list) +{ + pdf_set_annot_obj_appearance(doc, annot->obj, &annot->page->ctm, rect, disp_list); + update_rect(doc->ctx, annot); +} + +/* + * Build a stroked-line appearance for a markup annotation from its QuadPoints. + * Every group of four points contributes one line from point 0 to point 1, offset by line_height times the vector from point 1 to point 2; the stroke width is that vector's length times line_thickness. + * Returns without doing anything if there are no usable QuadPoints. + */ +void +pdf_set_markup_obj_appearance(pdf_document *doc, pdf_obj *annot, float color[3], float alpha, float line_thickness, float line_height) +{ + fz_context *ctx = doc->ctx; + fz_path *path = NULL; + fz_stroke_state *stroke = NULL; + fz_device *dev = NULL; + fz_display_list *strike_list = NULL; + int i, n; + fz_point *qp = quadpoints(doc, annot, &n); + + if (!qp || n <= 0) + { + /* quadpoints may hand back an allocation holding zero points */ + fz_free(ctx, qp); + return; + } + + fz_var(path); + fz_var(stroke); + fz_var(dev); + fz_var(strike_list); + fz_try(ctx) + { + fz_rect rect = fz_empty_rect; + + rect.x0 = rect.x1 = qp[0].x; + rect.y0 = rect.y1 = qp[0].y; + for (i = 0; i < n; i++) + fz_include_point_in_rect(&rect, &qp[i]); + + strike_list = fz_new_display_list(ctx); + dev = fz_new_list_device(ctx, strike_list); + + for (i = 0; i < n; i += 4) + { + fz_point pt0 = qp[i]; + fz_point pt1 = qp[i+1]; + fz_point up; + float thickness; + + up.x = qp[i+2].x - qp[i+1].x; + up.y = qp[i+2].y - qp[i+1].y; + + pt0.x += line_height * up.x; + pt0.y += line_height * up.y; + pt1.x += line_height * up.x; + pt1.y += line_height * up.y; + + thickness = sqrtf(up.x * up.x + up.y * up.y) * line_thickness; + + /* Start a new path only when the line width changes; lines of equal width share one stroke */ + if (!stroke || fz_abs(stroke->linewidth - thickness) > SMALL_FLOAT) + { + if (stroke) + { + // assert(path) +
fz_stroke_path(dev, path, stroke, &fz_identity, fz_device_rgb(ctx), color, alpha); + fz_drop_stroke_state(ctx, stroke); + stroke = NULL; + fz_free_path(ctx, path); + path = NULL; + } + + stroke = fz_new_stroke_state(ctx); + stroke->linewidth = thickness; + path = fz_new_path(ctx); + } + + fz_moveto(ctx, path, pt0.x, pt0.y); + fz_lineto(ctx, path, pt1.x, pt1.y); + } + + if (stroke) + { + fz_stroke_path(dev, path, stroke, &fz_identity, fz_device_rgb(ctx), color, alpha); + } + + pdf_set_annot_obj_appearance(doc, annot, &fz_identity, &rect, strike_list); + } + fz_always(ctx) + { + fz_free(ctx, qp); + fz_free_device(dev); + fz_drop_stroke_state(ctx, stroke); + fz_free_path(ctx, path); + fz_drop_display_list(ctx, strike_list); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void +pdf_set_ink_obj_appearance(pdf_document *doc, pdf_obj *annot) +{ + fz_context *ctx = doc->ctx; + fz_path *path = NULL; + fz_stroke_state *stroke = NULL; + fz_device *dev = NULL; + fz_display_list *strike_list = NULL; + + fz_var(path); + fz_var(stroke); + fz_var(dev); + fz_var(strike_list); + fz_try(ctx) + { + fz_rect rect = fz_empty_rect; + fz_colorspace *cs; + float color[4]; + float width; + pdf_obj *list; + int n, m, i, j; + + cs = pdf_to_color(doc, pdf_dict_gets(annot, "C"), color); + if (!cs) + { + cs = fz_device_rgb(ctx); + color[0] = 1.0f; + color[1] = 0.0f; + color[2] = 0.0f; + } + + width = pdf_to_real(pdf_dict_gets(pdf_dict_gets(annot, "BS"), "W")); + if (width == 0.0f) + width = 1.0f; + + list = pdf_dict_gets(annot, "InkList"); + + n = pdf_array_len(list); + + strike_list = fz_new_display_list(ctx); + dev = fz_new_list_device(ctx, strike_list); + path = fz_new_path(ctx); + stroke = fz_new_stroke_state(ctx); + stroke->linewidth = width; + + for (i = 0; i < n; i ++) + { + fz_point pt_last; + pdf_obj *arc = pdf_array_get(list, i); + m = pdf_array_len(arc); + + for (j = 0; j < m-1; j += 2) + { + fz_point pt; + pt.x = pdf_to_real(pdf_array_get(arc, j)); + pt.y = 
pdf_to_real(pdf_array_get(arc, j+1)); + + if (i == 0 && j == 0) + { + rect.x0 = rect.x1 = pt.x; + rect.y0 = rect.y1 = pt.y; + } + else + { + fz_include_point_in_rect(&rect, &pt); + } + + if (j == 0) + fz_moveto(ctx, path, pt.x, pt.y); + else + fz_curvetov(ctx, path, pt_last.x, pt_last.y, (pt.x + pt_last.x) / 2, (pt.y + pt_last.y) / 2); + pt_last = pt; + } + /* The loop above only sets pt_last when the arc holds at least one x/y pair */ + if (m >= 2) + fz_lineto(ctx, path, pt_last.x, pt_last.y); + } + + fz_stroke_path(dev, path, stroke, &fz_identity, cs, color, 1.0f); + + fz_expand_rect(&rect, width); + + pdf_set_annot_obj_appearance(doc, annot, &fz_identity, &rect, strike_list); + } + fz_always(ctx) + { + fz_free_device(dev); + fz_drop_stroke_state(ctx, stroke); + fz_free_path(ctx, path); + fz_drop_display_list(ctx, strike_list); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void +pdf_set_markup_appearance(pdf_document *doc, pdf_annot *annot, float color[3], float alpha, float line_thickness, float line_height) +{ + pdf_set_markup_obj_appearance(doc, annot->obj, color, alpha, line_thickness, line_height); + update_rect(doc->ctx, annot); +} diff --git a/source/pdf/pdf-cmap-load.c b/source/pdf/pdf-cmap-load.c new file mode 100644 index 00000000..5fcae15d --- /dev/null +++ b/source/pdf/pdf-cmap-load.c @@ -0,0 +1,141 @@ +#include "mupdf/pdf.h" + +unsigned int +pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap) +{ + if (cmap == NULL) + return 0; + if (cmap->storable.refs < 0) + return 0; + + return cmap->rcap * sizeof(pdf_range) + cmap->tcap * sizeof(short) + pdf_cmap_size(ctx, cmap->usecmap); +} + +/* + * Load CMap stream in PDF file + */ +pdf_cmap * +pdf_load_embedded_cmap(pdf_document *xref, pdf_obj *stmobj) +{ + fz_stream *file = NULL; + pdf_cmap *cmap = NULL; + pdf_cmap *usecmap; + pdf_obj *wmode; + pdf_obj *obj = NULL; + fz_context *ctx = xref->ctx; + int phase = 0; + + fz_var(phase); + fz_var(obj); + fz_var(file); + fz_var(cmap); + + if (pdf_obj_marked(stmobj)) + fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in embedded cmap"); + + if ((cmap =
pdf_find_item(ctx, pdf_free_cmap_imp, stmobj))) + { + return cmap; + } + + fz_try(ctx) + { + file = pdf_open_stream(xref, pdf_to_num(stmobj), pdf_to_gen(stmobj)); + phase = 1; + cmap = pdf_load_cmap(ctx, file); + phase = 2; + fz_close(file); + file = NULL; + + wmode = pdf_dict_gets(stmobj, "WMode"); + if (pdf_is_int(wmode)) + pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(wmode)); + obj = pdf_dict_gets(stmobj, "UseCMap"); + if (pdf_is_name(obj)) + { + usecmap = pdf_load_system_cmap(ctx, pdf_to_name(obj)); + pdf_set_usecmap(ctx, cmap, usecmap); + pdf_drop_cmap(ctx, usecmap); + } + else if (pdf_is_indirect(obj)) + { + phase = 3; + pdf_obj_mark(obj); + usecmap = pdf_load_embedded_cmap(xref, obj); + pdf_obj_unmark(obj); + phase = 4; + pdf_set_usecmap(ctx, cmap, usecmap); + pdf_drop_cmap(ctx, usecmap); + } + + pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); + } + fz_catch(ctx) + { + if (file) + fz_close(file); + if (cmap) + pdf_drop_cmap(ctx, cmap); + if (phase < 1) + fz_rethrow_message(ctx, "cannot open cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj)); + else if (phase < 2) + fz_rethrow_message(ctx, "cannot parse cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj)); + else if (phase < 3) + fz_rethrow_message(ctx, "cannot load system usecmap '%s'", pdf_to_name(obj)); + else + { + if (phase == 3) + pdf_obj_unmark(obj); + fz_rethrow_message(ctx, "cannot load embedded usecmap (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); + } + } + + return cmap; +} + +/* + * Create an Identity-* CMap (for both 1 and 2-byte encodings) + */ +pdf_cmap * +pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes) +{ + pdf_cmap *cmap = pdf_new_cmap(ctx); + fz_try(ctx) + { + sprintf(cmap->cmap_name, "Identity-%c", wmode ? 
'V' : 'H'); + pdf_add_codespace(ctx, cmap, 0x0000, 0xffff, bytes); + pdf_map_range_to_range(ctx, cmap, 0x0000, 0xffff, 0); + pdf_sort_cmap(ctx, cmap); + pdf_set_cmap_wmode(ctx, cmap, wmode); + } + fz_catch(ctx) + { + pdf_drop_cmap(ctx, cmap); + fz_rethrow(ctx); + } + return cmap; +} + +/* + * Load predefined CMap from system. + */ +pdf_cmap * +pdf_load_system_cmap(fz_context *ctx, char *cmap_name) +{ + pdf_cmap *usecmap; + pdf_cmap *cmap; + + cmap = pdf_load_builtin_cmap(ctx, cmap_name); + if (!cmap) + fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap_name); + + if (cmap->usecmap_name[0] && !cmap->usecmap) + { + usecmap = pdf_load_builtin_cmap(ctx, cmap->usecmap_name); + if (!usecmap) + fz_throw(ctx, FZ_ERROR_GENERIC, "nu builtin cmap file: %s", cmap->usecmap_name); + pdf_set_usecmap(ctx, cmap, usecmap); + } + + return cmap; +} diff --git a/source/pdf/pdf-cmap-parse.c b/source/pdf/pdf-cmap-parse.c new file mode 100644 index 00000000..361c512f --- /dev/null +++ b/source/pdf/pdf-cmap-parse.c @@ -0,0 +1,344 @@ +#include "mupdf/pdf.h" + +/* + * CMap parser + */ + +static int +pdf_code_from_string(char *buf, int len) +{ + int a = 0; + while (len--) + a = (a << 8) | *(unsigned char *)buf++; + return a; +} + +static void +pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_token tok; + + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_NAME) + fz_strlcpy(cmap->cmap_name, buf->scratch, sizeof(cmap->cmap_name)); + else + fz_warn(ctx, "expected name after CMapName in cmap"); +} + +static void +pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_token tok; + + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_INT) + pdf_set_cmap_wmode(ctx, cmap, buf->i); + else + fz_warn(ctx, "expected integer after WMode in cmap"); +} + +static void +pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_token tok; + int lo, hi; + + while (1) + { + 
tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcodespacerange")) + return; + + else if (tok == PDF_TOK_STRING) + { + lo = pdf_code_from_string(buf->scratch, buf->len); + tok = pdf_lex(file, buf); + if (tok == PDF_TOK_STRING) + { + hi = pdf_code_from_string(buf->scratch, buf->len); + pdf_add_codespace(ctx, cmap, lo, hi, buf->len); + } + else break; + } + + else break; + } + + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcodespacerange"); +} + +static void +pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_token tok; + int lo, hi, dst; + + while (1) + { + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcidrange")) + return; + + else if (tok != PDF_TOK_STRING) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcidrange"); + + lo = pdf_code_from_string(buf->scratch, buf->len); + + tok = pdf_lex(file, buf); + if (tok != PDF_TOK_STRING) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string"); + + hi = pdf_code_from_string(buf->scratch, buf->len); + + tok = pdf_lex(file, buf); + if (tok != PDF_TOK_INT) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected integer"); + + dst = buf->i; + + pdf_map_range_to_range(ctx, cmap, lo, hi, dst); + } +} + +static void +pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_token tok; + int src, dst; + + while (1) + { + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcidchar")) + return; + + else if (tok != PDF_TOK_STRING) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcidchar"); + + src = pdf_code_from_string(buf->scratch, buf->len); + + tok = pdf_lex(file, buf); + if (tok != PDF_TOK_INT) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected integer"); + + dst = buf->i; + + pdf_map_range_to_range(ctx, cmap, src, src, dst); + } +} + +static void +pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf 
*buf, int lo, int hi) +{ + pdf_token tok; + int dst[256]; + int i; + + while (1) + { + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_CLOSE_ARRAY) + return; + + /* Note: does not handle [ /Name /Name ... ] */ + else if (tok != PDF_TOK_STRING) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or ]"); + + if (buf->len / 2) + { + int len = fz_mini(buf->len / 2, nelem(dst)); + for (i = 0; i < len; i++) + dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2); + + /* Pass the clamped count: only len entries of dst were filled in */ + pdf_map_one_to_many(ctx, cmap, lo, dst, len); + } + + lo ++; + } +} + +/* + * Parse the entries of a bfrange section up to "endbfrange". + * Each entry is two strings giving the low and high source codes, followed by a destination string or an array of destination strings. + * A range with limits outside 0..65535, or with low above high, is warned about and ends parsing of the section. + */ +static void +pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_token tok; + int lo, hi, dst; + + while (1) + { + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endbfrange")) + return; + + else if (tok != PDF_TOK_STRING) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endbfrange"); + + lo = pdf_code_from_string(buf->scratch, buf->len); + + tok = pdf_lex(file, buf); + if (tok != PDF_TOK_STRING) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string"); + + hi = pdf_code_from_string(buf->scratch, buf->len); + if (lo < 0 || lo > 65535 || hi < 0 || hi > 65535 || lo > hi) + { + fz_warn(ctx, "bf_range limits out of range in cmap %s", cmap->cmap_name); + return; + } + + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_STRING) + { + if (buf->len == 2) + { + dst = pdf_code_from_string(buf->scratch, buf->len); + pdf_map_range_to_range(ctx, cmap, lo, hi, dst); + } + else + { + int dststr[256]; + int i; + + if (buf->len / 2) + { + int len = fz_mini(buf->len / 2, nelem(dststr)); + for (i = 0; i < len; i++) + dststr[i] = pdf_code_from_string(&buf->scratch[i * 2], 2); + + while (lo <= hi) + { + /* Map before stepping, so the low code gets the destination string exactly as written */ + pdf_map_one_to_many(ctx, cmap, lo, dststr, i); + dststr[i-1] ++; + lo ++; + } + } + } + } + + else if (tok == PDF_TOK_OPEN_ARRAY) + { + pdf_parse_bf_range_array(ctx, cmap, file, buf, lo, hi); + } + + else + { + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or array or
endbfrange"); + } + } +} + +static void +pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_token tok; + int dst[256]; + int src; + int i; + + while (1) + { + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endbfchar")) + return; + + else if (tok != PDF_TOK_STRING) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endbfchar"); + + src = pdf_code_from_string(buf->scratch, buf->len); + + tok = pdf_lex(file, buf); + /* Note: does not handle /dstName */ + if (tok != PDF_TOK_STRING) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected string"); + + if (buf->len / 2) + { + int len = fz_mini(buf->len / 2, nelem(dst)); + for (i = 0; i < len; i++) + dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2); + pdf_map_one_to_many(ctx, cmap, src, dst, i); + } + } +} + +pdf_cmap * +pdf_load_cmap(fz_context *ctx, fz_stream *file) +{ + pdf_cmap *cmap; + char key[64]; + pdf_lexbuf buf; + pdf_token tok; + + pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); + cmap = pdf_new_cmap(ctx); + + strcpy(key, ".notdef"); + + fz_try(ctx) + { + while (1) + { + tok = pdf_lex(file, &buf); + + if (tok == PDF_TOK_EOF) + break; + + else if (tok == PDF_TOK_NAME) + { + if (!strcmp(buf.scratch, "CMapName")) + pdf_parse_cmap_name(ctx, cmap, file, &buf); + else if (!strcmp(buf.scratch, "WMode")) + pdf_parse_wmode(ctx, cmap, file, &buf); + else + fz_strlcpy(key, buf.scratch, sizeof key); + } + + else if (tok == PDF_TOK_KEYWORD) + { + if (!strcmp(buf.scratch, "endcmap")) + break; + + else if (!strcmp(buf.scratch, "usecmap")) + fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name)); + + else if (!strcmp(buf.scratch, "begincodespacerange")) + pdf_parse_codespace_range(ctx, cmap, file, &buf); + + else if (!strcmp(buf.scratch, "beginbfchar")) + pdf_parse_bf_char(ctx, cmap, file, &buf); + + else if (!strcmp(buf.scratch, "begincidchar")) + pdf_parse_cid_char(ctx, cmap, file, &buf); + + else if (!strcmp(buf.scratch, "beginbfrange")) + 
pdf_parse_bf_range(ctx, cmap, file, &buf); + + else if (!strcmp(buf.scratch, "begincidrange")) + pdf_parse_cid_range(ctx, cmap, file, &buf); + } + + /* ignore everything else */ + } + + pdf_sort_cmap(ctx, cmap); + } + fz_always(ctx) + { + pdf_lexbuf_fin(&buf); + } + fz_catch(ctx) + { + pdf_drop_cmap(ctx, cmap); + fz_rethrow_message(ctx, "syntaxerror in cmap"); + } + + return cmap; +} diff --git a/source/pdf/pdf-cmap-table.c b/source/pdf/pdf-cmap-table.c new file mode 100644 index 00000000..ef69c703 --- /dev/null +++ b/source/pdf/pdf-cmap-table.c @@ -0,0 +1,183 @@ +#include "mupdf/pdf.h" + +#ifndef NOCJK +#include "gen_cmap_cns.h" +#include "gen_cmap_gb.h" +#include "gen_cmap_japan.h" +#include "gen_cmap_korea.h" +#endif + +static const struct { char *name; pdf_cmap *cmap; } cmap_table[] = +{ +#ifndef NOCJK + {"78-EUC-H",&cmap_78_EUC_H}, + {"78-EUC-V",&cmap_78_EUC_V}, + {"78-H",&cmap_78_H}, + {"78-RKSJ-H",&cmap_78_RKSJ_H}, + {"78-RKSJ-V",&cmap_78_RKSJ_V}, + {"78-V",&cmap_78_V}, + {"78ms-RKSJ-H",&cmap_78ms_RKSJ_H}, + {"78ms-RKSJ-V",&cmap_78ms_RKSJ_V}, + {"83pv-RKSJ-H",&cmap_83pv_RKSJ_H}, + {"90ms-RKSJ-H",&cmap_90ms_RKSJ_H}, + {"90ms-RKSJ-V",&cmap_90ms_RKSJ_V}, + {"90msp-RKSJ-H",&cmap_90msp_RKSJ_H}, + {"90msp-RKSJ-V",&cmap_90msp_RKSJ_V}, + {"90pv-RKSJ-H",&cmap_90pv_RKSJ_H}, + {"90pv-RKSJ-V",&cmap_90pv_RKSJ_V}, + {"Add-H",&cmap_Add_H}, + {"Add-RKSJ-H",&cmap_Add_RKSJ_H}, + {"Add-RKSJ-V",&cmap_Add_RKSJ_V}, + {"Add-V",&cmap_Add_V}, + {"Adobe-CNS1-0",&cmap_Adobe_CNS1_0}, + {"Adobe-CNS1-1",&cmap_Adobe_CNS1_1}, + {"Adobe-CNS1-2",&cmap_Adobe_CNS1_2}, + {"Adobe-CNS1-3",&cmap_Adobe_CNS1_3}, + {"Adobe-CNS1-4",&cmap_Adobe_CNS1_4}, + {"Adobe-CNS1-5",&cmap_Adobe_CNS1_5}, + {"Adobe-CNS1-6",&cmap_Adobe_CNS1_6}, + {"Adobe-CNS1-UCS2",&cmap_Adobe_CNS1_UCS2}, + {"Adobe-GB1-0",&cmap_Adobe_GB1_0}, + {"Adobe-GB1-1",&cmap_Adobe_GB1_1}, + {"Adobe-GB1-2",&cmap_Adobe_GB1_2}, + {"Adobe-GB1-3",&cmap_Adobe_GB1_3}, + {"Adobe-GB1-4",&cmap_Adobe_GB1_4}, + {"Adobe-GB1-5",&cmap_Adobe_GB1_5}, + 
{"Adobe-GB1-UCS2",&cmap_Adobe_GB1_UCS2}, + {"Adobe-Japan1-0",&cmap_Adobe_Japan1_0}, + {"Adobe-Japan1-1",&cmap_Adobe_Japan1_1}, + {"Adobe-Japan1-2",&cmap_Adobe_Japan1_2}, + {"Adobe-Japan1-3",&cmap_Adobe_Japan1_3}, + {"Adobe-Japan1-4",&cmap_Adobe_Japan1_4}, + {"Adobe-Japan1-5",&cmap_Adobe_Japan1_5}, + {"Adobe-Japan1-6",&cmap_Adobe_Japan1_6}, + {"Adobe-Japan1-UCS2",&cmap_Adobe_Japan1_UCS2}, + {"Adobe-Japan2-0",&cmap_Adobe_Japan2_0}, + {"Adobe-Korea1-0",&cmap_Adobe_Korea1_0}, + {"Adobe-Korea1-1",&cmap_Adobe_Korea1_1}, + {"Adobe-Korea1-2",&cmap_Adobe_Korea1_2}, + {"Adobe-Korea1-UCS2",&cmap_Adobe_Korea1_UCS2}, + {"B5-H",&cmap_B5_H}, + {"B5-V",&cmap_B5_V}, + {"B5pc-H",&cmap_B5pc_H}, + {"B5pc-V",&cmap_B5pc_V}, + {"CNS-EUC-H",&cmap_CNS_EUC_H}, + {"CNS-EUC-V",&cmap_CNS_EUC_V}, + {"CNS1-H",&cmap_CNS1_H}, + {"CNS1-V",&cmap_CNS1_V}, + {"CNS2-H",&cmap_CNS2_H}, + {"CNS2-V",&cmap_CNS2_V}, + {"ETHK-B5-H",&cmap_ETHK_B5_H}, + {"ETHK-B5-V",&cmap_ETHK_B5_V}, + {"ETen-B5-H",&cmap_ETen_B5_H}, + {"ETen-B5-V",&cmap_ETen_B5_V}, + {"ETenms-B5-H",&cmap_ETenms_B5_H}, + {"ETenms-B5-V",&cmap_ETenms_B5_V}, + {"EUC-H",&cmap_EUC_H}, + {"EUC-V",&cmap_EUC_V}, + {"Ext-H",&cmap_Ext_H}, + {"Ext-RKSJ-H",&cmap_Ext_RKSJ_H}, + {"Ext-RKSJ-V",&cmap_Ext_RKSJ_V}, + {"Ext-V",&cmap_Ext_V}, + {"GB-EUC-H",&cmap_GB_EUC_H}, + {"GB-EUC-V",&cmap_GB_EUC_V}, + {"GB-H",&cmap_GB_H}, + {"GB-V",&cmap_GB_V}, + {"GBK-EUC-H",&cmap_GBK_EUC_H}, + {"GBK-EUC-V",&cmap_GBK_EUC_V}, + {"GBK2K-H",&cmap_GBK2K_H}, + {"GBK2K-V",&cmap_GBK2K_V}, + {"GBKp-EUC-H",&cmap_GBKp_EUC_H}, + {"GBKp-EUC-V",&cmap_GBKp_EUC_V}, + {"GBT-EUC-H",&cmap_GBT_EUC_H}, + {"GBT-EUC-V",&cmap_GBT_EUC_V}, + {"GBT-H",&cmap_GBT_H}, + {"GBT-V",&cmap_GBT_V}, + {"GBTpc-EUC-H",&cmap_GBTpc_EUC_H}, + {"GBTpc-EUC-V",&cmap_GBTpc_EUC_V}, + {"GBpc-EUC-H",&cmap_GBpc_EUC_H}, + {"GBpc-EUC-V",&cmap_GBpc_EUC_V}, + {"H",&cmap_H}, + {"HKdla-B5-H",&cmap_HKdla_B5_H}, + {"HKdla-B5-V",&cmap_HKdla_B5_V}, + {"HKdlb-B5-H",&cmap_HKdlb_B5_H}, + {"HKdlb-B5-V",&cmap_HKdlb_B5_V}, + 
{"HKgccs-B5-H",&cmap_HKgccs_B5_H}, + {"HKgccs-B5-V",&cmap_HKgccs_B5_V}, + {"HKm314-B5-H",&cmap_HKm314_B5_H}, + {"HKm314-B5-V",&cmap_HKm314_B5_V}, + {"HKm471-B5-H",&cmap_HKm471_B5_H}, + {"HKm471-B5-V",&cmap_HKm471_B5_V}, + {"HKscs-B5-H",&cmap_HKscs_B5_H}, + {"HKscs-B5-V",&cmap_HKscs_B5_V}, + {"Hankaku",&cmap_Hankaku}, + {"Hiragana",&cmap_Hiragana}, + {"Hojo-EUC-H",&cmap_Hojo_EUC_H}, + {"Hojo-EUC-V",&cmap_Hojo_EUC_V}, + {"Hojo-H",&cmap_Hojo_H}, + {"Hojo-V",&cmap_Hojo_V}, + {"KSC-EUC-H",&cmap_KSC_EUC_H}, + {"KSC-EUC-V",&cmap_KSC_EUC_V}, + {"KSC-H",&cmap_KSC_H}, + {"KSC-Johab-H",&cmap_KSC_Johab_H}, + {"KSC-Johab-V",&cmap_KSC_Johab_V}, + {"KSC-V",&cmap_KSC_V}, + {"KSCms-UHC-H",&cmap_KSCms_UHC_H}, + {"KSCms-UHC-HW-H",&cmap_KSCms_UHC_HW_H}, + {"KSCms-UHC-HW-V",&cmap_KSCms_UHC_HW_V}, + {"KSCms-UHC-V",&cmap_KSCms_UHC_V}, + {"KSCpc-EUC-H",&cmap_KSCpc_EUC_H}, + {"KSCpc-EUC-V",&cmap_KSCpc_EUC_V}, + {"Katakana",&cmap_Katakana}, + {"NWP-H",&cmap_NWP_H}, + {"NWP-V",&cmap_NWP_V}, + {"RKSJ-H",&cmap_RKSJ_H}, + {"RKSJ-V",&cmap_RKSJ_V}, + {"Roman",&cmap_Roman}, + {"UniCNS-UCS2-H",&cmap_UniCNS_UCS2_H}, + {"UniCNS-UCS2-V",&cmap_UniCNS_UCS2_V}, + {"UniCNS-UTF16-H",&cmap_UniCNS_UTF16_H}, + {"UniCNS-UTF16-V",&cmap_UniCNS_UTF16_V}, + {"UniGB-UCS2-H",&cmap_UniGB_UCS2_H}, + {"UniGB-UCS2-V",&cmap_UniGB_UCS2_V}, + {"UniGB-UTF16-H",&cmap_UniGB_UTF16_H}, + {"UniGB-UTF16-V",&cmap_UniGB_UTF16_V}, + {"UniHojo-UCS2-H",&cmap_UniHojo_UCS2_H}, + {"UniHojo-UCS2-V",&cmap_UniHojo_UCS2_V}, + {"UniHojo-UTF16-H",&cmap_UniHojo_UTF16_H}, + {"UniHojo-UTF16-V",&cmap_UniHojo_UTF16_V}, + {"UniJIS-UCS2-H",&cmap_UniJIS_UCS2_H}, + {"UniJIS-UCS2-HW-H",&cmap_UniJIS_UCS2_HW_H}, + {"UniJIS-UCS2-HW-V",&cmap_UniJIS_UCS2_HW_V}, + {"UniJIS-UCS2-V",&cmap_UniJIS_UCS2_V}, + {"UniJIS-UTF16-H",&cmap_UniJIS_UTF16_H}, + {"UniJIS-UTF16-V",&cmap_UniJIS_UTF16_V}, + {"UniJISPro-UCS2-HW-V",&cmap_UniJISPro_UCS2_HW_V}, + {"UniJISPro-UCS2-V",&cmap_UniJISPro_UCS2_V}, + {"UniKS-UCS2-H",&cmap_UniKS_UCS2_H}, + 
{"UniKS-UCS2-V",&cmap_UniKS_UCS2_V}, + {"UniKS-UTF16-H",&cmap_UniKS_UTF16_H}, + {"UniKS-UTF16-V",&cmap_UniKS_UTF16_V}, + {"V",&cmap_V}, + {"WP-Symbol",&cmap_WP_Symbol}, +#endif +}; + +pdf_cmap * +pdf_load_builtin_cmap(fz_context *ctx, char *cmap_name) +{ + int l = 0; + int r = nelem(cmap_table) - 1; + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(cmap_name, cmap_table[m].name); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return cmap_table[m].cmap; + } + return NULL; +} diff --git a/source/pdf/pdf-cmap.c b/source/pdf/pdf-cmap.c new file mode 100644 index 00000000..c006c6bb --- /dev/null +++ b/source/pdf/pdf-cmap.c @@ -0,0 +1,518 @@ +/* + * The CMap data structure here is constructed on the fly by + * adding simple range-to-range mappings. Then the data structure + * is optimized to contain both range-to-range and range-to-table + * lookups. + * + * Any one-to-many mappings are inserted as one-to-table + * lookups in the beginning, and are not affected by the optimization + * stage. + * + * There is a special function to add a 256-length range-to-table mapping. + * The ranges do not have to be added in order. + * + * This code can be a lot simpler if we don't care about wasting memory, + * or can trust the parser to give us optimal mappings. + */ + +#include "mupdf/pdf.h" + +/* Macros for accessing the combined extent_flags field */ +#define pdf_range_high(r) ((r)->low + ((r)->extent_flags >> 2)) +#define pdf_range_flags(r) ((r)->extent_flags & 3) +#define pdf_range_set_high(r, h) \ + ((r)->extent_flags = (((r)->extent_flags & 3) | ((h - (r)->low) << 2))) +#define pdf_range_set_flags(r, f) \ + ((r)->extent_flags = (((r)->extent_flags & ~3) | f)) + +/* + * Allocate, destroy and simple parameters. 
+ */ + +void +pdf_free_cmap_imp(fz_context *ctx, fz_storable *cmap_) +{ + pdf_cmap *cmap = (pdf_cmap *)cmap_; + if (cmap->usecmap) + pdf_drop_cmap(ctx, cmap->usecmap); + fz_free(ctx, cmap->ranges); + fz_free(ctx, cmap->table); + fz_free(ctx, cmap); +} + +pdf_cmap * +pdf_new_cmap(fz_context *ctx) +{ + pdf_cmap *cmap; + + cmap = fz_malloc_struct(ctx, pdf_cmap); + FZ_INIT_STORABLE(cmap, 1, pdf_free_cmap_imp); + + strcpy(cmap->cmap_name, ""); + strcpy(cmap->usecmap_name, ""); + cmap->usecmap = NULL; + cmap->wmode = 0; + cmap->codespace_len = 0; + + cmap->rlen = 0; + cmap->rcap = 0; + cmap->ranges = NULL; + + cmap->tlen = 0; + cmap->tcap = 0; + cmap->table = NULL; + + return cmap; +} + +/* Could be a macro for speed */ +pdf_cmap * +pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap) +{ + return (pdf_cmap *)fz_keep_storable(ctx, &cmap->storable); +} + +/* Could be a macro for speed */ +void +pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap) +{ + fz_drop_storable(ctx, &cmap->storable); +} + +void +pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap) +{ + int i; + + if (cmap->usecmap) + pdf_drop_cmap(ctx, cmap->usecmap); + cmap->usecmap = pdf_keep_cmap(ctx, usecmap); + + if (cmap->codespace_len == 0) + { + cmap->codespace_len = usecmap->codespace_len; + for (i = 0; i < usecmap->codespace_len; i++) + cmap->codespace[i] = usecmap->codespace[i]; + } +} + +int +pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap) +{ + return cmap->wmode; +} + +void +pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode) +{ + cmap->wmode = wmode; +} + +#ifndef NDEBUG +void +pdf_print_cmap(fz_context *ctx, pdf_cmap *cmap) +{ + int i, k, n; + + printf("cmap $%p /%s {\n", (void *) cmap, cmap->cmap_name); + + if (cmap->usecmap_name[0]) + printf("\tusecmap /%s\n", cmap->usecmap_name); + if (cmap->usecmap) + printf("\tusecmap $%p\n", (void *) cmap->usecmap); + + printf("\twmode %d\n", cmap->wmode); + + printf("\tcodespaces {\n"); + for (i = 0; i < cmap->codespace_len; i++) + { + 
printf("\t\t<%x> <%x>\n", cmap->codespace[i].low, cmap->codespace[i].high); + } + printf("\t}\n"); + + printf("\tranges (%d,%d) {\n", cmap->rlen, cmap->tlen); + for (i = 0; i < cmap->rlen; i++) + { + pdf_range *r = &cmap->ranges[i]; + printf("\t\t<%04x> <%04x> ", r->low, pdf_range_high(r)); + if (pdf_range_flags(r) == PDF_CMAP_TABLE) + { + printf("[ "); + for (k = 0; k < pdf_range_high(r) - r->low + 1; k++) + printf("%d ", cmap->table[r->offset + k]); + printf("]\n"); + } + else if (pdf_range_flags(r) == PDF_CMAP_MULTI) + { + printf("< "); + n = cmap->table[r->offset]; + for (k = 0; k < n; k++) + printf("%04x ", cmap->table[r->offset + 1 + k]); + printf(">\n"); + } + else + printf("%d\n", r->offset); + } + printf("\t}\n}\n"); +} +#endif + +/* + * Add a codespacerange section. + * These ranges are used by pdf_decode_cmap to decode + * multi-byte encoded strings. + */ +void +pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, int low, int high, int n) +{ + if (cmap->codespace_len + 1 == nelem(cmap->codespace)) + { + fz_warn(ctx, "assert: too many code space ranges"); + return; + } + + cmap->codespace[cmap->codespace_len].n = n; + cmap->codespace[cmap->codespace_len].low = low; + cmap->codespace[cmap->codespace_len].high = high; + cmap->codespace_len ++; +} + +/* + * Add an integer to the table. + */ +static void +add_table(fz_context *ctx, pdf_cmap *cmap, int value) +{ + if (cmap->tlen >= USHRT_MAX + 1) + { + fz_warn(ctx, "cmap table is full; ignoring additional entries"); + return; + } + if (cmap->tlen + 1 > cmap->tcap) + { + int new_cap = cmap->tcap > 1 ? (cmap->tcap * 3) / 2 : 256; + cmap->table = fz_resize_array(ctx, cmap->table, new_cap, sizeof(unsigned short)); + cmap->tcap = new_cap; + } + cmap->table[cmap->tlen++] = value; +} + +/* + * Add a range. 
+ */ +static void +add_range(fz_context *ctx, pdf_cmap *cmap, int low, int high, int flag, int offset) +{ + /* Sanity check ranges */ + if (low < 0 || low > 65535 || high < 0 || high > 65535 || low > high) + { + fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name); + return; + } + /* If the range is too large to be represented, split it */ + if (high - low > 0x3fff) + { + add_range(ctx, cmap, low, low+0x3fff, flag, offset); + add_range(ctx, cmap, low+0x3fff, high, flag, offset+0x3fff); + return; + } + if (cmap->rlen + 1 > cmap->rcap) + { + int new_cap = cmap->rcap > 1 ? (cmap->rcap * 3) / 2 : 256; + cmap->ranges = fz_resize_array(ctx, cmap->ranges, new_cap, sizeof(pdf_range)); + cmap->rcap = new_cap; + } + cmap->ranges[cmap->rlen].low = low; + pdf_range_set_high(&cmap->ranges[cmap->rlen], high); + pdf_range_set_flags(&cmap->ranges[cmap->rlen], flag); + cmap->ranges[cmap->rlen].offset = offset; + cmap->rlen ++; +} + +/* + * Add a range-to-table mapping. + */ +void +pdf_map_range_to_table(fz_context *ctx, pdf_cmap *cmap, int low, int *table, int len) +{ + int i; + int high = low + len; + int offset = cmap->tlen; + if (cmap->tlen + len >= USHRT_MAX + 1) + fz_warn(ctx, "cannot map range to table; table is full"); + else + { + for (i = 0; i < len; i++) + add_table(ctx, cmap, table[i]); + add_range(ctx, cmap, low, high, PDF_CMAP_TABLE, offset); + } +} + +/* + * Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25) + */ +void +pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, int low, int high, int offset) +{ + add_range(ctx, cmap, low, high, high - low == 0 ? PDF_CMAP_SINGLE : PDF_CMAP_RANGE, offset); +} + +/* + * Add a single one-to-many mapping. 
+ */ +void +pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, int low, int *values, int len) +{ + int offset, i; + + if (len == 1) + { + add_range(ctx, cmap, low, low, PDF_CMAP_SINGLE, values[0]); + return; + } + + if (len > 8) + { + fz_warn(ctx, "one to many mapping is too large (%d); truncating", len); + len = 8; + } + + if (len == 2 && + values[0] >= 0xD800 && values[0] <= 0xDBFF && + values[1] >= 0xDC00 && values[1] <= 0xDFFF) + { + fz_warn(ctx, "ignoring surrogate pair mapping in cmap %s", cmap->cmap_name); + return; + } + + if (cmap->tlen + len + 1 >= USHRT_MAX + 1) + fz_warn(ctx, "cannot map one to many; table is full"); + else + { + offset = cmap->tlen; + add_table(ctx, cmap, len); + for (i = 0; i < len; i++) + add_table(ctx, cmap, values[i]); + add_range(ctx, cmap, low, low, PDF_CMAP_MULTI, offset); + } +} + +/* + * Sort the input ranges. + * Merge contiguous input ranges to range-to-range if the output is contiguous. + * Merge contiguous input ranges to range-to-table if the output is random. 
+ */ + +static int cmprange(const void *va, const void *vb) +{ + return ((const pdf_range*)va)->low - ((const pdf_range*)vb)->low; +} + +void +pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap) +{ + pdf_range *a; /* last written range on output */ + pdf_range *b; /* current range examined on input */ + + if (cmap->rlen == 0) + return; + + qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange); + + if (cmap->tlen >= USHRT_MAX + 1) + { + fz_warn(ctx, "cmap table is full; will not combine ranges"); + return; + } + + a = cmap->ranges; + b = cmap->ranges + 1; + + while (b < cmap->ranges + cmap->rlen) + { + /* ignore one-to-many mappings */ + if (pdf_range_flags(b) == PDF_CMAP_MULTI) + { + *(++a) = *b; + } + + /* input contiguous */ + else if (pdf_range_high(a) + 1 == b->low) + { + /* output contiguous */ + if (pdf_range_high(a) - a->low + a->offset + 1 == b->offset) + { + /* SR -> R and SS -> R and RR -> R and RS -> R */ + if ((pdf_range_flags(a) == PDF_CMAP_SINGLE || pdf_range_flags(a) == PDF_CMAP_RANGE) && (pdf_range_high(b) - a->low <= 0x3fff)) + { + pdf_range_set_flags(a, PDF_CMAP_RANGE); + pdf_range_set_high(a, pdf_range_high(b)); + } + + /* LS -> L */ + else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_SINGLE && (pdf_range_high(b) - a->low <= 0x3fff)) + { + pdf_range_set_high(a, pdf_range_high(b)); + add_table(ctx, cmap, b->offset); + } + + /* LR -> LR */ + else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_RANGE) + { + *(++a) = *b; + } + + /* XX -> XX */ + else + { + *(++a) = *b; + } + } + + /* output separated */ + else + { + /* SS -> L */ + if (pdf_range_flags(a) == PDF_CMAP_SINGLE && pdf_range_flags(b) == PDF_CMAP_SINGLE) + { + pdf_range_set_flags(a, PDF_CMAP_TABLE); + pdf_range_set_high(a, pdf_range_high(b)); + add_table(ctx, cmap, a->offset); + add_table(ctx, cmap, b->offset); + a->offset = cmap->tlen - 2; + } + + /* LS -> L */ + else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) 
== PDF_CMAP_SINGLE && (pdf_range_high(b) - a->low <= 0x3fff)) + { + pdf_range_set_high(a, pdf_range_high(b)); + add_table(ctx, cmap, b->offset); + } + + /* XX -> XX */ + else + { + *(++a) = *b; + } + } + } + + /* input separated: XX -> XX */ + else + { + *(++a) = *b; + } + + b ++; + } + + cmap->rlen = a - cmap->ranges + 1; +} + +/* + * Lookup the mapping of a codepoint. + */ +int +pdf_lookup_cmap(pdf_cmap *cmap, int cpt) +{ + int l = 0; + int r = cmap->rlen - 1; + int m; + + while (l <= r) + { + m = (l + r) >> 1; + if (cpt < cmap->ranges[m].low) + r = m - 1; + else if (cpt > pdf_range_high(&cmap->ranges[m])) + l = m + 1; + else + { + int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset; + if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_TABLE) + return cmap->table[i]; + if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_MULTI) + return -1; /* should use lookup_cmap_full */ + return i; + } + } + + if (cmap->usecmap) + return pdf_lookup_cmap(cmap->usecmap, cpt); + + return -1; +} + +int +pdf_lookup_cmap_full(pdf_cmap *cmap, int cpt, int *out) +{ + int i, k, n; + int l = 0; + int r = cmap->rlen - 1; + int m; + + while (l <= r) + { + m = (l + r) >> 1; + if (cpt < cmap->ranges[m].low) + r = m - 1; + else if (cpt > pdf_range_high(&cmap->ranges[m])) + l = m + 1; + else + { + k = cpt - cmap->ranges[m].low + cmap->ranges[m].offset; + if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_TABLE) + { + out[0] = cmap->table[k]; + return 1; + } + else if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_MULTI) + { + n = cmap->ranges[m].offset; + for (i = 0; i < cmap->table[n]; i++) + out[i] = cmap->table[n + i + 1]; + return cmap->table[n]; + } + else + { + out[0] = k; + return 1; + } + } + } + + if (cmap->usecmap) + return pdf_lookup_cmap_full(cmap->usecmap, cpt, out); + + return 0; +} + +/* + * Use the codespace ranges to extract a codepoint from a + * multi-byte encoded string. 
+ */ +int +pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, int *cpt) +{ + int k, n, c; + + c = 0; + for (n = 0; n < 4; n++) + { + c = (c << 8) | buf[n]; + for (k = 0; k < cmap->codespace_len; k++) + { + if (cmap->codespace[k].n == n + 1) + { + if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high) + { + *cpt = c; + return n + 1; + } + } + } + } + + *cpt = 0; + return 1; +} diff --git a/source/pdf/pdf-colorspace.c b/source/pdf/pdf-colorspace.c new file mode 100644 index 00000000..84b3e847 --- /dev/null +++ b/source/pdf/pdf-colorspace.c @@ -0,0 +1,338 @@ +#include "mupdf/pdf.h" + +/* ICCBased */ + +static fz_colorspace * +load_icc_based(pdf_document *xref, pdf_obj *dict) +{ + int n; + + n = pdf_to_int(pdf_dict_gets(dict, "N")); + + switch (n) + { + case 1: return fz_device_gray(xref->ctx); + case 3: return fz_device_rgb(xref->ctx); + case 4: return fz_device_cmyk(xref->ctx); + } + + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "syntaxerror: ICCBased must have 1, 3 or 4 components"); + return NULL; /* Stupid MSVC */ +} + +/* Lab */ + +static inline float fung(float x) +{ + if (x >= 6.0f / 29.0f) + return x * x * x; + return (108.0f / 841.0f) * (x - (4.0f / 29.0f)); +} + +static void +lab_to_rgb(fz_context *ctx, fz_colorspace *cs, float *lab, float *rgb) +{ + /* input is in range (0..100, -128..127, -128..127) not (0..1, 0..1, 0..1) */ + float lstar, astar, bstar, l, m, n, x, y, z, r, g, b; + lstar = lab[0]; + astar = lab[1]; + bstar = lab[2]; + m = (lstar + 16) / 116; + l = m + astar / 500; + n = m - bstar / 200; + x = fung(l); + y = fung(m); + z = fung(n); + r = (3.240449f * x + -1.537136f * y + -0.498531f * z) * 0.830026f; + g = (-0.969265f * x + 1.876011f * y + 0.041556f * z) * 1.05452f; + b = (0.055643f * x + -0.204026f * y + 1.057229f * z) * 1.1003f; + rgb[0] = sqrtf(fz_clamp(r, 0, 1)); + rgb[1] = sqrtf(fz_clamp(g, 0, 1)); + rgb[2] = sqrtf(fz_clamp(b, 0, 1)); +} + +static void +rgb_to_lab(fz_context *ctx, fz_colorspace *cs, float *rgb, float *lab) +{ + 
fz_warn(ctx, "cannot convert into L*a*b colorspace"); + lab[0] = rgb[0]; + lab[1] = rgb[1]; + lab[2] = rgb[2]; +} + +static fz_colorspace k_device_lab = { {-1, fz_free_colorspace_imp}, 0, "Lab", 3, lab_to_rgb, rgb_to_lab }; +static fz_colorspace *fz_device_lab = &k_device_lab; + +/* Separation and DeviceN */ + +struct separation +{ + fz_colorspace *base; + fz_function *tint; +}; + +static void +separation_to_rgb(fz_context *ctx, fz_colorspace *cs, float *color, float *rgb) +{ + struct separation *sep = cs->data; + float alt[FZ_MAX_COLORS]; + fz_eval_function(ctx, sep->tint, color, cs->n, alt, sep->base->n); + sep->base->to_rgb(ctx, sep->base, alt, rgb); +} + +static void +free_separation(fz_context *ctx, fz_colorspace *cs) +{ + struct separation *sep = cs->data; + fz_drop_colorspace(ctx, sep->base); + fz_drop_function(ctx, sep->tint); + fz_free(ctx, sep); +} + +static fz_colorspace * +load_separation(pdf_document *xref, pdf_obj *array) +{ + fz_colorspace *cs; + struct separation *sep = NULL; + fz_context *ctx = xref->ctx; + pdf_obj *nameobj = pdf_array_get(array, 1); + pdf_obj *baseobj = pdf_array_get(array, 2); + pdf_obj *tintobj = pdf_array_get(array, 3); + fz_colorspace *base; + fz_function *tint = NULL; + int n; + + fz_var(tint); + fz_var(sep); + + if (pdf_is_array(nameobj)) + n = pdf_array_len(nameobj); + else + n = 1; + + if (n > FZ_MAX_COLORS) + fz_throw(ctx, FZ_ERROR_GENERIC, "too many components in colorspace"); + + base = pdf_load_colorspace(xref, baseobj); + + fz_try(ctx) + { + tint = pdf_load_function(xref, tintobj, n, base->n); + /* RJW: fz_drop_colorspace(ctx, base); + * "cannot load tint function (%d %d R)", pdf_to_num(tintobj), pdf_to_gen(tintobj) */ + + sep = fz_malloc_struct(ctx, struct separation); + sep->base = base; + sep->tint = tint; + + cs = fz_new_colorspace(ctx, n == 1 ? "Separation" : "DeviceN", n); + cs->to_rgb = separation_to_rgb; + cs->free_data = free_separation; + cs->data = sep; + cs->size += sizeof(struct separation) + (base ? 
base->size : 0) + fz_function_size(tint); + } + fz_catch(ctx) + { + fz_drop_colorspace(ctx, base); + fz_drop_function(ctx, tint); + fz_free(ctx, sep); + fz_rethrow(ctx); + } + + return cs; +} + +static fz_colorspace * +load_indexed(pdf_document *xref, pdf_obj *array) +{ + fz_context *ctx = xref->ctx; + pdf_obj *baseobj = pdf_array_get(array, 1); + pdf_obj *highobj = pdf_array_get(array, 2); + pdf_obj *lookupobj = pdf_array_get(array, 3); + fz_colorspace *base = NULL; + fz_colorspace *cs; + int i, n, high; + unsigned char *lookup = NULL; + + fz_var(base); + + fz_try(ctx) + { + base = pdf_load_colorspace(xref, baseobj); + + high = pdf_to_int(highobj); + high = fz_clampi(high, 0, 255); + n = base->n * (high + 1); + lookup = fz_malloc_array(ctx, 1, n); + + if (pdf_is_string(lookupobj) && pdf_to_str_len(lookupobj) == n) + { + unsigned char *buf = (unsigned char *) pdf_to_str_buf(lookupobj); + for (i = 0; i < n; i++) + lookup[i] = buf[i]; + } + else if (pdf_is_indirect(lookupobj)) + { + fz_stream *file = NULL; + + fz_var(file); + + fz_try(ctx) + { + file = pdf_open_stream(xref, pdf_to_num(lookupobj), pdf_to_gen(lookupobj)); + i = fz_read(file, lookup, n); + } + fz_always(ctx) + { + fz_close(file); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot open colorspace lookup table (%d 0 R)", pdf_to_num(lookupobj)); + } + } + else + { + fz_rethrow_message(ctx, "cannot parse colorspace lookup table"); + } + + cs = fz_new_indexed_colorspace(ctx, base, high, lookup); + } + fz_catch(ctx) + { + fz_drop_colorspace(ctx, base); + fz_free(ctx, lookup); + fz_rethrow(ctx); + } + + return cs; +} + +/* Parse and create colorspace from PDF object */ + +static fz_colorspace * +pdf_load_colorspace_imp(pdf_document *xref, pdf_obj *obj) +{ + fz_context *ctx = xref->ctx; + + if (pdf_obj_marked(obj)) + fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in colorspace definition"); + + if (pdf_is_name(obj)) + { + const char *str = pdf_to_name(obj); + if (!strcmp(str, "Pattern")) + return 
fz_device_gray(ctx); + else if (!strcmp(str, "G")) + return fz_device_gray(ctx); + else if (!strcmp(str, "RGB")) + return fz_device_rgb(ctx); + else if (!strcmp(str, "CMYK")) + return fz_device_cmyk(ctx); + else if (!strcmp(str, "DeviceGray")) + return fz_device_gray(ctx); + else if (!strcmp(str, "DeviceRGB")) + return fz_device_rgb(ctx); + else if (!strcmp(str, "DeviceCMYK")) + return fz_device_cmyk(ctx); + else + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown colorspace: %s", pdf_to_name(obj)); + } + + else if (pdf_is_array(obj)) + { + pdf_obj *name = pdf_array_get(obj, 0); + const char *str = pdf_to_name(name); + + if (pdf_is_name(name)) + { + /* load base colorspace instead */ + if (!strcmp(str, "G")) + return fz_device_gray(ctx); + else if (!strcmp(str, "RGB")) + return fz_device_rgb(ctx); + else if (!strcmp(str, "CMYK")) + return fz_device_cmyk(ctx); + else if (!strcmp(str, "DeviceGray")) + return fz_device_gray(ctx); + else if (!strcmp(str, "DeviceRGB")) + return fz_device_rgb(ctx); + else if (!strcmp(str, "DeviceCMYK")) + return fz_device_cmyk(ctx); + else if (!strcmp(str, "CalGray")) + return fz_device_gray(ctx); + else if (!strcmp(str, "CalRGB")) + return fz_device_rgb(ctx); + else if (!strcmp(str, "CalCMYK")) + return fz_device_cmyk(ctx); + else if (!strcmp(str, "Lab")) + return fz_device_lab; + else + { + fz_colorspace *cs; + fz_try(ctx) + { + pdf_obj_mark(obj); + if (!strcmp(str, "ICCBased")) + cs = load_icc_based(xref, pdf_array_get(obj, 1)); + + else if (!strcmp(str, "Indexed")) + cs = load_indexed(xref, obj); + else if (!strcmp(str, "I")) + cs = load_indexed(xref, obj); + + else if (!strcmp(str, "Separation")) + cs = load_separation(xref, obj); + + else if (!strcmp(str, "DeviceN")) + cs = load_separation(xref, obj); + else if (!strcmp(str, "Pattern")) + { + pdf_obj *pobj; + + pobj = pdf_array_get(obj, 1); + if (!pobj) + { + cs = fz_device_gray(ctx); + break; + } + + cs = pdf_load_colorspace(xref, pobj); + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, 
"syntaxerror: unknown colorspace %s", str); + } + fz_always(ctx) + { + pdf_obj_unmark(obj); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + return cs; + } + } + } + + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "syntaxerror: could not parse color space (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); + return NULL; /* Stupid MSVC */ +} + +fz_colorspace * +pdf_load_colorspace(pdf_document *xref, pdf_obj *obj) +{ + fz_context *ctx = xref->ctx; + fz_colorspace *cs; + + if ((cs = pdf_find_item(ctx, fz_free_colorspace_imp, obj))) + { + return cs; + } + + cs = pdf_load_colorspace_imp(xref, obj); + + pdf_store_item(ctx, obj, cs, cs->size); + + return cs; +} diff --git a/source/pdf/pdf-crypt.c b/source/pdf/pdf-crypt.c new file mode 100644 index 00000000..5128473c --- /dev/null +++ b/source/pdf/pdf-crypt.c @@ -0,0 +1,1010 @@ +#include "mupdf/pdf.h" + +enum +{ + PDF_CRYPT_NONE, + PDF_CRYPT_RC4, + PDF_CRYPT_AESV2, + PDF_CRYPT_AESV3, + PDF_CRYPT_UNKNOWN, +}; + +typedef struct pdf_crypt_filter_s pdf_crypt_filter; + +struct pdf_crypt_filter_s +{ + int method; + int length; +}; + +struct pdf_crypt_s +{ + pdf_obj *id; + + int v; + int length; + pdf_obj *cf; + pdf_crypt_filter stmf; + pdf_crypt_filter strf; + + int r; + unsigned char o[48]; + unsigned char u[48]; + unsigned char oe[32]; + unsigned char ue[32]; + int p; + int encrypt_metadata; + + unsigned char key[32]; /* decryption key generated from password */ + fz_context *ctx; +}; + +static void pdf_parse_crypt_filter(fz_context *ctx, pdf_crypt_filter *cf, pdf_crypt *crypt, char *name); + +/* + * Create crypt object for decrypting strings and streams + * given the Encryption and ID objects. 
+ */ + +pdf_crypt * +pdf_new_crypt(fz_context *ctx, pdf_obj *dict, pdf_obj *id) +{ + pdf_crypt *crypt; + pdf_obj *obj; + + crypt = fz_malloc_struct(ctx, pdf_crypt); + + /* Common to all security handlers (PDF 1.7 table 3.18) */ + + obj = pdf_dict_gets(dict, "Filter"); + if (!pdf_is_name(obj)) + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "unspecified encryption handler"); + } + if (strcmp(pdf_to_name(obj), "Standard") != 0) + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown encryption handler: '%s'", pdf_to_name(obj)); + } + + crypt->v = 0; + obj = pdf_dict_gets(dict, "V"); + if (pdf_is_int(obj)) + crypt->v = pdf_to_int(obj); + if (crypt->v != 1 && crypt->v != 2 && crypt->v != 4 && crypt->v != 5) + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown encryption version"); + } + + /* Standard security handler (PDF 1.7 table 3.19) */ + + obj = pdf_dict_gets(dict, "R"); + if (pdf_is_int(obj)) + crypt->r = pdf_to_int(obj); + else if (crypt->v <= 4) + { + fz_warn(ctx, "encryption dictionary missing revision value, guessing..."); + if (crypt->v < 2) + crypt->r = 2; + else if (crypt->v == 2) + crypt->r = 3; + else if (crypt->v == 4) + crypt->r = 4; + } + else + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "encryption dictionary missing version and revision value"); + } + + obj = pdf_dict_gets(dict, "O"); + if (pdf_is_string(obj) && pdf_to_str_len(obj) == 32) + memcpy(crypt->o, pdf_to_str_buf(obj), 32); + /* /O and /U are supposed to be 48 bytes long for revision 5 and 6, they're often longer, though */ + else if (crypt->r >= 5 && pdf_is_string(obj) && pdf_to_str_len(obj) >= 48) + memcpy(crypt->o, pdf_to_str_buf(obj), 48); + else + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "encryption dictionary missing owner password"); + } + + obj = pdf_dict_gets(dict, "U"); + if (pdf_is_string(obj) && pdf_to_str_len(obj) == 32) + memcpy(crypt->u, pdf_to_str_buf(obj), 32); + 
/* /O and /U are supposed to be 48 bytes long for revision 5 and 6, they're often longer, though */ + else if (crypt->r >= 5 && pdf_is_string(obj) && pdf_to_str_len(obj) >= 48) + memcpy(crypt->u, pdf_to_str_buf(obj), 48); + else if (pdf_is_string(obj) && pdf_to_str_len(obj) < 32) + { + fz_warn(ctx, "encryption password key too short (%d)", pdf_to_str_len(obj)); + memcpy(crypt->u, pdf_to_str_buf(obj), pdf_to_str_len(obj)); + } + else + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "encryption dictionary missing user password"); + } + + obj = pdf_dict_gets(dict, "P"); + if (pdf_is_int(obj)) + crypt->p = pdf_to_int(obj); + else + { + fz_warn(ctx, "encryption dictionary missing permissions"); + crypt->p = 0xfffffffc; + } + + if (crypt->r == 5 || crypt->r == 6) + { + obj = pdf_dict_gets(dict, "OE"); + if (!pdf_is_string(obj) || pdf_to_str_len(obj) != 32) + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "encryption dictionary missing owner encryption key"); + } + memcpy(crypt->oe, pdf_to_str_buf(obj), 32); + + obj = pdf_dict_gets(dict, "UE"); + if (!pdf_is_string(obj) || pdf_to_str_len(obj) != 32) + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "encryption dictionary missing user encryption key"); + } + memcpy(crypt->ue, pdf_to_str_buf(obj), 32); + } + + crypt->encrypt_metadata = 1; + obj = pdf_dict_gets(dict, "EncryptMetadata"); + if (pdf_is_bool(obj)) + crypt->encrypt_metadata = pdf_to_bool(obj); + + /* Extract file identifier string */ + + if (pdf_is_array(id) && pdf_array_len(id) == 2) + { + obj = pdf_array_get(id, 0); + if (pdf_is_string(obj)) + crypt->id = pdf_keep_obj(obj); + } + else + fz_warn(ctx, "missing file identifier, may not be able to do decryption"); + + /* Determine encryption key length */ + + crypt->length = 40; + if (crypt->v == 2 || crypt->v == 4) + { + obj = pdf_dict_gets(dict, "Length"); + if (pdf_is_int(obj)) + crypt->length = pdf_to_int(obj); + + /* work-around for pdf generators that 
assume length is in bytes */ + if (crypt->length < 40) + crypt->length = crypt->length * 8; + + if (crypt->length % 8 != 0) + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid encryption key length"); + } + if (crypt->length < 0 || crypt->length > 256) + { + pdf_free_crypt(ctx, crypt); + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid encryption key length"); + } + } + + if (crypt->v == 5) + crypt->length = 256; + + if (crypt->v == 1 || crypt->v == 2) + { + crypt->stmf.method = PDF_CRYPT_RC4; + crypt->stmf.length = crypt->length; + + crypt->strf.method = PDF_CRYPT_RC4; + crypt->strf.length = crypt->length; + } + + if (crypt->v == 4 || crypt->v == 5) + { + crypt->stmf.method = PDF_CRYPT_NONE; + crypt->stmf.length = crypt->length; + + crypt->strf.method = PDF_CRYPT_NONE; + crypt->strf.length = crypt->length; + + obj = pdf_dict_gets(dict, "CF"); + if (pdf_is_dict(obj)) + { + crypt->cf = pdf_keep_obj(obj); + } + else + { + crypt->cf = NULL; + } + + fz_try(ctx) + { + obj = pdf_dict_gets(dict, "StmF"); + if (pdf_is_name(obj)) + pdf_parse_crypt_filter(ctx, &crypt->stmf, crypt, pdf_to_name(obj)); + + obj = pdf_dict_gets(dict, "StrF"); + if (pdf_is_name(obj)) + pdf_parse_crypt_filter(ctx, &crypt->strf, crypt, pdf_to_name(obj)); + } + fz_catch(ctx) + { + pdf_free_crypt(ctx, crypt); + fz_rethrow_message(ctx, "cannot parse string crypt filter (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); + } + + /* in crypt revision 4, the crypt filter determines the key length */ + if (crypt->strf.method != PDF_CRYPT_NONE) + crypt->length = crypt->stmf.length; + } + + return crypt; +} + +void +pdf_free_crypt(fz_context *ctx, pdf_crypt *crypt) +{ + pdf_drop_obj(crypt->id); + pdf_drop_obj(crypt->cf); + fz_free(ctx, crypt); +} + +/* + * Parse a CF dictionary entry (PDF 1.7 table 3.22) + */ + +static void +pdf_parse_crypt_filter(fz_context *ctx, pdf_crypt_filter *cf, pdf_crypt *crypt, char *name) +{ + pdf_obj *obj; + pdf_obj *dict; + int is_identity = (strcmp(name, "Identity") 
== 0); + int is_stdcf = (!is_identity && (strcmp(name, "StdCF") == 0)); + + if (!is_identity && !is_stdcf) + fz_throw(ctx, FZ_ERROR_GENERIC, "Crypt Filter not Identity or StdCF (%d %d R)", pdf_to_num(crypt->cf), pdf_to_gen(crypt->cf)); + + cf->method = PDF_CRYPT_NONE; + cf->length = crypt->length; + + if (!crypt->cf) + { + cf->method = (is_identity ? PDF_CRYPT_NONE : PDF_CRYPT_RC4); + return; + } + + dict = pdf_dict_gets(crypt->cf, name); + if (!pdf_is_dict(dict)) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse crypt filter (%d %d R)", pdf_to_num(crypt->cf), pdf_to_gen(crypt->cf)); + + obj = pdf_dict_gets(dict, "CFM"); + if (pdf_is_name(obj)) + { + if (!strcmp(pdf_to_name(obj), "None")) + cf->method = PDF_CRYPT_NONE; + else if (!strcmp(pdf_to_name(obj), "V2")) + cf->method = PDF_CRYPT_RC4; + else if (!strcmp(pdf_to_name(obj), "AESV2")) + cf->method = PDF_CRYPT_AESV2; + else if (!strcmp(pdf_to_name(obj), "AESV3")) + cf->method = PDF_CRYPT_AESV3; + else + fz_warn(ctx, "unknown encryption method: %s", pdf_to_name(obj)); + } + + obj = pdf_dict_gets(dict, "Length"); + if (pdf_is_int(obj)) + cf->length = pdf_to_int(obj); + + /* the length for crypt filters is supposed to be in bytes not bits */ + if (cf->length < 40) + cf->length = cf->length * 8; + + if ((cf->length % 8) != 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key length: %d", cf->length); + + if ((crypt->r == 1 || crypt->r == 2 || crypt->r == 4) && + (cf->length < 0 || cf->length > 128)) + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key length: %d", cf->length); + if ((crypt->r == 5 || crypt->r == 6) && cf->length != 256) + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key length: %d", cf->length); +} + +/* + * Compute an encryption key (PDF 1.7 algorithm 3.2) + */ + +static const unsigned char padding[32] = +{ + 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41, + 0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08, + 0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80, + 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a +}; + 
+static void +pdf_compute_encryption_key(pdf_crypt *crypt, unsigned char *password, int pwlen, unsigned char *key) +{ + unsigned char buf[32]; + unsigned int p; + int i, n; + fz_md5 md5; + + n = crypt->length / 8; + + /* Step 1 - copy and pad password string */ + if (pwlen > 32) + pwlen = 32; + memcpy(buf, password, pwlen); + memcpy(buf + pwlen, padding, 32 - pwlen); + + /* Step 2 - init md5 and pass value of step 1 */ + fz_md5_init(&md5); + fz_md5_update(&md5, buf, 32); + + /* Step 3 - pass O value */ + fz_md5_update(&md5, crypt->o, 32); + + /* Step 4 - pass P value as unsigned int, low-order byte first */ + p = (unsigned int) crypt->p; + buf[0] = (p) & 0xFF; + buf[1] = (p >> 8) & 0xFF; + buf[2] = (p >> 16) & 0xFF; + buf[3] = (p >> 24) & 0xFF; + fz_md5_update(&md5, buf, 4); + + /* Step 5 - pass first element of ID array */ + fz_md5_update(&md5, (unsigned char *)pdf_to_str_buf(crypt->id), pdf_to_str_len(crypt->id)); + + /* Step 6 (revision 4 or greater) - if metadata is not encrypted pass 0xFFFFFFFF */ + if (crypt->r >= 4) + { + if (!crypt->encrypt_metadata) + { + buf[0] = 0xFF; + buf[1] = 0xFF; + buf[2] = 0xFF; + buf[3] = 0xFF; + fz_md5_update(&md5, buf, 4); + } + } + + /* Step 7 - finish the hash */ + fz_md5_final(&md5, buf); + + /* Step 8 (revision 3 or greater) - do some voodoo 50 times */ + if (crypt->r >= 3) + { + for (i = 0; i < 50; i++) + { + fz_md5_init(&md5); + fz_md5_update(&md5, buf, n); + fz_md5_final(&md5, buf); + } + } + + /* Step 9 - the key is the first 'n' bytes of the result */ + memcpy(key, buf, n); +} + +/* + * Compute an encryption key (PDF 1.7 ExtensionLevel 3 algorithm 3.2a) + */ + +static void +pdf_compute_encryption_key_r5(fz_context *ctx, pdf_crypt *crypt, unsigned char *password, int pwlen, int ownerkey, unsigned char *validationkey) +{ + unsigned char buffer[128 + 8 + 48]; + fz_sha256 sha256; + fz_aes aes; + + /* Step 2 - truncate UTF-8 password to 127 characters */ + + if (pwlen > 127) + pwlen = 127; + + /* Step 3/4 - test password 
against owner/user key and compute encryption key */ + + memcpy(buffer, password, pwlen); + if (ownerkey) + { + memcpy(buffer + pwlen, crypt->o + 32, 8); + memcpy(buffer + pwlen + 8, crypt->u, 48); + } + else + memcpy(buffer + pwlen, crypt->u + 32, 8); + + fz_sha256_init(&sha256); + fz_sha256_update(&sha256, buffer, pwlen + 8 + (ownerkey ? 48 : 0)); + fz_sha256_final(&sha256, validationkey); + + /* Step 3.5/4.5 - compute file encryption key from OE/UE */ + + memcpy(buffer + pwlen, crypt->u + 40, 8); + + fz_sha256_init(&sha256); + fz_sha256_update(&sha256, buffer, pwlen + 8); + fz_sha256_final(&sha256, buffer); + + /* clear password buffer and use it as iv */ + memset(buffer + 32, 0, sizeof(buffer) - 32); + if (aes_setkey_dec(&aes, buffer, crypt->length)) + fz_throw(ctx, FZ_ERROR_GENERIC, "AES key init failed (keylen=%d)", crypt->length); + aes_crypt_cbc(&aes, AES_DECRYPT, 32, buffer + 32, ownerkey ? crypt->oe : crypt->ue, crypt->key); +} + +/* + * Compute an encryption key (PDF 1.7 ExtensionLevel 8 algorithm) + * + * Adobe has not yet released the details, so the algorithm reference is: + * http://esec-lab.sogeti.com/post/The-undocumented-password-validation-algorithm-of-Adobe-Reader-X + */ + +static void +pdf_compute_hardened_hash_r6(fz_context *ctx, unsigned char *password, int pwlen, unsigned char salt[16], unsigned char *ownerkey, unsigned char hash[32]) +{ + unsigned char data[(128 + 64 + 48) * 64]; + unsigned char block[64]; + int block_size = 32; + int data_len = 0; + int i, j, sum; + + fz_sha256 sha256; + fz_sha384 sha384; + fz_sha512 sha512; + fz_aes aes; + + /* Step 1: calculate initial data block */ + fz_sha256_init(&sha256); + fz_sha256_update(&sha256, password, pwlen); + fz_sha256_update(&sha256, salt, 8); + if (ownerkey) + fz_sha256_update(&sha256, ownerkey, 48); + fz_sha256_final(&sha256, block); + + for (i = 0; i < 64 || i < data[data_len * 64 - 1] + 32; i++) + { + /* Step 2: repeat password and data block 64 times */ + memcpy(data, password, 
pwlen); + memcpy(data + pwlen, block, block_size); + memcpy(data + pwlen + block_size, ownerkey, ownerkey ? 48 : 0); + data_len = pwlen + block_size + (ownerkey ? 48 : 0); + for (j = 1; j < 64; j++) + memcpy(data + j * data_len, data, data_len); + + /* Step 3: encrypt data using data block as key and iv */ + if (aes_setkey_enc(&aes, block, 128)) + fz_throw(ctx, FZ_ERROR_GENERIC, "AES key init failed (keylen=%d)", 128); + aes_crypt_cbc(&aes, AES_ENCRYPT, data_len * 64, block + 16, data, data); + + /* Step 4: determine SHA-2 hash size for this round */ + for (j = 0, sum = 0; j < 16; j++) + sum += data[j]; + + /* Step 5: calculate data block for next round */ + block_size = 32 + (sum % 3) * 16; + switch (block_size) + { + case 32: + fz_sha256_init(&sha256); + fz_sha256_update(&sha256, data, data_len * 64); + fz_sha256_final(&sha256, block); + break; + case 48: + fz_sha384_init(&sha384); + fz_sha384_update(&sha384, data, data_len * 64); + fz_sha384_final(&sha384, block); + break; + case 64: + fz_sha512_init(&sha512); + fz_sha512_update(&sha512, data, data_len * 64); + fz_sha512_final(&sha512, block); + break; + } + } + + memset(data, 0, sizeof(data)); + memcpy(hash, block, 32); +} + +static void +pdf_compute_encryption_key_r6(fz_context *ctx, pdf_crypt *crypt, unsigned char *password, int pwlen, int ownerkey, unsigned char *validationkey) +{ + unsigned char hash[32]; + unsigned char iv[16]; + fz_aes aes; + + if (pwlen > 127) + pwlen = 127; + + pdf_compute_hardened_hash_r6(ctx, password, pwlen, + (ownerkey ? crypt->o : crypt->u) + 32, + ownerkey ? crypt->u : NULL, validationkey); + pdf_compute_hardened_hash_r6(ctx, password, pwlen, + crypt->u + 40, NULL, hash); + + memset(iv, 0, sizeof(iv)); + if (aes_setkey_dec(&aes, hash, 256)) + fz_throw(ctx, FZ_ERROR_GENERIC, "AES key init failed (keylen=256)"); + aes_crypt_cbc(&aes, AES_DECRYPT, 32, iv, + ownerkey ? 
crypt->oe : crypt->ue, crypt->key); +} + +/* + * Computing the user password (PDF 1.7 algorithm 3.4 and 3.5) + * Also save the generated key for decrypting objects and streams in crypt->key. + */ + +static void +pdf_compute_user_password(fz_context *ctx, pdf_crypt *crypt, unsigned char *password, int pwlen, unsigned char *output) +{ + if (crypt->r == 2) + { + fz_arc4 arc4; + + pdf_compute_encryption_key(crypt, password, pwlen, crypt->key); + fz_arc4_init(&arc4, crypt->key, crypt->length / 8); + fz_arc4_encrypt(&arc4, output, padding, 32); + } + + if (crypt->r == 3 || crypt->r == 4) + { + unsigned char xor[32]; + unsigned char digest[16]; + fz_md5 md5; + fz_arc4 arc4; + int i, x, n; + + n = crypt->length / 8; + + pdf_compute_encryption_key(crypt, password, pwlen, crypt->key); + + fz_md5_init(&md5); + fz_md5_update(&md5, padding, 32); + fz_md5_update(&md5, (unsigned char*)pdf_to_str_buf(crypt->id), pdf_to_str_len(crypt->id)); + fz_md5_final(&md5, digest); + + fz_arc4_init(&arc4, crypt->key, n); + fz_arc4_encrypt(&arc4, output, digest, 16); + + for (x = 1; x <= 19; x++) + { + for (i = 0; i < n; i++) + xor[i] = crypt->key[i] ^ x; + fz_arc4_init(&arc4, xor, n); + fz_arc4_encrypt(&arc4, output, output, 16); + } + + memcpy(output + 16, padding, 16); + } + + if (crypt->r == 5) + { + pdf_compute_encryption_key_r5(ctx, crypt, password, pwlen, 0, output); + } + + if (crypt->r == 6) + { + pdf_compute_encryption_key_r6(ctx, crypt, password, pwlen, 0, output); + } +} + +/* + * Authenticating the user password (PDF 1.7 algorithm 3.6 + * and ExtensionLevel 3 algorithm 3.11) + * This also has the side effect of saving a key generated + * from the password for decrypting objects and streams. 
+ */ + +static int +pdf_authenticate_user_password(fz_context *ctx, pdf_crypt *crypt, unsigned char *password, int pwlen) +{ + unsigned char output[32]; + pdf_compute_user_password(ctx, crypt, password, pwlen, output); + if (crypt->r == 2 || crypt->r == 5 || crypt->r == 6) + return memcmp(output, crypt->u, 32) == 0; + if (crypt->r == 3 || crypt->r == 4) + return memcmp(output, crypt->u, 16) == 0; + return 0; +} + +/* + * Authenticating the owner password (PDF 1.7 algorithm 3.7 + * and ExtensionLevel 3 algorithm 3.12) + * Generates the user password from the owner password + * and calls pdf_authenticate_user_password. + */ + +static int +pdf_authenticate_owner_password(fz_context *ctx, pdf_crypt *crypt, unsigned char *ownerpass, int pwlen) +{ + unsigned char pwbuf[32]; + unsigned char key[32]; + unsigned char xor[32]; + unsigned char userpass[32]; + int i, n, x; + fz_md5 md5; + fz_arc4 arc4; + + if (crypt->r == 5) + { + /* PDF 1.7 ExtensionLevel 3 algorithm 3.12 */ + pdf_compute_encryption_key_r5(ctx, crypt, ownerpass, pwlen, 1, key); + return !memcmp(key, crypt->o, 32); + } + else if (crypt->r == 6) + { + /* PDF 1.7 ExtensionLevel 8 algorithm */ + pdf_compute_encryption_key_r6(ctx, crypt, ownerpass, pwlen, 1, key); + return !memcmp(key, crypt->o, 32); + } + + n = crypt->length / 8; + + /* Step 1 -- steps 1 to 4 of PDF 1.7 algorithm 3.3 */ + + /* copy and pad password string */ + if (pwlen > 32) + pwlen = 32; + memcpy(pwbuf, ownerpass, pwlen); + memcpy(pwbuf + pwlen, padding, 32 - pwlen); + + /* take md5 hash of padded password */ + fz_md5_init(&md5); + fz_md5_update(&md5, pwbuf, 32); + fz_md5_final(&md5, key); + + /* do some voodoo 50 times (Revision 3 or greater) */ + if (crypt->r >= 3) + { + for (i = 0; i < 50; i++) + { + fz_md5_init(&md5); + fz_md5_update(&md5, key, 16); + fz_md5_final(&md5, key); + } + } + + /* Step 2 (Revision 2) */ + if (crypt->r == 2) + { + fz_arc4_init(&arc4, key, n); + fz_arc4_encrypt(&arc4, userpass, crypt->o, 32); + } + + /* Step 2 
(Revision 3 or greater) */ + if (crypt->r >= 3) + { + memcpy(userpass, crypt->o, 32); + for (x = 0; x < 20; x++) + { + for (i = 0; i < n; i++) + xor[i] = key[i] ^ (19 - x); + fz_arc4_init(&arc4, xor, n); + fz_arc4_encrypt(&arc4, userpass, userpass, 32); + } + } + + return pdf_authenticate_user_password(ctx, crypt, userpass, 32); +} + +static void pdf_docenc_from_utf8(char *password, const char *utf8, int n) +{ + int i = 0, k, c; + while (*utf8 && i + 1 < n) + { + utf8 += fz_chartorune(&c, utf8); + for (k = 0; k < 256; k++) + { + if (c == pdf_doc_encoding[k]) + { + password[i++] = k; + break; + } + } + /* FIXME: drop characters that can't be encoded or return an error? */ + } + password[i] = 0; +} + +static void pdf_saslprep_from_utf8(char *password, const char *utf8, int n) +{ + /* TODO: stringprep with SALSprep profile */ + fz_strlcpy(password, utf8, n); +} + +int +pdf_authenticate_password(pdf_document *xref, const char *pwd_utf8) +{ + char password[2048]; + + if (xref->crypt) + { + password[0] = 0; + if (pwd_utf8) + { + if (xref->crypt->r <= 4) + pdf_docenc_from_utf8(password, pwd_utf8, sizeof password); + else + pdf_saslprep_from_utf8(password, pwd_utf8, sizeof password); + } + + if (pdf_authenticate_user_password(xref->ctx, xref->crypt, (unsigned char *)password, strlen(password))) + return 1; + if (pdf_authenticate_owner_password(xref->ctx, xref->crypt, (unsigned char *)password, strlen(password))) + return 1; + return 0; + } + return 1; +} + +int +pdf_needs_password(pdf_document *xref) +{ + if (!xref->crypt) + return 0; + if (pdf_authenticate_password(xref, "")) + return 0; + return 1; +} + +int +pdf_has_permission(pdf_document *xref, int p) +{ + if (!xref->crypt) + return 1; + return xref->crypt->p & p; +} + +unsigned char * +pdf_crypt_key(pdf_document *xref) +{ + if (xref->crypt) + return xref->crypt->key; + return NULL; +} + +int +pdf_crypt_version(pdf_document *xref) +{ + if (xref->crypt) + return xref->crypt->v; + return 0; +} + +int 
pdf_crypt_revision(pdf_document *xref) +{ + if (xref->crypt) + return xref->crypt->r; + return 0; +} + +char * +pdf_crypt_method(pdf_document *xref) +{ + if (xref->crypt) + { + switch (xref->crypt->strf.method) + { + case PDF_CRYPT_NONE: return "None"; + case PDF_CRYPT_RC4: return "RC4"; + case PDF_CRYPT_AESV2: return "AES"; + case PDF_CRYPT_AESV3: return "AES"; + case PDF_CRYPT_UNKNOWN: return "Unknown"; + } + } + return "None"; +} + +int +pdf_crypt_length(pdf_document *xref) +{ + if (xref->crypt) + return xref->crypt->length; + return 0; +} + +/* + * PDF 1.7 algorithm 3.1 and ExtensionLevel 3 algorithm 3.1a + * + * Using the global encryption key that was generated from the + * password, create a new key that is used to decrypt individual + * objects and streams. This key is based on the object and + * generation numbers. + */ + +static int +pdf_compute_object_key(pdf_crypt *crypt, pdf_crypt_filter *cf, int num, int gen, unsigned char *key, int max_len) +{ + fz_md5 md5; + unsigned char message[5]; + int key_len = crypt->length / 8; + + if (key_len > max_len) + key_len = max_len; + + if (cf->method == PDF_CRYPT_AESV3) + { + memcpy(key, crypt->key, key_len); + return key_len; + } + + fz_md5_init(&md5); + fz_md5_update(&md5, crypt->key, key_len); + message[0] = (num) & 0xFF; + message[1] = (num >> 8) & 0xFF; + message[2] = (num >> 16) & 0xFF; + message[3] = (gen) & 0xFF; + message[4] = (gen >> 8) & 0xFF; + fz_md5_update(&md5, message, 5); + + if (cf->method == PDF_CRYPT_AESV2) + fz_md5_update(&md5, (unsigned char *)"sAlT", 4); + + fz_md5_final(&md5, key); + + if (key_len + 5 > 16) + return 16; + return key_len + 5; +} + +/* + * PDF 1.7 algorithm 3.1 and ExtensionLevel 3 algorithm 3.1a + * + * Decrypt all strings in obj modifying the data in-place. + * Recurse through arrays and dictionaries, but do not follow + * indirect references. 
+ */ + +static void +pdf_crypt_obj_imp(fz_context *ctx, pdf_crypt *crypt, pdf_obj *obj, unsigned char *key, int keylen) +{ + unsigned char *s; + int i, n; + + if (pdf_is_indirect(obj)) + return; + + if (pdf_is_string(obj)) + { + s = (unsigned char *)pdf_to_str_buf(obj); + n = pdf_to_str_len(obj); + + if (crypt->strf.method == PDF_CRYPT_RC4) + { + fz_arc4 arc4; + fz_arc4_init(&arc4, key, keylen); + fz_arc4_encrypt(&arc4, s, s, n); + } + + if (crypt->strf.method == PDF_CRYPT_AESV2 || crypt->strf.method == PDF_CRYPT_AESV3) + { + if (n == 0) + { + /* Empty strings are permissible */ + } + else if (n & 15 || n < 32) + fz_warn(ctx, "invalid string length for aes encryption"); + else + { + unsigned char iv[16]; + fz_aes aes; + memcpy(iv, s, 16); + if (aes_setkey_dec(&aes, key, keylen * 8)) + fz_throw(ctx, FZ_ERROR_GENERIC, "AES key init failed (keylen=%d)", keylen * 8); + aes_crypt_cbc(&aes, AES_DECRYPT, n - 16, iv, s + 16, s); + /* delete space used for iv and padding bytes at end */ + if (s[n - 17] < 1 || s[n - 17] > 16) + fz_warn(ctx, "aes padding out of range"); + else + pdf_set_str_len(obj, n - 16 - s[n - 17]); + } + } + } + + else if (pdf_is_array(obj)) + { + n = pdf_array_len(obj); + for (i = 0; i < n; i++) + { + pdf_crypt_obj_imp(ctx, crypt, pdf_array_get(obj, i), key, keylen); + } + } + + else if (pdf_is_dict(obj)) + { + n = pdf_dict_len(obj); + for (i = 0; i < n; i++) + { + pdf_crypt_obj_imp(ctx, crypt, pdf_dict_get_val(obj, i), key, keylen); + } + } +} + +void +pdf_crypt_obj(fz_context *ctx, pdf_crypt *crypt, pdf_obj *obj, int num, int gen) +{ + unsigned char key[32]; + int len; + + len = pdf_compute_object_key(crypt, &crypt->strf, num, gen, key, 32); + + pdf_crypt_obj_imp(ctx, crypt, obj, key, len); +} + +/* + * PDF 1.7 algorithm 3.1 and ExtensionLevel 3 algorithm 3.1a + * + * Create filter suitable for de/encrypting a stream. 
+ */ +static fz_stream * +pdf_open_crypt_imp(fz_stream *chain, pdf_crypt *crypt, pdf_crypt_filter *stmf, int num, int gen) +{ + unsigned char key[32]; + int len; + + crypt->ctx = chain->ctx; + len = pdf_compute_object_key(crypt, stmf, num, gen, key, 32); + + if (stmf->method == PDF_CRYPT_RC4) + return fz_open_arc4(chain, key, len); + + if (stmf->method == PDF_CRYPT_AESV2 || stmf->method == PDF_CRYPT_AESV3) + return fz_open_aesd(chain, key, len); + + return fz_open_copy(chain); +} + +fz_stream * +pdf_open_crypt(fz_stream *chain, pdf_crypt *crypt, int num, int gen) +{ + return pdf_open_crypt_imp(chain, crypt, &crypt->stmf, num, gen); +} + +fz_stream * +pdf_open_crypt_with_filter(fz_stream *chain, pdf_crypt *crypt, char *name, int num, int gen) +{ + if (strcmp(name, "Identity")) + { + pdf_crypt_filter cf; + pdf_parse_crypt_filter(chain->ctx, &cf, crypt, name); + return pdf_open_crypt_imp(chain, crypt, &cf, num, gen); + } + return chain; +} + +#ifndef NDEBUG +void pdf_print_crypt(pdf_crypt *crypt) +{ + int i; + + printf("crypt {\n"); + + printf("\tv=%d length=%d\n", crypt->v, crypt->length); + printf("\tstmf method=%d length=%d\n", crypt->stmf.method, crypt->stmf.length); + printf("\tstrf method=%d length=%d\n", crypt->strf.method, crypt->strf.length); + printf("\tr=%d\n", crypt->r); + + printf("\to=<"); + for (i = 0; i < 32; i++) + printf("%02X", crypt->o[i]); + printf(">\n"); + + printf("\tu=<"); + for (i = 0; i < 32; i++) + printf("%02X", crypt->u[i]); + printf(">\n"); + + printf("}\n"); +} +#endif diff --git a/source/pdf/pdf-device.c b/source/pdf/pdf-device.c new file mode 100644 index 00000000..602a778f --- /dev/null +++ b/source/pdf/pdf-device.c @@ -0,0 +1,1263 @@ +#include "mupdf/pdf.h" + +typedef struct pdf_device_s pdf_device; + +typedef struct gstate_s gstate; + +struct gstate_s +{ + /* The first few entries aren't really graphics state things, but + * they are recorded here as they are fundamentally intertwined with + * the push/pulling of the gstates. 
*/ + fz_buffer *buf; + void (*on_pop)(pdf_device*,void *); + void *on_pop_arg; + /* The graphics state proper */ + fz_colorspace *colorspace[2]; + float color[2][4]; + fz_matrix ctm; + fz_stroke_state *stroke_state; + float alpha[2]; + int font; + float font_size; + float char_spacing; + float word_spacing; + float horizontal_scaling; + float leading; + int text_rendering_mode; + float rise; + int knockout; + fz_matrix tm; +}; + +typedef struct image_entry_s image_entry; + +struct image_entry_s +{ + char digest[16]; + pdf_obj *ref; +}; + +typedef struct alpha_entry_s alpha_entry; + +struct alpha_entry_s +{ + float alpha; + int stroke; +}; + +typedef struct font_entry_s font_entry; + +struct font_entry_s +{ + fz_font *font; +}; + +typedef struct group_entry_s group_entry; + +struct group_entry_s +{ + int blendmode; + int alpha; + int isolated; + int knockout; + fz_colorspace *colorspace; + pdf_obj *ref; +}; + +struct pdf_device_s +{ + fz_context *ctx; + pdf_document *xref; + pdf_obj *contents; + pdf_obj *resources; + + int in_text; + + int num_forms; + int num_smasks; + + int num_gstates; + int max_gstates; + gstate *gstates; + + int num_imgs; + int max_imgs; + image_entry *images; + + int num_alphas; + int max_alphas; + alpha_entry *alphas; + + int num_fonts; + int max_fonts; + font_entry *fonts; + + int num_groups; + int max_groups; + group_entry *groups; +}; + +#define CURRENT_GSTATE(pdev) (&(pdev)->gstates[(pdev)->num_gstates-1]) + +/* Helper functions */ + +static int +send_image(pdf_device *pdev, fz_image *image, int mask, int smask) +{ + fz_context *ctx = pdev->ctx; + fz_pixmap *pixmap = NULL; + pdf_obj *imobj = NULL; + pdf_obj *imref = NULL; + fz_compressed_buffer *cbuffer = NULL; + fz_compression_params *cp = NULL; + fz_buffer *buffer = NULL; + int i, num; + fz_md5 state; + unsigned char digest[16]; + int bpc = 8; + fz_colorspace *colorspace = image->colorspace; + + fz_var(pixmap); + fz_var(buffer); + fz_var(imobj); + fz_var(imref); + + fz_try(ctx) + { + if 
(cbuffer == NULL) + { + unsigned int size; + int n; + /* Currently, set to maintain resolution; should we consider + * subsampling here according to desired output res? */ + pixmap = image->get_pixmap(ctx, image, image->w, image->h); + colorspace = pixmap->colorspace; /* May be different to image->colorspace! */ + n = (pixmap->n == 1 ? 1 : pixmap->n-1); + size = image->w * image->h * n; + buffer = fz_new_buffer(ctx, size); + buffer->len = size; + if (pixmap->n == 1) + { + memcpy(buffer->data, pixmap->samples, size); + } + else + { + /* Need to remove the alpha plane */ + unsigned char *d = buffer->data; + unsigned char *s = pixmap->samples; + int mod = n; + while (size--) + { + *d++ = *s++; + mod--; + if (mod == 0) + s++, mod = n; + } + } + } + else + { + buffer = fz_keep_buffer(ctx, cbuffer->buffer); + cp = &cbuffer->params; + } + + fz_md5_init(&state); + fz_md5_update(&state, buffer->data, buffer->len); + fz_md5_final(&state, digest); + for(i=0; i < pdev->num_imgs; i++) + { + if (!memcmp(&digest, pdev->images[i].digest, sizeof(16))) + { + num = i; + break; + } + } + + if (i < pdev->num_imgs) + break; + + if (pdev->num_imgs == pdev->max_imgs) + { + int newmax = pdev->max_imgs * 2; + if (newmax == 0) + newmax = 4; + pdev->images = fz_resize_array(ctx, pdev->images, newmax, sizeof(*pdev->images)); + pdev->max_imgs = newmax; + } + num = pdev->num_imgs++; + memcpy(pdev->images[num].digest,digest,16); + pdev->images[num].ref = NULL; /* Will be filled in later */ + + imobj = pdf_new_dict(ctx, 3); + pdf_dict_puts_drop(imobj, "Type", pdf_new_name(ctx, "XObject")); + pdf_dict_puts_drop(imobj, "Subtype", pdf_new_name(ctx, "Image")); + pdf_dict_puts_drop(imobj, "Width", pdf_new_int(ctx, image->w)); + pdf_dict_puts_drop(imobj, "Height", pdf_new_int(ctx, image->h)); + if (mask) + {} + else if (!colorspace || colorspace->n == 1) + pdf_dict_puts_drop(imobj, "ColorSpace", pdf_new_name(ctx, "DeviceGray")); + else if (colorspace->n == 3) + pdf_dict_puts_drop(imobj, "ColorSpace", 
pdf_new_name(ctx, "DeviceRGB")); + else if (colorspace->n == 4) + pdf_dict_puts_drop(imobj, "ColorSpace", pdf_new_name(ctx, "DeviceCMYK")); + switch (cp ? cp->type : FZ_IMAGE_UNKNOWN) + { + case FZ_IMAGE_UNKNOWN: + default: + break; + case FZ_IMAGE_JPEG: + if (cp->u.jpeg.color_transform != -1) + pdf_dict_puts_drop(imobj, "ColorTransform", pdf_new_int(ctx, cp->u.jpeg.color_transform)); + pdf_dict_puts_drop(imobj, "Filter", pdf_new_name(ctx, "DCTDecode")); + break; + case FZ_IMAGE_JPX: + if (cp->u.jpx.smask_in_data) + pdf_dict_puts_drop(imobj, "SMaskInData", pdf_new_int(ctx, cp->u.jpx.smask_in_data)); + pdf_dict_puts_drop(imobj, "Filter", pdf_new_name(ctx, "JPXDecode")); + break; + case FZ_IMAGE_FAX: + if (cp->u.fax.columns) + pdf_dict_puts(imobj, "Columns", pdf_new_int(ctx, cp->u.fax.columns)); + if (cp->u.fax.rows) + pdf_dict_puts(imobj, "Rows", pdf_new_int(ctx, cp->u.fax.rows)); + if (cp->u.fax.k) + pdf_dict_puts(imobj, "K", pdf_new_int(ctx, cp->u.fax.k)); + if (cp->u.fax.end_of_line) + pdf_dict_puts(imobj, "EndOfLine", pdf_new_int(ctx, cp->u.fax.end_of_line)); + if (cp->u.fax.encoded_byte_align) + pdf_dict_puts(imobj, "EncodedByteAlign", pdf_new_int(ctx, cp->u.fax.encoded_byte_align)); + if (cp->u.fax.end_of_block) + pdf_dict_puts(imobj, "EndOfBlock", pdf_new_int(ctx, cp->u.fax.end_of_block)); + if (cp->u.fax.black_is_1) + pdf_dict_puts(imobj, "BlackIs1", pdf_new_int(ctx, cp->u.fax.black_is_1)); + if (cp->u.fax.damaged_rows_before_error) + pdf_dict_puts(imobj, "DamagedRowsBeforeError", pdf_new_int(ctx, cp->u.fax.damaged_rows_before_error)); + pdf_dict_puts(imobj, "Filter", pdf_new_name(ctx, "CCITTFaxDecode")); + break; + case FZ_IMAGE_JBIG2: + /* FIXME - jbig2globals */ + cp->type = FZ_IMAGE_UNKNOWN; + /* bpc = 1; */ + break; + case FZ_IMAGE_FLATE: + if (cp->u.flate.columns) + pdf_dict_puts(imobj, "Columns", pdf_new_int(ctx, cp->u.flate.columns)); + if (cp->u.flate.colors) + pdf_dict_puts(imobj, "Colors", pdf_new_int(ctx, cp->u.flate.colors)); + if 
(cp->u.flate.predictor) + pdf_dict_puts(imobj, "Predictor", pdf_new_int(ctx, cp->u.flate.predictor)); + if (cp->u.flate.bpc) + bpc = cp->u.flate.bpc; + pdf_dict_puts(imobj, "Filter", pdf_new_name(ctx, "FlateDecode")); + break; + case FZ_IMAGE_LZW: + if (cp->u.lzw.columns) + pdf_dict_puts(imobj, "Columns", pdf_new_int(ctx, cp->u.lzw.columns)); + if (cp->u.lzw.colors) + pdf_dict_puts(imobj, "Colors", pdf_new_int(ctx, cp->u.lzw.colors)); + if (cp->u.lzw.predictor) + pdf_dict_puts(imobj, "Predictor", pdf_new_int(ctx, cp->u.lzw.predictor)); + if (cp->u.lzw.bpc) + bpc = cp->u.lzw.bpc; + if (cp->u.lzw.early_change) + pdf_dict_puts(imobj, "EarlyChange", pdf_new_int(ctx, cp->u.lzw.early_change)); + pdf_dict_puts(imobj, "Filter", pdf_new_name(ctx, "LZWDecode")); + break; + case FZ_IMAGE_RLD: + pdf_dict_puts(imobj, "Filter", pdf_new_name(ctx, "RunLengthDecode")); + break; + } + if (mask) + { + pdf_dict_puts_drop(imobj, "ImageMask", pdf_new_bool(ctx, 1)); + bpc = 1; + } + if (image->mask) + { + int smasknum = send_image(pdev, image->mask, 0, 1); + pdf_dict_puts(imobj, "SMask", pdev->images[smasknum].ref); + } + if (bpc) + pdf_dict_puts_drop(imobj, "BitsPerComponent", pdf_new_int(ctx, bpc)); + + imref = pdf_new_ref(pdev->xref, imobj); + pdf_update_stream(pdev->xref, pdf_to_num(imref), buffer); + pdf_dict_puts_drop(imobj, "Length", pdf_new_int(ctx, buffer->len)); + + { + char text[32]; + snprintf(text, sizeof(text), "XObject/Img%d", num); + pdf_dict_putp(pdev->resources, text, imref); + } + pdev->images[num].ref = imref; + } + fz_always(ctx) + { + fz_drop_buffer(ctx, buffer); + pdf_drop_obj(imobj); + fz_drop_pixmap(ctx, pixmap); + } + fz_catch(ctx) + { + pdf_drop_obj(imref); + fz_rethrow(ctx); + } + return num; +} + +static void +pdf_dev_stroke_state(pdf_device *pdev, fz_stroke_state *stroke_state) +{ + fz_context *ctx = pdev->ctx; + gstate *gs = CURRENT_GSTATE(pdev); + + if (stroke_state == gs->stroke_state) + return; + if (gs->stroke_state && !memcmp(stroke_state, 
gs->stroke_state, sizeof(*stroke_state))) + return; + if (!gs->stroke_state || gs->stroke_state->linewidth != stroke_state->linewidth) + { + fz_buffer_printf(ctx, gs->buf, "%f w\n", stroke_state->linewidth); + } + if (!gs->stroke_state || gs->stroke_state->start_cap != stroke_state->start_cap) + { + int cap = stroke_state->start_cap; + /* FIXME: Triangle caps aren't supported in pdf */ + if (cap == FZ_LINECAP_TRIANGLE) + cap = FZ_LINECAP_BUTT; + fz_buffer_printf(ctx, gs->buf, "%d J\n", cap); + } + if (!gs->stroke_state || gs->stroke_state->linejoin != stroke_state->linejoin) + { + int join = stroke_state->linejoin; + if (join == FZ_LINEJOIN_MITER_XPS) + join = FZ_LINEJOIN_MITER; + fz_buffer_printf(ctx, gs->buf, "%d j\n", join); + } + if (!gs->stroke_state || gs->stroke_state->miterlimit != stroke_state->miterlimit) + { + fz_buffer_printf(ctx, gs->buf, "%f M\n", stroke_state->miterlimit); + } + if (gs->stroke_state == NULL && stroke_state->dash_len == 0) + {} + else if (!gs->stroke_state || gs->stroke_state->dash_phase != stroke_state->dash_phase || gs->stroke_state->dash_len != stroke_state->dash_len || + memcmp(gs->stroke_state->dash_list, stroke_state->dash_list, sizeof(float)*stroke_state->dash_len)) + { + int i; + if (stroke_state->dash_len == 0) + fz_buffer_printf(ctx, gs->buf, "["); + for (i = 0; i < stroke_state->dash_len; i++) + fz_buffer_printf(ctx, gs->buf, "%c%f", (i == 0 ? 
'[' : ' '), stroke_state->dash_list[i]); + fz_buffer_printf(ctx, gs->buf, "]%f d\n", stroke_state->dash_phase); + + } + fz_drop_stroke_state(ctx, gs->stroke_state); + gs->stroke_state = fz_keep_stroke_state(ctx, stroke_state); +} + +static void +pdf_dev_path(pdf_device *pdev, fz_path *path) +{ + fz_context *ctx = pdev->ctx; + gstate *gs = CURRENT_GSTATE(pdev); + float x, y; + int i = 0; + while (i < path->len) + { + switch (path->items[i++].k) + { + case FZ_MOVETO: + x = path->items[i++].v; + y = path->items[i++].v; + fz_buffer_printf(ctx, gs->buf, "%g %g m\n", x, y); + break; + case FZ_LINETO: + x = path->items[i++].v; + y = path->items[i++].v; + fz_buffer_printf(ctx, gs->buf, "%g %g l\n", x, y); + break; + case FZ_CURVETO: + x = path->items[i++].v; + y = path->items[i++].v; + fz_buffer_printf(ctx, gs->buf, "%g %g ", x, y); + x = path->items[i++].v; + y = path->items[i++].v; + fz_buffer_printf(ctx, gs->buf, "%g %g ", x, y); + x = path->items[i++].v; + y = path->items[i++].v; + fz_buffer_printf(ctx, gs->buf, "%g %g c\n", x, y); + break; + case FZ_CLOSE_PATH: + fz_buffer_printf(ctx, gs->buf, "h\n"); + break; + } + } +} + +static void +pdf_dev_ctm(pdf_device *pdev, const fz_matrix *ctm) +{ + fz_matrix inverse; + gstate *gs = CURRENT_GSTATE(pdev); + + if (memcmp(&gs->ctm, ctm, sizeof(*ctm)) == 0) + return; + fz_invert_matrix(&inverse, &gs->ctm); + fz_concat(&inverse, ctm, &inverse); + memcpy(&gs->ctm, ctm, sizeof(*ctm)); + fz_buffer_printf(pdev->ctx, gs->buf, "%f %f %f %f %f %f cm\n", inverse.a, inverse.b, inverse.c, inverse.d, inverse.e, inverse.f); +} + +static void +pdf_dev_color(pdf_device *pdev, fz_colorspace *colorspace, float *color, int stroke) +{ + int diff = 0; + int i; + int cspace = 0; + fz_context *ctx = pdev->ctx; + float rgb[FZ_MAX_COLORS]; + gstate *gs = CURRENT_GSTATE(pdev); + + if (colorspace == fz_device_gray(ctx)) + cspace = 1; + else if (colorspace == fz_device_rgb(ctx)) + cspace = 3; + else if (colorspace == fz_device_cmyk(ctx)) + cspace = 4; + + 
if (cspace == 0) + { + /* If it's an unknown colorspace, fallback to rgb */ + colorspace->to_rgb(ctx, colorspace, color, rgb); + color = rgb; + colorspace = fz_device_rgb(ctx); + } + + if (gs->colorspace[stroke] != colorspace) + { + gs->colorspace[stroke] = colorspace; + diff = 1; + } + + for (i=0; i < colorspace->n; i++) + if (gs->color[stroke][i] != color[i]) + { + gs->color[stroke][i] = color[i]; + diff = 1; + } + + if (diff == 0) + return; + + switch (cspace + stroke*8) + { + case 1: + fz_buffer_printf(ctx, gs->buf, "%f g\n", color[0]); + break; + case 3: + fz_buffer_printf(ctx, gs->buf, "%f %f %f rg\n", color[0], color[1], color[2]); + break; + case 4: + fz_buffer_printf(ctx, gs->buf, "%f %f %f %f k\n", color[0], color[1], color[2], color[3]); + break; + case 1+8: + fz_buffer_printf(ctx, gs->buf, "%f G\n", color[0]); + break; + case 3+8: + fz_buffer_printf(ctx, gs->buf, "%f %f %f RG\n", color[0], color[1], color[2]); + break; + case 4+8: + fz_buffer_printf(ctx, gs->buf, "%f %f %f %f K\n", color[0], color[1], color[2], color[3]); + break; + } +} + +static void +pdf_dev_alpha(pdf_device *pdev, float alpha, int stroke) +{ + int i; + fz_context *ctx = pdev->ctx; + gstate *gs = CURRENT_GSTATE(pdev); + + /* If the alpha is unchanged, nothing to do */ + if (gs->alpha[stroke] == alpha) + return; + + /* Have we sent such an alpha before? */ + for (i = 0; i < pdev->num_alphas; i++) + if (pdev->alphas[i].alpha == alpha && pdev->alphas[i].stroke == stroke) + break; + + if (i == pdev->num_alphas) + { + pdf_obj *o; + pdf_obj *ref = NULL; + + fz_var(ref); + + /* No. 
Need to make a new one */ + if (pdev->num_alphas == pdev->max_alphas) + { + int newmax = pdev->max_alphas * 2; + if (newmax == 0) + newmax = 4; + pdev->alphas = fz_resize_array(ctx, pdev->alphas, newmax, sizeof(*pdev->alphas)); + pdev->max_alphas = newmax; + } + pdev->alphas[i].alpha = alpha; + pdev->alphas[i].stroke = stroke; + + o = pdf_new_dict(ctx, 1); + fz_try(ctx) + { + char text[32]; + pdf_dict_puts_drop(o, (stroke ? "CA" : "ca"), pdf_new_real(ctx, alpha)); + ref = pdf_new_ref(pdev->xref, o); + snprintf(text, sizeof(text), "ExtGState/Alp%d", i); + pdf_dict_putp(pdev->resources, text, ref); + } + fz_always(ctx) + { + pdf_drop_obj(o); + pdf_drop_obj(ref); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + pdev->num_alphas++; + } + fz_buffer_printf(ctx, gs->buf, "/Alp%d gs\n", i); +} + +static void +pdf_dev_font(pdf_device *pdev, fz_font *font, float size) +{ + int i; + fz_context *ctx = pdev->ctx; + gstate *gs = CURRENT_GSTATE(pdev); + + /* If the font is unchanged, nothing to do */ + if (gs->font >= 0 && pdev->fonts[gs->font].font == font) + return; + + /* Have we sent such a font before? */ + for (i = 0; i < pdev->num_fonts; i++) + if (pdev->fonts[i].font == font) + break; + + if (i == pdev->num_fonts) + { + pdf_obj *o; + pdf_obj *ref = NULL; + + fz_var(ref); + + /* No. Need to make a new one */ + if (pdev->num_fonts == pdev->max_fonts) + { + int newmax = pdev->max_fonts * 2; + if (newmax == 0) + newmax = 4; + pdev->fonts = fz_resize_array(ctx, pdev->fonts, newmax, sizeof(*pdev->fonts)); + pdev->max_fonts = newmax; + } + pdev->fonts[i].font = fz_keep_font(ctx, font); + + o = pdf_new_dict(ctx, 3); + fz_try(ctx) + { + /* BIG FIXME: Get someone who understands fonts to fill this bit in. 
*/ + char text[32]; + pdf_dict_puts_drop(o, "Type", pdf_new_name(ctx, "Font")); + pdf_dict_puts_drop(o, "Subtype", pdf_new_name(ctx, "Type1")); + pdf_dict_puts_drop(o, "BaseFont", pdf_new_name(ctx, "Helvetica")); + ref = pdf_new_ref(pdev->xref, o); + snprintf(text, sizeof(text), "Font/F%d", i); + pdf_dict_putp(pdev->resources, text, ref); + } + fz_always(ctx) + { + pdf_drop_obj(o); + pdf_drop_obj(ref); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + pdev->num_fonts++; + } + fz_buffer_printf(ctx, gs->buf, "/F%d %g Tf\n", i, size); +} + +static void +pdf_dev_tm(pdf_device *pdev, const fz_matrix *tm) +{ + gstate *gs = CURRENT_GSTATE(pdev); + + if (memcmp(&gs->tm, tm, sizeof(*tm)) == 0) + return; + fz_buffer_printf(pdev->ctx, gs->buf, "%f %f %f %f %f %f Tm\n", tm->a, tm->b, tm->c, tm->d, tm->e, tm->f); + gs->tm = *tm; +} + +static void +pdf_dev_push_new_buf(pdf_device *pdev, fz_buffer *buf, void (*on_pop)(pdf_device*,void *), void *on_pop_arg) +{ + fz_context *ctx = pdev->ctx; + + if (pdev->num_gstates == pdev->max_gstates) + { + int newmax = pdev->max_gstates*2; + + pdev->gstates = fz_resize_array(ctx, pdev->gstates, newmax, sizeof(*pdev->gstates)); + pdev->max_gstates = newmax; + } + memcpy(&pdev->gstates[pdev->num_gstates], &pdev->gstates[pdev->num_gstates-1], sizeof(*pdev->gstates)); + fz_keep_stroke_state(ctx, pdev->gstates[pdev->num_gstates].stroke_state); + if (buf) + pdev->gstates[pdev->num_gstates].buf = buf; + else + fz_keep_buffer(ctx, pdev->gstates[pdev->num_gstates].buf); + pdev->gstates[pdev->num_gstates].on_pop = on_pop; + pdev->gstates[pdev->num_gstates].on_pop_arg = on_pop_arg; + fz_buffer_printf(ctx, pdev->gstates[pdev->num_gstates].buf, "q\n"); + pdev->num_gstates++; +} + +static void +pdf_dev_push(pdf_device *pdev) +{ + pdf_dev_push_new_buf(pdev, NULL, NULL, NULL); +} + +static void * +pdf_dev_pop(pdf_device *pdev) +{ + fz_context *ctx = pdev->ctx; + gstate *gs = CURRENT_GSTATE(pdev); + void *arg = gs->on_pop_arg; + + fz_buffer_printf(pdev->ctx, 
gs->buf, "Q\n"); + if (gs->on_pop) + gs->on_pop(pdev, arg); + pdev->num_gstates--; + fz_drop_stroke_state(ctx, pdev->gstates[pdev->num_gstates].stroke_state); + fz_drop_buffer(ctx, pdev->gstates[pdev->num_gstates].buf); + return arg; +} + +static void +pdf_dev_text(pdf_device *pdev, fz_text *text) +{ + int i; + fz_matrix trm; + fz_matrix inverse; + gstate *gs = CURRENT_GSTATE(pdev); + fz_matrix trunc_trm; + + /* BIG FIXME: Get someone who understands fonts to fill this bit in. */ + trm = gs->tm; + trunc_trm.a = trm.a; + trunc_trm.b = trm.b; + trunc_trm.c = trm.c; + trunc_trm.d = trm.d; + trunc_trm.e = 0; + trunc_trm.f = 0; + fz_invert_matrix(&inverse, &trunc_trm); + + for (i=0; i < text->len; i++) + { + fz_text_item *it = &text->items[i]; + fz_point delta; + delta.x = it->x - trm.e; + delta.y = it->y - trm.f; + fz_transform_point(&delta, &inverse); + if (delta.x != 0 || delta.y != 0) + { + fz_buffer_printf(pdev->ctx, gs->buf, "%g %g Td ", delta.x, delta.y); + trm.e = it->x; + trm.f = it->y; + } + fz_buffer_printf(pdev->ctx, gs->buf, "<%02x> Tj\n", it->ucs); + /* FIXME: Advance the text position - doesn't matter at the + * moment as we absolutely position each glyph, but we should + * use more efficient text outputting where possible. 
*/ + } + gs->tm.e = trm.e; + gs->tm.f = trm.f; +} + +static void +pdf_dev_trm(pdf_device *pdev, int trm) +{ + gstate *gs = CURRENT_GSTATE(pdev); + + if (gs->text_rendering_mode == trm) + return; + gs->text_rendering_mode = trm; + fz_buffer_printf(pdev->ctx, gs->buf, "%d Tr\n", trm); +} + +static void +pdf_dev_begin_text(pdf_device *pdev, const fz_matrix *tm, int trm) +{ + pdf_dev_trm(pdev, trm); + if (!pdev->in_text) + { + gstate *gs = CURRENT_GSTATE(pdev); + fz_buffer_printf(pdev->ctx, gs->buf, "BT\n"); + gs->tm.a = 1; + gs->tm.b = 0; + gs->tm.c = 0; + gs->tm.d = 1; + gs->tm.e = 0; + gs->tm.f = 0; + pdev->in_text = 1; + } + pdf_dev_tm(pdev, tm); +} + +static void +pdf_dev_end_text(pdf_device *pdev) +{ + gstate *gs = CURRENT_GSTATE(pdev); + + if (!pdev->in_text) + return; + pdev->in_text = 0; + fz_buffer_printf(pdev->ctx, gs->buf, "ET\n"); +} + +static int +pdf_dev_new_form(pdf_obj **form_ref, pdf_device *pdev, const fz_rect *bbox, int isolated, int knockout, int blendmode, float alpha, fz_colorspace *colorspace) +{ + fz_context *ctx = pdev->ctx; + int num; + pdf_obj *group_ref; + pdf_obj *group; + pdf_obj *form; + + *form_ref = NULL; + + /* Find (or make) a new group with the required options. 
*/ + for(num = 0; num < pdev->num_groups; num++) + { + group_entry *g = &pdev->groups[num]; + if (g->isolated == isolated && g->knockout == knockout && g->blendmode == blendmode && g->alpha == alpha && g->colorspace == colorspace) + { + group_ref = pdev->groups[num].ref; + break; + } + } + + /* If we didn't find one, make one */ + if (num == pdev->num_groups) + { + if (pdev->num_groups == pdev->max_groups) + { + int newmax = pdev->max_groups * 2; + if (newmax == 0) + newmax = 4; + pdev->groups = fz_resize_array(ctx, pdev->groups, newmax, sizeof(*pdev->groups)); + pdev->max_groups = newmax; + } + pdev->num_groups++; + pdev->groups[num].isolated = isolated; + pdev->groups[num].knockout = knockout; + pdev->groups[num].blendmode = blendmode; + pdev->groups[num].alpha = alpha; + pdev->groups[num].colorspace = fz_keep_colorspace(ctx, colorspace); + pdev->groups[num].ref = NULL; + group = pdf_new_dict(ctx, 5); + fz_try(ctx) + { + pdf_dict_puts_drop(group, "Type", pdf_new_name(ctx, "Group")); + pdf_dict_puts_drop(group, "S", pdf_new_name(ctx, "Transparency")); + pdf_dict_puts_drop(group, "K", pdf_new_bool(ctx, knockout)); + pdf_dict_puts_drop(group, "I", pdf_new_bool(ctx, isolated)); + pdf_dict_puts_drop(group, "K", pdf_new_bool(ctx, knockout)); + pdf_dict_puts_drop(group, "BM", pdf_new_name(ctx, fz_blendmode_name(blendmode))); + if (!colorspace) + {} + else if (colorspace->n == 1) + pdf_dict_puts_drop(group, "CS", pdf_new_name(ctx, "DeviceGray")); + else if (colorspace->n == 4) + pdf_dict_puts_drop(group, "CS", pdf_new_name(ctx, "DeviceCMYK")); + else + pdf_dict_puts_drop(group, "CS", pdf_new_name(ctx, "DeviceRGB")); + group_ref = pdev->groups[num].ref = pdf_new_ref(pdev->xref, group); + } + fz_always(ctx) + { + pdf_drop_obj(group); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + + /* Make us a new Forms object that points to that group, and change + * to writing into the buffer for that Forms object. 
*/ + form = pdf_new_dict(ctx, 4); + fz_try(ctx) + { + pdf_dict_puts_drop(form, "Subtype", pdf_new_name(ctx, "Form")); + pdf_dict_puts(form, "Group", group_ref); + pdf_dict_puts_drop(form, "FormType", pdf_new_int(ctx, 1)); + pdf_dict_puts_drop(form, "BBox", pdf_new_rect(ctx, bbox)); + *form_ref = pdf_new_ref(pdev->xref, form); + } + fz_catch(ctx) + { + pdf_drop_obj(form); + fz_rethrow(ctx); + } + + /* Insert the new form object into the resources */ + { + char text[32]; + num = pdev->num_forms++; + snprintf(text, sizeof(text), "XObject/Fm%d", num); + pdf_dict_putp(pdev->resources, text, *form_ref); + } + + return num; +} + +/* Entry points */ + +static void +pdf_dev_fill_path(fz_device *dev, fz_path *path, int even_odd, const fz_matrix *ctm, + fz_colorspace *colorspace, float *color, float alpha) +{ + pdf_device *pdev = dev->user; + gstate *gs = CURRENT_GSTATE(pdev); + + pdf_dev_end_text(pdev); + pdf_dev_alpha(pdev, alpha, 0); + pdf_dev_color(pdev, colorspace, color, 0); + pdf_dev_ctm(pdev, ctm); + pdf_dev_path(pdev, path); + fz_buffer_printf(dev->ctx, gs->buf, (even_odd ? "f*\n" : "f\n")); +} + +static void +pdf_dev_stroke_path(fz_device *dev, fz_path *path, fz_stroke_state *stroke, const fz_matrix *ctm, + fz_colorspace *colorspace, float *color, float alpha) +{ + pdf_device *pdev = dev->user; + gstate *gs = CURRENT_GSTATE(pdev); + + pdf_dev_end_text(pdev); + pdf_dev_alpha(pdev, alpha, 1); + pdf_dev_color(pdev, colorspace, color, 1); + pdf_dev_ctm(pdev, ctm); + pdf_dev_stroke_state(pdev, stroke); + pdf_dev_path(pdev, path); + fz_buffer_printf(dev->ctx, gs->buf, "S\n"); +} + +static void +pdf_dev_clip_path(fz_device *dev, fz_path *path, const fz_rect *rect, int even_odd, const fz_matrix *ctm) +{ + pdf_device *pdev = dev->user; + gstate *gs; + + pdf_dev_end_text(pdev); + pdf_dev_push(pdev); + pdf_dev_ctm(pdev, ctm); + pdf_dev_path(pdev, path); + gs = CURRENT_GSTATE(pdev); + fz_buffer_printf(dev->ctx, gs->buf, (even_odd ? 
"W* n\n" : "W n\n")); +} + +static void +pdf_dev_clip_stroke_path(fz_device *dev, fz_path *path, const fz_rect *rect, fz_stroke_state *stroke, const fz_matrix *ctm) +{ + pdf_device *pdev = dev->user; + gstate *gs; + + pdf_dev_end_text(pdev); + pdf_dev_push(pdev); + /* FIXME: Need to push a group, select a pattern (or shading) here, + * stroke with the pattern/shading. Then move to defining that pattern + * with the next calls to the device interface until the next pop + * when we pop the group. */ + pdf_dev_ctm(pdev, ctm); + pdf_dev_path(pdev, path); + gs = CURRENT_GSTATE(pdev); + fz_buffer_printf(dev->ctx, gs->buf, "W n\n"); +} + +static void +pdf_dev_fill_text(fz_device *dev, fz_text *text, const fz_matrix *ctm, + fz_colorspace *colorspace, float *color, float alpha) +{ + pdf_device *pdev = dev->user; + + pdf_dev_begin_text(pdev, &text->trm, 0); + pdf_dev_font(pdev, text->font, 1); + pdf_dev_text(pdev, text); +} + +static void +pdf_dev_stroke_text(fz_device *dev, fz_text *text, fz_stroke_state *stroke, const fz_matrix *ctm, + fz_colorspace *colorspace, float *color, float alpha) +{ + pdf_device *pdev = dev->user; + + pdf_dev_begin_text(pdev, &text->trm, 1); + pdf_dev_font(pdev, text->font, 1); + pdf_dev_text(pdev, text); +} + +static void +pdf_dev_clip_text(fz_device *dev, fz_text *text, const fz_matrix *ctm, int accumulate) +{ + pdf_device *pdev = dev->user; + + pdf_dev_begin_text(pdev, &text->trm, 0); + pdf_dev_font(pdev, text->font, 7); + pdf_dev_text(pdev, text); +} + +static void +pdf_dev_clip_stroke_text(fz_device *dev, fz_text *text, fz_stroke_state *stroke, const fz_matrix *ctm) +{ + pdf_device *pdev = dev->user; + + pdf_dev_begin_text(pdev, &text->trm, 0); + pdf_dev_font(pdev, text->font, 5); + pdf_dev_text(pdev, text); +} + +static void +pdf_dev_ignore_text(fz_device *dev, fz_text *text, const fz_matrix *ctm) +{ + pdf_device *pdev = dev->user; + + pdf_dev_begin_text(pdev, &text->trm, 0); + pdf_dev_font(pdev, text->font, 3); + pdf_dev_text(pdev, text); 
+} + +static void +pdf_dev_fill_image(fz_device *dev, fz_image *image, const fz_matrix *ctm, float alpha) +{ + pdf_device *pdev = (pdf_device *)dev->user; + int num; + gstate *gs = CURRENT_GSTATE(pdev); + fz_matrix local_ctm = *ctm; + + pdf_dev_end_text(pdev); + num = send_image(pdev, image, 0, 0); + fz_buffer_printf(dev->ctx, gs->buf, "q\n"); + pdf_dev_alpha(pdev, alpha, 0); + /* PDF images are upside down, so fiddle the ctm */ + fz_pre_scale(&local_ctm, 1, -1); + fz_pre_translate(&local_ctm, 0, -1); + pdf_dev_ctm(pdev, &local_ctm); + fz_buffer_printf(dev->ctx, gs->buf, "/Img%d Do Q\n", num); +} + +static void +pdf_dev_fill_shade(fz_device *dev, fz_shade *shade, const fz_matrix *ctm, float alpha) +{ + pdf_device *pdev = (pdf_device *)dev->user; + + /* FIXME */ + pdf_dev_end_text(pdev); +} + +static void +pdf_dev_fill_image_mask(fz_device *dev, fz_image *image, const fz_matrix *ctm, +fz_colorspace *colorspace, float *color, float alpha) +{ + pdf_device *pdev = (pdf_device *)dev->user; + gstate *gs = CURRENT_GSTATE(pdev); + int num; + fz_matrix local_ctm = *ctm; + + pdf_dev_end_text(pdev); + num = send_image(pdev, image, 1, 0); + fz_buffer_printf(dev->ctx, gs->buf, "q\n"); + pdf_dev_alpha(pdev, alpha, 0); + pdf_dev_color(pdev, colorspace, color, 0); + /* PDF images are upside down, so fiddle the ctm */ + fz_pre_scale(&local_ctm, 1, -1); + fz_pre_translate(&local_ctm, 0, -1); + pdf_dev_ctm(pdev, &local_ctm); + fz_buffer_printf(dev->ctx, gs->buf, "/Img%d Do Q\n", num); +} + +static void +pdf_dev_clip_image_mask(fz_device *dev, fz_image *image, const fz_rect *rect, const fz_matrix *ctm) +{ + pdf_device *pdev = (pdf_device *)dev->user; + + /* FIXME */ + pdf_dev_end_text(pdev); + pdf_dev_push(pdev); +} + +static void +pdf_dev_pop_clip(fz_device *dev) +{ + pdf_device *pdev = (pdf_device *)dev->user; + + /* FIXME */ + pdf_dev_end_text(pdev); + pdf_dev_pop(pdev); +} + +static void +pdf_dev_begin_mask(fz_device *dev, const fz_rect *bbox, int luminosity, fz_colorspace 
*colorspace, float *color) +{ + pdf_device *pdev = (pdf_device *)dev->user; + fz_context *ctx = pdev->ctx; + gstate *gs; + pdf_obj *smask = NULL; + pdf_obj *egs = NULL; + pdf_obj *egs_ref; + pdf_obj *form_ref; + pdf_obj *color_obj = NULL; + int i; + + fz_var(smask); + fz_var(egs); + fz_var(color_obj); + + pdf_dev_end_text(pdev); + + /* Make a new form to contain the contents of the softmask */ + pdf_dev_new_form(&form_ref, pdev, bbox, 0, 0, 0, 1, colorspace); + + fz_try(ctx) + { + smask = pdf_new_dict(ctx, 4); + pdf_dict_puts(smask, "Type", pdf_new_name(ctx, "Mask")); + pdf_dict_puts_drop(smask, "S", pdf_new_name(ctx, (luminosity ? "Luminosity" : "Alpha"))); + pdf_dict_puts(smask, "G", form_ref); + color_obj = pdf_new_array(ctx, colorspace->n); + for (i = 0; i < colorspace->n; i++) + pdf_array_push(color_obj, pdf_new_real(ctx, color[i])); + pdf_dict_puts_drop(smask, "BC", color_obj); + color_obj = NULL; + + egs = pdf_new_dict(ctx, 5); + pdf_dict_puts_drop(egs, "Type", pdf_new_name(ctx, "ExtGState")); + pdf_dict_puts_drop(egs, "SMask", pdf_new_ref(pdev->xref, smask)); + egs_ref = pdf_new_ref(pdev->xref, egs); + + { + char text[32]; + snprintf(text, sizeof(text), "ExtGState/SM%d", pdev->num_smasks++); + pdf_dict_putp(pdev->resources, text, egs_ref); + pdf_drop_obj(egs_ref); + } + gs = CURRENT_GSTATE(pdev); + fz_buffer_printf(dev->ctx, gs->buf, "/SM%d gs\n", pdev->num_smasks-1); + } + fz_always(ctx) + { + pdf_drop_obj(smask); + } + fz_catch(ctx) + { + pdf_drop_obj(form_ref); + pdf_drop_obj(color_obj); + fz_rethrow(ctx); + } + + /* Now, everything we get until the end_mask needs to go into a + * new buffer, which will be the stream contents for the form. 
*/ + pdf_dev_push_new_buf(pdev, fz_new_buffer(ctx, 1024), NULL, form_ref); +} + +static void +pdf_dev_end_mask(fz_device *dev) +{ + pdf_device *pdev = (pdf_device *)dev->user; + fz_context *ctx = pdev->ctx; + gstate *gs = CURRENT_GSTATE(pdev); + fz_buffer *buf = fz_keep_buffer(ctx, gs->buf); + pdf_obj *form_ref = (pdf_obj *)gs->on_pop_arg; + + /* Here we do part of the pop, but not all of it. */ + pdf_dev_end_text(pdev); + fz_buffer_printf(pdev->ctx, buf, "Q\n"); + pdf_dict_puts_drop(form_ref, "Length", pdf_new_int(ctx, buf->len)); + pdf_update_stream(pdev->xref, pdf_to_num(form_ref), buf); + fz_drop_buffer(ctx, buf); + gs->buf = fz_keep_buffer(ctx, gs[-1].buf); + gs->on_pop_arg = NULL; + pdf_drop_obj(form_ref); + fz_buffer_printf(pdev->ctx, gs->buf, "q\n"); +} + +static void +pdf_dev_begin_group(fz_device *dev, const fz_rect *bbox, int isolated, int knockout, int blendmode, float alpha) +{ + pdf_device *pdev = (pdf_device *)dev->user; + fz_context *ctx = pdev->ctx; + int num; + pdf_obj *form_ref; + gstate *gs; + + pdf_dev_end_text(pdev); + + num = pdf_dev_new_form(&form_ref, pdev, bbox, isolated, knockout, blendmode, alpha, NULL); + + /* Add the call to this group */ + gs = CURRENT_GSTATE(pdev); + fz_buffer_printf(dev->ctx, gs->buf, "/Fm%d Do\n", num); + + /* Now, everything we get until the end of group needs to go into a + * new buffer, which will be the stream contents for the form. 
*/ + pdf_dev_push_new_buf(pdev, fz_new_buffer(ctx, 1024), NULL, form_ref); +} + +static void +pdf_dev_end_group(fz_device *dev) +{ + pdf_device *pdev = (pdf_device *)dev->user; + gstate *gs = CURRENT_GSTATE(pdev); + fz_context *ctx = pdev->ctx; + fz_buffer *buf = fz_keep_buffer(ctx, gs->buf); + pdf_obj *form_ref; + + pdf_dev_end_text(pdev); + form_ref = (pdf_obj *)pdf_dev_pop(pdev); + pdf_dict_puts_drop(form_ref, "Length", pdf_new_int(ctx, gs->buf->len)); + pdf_update_stream(pdev->xref, pdf_to_num(form_ref), buf); + fz_drop_buffer(ctx, buf); + pdf_drop_obj(form_ref); +} + +static int +pdf_dev_begin_tile(fz_device *dev, const fz_rect *area, const fz_rect *view, float xstep, float ystep, const fz_matrix *ctm, int id) +{ + pdf_device *pdev = (pdf_device *)dev->user; + + /* FIXME */ + pdf_dev_end_text(pdev); + return 0; +} + +static void +pdf_dev_end_tile(fz_device *dev) +{ + pdf_device *pdev = (pdf_device *)dev->user; + + /* FIXME */ + pdf_dev_end_text(pdev); +} + +static void +pdf_dev_free_user(fz_device *dev) +{ + pdf_device *pdev = dev->user; + fz_context *ctx = pdev->ctx; + gstate *gs = CURRENT_GSTATE(pdev); + int i; + + pdf_dev_end_text(pdev); + + pdf_dict_puts_drop(pdev->contents, "Length", pdf_new_int(ctx, gs->buf->len)); + + pdf_update_stream(pdev->xref, pdf_to_num(pdev->contents), gs->buf); + + for (i = pdev->num_gstates-1; i >= 0; i--) + { + fz_drop_stroke_state(ctx, pdev->gstates[i].stroke_state); + } + + for (i = pdev->num_fonts-1; i >= 0; i--) + { + fz_drop_font(ctx, pdev->fonts[i].font); + } + + for (i = pdev->num_imgs-1; i >= 0; i--) + { + pdf_drop_obj(pdev->images[i].ref); + } + + pdf_drop_obj(pdev->contents); + pdf_drop_obj(pdev->resources); + + fz_free(ctx, pdev->images); + fz_free(ctx, pdev->alphas); + fz_free(ctx, pdev->gstates); + fz_free(ctx, pdev); +} + +fz_device *pdf_new_pdf_device(pdf_document *doc, pdf_obj *contents, pdf_obj *resources, const fz_matrix *ctm) +{ + fz_context *ctx = doc->ctx; + pdf_device *pdev = fz_malloc_struct(ctx, 
pdf_device); + fz_device *dev; + + fz_try(ctx) + { + pdev->ctx = ctx; + pdev->xref = doc; + pdev->contents = pdf_keep_obj(contents); + pdev->resources = pdf_keep_obj(resources); + pdev->gstates = fz_malloc_struct(ctx, gstate); + pdev->gstates[0].buf = fz_new_buffer(ctx, 256); + pdev->gstates[0].ctm = *ctm; + pdev->gstates[0].colorspace[0] = fz_device_gray(ctx); + pdev->gstates[0].colorspace[1] = fz_device_gray(ctx); + pdev->gstates[0].color[0][0] = 1; + pdev->gstates[0].color[1][0] = 1; + pdev->gstates[0].alpha[0] = 1.0; + pdev->gstates[0].alpha[1] = 1.0; + pdev->gstates[0].font = -1; + pdev->gstates[0].horizontal_scaling = 100; + pdev->num_gstates = 1; + pdev->max_gstates = 1; + + dev = fz_new_device(ctx, pdev); + } + fz_catch(ctx) + { + if (pdev->gstates) + fz_drop_buffer(ctx, pdev->gstates[0].buf); + fz_free(ctx, pdev); + fz_rethrow(ctx); + } + + dev->free_user = pdf_dev_free_user; + + dev->fill_path = pdf_dev_fill_path; + dev->stroke_path = pdf_dev_stroke_path; + dev->clip_path = pdf_dev_clip_path; + dev->clip_stroke_path = pdf_dev_clip_stroke_path; + + dev->fill_text = pdf_dev_fill_text; + dev->stroke_text = pdf_dev_stroke_text; + dev->clip_text = pdf_dev_clip_text; + dev->clip_stroke_text = pdf_dev_clip_stroke_text; + dev->ignore_text = pdf_dev_ignore_text; + + dev->fill_shade = pdf_dev_fill_shade; + dev->fill_image = pdf_dev_fill_image; + dev->fill_image_mask = pdf_dev_fill_image_mask; + dev->clip_image_mask = pdf_dev_clip_image_mask; + + dev->pop_clip = pdf_dev_pop_clip; + + dev->begin_mask = pdf_dev_begin_mask; + dev->end_mask = pdf_dev_end_mask; + dev->begin_group = pdf_dev_begin_group; + dev->end_group = pdf_dev_end_group; + + dev->begin_tile = pdf_dev_begin_tile; + dev->end_tile = pdf_dev_end_tile; + + return dev; +} diff --git a/source/pdf/pdf-encoding.c b/source/pdf/pdf-encoding.c new file mode 100644 index 00000000..c634a9ee --- /dev/null +++ b/source/pdf/pdf-encoding.c @@ -0,0 +1,82 @@ +#include "mupdf/pdf.h" + +#include "pdf-encodings.h" +#include 
"pdf-glyphlist.h" + +void +pdf_load_encoding(char **estrings, char *encoding) +{ + char **bstrings = NULL; + int i; + + if (!strcmp(encoding, "StandardEncoding")) + bstrings = (char**) pdf_standard; + if (!strcmp(encoding, "MacRomanEncoding")) + bstrings = (char**) pdf_mac_roman; + if (!strcmp(encoding, "MacExpertEncoding")) + bstrings = (char**) pdf_mac_expert; + if (!strcmp(encoding, "WinAnsiEncoding")) + bstrings = (char**) pdf_win_ansi; + + if (bstrings) + for (i = 0; i < 256; i++) + estrings[i] = bstrings[i]; +} + +int +pdf_lookup_agl(char *name) +{ + char buf[64]; + char *p; + int l = 0; + int r = nelem(agl_name_list) - 1; + + fz_strlcpy(buf, name, sizeof buf); + + /* kill anything after first period and underscore */ + p = strchr(buf, '.'); + if (p) p[0] = 0; + p = strchr(buf, '_'); + if (p) p[0] = 0; + + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(buf, agl_name_list[m]); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return agl_code_list[m]; + } + + if (strstr(buf, "uni") == buf) + return strtol(buf + 3, NULL, 16); + else if (strstr(buf, "u") == buf) + return strtol(buf + 1, NULL, 16); + else if (strstr(buf, "a") == buf && strlen(buf) >= 3) + return strtol(buf + 1, NULL, 10); + + return 0; +} + +static const char *empty_dup_list[] = { 0 }; + +const char ** +pdf_lookup_agl_duplicates(int ucs) +{ + int l = 0; + int r = nelem(agl_dup_offsets) / 2 - 1; + while (l <= r) + { + int m = (l + r) >> 1; + if (ucs < agl_dup_offsets[m << 1]) + r = m - 1; + else if (ucs > agl_dup_offsets[m << 1]) + l = m + 1; + else + return agl_dup_names + agl_dup_offsets[(m << 1) + 1]; + } + return empty_dup_list; +} diff --git a/source/pdf/pdf-encodings.h b/source/pdf/pdf-encodings.h new file mode 100644 index 00000000..025e9d03 --- /dev/null +++ b/source/pdf/pdf-encodings.h @@ -0,0 +1,215 @@ +#define _notdef NULL + +const unsigned short pdf_doc_encoding[256] = +{ + /* 0x0 to 0x17 except \t, \n and \r are really undefined */ + 0x00, 0x01, 0x02, 0x03, 
0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, + 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, + 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, + 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, + 0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000, + 0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, + 0x00f8, 0x00f9, 0x00fa, 
0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff +}; + +const char * const pdf_standard[256] = { _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", + "ampersand", "quoteright", "parenleft", "parenright", "asterisk", + "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", + "three", "four", "five", "six", "seven", "eight", "nine", "colon", + "semicolon", "less", "equal", "greater", "question", "at", "A", + "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", + "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + "bracketleft", "backslash", "bracketright", "asciicircum", "underscore", + "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", + "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", + "y", "z", "braceleft", "bar", "braceright", "asciitilde", _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, "exclamdown", "cent", "sterling", + "fraction", "yen", "florin", "section", "currency", "quotesingle", + "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", + "fi", "fl", _notdef, "endash", "dagger", "daggerdbl", "periodcentered", + _notdef, "paragraph", "bullet", "quotesinglbase", "quotedblbase", + "quotedblright", "guillemotright", "ellipsis", "perthousand", + _notdef, "questiondown", _notdef, "grave", "acute", "circumflex", + "tilde", "macron", "breve", "dotaccent", "dieresis", _notdef, + "ring", "cedilla", _notdef, 
"hungarumlaut", "ogonek", "caron", + "emdash", _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "AE", + _notdef, "ordfeminine", _notdef, _notdef, _notdef, _notdef, + "Lslash", "Oslash", "OE", "ordmasculine", _notdef, _notdef, + _notdef, _notdef, _notdef, "ae", _notdef, _notdef, + _notdef, "dotlessi", _notdef, _notdef, "lslash", "oslash", + "oe", "germandbls", _notdef, _notdef, _notdef, _notdef +}; + +const char * const pdf_mac_roman[256] = { _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", + "ampersand", "quotesingle", "parenleft", "parenright", "asterisk", + "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", + "three", "four", "five", "six", "seven", "eight", "nine", "colon", + "semicolon", "less", "equal", "greater", "question", "at", "A", + "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", + "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + "bracketleft", "backslash", "bracketright", "asciicircum", "underscore", + "grave", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", + "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", + "y", "z", "braceleft", "bar", "braceright", "asciitilde", _notdef, + "Adieresis", "Aring", "Ccedilla", "Eacute", "Ntilde", "Odieresis", + "Udieresis", "aacute", "agrave", "acircumflex", "adieresis", "atilde", + "aring", "ccedilla", "eacute", "egrave", "ecircumflex", "edieresis", + "iacute", "igrave", "icircumflex", "idieresis", "ntilde", "oacute", + "ograve", "ocircumflex", "odieresis", "otilde", "uacute", "ugrave", + "ucircumflex", "udieresis", "dagger", "degree", 
"cent", "sterling", + "section", "bullet", "paragraph", "germandbls", "registered", + "copyright", "trademark", "acute", "dieresis", _notdef, "AE", + "Oslash", _notdef, "plusminus", _notdef, _notdef, "yen", "mu", + _notdef, _notdef, _notdef, _notdef, _notdef, "ordfeminine", + "ordmasculine", _notdef, "ae", "oslash", "questiondown", "exclamdown", + "logicalnot", _notdef, "florin", _notdef, _notdef, "guillemotleft", + "guillemotright", "ellipsis", "space", "Agrave", "Atilde", "Otilde", + "OE", "oe", "endash", "emdash", "quotedblleft", "quotedblright", + "quoteleft", "quoteright", "divide", _notdef, "ydieresis", + "Ydieresis", "fraction", "currency", "guilsinglleft", "guilsinglright", + "fi", "fl", "daggerdbl", "periodcentered", "quotesinglbase", + "quotedblbase", "perthousand", "Acircumflex", "Ecircumflex", "Aacute", + "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave", + "Oacute", "Ocircumflex", _notdef, "Ograve", "Uacute", "Ucircumflex", + "Ugrave", "dotlessi", "circumflex", "tilde", "macron", "breve", + "dotaccent", "ring", "cedilla", "hungarumlaut", "ogonek", "caron" +}; + +const char * const pdf_mac_expert[256] = { _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + "space", "exclamsmall", "Hungarumlautsmall", "centoldstyle", + "dollaroldstyle", "dollarsuperior", "ampersandsmall", "Acutesmall", + "parenleftsuperior", "parenrightsuperior", "twodotenleader", + "onedotenleader", "comma", "hyphen", "period", "fraction", + "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle", + "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle", + "eightoldstyle", "nineoldstyle", "colon", "semicolon", _notdef, + "threequartersemdash", _notdef, "questionsmall", _notdef, + _notdef, _notdef, _notdef, 
"Ethsmall", _notdef, _notdef, + "onequarter", "onehalf", "threequarters", "oneeighth", "threeeighths", + "fiveeighths", "seveneighths", "onethird", "twothirds", _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "ff", "fi", + "fl", "ffi", "ffl", "parenleftinferior", _notdef, "parenrightinferior", + "Circumflexsmall", "hypheninferior", "Gravesmall", "Asmall", "Bsmall", + "Csmall", "Dsmall", "Esmall", "Fsmall", "Gsmall", "Hsmall", "Ismall", + "Jsmall", "Ksmall", "Lsmall", "Msmall", "Nsmall", "Osmall", "Psmall", + "Qsmall", "Rsmall", "Ssmall", "Tsmall", "Usmall", "Vsmall", "Wsmall", + "Xsmall", "Ysmall", "Zsmall", "colonmonetary", "onefitted", "rupiah", + "Tildesmall", _notdef, _notdef, "asuperior", "centsuperior", + _notdef, _notdef, _notdef, _notdef, "Aacutesmall", + "Agravesmall", "Acircumflexsmall", "Adieresissmall", "Atildesmall", + "Aringsmall", "Ccedillasmall", "Eacutesmall", "Egravesmall", + "Ecircumflexsmall", "Edieresissmall", "Iacutesmall", "Igravesmall", + "Icircumflexsmall", "Idieresissmall", "Ntildesmall", "Oacutesmall", + "Ogravesmall", "Ocircumflexsmall", "Odieresissmall", "Otildesmall", + "Uacutesmall", "Ugravesmall", "Ucircumflexsmall", "Udieresissmall", + _notdef, "eightsuperior", "fourinferior", "threeinferior", + "sixinferior", "eightinferior", "seveninferior", "Scaronsmall", + _notdef, "centinferior", "twoinferior", _notdef, "Dieresissmall", + _notdef, "Caronsmall", "osuperior", "fiveinferior", _notdef, + "commainferior", "periodinferior", "Yacutesmall", _notdef, + "dollarinferior", _notdef, _notdef, "Thornsmall", _notdef, + "nineinferior", "zeroinferior", "Zcaronsmall", "AEsmall", "Oslashsmall", + "questiondownsmall", "oneinferior", "Lslashsmall", _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "Cedillasmall", + _notdef, _notdef, _notdef, _notdef, _notdef, "OEsmall", + "figuredash", "hyphensuperior", _notdef, _notdef, _notdef, + _notdef, "exclamdownsmall", _notdef, "Ydieresissmall", _notdef, + "onesuperior", "twosuperior", 
"threesuperior", "foursuperior", + "fivesuperior", "sixsuperior", "sevensuperior", "ninesuperior", + "zerosuperior", _notdef, "esuperior", "rsuperior", "tsuperior", + _notdef, _notdef, "isuperior", "ssuperior", "dsuperior", + _notdef, _notdef, _notdef, _notdef, _notdef, "lsuperior", + "Ogoneksmall", "Brevesmall", "Macronsmall", "bsuperior", "nsuperior", + "msuperior", "commasuperior", "periodsuperior", "Dotaccentsmall", + "Ringsmall", _notdef, _notdef, _notdef, _notdef }; + +const char * const pdf_win_ansi[256] = { _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, + _notdef, _notdef, _notdef, _notdef, _notdef, "space", + "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", + "quotesingle", "parenleft", "parenright", "asterisk", "plus", + "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", + "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", + "less", "equal", "greater", "question", "at", "A", "B", "C", "D", + "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", + "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", + "backslash", "bracketright", "asciicircum", "underscore", "grave", + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", + "braceleft", "bar", "braceright", "asciitilde", "bullet", "Euro", + "bullet", "quotesinglbase", "florin", "quotedblbase", "ellipsis", + "dagger", "daggerdbl", "circumflex", "perthousand", "Scaron", + "guilsinglleft", "OE", "bullet", "Zcaron", "bullet", "bullet", + "quoteleft", "quoteright", "quotedblleft", "quotedblright", "bullet", + "endash", "emdash", "tilde", "trademark", "scaron", "guilsinglright", + "oe", "bullet", "zcaron", "Ydieresis", "space", "exclamdown", "cent", + 
"sterling", "currency", "yen", "brokenbar", "section", "dieresis", + "copyright", "ordfeminine", "guillemotleft", "logicalnot", "hyphen", + "registered", "macron", "degree", "plusminus", "twosuperior", + "threesuperior", "acute", "mu", "paragraph", "periodcentered", + "cedilla", "onesuperior", "ordmasculine", "guillemotright", + "onequarter", "onehalf", "threequarters", "questiondown", "Agrave", + "Aacute", "Acircumflex", "Atilde", "Adieresis", "Aring", "AE", + "Ccedilla", "Egrave", "Eacute", "Ecircumflex", "Edieresis", "Igrave", + "Iacute", "Icircumflex", "Idieresis", "Eth", "Ntilde", "Ograve", + "Oacute", "Ocircumflex", "Otilde", "Odieresis", "multiply", "Oslash", + "Ugrave", "Uacute", "Ucircumflex", "Udieresis", "Yacute", "Thorn", + "germandbls", "agrave", "aacute", "acircumflex", "atilde", "adieresis", + "aring", "ae", "ccedilla", "egrave", "eacute", "ecircumflex", + "edieresis", "igrave", "iacute", "icircumflex", "idieresis", "eth", + "ntilde", "ograve", "oacute", "ocircumflex", "otilde", "odieresis", + "divide", "oslash", "ugrave", "uacute", "ucircumflex", "udieresis", + "yacute", "thorn", "ydieresis" +}; diff --git a/source/pdf/pdf-event.c b/source/pdf/pdf-event.c new file mode 100644 index 00000000..dc908985 --- /dev/null +++ b/source/pdf/pdf-event.c @@ -0,0 +1,144 @@ +#include "mupdf/fitz.h" +#include "mupdf/pdf.h" + +typedef struct +{ + pdf_doc_event base; + pdf_alert_event alert; +} pdf_alert_event_internal; + +pdf_alert_event *pdf_access_alert_event(pdf_doc_event *event) +{ + pdf_alert_event *alert = NULL; + + if (event->type == PDF_DOCUMENT_EVENT_ALERT) + alert = &((pdf_alert_event_internal *)event)->alert; + + return alert; +} + +void pdf_event_issue_alert(pdf_document *doc, pdf_alert_event *alert) +{ + if (doc->event_cb) + { + pdf_alert_event_internal ievent; + ievent.base.type = PDF_DOCUMENT_EVENT_ALERT; + ievent.alert = *alert; + + doc->event_cb((pdf_doc_event *)&ievent, doc->event_cb_data); + + *alert = ievent.alert; + } +} + +void 
pdf_event_issue_print(pdf_document *doc) +{ + pdf_doc_event e; + + e.type = PDF_DOCUMENT_EVENT_PRINT; + + if (doc->event_cb) + doc->event_cb(&e, doc->event_cb_data); +} + +typedef struct +{ + pdf_doc_event base; + char *item; +} pdf_exec_menu_item_event_internal; + +char *pdf_access_exec_menu_item_event(pdf_doc_event *event) +{ + char *item = NULL; + + if (event->type == PDF_DOCUMENT_EVENT_EXEC_MENU_ITEM) + item = ((pdf_exec_menu_item_event_internal *)event)->item; + + return item; +} + +void pdf_event_issue_exec_menu_item(pdf_document *doc, char *item) +{ + if (doc->event_cb) + { + pdf_exec_menu_item_event_internal ievent; + ievent.base.type = PDF_DOCUMENT_EVENT_EXEC_MENU_ITEM; + ievent.item = item; + + doc->event_cb((pdf_doc_event *)&ievent, doc->event_cb_data); + } +} + +void pdf_event_issue_exec_dialog(pdf_document *doc) +{ + pdf_doc_event e; + + e.type = PDF_DOCUMENT_EVENT_EXEC_DIALOG; + + if (doc->event_cb) + doc->event_cb(&e, doc->event_cb_data); +} + +typedef struct +{ + pdf_doc_event base; + pdf_launch_url_event launch_url; +} pdf_launch_url_event_internal; + +pdf_launch_url_event *pdf_access_launch_url_event(pdf_doc_event *event) +{ + pdf_launch_url_event *launch_url = NULL; + + if (event->type == PDF_DOCUMENT_EVENT_LAUNCH_URL) + launch_url = &((pdf_launch_url_event_internal *)event)->launch_url; + + return launch_url; +} + +void pdf_event_issue_launch_url(pdf_document *doc, char *url, int new_frame) +{ + if (doc->event_cb) + { + pdf_launch_url_event_internal e; + + e.base.type = PDF_DOCUMENT_EVENT_LAUNCH_URL; + e.launch_url.url = url; + e.launch_url.new_frame = new_frame; + doc->event_cb((pdf_doc_event *)&e, doc->event_cb_data); + } +} + +typedef struct +{ + pdf_doc_event base; + pdf_mail_doc_event mail_doc; +} pdf_mail_doc_event_internal; + +pdf_mail_doc_event *pdf_access_mail_doc_event(pdf_doc_event *event) +{ + pdf_mail_doc_event *mail_doc = NULL; + + if (event->type == PDF_DOCUMENT_EVENT_MAIL_DOC) + mail_doc = &((pdf_mail_doc_event_internal 
*)event)->mail_doc; + + return mail_doc; +} + +void pdf_event_issue_mail_doc(pdf_document *doc, pdf_mail_doc_event *event) +{ + if (doc->event_cb) + { + pdf_mail_doc_event_internal e; + + e.base.type = PDF_DOCUMENT_EVENT_MAIL_DOC; + e.mail_doc = *event; + + doc->event_cb((pdf_doc_event *)&e, doc->event_cb_data); + } +} + +void pdf_set_doc_event_callback(pdf_document *doc, pdf_doc_event_cb *fn, void *data) +{ + doc->event_cb = fn; + doc->event_cb_data = data; +} diff --git a/source/pdf/pdf-field.c b/source/pdf/pdf-field.c new file mode 100644 index 00000000..d8e1a240 --- /dev/null +++ b/source/pdf/pdf-field.c @@ -0,0 +1,56 @@ +#include "mupdf/pdf.h" + +pdf_obj *pdf_get_inheritable(pdf_document *doc, pdf_obj *obj, char *key) +{ + pdf_obj *fobj = NULL; + + while (!fobj && obj) + { + fobj = pdf_dict_gets(obj, key); + + if (!fobj) + obj = pdf_dict_gets(obj, "Parent"); + } + + return fobj ? fobj + : pdf_dict_gets(pdf_dict_gets(pdf_dict_gets(pdf_trailer(doc), "Root"), "AcroForm"), key); +} + +int pdf_get_field_flags(pdf_document *doc, pdf_obj *obj) +{ + return pdf_to_int(pdf_get_inheritable(doc, obj, "Ff")); +} + +static char *get_field_type_name(pdf_document *doc, pdf_obj *obj) +{ + return pdf_to_name(pdf_get_inheritable(doc, obj, "FT")); +} + +int pdf_field_type(pdf_document *doc, pdf_obj *obj) +{ + char *type = get_field_type_name(doc, obj); + int flags = pdf_get_field_flags(doc, obj); + + if (!strcmp(type, "Btn")) + { + if (flags & Ff_Pushbutton) + return PDF_WIDGET_TYPE_PUSHBUTTON; + else if (flags & Ff_Radio) + return PDF_WIDGET_TYPE_RADIOBUTTON; + else + return PDF_WIDGET_TYPE_CHECKBOX; + } + else if (!strcmp(type, "Tx")) + return PDF_WIDGET_TYPE_TEXT; + else if (!strcmp(type, "Ch")) + { + if (flags & Ff_Combo) + return PDF_WIDGET_TYPE_COMBOBOX; + else + return PDF_WIDGET_TYPE_LISTBOX; + } + else if (!strcmp(type, "Sig")) + return PDF_WIDGET_TYPE_SIGNATURE; + else + return PDF_WIDGET_TYPE_NOT_WIDGET; +} diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c new 
file mode 100644 index 00000000..1c2beb7b --- /dev/null +++ b/source/pdf/pdf-font.c @@ -0,0 +1,1263 @@ +#include "mupdf/pdf.h" + +#include <ft2build.h> +#include FT_FREETYPE_H +#include FT_XFREE86_H + +static void pdf_load_font_descriptor(pdf_font_desc *fontdesc, pdf_document *xref, pdf_obj *dict, char *collection, char *basefont, int iscidfont); + +static char *base_font_names[][10] = +{ + { "Courier", "CourierNew", "CourierNewPSMT", NULL }, + { "Courier-Bold", "CourierNew,Bold", "Courier,Bold", + "CourierNewPS-BoldMT", "CourierNew-Bold", NULL }, + { "Courier-Oblique", "CourierNew,Italic", "Courier,Italic", + "CourierNewPS-ItalicMT", "CourierNew-Italic", NULL }, + { "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic", + "CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL }, + { "Helvetica", "ArialMT", "Arial", NULL }, + { "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold", + "Helvetica,Bold", NULL }, + { "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic", + "Helvetica,Italic", "Helvetica-Italic", NULL }, + { "Helvetica-BoldOblique", "Arial-BoldItalicMT", + "Arial,BoldItalic", "Arial-BoldItalic", + "Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL }, + { "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman", + "TimesNewRomanPS", NULL }, + { "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold", + "TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL }, + { "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic", + "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL }, + { "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT", + "TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic", + "TimesNewRoman-BoldItalic", NULL }, + { "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic", + "SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL }, + { "ZapfDingbats", NULL } +}; + +static int is_dynalab(char *name) +{ + if (strstr(name, "HuaTian")) + return 1; + if (strstr(name, 
"MingLi")) + return 1; + if ((strstr(name, "DF") == name) || strstr(name, "+DF")) + return 1; + if ((strstr(name, "DLC") == name) || strstr(name, "+DLC")) + return 1; + return 0; +} + +static int strcmp_ignore_space(char *a, char *b) +{ + while (1) + { + while (*a == ' ') + a++; + while (*b == ' ') + b++; + if (*a != *b) + return 1; + if (*a == 0) + return *a != *b; + if (*b == 0) + return *a != *b; + a++; + b++; + } +} + +static char *clean_font_name(char *fontname) +{ + int i, k; + for (i = 0; i < nelem(base_font_names); i++) + for (k = 0; base_font_names[i][k]; k++) + if (!strcmp_ignore_space(base_font_names[i][k], fontname)) + return base_font_names[i][0]; + return fontname; +} + +/* + * FreeType and Rendering glue + */ + +enum { UNKNOWN, TYPE1, TRUETYPE }; + +static int ft_kind(FT_Face face) +{ + const char *kind = FT_Get_X11_Font_Format(face); + if (!strcmp(kind, "TrueType")) + return TRUETYPE; + if (!strcmp(kind, "Type 1")) + return TYPE1; + if (!strcmp(kind, "CFF")) + return TYPE1; + if (!strcmp(kind, "CID Type 1")) + return TYPE1; + return UNKNOWN; +} + +static int ft_is_bold(FT_Face face) +{ + return face->style_flags & FT_STYLE_FLAG_BOLD; +} + +static int ft_is_italic(FT_Face face) +{ + return face->style_flags & FT_STYLE_FLAG_ITALIC; +} + +static int ft_char_index(FT_Face face, int cid) +{ + int gid = FT_Get_Char_Index(face, cid); + if (gid == 0) + gid = FT_Get_Char_Index(face, 0xf000 + cid); + + /* some chinese fonts only ship the similarly looking 0x2026 */ + if (gid == 0 && cid == 0x22ef) + gid = FT_Get_Char_Index(face, 0x2026); + + return gid; +} + +static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid) +{ + if (fontdesc->to_ttf_cmap) + { + cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid); + return ft_char_index(fontdesc->font->ft_face, cid); + } + + if (fontdesc->cid_to_gid && cid < fontdesc->cid_to_gid_len && cid >= 0) + return fontdesc->cid_to_gid[cid]; + + return cid; +} + +int +pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, 
int cid) +{ + if (fontdesc->font->ft_face) + return ft_cid_to_gid(fontdesc, cid); + return cid; +} + +static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid) +{ + int gid = ft_cid_to_gid(fontdesc, cid); + int fterr; + + fterr = FT_Load_Glyph(fontdesc->font->ft_face, gid, + FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM); + if (fterr) + { + fz_warn(ctx, "freetype load glyph (gid %d): %s", gid, ft_error_string(fterr)); + return 0; + } + return ((FT_Face)fontdesc->font->ft_face)->glyph->advance.x; +} + +static int lookup_mre_code(char *name) +{ + int i; + for (i = 0; i < 256; i++) + if (pdf_mac_roman[i] && !strcmp(name, pdf_mac_roman[i])) + return i; + return -1; +} + +/* + * Load font files. + */ + +static void +pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, char *fontname) +{ + unsigned char *data; + unsigned int len; + + fontname = clean_font_name(fontname); + + data = pdf_lookup_builtin_font(fontname, &len); + if (!data) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find builtin font: '%s'", fontname); + + fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1); + + if (!strcmp(fontname, "Symbol") || !strcmp(fontname, "ZapfDingbats")) + fontdesc->flags |= PDF_FD_SYMBOLIC; +} + +static void +pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, char *fontname, int mono, int serif, int bold, int italic) +{ + unsigned char *data; + unsigned int len; + + data = pdf_lookup_substitute_font(mono, serif, bold, italic, &len); + if (!data) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find substitute font"); + + fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1); + + fontdesc->font->ft_substitute = 1; + fontdesc->font->ft_bold = bold && !ft_is_bold(fontdesc->font->ft_face); + fontdesc->font->ft_italic = italic && !ft_is_italic(fontdesc->font->ft_face); +} + +static void +pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, char *fontname, int ros, int serif) +{ + 
unsigned char *data; + unsigned int len; + + data = pdf_lookup_substitute_cjk_font(ros, serif, &len); + if (!data) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find builtin CJK font"); + + /* a glyph bbox cache is too big for droid sans fallback (51k glyphs!) */ + fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 0); + + fontdesc->font->ft_substitute = 1; +} + +static void +pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, char *fontname, char *collection) +{ + int bold = 0; + int italic = 0; + int serif = 0; + int mono = 0; + + if (strstr(fontname, "Bold")) + bold = 1; + if (strstr(fontname, "Italic")) + italic = 1; + if (strstr(fontname, "Oblique")) + italic = 1; + + if (fontdesc->flags & PDF_FD_FIXED_PITCH) + mono = 1; + if (fontdesc->flags & PDF_FD_SERIF) + serif = 1; + if (fontdesc->flags & PDF_FD_ITALIC) + italic = 1; + if (fontdesc->flags & PDF_FD_FORCE_BOLD) + bold = 1; + + if (collection) + { + if (!strcmp(collection, "Adobe-CNS1")) + pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, PDF_ROS_CNS, serif); + else if (!strcmp(collection, "Adobe-GB1")) + pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, PDF_ROS_GB, serif); + else if (!strcmp(collection, "Adobe-Japan1")) + pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, PDF_ROS_JAPAN, serif); + else if (!strcmp(collection, "Adobe-Korea1")) + pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, PDF_ROS_KOREA, serif); + else + { + if (strcmp(collection, "Adobe-Identity") != 0) + fz_warn(ctx, "unknown cid collection: %s", collection); + pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic); + } + } + else + { + pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic); + } +} + +static void +pdf_load_embedded_font(pdf_document *xref, pdf_font_desc *fontdesc, char *fontname, pdf_obj *stmref) +{ + fz_buffer *buf; + fz_context *ctx = xref->ctx; + + fz_try(ctx) + { + buf = pdf_load_stream(xref, pdf_to_num(stmref), 
pdf_to_gen(stmref)); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot load font stream (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); + } + + fz_try(ctx) + { + fontdesc->font = fz_new_font_from_memory(ctx, fontname, buf->data, buf->len, 0, 1); + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, buf); + fz_rethrow_message(ctx, "cannot load embedded font (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); + } + fontdesc->size += buf->len; + + /* save the buffer so we can free it later */ + fontdesc->font->ft_data = buf->data; + fontdesc->font->ft_size = buf->len; + fz_free(ctx, buf); /* only free the fz_buffer struct, not the contained data */ + + fontdesc->is_embedded = 1; +} + +/* + * Create and destroy + */ + +pdf_font_desc * +pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc) +{ + return (pdf_font_desc *)fz_keep_storable(ctx, &fontdesc->storable); +} + +void +pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc) +{ + fz_drop_storable(ctx, &fontdesc->storable); +} + +static void +pdf_free_font_imp(fz_context *ctx, fz_storable *fontdesc_) +{ + pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_; + + if (fontdesc->font) + fz_drop_font(ctx, fontdesc->font); + if (fontdesc->encoding) + pdf_drop_cmap(ctx, fontdesc->encoding); + if (fontdesc->to_ttf_cmap) + pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap); + if (fontdesc->to_unicode) + pdf_drop_cmap(ctx, fontdesc->to_unicode); + fz_free(ctx, fontdesc->cid_to_gid); + fz_free(ctx, fontdesc->cid_to_ucs); + fz_free(ctx, fontdesc->hmtx); + fz_free(ctx, fontdesc->vmtx); + fz_free(ctx, fontdesc); +} + +pdf_font_desc * +pdf_new_font_desc(fz_context *ctx) +{ + pdf_font_desc *fontdesc; + + fontdesc = fz_malloc_struct(ctx, pdf_font_desc); + FZ_INIT_STORABLE(fontdesc, 1, pdf_free_font_imp); + fontdesc->size = sizeof(pdf_font_desc); + + fontdesc->font = NULL; + + fontdesc->flags = 0; + fontdesc->italic_angle = 0; + fontdesc->ascent = 0; + fontdesc->descent = 0; + fontdesc->cap_height = 0; + fontdesc->x_height = 0; + 
fontdesc->missing_width = 0; + + fontdesc->encoding = NULL; + fontdesc->to_ttf_cmap = NULL; + fontdesc->cid_to_gid_len = 0; + fontdesc->cid_to_gid = NULL; + + fontdesc->to_unicode = NULL; + fontdesc->cid_to_ucs_len = 0; + fontdesc->cid_to_ucs = NULL; + + fontdesc->wmode = 0; + + fontdesc->hmtx_cap = 0; + fontdesc->vmtx_cap = 0; + fontdesc->hmtx_len = 0; + fontdesc->vmtx_len = 0; + fontdesc->hmtx = NULL; + fontdesc->vmtx = NULL; + + fontdesc->dhmtx.lo = 0x0000; + fontdesc->dhmtx.hi = 0xFFFF; + fontdesc->dhmtx.w = 1000; + + fontdesc->dvmtx.lo = 0x0000; + fontdesc->dvmtx.hi = 0xFFFF; + fontdesc->dvmtx.x = 0; + fontdesc->dvmtx.y = 880; + fontdesc->dvmtx.w = -1000; + + fontdesc->is_embedded = 0; + + return fontdesc; +} + +/* + * Simple fonts (Type1 and TrueType) + */ + +static pdf_font_desc * +pdf_load_simple_font(pdf_document *xref, pdf_obj *dict) +{ + pdf_obj *descriptor; + pdf_obj *encoding; + pdf_obj *widths; + unsigned short *etable = NULL; + pdf_font_desc *fontdesc = NULL; + char *subtype; + FT_Face face; + FT_CharMap cmap; + int symbolic; + int kind; + + char *basefont; + char *estrings[256]; + char ebuffer[256][32]; + int i, k, n; + int fterr; + int has_lock = 0; + fz_context *ctx = xref->ctx; + + fz_var(fontdesc); + fz_var(etable); + fz_var(has_lock); + + basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); + + /* Load font file */ + fz_try(ctx) + { + fontdesc = pdf_new_font_desc(ctx); + + descriptor = pdf_dict_gets(dict, "FontDescriptor"); + if (descriptor) + pdf_load_font_descriptor(fontdesc, xref, descriptor, NULL, basefont, 0); + else + pdf_load_builtin_font(ctx, fontdesc, basefont); + + /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ + if (descriptor && pdf_is_string(pdf_dict_gets(descriptor, "FontName")) && + !pdf_dict_gets(dict, "ToUnicode") && + !strcmp(pdf_to_name(pdf_dict_gets(dict, "Encoding")), "WinAnsiEncoding") && + pdf_to_int(pdf_dict_gets(descriptor, "Flags")) == 4) + { + char *cp936fonts[] = { + 
"\xCB\xCE\xCC\xE5", "SimSun,Regular", + "\xBA\xDA\xCC\xE5", "SimHei,Regular", + "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", + "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", + "\xC1\xA5\xCA\xE9", "SimLi,Regular", + NULL + }; + for (i = 0; cp936fonts[i]; i += 2) + if (!strcmp(basefont, cp936fonts[i])) + break; + if (cp936fonts[i]) + { + fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); + pdf_drop_font(ctx, fontdesc); + fontdesc = NULL; + fontdesc = pdf_new_font_desc(ctx); + pdf_load_font_descriptor(fontdesc, xref, descriptor, "Adobe-GB1", cp936fonts[i+1], 0); + fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); + fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); + fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); + + face = fontdesc->font->ft_face; + kind = ft_kind(face); + goto skip_encoding; + } + } + + face = fontdesc->font->ft_face; + kind = ft_kind(face); + + /* Encoding */ + + symbolic = fontdesc->flags & 4; + + if (face->num_charmaps > 0) + cmap = face->charmaps[0]; + else + cmap = NULL; + + for (i = 0; i < face->num_charmaps; i++) + { + FT_CharMap test = face->charmaps[i]; + + if (kind == TYPE1) + { + if (test->platform_id == 7) + cmap = test; + } + + if (kind == TRUETYPE) + { + if (test->platform_id == 1 && test->encoding_id == 0) + cmap = test; + if (test->platform_id == 3 && test->encoding_id == 1) + cmap = test; + if (symbolic && test->platform_id == 3 && test->encoding_id == 0) + cmap = test; + } + } + + if (cmap) + { + fterr = FT_Set_Charmap(face, cmap); + if (fterr) + fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); + } + else + fz_warn(ctx, "freetype could not find any cmaps"); + + etable = fz_malloc_array(ctx, 256, sizeof(unsigned short)); + fontdesc->size += 256 * sizeof(unsigned short); + for (i = 0; i < 256; i++) + { + estrings[i] = NULL; + etable[i] = 0; + } + + encoding = pdf_dict_gets(dict, "Encoding"); + if (encoding) + { + if (pdf_is_name(encoding)) 
+ pdf_load_encoding(estrings, pdf_to_name(encoding)); + + if (pdf_is_dict(encoding)) + { + pdf_obj *base, *diff, *item; + + base = pdf_dict_gets(encoding, "BaseEncoding"); + if (pdf_is_name(base)) + pdf_load_encoding(estrings, pdf_to_name(base)); + else if (!fontdesc->is_embedded && !symbolic) + pdf_load_encoding(estrings, "StandardEncoding"); + + diff = pdf_dict_gets(encoding, "Differences"); + if (pdf_is_array(diff)) + { + n = pdf_array_len(diff); + k = 0; + for (i = 0; i < n; i++) + { + item = pdf_array_get(diff, i); + if (pdf_is_int(item)) + k = pdf_to_int(item); + if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) + estrings[k++] = pdf_to_name(item); + } + } + } + } + + /* start with the builtin encoding */ + for (i = 0; i < 256; i++) + etable[i] = ft_char_index(face, i); + + fz_lock(ctx, FZ_LOCK_FREETYPE); + has_lock = 1; + + /* built-in and substitute fonts may be a different type than what the document expects */ + subtype = pdf_to_name(pdf_dict_gets(dict, "Subtype")); + if (!strcmp(subtype, "Type1")) + kind = TYPE1; + else if (!strcmp(subtype, "MMType1")) + kind = TYPE1; + else if (!strcmp(subtype, "TrueType")) + kind = TRUETYPE; + else if (!strcmp(subtype, "CIDFontType0")) + kind = TYPE1; + else if (!strcmp(subtype, "CIDFontType2")) + kind = TRUETYPE; + + /* encode by glyph name where we can */ + if (kind == TYPE1) + { + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + etable[i] = FT_Get_Name_Index(face, estrings[i]); + if (etable[i] == 0) + { + int aglcode = pdf_lookup_agl(estrings[i]); + const char **dupnames = pdf_lookup_agl_duplicates(aglcode); + while (*dupnames) + { + etable[i] = FT_Get_Name_Index(face, (char*)*dupnames); + if (etable[i]) + break; + dupnames++; + } + } + } + } + } + + /* encode by glyph name where we can */ + if (kind == TRUETYPE) + { + /* Unicode cmap */ + if (!symbolic && face->charmap && face->charmap->platform_id == 3) + { + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + int aglcode = 
pdf_lookup_agl(estrings[i]); + if (!aglcode) + etable[i] = FT_Get_Name_Index(face, estrings[i]); + else + etable[i] = ft_char_index(face, aglcode); + } + } + } + + /* MacRoman cmap */ + else if (!symbolic && face->charmap && face->charmap->platform_id == 1) + { + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + k = lookup_mre_code(estrings[i]); + if (k <= 0) + etable[i] = FT_Get_Name_Index(face, estrings[i]); + else + etable[i] = ft_char_index(face, k); + } + } + } + + /* Symbolic cmap */ + else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL) + { + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + etable[i] = FT_Get_Name_Index(face, estrings[i]); + if (etable[i] == 0) + etable[i] = ft_char_index(face, i); + } + } + } + } + + /* try to reverse the glyph names from the builtin encoding */ + for (i = 0; i < 256; i++) + { + if (etable[i] && !estrings[i]) + { + if (FT_HAS_GLYPH_NAMES(face)) + { + fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); + if (fterr) + fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); + if (ebuffer[i][0]) + estrings[i] = ebuffer[i]; + } + else + { + estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */ + } + } + } + + /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */ + if (kind == TYPE1 && symbolic) + { + for (i = 0; i < 256; i++) + if (etable[i] && estrings[i] && !pdf_lookup_agl(estrings[i])) + estrings[i] = (char*) pdf_standard[i]; + } + + fz_unlock(ctx, FZ_LOCK_FREETYPE); + has_lock = 0; + + fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); + fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); + fontdesc->cid_to_gid_len = 256; + fontdesc->cid_to_gid = etable; + + fz_try(ctx) + { + pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + fz_warn(ctx, "cannot load ToUnicode CMap"); + } + + skip_encoding: + + /* Widths */ + + 
pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); + + widths = pdf_dict_gets(dict, "Widths"); + if (widths) + { + int first, last; + + first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); + last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); + + if (first < 0 || last > 255 || first > last) + first = last = 0; + + for (i = 0; i < last - first + 1; i++) + { + int wid = pdf_to_int(pdf_array_get(widths, i)); + pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); + } + } + else + { + fz_lock(ctx, FZ_LOCK_FREETYPE); + has_lock = 1; + fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72); + if (fterr) + fz_warn(ctx, "freetype set character size: %s", ft_error_string(fterr)); + for (i = 0; i < 256; i++) + { + pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); + } + fz_unlock(ctx, FZ_LOCK_FREETYPE); + has_lock = 0; + } + + pdf_end_hmtx(ctx, fontdesc); + } + fz_catch(ctx) + { + if (has_lock) + fz_unlock(ctx, FZ_LOCK_FREETYPE); + if (fontdesc && etable != fontdesc->cid_to_gid) + fz_free(ctx, etable); + pdf_drop_font(ctx, fontdesc); + fz_rethrow_message(ctx, "cannot load simple font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); + } + return fontdesc; +} + +/* + * CID Fonts + */ + +static pdf_font_desc * +load_cid_font(pdf_document *xref, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) +{ + pdf_obj *widths; + pdf_obj *descriptor; + pdf_font_desc *fontdesc = NULL; + FT_Face face; + int kind; + char collection[256]; + char *basefont; + int i, k, fterr; + pdf_obj *obj; + int dw; + fz_context *ctx = xref->ctx; + + fz_var(fontdesc); + + fz_try(ctx) + { + /* Get font name and CID collection */ + + basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); + + { + pdf_obj *cidinfo; + char tmpstr[64]; + int tmplen; + + cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); + if (!cidinfo) + fz_throw(ctx, FZ_ERROR_GENERIC, "cid font is missing info"); + + obj = pdf_dict_gets(cidinfo, "Registry"); + tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); + 
memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); + tmpstr[tmplen] = '\0'; + fz_strlcpy(collection, tmpstr, sizeof collection); + + fz_strlcat(collection, "-", sizeof collection); + + obj = pdf_dict_gets(cidinfo, "Ordering"); + tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); + memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); + tmpstr[tmplen] = '\0'; + fz_strlcat(collection, tmpstr, sizeof collection); + } + + /* Load font file */ + + fontdesc = pdf_new_font_desc(ctx); + + descriptor = pdf_dict_gets(dict, "FontDescriptor"); + if (!descriptor) + fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing font descriptor"); + pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont, 1); + + face = fontdesc->font->ft_face; + kind = ft_kind(face); + + /* Encoding */ + + if (pdf_is_name(encoding)) + { + if (!strcmp(pdf_to_name(encoding), "Identity-H")) + fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); + else if (!strcmp(pdf_to_name(encoding), "Identity-V")) + fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); + else + fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); + } + else if (pdf_is_indirect(encoding)) + { + fontdesc->encoding = pdf_load_embedded_cmap(xref, encoding); + } + else + { + fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: font missing encoding"); + } + fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); + + pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); + + if (kind == TRUETYPE) + { + pdf_obj *cidtogidmap; + + cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); + if (pdf_is_indirect(cidtogidmap)) + { + fz_buffer *buf; + + buf = pdf_load_stream(xref, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); + + fontdesc->cid_to_gid_len = (buf->len) / 2; + fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); + fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); + for (i = 0; i < fontdesc->cid_to_gid_len; i++) + 
fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; + + fz_drop_buffer(ctx, buf); + } + + /* if truetype font is external, cidtogidmap should not be identity */ + /* so we map from cid to unicode and then map that through the (3 1) */ + /* unicode cmap to get a glyph id */ + else if (fontdesc->font->ft_substitute) + { + fterr = FT_Select_Charmap(face, ft_encoding_unicode); + if (fterr) + { + fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); + } + + if (!strcmp(collection, "Adobe-CNS1")) + fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); + else if (!strcmp(collection, "Adobe-GB1")) + fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); + else if (!strcmp(collection, "Adobe-Japan1")) + fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); + else if (!strcmp(collection, "Adobe-Japan2")) + fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); + else if (!strcmp(collection, "Adobe-Korea1")) + fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); + } + } + + pdf_load_to_unicode(xref, fontdesc, NULL, collection, to_unicode); + + /* If we have an identity encoding, we're supposed to use the glyph ids directly. + * If we only have a substitute font, that won't work. + * Make a last ditch attempt by using + * the ToUnicode table if it exists to map via the substitute font's cmap. 
*/ + if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->ft_substitute) + { + fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont); + if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap) + fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode); + } + + /* Horizontal */ + + dw = 1000; + obj = pdf_dict_gets(dict, "DW"); + if (obj) + dw = pdf_to_int(obj); + pdf_set_default_hmtx(ctx, fontdesc, dw); + + widths = pdf_dict_gets(dict, "W"); + if (widths) + { + int c0, c1, w, n, m; + + n = pdf_array_len(widths); + for (i = 0; i < n; ) + { + c0 = pdf_to_int(pdf_array_get(widths, i)); + obj = pdf_array_get(widths, i + 1); + if (pdf_is_array(obj)) + { + m = pdf_array_len(obj); + for (k = 0; k < m; k++) + { + w = pdf_to_int(pdf_array_get(obj, k)); + pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); + } + i += 2; + } + else + { + c1 = pdf_to_int(obj); + w = pdf_to_int(pdf_array_get(widths, i + 2)); + pdf_add_hmtx(ctx, fontdesc, c0, c1, w); + i += 3; + } + } + } + + pdf_end_hmtx(ctx, fontdesc); + + /* Vertical */ + + if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) + { + int dw2y = 880; + int dw2w = -1000; + + obj = pdf_dict_gets(dict, "DW2"); + if (obj) + { + dw2y = pdf_to_int(pdf_array_get(obj, 0)); + dw2w = pdf_to_int(pdf_array_get(obj, 1)); + } + + pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); + + widths = pdf_dict_gets(dict, "W2"); + if (widths) + { + int c0, c1, w, x, y, n; + + n = pdf_array_len(widths); + for (i = 0; i < n; ) + { + c0 = pdf_to_int(pdf_array_get(widths, i)); + obj = pdf_array_get(widths, i + 1); + if (pdf_is_array(obj)) + { + int m = pdf_array_len(obj); + for (k = 0; k * 3 < m; k ++) + { + w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); + x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); + y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); + pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); + } + i += 2; + } + else + { + c1 = pdf_to_int(obj); + w = pdf_to_int(pdf_array_get(widths, i + 2)); + x = 
pdf_to_int(pdf_array_get(widths, i + 3)); + y = pdf_to_int(pdf_array_get(widths, i + 4)); + pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); + i += 5; + } + } + } + + pdf_end_vmtx(ctx, fontdesc); + } + } + fz_catch(ctx) + { + pdf_drop_font(ctx, fontdesc); + fz_rethrow_message(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); + } + + return fontdesc; +} + +static pdf_font_desc * +pdf_load_type0_font(pdf_document *xref, pdf_obj *dict) +{ + pdf_obj *dfonts; + pdf_obj *dfont; + pdf_obj *subtype; + pdf_obj *encoding; + pdf_obj *to_unicode; + + dfonts = pdf_dict_gets(dict, "DescendantFonts"); + if (!dfonts) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "cid font is missing descendant fonts"); + + dfont = pdf_array_get(dfonts, 0); + + subtype = pdf_dict_gets(dfont, "Subtype"); + encoding = pdf_dict_gets(dict, "Encoding"); + to_unicode = pdf_dict_gets(dict, "ToUnicode"); + + if (pdf_is_name(subtype) && !strcmp(pdf_to_name(subtype), "CIDFontType0")) + return load_cid_font(xref, dfont, encoding, to_unicode); + else if (pdf_is_name(subtype) && !strcmp(pdf_to_name(subtype), "CIDFontType2")) + return load_cid_font(xref, dfont, encoding, to_unicode); + else + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "syntaxerror: unknown cid font type"); + + return NULL; /* Stupid MSVC */ +} + +/* + * FontDescriptor + */ + +static void +pdf_load_font_descriptor(pdf_font_desc *fontdesc, pdf_document *xref, pdf_obj *dict, char *collection, char *basefont, int iscidfont) +{ + pdf_obj *obj1, *obj2, *obj3, *obj; + char *fontname, *origname; + FT_Face face; + fz_context *ctx = xref->ctx; + + /* Prefer BaseFont; don't bother with FontName */ + origname = basefont; + + /* Look through list of alternate names for built in fonts */ + fontname = clean_font_name(origname); + + fontdesc->flags = pdf_to_int(pdf_dict_gets(dict, "Flags")); + fontdesc->italic_angle = pdf_to_real(pdf_dict_gets(dict, "ItalicAngle")); + fontdesc->ascent = pdf_to_real(pdf_dict_gets(dict, "Ascent")); + fontdesc->descent 
= pdf_to_real(pdf_dict_gets(dict, "Descent")); + fontdesc->cap_height = pdf_to_real(pdf_dict_gets(dict, "CapHeight")); + fontdesc->x_height = pdf_to_real(pdf_dict_gets(dict, "XHeight")); + fontdesc->missing_width = pdf_to_real(pdf_dict_gets(dict, "MissingWidth")); + + obj1 = pdf_dict_gets(dict, "FontFile"); + obj2 = pdf_dict_gets(dict, "FontFile2"); + obj3 = pdf_dict_gets(dict, "FontFile3"); + obj = obj1 ? obj1 : obj2 ? obj2 : obj3; + + if (pdf_is_indirect(obj)) + { + fz_try(ctx) + { + pdf_load_embedded_font(xref, fontdesc, fontname, obj); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font"); + if (origname != fontname && !iscidfont) + pdf_load_builtin_font(ctx, fontdesc, fontname); + else + pdf_load_system_font(ctx, fontdesc, fontname, collection); + } + } + else + { + if (origname != fontname && !iscidfont) + pdf_load_builtin_font(ctx, fontdesc, fontname); + else + pdf_load_system_font(ctx, fontdesc, fontname, collection); + } + + /* Check for DynaLab fonts that must use hinting */ + face = fontdesc->font->ft_face; + if (ft_kind(face) == TRUETYPE) + { + if (FT_IS_TRICKY(face) || is_dynalab(fontdesc->font->name)) + fontdesc->font->ft_hint = 1; + } +} + +static void +pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc) +{ + fz_font *font = fontdesc->font; + int i, k, n, cid, gid; + + n = 0; + for (i = 0; i < fontdesc->hmtx_len; i++) + { + for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) + { + cid = pdf_lookup_cmap(fontdesc->encoding, k); + gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); + if (gid > n) + n = gid; + } + }; + + font->width_count = n + 1; + font->width_table = fz_malloc_array(ctx, font->width_count, sizeof(int)); + memset(font->width_table, 0, font->width_count * sizeof(int)); + fontdesc->size += font->width_count * sizeof(int); + + for (i = 0; i < fontdesc->hmtx_len; i++) + { + for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) + { 
+ cid = pdf_lookup_cmap(fontdesc->encoding, k); + gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); + if (gid >= 0 && gid < font->width_count) + font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]); + } + } +} + +pdf_font_desc * +pdf_load_font(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, int nested_depth) +{ + char *subtype; + pdf_obj *dfonts; + pdf_obj *charprocs; + fz_context *ctx = xref->ctx; + pdf_font_desc *fontdesc; + int type3 = 0; + + if ((fontdesc = pdf_find_item(ctx, pdf_free_font_imp, dict))) + { + return fontdesc; + } + + subtype = pdf_to_name(pdf_dict_gets(dict, "Subtype")); + dfonts = pdf_dict_gets(dict, "DescendantFonts"); + charprocs = pdf_dict_gets(dict, "CharProcs"); + + if (subtype && !strcmp(subtype, "Type0")) + fontdesc = pdf_load_type0_font(xref, dict); + else if (subtype && !strcmp(subtype, "Type1")) + fontdesc = pdf_load_simple_font(xref, dict); + else if (subtype && !strcmp(subtype, "MMType1")) + fontdesc = pdf_load_simple_font(xref, dict); + else if (subtype && !strcmp(subtype, "TrueType")) + fontdesc = pdf_load_simple_font(xref, dict); + else if (subtype && !strcmp(subtype, "Type3")) + { + fontdesc = pdf_load_type3_font(xref, rdb, dict); + type3 = 1; + } + else if (charprocs) + { + fz_warn(ctx, "unknown font format, guessing type3."); + fontdesc = pdf_load_type3_font(xref, rdb, dict); + type3 = 1; + } + else if (dfonts) + { + fz_warn(ctx, "unknown font format, guessing type0."); + fontdesc = pdf_load_type0_font(xref, dict); + } + else + { + fz_warn(ctx, "unknown font format, guessing type1 or truetype."); + fontdesc = pdf_load_simple_font(xref, dict); + } + + /* Save the widths to stretch non-CJK substitute fonts */ + if (fontdesc->font->ft_substitute && !fontdesc->to_ttf_cmap) + pdf_make_width_table(ctx, fontdesc); + + pdf_store_item(ctx, dict, fontdesc, fontdesc->size); + + if (type3) + pdf_load_type3_glyphs(xref, fontdesc, nested_depth); + + return fontdesc; +} + +#ifndef NDEBUG +void 
+pdf_print_font(fz_context *ctx, pdf_font_desc *fontdesc) +{ + int i; + + printf("fontdesc {\n"); + + if (fontdesc->font->ft_face) + printf("\tfreetype font\n"); + if (fontdesc->font->t3procs) + printf("\ttype3 font\n"); + + printf("\twmode %d\n", fontdesc->wmode); + printf("\tDW %d\n", fontdesc->dhmtx.w); + + printf("\tW {\n"); + for (i = 0; i < fontdesc->hmtx_len; i++) + printf("\t\t<%04x> <%04x> %d\n", + fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w); + printf("\t}\n"); + + if (fontdesc->wmode) + { + printf("\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w); + printf("\tW2 {\n"); + for (i = 0; i < fontdesc->vmtx_len; i++) + printf("\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi, + fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w); + printf("\t}\n"); + } +} +#endif + +fz_rect *pdf_measure_text(fz_context *ctx, pdf_font_desc *fontdesc, unsigned char *buf, int len, fz_rect *acc) +{ + pdf_hmtx h; + int gid; + int i; + float x = 0.0; + fz_rect bbox; + + *acc = fz_empty_rect; + for (i = 0; i < len; i++) + { + gid = pdf_font_cid_to_gid(ctx, fontdesc, buf[i]); + h = pdf_lookup_hmtx(ctx, fontdesc, buf[i]); + fz_bound_glyph(ctx, fontdesc->font, gid, &fz_identity, &bbox); + bbox.x0 += x; + bbox.x1 += x; + fz_union_rect(acc, &bbox); + x += h.w / 1000.0; + } + + return acc; +} + +float pdf_text_stride(fz_context *ctx, pdf_font_desc *fontdesc, float fontsize, unsigned char *buf, int len, float room, int *count) +{ + pdf_hmtx h; + int i = 0; + float x = 0.0; + + while(i < len) + { + float span; + + h = pdf_lookup_hmtx(ctx, fontdesc, buf[i]); + + span = h.w * fontsize / 1000.0; + + if (x + span > room) + break; + + x += span; + i ++; + } + + if (count) + *count = i; + + return x; +} diff --git a/source/pdf/pdf-fontfile.c b/source/pdf/pdf-fontfile.c new file mode 100644 index 00000000..c9990dad --- /dev/null +++ b/source/pdf/pdf-fontfile.c @@ -0,0 +1,153 @@ +#include "mupdf/pdf.h" + +/* + Which fonts are embedded is 
based on a few preprocessor definitions. + + The base 14 fonts are always embedded. + For font substitution we embed DroidSans which has good glyph coverage. + For CJK font substitution we embed DroidSansFallback. + + Set NOCJK to skip all CJK support (this also omits embedding the CJK CMaps) + Set NOCJKFONT to skip the embedded CJK font. + Set NOCJKFULL to embed a smaller CJK font without CJK Extension A support. + + Set NODROIDFONT to use the base 14 fonts as substitute fonts. +*/ + +#ifdef NOCJK +#define NOCJKFONT +#endif + +#include "gen_font_base14.h" + +#ifndef NODROIDFONT +#include "gen_font_droid.h" +#endif + +#ifndef NOCJKFONT +#ifndef NOCJKFULL +#include "gen_font_cjk_full.h" +#else +#include "gen_font_cjk.h" +#endif +#endif + +unsigned char * +pdf_lookup_builtin_font(char *name, unsigned int *len) +{ + if (!strcmp("Courier", name)) { + *len = sizeof pdf_font_NimbusMonL_Regu; + return (unsigned char*) pdf_font_NimbusMonL_Regu; + } + if (!strcmp("Courier-Bold", name)) { + *len = sizeof pdf_font_NimbusMonL_Bold; + return (unsigned char*) pdf_font_NimbusMonL_Bold; + } + if (!strcmp("Courier-Oblique", name)) { + *len = sizeof pdf_font_NimbusMonL_ReguObli; + return (unsigned char*) pdf_font_NimbusMonL_ReguObli; + } + if (!strcmp("Courier-BoldOblique", name)) { + *len = sizeof pdf_font_NimbusMonL_BoldObli; + return (unsigned char*) pdf_font_NimbusMonL_BoldObli; + } + if (!strcmp("Helvetica", name)) { + *len = sizeof pdf_font_NimbusSanL_Regu; + return (unsigned char*) pdf_font_NimbusSanL_Regu; + } + if (!strcmp("Helvetica-Bold", name)) { + *len = sizeof pdf_font_NimbusSanL_Bold; + return (unsigned char*) pdf_font_NimbusSanL_Bold; + } + if (!strcmp("Helvetica-Oblique", name)) { + *len = sizeof pdf_font_NimbusSanL_ReguItal; + return (unsigned char*) pdf_font_NimbusSanL_ReguItal; + } + if (!strcmp("Helvetica-BoldOblique", name)) { + *len = sizeof pdf_font_NimbusSanL_BoldItal; + return (unsigned char*) pdf_font_NimbusSanL_BoldItal; + } + if (!strcmp("Times-Roman", 
name)) { + *len = sizeof pdf_font_NimbusRomNo9L_Regu; + return (unsigned char*) pdf_font_NimbusRomNo9L_Regu; + } + if (!strcmp("Times-Bold", name)) { + *len = sizeof pdf_font_NimbusRomNo9L_Medi; + return (unsigned char*) pdf_font_NimbusRomNo9L_Medi; + } + if (!strcmp("Times-Italic", name)) { + *len = sizeof pdf_font_NimbusRomNo9L_ReguItal; + return (unsigned char*) pdf_font_NimbusRomNo9L_ReguItal; + } + if (!strcmp("Times-BoldItalic", name)) { + *len = sizeof pdf_font_NimbusRomNo9L_MediItal; + return (unsigned char*) pdf_font_NimbusRomNo9L_MediItal; + } + if (!strcmp("Symbol", name)) { + *len = sizeof pdf_font_StandardSymL; + return (unsigned char*) pdf_font_StandardSymL; + } + if (!strcmp("ZapfDingbats", name)) { + *len = sizeof pdf_font_Dingbats; + return (unsigned char*) pdf_font_Dingbats; + } + *len = 0; + return NULL; +} + +unsigned char * +pdf_lookup_substitute_font(int mono, int serif, int bold, int italic, unsigned int *len) +{ +#ifdef NODROIDFONT + if (mono) { + if (bold) { + if (italic) return pdf_lookup_builtin_font("Courier-BoldOblique", len); + else return pdf_lookup_builtin_font("Courier-Bold", len); + } else { + if (italic) return pdf_lookup_builtin_font("Courier-Oblique", len); + else return pdf_lookup_builtin_font("Courier", len); + } + } else if (serif) { + if (bold) { + if (italic) return pdf_lookup_builtin_font("Times-BoldItalic", len); + else return pdf_lookup_builtin_font("Times-Bold", len); + } else { + if (italic) return pdf_lookup_builtin_font("Times-Italic", len); + else return pdf_lookup_builtin_font("Times-Roman", len); + } + } else { + if (bold) { + if (italic) return pdf_lookup_builtin_font("Helvetica-BoldOblique", len); + else return pdf_lookup_builtin_font("Helvetica-Bold", len); + } else { + if (italic) return pdf_lookup_builtin_font("Helvetica-Oblique", len); + else return pdf_lookup_builtin_font("Helvetica", len); + } + } +#else + if (mono) { + *len = sizeof pdf_font_DroidSansMono; + return (unsigned char*) pdf_font_DroidSansMono; 
+ } else { + *len = sizeof pdf_font_DroidSans; + return (unsigned char*) pdf_font_DroidSans; + } +#endif +} + +unsigned char * +pdf_lookup_substitute_cjk_font(int ros, int serif, unsigned int *len) +{ +#ifndef NOCJKFONT +#ifndef NOCJKFULL + *len = sizeof pdf_font_DroidSansFallbackFull; + return (unsigned char*) pdf_font_DroidSansFallbackFull; +#else + *len = sizeof pdf_font_DroidSansFallback; + return (unsigned char*) pdf_font_DroidSansFallback; +#endif +#else + *len = 0; + return NULL; +#endif +} diff --git a/source/pdf/pdf-form.c b/source/pdf/pdf-form.c new file mode 100644 index 00000000..6e1d26ce --- /dev/null +++ b/source/pdf/pdf-form.c @@ -0,0 +1,2876 @@ +#include "mupdf/pdf.h" + +#define MATRIX_COEFS (6) + +#define STRIKE_HEIGHT (0.375f) +#define UNDERLINE_HEIGHT (0.075f) +#define LINE_THICKNESS (0.07f) +#define SMALL_FLOAT (0.00001) + +enum +{ + F_Invisible = 1 << (1-1), + F_Hidden = 1 << (2-1), + F_Print = 1 << (3-1), + F_NoZoom = 1 << (4-1), + F_NoRotate = 1 << (5-1), + F_NoView = 1 << (6-1), + F_ReadOnly = 1 << (7-1), + F_Locked = 1 << (8-1), + F_ToggleNoView = 1 << (9-1), + F_LockedContents = 1 << (10-1) +}; + +enum +{ + BS_Solid, + BS_Dashed, + BS_Beveled, + BS_Inset, + BS_Underline +}; + +/* Must be kept in sync with definitions in pdf_util.js */ +enum +{ + Display_Visible, + Display_Hidden, + Display_NoPrint, + Display_NoView +}; + +enum +{ + Q_Left = 0, + Q_Cent = 1, + Q_Right = 2 +}; + +typedef struct da_info_s +{ + char *font_name; + int font_size; + float col[4]; + int col_size; +} da_info; + +typedef struct font_info_s +{ + da_info da_rec; + pdf_font_desc *font; +} font_info; + +typedef struct text_widget_info_s +{ + pdf_obj *dr; + pdf_obj *col; + font_info font_rec; + int q; + int multiline; + int comb; + int max_len; +} text_widget_info; + +static const char *fmt_re = "%f %f %f %f re\n"; +static const char *fmt_f = "f\n"; +static const char *fmt_s = "s\n"; +static const char *fmt_g = "%f g\n"; +static const char *fmt_m = "%f %f m\n"; +static 
const char *fmt_l = "%f %f l\n"; +static const char *fmt_w = "%f w\n"; +static const char *fmt_Tx_BMC = "/Tx BMC\n"; +static const char *fmt_q = "q\n"; +static const char *fmt_W = "W\n"; +static const char *fmt_n = "n\n"; +static const char *fmt_BT = "BT\n"; +static const char *fmt_Tm = "%1.2f %1.2f %1.2f %1.2f %1.2f %1.2f Tm\n"; +static const char *fmt_Td = "%f %f Td\n"; +static const char *fmt_Tj = " Tj\n"; +static const char *fmt_ET = "ET\n"; +static const char *fmt_Q = "Q\n"; +static const char *fmt_EMC = "EMC\n"; + +static void account_for_rot(fz_rect *rect, fz_matrix *mat, int rot) +{ + float width = rect->x1; + float height = rect->y1; + + switch (rot) + { + default: + *mat = fz_identity; + break; + case 90: + fz_pre_rotate(fz_translate(mat, width, 0), rot); + rect->x1 = height; + rect->y1 = width; + break; + case 180: + fz_pre_rotate(fz_translate(mat, width, height), rot); + break; + case 270: + fz_pre_rotate(fz_translate(mat, 0, height), rot); + rect->x1 = height; + rect->y1 = width; + break; + } +} + +static char *get_string_or_stream(pdf_document *doc, pdf_obj *obj) +{ + fz_context *ctx = doc->ctx; + int len = 0; + char *buf = NULL; + fz_buffer *strmbuf = NULL; + char *text = NULL; + + fz_var(strmbuf); + fz_var(text); + fz_try(ctx) + { + if (pdf_is_string(obj)) + { + len = pdf_to_str_len(obj); + buf = pdf_to_str_buf(obj); + } + else if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj))) + { + strmbuf = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)); + len = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&buf); + } + + if (buf) + { + text = fz_malloc(ctx, len+1); + memcpy(text, buf, len); + text[len] = 0; + } + } + fz_always(ctx) + { + fz_drop_buffer(ctx, strmbuf); + } + fz_catch(ctx) + { + fz_free(ctx, text); + fz_rethrow(ctx); + } + + return text; +} + +/* Find the point in a field hierarchy where all descendents + * share the same name */ +static pdf_obj *find_head_of_field_group(pdf_obj *obj) +{ + if (obj == NULL || pdf_dict_gets(obj, 
"T")) + return obj; + else + return find_head_of_field_group(pdf_dict_gets(obj, "Parent")); +} + +static void pdf_field_mark_dirty(fz_context *ctx, pdf_obj *field) +{ + pdf_obj *kids = pdf_dict_gets(field, "Kids"); + if (kids) + { + int i, n = pdf_array_len(kids); + + for (i = 0; i < n; i++) + pdf_field_mark_dirty(ctx, pdf_array_get(kids, i)); + } + else if (!pdf_dict_gets(field, "Dirty")) + { + pdf_obj *nullobj = pdf_new_null(ctx); + fz_try(ctx) + { + pdf_dict_puts(field, "Dirty", nullobj); + } + fz_always(ctx) + { + pdf_drop_obj(nullobj); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } +} + +static void copy_resources(pdf_obj *dst, pdf_obj *src) +{ + int i, len; + + len = pdf_dict_len(src); + for (i = 0; i < len; i++) + { + pdf_obj *key = pdf_dict_get_key(src, i); + + if (!pdf_dict_get(dst, key)) + pdf_dict_put(dst, key, pdf_dict_get_val(src, i)); + } +} + +static void da_info_fin(fz_context *ctx, da_info *di) +{ + fz_free(ctx, di->font_name); + di->font_name = NULL; +} + +static void da_check_stack(float *stack, int *top) +{ + if (*top == 32) + { + memmove(stack, stack + 1, 31 * sizeof(stack[0])); + *top = 31; + } +} + +static void parse_da(fz_context *ctx, char *da, da_info *di) +{ + float stack[32]; + int top = 0; + pdf_token tok; + char *name = NULL; + pdf_lexbuf lbuf; + fz_stream *str = fz_open_memory(ctx, (unsigned char *)da, strlen(da)); + + pdf_lexbuf_init(ctx, &lbuf, PDF_LEXBUF_SMALL); + + fz_var(str); + fz_var(name); + fz_try(ctx) + { + for (tok = pdf_lex(str, &lbuf); tok != PDF_TOK_EOF; tok = pdf_lex(str, &lbuf)) + { + switch (tok) + { + case PDF_TOK_NAME: + fz_free(ctx, name); + name = fz_strdup(ctx, lbuf.scratch); + break; + + case PDF_TOK_INT: + da_check_stack(stack, &top); + stack[top] = lbuf.i; + top ++; + break; + + case PDF_TOK_REAL: + da_check_stack(stack, &top); + stack[top] = lbuf.f; + top ++; + break; + + case PDF_TOK_KEYWORD: + if (!strcmp(lbuf.scratch, "Tf")) + { + di->font_size = stack[0]; + di->font_name = name; + name = NULL; + } + 
else if (!strcmp(lbuf.scratch, "rg")) + { + di->col[0] = stack[0]; + di->col[1] = stack[1]; + di->col[2] = stack[2]; + di->col_size = 3; + } + + fz_free(ctx, name); + name = NULL; + top = 0; + break; + + default: + break; + } + } + } + fz_always(ctx) + { + fz_free(ctx, name); + fz_close(str); + pdf_lexbuf_fin(&lbuf); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void get_font_info(pdf_document *doc, pdf_obj *dr, char *da, font_info *font_rec) +{ + fz_context *ctx = doc->ctx; + + parse_da(ctx, da, &font_rec->da_rec); + if (font_rec->da_rec.font_name == NULL) + fz_throw(ctx, FZ_ERROR_GENERIC, "No font name in default appearance"); + font_rec->font = pdf_load_font(doc, dr, pdf_dict_gets(pdf_dict_gets(dr, "Font"), font_rec->da_rec.font_name), 0); +} + +static void font_info_fin(fz_context *ctx, font_info *font_rec) +{ + pdf_drop_font(ctx, font_rec->font); + font_rec->font = NULL; + da_info_fin(ctx, &font_rec->da_rec); +} + +static void get_text_widget_info(pdf_document *doc, pdf_obj *widget, text_widget_info *info) +{ + char *da = pdf_to_str_buf(pdf_get_inheritable(doc, widget, "DA")); + int ff = pdf_get_field_flags(doc, widget); + pdf_obj *ml = pdf_get_inheritable(doc, widget, "MaxLen"); + + info->dr = pdf_get_inheritable(doc, widget, "DR"); + info->col = pdf_dict_getp(widget, "MK/BG"); + info->q = pdf_to_int(pdf_get_inheritable(doc, widget, "Q")); + info->multiline = (ff & Ff_Multiline) != 0; + info->comb = (ff & (Ff_Multiline|Ff_Password|Ff_FileSelect|Ff_Comb)) == Ff_Comb; + + if (ml == NULL) + info->comb = 0; + else + info->max_len = pdf_to_int(ml); + + get_font_info(doc, info->dr, da, &info->font_rec); +} + +static void fzbuf_print_da(fz_context *ctx, fz_buffer *fzbuf, da_info *di) +{ + if (di->font_name != NULL && di->font_size != 0) + fz_buffer_printf(ctx, fzbuf, "/%s %d Tf", di->font_name, di->font_size); + + switch (di->col_size) + { + case 1: + fz_buffer_printf(ctx, fzbuf, " %f g", di->col[0]); + break; + + case 3: + fz_buffer_printf(ctx, fzbuf, 
" %f %f %f rg", di->col[0], di->col[1], di->col[2]); + break; + + case 4: + fz_buffer_printf(ctx, fzbuf, " %f %f %f %f k", di->col[0], di->col[1], di->col[2], di->col[3]); + break; + + default: + fz_buffer_printf(ctx, fzbuf, " 0 g"); + break; + } +} + +static fz_rect *measure_text(pdf_document *doc, font_info *font_rec, const fz_matrix *tm, char *text, fz_rect *bbox) +{ + pdf_measure_text(doc->ctx, font_rec->font, (unsigned char *)text, strlen(text), bbox); + + bbox->x0 *= font_rec->da_rec.font_size * tm->a; + bbox->y0 *= font_rec->da_rec.font_size * tm->d; + bbox->x1 *= font_rec->da_rec.font_size * tm->a; + bbox->y1 *= font_rec->da_rec.font_size * tm->d; + + return bbox; +} + +static void fzbuf_print_color(fz_context *ctx, fz_buffer *fzbuf, pdf_obj *arr, int stroke, float adj) +{ + switch (pdf_array_len(arr)) + { + case 1: + fz_buffer_printf(ctx, fzbuf, stroke?"%f G\n":"%f g\n", + pdf_to_real(pdf_array_get(arr, 0)) + adj); + break; + case 3: + fz_buffer_printf(ctx, fzbuf, stroke?"%f %f %f RG\n":"%f %f %f rg\n", + pdf_to_real(pdf_array_get(arr, 0)) + adj, + pdf_to_real(pdf_array_get(arr, 1)) + adj, + pdf_to_real(pdf_array_get(arr, 2)) + adj); + break; + case 4: + fz_buffer_printf(ctx, fzbuf, stroke?"%f %f %f %f K\n":"%f %f %f %f k\n", + pdf_to_real(pdf_array_get(arr, 0)), + pdf_to_real(pdf_array_get(arr, 1)), + pdf_to_real(pdf_array_get(arr, 2)), + pdf_to_real(pdf_array_get(arr, 3))); + break; + } +} + +static void fzbuf_print_text(fz_context *ctx, fz_buffer *fzbuf, const fz_rect *clip, pdf_obj *col, font_info *font_rec, const fz_matrix *tm, char *text) +{ + fz_buffer_printf(ctx, fzbuf, fmt_q); + if (clip) + { + fz_buffer_printf(ctx, fzbuf, fmt_re, clip->x0, clip->y0, clip->x1 - clip->x0, clip->y1 - clip->y0); + fz_buffer_printf(ctx, fzbuf, fmt_W); + if (col) + { + fzbuf_print_color(ctx, fzbuf, col, 0, 0.0); + fz_buffer_printf(ctx, fzbuf, fmt_f); + } + else + { + fz_buffer_printf(ctx, fzbuf, fmt_n); + } + } + + fz_buffer_printf(ctx, fzbuf, fmt_BT); + + 
fzbuf_print_da(ctx, fzbuf, &font_rec->da_rec); + + fz_buffer_printf(ctx, fzbuf, "\n"); + if (tm) + fz_buffer_printf(ctx, fzbuf, fmt_Tm, tm->a, tm->b, tm->c, tm->d, tm->e, tm->f); + + fz_buffer_cat_pdf_string(ctx, fzbuf, text); + fz_buffer_printf(ctx, fzbuf, fmt_Tj); + fz_buffer_printf(ctx, fzbuf, fmt_ET); + fz_buffer_printf(ctx, fzbuf, fmt_Q); +} + +static fz_buffer *create_text_buffer(fz_context *ctx, const fz_rect *clip, text_widget_info *info, const fz_matrix *tm, char *text) +{ + fz_buffer *fzbuf = fz_new_buffer(ctx, 0); + + fz_try(ctx) + { + fz_buffer_printf(ctx, fzbuf, fmt_Tx_BMC); + fzbuf_print_text(ctx, fzbuf, clip, info->col, &info->font_rec, tm, text); + fz_buffer_printf(ctx, fzbuf, fmt_EMC); + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, fzbuf); + fz_rethrow(ctx); + } + + return fzbuf; +} + +static fz_buffer *create_aligned_text_buffer(pdf_document *doc, const fz_rect *clip, text_widget_info *info, const fz_matrix *tm, char *text) +{ + fz_context *ctx = doc->ctx; + fz_matrix atm = *tm; + + if (info->q != Q_Left) + { + fz_rect rect; + + measure_text(doc, &info->font_rec, tm, text, &rect); + atm.e -= info->q == Q_Right ? 
rect.x1 : (rect.x1 - rect.x0) / 2; + } + + return create_text_buffer(ctx, clip, info, &atm, text); +} + +static void measure_ascent_descent(pdf_document *doc, font_info *finf, char *text, float *ascent, float *descent) +{ + fz_context *ctx = doc->ctx; + char *testtext = NULL; + fz_rect bbox; + font_info tinf = *finf; + + fz_var(testtext); + fz_try(ctx) + { + /* Heuristic: adding "My" to text will in most cases + * produce a measurement that will encompass all chars */ + testtext = fz_malloc(ctx, strlen(text) + 3); + strcpy(testtext, "My"); + strcat(testtext, text); + tinf.da_rec.font_size = 1; + measure_text(doc, &tinf, &fz_identity, testtext, &bbox); + *descent = -bbox.y0; + *ascent = bbox.y1; + } + fz_always(ctx) + { + fz_free(ctx, testtext); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +typedef struct text_splitter_s +{ + font_info *info; + float width; + float height; + float scale; + float unscaled_width; + float fontsize; + char *text; + int done; + float x_orig; + float y_orig; + float x; + float x_end; + int text_start; + int text_end; + int max_lines; + int retry; +} text_splitter; + +static void text_splitter_init(text_splitter *splitter, font_info *info, char *text, float width, float height, int variable) +{ + float fontsize = info->da_rec.font_size; + + memset(splitter, 0, sizeof(*splitter)); + splitter->info = info; + splitter->text = text; + splitter->width = width; + splitter->unscaled_width = width; + splitter->height = height; + splitter->fontsize = fontsize; + splitter->scale = 1.0; + /* RJW: The cast in the following line is important, as otherwise + * under MSVC in the variable = 0 case, splitter->max_lines becomes + * INT_MIN. */ + splitter->max_lines = variable ? 
(int)(height/fontsize) : INT_MAX; +} + +static void text_splitter_start_pass(text_splitter *splitter) +{ + splitter->text_end = 0; + splitter->x_orig = 0; + splitter->y_orig = 0; +} + +static void text_splitter_start_line(text_splitter *splitter) +{ + splitter->x_end = 0; +} + +static int text_splitter_layout(fz_context *ctx, text_splitter *splitter) +{ + char *text; + float room; + float stride; + int count; + int len; + float fontsize = splitter->info->da_rec.font_size; + + splitter->x = splitter->x_end; + splitter->text_start = splitter->text_end; + + text = splitter->text + splitter->text_start; + room = splitter->unscaled_width - splitter->x; + + if (strchr("\r\n", text[0])) + { + /* Consume return chars and report end of line */ + splitter->text_end += strspn(text, "\r\n"); + splitter->text_start = splitter->text_end; + splitter->done = (splitter->text[splitter->text_end] == '\0'); + return 0; + } + else if (text[0] == ' ') + { + /* Treat each space as a word */ + len = 1; + } + else + { + len = 0; + while (text[len] != '\0' && !strchr(" \r\n", text[len])) + len ++; + } + + stride = pdf_text_stride(ctx, splitter->info->font, fontsize, (unsigned char *)text, len, room, &count); + + /* If not a single char fits although the line is empty, then force one char */ + if (count == 0 && splitter->x == 0.0) + stride = pdf_text_stride(ctx, splitter->info->font, fontsize, (unsigned char *)text, 1, FLT_MAX, &count); + + if (count < len && splitter->retry) + { + /* The word didn't fit and we are in retry mode. Work out the + * least additional scaling that may help */ + float fitwidth; /* width if we force the word in */ + float hstretchwidth; /* width if we just bump by 10% */ + float vstretchwidth; /* width resulting from forcing in another line */ + float bestwidth; + + fitwidth = splitter->x + + pdf_text_stride(ctx, splitter->info->font, fontsize, (unsigned char *)text, len, FLT_MAX, &count); + /* FIXME: temporary fiddle factor. 
Would be better to work in integers */ + fitwidth *= 1.001f; + + /* Stretching by 10% is worth trying only if processing the first word on the line */ + hstretchwidth = splitter->x == 0.0 + ? splitter->width * 1.1 / splitter->scale + : FLT_MAX; + + vstretchwidth = splitter->width * (splitter->max_lines + 1) * splitter->fontsize + / splitter->height; + + bestwidth = fz_min(fitwidth, fz_min(hstretchwidth, vstretchwidth)); + + if (bestwidth == vstretchwidth) + splitter->max_lines ++; + + splitter->scale = splitter->width / bestwidth; + splitter->unscaled_width = bestwidth; + + splitter->retry = 0; + + /* Try again */ + room = splitter->unscaled_width - splitter->x; + stride = pdf_text_stride(ctx, splitter->info->font, fontsize, (unsigned char *)text, len, room, &count); + } + + /* This is not the first word on the line. Best to give up on this line and push + * the word onto the next */ + if (count < len && splitter->x > 0.0) + return 0; + + splitter->text_end = splitter->text_start + count; + splitter->x_end = splitter->x + stride; + splitter->done = (splitter->text[splitter->text_end] == '\0'); + return 1; +} + +static void text_splitter_move(text_splitter *splitter, float newy, float *relx, float *rely) +{ + *relx = splitter->x - splitter->x_orig; + *rely = newy - splitter->y_orig; + + splitter->x_orig = splitter->x; + splitter->y_orig = newy; +} + +static void text_splitter_retry(text_splitter *splitter) +{ + if (splitter->retry) + { + /* Already tried expanding lines. 
Overflow must + * be caused by carriage control */ + splitter->max_lines ++; + splitter->retry = 0; + splitter->unscaled_width = splitter->width * splitter->max_lines * splitter->fontsize + / splitter->height; + splitter->scale = splitter->width / splitter->unscaled_width; + } + else + { + splitter->retry = 1; + } +} + +static void fzbuf_print_text_start(fz_context *ctx, fz_buffer *fzbuf, const fz_rect *clip, pdf_obj *col, font_info *font, const fz_matrix *tm) +{ + fz_buffer_printf(ctx, fzbuf, fmt_Tx_BMC); + fz_buffer_printf(ctx, fzbuf, fmt_q); + + if (clip) + { + fz_buffer_printf(ctx, fzbuf, fmt_re, clip->x0, clip->y0, clip->x1 - clip->x0, clip->y1 - clip->y0); + fz_buffer_printf(ctx, fzbuf, fmt_W); + if (col) + { + fzbuf_print_color(ctx, fzbuf, col, 0, 0.0); + fz_buffer_printf(ctx, fzbuf, fmt_f); + } + else + { + fz_buffer_printf(ctx, fzbuf, fmt_n); + } + } + + fz_buffer_printf(ctx, fzbuf, fmt_BT); + + fzbuf_print_da(ctx, fzbuf, &font->da_rec); + fz_buffer_printf(ctx, fzbuf, "\n"); + + fz_buffer_printf(ctx, fzbuf, fmt_Tm, tm->a, tm->b, tm->c, tm->d, tm->e, tm->f); +} + +static void fzbuf_print_text_end(fz_context *ctx, fz_buffer *fzbuf) +{ + fz_buffer_printf(ctx, fzbuf, fmt_ET); + fz_buffer_printf(ctx, fzbuf, fmt_Q); + fz_buffer_printf(ctx, fzbuf, fmt_EMC); +} + +static void fzbuf_print_text_word(fz_context *ctx, fz_buffer *fzbuf, float x, float y, char *text, int count) +{ + int i; + + fz_buffer_printf(ctx, fzbuf, fmt_Td, x, y); + fz_buffer_printf(ctx, fzbuf, "("); + + for (i = 0; i < count; i++) + fz_buffer_printf(ctx, fzbuf, "%c", text[i]); + + fz_buffer_printf(ctx, fzbuf, ") Tj\n"); +} + +static fz_buffer *create_text_appearance(pdf_document *doc, const fz_rect *bbox, const fz_matrix *oldtm, text_widget_info *info, char *text) +{ + fz_context *ctx = doc->ctx; + int fontsize; + int variable; + float height, width, full_width; + fz_buffer *fzbuf = NULL; + fz_buffer *fztmp = NULL; + fz_rect rect; + fz_rect tbox; + rect = *bbox; + + if (rect.x1 - rect.x0 > 3.0 && 
rect.y1 - rect.y0 > 3.0) + { + rect.x0 += 1.0; + rect.x1 -= 1.0; + rect.y0 += 1.0; + rect.y1 -= 1.0; + } + + height = rect.y1 - rect.y0; + width = rect.x1 - rect.x0; + full_width = bbox->x1 - bbox->x0; + + fz_var(fzbuf); + fz_var(fztmp); + fz_try(ctx) + { + float ascent, descent; + fz_matrix tm; + + variable = (info->font_rec.da_rec.font_size == 0); + fontsize = variable + ? (info->multiline ? 14.0 : floor(height)) + : info->font_rec.da_rec.font_size; + + info->font_rec.da_rec.font_size = fontsize; + + measure_ascent_descent(doc, &info->font_rec, text, &ascent, &descent); + + if (info->multiline) + { + text_splitter splitter; + + text_splitter_init(&splitter, &info->font_rec, text, width, height, variable); + + while (!splitter.done) + { + /* Try a layout pass */ + int line = 0; + + fz_drop_buffer(ctx, fztmp); + fztmp = NULL; + fztmp = fz_new_buffer(ctx, 0); + + text_splitter_start_pass(&splitter); + + /* Layout unscaled text to a scaled-up width, so that + * the scaled-down text will fit the unscaled width */ + + while (!splitter.done && line < splitter.max_lines) + { + /* Layout a line */ + text_splitter_start_line(&splitter); + + while (!splitter.done && text_splitter_layout(ctx, &splitter)) + { + if (splitter.text[splitter.text_start] != ' ') + { + float x, y; + char *word = text+splitter.text_start; + int wordlen = splitter.text_end-splitter.text_start; + + text_splitter_move(&splitter, -line*fontsize, &x, &y); + fzbuf_print_text_word(ctx, fztmp, x, y, word, wordlen); + } + } + + line ++; + } + + if (!splitter.done) + text_splitter_retry(&splitter); + } + + fzbuf = fz_new_buffer(ctx, 0); + + tm.a = splitter.scale; + tm.b = 0.0; + tm.c = 0.0; + tm.d = splitter.scale; + tm.e = rect.x0; + tm.f = rect.y1 - (1.0+ascent-descent)*fontsize*splitter.scale/2.0; + + fzbuf_print_text_start(ctx, fzbuf, &rect, info->col, &info->font_rec, &tm); + + fz_buffer_cat(ctx, fzbuf, fztmp); + + fzbuf_print_text_end(ctx, fzbuf); + } + else if (info->comb) + { + int i, n = 
fz_mini((int)strlen(text), info->max_len); + float comb_width = full_width/info->max_len; + float char_width = pdf_text_stride(ctx, info->font_rec.font, fontsize, (unsigned char *)"M", 1, FLT_MAX, NULL); + float init_skip = (comb_width - char_width)/2.0; + + fz_translate(&tm, rect.x0, rect.y1 - (height+(ascent-descent)*fontsize)/2.0); + + fzbuf = fz_new_buffer(ctx, 0); + + fzbuf_print_text_start(ctx, fzbuf, &rect, info->col, &info->font_rec, &tm); + + for (i = 0; i < n; i++) + fzbuf_print_text_word(ctx, fzbuf, i == 0 ? init_skip : comb_width, 0.0, text+i, 1); + + fzbuf_print_text_end(ctx, fzbuf); + } + else + { + if (oldtm) + { + tm = *oldtm; + } + else + { + fz_translate(&tm, rect.x0, rect.y1 - (height+(ascent-descent)*fontsize)/2.0); + + switch (info->q) + { + case Q_Right: tm.e += width; break; + case Q_Cent: tm.e += width/2; break; + } + } + + if (variable) + { + measure_text(doc, &info->font_rec, &tm, text, &tbox); + + if (tbox.x1 - tbox.x0 > width) + { + /* Scale the text to fit but use the same offset + * to keep the baseline constant */ + tm.a *= width / (tbox.x1 - tbox.x0); + tm.d *= width / (tbox.x1 - tbox.x0); + } + } + + fzbuf = create_aligned_text_buffer(doc, &rect, info, &tm, text); + } + } + fz_always(ctx) + { + fz_drop_buffer(ctx, fztmp); + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, fzbuf); + fz_rethrow(ctx); + } + + return fzbuf; +} + +static void update_marked_content(pdf_document *doc, pdf_xobject *form, fz_buffer *fzbuf) +{ + fz_context *ctx = doc->ctx; + pdf_token tok; + pdf_lexbuf lbuf; + fz_stream *str_outer = NULL; + fz_stream *str_inner = NULL; + unsigned char *buf; + int len; + fz_buffer *newbuf = NULL; + + pdf_lexbuf_init(ctx, &lbuf, PDF_LEXBUF_SMALL); + + fz_var(str_outer); + fz_var(str_inner); + fz_var(newbuf); + fz_try(ctx) + { + int bmc_found; + int first = 1; + + newbuf = fz_new_buffer(ctx, 0); + str_outer = pdf_open_stream(doc, pdf_to_num(form->contents), pdf_to_gen(form->contents)); + len = fz_buffer_storage(ctx, fzbuf, &buf); + 
str_inner = fz_open_memory(ctx, buf, len); + + /* Copy the existing appearance stream to newbuf while looking for BMC */ + for (tok = pdf_lex(str_outer, &lbuf); tok != PDF_TOK_EOF; tok = pdf_lex(str_outer, &lbuf)) + { + if (first) + first = 0; + else + fz_buffer_printf(ctx, newbuf, " "); + + pdf_print_token(ctx, newbuf, tok, &lbuf); + if (tok == PDF_TOK_KEYWORD && !strcmp(lbuf.scratch, "BMC")) + break; + } + + bmc_found = (tok != PDF_TOK_EOF); + + if (bmc_found) + { + /* Drop Tx BMC from the replacement appearance stream */ + (void)pdf_lex(str_inner, &lbuf); + (void)pdf_lex(str_inner, &lbuf); + } + + /* Copy the replacement appearance stream to newbuf */ + for (tok = pdf_lex(str_inner, &lbuf); tok != PDF_TOK_EOF; tok = pdf_lex(str_inner, &lbuf)) + { + fz_buffer_printf(ctx, newbuf, " "); + pdf_print_token(ctx, newbuf, tok, &lbuf); + } + + if (bmc_found) + { + /* Drop the rest of the existing appearance stream until EMC found */ + for (tok = pdf_lex(str_outer, &lbuf); tok != PDF_TOK_EOF; tok = pdf_lex(str_outer, &lbuf)) + { + if (tok == PDF_TOK_KEYWORD && !strcmp(lbuf.scratch, "EMC")) + break; + } + + /* Copy the rest of the existing appearance stream to newbuf */ + for (tok = pdf_lex(str_outer, &lbuf); tok != PDF_TOK_EOF; tok = pdf_lex(str_outer, &lbuf)) + { + fz_buffer_printf(ctx, newbuf, " "); + pdf_print_token(ctx, newbuf, tok, &lbuf); + } + } + + /* Use newbuf in place of the existing appearance stream */ + pdf_update_xobject_contents(doc, form, newbuf); + } + fz_always(ctx) + { + fz_close(str_outer); + fz_close(str_inner); + fz_drop_buffer(ctx, newbuf); + pdf_lexbuf_fin(&lbuf); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static int get_matrix(pdf_document *doc, pdf_xobject *form, int q, fz_matrix *mt) +{ + fz_context *ctx = doc->ctx; + int found = 0; + pdf_lexbuf lbuf; + fz_stream *str; + + str = pdf_open_stream(doc, pdf_to_num(form->contents), pdf_to_gen(form->contents)); + + pdf_lexbuf_init(ctx, &lbuf, PDF_LEXBUF_SMALL); + + fz_try(ctx) + { + int 
tok; + float coefs[MATRIX_COEFS]; + int coef_i = 0; + + /* Look for the text matrix Tm in the stream */ + for (tok = pdf_lex(str, &lbuf); tok != PDF_TOK_EOF; tok = pdf_lex(str, &lbuf)) + { + if (tok == PDF_TOK_INT || tok == PDF_TOK_REAL) + { + if (coef_i >= MATRIX_COEFS) + { + int i; + for (i = 0; i < MATRIX_COEFS-1; i++) + coefs[i] = coefs[i+1]; + + coef_i = MATRIX_COEFS-1; + } + + coefs[coef_i++] = tok == PDF_TOK_INT ? lbuf.i : lbuf.f; + } + else + { + if (tok == PDF_TOK_KEYWORD && !strcmp(lbuf.scratch, "Tm") && coef_i == MATRIX_COEFS) + { + found = 1; + mt->a = coefs[0]; + mt->b = coefs[1]; + mt->c = coefs[2]; + mt->d = coefs[3]; + mt->e = coefs[4]; + mt->f = coefs[5]; + } + + coef_i = 0; + } + } + + if (found) + { + fz_rect bbox; + pdf_to_rect(ctx, pdf_dict_gets(form->contents, "BBox"), &bbox); + + switch (q) + { + case Q_Left: + mt->e = bbox.x0 + 1; + break; + + case Q_Cent: + mt->e = (bbox.x1 - bbox.x0) / 2; + break; + + case Q_Right: + mt->e = bbox.x1 - 1; + break; + } + } + } + fz_always(ctx) + { + fz_close(str); + pdf_lexbuf_fin(&lbuf); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + return found; +} + +static void update_field_value(fz_context *ctx, pdf_obj *obj, char *text) +{ + pdf_obj *sobj = NULL; + pdf_obj *grp; + + if (!text) + text = ""; + + /* All fields of the same name should be updated, so + * set the value at the head of the group */ + grp = find_head_of_field_group(obj); + if (grp) + obj = grp; + + fz_var(sobj); + fz_try(ctx) + { + sobj = pdf_new_string(ctx, text, strlen(text)); + pdf_dict_puts(obj, "V", sobj); + } + fz_always(ctx) + { + pdf_drop_obj(sobj); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + pdf_field_mark_dirty(ctx, obj); +} + +static pdf_xobject *load_or_create_form(pdf_document *doc, pdf_obj *obj, fz_rect *rect) +{ + fz_context *ctx = doc->ctx; + pdf_obj *ap = NULL; + fz_matrix mat; + int rot; + pdf_obj *formobj = NULL; + pdf_xobject *form = NULL; + char *dn = "N"; + fz_buffer *fzbuf = NULL; + int create_form = 0; + + 
fz_var(formobj); + fz_var(form); + fz_var(fzbuf); + fz_try(ctx) + { + rot = pdf_to_int(pdf_dict_getp(obj, "MK/R")); + pdf_to_rect(ctx, pdf_dict_gets(obj, "Rect"), rect); + rect->x1 -= rect->x0; + rect->y1 -= rect->y0; + rect->x0 = rect->y0 = 0; + account_for_rot(rect, &mat, rot); + + ap = pdf_dict_gets(obj, "AP"); + if (ap == NULL) + { + ap = pdf_new_dict(ctx, 1); + pdf_dict_puts_drop(obj, "AP", ap); + } + + formobj = pdf_dict_gets(ap, dn); + if (formobj == NULL) + { + formobj = pdf_new_xobject(doc, rect, &mat); + pdf_dict_puts_drop(ap, dn, formobj); + create_form = 1; + } + + form = pdf_load_xobject(doc, formobj); + if (create_form) + { + fzbuf = fz_new_buffer(ctx, 1); + pdf_update_xobject_contents(doc, form, fzbuf); + } + + copy_resources(form->resources, pdf_get_inheritable(doc, obj, "DR")); + } + fz_always(ctx) + { + fz_drop_buffer(ctx, fzbuf); + } + fz_catch(ctx) + { + pdf_drop_xobject(ctx, form); + fz_rethrow(ctx); + } + + return form; +} + +static char *to_font_encoding(fz_context *ctx, pdf_font_desc *font, char *utf8) +{ + int i; + int needs_converting = 0; + + /* Temporay partial solution. We are using a slow lookup in the conversion + * below, so we avoid performing the conversion unnecessarily. We check for + * top-bit-set chars, and convert only if they are present. We should also + * check that the font encoding is one that agrees with utf8 from 0 to 7f, + * but for now we get away without doing so. 
This is after all an improvement + * on just strdup */ + for (i = 0; utf8[i] != '\0'; i++) + { + if (utf8[i] & 0x80) + needs_converting = 1; + } + + /* Even if we need to convert, we cannot do so if the font has no cid_to_ucs mapping */ + if (needs_converting && font->cid_to_ucs) + { + char *buf = fz_malloc(ctx, strlen(utf8) + 1); + char *bufp = buf; + + fz_try(ctx) + { + while(*utf8) + { + if (*utf8 & 0x80) + { + int rune; + + utf8 += fz_chartorune(&rune, utf8); + + /* Slow search for the cid that maps to the unicode value held in 'rune" */ + for (i = 0; i < font->cid_to_ucs_len && font->cid_to_ucs[i] != rune; i++) + ; + + /* If found store the cid */ + if (i < font->cid_to_ucs_len) + *bufp++ = i; + } + else + { + *bufp++ = *utf8++; + } + } + + *bufp = '\0'; + } + fz_catch(ctx) + { + fz_free(ctx, buf); + fz_rethrow(ctx); + } + + return buf; + } + else + { + /* If either no conversion is needed or the font has no cid_to_ucs + * mapping then leave unconverted, although in the latter case the result + * is likely incorrect */ + return fz_strdup(ctx, utf8); + } +} + +static void update_text_appearance(pdf_document *doc, pdf_obj *obj, char *eventValue) +{ + fz_context *ctx = doc->ctx; + text_widget_info info; + pdf_xobject *form = NULL; + fz_buffer *fzbuf = NULL; + fz_matrix tm; + fz_rect rect; + int has_tm; + char *text = NULL; + + memset(&info, 0, sizeof(info)); + + fz_var(info); + fz_var(form); + fz_var(fzbuf); + fz_var(text); + fz_try(ctx) + { + get_text_widget_info(doc, obj, &info); + + if (eventValue) + text = to_font_encoding(ctx, info.font_rec.font, eventValue); + else + text = pdf_field_value(doc, obj); + + form = load_or_create_form(doc, obj, &rect); + + has_tm = get_matrix(doc, form, info.q, &tm); + fzbuf = create_text_appearance(doc, &form->bbox, has_tm ? 
&tm : NULL, &info, + text?text:""); + update_marked_content(doc, form, fzbuf); + } + fz_always(ctx) + { + fz_free(ctx, text); + pdf_drop_xobject(ctx, form); + fz_drop_buffer(ctx, fzbuf); + font_info_fin(ctx, &info.font_rec); + } + fz_catch(ctx) + { + fz_warn(ctx, "update_text_appearance failed"); + } +} + +static void update_combobox_appearance(pdf_document *doc, pdf_obj *obj) +{ + fz_context *ctx = doc->ctx; + text_widget_info info; + pdf_xobject *form = NULL; + fz_buffer *fzbuf = NULL; + fz_matrix tm; + fz_rect rect; + int has_tm; + pdf_obj *val; + char *text; + + memset(&info, 0, sizeof(info)); + + fz_var(info); + fz_var(form); + fz_var(fzbuf); + fz_try(ctx) + { + get_text_widget_info(doc, obj, &info); + + val = pdf_get_inheritable(doc, obj, "V"); + + if (pdf_is_array(val)) + val = pdf_array_get(val, 0); + + text = pdf_to_str_buf(val); + + if (!text) + text = ""; + + form = load_or_create_form(doc, obj, &rect); + + has_tm = get_matrix(doc, form, info.q, &tm); + fzbuf = create_text_appearance(doc, &form->bbox, has_tm ? &tm : NULL, &info, + text?text:""); + update_marked_content(doc, form, fzbuf); + } + fz_always(ctx) + { + pdf_drop_xobject(ctx, form); + fz_drop_buffer(ctx, fzbuf); + font_info_fin(ctx, &info.font_rec); + } + fz_catch(ctx) + { + fz_warn(ctx, "update_text_appearance failed"); + } +} + +static int get_border_style(pdf_obj *obj) +{ + char *sname = pdf_to_name(pdf_dict_getp(obj, "BS/S")); + + if (!strcmp(sname, "D")) + return BS_Dashed; + else if (!strcmp(sname, "B")) + return BS_Beveled; + else if (!strcmp(sname, "I")) + return BS_Inset; + else if (!strcmp(sname, "U")) + return BS_Underline; + else + return BS_Solid; +} + +static float get_border_width(pdf_obj *obj) +{ + float w = pdf_to_real(pdf_dict_getp(obj, "BS/W")); + return w == 0.0 ? 
1.0 : w; +} + +static void update_pushbutton_appearance(pdf_document *doc, pdf_obj *obj) +{ + fz_context *ctx = doc->ctx; + fz_rect rect; + pdf_xobject *form = NULL; + fz_buffer *fzbuf = NULL; + pdf_obj *tobj = NULL; + font_info font_rec; + int bstyle; + float bwidth; + float btotal; + + memset(&font_rec, 0, sizeof(font_rec)); + + fz_var(font_rec); + fz_var(form); + fz_var(fzbuf); + fz_try(ctx) + { + form = load_or_create_form(doc, obj, &rect); + fzbuf = fz_new_buffer(ctx, 0); + tobj = pdf_dict_getp(obj, "MK/BG"); + if (pdf_is_array(tobj)) + { + fzbuf_print_color(ctx, fzbuf, tobj, 0, 0.0); + fz_buffer_printf(ctx, fzbuf, fmt_re, + rect.x0, rect.y0, rect.x1, rect.y1); + fz_buffer_printf(ctx, fzbuf, fmt_f); + } + bstyle = get_border_style(obj); + bwidth = get_border_width(obj); + btotal = bwidth; + if (bstyle == BS_Beveled || bstyle == BS_Inset) + { + btotal += bwidth; + + if (bstyle == BS_Beveled) + fz_buffer_printf(ctx, fzbuf, fmt_g, 1.0); + else + fz_buffer_printf(ctx, fzbuf, fmt_g, 0.33); + fz_buffer_printf(ctx, fzbuf, fmt_m, bwidth, bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, bwidth, rect.y1 - bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, rect.x1 - bwidth, rect.y1 - bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, rect.x1 - 2 * bwidth, rect.y1 - 2 * bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, 2 * bwidth, rect.y1 - 2 * bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, 2 * bwidth, 2 * bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_f); + if (bstyle == BS_Beveled) + fzbuf_print_color(ctx, fzbuf, tobj, 0, -0.25); + else + fz_buffer_printf(ctx, fzbuf, fmt_g, 0.66); + fz_buffer_printf(ctx, fzbuf, fmt_m, rect.x1 - bwidth, rect.y1 - bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, rect.x1 - bwidth, bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, bwidth, bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, 2 * bwidth, 2 * bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, rect.x1 - 2 * bwidth, 2 * bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_l, rect.x1 - 2 * bwidth, rect.y1 - 2 * 
bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_f); + } + + tobj = pdf_dict_getp(obj, "MK/BC"); + if (tobj) + { + fzbuf_print_color(ctx, fzbuf, tobj, 1, 0.0); + fz_buffer_printf(ctx, fzbuf, fmt_w, bwidth); + fz_buffer_printf(ctx, fzbuf, fmt_re, + bwidth/2, bwidth/2, + rect.x1 -bwidth/2, rect.y1 - bwidth/2); + fz_buffer_printf(ctx, fzbuf, fmt_s); + } + + tobj = pdf_dict_getp(obj, "MK/CA"); + if (tobj) + { + fz_rect clip = rect; + fz_rect bounds; + fz_matrix mat; + char *da = pdf_to_str_buf(pdf_get_inheritable(doc, obj, "DA")); + char *text = pdf_to_str_buf(tobj); + + clip.x0 += btotal; + clip.y0 += btotal; + clip.x1 -= btotal; + clip.y1 -= btotal; + + get_font_info(doc, form->resources, da, &font_rec); + measure_text(doc, &font_rec, &fz_identity, text, &bounds); + fz_translate(&mat, (rect.x1 - bounds.x1)/2, (rect.y1 - bounds.y1)/2); + fzbuf_print_text(ctx, fzbuf, &clip, NULL, &font_rec, &mat, text); + } + + pdf_update_xobject_contents(doc, form, fzbuf); + } + fz_always(ctx) + { + font_info_fin(ctx, &font_rec); + fz_drop_buffer(ctx, fzbuf); + pdf_drop_xobject(ctx, form); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static pdf_obj *find_field(pdf_obj *dict, char *name, int len) +{ + pdf_obj *field; + + int i, n = pdf_array_len(dict); + + for (i = 0; i < n; i++) + { + char *part; + + field = pdf_array_get(dict, i); + part = pdf_to_str_buf(pdf_dict_gets(field, "T")); + if (strlen(part) == len && !memcmp(part, name, len)) + return field; + } + + return NULL; +} + +pdf_obj *pdf_lookup_field(pdf_obj *form, char *name) +{ + char *dot; + char *namep; + pdf_obj *dict = NULL; + int len; + + /* Process the fully qualified field name which has + * the partial names delimited by '.'. Pretend there + * was a preceding '.' to simplify the loop */ + dot = name - 1; + + while (dot && form) + { + namep = dot + 1; + dot = strchr(namep, '.'); + len = dot ? 
dot - namep : strlen(namep); + dict = find_field(form, namep, len); + if (dot) + form = pdf_dict_gets(dict, "Kids"); + } + + return dict; +} + +static void reset_field(pdf_document *doc, pdf_obj *field) +{ + fz_context *ctx = doc->ctx; + /* Set V to DV whereever DV is present, and delete V where DV is not. + * FIXME: we assume for now that V has not been set unequal + * to DV higher in the hierarchy than "field". + * + * At the bottom of the hierarchy we may find widget annotations + * that aren't also fields, but DV and V will not be present in their + * dictionaries, and attempts to remove V will be harmless. */ + pdf_obj *dv = pdf_dict_gets(field, "DV"); + pdf_obj *kids = pdf_dict_gets(field, "Kids"); + + if (dv) + pdf_dict_puts(field, "V", dv); + else + pdf_dict_dels(field, "V"); + + if (kids == NULL) + { + /* The leaves of the tree are widget annotations + * In some cases we need to update the appearance state; + * in others we need to mark the field as dirty so that + * the appearance stream will be regenerated. 
*/ + switch (pdf_field_type(doc, field)) + { + case PDF_WIDGET_TYPE_RADIOBUTTON: + case PDF_WIDGET_TYPE_CHECKBOX: + { + pdf_obj *leafv = pdf_get_inheritable(doc, field, "V"); + + if (leafv) + pdf_keep_obj(leafv); + else + leafv = pdf_new_name(ctx, "Off"); + + fz_try(ctx) + { + pdf_dict_puts(field, "AS", leafv); + } + fz_always(ctx) + { + pdf_drop_obj(leafv); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + break; + + case PDF_WIDGET_TYPE_PUSHBUTTON: + break; + + default: + pdf_field_mark_dirty(ctx, field); + break; + } + } + + doc->dirty = 1; +} + +void pdf_field_reset(pdf_document *doc, pdf_obj *field) +{ + pdf_obj *kids = pdf_dict_gets(field, "Kids"); + + reset_field(doc, field); + + if (kids) + { + int i, n = pdf_array_len(kids); + + for (i = 0; i < n; i++) + pdf_field_reset(doc, pdf_array_get(kids, i)); + } +} + +static void add_field_hierarchy_to_array(pdf_obj *array, pdf_obj *field) +{ + pdf_obj *kids = pdf_dict_gets(field, "Kids"); + pdf_obj *exclude = pdf_dict_gets(field, "Exclude"); + + if (exclude) + return; + + pdf_array_push(array, field); + + if (kids) + { + int i, n = pdf_array_len(kids); + + for (i = 0; i < n; i++) + add_field_hierarchy_to_array(array, pdf_array_get(kids, i)); + } +} + +/* + When resetting or submitting a form, the fields to act upon are defined + by an array of either field references or field names, plus a flag determining + whether to act upon the fields in the array, or all fields other than those in + the array. specified_fields interprets this information and produces the array + of fields to be acted upon. 
+*/ +static pdf_obj *specified_fields(pdf_document *doc, pdf_obj *fields, int exclude) +{ + fz_context *ctx = doc->ctx; + pdf_obj *form = pdf_dict_getp(pdf_trailer(doc), "Root/AcroForm/Fields"); + int i, n; + pdf_obj *result = pdf_new_array(ctx, 0); + pdf_obj *nil = NULL; + + fz_var(nil); + fz_try(ctx) + { + /* The 'fields' array not being present signals that all fields + * should be acted upon, so handle it using the exclude case - excluding none */ + if (exclude || !fields) + { + /* mark the fields we don't want to act upon */ + nil = pdf_new_null(ctx); + + n = pdf_array_len(fields); + + for (i = 0; i < n; i++) + { + pdf_obj *field = pdf_array_get(fields, i); + + if (pdf_is_string(field)) + field = pdf_lookup_field(form, pdf_to_str_buf(field)); + + if (field) + pdf_dict_puts(field, "Exclude", nil); + } + + /* Act upon all unmarked fields */ + n = pdf_array_len(form); + + for (i = 0; i < n; i++) + add_field_hierarchy_to_array(result, pdf_array_get(form, i)); + + /* Unmark the marked fields */ + n = pdf_array_len(fields); + + for (i = 0; i < n; i++) + { + pdf_obj *field = pdf_array_get(fields, i); + + if (pdf_is_string(field)) + field = pdf_lookup_field(form, pdf_to_str_buf(field)); + + if (field) + pdf_dict_dels(field, "Exclude"); + } + } + else + { + n = pdf_array_len(fields); + + for (i = 0; i < n; i++) + { + pdf_obj *field = pdf_array_get(fields, i); + + if (pdf_is_string(field)) + field = pdf_lookup_field(form, pdf_to_str_buf(field)); + + if (field) + add_field_hierarchy_to_array(result, field); + } + } + } + fz_always(ctx) + { + pdf_drop_obj(nil); + } + fz_catch(ctx) + { + pdf_drop_obj(result); + fz_rethrow(ctx); + } + + return result; +} + +static void reset_form(pdf_document *doc, pdf_obj *fields, int exclude) +{ + fz_context *ctx = doc->ctx; + pdf_obj *sfields = specified_fields(doc, fields, exclude); + + fz_try(ctx) + { + int i, n = pdf_array_len(sfields); + + for (i = 0; i < n; i++) + reset_field(doc, pdf_array_get(sfields, i)); + } + fz_always(ctx) + { 
+ pdf_drop_obj(sfields); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void execute_action(pdf_document *doc, pdf_obj *obj, pdf_obj *a) +{ + fz_context *ctx = doc->ctx; + if (a) + { + char *type = pdf_to_name(pdf_dict_gets(a, "S")); + + if (!strcmp(type, "JavaScript")) + { + pdf_obj *js = pdf_dict_gets(a, "JS"); + if (js) + { + char *code = pdf_to_utf8(doc, js); + fz_try(ctx) + { + pdf_js_execute(doc->js, code); + } + fz_always(ctx) + { + fz_free(ctx, code); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + } + else if (!strcmp(type, "ResetForm")) + { + reset_form(doc, pdf_dict_gets(a, "Fields"), pdf_to_int(pdf_dict_gets(a, "Flags")) & 1); + } + else if (!strcmp(type, "Named")) + { + char *name = pdf_to_name(pdf_dict_gets(a, "N")); + + if (!strcmp(name, "Print")) + pdf_event_issue_print(doc); + } + } +} + +static void update_text_markup_appearance(pdf_document *doc, pdf_obj *annot, fz_annot_type type) +{ + float color[3]; + float alpha; + float line_height; + float line_thickness; + + switch (type) + { + case FZ_ANNOT_HIGHLIGHT: + color[0] = 1.0; + color[1] = 1.0; + color[2] = 0.0; + alpha = 0.5; + line_thickness = 1.0; + line_height = 0.5; + break; + case FZ_ANNOT_UNDERLINE: + color[0] = 0.0; + color[1] = 0.0; + color[2] = 1.0; + alpha = 1.0; + line_thickness = LINE_THICKNESS; + line_height = UNDERLINE_HEIGHT; + break; + case FZ_ANNOT_STRIKEOUT: + color[0] = 1.0; + color[1] = 0.0; + color[2] = 0.0; + alpha = 1.0; + line_thickness = LINE_THICKNESS; + line_height = STRIKE_HEIGHT; + break; + default: + return; + } + + pdf_set_markup_obj_appearance(doc, annot, color, alpha, line_thickness, line_height); +} + +void pdf_update_appearance(pdf_document *doc, pdf_obj *obj) +{ + if (!pdf_dict_gets(obj, "AP") || pdf_dict_gets(obj, "Dirty")) + { + fz_annot_type type = pdf_annot_obj_type(obj); + switch (type) + { + case FZ_ANNOT_WIDGET: + switch (pdf_field_type(doc, obj)) + { + case PDF_WIDGET_TYPE_TEXT: + { + pdf_obj *formatting = pdf_dict_getp(obj, "AA/F"); 
+ if (formatting && doc->js) + { + /* Apply formatting */ + pdf_js_event e; + + e.target = obj; + e.value = pdf_field_value(doc, obj); + pdf_js_setup_event(doc->js, &e); + execute_action(doc, obj, formatting); + /* Update appearance from JS event.value */ + update_text_appearance(doc, obj, pdf_js_get_event(doc->js)->value); + } + else + { + /* Update appearance from field value */ + update_text_appearance(doc, obj, NULL); + } + } + break; + case PDF_WIDGET_TYPE_PUSHBUTTON: + update_pushbutton_appearance(doc, obj); + break; + case PDF_WIDGET_TYPE_LISTBOX: + case PDF_WIDGET_TYPE_COMBOBOX: + /* Treating listbox and combobox identically for now, + * and the behaviour is most appropriate for a combobox */ + update_combobox_appearance(doc, obj); + break; + } + break; + case FZ_ANNOT_STRIKEOUT: + case FZ_ANNOT_UNDERLINE: + case FZ_ANNOT_HIGHLIGHT: + update_text_markup_appearance(doc, obj, type); + break; + case FZ_ANNOT_INK: + pdf_set_ink_obj_appearance(doc, obj); + break; + default: + break; + } + + pdf_dict_dels(obj, "Dirty"); + } +} + +static void execute_action_chain(pdf_document *doc, pdf_obj *obj) +{ + pdf_obj *a = pdf_dict_gets(obj, "A"); + pdf_js_event e; + + e.target = obj; + e.value = ""; + pdf_js_setup_event(doc->js, &e); + + while (a) + { + execute_action(doc, obj, a); + a = pdf_dict_gets(a, "Next"); + } +} + +static void execute_additional_action(pdf_document *doc, pdf_obj *obj, char *act) +{ + pdf_obj *a = pdf_dict_getp(obj, act); + + if (a) + { + pdf_js_event e; + + e.target = obj; + e.value = ""; + pdf_js_setup_event(doc->js, &e); + execute_action(doc, obj, a); + } +} + +static void check_off(fz_context *ctx, pdf_obj *obj) +{ + pdf_obj *off = NULL; + + fz_var(off); + fz_try(ctx); + { + off = pdf_new_name(ctx, "Off"); + pdf_dict_puts(obj, "AS", off); + } + fz_always(ctx) + { + pdf_drop_obj(off); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void set_check(fz_context *ctx, pdf_obj *chk, char *name) +{ + pdf_obj *n = pdf_dict_getp(chk, "AP/N"); 
+ pdf_obj *val = NULL; + + fz_var(val); + fz_try(ctx) + { + /* If name is a possible value of this check + * box then use it, otherwise use "Off" */ + if (pdf_dict_gets(n, name)) + val = pdf_new_name(ctx, name); + else + val = pdf_new_name(ctx, "Off"); + + pdf_dict_puts(chk, "AS", val); + } + fz_always(ctx) + { + pdf_drop_obj(val); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +/* Set the values of all fields in a group defined by a node + * in the hierarchy */ +static void set_check_grp(fz_context *ctx, pdf_obj *grp, char *val) +{ + pdf_obj *kids = pdf_dict_gets(grp, "Kids"); + + if (kids == NULL) + { + set_check(ctx, grp, val); + } + else + { + int i, n = pdf_array_len(kids); + + for (i = 0; i < n; i++) + set_check_grp(ctx, pdf_array_get(kids, i), val); + } +} + +static void recalculate(pdf_document *doc) +{ + fz_context *ctx = doc->ctx; + + if (doc->recalculating) + return; + + doc->recalculating = 1; + fz_try(ctx) + { + pdf_obj *co = pdf_dict_getp(pdf_trailer(doc), "Root/AcroForm/CO"); + + if (co && doc->js) + { + int i, n = pdf_array_len(co); + + for (i = 0; i < n; i++) + { + pdf_obj *field = pdf_array_get(co, i); + pdf_obj *calc = pdf_dict_getp(field, "AA/C"); + + if (calc) + { + pdf_js_event e; + + e.target = field; + e.value = pdf_field_value(doc, field); + pdf_js_setup_event(doc->js, &e); + execute_action(doc, field, calc); + /* A calculate action, updates event.value. We need + * to place the value in the field */ + update_field_value(doc->ctx, field, pdf_js_get_event(doc->js)->value); + } + } + } + } + fz_always(ctx) + { + doc->recalculating = 0; + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void toggle_check_box(pdf_document *doc, pdf_obj *obj) +{ + fz_context *ctx = doc->ctx; + pdf_obj *as = pdf_dict_gets(obj, "AS"); + int ff = pdf_get_field_flags(doc, obj); + int radio = ((ff & (Ff_Pushbutton|Ff_Radio)) == Ff_Radio); + char *val = NULL; + pdf_obj *grp = radio ? 
pdf_dict_gets(obj, "Parent") : find_head_of_field_group(obj); + + if (!grp) + grp = obj; + + if (as && strcmp(pdf_to_name(as), "Off")) + { + /* "as" neither missing nor set to Off. Set it to Off, unless + * this is a non-toggle-off radio button. */ + if ((ff & (Ff_Pushbutton|Ff_NoToggleToOff|Ff_Radio)) != (Ff_NoToggleToOff|Ff_Radio)) + { + check_off(ctx, obj); + val = "Off"; + } + } + else + { + pdf_obj *n, *key = NULL; + int len, i; + + n = pdf_dict_getp(obj, "AP/N"); + + /* Look for a key that isn't "Off" */ + len = pdf_dict_len(n); + for (i = 0; i < len; i++) + { + key = pdf_dict_get_key(n, i); + if (pdf_is_name(key) && strcmp(pdf_to_name(key), "Off")) + break; + } + + /* If we found no alternative value to Off then we have no value to use */ + if (!key) + return; + + val = pdf_to_name(key); + + if (radio) + { + /* For radio buttons, first turn off all buttons in the group and + * then set the one that was clicked */ + pdf_obj *kids = pdf_dict_gets(grp, "Kids"); + + len = pdf_array_len(kids); + for (i = 0; i < len; i++) + check_off(ctx, pdf_array_get(kids, i)); + + pdf_dict_puts(obj, "AS", key); + } + else + { + /* For check boxes, we have located the node of the field hierarchy + * below which all fields share a name with the clicked one. Set + * all to the same value. 
This may cause the group to act like + * radio buttons, if each have distinct "On" values */ + if (grp) + set_check_grp(doc->ctx, grp, val); + else + set_check(doc->ctx, obj, val); + } + } + + if (val && grp) + { + pdf_obj *v = NULL; + + fz_var(v); + fz_try(ctx) + { + v = pdf_new_string(ctx, val, strlen(val)); + pdf_dict_puts(grp, "V", v); + } + fz_always(ctx) + { + pdf_drop_obj(v); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + recalculate(doc); + } +} + +int pdf_has_unsaved_changes(pdf_document *doc) +{ + return doc->dirty; +} + +int pdf_pass_event(pdf_document *doc, pdf_page *page, pdf_ui_event *ui_event) +{ + pdf_annot *annot; + pdf_hotspot *hp = &doc->hotspot; + fz_point *pt = &(ui_event->event.pointer.pt); + int changed = 0; + + for (annot = page->annots; annot; annot = annot->next) + { + if (pt->x >= annot->pagerect.x0 && pt->x <= annot->pagerect.x1) + if (pt->y >= annot->pagerect.y0 && pt->y <= annot->pagerect.y1) + break; + } + + if (annot) + { + int f = pdf_to_int(pdf_dict_gets(annot->obj, "F")); + + if (f & (F_Hidden|F_NoView)) + annot = NULL; + } + + switch (ui_event->etype) + { + case PDF_EVENT_TYPE_POINTER: + { + switch (ui_event->event.pointer.ptype) + { + case PDF_POINTER_DOWN: + if (doc->focus_obj) + { + /* Execute the blur action */ + execute_additional_action(doc, doc->focus_obj, "AA/Bl"); + doc->focus = NULL; + pdf_drop_obj(doc->focus_obj); + doc->focus_obj = NULL; + } + + if (annot) + { + doc->focus = annot; + doc->focus_obj = pdf_keep_obj(annot->obj); + + hp->num = pdf_to_num(annot->obj); + hp->gen = pdf_to_gen(annot->obj); + hp->state = HOTSPOT_POINTER_DOWN; + changed = 1; + /* Exectute the down and focus actions */ + execute_additional_action(doc, annot->obj, "AA/Fo"); + execute_additional_action(doc, annot->obj, "AA/D"); + } + break; + + case PDF_POINTER_UP: + if (hp->state != 0) + changed = 1; + + hp->num = 0; + hp->gen = 0; + hp->state = 0; + + if (annot) + { + switch (annot->widget_type) + { + case PDF_WIDGET_TYPE_RADIOBUTTON: + case 
PDF_WIDGET_TYPE_CHECKBOX: + /* FIXME: treating radio buttons like check boxes, for now */ + toggle_check_box(doc, annot->obj); + changed = 1; + break; + } + + /* Execute the up action */ + execute_additional_action(doc, annot->obj, "AA/U"); + /* Execute the main action chain */ + execute_action_chain(doc, annot->obj); + } + break; + } + } + break; + } + + return changed; +} + +void pdf_update_page(pdf_document *doc, pdf_page *page) +{ + fz_context *ctx = doc->ctx; + pdf_annot *annot; + + /* Reset changed_annots to empty */ + page->changed_annots = NULL; + + /* + Free all annots in tmp_annots, since these were + referenced only from changed_annots. + */ + if (page->tmp_annots) + { + pdf_free_annot(ctx, page->tmp_annots); + page->tmp_annots = NULL; + } + + /* Add all changed annots to the list */ + for (annot = page->annots; annot; annot = annot->next) + { + pdf_xobject *ap = pdf_keep_xobject(ctx, annot->ap); + int ap_iteration = annot->ap_iteration; + + fz_try(ctx) + { + pdf_update_annot(doc, annot); + + if ((ap != annot->ap || ap_iteration != annot->ap_iteration)) + { + annot->next_changed = page->changed_annots; + page->changed_annots = annot; + } + } + fz_always(ctx) + { + pdf_drop_xobject(ctx, ap); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + + /* + Add all deleted annots to the list, since these also + warrant a screen update + */ + for (annot = page->deleted_annots; annot; annot = annot->next) + { + annot->next_changed = page->changed_annots; + page->changed_annots = annot; + } + + /* + Move deleted_annots to tmp_annots to keep them separate + from any future deleted ones. 
They cannot yet be freed + since they are linked into changed_annots + */ + page->tmp_annots = page->deleted_annots; + page->deleted_annots = NULL; +} + +pdf_annot *pdf_poll_changed_annot(pdf_document *idoc, pdf_page *page) +{ + pdf_annot *annot = page->changed_annots; + + if (annot) + page->changed_annots = annot->next_changed; + + return annot; +} + +pdf_widget *pdf_focused_widget(pdf_document *doc) +{ + return (pdf_widget *)doc->focus; +} + +pdf_widget *pdf_first_widget(pdf_document *doc, pdf_page *page) +{ + pdf_annot *annot = page->annots; + + while (annot && annot->widget_type == PDF_WIDGET_TYPE_NOT_WIDGET) + annot = annot->next; + + return (pdf_widget *)annot; +} + +pdf_widget *pdf_next_widget(pdf_widget *previous) +{ + pdf_annot *annot = (pdf_annot *)previous; + + if (annot) + annot = annot->next; + + while (annot && annot->widget_type == PDF_WIDGET_TYPE_NOT_WIDGET) + annot = annot->next; + + return (pdf_widget *)annot; +} + +int pdf_widget_get_type(pdf_widget *widget) +{ + pdf_annot *annot = (pdf_annot *)widget; + return annot->widget_type; +} + +char *pdf_field_value(pdf_document *doc, pdf_obj *field) +{ + return get_string_or_stream(doc, pdf_get_inheritable(doc, field, "V")); +} + +static int set_text_field_value(pdf_document *doc, pdf_obj *field, char *text) +{ + pdf_obj *v = pdf_dict_getp(field, "AA/V"); + + if (v && doc->js) + { + pdf_js_event e; + + e.target = field; + e.value = text; + pdf_js_setup_event(doc->js, &e); + execute_action(doc, field, v); + + if (!pdf_js_get_event(doc->js)->rc) + return 0; + + text = pdf_js_get_event(doc->js)->value; + } + + doc->dirty = 1; + update_field_value(doc->ctx, field, text); + + return 1; +} + +static void update_checkbox_selector(pdf_document *doc, pdf_obj *field, char *val) +{ + fz_context *ctx = doc->ctx; + pdf_obj *kids = pdf_dict_gets(field, "Kids"); + + if (kids) + { + int i, n = pdf_array_len(kids); + + for (i = 0; i < n; i++) + update_checkbox_selector(doc, pdf_array_get(kids, i), val); + } + else + { + 
pdf_obj *n = pdf_dict_getp(field, "AP/N"); + pdf_obj *oval = NULL; + + fz_var(oval); + fz_try(ctx) + { + if (pdf_dict_gets(n, val)) + oval = pdf_new_name(ctx, val); + else + oval = pdf_new_name(ctx, "Off"); + + pdf_dict_puts(field, "AS", oval); + } + fz_always(ctx) + { + pdf_drop_obj(oval); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } +} + +static int set_checkbox_value(pdf_document *doc, pdf_obj *field, char *val) +{ + update_checkbox_selector(doc, field, val); + update_field_value(doc->ctx, field, val); + return 1; +} + +int pdf_field_set_value(pdf_document *doc, pdf_obj *field, char *text) +{ + int res = 0; + + switch (pdf_field_type(doc, field)) + { + case PDF_WIDGET_TYPE_TEXT: + res = set_text_field_value(doc, field, text); + break; + + case PDF_WIDGET_TYPE_CHECKBOX: + case PDF_WIDGET_TYPE_RADIOBUTTON: + res = set_checkbox_value(doc, field, text); + break; + + default: + /* text updater will do in most cases */ + update_field_value(doc->ctx, field, text); + res = 1; + break; + } + + recalculate(doc); + + return res; +} + +char *pdf_field_border_style(pdf_document *doc, pdf_obj *field) +{ + char *bs = pdf_to_name(pdf_dict_getp(field, "BS/S")); + + switch (*bs) + { + case 'S': return "Solid"; + case 'D': return "Dashed"; + case 'B': return "Beveled"; + case 'I': return "Inset"; + case 'U': return "Underline"; + } + + return "Solid"; +} + +void pdf_field_set_border_style(pdf_document *doc, pdf_obj *field, char *text) +{ + fz_context *ctx = doc->ctx; + pdf_obj *val = NULL; + + if (!strcmp(text, "Solid")) + val = pdf_new_name(ctx, "S"); + else if (!strcmp(text, "Dashed")) + val = pdf_new_name(ctx, "D"); + else if (!strcmp(text, "Beveled")) + val = pdf_new_name(ctx, "B"); + else if (!strcmp(text, "Inset")) + val = pdf_new_name(ctx, "I"); + else if (!strcmp(text, "Underline")) + val = pdf_new_name(ctx, "U"); + else + return; + + fz_try(ctx); + { + pdf_dict_putp(field, "BS/S", val); + pdf_field_mark_dirty(ctx, field); + } + fz_always(ctx) + { + 
pdf_drop_obj(val); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void pdf_field_set_button_caption(pdf_document *doc, pdf_obj *field, char *text) +{ + fz_context *ctx = doc->ctx; + pdf_obj *val = pdf_new_string(ctx, text, strlen(text)); + + fz_try(ctx); + { + if (pdf_field_type(doc, field) == PDF_WIDGET_TYPE_PUSHBUTTON) + { + pdf_dict_putp(field, "MK/CA", val); + pdf_field_mark_dirty(ctx, field); + } + } + fz_always(ctx) + { + pdf_drop_obj(val); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +int pdf_field_display(pdf_document *doc, pdf_obj *field) +{ + pdf_obj *kids; + int f, res = Display_Visible; + + /* Base response on first of children. Not ideal, + * but not clear how to handle children with + * differing values */ + while ((kids = pdf_dict_gets(field, "Kids")) != NULL) + field = pdf_array_get(kids, 0); + + f = pdf_to_int(pdf_dict_gets(field, "F")); + + if (f & F_Hidden) + { + res = Display_Hidden; + } + else if (f & F_Print) + { + if (f & F_NoView) + res = Display_NoView; + } + else + { + if (f & F_NoView) + res = Display_Hidden; + else + res = Display_NoPrint; + } + + return res; +} + +/* + * get the field name in a char buffer that has spare room to + * add more characters at the end. 
+ */ +static char *get_field_name(pdf_document *doc, pdf_obj *field, int spare) +{ + fz_context *ctx = doc->ctx; + char *res = NULL; + pdf_obj *parent = pdf_dict_gets(field, "Parent"); + char *lname = pdf_to_str_buf(pdf_dict_gets(field, "T")); + int llen = strlen(lname); + + /* + * If we found a name at this point in the field hierarchy + * then we'll need extra space for it and a dot + */ + if (llen) + spare += llen+1; + + if (parent) + { + res = get_field_name(doc, parent, spare); + } + else + { + res = fz_malloc(ctx, spare+1); + res[0] = 0; + } + + if (llen) + { + if (res[0]) + strcat(res, "."); + + strcat(res, lname); + } + + return res; +} + +char *pdf_field_name(pdf_document *doc, pdf_obj *field) +{ + return get_field_name(doc, field, 0); +} + +void pdf_field_set_display(pdf_document *doc, pdf_obj *field, int d) +{ + fz_context *ctx = doc->ctx; + pdf_obj *kids = pdf_dict_gets(field, "Kids"); + + if (!kids) + { + int mask = (F_Hidden|F_Print|F_NoView); + int f = pdf_to_int(pdf_dict_gets(field, "F")) & ~mask; + pdf_obj *fo = NULL; + + switch (d) + { + case Display_Visible: + f |= F_Print; + break; + case Display_Hidden: + f |= F_Hidden; + break; + case Display_NoView: + f |= (F_Print|F_NoView); + break; + case Display_NoPrint: + break; + } + + fz_var(fo); + fz_try(ctx) + { + fo = pdf_new_int(ctx, f); + pdf_dict_puts(field, "F", fo); + } + fz_always(ctx) + { + pdf_drop_obj(fo); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + else + { + int i, n = pdf_array_len(kids); + + for (i = 0; i < n; i++) + pdf_field_set_display(doc, pdf_array_get(kids, i), d); + } +} + +void pdf_field_set_fill_color(pdf_document *doc, pdf_obj *field, pdf_obj *col) +{ + /* col == NULL mean transparent, but we can simply pass it on as with + * non-NULL values because pdf_dict_putp interprets a NULL value as + * delete */ + pdf_dict_putp(field, "MK/BG", col); + pdf_field_mark_dirty(doc->ctx, field); +} + +void pdf_field_set_text_color(pdf_document *doc, pdf_obj *field, pdf_obj *col) +{ 
+ fz_context *ctx = doc->ctx; + da_info di; + fz_buffer *fzbuf = NULL; + char *da = pdf_to_str_buf(pdf_get_inheritable(doc, field, "DA")); + unsigned char *buf; + int len; + pdf_obj *daobj = NULL; + + memset(&di, 0, sizeof(di)); + + fz_var(fzbuf); + fz_var(di); + fz_var(daobj); + fz_try(ctx) + { + int i; + + parse_da(ctx, da, &di); + di.col_size = pdf_array_len(col); + + len = fz_mini(di.col_size, nelem(di.col)); + for (i = 0; i < len; i++) + di.col[i] = pdf_to_real(pdf_array_get(col, i)); + + fzbuf = fz_new_buffer(ctx, 0); + fzbuf_print_da(ctx, fzbuf, &di); + len = fz_buffer_storage(ctx, fzbuf, &buf); + daobj = pdf_new_string(ctx, (char *)buf, len); + pdf_dict_puts(field, "DA", daobj); + pdf_field_mark_dirty(ctx, field); + } + fz_always(ctx) + { + da_info_fin(ctx, &di); + fz_drop_buffer(ctx, fzbuf); + pdf_drop_obj(daobj); + } + fz_catch(ctx) + { + fz_warn(ctx, "%s", fz_caught_message(ctx)); + } +} + +fz_rect *pdf_bound_widget(pdf_widget *widget, fz_rect *rect) +{ + pdf_annot *annot = (pdf_annot *)widget; + + if (rect == NULL) + return NULL; + *rect = annot->pagerect; + + return rect; +} + +char *pdf_text_widget_text(pdf_document *doc, pdf_widget *tw) +{ + pdf_annot *annot = (pdf_annot *)tw; + fz_context *ctx = doc->ctx; + char *text = NULL; + + fz_var(text); + fz_try(ctx) + { + text = pdf_field_value(doc, annot->obj); + } + fz_catch(ctx) + { + fz_warn(ctx, "failed allocation in fz_text_widget_text"); + } + + return text; +} + +int pdf_text_widget_max_len(pdf_document *doc, pdf_widget *tw) +{ + pdf_annot *annot = (pdf_annot *)tw; + + return pdf_to_int(pdf_get_inheritable(doc, annot->obj, "MaxLen")); +} + +int pdf_text_widget_content_type(pdf_document *doc, pdf_widget *tw) +{ + pdf_annot *annot = (pdf_annot *)tw; + fz_context *ctx = doc->ctx; + char *code = NULL; + int type = PDF_WIDGET_CONTENT_UNRESTRAINED; + + fz_var(code); + fz_try(ctx) + { + code = get_string_or_stream(doc, pdf_dict_getp(annot->obj, "AA/F/JS")); + if (code) + { + if (strstr(code, 
"AFNumber_Format")) + type = PDF_WIDGET_CONTENT_NUMBER; + else if (strstr(code, "AFSpecial_Format")) + type = PDF_WIDGET_CONTENT_SPECIAL; + else if (strstr(code, "AFDate_FormatEx")) + type = PDF_WIDGET_CONTENT_DATE; + else if (strstr(code, "AFTime_FormatEx")) + type = PDF_WIDGET_CONTENT_TIME; + } + } + fz_always(ctx) + { + fz_free(ctx, code); + } + fz_catch(ctx) + { + fz_warn(ctx, "failure in fz_text_widget_content_type"); + } + + return type; +} + +static int run_keystroke(pdf_document *doc, pdf_obj *field, char **text) +{ + pdf_obj *k = pdf_dict_getp(field, "AA/K"); + + if (k && doc->js) + { + pdf_js_event e; + + e.target = field; + e.value = *text; + pdf_js_setup_event(doc->js, &e); + execute_action(doc, field, k); + + if (!pdf_js_get_event(doc->js)->rc) + return 0; + + *text = pdf_js_get_event(doc->js)->value; + } + + return 1; +} + +int pdf_text_widget_set_text(pdf_document *doc, pdf_widget *tw, char *text) +{ + pdf_annot *annot = (pdf_annot *)tw; + fz_context *ctx = doc->ctx; + int accepted = 0; + + fz_try(ctx) + { + accepted = run_keystroke(doc, annot->obj, &text); + if (accepted) + accepted = pdf_field_set_value(doc, annot->obj, text); + } + fz_catch(ctx) + { + fz_warn(ctx, "fz_text_widget_set_text failed"); + } + + return accepted; +} + +int pdf_choice_widget_options(pdf_document *doc, pdf_widget *tw, char *opts[]) +{ + pdf_annot *annot = (pdf_annot *)tw; + pdf_obj *optarr; + int i, n; + + if (!annot) + return 0; + + optarr = pdf_dict_gets(annot->obj, "Opt"); + n = pdf_array_len(optarr); + + if (opts) + { + for (i = 0; i < n; i++) + { + opts[i] = pdf_to_str_buf(pdf_array_get(optarr, i)); + } + } + + return n; +} + +int pdf_choice_widget_is_multiselect(pdf_document *doc, pdf_widget *tw) +{ + pdf_annot *annot = (pdf_annot *)tw; + + if (!annot) return 0; + + switch (pdf_field_type(doc, annot->obj)) + { + case PDF_WIDGET_TYPE_LISTBOX: + case PDF_WIDGET_TYPE_COMBOBOX: + return (pdf_get_field_flags(doc, annot->obj) & Ff_MultiSelect) != 0; + default: + return 0; 
+ } +} + +int pdf_choice_widget_value(pdf_document *doc, pdf_widget *tw, char *opts[]) +{ + pdf_annot *annot = (pdf_annot *)tw; + pdf_obj *optarr; + int i, n; + + if (!annot) + return 0; + + optarr = pdf_dict_gets(annot->obj, "V"); + + if (pdf_is_string(optarr)) + { + if (opts) + opts[0] = pdf_to_str_buf(optarr); + + return 1; + } + else + { + n = pdf_array_len(optarr); + + if (opts) + { + for (i = 0; i < n; i++) + { + pdf_obj *elem = pdf_array_get(optarr, i); + + if (pdf_is_array(elem)) + elem = pdf_array_get(elem, 1); + + opts[i] = pdf_to_str_buf(elem); + } + } + + return n; + } +} + +void pdf_choice_widget_set_value(pdf_document *doc, pdf_widget *tw, int n, char *opts[]) +{ + fz_context *ctx = doc->ctx; + pdf_annot *annot = (pdf_annot *)tw; + pdf_obj *optarr = NULL, *opt = NULL; + int i; + + if (!annot) + return; + + fz_var(optarr); + fz_var(opt); + fz_try(ctx) + { + if (n != 1) + { + optarr = pdf_new_array(ctx, n); + + for (i = 0; i < n; i++) + { + opt = pdf_new_string(ctx, opts[i], strlen(opts[i])); + pdf_array_push(optarr, opt); + pdf_drop_obj(opt); + opt = NULL; + } + + pdf_dict_puts(annot->obj, "V", optarr); + pdf_drop_obj(optarr); + } + else + { + opt = pdf_new_string(ctx, opts[0], strlen(opts[0])); + pdf_dict_puts(annot->obj, "V", opt); + pdf_drop_obj(opt); + } + + /* FIXME: when n > 1, we should be regenerating the indexes */ + pdf_dict_dels(annot->obj, "I"); + + pdf_field_mark_dirty(ctx, annot->obj); + doc->dirty = 1; + } + fz_catch(ctx) + { + pdf_drop_obj(optarr); + pdf_drop_obj(opt); + fz_rethrow(ctx); + } +} + +int pdf_signature_widget_byte_range(pdf_document *doc, pdf_widget *widget, int (*byte_range)[2]) +{ + pdf_annot *annot = (pdf_annot *)widget; + pdf_obj *br = pdf_dict_getp(annot->obj, "V/ByteRange"); + int i, n = pdf_array_len(br)/2; + + if (byte_range) + { + for (i = 0; i < n; i++) + { + byte_range[i][0] = pdf_to_int(pdf_array_get(br, 2*i)); + byte_range[i][1] = pdf_to_int(pdf_array_get(br, 2*i+1)); + } + } + + return n; +} + +int 
pdf_signature_widget_contents(pdf_document *doc, pdf_widget *widget, char **contents) +{ + pdf_annot *annot = (pdf_annot *)widget; + pdf_obj *c = pdf_dict_getp(annot->obj, "V/Contents"); + if (contents) + *contents = pdf_to_str_buf(c); + return pdf_to_str_len(c); +} diff --git a/source/pdf/pdf-function.c b/source/pdf/pdf-function.c new file mode 100644 index 00000000..4771fbfd --- /dev/null +++ b/source/pdf/pdf-function.c @@ -0,0 +1,1718 @@ +#include "mupdf/pdf.h" + +typedef struct psobj_s psobj; + +enum +{ + SAMPLE = 0, + EXPONENTIAL = 2, + STITCHING = 3, + POSTSCRIPT = 4 +}; + +typedef struct pdf_function_s pdf_function; + +struct pdf_function_s +{ + fz_function base; + int type; /* 0=sample 2=exponential 3=stitching 4=postscript */ + float domain[FZ_FN_MAXM][2]; /* even index : min value, odd index : max value */ + float range[FZ_FN_MAXN][2]; /* even index : min value, odd index : max value */ + int has_range; + + union + { + struct { + unsigned short bps; + int size[FZ_FN_MAXM]; + float encode[FZ_FN_MAXM][2]; + float decode[FZ_FN_MAXN][2]; + float *samples; + } sa; + + struct { + float n; + float c0[FZ_FN_MAXN]; + float c1[FZ_FN_MAXN]; + } e; + + struct { + int k; + fz_function **funcs; /* k */ + float *bounds; /* k - 1 */ + float *encode; /* k * 2 */ + } st; + + struct { + psobj *code; + int cap; + } p; + } u; +}; + +#define RADIAN 57.2957795 + +static inline float lerp(float x, float xmin, float xmax, float ymin, float ymax) +{ + if (xmin == xmax) + return ymin; + if (ymin == ymax) + return ymin; + return ymin + (x - xmin) * (ymax - ymin) / (xmax - xmin); +} + +/* + * PostScript calculator + */ + +enum { PS_BOOL, PS_INT, PS_REAL, PS_OPERATOR, PS_BLOCK }; + +enum +{ + PS_OP_ABS, PS_OP_ADD, PS_OP_AND, PS_OP_ATAN, PS_OP_BITSHIFT, + PS_OP_CEILING, PS_OP_COPY, PS_OP_COS, PS_OP_CVI, PS_OP_CVR, + PS_OP_DIV, PS_OP_DUP, PS_OP_EQ, PS_OP_EXCH, PS_OP_EXP, + PS_OP_FALSE, PS_OP_FLOOR, PS_OP_GE, PS_OP_GT, PS_OP_IDIV, PS_OP_IF, + PS_OP_IFELSE, PS_OP_INDEX, PS_OP_LE, 
PS_OP_LN, PS_OP_LOG, PS_OP_LT, + PS_OP_MOD, PS_OP_MUL, PS_OP_NE, PS_OP_NEG, PS_OP_NOT, PS_OP_OR, + PS_OP_POP, PS_OP_RETURN, PS_OP_ROLL, PS_OP_ROUND, PS_OP_SIN, + PS_OP_SQRT, PS_OP_SUB, PS_OP_TRUE, PS_OP_TRUNCATE, PS_OP_XOR +}; + +static char *ps_op_names[] = +{ + "abs", "add", "and", "atan", "bitshift", "ceiling", "copy", + "cos", "cvi", "cvr", "div", "dup", "eq", "exch", "exp", + "false", "floor", "ge", "gt", "idiv", "if", "ifelse", "index", "le", "ln", + "log", "lt", "mod", "mul", "ne", "neg", "not", "or", "pop", "return", + "roll", "round", "sin", "sqrt", "sub", "true", "truncate", "xor" +}; + +struct psobj_s +{ + int type; + union + { + int b; /* boolean (stack only) */ + int i; /* integer (stack and code) */ + float f; /* real (stack and code) */ + int op; /* operator (code only) */ + int block; /* if/ifelse block pointer (code only) */ + } u; +}; + +typedef struct ps_stack_s ps_stack; + +struct ps_stack_s +{ + psobj stack[100]; + int sp; +}; + +#ifndef NDEBUG +void +pdf_debug_ps_stack(ps_stack *st) +{ + int i; + + printf("stack: "); + + for (i = 0; i < st->sp; i++) + { + switch (st->stack[i].type) + { + case PS_BOOL: + if (st->stack[i].u.b) + printf("true "); + else + printf("false "); + break; + + case PS_INT: + printf("%d ", st->stack[i].u.i); + break; + + case PS_REAL: + printf("%g ", st->stack[i].u.f); + break; + } + } + printf("\n"); + +} +#endif + +static void +ps_init_stack(ps_stack *st) +{ + memset(st->stack, 0, sizeof(st->stack)); + st->sp = 0; +} + +static inline int ps_overflow(ps_stack *st, int n) +{ + return n < 0 || st->sp + n >= nelem(st->stack); +} + +static inline int ps_underflow(ps_stack *st, int n) +{ + return n < 0 || st->sp - n < 0; +} + +static inline int ps_is_type(ps_stack *st, int t) +{ + return !ps_underflow(st, 1) && st->stack[st->sp - 1].type == t; +} + +static inline int ps_is_type2(ps_stack *st, int t) +{ + return !ps_underflow(st, 2) && st->stack[st->sp - 1].type == t && st->stack[st->sp - 2].type == t; +} + +static void 
+ps_push_bool(ps_stack *st, int b) +{ + if (!ps_overflow(st, 1)) + { + st->stack[st->sp].type = PS_BOOL; + st->stack[st->sp].u.b = b; + st->sp++; + } +} + +static void +ps_push_int(ps_stack *st, int n) +{ + if (!ps_overflow(st, 1)) + { + st->stack[st->sp].type = PS_INT; + st->stack[st->sp].u.i = n; + st->sp++; + } +} + +static void +ps_push_real(ps_stack *st, float n) +{ + if (!ps_overflow(st, 1)) + { + st->stack[st->sp].type = PS_REAL; + if (isnan(n)) + { + /* Push 1.0, as it's a small known value that won't + * cause a divide by 0. Same reason as in fz_atof. */ + n = 1.0; + } + st->stack[st->sp].u.f = fz_clamp(n, -FLT_MAX, FLT_MAX); + st->sp++; + } +} + +static int +ps_pop_bool(ps_stack *st) +{ + if (!ps_underflow(st, 1)) + { + if (ps_is_type(st, PS_BOOL)) + return st->stack[--st->sp].u.b; + } + return 0; +} + +static int +ps_pop_int(ps_stack *st) +{ + if (!ps_underflow(st, 1)) + { + if (ps_is_type(st, PS_INT)) + return st->stack[--st->sp].u.i; + if (ps_is_type(st, PS_REAL)) + return st->stack[--st->sp].u.f; + } + return 0; +} + +static float +ps_pop_real(ps_stack *st) +{ + if (!ps_underflow(st, 1)) + { + if (ps_is_type(st, PS_INT)) + return st->stack[--st->sp].u.i; + if (ps_is_type(st, PS_REAL)) + return st->stack[--st->sp].u.f; + } + return 0; +} + +static void +ps_copy(ps_stack *st, int n) +{ + if (!ps_underflow(st, n) && !ps_overflow(st, n)) + { + memcpy(st->stack + st->sp, st->stack + st->sp - n, n * sizeof(psobj)); + st->sp += n; + } +} + +static void +ps_roll(ps_stack *st, int n, int j) +{ + psobj tmp; + int i; + + if (ps_underflow(st, n) || j == 0 || n == 0) + return; + + if (j >= 0) + { + j %= n; + } + else + { + j = -j % n; + if (j != 0) + j = n - j; + } + + for (i = 0; i < j; i++) + { + tmp = st->stack[st->sp - 1]; + memmove(st->stack + st->sp - n + 1, st->stack + st->sp - n, n * sizeof(psobj)); + st->stack[st->sp - n] = tmp; + } +} + +static void +ps_index(ps_stack *st, int n) +{ + if (!ps_overflow(st, 1) && !ps_underflow(st, n)) + { + 
st->stack[st->sp] = st->stack[st->sp - n - 1]; + st->sp++; + } +} + +static void +ps_run(fz_context *ctx, psobj *code, ps_stack *st, int pc) +{ + int i1, i2; + float r1, r2; + int b1, b2; + + while (1) + { + switch (code[pc].type) + { + case PS_INT: + ps_push_int(st, code[pc++].u.i); + break; + + case PS_REAL: + ps_push_real(st, code[pc++].u.f); + break; + + case PS_OPERATOR: + switch (code[pc++].u.op) + { + case PS_OP_ABS: + if (ps_is_type(st, PS_INT)) + ps_push_int(st, abs(ps_pop_int(st))); + else + ps_push_real(st, fabsf(ps_pop_real(st))); + break; + + case PS_OP_ADD: + if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_int(st, i1 + i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_real(st, r1 + r2); + } + break; + + case PS_OP_AND: + if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_int(st, i1 & i2); + } + else { + b2 = ps_pop_bool(st); + b1 = ps_pop_bool(st); + ps_push_bool(st, b1 && b2); + } + break; + + case PS_OP_ATAN: + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + r1 = atan2f(r1, r2) * RADIAN; + if (r1 < 0) + r1 += 360; + ps_push_real(st, r1); + break; + + case PS_OP_BITSHIFT: + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + if (i2 > 0 && i2 < 8 * sizeof (i2)) + ps_push_int(st, i1 << i2); + else if (i2 < 0 && i2 > -8 * (int)sizeof (i2)) + ps_push_int(st, (int)((unsigned int)i1 >> -i2)); + else + ps_push_int(st, i1); + break; + + case PS_OP_CEILING: + r1 = ps_pop_real(st); + ps_push_real(st, ceilf(r1)); + break; + + case PS_OP_COPY: + ps_copy(st, ps_pop_int(st)); + break; + + case PS_OP_COS: + r1 = ps_pop_real(st); + ps_push_real(st, cosf(r1/RADIAN)); + break; + + case PS_OP_CVI: + ps_push_int(st, ps_pop_int(st)); + break; + + case PS_OP_CVR: + ps_push_real(st, ps_pop_real(st)); + break; + + case PS_OP_DIV: + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + if (fabsf(r2) >= FLT_EPSILON) + ps_push_real(st, r1 / r2); + else + ps_push_real(st, DIV_BY_ZERO(r1, 
r2, -FLT_MAX, FLT_MAX)); + break; + + case PS_OP_DUP: + ps_copy(st, 1); + break; + + case PS_OP_EQ: + if (ps_is_type2(st, PS_BOOL)) { + b2 = ps_pop_bool(st); + b1 = ps_pop_bool(st); + ps_push_bool(st, b1 == b2); + } + else if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_bool(st, i1 == i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_bool(st, r1 == r2); + } + break; + + case PS_OP_EXCH: + ps_roll(st, 2, 1); + break; + + case PS_OP_EXP: + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_real(st, powf(r1, r2)); + break; + + case PS_OP_FALSE: + ps_push_bool(st, 0); + break; + + case PS_OP_FLOOR: + r1 = ps_pop_real(st); + ps_push_real(st, floorf(r1)); + break; + + case PS_OP_GE: + if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_bool(st, i1 >= i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_bool(st, r1 >= r2); + } + break; + + case PS_OP_GT: + if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_bool(st, i1 > i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_bool(st, r1 > r2); + } + break; + + case PS_OP_IDIV: + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + if (i2 != 0) + ps_push_int(st, i1 / i2); + else + ps_push_int(st, DIV_BY_ZERO(i1, i2, INT_MIN, INT_MAX)); + break; + + case PS_OP_INDEX: + ps_index(st, ps_pop_int(st)); + break; + + case PS_OP_LE: + if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_bool(st, i1 <= i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_bool(st, r1 <= r2); + } + break; + + case PS_OP_LN: + r1 = ps_pop_real(st); + /* Bug 692941 - logf as separate statement */ + r2 = logf(r1); + ps_push_real(st, r2); + break; + + case PS_OP_LOG: + r1 = ps_pop_real(st); + ps_push_real(st, log10f(r1)); + break; + + case PS_OP_LT: + if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = 
ps_pop_int(st); + ps_push_bool(st, i1 < i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_bool(st, r1 < r2); + } + break; + + case PS_OP_MOD: + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + if (i2 != 0) + ps_push_int(st, i1 % i2); + else + ps_push_int(st, DIV_BY_ZERO(i1, i2, INT_MIN, INT_MAX)); + break; + + case PS_OP_MUL: + if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_int(st, i1 * i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_real(st, r1 * r2); + } + break; + + case PS_OP_NE: + if (ps_is_type2(st, PS_BOOL)) { + b2 = ps_pop_bool(st); + b1 = ps_pop_bool(st); + ps_push_bool(st, b1 != b2); + } + else if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_bool(st, i1 != i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_bool(st, r1 != r2); + } + break; + + case PS_OP_NEG: + if (ps_is_type(st, PS_INT)) + ps_push_int(st, -ps_pop_int(st)); + else + ps_push_real(st, -ps_pop_real(st)); + break; + + case PS_OP_NOT: + if (ps_is_type(st, PS_BOOL)) + ps_push_bool(st, !ps_pop_bool(st)); + else + ps_push_int(st, ~ps_pop_int(st)); + break; + + case PS_OP_OR: + if (ps_is_type2(st, PS_BOOL)) { + b2 = ps_pop_bool(st); + b1 = ps_pop_bool(st); + ps_push_bool(st, b1 || b2); + } + else { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_int(st, i1 | i2); + } + break; + + case PS_OP_POP: + if (!ps_underflow(st, 1)) + st->sp--; + break; + + case PS_OP_ROLL: + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_roll(st, i1, i2); + break; + + case PS_OP_ROUND: + if (!ps_is_type(st, PS_INT)) { + r1 = ps_pop_real(st); + ps_push_real(st, (r1 >= 0) ? 
floorf(r1 + 0.5f) : ceilf(r1 - 0.5f)); + } + break; + + case PS_OP_SIN: + r1 = ps_pop_real(st); + ps_push_real(st, sinf(r1/RADIAN)); + break; + + case PS_OP_SQRT: + r1 = ps_pop_real(st); + ps_push_real(st, sqrtf(r1)); + break; + + case PS_OP_SUB: + if (ps_is_type2(st, PS_INT)) { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_int(st, i1 - i2); + } + else { + r2 = ps_pop_real(st); + r1 = ps_pop_real(st); + ps_push_real(st, r1 - r2); + } + break; + + case PS_OP_TRUE: + ps_push_bool(st, 1); + break; + + case PS_OP_TRUNCATE: + if (!ps_is_type(st, PS_INT)) { + r1 = ps_pop_real(st); + ps_push_real(st, (r1 >= 0) ? floorf(r1) : ceilf(r1)); + } + break; + + case PS_OP_XOR: + if (ps_is_type2(st, PS_BOOL)) { + b2 = ps_pop_bool(st); + b1 = ps_pop_bool(st); + ps_push_bool(st, b1 ^ b2); + } + else { + i2 = ps_pop_int(st); + i1 = ps_pop_int(st); + ps_push_int(st, i1 ^ i2); + } + break; + + case PS_OP_IF: + b1 = ps_pop_bool(st); + if (b1) + ps_run(ctx, code, st, code[pc + 1].u.block); + pc = code[pc + 2].u.block; + break; + + case PS_OP_IFELSE: + b1 = ps_pop_bool(st); + if (b1) + ps_run(ctx, code, st, code[pc + 1].u.block); + else + ps_run(ctx, code, st, code[pc + 0].u.block); + pc = code[pc + 2].u.block; + break; + + case PS_OP_RETURN: + return; + + default: + fz_warn(ctx, "foreign operator in calculator function"); + return; + } + break; + + default: + fz_warn(ctx, "foreign object in calculator function"); + return; + } + } +} + +static void +resize_code(fz_context *ctx, pdf_function *func, int newsize) +{ + if (newsize >= func->u.p.cap) + { + int new_cap = func->u.p.cap + 64; + func->u.p.code = fz_resize_array(ctx, func->u.p.code, new_cap, sizeof(psobj)); + func->u.p.cap = new_cap; + } +} + +static void +parse_code(pdf_function *func, fz_stream *stream, int *codeptr, pdf_lexbuf *buf) +{ + pdf_token tok; + int opptr, elseptr, ifptr; + int a, b, mid, cmp; + fz_context *ctx = stream->ctx; + + while (1) + { + tok = pdf_lex(stream, buf); + + switch (tok) + { + case 
PDF_TOK_EOF: + fz_throw(ctx, FZ_ERROR_GENERIC, "truncated calculator function"); + + case PDF_TOK_INT: + resize_code(ctx, func, *codeptr); + func->u.p.code[*codeptr].type = PS_INT; + func->u.p.code[*codeptr].u.i = buf->i; + ++*codeptr; + break; + + case PDF_TOK_TRUE: + resize_code(ctx, func, *codeptr); + func->u.p.code[*codeptr].type = PS_BOOL; + func->u.p.code[*codeptr].u.b = 1; + ++*codeptr; + break; + + case PDF_TOK_FALSE: + resize_code(ctx, func, *codeptr); + func->u.p.code[*codeptr].type = PS_BOOL; + func->u.p.code[*codeptr].u.b = 0; + ++*codeptr; + break; + + case PDF_TOK_REAL: + resize_code(ctx, func, *codeptr); + func->u.p.code[*codeptr].type = PS_REAL; + func->u.p.code[*codeptr].u.f = buf->f; + ++*codeptr; + break; + + case PDF_TOK_OPEN_BRACE: + opptr = *codeptr; + *codeptr += 4; + + resize_code(ctx, func, *codeptr); + + ifptr = *codeptr; + parse_code(func, stream, codeptr, buf); + + tok = pdf_lex(stream, buf); + + if (tok == PDF_TOK_OPEN_BRACE) + { + elseptr = *codeptr; + parse_code(func, stream, codeptr, buf); + + tok = pdf_lex(stream, buf); + } + else + { + elseptr = -1; + } + + if (tok != PDF_TOK_KEYWORD) + fz_throw(ctx, FZ_ERROR_GENERIC, "missing keyword in 'if-else' context"); + + if (!strcmp(buf->scratch, "if")) + { + if (elseptr >= 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "too many branches for 'if'"); + func->u.p.code[opptr].type = PS_OPERATOR; + func->u.p.code[opptr].u.op = PS_OP_IF; + func->u.p.code[opptr+2].type = PS_BLOCK; + func->u.p.code[opptr+2].u.block = ifptr; + func->u.p.code[opptr+3].type = PS_BLOCK; + func->u.p.code[opptr+3].u.block = *codeptr; + } + else if (!strcmp(buf->scratch, "ifelse")) + { + if (elseptr < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "not enough branches for 'ifelse'"); + func->u.p.code[opptr].type = PS_OPERATOR; + func->u.p.code[opptr].u.op = PS_OP_IFELSE; + func->u.p.code[opptr+1].type = PS_BLOCK; + func->u.p.code[opptr+1].u.block = elseptr; + func->u.p.code[opptr+2].type = PS_BLOCK; + func->u.p.code[opptr+2].u.block = 
ifptr; + func->u.p.code[opptr+3].type = PS_BLOCK; + func->u.p.code[opptr+3].u.block = *codeptr; + } + else + { + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown keyword in 'if-else' context: '%s'", buf->scratch); + } + break; + + case PDF_TOK_CLOSE_BRACE: + resize_code(ctx, func, *codeptr); + func->u.p.code[*codeptr].type = PS_OPERATOR; + func->u.p.code[*codeptr].u.op = PS_OP_RETURN; + ++*codeptr; + return; + + case PDF_TOK_KEYWORD: + cmp = -1; + a = -1; + b = nelem(ps_op_names); + while (b - a > 1) + { + mid = (a + b) / 2; + cmp = strcmp(buf->scratch, ps_op_names[mid]); + if (cmp > 0) + a = mid; + else if (cmp < 0) + b = mid; + else + a = b = mid; + } + if (cmp != 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown operator: '%s'", buf->scratch); + + resize_code(ctx, func, *codeptr); + func->u.p.code[*codeptr].type = PS_OPERATOR; + func->u.p.code[*codeptr].u.op = a; + ++*codeptr; + break; + + default: + fz_throw(ctx, FZ_ERROR_GENERIC, "calculator function syntax error"); + } + } +} + +static void +load_postscript_func(pdf_function *func, pdf_document *xref, pdf_obj *dict, int num, int gen) +{ + fz_stream *stream = NULL; + int codeptr; + pdf_lexbuf buf; + pdf_token tok; + fz_context *ctx = xref->ctx; + int locked = 0; + + pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); + + fz_var(stream); + fz_var(locked); + + fz_try(ctx) + { + stream = pdf_open_stream(xref, num, gen); + + tok = pdf_lex(stream, &buf); + if (tok != PDF_TOK_OPEN_BRACE) + { + fz_throw(ctx, FZ_ERROR_GENERIC, "stream is not a calculator function"); + } + + func->u.p.code = NULL; + func->u.p.cap = 0; + + codeptr = 0; + parse_code(func, stream, &codeptr, &buf); + } + fz_always(ctx) + { + fz_close(stream); + pdf_lexbuf_fin(&buf); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot parse calculator function (%d %d R)", num, gen); + } + + func->base.size += func->u.p.cap * sizeof(psobj); +} + +static void +eval_postscript_func(fz_context *ctx, pdf_function *func, float *in, float *out) +{ + ps_stack st; + float x; + 
int i; + + ps_init_stack(&st); + + for (i = 0; i < func->base.m; i++) + { + x = fz_clamp(in[i], func->domain[i][0], func->domain[i][1]); + ps_push_real(&st, x); + } + + ps_run(ctx, func->u.p.code, &st, 0); + + for (i = func->base.n - 1; i >= 0; i--) + { + x = ps_pop_real(&st); + out[i] = fz_clamp(x, func->range[i][0], func->range[i][1]); + } +} + +/* + * Sample function + */ + +#define MAX_SAMPLE_FUNCTION_SIZE (100 << 20) + +static void +load_sample_func(pdf_function *func, pdf_document *xref, pdf_obj *dict, int num, int gen) +{ + fz_context *ctx = xref->ctx; + fz_stream *stream; + pdf_obj *obj; + int samplecount; + int bps; + int i; + + func->u.sa.samples = NULL; + + obj = pdf_dict_gets(dict, "Size"); + if (pdf_array_len(obj) < func->base.m) + fz_throw(ctx, FZ_ERROR_GENERIC, "too few sample function dimension sizes"); + if (pdf_array_len(obj) > func->base.m) + fz_warn(ctx, "too many sample function dimension sizes"); + for (i = 0; i < func->base.m; i++) + { + func->u.sa.size[i] = pdf_to_int(pdf_array_get(obj, i)); + if (func->u.sa.size[i] <= 0) + { + fz_warn(ctx, "non-positive sample function dimension size"); + func->u.sa.size[i] = 1; + } + } + + obj = pdf_dict_gets(dict, "BitsPerSample"); + func->u.sa.bps = bps = pdf_to_int(obj); + + for (i = 0; i < func->base.m; i++) + { + func->u.sa.encode[i][0] = 0; + func->u.sa.encode[i][1] = func->u.sa.size[i] - 1; + } + obj = pdf_dict_gets(dict, "Encode"); + if (pdf_is_array(obj)) + { + int ranges = fz_mini(func->base.m, pdf_array_len(obj) / 2); + if (ranges != func->base.m) + fz_warn(ctx, "wrong number of sample function input mappings"); + + for (i = 0; i < ranges; i++) + { + func->u.sa.encode[i][0] = pdf_to_real(pdf_array_get(obj, i * 2 + 0)); + func->u.sa.encode[i][1] = pdf_to_real(pdf_array_get(obj, i * 2 + 1)); + } + } + + for (i = 0; i < func->base.n; i++) + { + func->u.sa.decode[i][0] = func->range[i][0]; + func->u.sa.decode[i][1] = func->range[i][1]; + } + + obj = pdf_dict_gets(dict, "Decode"); + if 
(pdf_is_array(obj)) + { + int ranges = fz_mini(func->base.n, pdf_array_len(obj) / 2); + if (ranges != func->base.n) + fz_warn(ctx, "wrong number of sample function output mappings"); + + for (i = 0; i < ranges; i++) + { + func->u.sa.decode[i][0] = pdf_to_real(pdf_array_get(obj, i * 2 + 0)); + func->u.sa.decode[i][1] = pdf_to_real(pdf_array_get(obj, i * 2 + 1)); + } + } + + for (i = 0, samplecount = func->base.n; i < func->base.m; i++) + samplecount *= func->u.sa.size[i]; + + if (samplecount > MAX_SAMPLE_FUNCTION_SIZE) + fz_throw(ctx, FZ_ERROR_GENERIC, "sample function too large"); + + func->u.sa.samples = fz_malloc_array(ctx, samplecount, sizeof(float)); + func->base.size += samplecount * sizeof(float); + + stream = pdf_open_stream(xref, num, gen); + + /* read samples */ + for (i = 0; i < samplecount; i++) + { + unsigned int x; + float s; + + if (fz_is_eof_bits(stream)) + { + fz_close(stream); + fz_throw(ctx, FZ_ERROR_GENERIC, "truncated sample function stream"); + } + + switch (bps) + { + case 1: s = fz_read_bits(stream, 1); break; + case 2: s = fz_read_bits(stream, 2) / 3.0f; break; + case 4: s = fz_read_bits(stream, 4) / 15.0f; break; + case 8: s = fz_read_byte(stream) / 255.0f; break; + case 12: s = fz_read_bits(stream, 12) / 4095.0f; break; + case 16: + x = fz_read_byte(stream) << 8; + x |= fz_read_byte(stream); + s = x / 65535.0f; + break; + case 24: + x = fz_read_byte(stream) << 16; + x |= fz_read_byte(stream) << 8; + x |= fz_read_byte(stream); + s = x / 16777215.0f; + break; + case 32: + x = fz_read_byte(stream) << 24; + x |= fz_read_byte(stream) << 16; + x |= fz_read_byte(stream) << 8; + x |= fz_read_byte(stream); + s = x / 4294967295.0f; + break; + default: + fz_close(stream); + fz_throw(ctx, FZ_ERROR_GENERIC, "sample stream bit depth %d unsupported", bps); + } + + func->u.sa.samples[i] = s; + } + + fz_close(stream); +} + +static float +interpolate_sample(pdf_function *func, int *scale, int *e0, int *e1, float *efrac, int dim, int idx) +{ + float a, b; + 
int idx0, idx1; + + idx0 = e0[dim] * scale[dim] + idx; + idx1 = e1[dim] * scale[dim] + idx; + + if (dim == 0) + { + a = func->u.sa.samples[idx0]; + b = func->u.sa.samples[idx1]; + } + else + { + a = interpolate_sample(func, scale, e0, e1, efrac, dim - 1, idx0); + b = interpolate_sample(func, scale, e0, e1, efrac, dim - 1, idx1); + } + + return a + (b - a) * efrac[dim]; +} + +static void +eval_sample_func(fz_context *ctx, pdf_function *func, float *in, float *out) +{ + int e0[FZ_FN_MAXM], e1[FZ_FN_MAXM], scale[FZ_FN_MAXM]; + float efrac[FZ_FN_MAXM]; + float x; + int i; + + /* encode input coordinates */ + for (i = 0; i < func->base.m; i++) + { + x = fz_clamp(in[i], func->domain[i][0], func->domain[i][1]); + x = lerp(x, func->domain[i][0], func->domain[i][1], + func->u.sa.encode[i][0], func->u.sa.encode[i][1]); + x = fz_clamp(x, 0, func->u.sa.size[i] - 1); + e0[i] = floorf(x); + e1[i] = ceilf(x); + efrac[i] = x - floorf(x); + } + + scale[0] = func->base.n; + for (i = 1; i < func->base.m; i++) + scale[i] = scale[i - 1] * func->u.sa.size[i]; + + for (i = 0; i < func->base.n; i++) + { + if (func->base.m == 1) + { + float a = func->u.sa.samples[e0[0] * func->base.n + i]; + float b = func->u.sa.samples[e1[0] * func->base.n + i]; + + float ab = a + (b - a) * efrac[0]; + + out[i] = lerp(ab, 0, 1, func->u.sa.decode[i][0], func->u.sa.decode[i][1]); + out[i] = fz_clamp(out[i], func->range[i][0], func->range[i][1]); + } + + else if (func->base.m == 2) + { + int s0 = func->base.n; + int s1 = s0 * func->u.sa.size[0]; + + float a = func->u.sa.samples[e0[0] * s0 + e0[1] * s1 + i]; + float b = func->u.sa.samples[e1[0] * s0 + e0[1] * s1 + i]; + float c = func->u.sa.samples[e0[0] * s0 + e1[1] * s1 + i]; + float d = func->u.sa.samples[e1[0] * s0 + e1[1] * s1 + i]; + + float ab = a + (b - a) * efrac[0]; + float cd = c + (d - c) * efrac[0]; + float abcd = ab + (cd - ab) * efrac[1]; + + out[i] = lerp(abcd, 0, 1, func->u.sa.decode[i][0], func->u.sa.decode[i][1]); + out[i] = 
fz_clamp(out[i], func->range[i][0], func->range[i][1]); + } + + else + { + x = interpolate_sample(func, scale, e0, e1, efrac, func->base.m - 1, i); + out[i] = lerp(x, 0, 1, func->u.sa.decode[i][0], func->u.sa.decode[i][1]); + out[i] = fz_clamp(out[i], func->range[i][0], func->range[i][1]); + } + } +} + +/* + * Exponential function + */ + +static void +load_exponential_func(fz_context *ctx, pdf_function *func, pdf_obj *dict) +{ + pdf_obj *obj; + int i; + + if (func->base.m > 1) + fz_warn(ctx, "exponential functions have at most one input"); + func->base.m = 1; + + obj = pdf_dict_gets(dict, "N"); + func->u.e.n = pdf_to_real(obj); + + /* See exponential functions (PDF 1.7 section 3.9.2) */ + if (func->u.e.n != (int) func->u.e.n) + { + /* If N is non-integer, input values may never be negative */ + for (i = 0; i < func->base.m; i++) + if (func->domain[i][0] < 0 || func->domain[i][1] < 0) + fz_warn(ctx, "exponential function input domain includes illegal negative input values"); + } + else if (func->u.e.n < 0) + { + /* if N is negative, input values may never be zero */ + for (i = 0; i < func->base.m; i++) + if (func->domain[i][0] == 0 || func->domain[i][1] == 0 || + (func->domain[i][0] < 0 && func->domain[i][1] > 0)) + fz_warn(ctx, "exponential function input domain includes illegal input value zero"); + } + + for (i = 0; i < func->base.n; i++) + { + func->u.e.c0[i] = 0; + func->u.e.c1[i] = 1; + } + + obj = pdf_dict_gets(dict, "C0"); + if (pdf_is_array(obj)) + { + int ranges = fz_mini(func->base.n, pdf_array_len(obj)); + if (ranges != func->base.n) + fz_warn(ctx, "wrong number of C0 constants for exponential function"); + + for (i = 0; i < ranges; i++) + func->u.e.c0[i] = pdf_to_real(pdf_array_get(obj, i)); + } + + obj = pdf_dict_gets(dict, "C1"); + if (pdf_is_array(obj)) + { + int ranges = fz_mini(func->base.n, pdf_array_len(obj)); + if (ranges != func->base.n) + fz_warn(ctx, "wrong number of C1 constants for exponential function"); + + for (i = 0; i < ranges; i++) + 
func->u.e.c1[i] = pdf_to_real(pdf_array_get(obj, i)); + } +} + +static void +eval_exponential_func(fz_context *ctx, pdf_function *func, float in, float *out) +{ + float x = in; + float tmp; + int i; + + x = fz_clamp(x, func->domain[0][0], func->domain[0][1]); + + /* Default output is zero, which is suitable for violated constraints */ + if ((func->u.e.n != (int)func->u.e.n && x < 0) || (func->u.e.n < 0 && x == 0)) + return; + + tmp = powf(x, func->u.e.n); + for (i = 0; i < func->base.n; i++) + { + out[i] = func->u.e.c0[i] + tmp * (func->u.e.c1[i] - func->u.e.c0[i]); + if (func->has_range) + out[i] = fz_clamp(out[i], func->range[i][0], func->range[i][1]); + } +} + +/* + * Stitching function + */ + +static void +load_stitching_func(pdf_function *func, pdf_document *xref, pdf_obj *dict) +{ + fz_context *ctx = xref->ctx; + fz_function **funcs; + pdf_obj *obj; + pdf_obj *sub; + pdf_obj *num; + int k; + int i; + + func->u.st.k = 0; + + if (func->base.m > 1) + fz_warn(ctx, "stitching functions have at most one input"); + func->base.m = 1; + + obj = pdf_dict_gets(dict, "Functions"); + if (!pdf_is_array(obj)) + fz_throw(ctx, FZ_ERROR_GENERIC, "stitching function has no input functions"); + + fz_try(ctx) + { + pdf_obj_mark(obj); + k = pdf_array_len(obj); + + func->u.st.funcs = fz_malloc_array(ctx, k, sizeof(fz_function*)); + func->u.st.bounds = fz_malloc_array(ctx, k - 1, sizeof(float)); + func->u.st.encode = fz_malloc_array(ctx, k * 2, sizeof(float)); + funcs = func->u.st.funcs; + + for (i = 0; i < k; i++) + { + sub = pdf_array_get(obj, i); + funcs[i] = pdf_load_function(xref, sub, 1, func->base.n); + + func->base.size += fz_function_size(funcs[i]); + func->u.st.k ++; + + if (funcs[i]->m != func->base.m) + fz_warn(ctx, "wrong number of inputs for sub function %d", i); + if (funcs[i]->n != func->base.n) + fz_warn(ctx, "wrong number of outputs for sub function %d", i); + } + } + fz_always(ctx) + { + pdf_obj_unmark(obj); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + obj = 
pdf_dict_gets(dict, "Bounds"); + if (!pdf_is_array(obj)) + fz_throw(ctx, FZ_ERROR_GENERIC, "stitching function has no bounds"); + { + if (pdf_array_len(obj) < k - 1) + fz_throw(ctx, FZ_ERROR_GENERIC, "too few subfunction boundaries"); + if (pdf_array_len(obj) > k) + fz_warn(ctx, "too many subfunction boundaries"); + + for (i = 0; i < k - 1; i++) + { + num = pdf_array_get(obj, i); + func->u.st.bounds[i] = pdf_to_real(num); + if (i && func->u.st.bounds[i - 1] > func->u.st.bounds[i]) + fz_throw(ctx, FZ_ERROR_GENERIC, "subfunction %d boundary out of range", i); + } + + if (k > 1 && (func->domain[0][0] > func->u.st.bounds[0] || + func->domain[0][1] < func->u.st.bounds[k - 2])) + fz_warn(ctx, "subfunction boundaries outside of input mapping"); + } + + for (i = 0; i < k; i++) + { + func->u.st.encode[i * 2 + 0] = 0; + func->u.st.encode[i * 2 + 1] = 0; + } + + obj = pdf_dict_gets(dict, "Encode"); + if (pdf_is_array(obj)) + { + int ranges = fz_mini(k, pdf_array_len(obj) / 2); + if (ranges != k) + fz_warn(ctx, "wrong number of stitching function input mappings"); + + for (i = 0; i < ranges; i++) + { + func->u.st.encode[i * 2 + 0] = pdf_to_real(pdf_array_get(obj, i * 2 + 0)); + func->u.st.encode[i * 2 + 1] = pdf_to_real(pdf_array_get(obj, i * 2 + 1)); + } + } +} + +static void +eval_stitching_func(fz_context *ctx, pdf_function *func, float in, float *out) +{ + float low, high; + int k = func->u.st.k; + float *bounds = func->u.st.bounds; + int i; + + in = fz_clamp(in, func->domain[0][0], func->domain[0][1]); + + for (i = 0; i < k - 1; i++) + { + if (in < bounds[i]) + break; + } + + if (i == 0 && k == 1) + { + low = func->domain[0][0]; + high = func->domain[0][1]; + } + else if (i == 0) + { + low = func->domain[0][0]; + high = bounds[0]; + } + else if (i == k - 1) + { + low = bounds[k - 2]; + high = func->domain[0][1]; + } + else + { + low = bounds[i - 1]; + high = bounds[i]; + } + + in = lerp(in, low, high, func->u.st.encode[i * 2 + 0], func->u.st.encode[i * 2 + 1]); + + 
fz_eval_function(ctx, func->u.st.funcs[i], &in, 1, out, func->u.st.funcs[i]->n); +} + +/* + * Common + */ + +static void +pdf_free_function_imp(fz_context *ctx, fz_storable *func_) +{ + pdf_function *func = (pdf_function *)func_; + int i; + + switch (func->type) + { + case SAMPLE: + fz_free(ctx, func->u.sa.samples); + break; + case EXPONENTIAL: + break; + case STITCHING: + for (i = 0; i < func->u.st.k; i++) + fz_drop_function(ctx, func->u.st.funcs[i]); + fz_free(ctx, func->u.st.funcs); + fz_free(ctx, func->u.st.bounds); + fz_free(ctx, func->u.st.encode); + break; + case POSTSCRIPT: + fz_free(ctx, func->u.p.code); + break; + } + fz_free(ctx, func); +} + +static void +pdf_eval_function(fz_context *ctx, fz_function *func_, float *in, float *out) +{ + pdf_function *func = (pdf_function *)func_; + + switch (func->type) + { + case SAMPLE: eval_sample_func(ctx, func, in, out); break; + case EXPONENTIAL: eval_exponential_func(ctx, func, *in, out); break; + case STITCHING: eval_stitching_func(ctx, func, *in, out); break; + case POSTSCRIPT: eval_postscript_func(ctx, func, in, out); break; + } +} + +/* + * Debugging prints + */ + +#ifndef NDEBUG +static void +pdf_debug_indent(char *prefix, int level, char *suffix) +{ + int i; + + printf("%s", prefix); + + for (i = 0; i < level; i++) + printf("\t"); + + printf("%s", suffix); +} + +static void +pdf_debug_ps_func_code(psobj *funccode, psobj *code, int level) +{ + int eof, wasop; + + pdf_debug_indent("", level, "{"); + + /* Print empty blocks as { }, instead of separating braces on different lines. 
*/ + if (code->type == PS_OPERATOR && code->u.op == PS_OP_RETURN) + { + printf(" } "); + return; + } + + pdf_debug_indent("\n", ++level, ""); + + eof = 0; + wasop = 0; + while (!eof) + { + switch (code->type) + { + case PS_INT: + if (wasop) + pdf_debug_indent("\n", level, ""); + + printf("%d ", code->u.i); + wasop = 0; + code++; + break; + + case PS_REAL: + if (wasop) + pdf_debug_indent("\n", level, ""); + + printf("%g ", code->u.f); + wasop = 0; + code++; + break; + + case PS_OPERATOR: + if (code->u.op == PS_OP_RETURN) + { + printf("\n"); + eof = 1; + } + else if (code->u.op == PS_OP_IF) + { + printf("\n"); + pdf_debug_ps_func_code(funccode, &funccode[(code + 2)->u.block], level); + + printf("%s", ps_op_names[code->u.op]); + code = &funccode[(code + 3)->u.block]; + if (code->type != PS_OPERATOR || code->u.op != PS_OP_RETURN) + pdf_debug_indent("\n", level, ""); + + wasop = 0; + } + else if (code->u.op == PS_OP_IFELSE) + { + printf("\n"); + pdf_debug_ps_func_code(funccode, &funccode[(code + 2)->u.block], level); + + printf("\n"); + pdf_debug_ps_func_code(funccode, &funccode[(code + 1)->u.block], level); + + printf("%s", ps_op_names[code->u.op]); + code = &funccode[(code + 3)->u.block]; + if (code->type != PS_OPERATOR || code->u.op != PS_OP_RETURN) + pdf_debug_indent("\n", level, ""); + + wasop = 0; + } + else + { + printf("%s ", ps_op_names[code->u.op]); + code++; + wasop = 1; + } + break; + } + } + + pdf_debug_indent("", --level, "} "); +} + +static void +pdf_debug_function_imp(fz_function *func_, int level) +{ + int i; + pdf_function *func = (pdf_function *)func_; + + pdf_debug_indent("", level, "function {\n"); + + pdf_debug_indent("", ++level, ""); + switch (func->type) + { + case SAMPLE: + printf("sampled"); + break; + case EXPONENTIAL: + printf("exponential"); + break; + case STITCHING: + printf("stitching"); + break; + case POSTSCRIPT: + printf("postscript"); + break; + } + + pdf_debug_indent("\n", level, ""); + printf("%d input -> %d output\n", 
func->base.m, func->base.n); + + pdf_debug_indent("", level, "domain "); + for (i = 0; i < func->base.m; i++) + printf("%g %g ", func->domain[i][0], func->domain[i][1]); + printf("\n"); + + if (func->has_range) + { + pdf_debug_indent("", level, "range "); + for (i = 0; i < func->base.n; i++) + printf("%g %g ", func->range[i][0], func->range[i][1]); + printf("\n"); + } + + switch (func->type) + { + case SAMPLE: + pdf_debug_indent("", level, ""); + printf("bps: %d\n", func->u.sa.bps); + + pdf_debug_indent("", level, ""); + printf("size: [ "); + for (i = 0; i < func->base.m; i++) + printf("%d ", func->u.sa.size[i]); + printf("]\n"); + + pdf_debug_indent("", level, ""); + printf("encode: [ "); + for (i = 0; i < func->base.m; i++) + printf("%g %g ", func->u.sa.encode[i][0], func->u.sa.encode[i][1]); + printf("]\n"); + + pdf_debug_indent("", level, ""); + printf("decode: [ "); + for (i = 0; i < func->base.m; i++) + printf("%g %g ", func->u.sa.decode[i][0], func->u.sa.decode[i][1]); + printf("]\n"); + break; + + case EXPONENTIAL: + pdf_debug_indent("", level, ""); + printf("n: %g\n", func->u.e.n); + + pdf_debug_indent("", level, ""); + printf("c0: [ "); + for (i = 0; i < func->base.n; i++) + printf("%g ", func->u.e.c0[i]); + printf("]\n"); + + pdf_debug_indent("", level, ""); + printf("c1: [ "); + for (i = 0; i < func->base.n; i++) + printf("%g ", func->u.e.c1[i]); + printf("]\n"); + break; + + case STITCHING: + pdf_debug_indent("", level, ""); + printf("%d functions\n", func->u.st.k); + + pdf_debug_indent("", level, ""); + printf("bounds: [ "); + for (i = 0; i < func->u.st.k - 1; i++) + printf("%g ", func->u.st.bounds[i]); + printf("]\n"); + + pdf_debug_indent("", level, ""); + printf("encode: [ "); + for (i = 0; i < func->u.st.k * 2; i++) + printf("%g ", func->u.st.encode[i]); + printf("]\n"); + + for (i = 0; i < func->u.st.k; i++) + pdf_debug_function_imp(func->u.st.funcs[i], level); + break; + + case POSTSCRIPT: + pdf_debug_ps_func_code(func->u.p.code, func->u.p.code, 
level); + printf("\n"); + break; + } + + pdf_debug_indent("", --level, "}\n"); +} + +void +pdf_debug_function(fz_function *func) +{ + pdf_debug_function_imp(func, 0); +} +#endif + +fz_function * +pdf_load_function(pdf_document *xref, pdf_obj *dict, int in, int out) +{ + fz_context *ctx = xref->ctx; + pdf_function *func; + pdf_obj *obj; + int i; + + if (pdf_obj_marked(dict)) + fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in function definition"); + + if ((func = pdf_find_item(ctx, pdf_free_function_imp, dict))) + { + return (fz_function *)func; + } + + func = fz_malloc_struct(ctx, pdf_function); + FZ_INIT_STORABLE(&func->base, 1, pdf_free_function_imp); + func->base.size = sizeof(*func); + func->base.evaluate = pdf_eval_function; +#ifndef NDEBUG + func->base.debug = pdf_debug_function; +#endif + + obj = pdf_dict_gets(dict, "FunctionType"); + func->type = pdf_to_int(obj); + + /* required for all */ + obj = pdf_dict_gets(dict, "Domain"); + func->base.m = fz_clampi(pdf_array_len(obj) / 2, 1, FZ_FN_MAXM); + for (i = 0; i < func->base.m; i++) + { + func->domain[i][0] = pdf_to_real(pdf_array_get(obj, i * 2 + 0)); + func->domain[i][1] = pdf_to_real(pdf_array_get(obj, i * 2 + 1)); + } + + /* required for type0 and type4, optional otherwise */ + obj = pdf_dict_gets(dict, "Range"); + if (pdf_is_array(obj)) + { + func->has_range = 1; + func->base.n = fz_clampi(pdf_array_len(obj) / 2, 1, FZ_FN_MAXN); + for (i = 0; i < func->base.n; i++) + { + func->range[i][0] = pdf_to_real(pdf_array_get(obj, i * 2 + 0)); + func->range[i][1] = pdf_to_real(pdf_array_get(obj, i * 2 + 1)); + } + } + else + { + func->has_range = 0; + func->base.n = out; + } + + if (func->base.m != in) + fz_warn(ctx, "wrong number of function inputs"); + if (func->base.n != out) + fz_warn(ctx, "wrong number of function outputs"); + + fz_try(ctx) + { + switch (func->type) + { + case SAMPLE: + load_sample_func(func, xref, dict, pdf_to_num(dict), pdf_to_gen(dict)); + break; + + case EXPONENTIAL: + 
load_exponential_func(ctx, func, dict); + break; + + case STITCHING: + load_stitching_func(func, xref, dict); + break; + + case POSTSCRIPT: + load_postscript_func(func, xref, dict, pdf_to_num(dict), pdf_to_gen(dict)); + break; + + default: + fz_free(ctx, func); + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown function type (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); + } + + pdf_store_item(ctx, dict, func, func->base.size); + } + fz_catch(ctx) + { + int type = func->type; + fz_drop_function(ctx, (fz_function *)func); + fz_rethrow_message(ctx, "cannot load %s function (%d %d R)", + type == SAMPLE ? "sampled" : + type == EXPONENTIAL ? "exponential" : + type == STITCHING ? "stitching" : + type == POSTSCRIPT ? "calculator" : + "unknown", + pdf_to_num(dict), pdf_to_gen(dict)); + } + + return (fz_function *)func; +} diff --git a/source/pdf/pdf-glyphlist.h b/source/pdf/pdf-glyphlist.h new file mode 100644 index 00000000..f1416916 --- /dev/null +++ b/source/pdf/pdf-glyphlist.h @@ -0,0 +1,1461 @@ +/* +# Name: Adobe Glyph List +# Table version: 2.0 +# Date: September 20, 2002 +# +# See http://partners.adobe.com/asn/developer/typeforum/unicodegn.html +# +# Format: Semicolon-delimited fields: +# (1) glyph name +# (2) Unicode scalar value +#--end +*/ + +static const char *agl_name_list[] = { +"A","AE","AEacute","AEmacron","AEsmall","Aacute","Aacutesmall","Abreve", +"Abreveacute","Abrevecyrillic","Abrevedotbelow","Abrevegrave", +"Abrevehookabove","Abrevetilde","Acaron","Acircle","Acircumflex", +"Acircumflexacute","Acircumflexdotbelow","Acircumflexgrave", +"Acircumflexhookabove","Acircumflexsmall","Acircumflextilde","Acute", +"Acutesmall","Acyrillic","Adblgrave","Adieresis","Adieresiscyrillic", +"Adieresismacron","Adieresissmall","Adotbelow","Adotmacron","Agrave", +"Agravesmall","Ahookabove","Aiecyrillic","Ainvertedbreve","Alpha", +"Alphatonos","Amacron","Amonospace","Aogonek","Aring","Aringacute", +"Aringbelow","Aringsmall","Asmall","Atilde","Atildesmall","Aybarmenian","B", 
+"Bcircle","Bdotaccent","Bdotbelow","Becyrillic","Benarmenian","Beta","Bhook", +"Blinebelow","Bmonospace","Brevesmall","Bsmall","Btopbar","C","Caarmenian", +"Cacute","Caron","Caronsmall","Ccaron","Ccedilla","Ccedillaacute", +"Ccedillasmall","Ccircle","Ccircumflex","Cdot","Cdotaccent","Cedillasmall", +"Chaarmenian","Cheabkhasiancyrillic","Checyrillic", +"Chedescenderabkhasiancyrillic","Chedescendercyrillic","Chedieresiscyrillic", +"Cheharmenian","Chekhakassiancyrillic","Cheverticalstrokecyrillic","Chi", +"Chook","Circumflexsmall","Cmonospace","Coarmenian","Csmall","D","DZ", +"DZcaron","Daarmenian","Dafrican","Dcaron","Dcedilla","Dcircle", +"Dcircumflexbelow","Dcroat","Ddotaccent","Ddotbelow","Decyrillic","Deicoptic", +"Delta","Deltagreek","Dhook","Dieresis","DieresisAcute","DieresisGrave", +"Dieresissmall","Digammagreek","Djecyrillic","Dlinebelow","Dmonospace", +"Dotaccentsmall","Dslash","Dsmall","Dtopbar","Dz","Dzcaron", +"Dzeabkhasiancyrillic","Dzecyrillic","Dzhecyrillic","E","Eacute", +"Eacutesmall","Ebreve","Ecaron","Ecedillabreve","Echarmenian","Ecircle", +"Ecircumflex","Ecircumflexacute","Ecircumflexbelow","Ecircumflexdotbelow", +"Ecircumflexgrave","Ecircumflexhookabove","Ecircumflexsmall", +"Ecircumflextilde","Ecyrillic","Edblgrave","Edieresis","Edieresissmall", +"Edot","Edotaccent","Edotbelow","Efcyrillic","Egrave","Egravesmall", +"Eharmenian","Ehookabove","Eightroman","Einvertedbreve","Eiotifiedcyrillic", +"Elcyrillic","Elevenroman","Emacron","Emacronacute","Emacrongrave", +"Emcyrillic","Emonospace","Encyrillic","Endescendercyrillic","Eng", +"Enghecyrillic","Enhookcyrillic","Eogonek","Eopen","Epsilon","Epsilontonos", +"Ercyrillic","Ereversed","Ereversedcyrillic","Escyrillic", +"Esdescendercyrillic","Esh","Esmall","Eta","Etarmenian","Etatonos","Eth", +"Ethsmall","Etilde","Etildebelow","Euro","Ezh","Ezhcaron","Ezhreversed","F", +"Fcircle","Fdotaccent","Feharmenian","Feicoptic","Fhook","Fitacyrillic", 
+"Fiveroman","Fmonospace","Fourroman","Fsmall","G","GBsquare","Gacute","Gamma", +"Gammaafrican","Gangiacoptic","Gbreve","Gcaron","Gcedilla","Gcircle", +"Gcircumflex","Gcommaaccent","Gdot","Gdotaccent","Gecyrillic","Ghadarmenian", +"Ghemiddlehookcyrillic","Ghestrokecyrillic","Gheupturncyrillic","Ghook", +"Gimarmenian","Gjecyrillic","Gmacron","Gmonospace","Grave","Gravesmall", +"Gsmall","Gsmallhook","Gstroke","H","H18533","H18543","H18551","H22073", +"HPsquare","Haabkhasiancyrillic","Hadescendercyrillic","Hardsigncyrillic", +"Hbar","Hbrevebelow","Hcedilla","Hcircle","Hcircumflex","Hdieresis", +"Hdotaccent","Hdotbelow","Hmonospace","Hoarmenian","Horicoptic","Hsmall", +"Hungarumlaut","Hungarumlautsmall","Hzsquare","I","IAcyrillic","IJ", +"IUcyrillic","Iacute","Iacutesmall","Ibreve","Icaron","Icircle","Icircumflex", +"Icircumflexsmall","Icyrillic","Idblgrave","Idieresis","Idieresisacute", +"Idieresiscyrillic","Idieresissmall","Idot","Idotaccent","Idotbelow", +"Iebrevecyrillic","Iecyrillic","Ifraktur","Igrave","Igravesmall","Ihookabove", +"Iicyrillic","Iinvertedbreve","Iishortcyrillic","Imacron","Imacroncyrillic", +"Imonospace","Iniarmenian","Iocyrillic","Iogonek","Iota","Iotaafrican", +"Iotadieresis","Iotatonos","Ismall","Istroke","Itilde","Itildebelow", +"Izhitsacyrillic","Izhitsadblgravecyrillic","J","Jaarmenian","Jcircle", +"Jcircumflex","Jecyrillic","Jheharmenian","Jmonospace","Jsmall","K", +"KBsquare","KKsquare","Kabashkircyrillic","Kacute","Kacyrillic", +"Kadescendercyrillic","Kahookcyrillic","Kappa","Kastrokecyrillic", +"Kaverticalstrokecyrillic","Kcaron","Kcedilla","Kcircle","Kcommaaccent", +"Kdotbelow","Keharmenian","Kenarmenian","Khacyrillic","Kheicoptic","Khook", +"Kjecyrillic","Klinebelow","Kmonospace","Koppacyrillic","Koppagreek", +"Ksicyrillic","Ksmall","L","LJ","LL","Lacute","Lambda","Lcaron","Lcedilla", +"Lcircle","Lcircumflexbelow","Lcommaaccent","Ldot","Ldotaccent","Ldotbelow", 
+"Ldotbelowmacron","Liwnarmenian","Lj","Ljecyrillic","Llinebelow","Lmonospace", +"Lslash","Lslashsmall","Lsmall","M","MBsquare","Macron","Macronsmall", +"Macute","Mcircle","Mdotaccent","Mdotbelow","Menarmenian","Mmonospace", +"Msmall","Mturned","Mu","N","NJ","Nacute","Ncaron","Ncedilla","Ncircle", +"Ncircumflexbelow","Ncommaaccent","Ndotaccent","Ndotbelow","Nhookleft", +"Nineroman","Nj","Njecyrillic","Nlinebelow","Nmonospace","Nowarmenian", +"Nsmall","Ntilde","Ntildesmall","Nu","O","OE","OEsmall","Oacute", +"Oacutesmall","Obarredcyrillic","Obarreddieresiscyrillic","Obreve","Ocaron", +"Ocenteredtilde","Ocircle","Ocircumflex","Ocircumflexacute", +"Ocircumflexdotbelow","Ocircumflexgrave","Ocircumflexhookabove", +"Ocircumflexsmall","Ocircumflextilde","Ocyrillic","Odblacute","Odblgrave", +"Odieresis","Odieresiscyrillic","Odieresissmall","Odotbelow","Ogoneksmall", +"Ograve","Ogravesmall","Oharmenian","Ohm","Ohookabove","Ohorn","Ohornacute", +"Ohorndotbelow","Ohorngrave","Ohornhookabove","Ohorntilde","Ohungarumlaut", +"Oi","Oinvertedbreve","Omacron","Omacronacute","Omacrongrave","Omega", +"Omegacyrillic","Omegagreek","Omegaroundcyrillic","Omegatitlocyrillic", +"Omegatonos","Omicron","Omicrontonos","Omonospace","Oneroman","Oogonek", +"Oogonekmacron","Oopen","Oslash","Oslashacute","Oslashsmall","Osmall", +"Ostrokeacute","Otcyrillic","Otilde","Otildeacute","Otildedieresis", +"Otildesmall","P","Pacute","Pcircle","Pdotaccent","Pecyrillic","Peharmenian", +"Pemiddlehookcyrillic","Phi","Phook","Pi","Piwrarmenian","Pmonospace","Psi", +"Psicyrillic","Psmall","Q","Qcircle","Qmonospace","Qsmall","R","Raarmenian", +"Racute","Rcaron","Rcedilla","Rcircle","Rcommaaccent","Rdblgrave", +"Rdotaccent","Rdotbelow","Rdotbelowmacron","Reharmenian","Rfraktur","Rho", +"Ringsmall","Rinvertedbreve","Rlinebelow","Rmonospace","Rsmall", +"Rsmallinverted","Rsmallinvertedsuperior","S","SF010000","SF020000", +"SF030000","SF040000","SF050000","SF060000","SF070000","SF080000","SF090000", 
+"SF100000","SF110000","SF190000","SF200000","SF210000","SF220000","SF230000", +"SF240000","SF250000","SF260000","SF270000","SF280000","SF360000","SF370000", +"SF380000","SF390000","SF400000","SF410000","SF420000","SF430000","SF440000", +"SF450000","SF460000","SF470000","SF480000","SF490000","SF500000","SF510000", +"SF520000","SF530000","SF540000","Sacute","Sacutedotaccent","Sampigreek", +"Scaron","Scarondotaccent","Scaronsmall","Scedilla","Schwa","Schwacyrillic", +"Schwadieresiscyrillic","Scircle","Scircumflex","Scommaaccent","Sdotaccent", +"Sdotbelow","Sdotbelowdotaccent","Seharmenian","Sevenroman","Shaarmenian", +"Shacyrillic","Shchacyrillic","Sheicoptic","Shhacyrillic","Shimacoptic", +"Sigma","Sixroman","Smonospace","Softsigncyrillic","Ssmall","Stigmagreek","T", +"Tau","Tbar","Tcaron","Tcedilla","Tcircle","Tcircumflexbelow","Tcommaaccent", +"Tdotaccent","Tdotbelow","Tecyrillic","Tedescendercyrillic","Tenroman", +"Tetsecyrillic","Theta","Thook","Thorn","Thornsmall","Threeroman", +"Tildesmall","Tiwnarmenian","Tlinebelow","Tmonospace","Toarmenian","Tonefive", +"Tonesix","Tonetwo","Tretroflexhook","Tsecyrillic","Tshecyrillic","Tsmall", +"Twelveroman","Tworoman","U","Uacute","Uacutesmall","Ubreve","Ucaron", +"Ucircle","Ucircumflex","Ucircumflexbelow","Ucircumflexsmall","Ucyrillic", +"Udblacute","Udblgrave","Udieresis","Udieresisacute","Udieresisbelow", +"Udieresiscaron","Udieresiscyrillic","Udieresisgrave","Udieresismacron", +"Udieresissmall","Udotbelow","Ugrave","Ugravesmall","Uhookabove","Uhorn", +"Uhornacute","Uhorndotbelow","Uhorngrave","Uhornhookabove","Uhorntilde", +"Uhungarumlaut","Uhungarumlautcyrillic","Uinvertedbreve","Ukcyrillic", +"Umacron","Umacroncyrillic","Umacrondieresis","Umonospace","Uogonek", +"Upsilon","Upsilon1","Upsilonacutehooksymbolgreek","Upsilonafrican", +"Upsilondieresis","Upsilondieresishooksymbolgreek","Upsilonhooksymbol", +"Upsilontonos","Uring","Ushortcyrillic","Usmall","Ustraightcyrillic", 
+"Ustraightstrokecyrillic","Utilde","Utildeacute","Utildebelow","V","Vcircle", +"Vdotbelow","Vecyrillic","Vewarmenian","Vhook","Vmonospace","Voarmenian", +"Vsmall","Vtilde","W","Wacute","Wcircle","Wcircumflex","Wdieresis", +"Wdotaccent","Wdotbelow","Wgrave","Wmonospace","Wsmall","X","Xcircle", +"Xdieresis","Xdotaccent","Xeharmenian","Xi","Xmonospace","Xsmall","Y", +"Yacute","Yacutesmall","Yatcyrillic","Ycircle","Ycircumflex","Ydieresis", +"Ydieresissmall","Ydotaccent","Ydotbelow","Yericyrillic", +"Yerudieresiscyrillic","Ygrave","Yhook","Yhookabove","Yiarmenian", +"Yicyrillic","Yiwnarmenian","Ymonospace","Ysmall","Ytilde","Yusbigcyrillic", +"Yusbigiotifiedcyrillic","Yuslittlecyrillic","Yuslittleiotifiedcyrillic","Z", +"Zaarmenian","Zacute","Zcaron","Zcaronsmall","Zcircle","Zcircumflex","Zdot", +"Zdotaccent","Zdotbelow","Zecyrillic","Zedescendercyrillic", +"Zedieresiscyrillic","Zeta","Zhearmenian","Zhebrevecyrillic","Zhecyrillic", +"Zhedescendercyrillic","Zhedieresiscyrillic","Zlinebelow","Zmonospace", +"Zsmall","Zstroke","a","aabengali","aacute","aadeva","aagujarati", +"aagurmukhi","aamatragurmukhi","aarusquare","aavowelsignbengali", +"aavowelsigndeva","aavowelsigngujarati","abbreviationmarkarmenian", +"abbreviationsigndeva","abengali","abopomofo","abreve","abreveacute", +"abrevecyrillic","abrevedotbelow","abrevegrave","abrevehookabove", +"abrevetilde","acaron","acircle","acircumflex","acircumflexacute", +"acircumflexdotbelow","acircumflexgrave","acircumflexhookabove", +"acircumflextilde","acute","acutebelowcmb","acutecmb","acutecomb","acutedeva", +"acutelowmod","acutetonecmb","acyrillic","adblgrave","addakgurmukhi","adeva", +"adieresis","adieresiscyrillic","adieresismacron","adotbelow","adotmacron", +"ae","aeacute","aekorean","aemacron","afii00208","afii08941","afii10017", +"afii10018","afii10019","afii10020","afii10021","afii10022","afii10023", +"afii10024","afii10025","afii10026","afii10027","afii10028","afii10029", 
+"afii10030","afii10031","afii10032","afii10033","afii10034","afii10035", +"afii10036","afii10037","afii10038","afii10039","afii10040","afii10041", +"afii10042","afii10043","afii10044","afii10045","afii10046","afii10047", +"afii10048","afii10049","afii10050","afii10051","afii10052","afii10053", +"afii10054","afii10055","afii10056","afii10057","afii10058","afii10059", +"afii10060","afii10061","afii10062","afii10063","afii10064","afii10065", +"afii10066","afii10067","afii10068","afii10069","afii10070","afii10071", +"afii10072","afii10073","afii10074","afii10075","afii10076","afii10077", +"afii10078","afii10079","afii10080","afii10081","afii10082","afii10083", +"afii10084","afii10085","afii10086","afii10087","afii10088","afii10089", +"afii10090","afii10091","afii10092","afii10093","afii10094","afii10095", +"afii10096","afii10097","afii10098","afii10099","afii10100","afii10101", +"afii10102","afii10103","afii10104","afii10105","afii10106","afii10107", +"afii10108","afii10109","afii10110","afii10145","afii10146","afii10147", +"afii10148","afii10192","afii10193","afii10194","afii10195","afii10196", +"afii10831","afii10832","afii10846","afii299","afii300","afii301","afii57381", +"afii57388","afii57392","afii57393","afii57394","afii57395","afii57396", +"afii57397","afii57398","afii57399","afii57400","afii57401","afii57403", +"afii57407","afii57409","afii57410","afii57411","afii57412","afii57413", +"afii57414","afii57415","afii57416","afii57417","afii57418","afii57419", +"afii57420","afii57421","afii57422","afii57423","afii57424","afii57425", +"afii57426","afii57427","afii57428","afii57429","afii57430","afii57431", +"afii57432","afii57433","afii57434","afii57440","afii57441","afii57442", +"afii57443","afii57444","afii57445","afii57446","afii57448","afii57449", +"afii57450","afii57451","afii57452","afii57453","afii57454","afii57455", +"afii57456","afii57457","afii57458","afii57470","afii57505","afii57506", 
+"afii57507","afii57508","afii57509","afii57511","afii57512","afii57513", +"afii57514","afii57519","afii57534","afii57636","afii57645","afii57658", +"afii57664","afii57665","afii57666","afii57667","afii57668","afii57669", +"afii57670","afii57671","afii57672","afii57673","afii57674","afii57675", +"afii57676","afii57677","afii57678","afii57679","afii57680","afii57681", +"afii57682","afii57683","afii57684","afii57685","afii57686","afii57687", +"afii57688","afii57689","afii57690","afii57694","afii57695","afii57700", +"afii57705","afii57716","afii57717","afii57718","afii57723","afii57793", +"afii57794","afii57795","afii57796","afii57797","afii57798","afii57799", +"afii57800","afii57801","afii57802","afii57803","afii57804","afii57806", +"afii57807","afii57839","afii57841","afii57842","afii57929","afii61248", +"afii61289","afii61352","afii61573","afii61574","afii61575","afii61664", +"afii63167","afii64937","agrave","agujarati","agurmukhi","ahiragana", +"ahookabove","aibengali","aibopomofo","aideva","aiecyrillic","aigujarati", +"aigurmukhi","aimatragurmukhi","ainarabic","ainfinalarabic", +"aininitialarabic","ainmedialarabic","ainvertedbreve","aivowelsignbengali", +"aivowelsigndeva","aivowelsigngujarati","akatakana","akatakanahalfwidth", +"akorean","alef","alefarabic","alefdageshhebrew","aleffinalarabic", +"alefhamzaabovearabic","alefhamzaabovefinalarabic","alefhamzabelowarabic", +"alefhamzabelowfinalarabic","alefhebrew","aleflamedhebrew", +"alefmaddaabovearabic","alefmaddaabovefinalarabic","alefmaksuraarabic", +"alefmaksurafinalarabic","alefmaksurainitialarabic","alefmaksuramedialarabic", +"alefpatahhebrew","alefqamatshebrew","aleph","allequal","alpha","alphatonos", +"amacron","amonospace","ampersand","ampersandmonospace","ampersandsmall", +"amsquare","anbopomofo","angbopomofo","angkhankhuthai","angle", +"anglebracketleft","anglebracketleftvertical","anglebracketright", +"anglebracketrightvertical","angleleft","angleright","angstrom","anoteleia", 
+"anudattadeva","anusvarabengali","anusvaradeva","anusvaragujarati","aogonek", +"apaatosquare","aparen","apostrophearmenian","apostrophemod","apple", +"approaches","approxequal","approxequalorimage","approximatelyequal", +"araeaekorean","araeakorean","arc","arighthalfring","aring","aringacute", +"aringbelow","arrowboth","arrowdashdown","arrowdashleft","arrowdashright", +"arrowdashup","arrowdblboth","arrowdbldown","arrowdblleft","arrowdblright", +"arrowdblup","arrowdown","arrowdownleft","arrowdownright","arrowdownwhite", +"arrowheaddownmod","arrowheadleftmod","arrowheadrightmod","arrowheadupmod", +"arrowhorizex","arrowleft","arrowleftdbl","arrowleftdblstroke", +"arrowleftoverright","arrowleftwhite","arrowright","arrowrightdblstroke", +"arrowrightheavy","arrowrightoverleft","arrowrightwhite","arrowtableft", +"arrowtabright","arrowup","arrowupdn","arrowupdnbse","arrowupdownbase", +"arrowupleft","arrowupleftofdown","arrowupright","arrowupwhite","arrowvertex", +"asciicircum","asciicircummonospace","asciitilde","asciitildemonospace", +"ascript","ascriptturned","asmallhiragana","asmallkatakana", +"asmallkatakanahalfwidth","asterisk","asteriskaltonearabic","asteriskarabic", +"asteriskmath","asteriskmonospace","asterisksmall","asterism","asuperior", +"asymptoticallyequal","at","atilde","atmonospace","atsmall","aturned", +"aubengali","aubopomofo","audeva","augujarati","augurmukhi", +"aulengthmarkbengali","aumatragurmukhi","auvowelsignbengali", +"auvowelsigndeva","auvowelsigngujarati","avagrahadeva","aybarmenian","ayin", +"ayinaltonehebrew","ayinhebrew","b","babengali","backslash", +"backslashmonospace","badeva","bagujarati","bagurmukhi","bahiragana", +"bahtthai","bakatakana","bar","barmonospace","bbopomofo","bcircle", +"bdotaccent","bdotbelow","beamedsixteenthnotes","because","becyrillic", +"beharabic","behfinalarabic","behinitialarabic","behiragana", +"behmedialarabic","behmeeminitialarabic","behmeemisolatedarabic", 
+"behnoonfinalarabic","bekatakana","benarmenian","bet","beta", +"betasymbolgreek","betdagesh","betdageshhebrew","bethebrew","betrafehebrew", +"bhabengali","bhadeva","bhagujarati","bhagurmukhi","bhook","bihiragana", +"bikatakana","bilabialclick","bindigurmukhi","birusquare","blackcircle", +"blackdiamond","blackdownpointingtriangle","blackleftpointingpointer", +"blackleftpointingtriangle","blacklenticularbracketleft", +"blacklenticularbracketleftvertical","blacklenticularbracketright", +"blacklenticularbracketrightvertical","blacklowerlefttriangle", +"blacklowerrighttriangle","blackrectangle","blackrightpointingpointer", +"blackrightpointingtriangle","blacksmallsquare","blacksmilingface", +"blacksquare","blackstar","blackupperlefttriangle","blackupperrighttriangle", +"blackuppointingsmalltriangle","blackuppointingtriangle","blank","blinebelow", +"block","bmonospace","bobaimaithai","bohiragana","bokatakana","bparen", +"bqsquare","braceex","braceleft","braceleftbt","braceleftmid", +"braceleftmonospace","braceleftsmall","bracelefttp","braceleftvertical", +"braceright","bracerightbt","bracerightmid","bracerightmonospace", +"bracerightsmall","bracerighttp","bracerightvertical","bracketleft", +"bracketleftbt","bracketleftex","bracketleftmonospace","bracketlefttp", +"bracketright","bracketrightbt","bracketrightex","bracketrightmonospace", +"bracketrighttp","breve","brevebelowcmb","brevecmb","breveinvertedbelowcmb", +"breveinvertedcmb","breveinverteddoublecmb","bridgebelowcmb", +"bridgeinvertedbelowcmb","brokenbar","bstroke","bsuperior","btopbar", +"buhiragana","bukatakana","bullet","bulletinverse","bulletoperator", +"bullseye","c","caarmenian","cabengali","cacute","cadeva","cagujarati", +"cagurmukhi","calsquare","candrabindubengali","candrabinducmb", +"candrabindudeva","candrabindugujarati","capslock","careof","caron", +"caronbelowcmb","caroncmb","carriagereturn","cbopomofo","ccaron","ccedilla", +"ccedillaacute","ccircle","ccircumflex","ccurl","cdot","cdotaccent", 
+"cdsquare","cedilla","cedillacmb","cent","centigrade","centinferior", +"centmonospace","centoldstyle","centsuperior","chaarmenian","chabengali", +"chadeva","chagujarati","chagurmukhi","chbopomofo","cheabkhasiancyrillic", +"checkmark","checyrillic","chedescenderabkhasiancyrillic", +"chedescendercyrillic","chedieresiscyrillic","cheharmenian", +"chekhakassiancyrillic","cheverticalstrokecyrillic","chi", +"chieuchacirclekorean","chieuchaparenkorean","chieuchcirclekorean", +"chieuchkorean","chieuchparenkorean","chochangthai","chochanthai", +"chochingthai","chochoethai","chook","cieucacirclekorean","cieucaparenkorean", +"cieuccirclekorean","cieuckorean","cieucparenkorean","cieucuparenkorean", +"circle","circlemultiply","circleot","circleplus","circlepostalmark", +"circlewithlefthalfblack","circlewithrighthalfblack","circumflex", +"circumflexbelowcmb","circumflexcmb","clear","clickalveolar","clickdental", +"clicklateral","clickretroflex","club","clubsuitblack","clubsuitwhite", +"cmcubedsquare","cmonospace","cmsquaredsquare","coarmenian","colon", +"colonmonetary","colonmonospace","colonsign","colonsmall", +"colontriangularhalfmod","colontriangularmod","comma","commaabovecmb", +"commaaboverightcmb","commaaccent","commaarabic","commaarmenian", +"commainferior","commamonospace","commareversedabovecmb","commareversedmod", +"commasmall","commasuperior","commaturnedabovecmb","commaturnedmod","compass", +"congruent","contourintegral","control","controlACK","controlBEL","controlBS", +"controlCAN","controlCR","controlDC1","controlDC2","controlDC3","controlDC4", +"controlDEL","controlDLE","controlEM","controlENQ","controlEOT","controlESC", +"controlETB","controlETX","controlFF","controlFS","controlGS","controlHT", +"controlLF","controlNAK","controlRS","controlSI","controlSO","controlSOT", +"controlSTX","controlSUB","controlSYN","controlUS","controlVT","copyright", +"copyrightsans","copyrightserif","cornerbracketleft", 
+"cornerbracketlefthalfwidth","cornerbracketleftvertical","cornerbracketright", +"cornerbracketrighthalfwidth","cornerbracketrightvertical", +"corporationsquare","cosquare","coverkgsquare","cparen","cruzeiro", +"cstretched","curlyand","curlyor","currency","cyrBreve","cyrFlex","cyrbreve", +"cyrflex","d","daarmenian","dabengali","dadarabic","dadeva","dadfinalarabic", +"dadinitialarabic","dadmedialarabic","dagesh","dageshhebrew","dagger", +"daggerdbl","dagujarati","dagurmukhi","dahiragana","dakatakana","dalarabic", +"dalet","daletdagesh","daletdageshhebrew","dalethatafpatah", +"dalethatafpatahhebrew","dalethatafsegol","dalethatafsegolhebrew", +"dalethebrew","dalethiriq","dalethiriqhebrew","daletholam","daletholamhebrew", +"daletpatah","daletpatahhebrew","daletqamats","daletqamatshebrew", +"daletqubuts","daletqubutshebrew","daletsegol","daletsegolhebrew", +"daletsheva","daletshevahebrew","dalettsere","dalettserehebrew", +"dalfinalarabic","dammaarabic","dammalowarabic","dammatanaltonearabic", +"dammatanarabic","danda","dargahebrew","dargalefthebrew", +"dasiapneumatacyrilliccmb","dblGrave","dblanglebracketleft", +"dblanglebracketleftvertical","dblanglebracketright", +"dblanglebracketrightvertical","dblarchinvertedbelowcmb","dblarrowleft", +"dblarrowright","dbldanda","dblgrave","dblgravecmb","dblintegral", +"dbllowline","dbllowlinecmb","dbloverlinecmb","dblprimemod","dblverticalbar", +"dblverticallineabovecmb","dbopomofo","dbsquare","dcaron","dcedilla", +"dcircle","dcircumflexbelow","dcroat","ddabengali","ddadeva","ddagujarati", +"ddagurmukhi","ddalarabic","ddalfinalarabic","dddhadeva","ddhabengali", +"ddhadeva","ddhagujarati","ddhagurmukhi","ddotaccent","ddotbelow", +"decimalseparatorarabic","decimalseparatorpersian","decyrillic","degree", +"dehihebrew","dehiragana","deicoptic","dekatakana","deleteleft","deleteright", +"delta","deltaturned","denominatorminusonenumeratorbengali","dezh", +"dhabengali","dhadeva","dhagujarati","dhagurmukhi","dhook","dialytikatonos", 
+"dialytikatonoscmb","diamond","diamondsuitwhite","dieresis","dieresisacute", +"dieresisbelowcmb","dieresiscmb","dieresisgrave","dieresistonos","dihiragana", +"dikatakana","dittomark","divide","divides","divisionslash","djecyrillic", +"dkshade","dlinebelow","dlsquare","dmacron","dmonospace","dnblock", +"dochadathai","dodekthai","dohiragana","dokatakana","dollar","dollarinferior", +"dollarmonospace","dollaroldstyle","dollarsmall","dollarsuperior","dong", +"dorusquare","dotaccent","dotaccentcmb","dotbelowcmb","dotbelowcomb", +"dotkatakana","dotlessi","dotlessj","dotlessjstrokehook","dotmath", +"dottedcircle","doubleyodpatah","doubleyodpatahhebrew","downtackbelowcmb", +"downtackmod","dparen","dsuperior","dtail","dtopbar","duhiragana", +"dukatakana","dz","dzaltone","dzcaron","dzcurl","dzeabkhasiancyrillic", +"dzecyrillic","dzhecyrillic","e","eacute","earth","ebengali","ebopomofo", +"ebreve","ecandradeva","ecandragujarati","ecandravowelsigndeva", +"ecandravowelsigngujarati","ecaron","ecedillabreve","echarmenian", +"echyiwnarmenian","ecircle","ecircumflex","ecircumflexacute", +"ecircumflexbelow","ecircumflexdotbelow","ecircumflexgrave", +"ecircumflexhookabove","ecircumflextilde","ecyrillic","edblgrave","edeva", +"edieresis","edot","edotaccent","edotbelow","eegurmukhi","eematragurmukhi", +"efcyrillic","egrave","egujarati","eharmenian","ehbopomofo","ehiragana", +"ehookabove","eibopomofo","eight","eightarabic","eightbengali","eightcircle", +"eightcircleinversesansserif","eightdeva","eighteencircle","eighteenparen", +"eighteenperiod","eightgujarati","eightgurmukhi","eighthackarabic", +"eighthangzhou","eighthnotebeamed","eightideographicparen","eightinferior", +"eightmonospace","eightoldstyle","eightparen","eightperiod","eightpersian", +"eightroman","eightsuperior","eightthai","einvertedbreve","eiotifiedcyrillic", +"ekatakana","ekatakanahalfwidth","ekonkargurmukhi","ekorean","elcyrillic", +"element","elevencircle","elevenparen","elevenperiod","elevenroman", 
+"ellipsis","ellipsisvertical","emacron","emacronacute","emacrongrave", +"emcyrillic","emdash","emdashvertical","emonospace","emphasismarkarmenian", +"emptyset","enbopomofo","encyrillic","endash","endashvertical", +"endescendercyrillic","eng","engbopomofo","enghecyrillic","enhookcyrillic", +"enspace","eogonek","eokorean","eopen","eopenclosed","eopenreversed", +"eopenreversedclosed","eopenreversedhook","eparen","epsilon","epsilontonos", +"equal","equalmonospace","equalsmall","equalsuperior","equivalence", +"erbopomofo","ercyrillic","ereversed","ereversedcyrillic","escyrillic", +"esdescendercyrillic","esh","eshcurl","eshortdeva","eshortvowelsigndeva", +"eshreversedloop","eshsquatreversed","esmallhiragana","esmallkatakana", +"esmallkatakanahalfwidth","estimated","esuperior","eta","etarmenian", +"etatonos","eth","etilde","etildebelow","etnahtafoukhhebrew", +"etnahtafoukhlefthebrew","etnahtahebrew","etnahtalefthebrew","eturned", +"eukorean","euro","evowelsignbengali","evowelsigndeva","evowelsigngujarati", +"exclam","exclamarmenian","exclamdbl","exclamdown","exclamdownsmall", +"exclammonospace","exclamsmall","existential","ezh","ezhcaron","ezhcurl", +"ezhreversed","ezhtail","f","fadeva","fagurmukhi","fahrenheit","fathaarabic", +"fathalowarabic","fathatanarabic","fbopomofo","fcircle","fdotaccent", +"feharabic","feharmenian","fehfinalarabic","fehinitialarabic", +"fehmedialarabic","feicoptic","female","ff","ffi","ffl","fi","fifteencircle", +"fifteenparen","fifteenperiod","figuredash","filledbox","filledrect", +"finalkaf","finalkafdagesh","finalkafdageshhebrew","finalkafhebrew", +"finalkafqamats","finalkafqamatshebrew","finalkafsheva","finalkafshevahebrew", +"finalmem","finalmemhebrew","finalnun","finalnunhebrew","finalpe", +"finalpehebrew","finaltsadi","finaltsadihebrew","firsttonechinese","fisheye", +"fitacyrillic","five","fivearabic","fivebengali","fivecircle", +"fivecircleinversesansserif","fivedeva","fiveeighths","fivegujarati", 
+"fivegurmukhi","fivehackarabic","fivehangzhou","fiveideographicparen", +"fiveinferior","fivemonospace","fiveoldstyle","fiveparen","fiveperiod", +"fivepersian","fiveroman","fivesuperior","fivethai","fl","florin", +"fmonospace","fmsquare","fofanthai","fofathai","fongmanthai","forall","four", +"fourarabic","fourbengali","fourcircle","fourcircleinversesansserif", +"fourdeva","fourgujarati","fourgurmukhi","fourhackarabic","fourhangzhou", +"fourideographicparen","fourinferior","fourmonospace","fournumeratorbengali", +"fouroldstyle","fourparen","fourperiod","fourpersian","fourroman", +"foursuperior","fourteencircle","fourteenparen","fourteenperiod","fourthai", +"fourthtonechinese","fparen","fraction","franc","g","gabengali","gacute", +"gadeva","gafarabic","gaffinalarabic","gafinitialarabic","gafmedialarabic", +"gagujarati","gagurmukhi","gahiragana","gakatakana","gamma","gammalatinsmall", +"gammasuperior","gangiacoptic","gbopomofo","gbreve","gcaron","gcedilla", +"gcircle","gcircumflex","gcommaaccent","gdot","gdotaccent","gecyrillic", +"gehiragana","gekatakana","geometricallyequal","gereshaccenthebrew", +"gereshhebrew","gereshmuqdamhebrew","germandbls","gershayimaccenthebrew", +"gershayimhebrew","getamark","ghabengali","ghadarmenian","ghadeva", +"ghagujarati","ghagurmukhi","ghainarabic","ghainfinalarabic", +"ghaininitialarabic","ghainmedialarabic","ghemiddlehookcyrillic", +"ghestrokecyrillic","gheupturncyrillic","ghhadeva","ghhagurmukhi","ghook", +"ghzsquare","gihiragana","gikatakana","gimarmenian","gimel","gimeldagesh", +"gimeldageshhebrew","gimelhebrew","gjecyrillic","glottalinvertedstroke", +"glottalstop","glottalstopinverted","glottalstopmod","glottalstopreversed", +"glottalstopreversedmod","glottalstopreversedsuperior","glottalstopstroke", +"glottalstopstrokereversed","gmacron","gmonospace","gohiragana","gokatakana", +"gparen","gpasquare","gradient","grave","gravebelowcmb","gravecmb", +"gravecomb","gravedeva","gravelowmod","gravemonospace","gravetonecmb", 
+"greater","greaterequal","greaterequalorless","greatermonospace", +"greaterorequivalent","greaterorless","greateroverequal","greatersmall", +"gscript","gstroke","guhiragana","guillemotleft","guillemotright", +"guilsinglleft","guilsinglright","gukatakana","guramusquare","gysquare","h", +"haabkhasiancyrillic","haaltonearabic","habengali","hadescendercyrillic", +"hadeva","hagujarati","hagurmukhi","haharabic","hahfinalarabic", +"hahinitialarabic","hahiragana","hahmedialarabic","haitusquare","hakatakana", +"hakatakanahalfwidth","halantgurmukhi","hamzaarabic","hamzadammaarabic", +"hamzadammatanarabic","hamzafathaarabic","hamzafathatanarabic", +"hamzalowarabic","hamzalowkasraarabic","hamzalowkasratanarabic", +"hamzasukunarabic","hangulfiller","hardsigncyrillic","harpoonleftbarbup", +"harpoonrightbarbup","hasquare","hatafpatah","hatafpatah16","hatafpatah23", +"hatafpatah2f","hatafpatahhebrew","hatafpatahnarrowhebrew", +"hatafpatahquarterhebrew","hatafpatahwidehebrew","hatafqamats", +"hatafqamats1b","hatafqamats28","hatafqamats34","hatafqamatshebrew", +"hatafqamatsnarrowhebrew","hatafqamatsquarterhebrew","hatafqamatswidehebrew", +"hatafsegol","hatafsegol17","hatafsegol24","hatafsegol30","hatafsegolhebrew", +"hatafsegolnarrowhebrew","hatafsegolquarterhebrew","hatafsegolwidehebrew", +"hbar","hbopomofo","hbrevebelow","hcedilla","hcircle","hcircumflex", +"hdieresis","hdotaccent","hdotbelow","he","heart","heartsuitblack", +"heartsuitwhite","hedagesh","hedageshhebrew","hehaltonearabic","heharabic", +"hehebrew","hehfinalaltonearabic","hehfinalalttwoarabic","hehfinalarabic", +"hehhamzaabovefinalarabic","hehhamzaaboveisolatedarabic", +"hehinitialaltonearabic","hehinitialarabic","hehiragana", +"hehmedialaltonearabic","hehmedialarabic","heiseierasquare","hekatakana", +"hekatakanahalfwidth","hekutaarusquare","henghook","herutusquare","het", +"hethebrew","hhook","hhooksuperior","hieuhacirclekorean","hieuhaparenkorean", +"hieuhcirclekorean","hieuhkorean","hieuhparenkorean","hihiragana", 
+"hikatakana","hikatakanahalfwidth","hiriq","hiriq14","hiriq21","hiriq2d", +"hiriqhebrew","hiriqnarrowhebrew","hiriqquarterhebrew","hiriqwidehebrew", +"hlinebelow","hmonospace","hoarmenian","hohipthai","hohiragana","hokatakana", +"hokatakanahalfwidth","holam","holam19","holam26","holam32","holamhebrew", +"holamnarrowhebrew","holamquarterhebrew","holamwidehebrew","honokhukthai", +"hookabovecomb","hookcmb","hookpalatalizedbelowcmb","hookretroflexbelowcmb", +"hoonsquare","horicoptic","horizontalbar","horncmb","hotsprings","house", +"hparen","hsuperior","hturned","huhiragana","huiitosquare","hukatakana", +"hukatakanahalfwidth","hungarumlaut","hungarumlautcmb","hv","hyphen", +"hypheninferior","hyphenmonospace","hyphensmall","hyphensuperior","hyphentwo", +"i","iacute","iacyrillic","ibengali","ibopomofo","ibreve","icaron","icircle", +"icircumflex","icyrillic","idblgrave","ideographearthcircle", +"ideographfirecircle","ideographicallianceparen","ideographiccallparen", +"ideographiccentrecircle","ideographicclose","ideographiccomma", +"ideographiccommaleft","ideographiccongratulationparen", +"ideographiccorrectcircle","ideographicearthparen", +"ideographicenterpriseparen","ideographicexcellentcircle", +"ideographicfestivalparen","ideographicfinancialcircle", +"ideographicfinancialparen","ideographicfireparen","ideographichaveparen", +"ideographichighcircle","ideographiciterationmark","ideographiclaborcircle", +"ideographiclaborparen","ideographicleftcircle","ideographiclowcircle", +"ideographicmedicinecircle","ideographicmetalparen","ideographicmoonparen", +"ideographicnameparen","ideographicperiod","ideographicprintcircle", +"ideographicreachparen","ideographicrepresentparen", +"ideographicresourceparen","ideographicrightcircle","ideographicsecretcircle", +"ideographicselfparen","ideographicsocietyparen","ideographicspace", +"ideographicspecialparen","ideographicstockparen","ideographicstudyparen", +"ideographicsunparen","ideographicsuperviseparen","ideographicwaterparen", 
+"ideographicwoodparen","ideographiczero","ideographmetalcircle", +"ideographmooncircle","ideographnamecircle","ideographsuncircle", +"ideographwatercircle","ideographwoodcircle","ideva","idieresis", +"idieresisacute","idieresiscyrillic","idotbelow","iebrevecyrillic", +"iecyrillic","ieungacirclekorean","ieungaparenkorean","ieungcirclekorean", +"ieungkorean","ieungparenkorean","igrave","igujarati","igurmukhi","ihiragana", +"ihookabove","iibengali","iicyrillic","iideva","iigujarati","iigurmukhi", +"iimatragurmukhi","iinvertedbreve","iishortcyrillic","iivowelsignbengali", +"iivowelsigndeva","iivowelsigngujarati","ij","ikatakana","ikatakanahalfwidth", +"ikorean","ilde","iluyhebrew","imacron","imacroncyrillic", +"imageorapproximatelyequal","imatragurmukhi","imonospace","increment", +"infinity","iniarmenian","integral","integralbottom","integralbt", +"integralex","integraltop","integraltp","intersection","intisquare", +"invbullet","invcircle","invsmileface","iocyrillic","iogonek","iota", +"iotadieresis","iotadieresistonos","iotalatin","iotatonos","iparen", +"irigurmukhi","ismallhiragana","ismallkatakana","ismallkatakanahalfwidth", +"issharbengali","istroke","isuperior","iterationhiragana","iterationkatakana", +"itilde","itildebelow","iubopomofo","iucyrillic","ivowelsignbengali", +"ivowelsigndeva","ivowelsigngujarati","izhitsacyrillic", +"izhitsadblgravecyrillic","j","jaarmenian","jabengali","jadeva","jagujarati", +"jagurmukhi","jbopomofo","jcaron","jcircle","jcircumflex","jcrossedtail", +"jdotlessstroke","jecyrillic","jeemarabic","jeemfinalarabic", +"jeeminitialarabic","jeemmedialarabic","jeharabic","jehfinalarabic", +"jhabengali","jhadeva","jhagujarati","jhagurmukhi","jheharmenian","jis", +"jmonospace","jparen","jsuperior","k","kabashkircyrillic","kabengali", +"kacute","kacyrillic","kadescendercyrillic","kadeva","kaf","kafarabic", +"kafdagesh","kafdageshhebrew","kaffinalarabic","kafhebrew","kafinitialarabic", 
+"kafmedialarabic","kafrafehebrew","kagujarati","kagurmukhi","kahiragana", +"kahookcyrillic","kakatakana","kakatakanahalfwidth","kappa", +"kappasymbolgreek","kapyeounmieumkorean","kapyeounphieuphkorean", +"kapyeounpieupkorean","kapyeounssangpieupkorean","karoriisquare", +"kashidaautoarabic","kashidaautonosidebearingarabic","kasmallkatakana", +"kasquare","kasraarabic","kasratanarabic","kastrokecyrillic", +"katahiraprolongmarkhalfwidth","kaverticalstrokecyrillic","kbopomofo", +"kcalsquare","kcaron","kcedilla","kcircle","kcommaaccent","kdotbelow", +"keharmenian","kehiragana","kekatakana","kekatakanahalfwidth","kenarmenian", +"kesmallkatakana","kgreenlandic","khabengali","khacyrillic","khadeva", +"khagujarati","khagurmukhi","khaharabic","khahfinalarabic", +"khahinitialarabic","khahmedialarabic","kheicoptic","khhadeva","khhagurmukhi", +"khieukhacirclekorean","khieukhaparenkorean","khieukhcirclekorean", +"khieukhkorean","khieukhparenkorean","khokhaithai","khokhonthai", +"khokhuatthai","khokhwaithai","khomutthai","khook","khorakhangthai", +"khzsquare","kihiragana","kikatakana","kikatakanahalfwidth", +"kiroguramusquare","kiromeetorusquare","kirosquare","kiyeokacirclekorean", +"kiyeokaparenkorean","kiyeokcirclekorean","kiyeokkorean","kiyeokparenkorean", +"kiyeoksioskorean","kjecyrillic","klinebelow","klsquare","kmcubedsquare", +"kmonospace","kmsquaredsquare","kohiragana","kohmsquare","kokaithai", +"kokatakana","kokatakanahalfwidth","kooposquare","koppacyrillic", +"koreanstandardsymbol","koroniscmb","kparen","kpasquare","ksicyrillic", +"ktsquare","kturned","kuhiragana","kukatakana","kukatakanahalfwidth", +"kvsquare","kwsquare","l","labengali","lacute","ladeva","lagujarati", +"lagurmukhi","lakkhangyaothai","lamaleffinalarabic", +"lamalefhamzaabovefinalarabic","lamalefhamzaaboveisolatedarabic", +"lamalefhamzabelowfinalarabic","lamalefhamzabelowisolatedarabic", +"lamalefisolatedarabic","lamalefmaddaabovefinalarabic", 
+"lamalefmaddaaboveisolatedarabic","lamarabic","lambda","lambdastroke","lamed", +"lameddagesh","lameddageshhebrew","lamedhebrew","lamedholam", +"lamedholamdagesh","lamedholamdageshhebrew","lamedholamhebrew", +"lamfinalarabic","lamhahinitialarabic","laminitialarabic", +"lamjeeminitialarabic","lamkhahinitialarabic","lamlamhehisolatedarabic", +"lammedialarabic","lammeemhahinitialarabic","lammeeminitialarabic", +"lammeemjeeminitialarabic","lammeemkhahinitialarabic","largecircle","lbar", +"lbelt","lbopomofo","lcaron","lcedilla","lcircle","lcircumflexbelow", +"lcommaaccent","ldot","ldotaccent","ldotbelow","ldotbelowmacron", +"leftangleabovecmb","lefttackbelowcmb","less","lessequal", +"lessequalorgreater","lessmonospace","lessorequivalent","lessorgreater", +"lessoverequal","lesssmall","lezh","lfblock","lhookretroflex","lira", +"liwnarmenian","lj","ljecyrillic","ll","lladeva","llagujarati","llinebelow", +"llladeva","llvocalicbengali","llvocalicdeva","llvocalicvowelsignbengali", +"llvocalicvowelsigndeva","lmiddletilde","lmonospace","lmsquare","lochulathai", +"logicaland","logicalnot","logicalnotreversed","logicalor","lolingthai", +"longs","lowlinecenterline","lowlinecmb","lowlinedashed","lozenge","lparen", +"lslash","lsquare","lsuperior","ltshade","luthai","lvocalicbengali", +"lvocalicdeva","lvocalicvowelsignbengali","lvocalicvowelsigndeva","lxsquare", +"m","mabengali","macron","macronbelowcmb","macroncmb","macronlowmod", +"macronmonospace","macute","madeva","magujarati","magurmukhi", +"mahapakhhebrew","mahapakhlefthebrew","mahiragana","maichattawalowleftthai", +"maichattawalowrightthai","maichattawathai","maichattawaupperleftthai", +"maieklowleftthai","maieklowrightthai","maiekthai","maiekupperleftthai", +"maihanakatleftthai","maihanakatthai","maitaikhuleftthai","maitaikhuthai", +"maitholowleftthai","maitholowrightthai","maithothai","maithoupperleftthai", +"maitrilowleftthai","maitrilowrightthai","maitrithai","maitriupperleftthai", 
+"maiyamokthai","makatakana","makatakanahalfwidth","male","mansyonsquare", +"maqafhebrew","mars","masoracirclehebrew","masquare","mbopomofo","mbsquare", +"mcircle","mcubedsquare","mdotaccent","mdotbelow","meemarabic", +"meemfinalarabic","meeminitialarabic","meemmedialarabic", +"meemmeeminitialarabic","meemmeemisolatedarabic","meetorusquare","mehiragana", +"meizierasquare","mekatakana","mekatakanahalfwidth","mem","memdagesh", +"memdageshhebrew","memhebrew","menarmenian","merkhahebrew", +"merkhakefulahebrew","merkhakefulalefthebrew","merkhalefthebrew","mhook", +"mhzsquare","middledotkatakanahalfwidth","middot","mieumacirclekorean", +"mieumaparenkorean","mieumcirclekorean","mieumkorean","mieumpansioskorean", +"mieumparenkorean","mieumpieupkorean","mieumsioskorean","mihiragana", +"mikatakana","mikatakanahalfwidth","minus","minusbelowcmb","minuscircle", +"minusmod","minusplus","minute","miribaarusquare","mirisquare", +"mlonglegturned","mlsquare","mmcubedsquare","mmonospace","mmsquaredsquare", +"mohiragana","mohmsquare","mokatakana","mokatakanahalfwidth","molsquare", +"momathai","moverssquare","moverssquaredsquare","mparen","mpasquare", +"mssquare","msuperior","mturned","mu","mu1","muasquare","muchgreater", +"muchless","mufsquare","mugreek","mugsquare","muhiragana","mukatakana", +"mukatakanahalfwidth","mulsquare","multiply","mumsquare","munahhebrew", +"munahlefthebrew","musicalnote","musicalnotedbl","musicflatsign", +"musicsharpsign","mussquare","muvsquare","muwsquare","mvmegasquare", +"mvsquare","mwmegasquare","mwsquare","n","nabengali","nabla","nacute", +"nadeva","nagujarati","nagurmukhi","nahiragana","nakatakana", +"nakatakanahalfwidth","napostrophe","nasquare","nbopomofo","nbspace","ncaron", +"ncedilla","ncircle","ncircumflexbelow","ncommaaccent","ndotaccent", +"ndotbelow","nehiragana","nekatakana","nekatakanahalfwidth","newsheqelsign", +"nfsquare","ngabengali","ngadeva","ngagujarati","ngagurmukhi","ngonguthai", 
+"nhiragana","nhookleft","nhookretroflex","nieunacirclekorean", +"nieunaparenkorean","nieuncieuckorean","nieuncirclekorean","nieunhieuhkorean", +"nieunkorean","nieunpansioskorean","nieunparenkorean","nieunsioskorean", +"nieuntikeutkorean","nihiragana","nikatakana","nikatakanahalfwidth", +"nikhahitleftthai","nikhahitthai","nine","ninearabic","ninebengali", +"ninecircle","ninecircleinversesansserif","ninedeva","ninegujarati", +"ninegurmukhi","ninehackarabic","ninehangzhou","nineideographicparen", +"nineinferior","ninemonospace","nineoldstyle","nineparen","nineperiod", +"ninepersian","nineroman","ninesuperior","nineteencircle","nineteenparen", +"nineteenperiod","ninethai","nj","njecyrillic","nkatakana", +"nkatakanahalfwidth","nlegrightlong","nlinebelow","nmonospace","nmsquare", +"nnabengali","nnadeva","nnagujarati","nnagurmukhi","nnnadeva","nohiragana", +"nokatakana","nokatakanahalfwidth","nonbreakingspace","nonenthai","nonuthai", +"noonarabic","noonfinalarabic","noonghunnaarabic","noonghunnafinalarabic", +"noonhehinitialarabic","nooninitialarabic","noonjeeminitialarabic", +"noonjeemisolatedarabic","noonmedialarabic","noonmeeminitialarabic", +"noonmeemisolatedarabic","noonnoonfinalarabic","notcontains","notelement", +"notelementof","notequal","notgreater","notgreaternorequal", +"notgreaternorless","notidentical","notless","notlessnorequal","notparallel", +"notprecedes","notsubset","notsucceeds","notsuperset","nowarmenian","nparen", +"nssquare","nsuperior","ntilde","nu","nuhiragana","nukatakana", +"nukatakanahalfwidth","nuktabengali","nuktadeva","nuktagujarati", +"nuktagurmukhi","numbersign","numbersignmonospace","numbersignsmall", +"numeralsigngreek","numeralsignlowergreek","numero","nun","nundagesh", +"nundageshhebrew","nunhebrew","nvsquare","nwsquare","nyabengali","nyadeva", +"nyagujarati","nyagurmukhi","o","oacute","oangthai","obarred", +"obarredcyrillic","obarreddieresiscyrillic","obengali","obopomofo","obreve", 
+"ocandradeva","ocandragujarati","ocandravowelsigndeva", +"ocandravowelsigngujarati","ocaron","ocircle","ocircumflex", +"ocircumflexacute","ocircumflexdotbelow","ocircumflexgrave", +"ocircumflexhookabove","ocircumflextilde","ocyrillic","odblacute","odblgrave", +"odeva","odieresis","odieresiscyrillic","odotbelow","oe","oekorean","ogonek", +"ogonekcmb","ograve","ogujarati","oharmenian","ohiragana","ohookabove", +"ohorn","ohornacute","ohorndotbelow","ohorngrave","ohornhookabove", +"ohorntilde","ohungarumlaut","oi","oinvertedbreve","okatakana", +"okatakanahalfwidth","okorean","olehebrew","omacron","omacronacute", +"omacrongrave","omdeva","omega","omega1","omegacyrillic","omegalatinclosed", +"omegaroundcyrillic","omegatitlocyrillic","omegatonos","omgujarati","omicron", +"omicrontonos","omonospace","one","onearabic","onebengali","onecircle", +"onecircleinversesansserif","onedeva","onedotenleader","oneeighth", +"onefitted","onegujarati","onegurmukhi","onehackarabic","onehalf", +"onehangzhou","oneideographicparen","oneinferior","onemonospace", +"onenumeratorbengali","oneoldstyle","oneparen","oneperiod","onepersian", +"onequarter","oneroman","onesuperior","onethai","onethird","oogonek", +"oogonekmacron","oogurmukhi","oomatragurmukhi","oopen","oparen","openbullet", +"option","ordfeminine","ordmasculine","orthogonal","oshortdeva", +"oshortvowelsigndeva","oslash","oslashacute","osmallhiragana", +"osmallkatakana","osmallkatakanahalfwidth","ostrokeacute","osuperior", +"otcyrillic","otilde","otildeacute","otildedieresis","oubopomofo","overline", +"overlinecenterline","overlinecmb","overlinedashed","overlinedblwavy", +"overlinewavy","overscore","ovowelsignbengali","ovowelsigndeva", +"ovowelsigngujarati","p","paampssquare","paasentosquare","pabengali","pacute", +"padeva","pagedown","pageup","pagujarati","pagurmukhi","pahiragana", +"paiyannoithai","pakatakana","palatalizationcyrilliccmb","palochkacyrillic", +"pansioskorean","paragraph","parallel","parenleft","parenleftaltonearabic", 
+"parenleftbt","parenleftex","parenleftinferior","parenleftmonospace", +"parenleftsmall","parenleftsuperior","parenlefttp","parenleftvertical", +"parenright","parenrightaltonearabic","parenrightbt","parenrightex", +"parenrightinferior","parenrightmonospace","parenrightsmall", +"parenrightsuperior","parenrighttp","parenrightvertical","partialdiff", +"paseqhebrew","pashtahebrew","pasquare","patah","patah11","patah1d","patah2a", +"patahhebrew","patahnarrowhebrew","patahquarterhebrew","patahwidehebrew", +"pazerhebrew","pbopomofo","pcircle","pdotaccent","pe","pecyrillic","pedagesh", +"pedageshhebrew","peezisquare","pefinaldageshhebrew","peharabic", +"peharmenian","pehebrew","pehfinalarabic","pehinitialarabic","pehiragana", +"pehmedialarabic","pekatakana","pemiddlehookcyrillic","perafehebrew", +"percent","percentarabic","percentmonospace","percentsmall","period", +"periodarmenian","periodcentered","periodhalfwidth","periodinferior", +"periodmonospace","periodsmall","periodsuperior","perispomenigreekcmb", +"perpendicular","perthousand","peseta","pfsquare","phabengali","phadeva", +"phagujarati","phagurmukhi","phi","phi1","phieuphacirclekorean", +"phieuphaparenkorean","phieuphcirclekorean","phieuphkorean", +"phieuphparenkorean","philatin","phinthuthai","phisymbolgreek","phook", +"phophanthai","phophungthai","phosamphaothai","pi","pieupacirclekorean", +"pieupaparenkorean","pieupcieuckorean","pieupcirclekorean", +"pieupkiyeokkorean","pieupkorean","pieupparenkorean","pieupsioskiyeokkorean", +"pieupsioskorean","pieupsiostikeutkorean","pieupthieuthkorean", +"pieuptikeutkorean","pihiragana","pikatakana","pisymbolgreek","piwrarmenian", +"plus","plusbelowcmb","pluscircle","plusminus","plusmod","plusmonospace", +"plussmall","plussuperior","pmonospace","pmsquare","pohiragana", +"pointingindexdownwhite","pointingindexleftwhite","pointingindexrightwhite", +"pointingindexupwhite","pokatakana","poplathai","postalmark","postalmarkface", 
+"pparen","precedes","prescription","primemod","primereversed","product", +"projective","prolongedkana","propellor","propersubset","propersuperset", +"proportion","proportional","psi","psicyrillic","psilipneumatacyrilliccmb", +"pssquare","puhiragana","pukatakana","pvsquare","pwsquare","q","qadeva", +"qadmahebrew","qafarabic","qaffinalarabic","qafinitialarabic", +"qafmedialarabic","qamats","qamats10","qamats1a","qamats1c","qamats27", +"qamats29","qamats33","qamatsde","qamatshebrew","qamatsnarrowhebrew", +"qamatsqatanhebrew","qamatsqatannarrowhebrew","qamatsqatanquarterhebrew", +"qamatsqatanwidehebrew","qamatsquarterhebrew","qamatswidehebrew", +"qarneyparahebrew","qbopomofo","qcircle","qhook","qmonospace","qof", +"qofdagesh","qofdageshhebrew","qofhatafpatah","qofhatafpatahhebrew", +"qofhatafsegol","qofhatafsegolhebrew","qofhebrew","qofhiriq","qofhiriqhebrew", +"qofholam","qofholamhebrew","qofpatah","qofpatahhebrew","qofqamats", +"qofqamatshebrew","qofqubuts","qofqubutshebrew","qofsegol","qofsegolhebrew", +"qofsheva","qofshevahebrew","qoftsere","qoftserehebrew","qparen", +"quarternote","qubuts","qubuts18","qubuts25","qubuts31","qubutshebrew", +"qubutsnarrowhebrew","qubutsquarterhebrew","qubutswidehebrew","question", +"questionarabic","questionarmenian","questiondown","questiondownsmall", +"questiongreek","questionmonospace","questionsmall","quotedbl","quotedblbase", +"quotedblleft","quotedblmonospace","quotedblprime","quotedblprimereversed", +"quotedblright","quoteleft","quoteleftreversed","quotereversed","quoteright", +"quoterightn","quotesinglbase","quotesingle","quotesinglemonospace","r", +"raarmenian","rabengali","racute","radeva","radical","radicalex", +"radoverssquare","radoverssquaredsquare","radsquare","rafe","rafehebrew", +"ragujarati","ragurmukhi","rahiragana","rakatakana","rakatakanahalfwidth", +"ralowerdiagonalbengali","ramiddlediagonalbengali","ramshorn","ratio", +"rbopomofo","rcaron","rcedilla","rcircle","rcommaaccent","rdblgrave", 
+"rdotaccent","rdotbelow","rdotbelowmacron","referencemark","reflexsubset", +"reflexsuperset","registered","registersans","registerserif","reharabic", +"reharmenian","rehfinalarabic","rehiragana","rehyehaleflamarabic", +"rekatakana","rekatakanahalfwidth","resh","reshdageshhebrew","reshhatafpatah", +"reshhatafpatahhebrew","reshhatafsegol","reshhatafsegolhebrew","reshhebrew", +"reshhiriq","reshhiriqhebrew","reshholam","reshholamhebrew","reshpatah", +"reshpatahhebrew","reshqamats","reshqamatshebrew","reshqubuts", +"reshqubutshebrew","reshsegol","reshsegolhebrew","reshsheva", +"reshshevahebrew","reshtsere","reshtserehebrew","reversedtilde","reviahebrew", +"reviamugrashhebrew","revlogicalnot","rfishhook","rfishhookreversed", +"rhabengali","rhadeva","rho","rhook","rhookturned","rhookturnedsuperior", +"rhosymbolgreek","rhotichookmod","rieulacirclekorean","rieulaparenkorean", +"rieulcirclekorean","rieulhieuhkorean","rieulkiyeokkorean", +"rieulkiyeoksioskorean","rieulkorean","rieulmieumkorean","rieulpansioskorean", +"rieulparenkorean","rieulphieuphkorean","rieulpieupkorean", +"rieulpieupsioskorean","rieulsioskorean","rieulthieuthkorean", +"rieultikeutkorean","rieulyeorinhieuhkorean","rightangle","righttackbelowcmb", +"righttriangle","rihiragana","rikatakana","rikatakanahalfwidth","ring", +"ringbelowcmb","ringcmb","ringhalfleft","ringhalfleftarmenian", +"ringhalfleftbelowcmb","ringhalfleftcentered","ringhalfright", +"ringhalfrightbelowcmb","ringhalfrightcentered","rinvertedbreve", +"rittorusquare","rlinebelow","rlongleg","rlonglegturned","rmonospace", +"rohiragana","rokatakana","rokatakanahalfwidth","roruathai","rparen", +"rrabengali","rradeva","rragurmukhi","rreharabic","rrehfinalarabic", +"rrvocalicbengali","rrvocalicdeva","rrvocalicgujarati", +"rrvocalicvowelsignbengali","rrvocalicvowelsigndeva", +"rrvocalicvowelsigngujarati","rsuperior","rtblock","rturned", +"rturnedsuperior","ruhiragana","rukatakana","rukatakanahalfwidth", 
+"rupeemarkbengali","rupeesignbengali","rupiah","ruthai","rvocalicbengali", +"rvocalicdeva","rvocalicgujarati","rvocalicvowelsignbengali", +"rvocalicvowelsigndeva","rvocalicvowelsigngujarati","s","sabengali","sacute", +"sacutedotaccent","sadarabic","sadeva","sadfinalarabic","sadinitialarabic", +"sadmedialarabic","sagujarati","sagurmukhi","sahiragana","sakatakana", +"sakatakanahalfwidth","sallallahoualayhewasallamarabic","samekh", +"samekhdagesh","samekhdageshhebrew","samekhhebrew","saraaathai","saraaethai", +"saraaimaimalaithai","saraaimaimuanthai","saraamthai","saraathai","saraethai", +"saraiileftthai","saraiithai","saraileftthai","saraithai","saraothai", +"saraueeleftthai","saraueethai","saraueleftthai","sarauethai","sarauthai", +"sarauuthai","sbopomofo","scaron","scarondotaccent","scedilla","schwa", +"schwacyrillic","schwadieresiscyrillic","schwahook","scircle","scircumflex", +"scommaaccent","sdotaccent","sdotbelow","sdotbelowdotaccent", +"seagullbelowcmb","second","secondtonechinese","section","seenarabic", +"seenfinalarabic","seeninitialarabic","seenmedialarabic","segol","segol13", +"segol1f","segol2c","segolhebrew","segolnarrowhebrew","segolquarterhebrew", +"segoltahebrew","segolwidehebrew","seharmenian","sehiragana","sekatakana", +"sekatakanahalfwidth","semicolon","semicolonarabic","semicolonmonospace", +"semicolonsmall","semivoicedmarkkana","semivoicedmarkkanahalfwidth", +"sentisquare","sentosquare","seven","sevenarabic","sevenbengali", +"sevencircle","sevencircleinversesansserif","sevendeva","seveneighths", +"sevengujarati","sevengurmukhi","sevenhackarabic","sevenhangzhou", +"sevenideographicparen","seveninferior","sevenmonospace","sevenoldstyle", +"sevenparen","sevenperiod","sevenpersian","sevenroman","sevensuperior", +"seventeencircle","seventeenparen","seventeenperiod","seventhai","sfthyphen", +"shaarmenian","shabengali","shacyrillic","shaddaarabic","shaddadammaarabic", +"shaddadammatanarabic","shaddafathaarabic","shaddafathatanarabic", 
+"shaddakasraarabic","shaddakasratanarabic","shade","shadedark","shadelight", +"shademedium","shadeva","shagujarati","shagurmukhi","shalshelethebrew", +"shbopomofo","shchacyrillic","sheenarabic","sheenfinalarabic", +"sheeninitialarabic","sheenmedialarabic","sheicoptic","sheqel","sheqelhebrew", +"sheva","sheva115","sheva15","sheva22","sheva2e","shevahebrew", +"shevanarrowhebrew","shevaquarterhebrew","shevawidehebrew","shhacyrillic", +"shimacoptic","shin","shindagesh","shindageshhebrew","shindageshshindot", +"shindageshshindothebrew","shindageshsindot","shindageshsindothebrew", +"shindothebrew","shinhebrew","shinshindot","shinshindothebrew","shinsindot", +"shinsindothebrew","shook","sigma","sigma1","sigmafinal", +"sigmalunatesymbolgreek","sihiragana","sikatakana","sikatakanahalfwidth", +"siluqhebrew","siluqlefthebrew","similar","sindothebrew","siosacirclekorean", +"siosaparenkorean","sioscieuckorean","sioscirclekorean","sioskiyeokkorean", +"sioskorean","siosnieunkorean","siosparenkorean","siospieupkorean", +"siostikeutkorean","six","sixarabic","sixbengali","sixcircle", +"sixcircleinversesansserif","sixdeva","sixgujarati","sixgurmukhi", +"sixhackarabic","sixhangzhou","sixideographicparen","sixinferior", +"sixmonospace","sixoldstyle","sixparen","sixperiod","sixpersian","sixroman", +"sixsuperior","sixteencircle","sixteencurrencydenominatorbengali", +"sixteenparen","sixteenperiod","sixthai","slash","slashmonospace","slong", +"slongdotaccent","smileface","smonospace","sofpasuqhebrew","softhyphen", +"softsigncyrillic","sohiragana","sokatakana","sokatakanahalfwidth", +"soliduslongoverlaycmb","solidusshortoverlaycmb","sorusithai","sosalathai", +"sosothai","sosuathai","space","spacehackarabic","spade","spadesuitblack", +"spadesuitwhite","sparen","squarebelowcmb","squarecc","squarecm", +"squarediagonalcrosshatchfill","squarehorizontalfill","squarekg","squarekm", +"squarekmcapital","squareln","squarelog","squaremg","squaremil","squaremm", 
+"squaremsquared","squareorthogonalcrosshatchfill", +"squareupperlefttolowerrightfill","squareupperrighttolowerleftfill", +"squareverticalfill","squarewhitewithsmallblack","srsquare","ssabengali", +"ssadeva","ssagujarati","ssangcieuckorean","ssanghieuhkorean", +"ssangieungkorean","ssangkiyeokkorean","ssangnieunkorean","ssangpieupkorean", +"ssangsioskorean","ssangtikeutkorean","ssuperior","sterling", +"sterlingmonospace","strokelongoverlaycmb","strokeshortoverlaycmb","subset", +"subsetnotequal","subsetorequal","succeeds","suchthat","suhiragana", +"sukatakana","sukatakanahalfwidth","sukunarabic","summation","sun","superset", +"supersetnotequal","supersetorequal","svsquare","syouwaerasquare","t", +"tabengali","tackdown","tackleft","tadeva","tagujarati","tagurmukhi", +"taharabic","tahfinalarabic","tahinitialarabic","tahiragana", +"tahmedialarabic","taisyouerasquare","takatakana","takatakanahalfwidth", +"tatweelarabic","tau","tav","tavdages","tavdagesh","tavdageshhebrew", +"tavhebrew","tbar","tbopomofo","tcaron","tccurl","tcedilla","tcheharabic", +"tchehfinalarabic","tchehinitialarabic","tchehmedialarabic", +"tchehmeeminitialarabic","tcircle","tcircumflexbelow","tcommaaccent", +"tdieresis","tdotaccent","tdotbelow","tecyrillic","tedescendercyrillic", +"teharabic","tehfinalarabic","tehhahinitialarabic","tehhahisolatedarabic", +"tehinitialarabic","tehiragana","tehjeeminitialarabic", +"tehjeemisolatedarabic","tehmarbutaarabic","tehmarbutafinalarabic", +"tehmedialarabic","tehmeeminitialarabic","tehmeemisolatedarabic", +"tehnoonfinalarabic","tekatakana","tekatakanahalfwidth","telephone", +"telephoneblack","telishagedolahebrew","telishaqetanahebrew","tencircle", +"tenideographicparen","tenparen","tenperiod","tenroman","tesh","tet", +"tetdagesh","tetdageshhebrew","tethebrew","tetsecyrillic","tevirhebrew", +"tevirlefthebrew","thabengali","thadeva","thagujarati","thagurmukhi", +"thalarabic","thalfinalarabic","thanthakhatlowleftthai", 
+"thanthakhatlowrightthai","thanthakhatthai","thanthakhatupperleftthai", +"theharabic","thehfinalarabic","thehinitialarabic","thehmedialarabic", +"thereexists","therefore","theta","theta1","thetasymbolgreek", +"thieuthacirclekorean","thieuthaparenkorean","thieuthcirclekorean", +"thieuthkorean","thieuthparenkorean","thirteencircle","thirteenparen", +"thirteenperiod","thonangmonthothai","thook","thophuthaothai","thorn", +"thothahanthai","thothanthai","thothongthai","thothungthai", +"thousandcyrillic","thousandsseparatorarabic","thousandsseparatorpersian", +"three","threearabic","threebengali","threecircle", +"threecircleinversesansserif","threedeva","threeeighths","threegujarati", +"threegurmukhi","threehackarabic","threehangzhou","threeideographicparen", +"threeinferior","threemonospace","threenumeratorbengali","threeoldstyle", +"threeparen","threeperiod","threepersian","threequarters", +"threequartersemdash","threeroman","threesuperior","threethai","thzsquare", +"tihiragana","tikatakana","tikatakanahalfwidth","tikeutacirclekorean", +"tikeutaparenkorean","tikeutcirclekorean","tikeutkorean","tikeutparenkorean", +"tilde","tildebelowcmb","tildecmb","tildecomb","tildedoublecmb", +"tildeoperator","tildeoverlaycmb","tildeverticalcmb","timescircle", +"tipehahebrew","tipehalefthebrew","tippigurmukhi","titlocyrilliccmb", +"tiwnarmenian","tlinebelow","tmonospace","toarmenian","tohiragana", +"tokatakana","tokatakanahalfwidth","tonebarextrahighmod","tonebarextralowmod", +"tonebarhighmod","tonebarlowmod","tonebarmidmod","tonefive","tonesix", +"tonetwo","tonos","tonsquare","topatakthai","tortoiseshellbracketleft", +"tortoiseshellbracketleftsmall","tortoiseshellbracketleftvertical", +"tortoiseshellbracketright","tortoiseshellbracketrightsmall", +"tortoiseshellbracketrightvertical","totaothai","tpalatalhook","tparen", +"trademark","trademarksans","trademarkserif","tretroflexhook","triagdn", +"triaglf","triagrt","triagup","ts","tsadi","tsadidagesh","tsadidageshhebrew", 
+"tsadihebrew","tsecyrillic","tsere","tsere12","tsere1e","tsere2b", +"tserehebrew","tserenarrowhebrew","tserequarterhebrew","tserewidehebrew", +"tshecyrillic","tsuperior","ttabengali","ttadeva","ttagujarati","ttagurmukhi", +"tteharabic","ttehfinalarabic","ttehinitialarabic","ttehmedialarabic", +"tthabengali","tthadeva","tthagujarati","tthagurmukhi","tturned","tuhiragana", +"tukatakana","tukatakanahalfwidth","tusmallhiragana","tusmallkatakana", +"tusmallkatakanahalfwidth","twelvecircle","twelveparen","twelveperiod", +"twelveroman","twentycircle","twentyhangzhou","twentyparen","twentyperiod", +"two","twoarabic","twobengali","twocircle","twocircleinversesansserif", +"twodeva","twodotenleader","twodotleader","twodotleadervertical", +"twogujarati","twogurmukhi","twohackarabic","twohangzhou", +"twoideographicparen","twoinferior","twomonospace","twonumeratorbengali", +"twooldstyle","twoparen","twoperiod","twopersian","tworoman","twostroke", +"twosuperior","twothai","twothirds","u","uacute","ubar","ubengali", +"ubopomofo","ubreve","ucaron","ucircle","ucircumflex","ucircumflexbelow", +"ucyrillic","udattadeva","udblacute","udblgrave","udeva","udieresis", +"udieresisacute","udieresisbelow","udieresiscaron","udieresiscyrillic", +"udieresisgrave","udieresismacron","udotbelow","ugrave","ugujarati", +"ugurmukhi","uhiragana","uhookabove","uhorn","uhornacute","uhorndotbelow", +"uhorngrave","uhornhookabove","uhorntilde","uhungarumlaut", +"uhungarumlautcyrillic","uinvertedbreve","ukatakana","ukatakanahalfwidth", +"ukcyrillic","ukorean","umacron","umacroncyrillic","umacrondieresis", +"umatragurmukhi","umonospace","underscore","underscoredbl", +"underscoremonospace","underscorevertical","underscorewavy","union", +"universal","uogonek","uparen","upblock","upperdothebrew","upsilon", +"upsilondieresis","upsilondieresistonos","upsilonlatin","upsilontonos", +"uptackbelowcmb","uptackmod","uragurmukhi","uring","ushortcyrillic", +"usmallhiragana","usmallkatakana","usmallkatakanahalfwidth", 
+"ustraightcyrillic","ustraightstrokecyrillic","utilde","utildeacute", +"utildebelow","uubengali","uudeva","uugujarati","uugurmukhi", +"uumatragurmukhi","uuvowelsignbengali","uuvowelsigndeva", +"uuvowelsigngujarati","uvowelsignbengali","uvowelsigndeva", +"uvowelsigngujarati","v","vadeva","vagujarati","vagurmukhi","vakatakana", +"vav","vavdagesh","vavdagesh65","vavdageshhebrew","vavhebrew","vavholam", +"vavholamhebrew","vavvavhebrew","vavyodhebrew","vcircle","vdotbelow", +"vecyrillic","veharabic","vehfinalarabic","vehinitialarabic", +"vehmedialarabic","vekatakana","venus","verticalbar","verticallineabovecmb", +"verticallinebelowcmb","verticallinelowmod","verticallinemod","vewarmenian", +"vhook","vikatakana","viramabengali","viramadeva","viramagujarati", +"visargabengali","visargadeva","visargagujarati","vmonospace","voarmenian", +"voicediterationhiragana","voicediterationkatakana","voicedmarkkana", +"voicedmarkkanahalfwidth","vokatakana","vparen","vtilde","vturned", +"vuhiragana","vukatakana","w","wacute","waekorean","wahiragana","wakatakana", +"wakatakanahalfwidth","wakorean","wasmallhiragana","wasmallkatakana", +"wattosquare","wavedash","wavyunderscorevertical","wawarabic", +"wawfinalarabic","wawhamzaabovearabic","wawhamzaabovefinalarabic","wbsquare", +"wcircle","wcircumflex","wdieresis","wdotaccent","wdotbelow","wehiragana", +"weierstrass","wekatakana","wekorean","weokorean","wgrave","whitebullet", +"whitecircle","whitecircleinverse","whitecornerbracketleft", +"whitecornerbracketleftvertical","whitecornerbracketright", +"whitecornerbracketrightvertical","whitediamond", +"whitediamondcontainingblacksmalldiamond","whitedownpointingsmalltriangle", +"whitedownpointingtriangle","whiteleftpointingsmalltriangle", +"whiteleftpointingtriangle","whitelenticularbracketleft", +"whitelenticularbracketright","whiterightpointingsmalltriangle", +"whiterightpointingtriangle","whitesmallsquare","whitesmilingface", 
+"whitesquare","whitestar","whitetelephone","whitetortoiseshellbracketleft", +"whitetortoiseshellbracketright","whiteuppointingsmalltriangle", +"whiteuppointingtriangle","wihiragana","wikatakana","wikorean","wmonospace", +"wohiragana","wokatakana","wokatakanahalfwidth","won","wonmonospace", +"wowaenthai","wparen","wring","wsuperior","wturned","wynn","x","xabovecmb", +"xbopomofo","xcircle","xdieresis","xdotaccent","xeharmenian","xi", +"xmonospace","xparen","xsuperior","y","yaadosquare","yabengali","yacute", +"yadeva","yaekorean","yagujarati","yagurmukhi","yahiragana","yakatakana", +"yakatakanahalfwidth","yakorean","yamakkanthai","yasmallhiragana", +"yasmallkatakana","yasmallkatakanahalfwidth","yatcyrillic","ycircle", +"ycircumflex","ydieresis","ydotaccent","ydotbelow","yeharabic", +"yehbarreearabic","yehbarreefinalarabic","yehfinalarabic", +"yehhamzaabovearabic","yehhamzaabovefinalarabic","yehhamzaaboveinitialarabic", +"yehhamzaabovemedialarabic","yehinitialarabic","yehmedialarabic", +"yehmeeminitialarabic","yehmeemisolatedarabic","yehnoonfinalarabic", +"yehthreedotsbelowarabic","yekorean","yen","yenmonospace","yeokorean", +"yeorinhieuhkorean","yerahbenyomohebrew","yerahbenyomolefthebrew", +"yericyrillic","yerudieresiscyrillic","yesieungkorean", +"yesieungpansioskorean","yesieungsioskorean","yetivhebrew","ygrave","yhook", +"yhookabove","yiarmenian","yicyrillic","yikorean","yinyang","yiwnarmenian", +"ymonospace","yod","yoddagesh","yoddageshhebrew","yodhebrew","yodyodhebrew", +"yodyodpatahhebrew","yohiragana","yoikorean","yokatakana", +"yokatakanahalfwidth","yokorean","yosmallhiragana","yosmallkatakana", +"yosmallkatakanahalfwidth","yotgreek","yoyaekorean","yoyakorean","yoyakthai", +"yoyingthai","yparen","ypogegrammeni","ypogegrammenigreekcmb","yr","yring", +"ysuperior","ytilde","yturned","yuhiragana","yuikorean","yukatakana", +"yukatakanahalfwidth","yukorean","yusbigcyrillic","yusbigiotifiedcyrillic", 
+"yuslittlecyrillic","yuslittleiotifiedcyrillic","yusmallhiragana", +"yusmallkatakana","yusmallkatakanahalfwidth","yuyekorean","yuyeokorean", +"yyabengali","yyadeva","z","zaarmenian","zacute","zadeva","zagurmukhi", +"zaharabic","zahfinalarabic","zahinitialarabic","zahiragana", +"zahmedialarabic","zainarabic","zainfinalarabic","zakatakana", +"zaqefgadolhebrew","zaqefqatanhebrew","zarqahebrew","zayin","zayindagesh", +"zayindageshhebrew","zayinhebrew","zbopomofo","zcaron","zcircle", +"zcircumflex","zcurl","zdot","zdotaccent","zdotbelow","zecyrillic", +"zedescendercyrillic","zedieresiscyrillic","zehiragana","zekatakana","zero", +"zeroarabic","zerobengali","zerodeva","zerogujarati","zerogurmukhi", +"zerohackarabic","zeroinferior","zeromonospace","zerooldstyle","zeropersian", +"zerosuperior","zerothai","zerowidthjoiner","zerowidthnonjoiner", +"zerowidthspace","zeta","zhbopomofo","zhearmenian","zhebrevecyrillic", +"zhecyrillic","zhedescendercyrillic","zhedieresiscyrillic","zihiragana", +"zikatakana","zinorhebrew","zlinebelow","zmonospace","zohiragana", +"zokatakana","zparen","zretroflexhook","zstroke","zuhiragana","zukatakana", +}; + +static const unsigned short agl_code_list[] = { +65,198,508,482,63462,193,63457,258,7854,1232,7862,7856,7858,7860,461,9398,194, +7844,7852,7846,7848,63458,7850,63177,63412,1040,512,196,1234,478,63460,7840, +480,192,63456,7842,1236,514,913,902,256,65313,260,197,506,7680,63461,63329, +195,63459,1329,66,9399,7682,7684,1041,1330,914,385,7686,65314,63220,63330,386, +67,1342,262,63178,63221,268,199,7688,63463,9400,264,266,266,63416,1353,1212, +1063,1214,1206,1268,1347,1227,1208,935,391,63222,65315,1361,63331,68,497,452, +1332,393,270,7696,9401,7698,272,7690,7692,1044,1006,8710,916,394,63179,63180, +63181,63400,988,1026,7694,65316,63223,272,63332,395,498,453,1248,1029,1039,69, +201,63465,276,282,7708,1333,9402,202,7870,7704,7878,7872,7874,63466,7876,1028, +516,203,63467,278,278,7864,1060,200,63464,1335,7866,8551,518,1124,1051,8554, 
+274,7702,7700,1052,65317,1053,1186,330,1188,1223,280,400,917,904,1056,398, +1069,1057,1194,425,63333,919,1336,905,208,63472,7868,7706,8364,439,494,440,70, +9403,7710,1366,996,401,1138,8548,65318,8547,63334,71,13191,500,915,404,1002, +286,486,290,9404,284,290,288,288,1043,1346,1172,1170,1168,403,1331,1027,7712, +65319,63182,63328,63335,667,484,72,9679,9642,9643,9633,13259,1192,1202,1066, +294,7722,7720,9405,292,7718,7714,7716,65320,1344,1000,63336,63183,63224,13200, +73,1071,306,1070,205,63469,300,463,9406,206,63470,1030,520,207,7726,1252, +63471,304,304,7882,1238,1045,8465,204,63468,7880,1048,522,1049,298,1250,65321, +1339,1025,302,921,406,938,906,63337,407,296,7724,1140,1142,74,1345,9407,308, +1032,1355,65322,63338,75,13189,13261,1184,7728,1050,1178,1219,922,1182,1180, +488,310,9408,310,7730,1364,1343,1061,998,408,1036,7732,65323,1152,990,1134, +63339,76,455,63167,313,923,317,315,9409,7740,315,319,319,7734,7736,1340,456, +1033,7738,65324,321,63225,63340,77,13190,63184,63407,7742,9410,7744,7746,1348, +65325,63341,412,924,78,458,323,327,325,9411,7754,325,7748,7750,413,8552,459, +1034,7752,65326,1350,63342,209,63473,925,79,338,63226,211,63475,1256,1258,334, +465,415,9412,212,7888,7896,7890,7892,63476,7894,1054,336,524,214,1254,63478, +7884,63227,210,63474,1365,8486,7886,416,7898,7906,7900,7902,7904,336,418,526, +332,7762,7760,8486,1120,937,1146,1148,911,927,908,65327,8544,490,492,390,216, +510,63480,63343,510,1150,213,7756,7758,63477,80,7764,9413,7766,1055,1354,1190, +934,420,928,1363,65328,936,1136,63344,81,9414,65329,63345,82,1356,340,344,342, +9415,342,528,7768,7770,7772,1360,8476,929,63228,530,7774,65330,63346,641,694, +83,9484,9492,9488,9496,9532,9516,9524,9500,9508,9472,9474,9569,9570,9558,9557, +9571,9553,9559,9565,9564,9563,9566,9567,9562,9556,9577,9574,9568,9552,9580, +9575,9576,9572,9573,9561,9560,9554,9555,9579,9578,346,7780,992,352,7782,63229, +350,399,1240,1242,9416,348,536,7776,7778,7784,1357,8550,1351,1064,1065,994, 
+1210,1004,931,8549,65331,1068,63347,986,84,932,358,356,354,9417,7792,354,7786, +7788,1058,1196,8553,1204,920,428,222,63486,8546,63230,1359,7790,65332,1337, +444,388,423,430,1062,1035,63348,8555,8545,85,218,63482,364,467,9418,219,7798, +63483,1059,368,532,220,471,7794,473,1264,475,469,63484,7908,217,63481,7910, +431,7912,7920,7914,7916,7918,368,1266,534,1144,362,1262,7802,65333,370,933, +978,979,433,939,980,978,910,366,1038,63349,1198,1200,360,7800,7796,86,9419, +7806,1042,1358,434,65334,1352,63350,7804,87,7810,9420,372,7812,7814,7816,7808, +65335,63351,88,9421,7820,7818,1341,926,65336,63352,89,221,63485,1122,9422,374, +376,63487,7822,7924,1067,1272,7922,435,7926,1349,1031,1362,65337,63353,7928, +1130,1132,1126,1128,90,1334,377,381,63231,9423,7824,379,379,7826,1047,1176, +1246,918,1338,1217,1046,1174,1244,7828,65338,63354,437,97,2438,225,2310,2694, +2566,2622,13059,2494,2366,2750,1375,2416,2437,12570,259,7855,1233,7863,7857, +7859,7861,462,9424,226,7845,7853,7847,7849,7851,180,791,769,769,2388,719,833, +1072,513,2673,2309,228,1235,479,7841,481,230,509,12624,483,8213,8356,1040, +1041,1042,1043,1044,1045,1025,1046,1047,1048,1049,1050,1051,1052,1053,1054, +1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069, +1070,1071,1168,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1038, +63172,63173,1072,1073,1074,1075,1076,1077,1105,1078,1079,1080,1081,1082,1083, +1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098, +1099,1100,1101,1102,1103,1169,1106,1107,1108,1109,1110,1111,1112,1113,1114, +1115,1116,1118,1039,1122,1138,1140,63174,1119,1123,1139,1141,63175,63176,1241, +8206,8207,8205,1642,1548,1632,1633,1634,1635,1636,1637,1638,1639,1640,1641, +1563,1567,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581, +1582,1583,1584,1585,1586,1587,1588,1589,1590,1591,1592,1593,1594,1600,1601, +1602,1603,1604,1605,1606,1608,1609,1610,1611,1612,1613,1614,1615,1616,1617, 
+1618,1607,1700,1662,1670,1688,1711,1657,1672,1681,1722,1746,1749,8362,1470, +1475,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501, +1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,64298,64299, +64331,64287,1520,1521,1522,64309,1460,1461,1462,1467,1464,1463,1456,1458,1457, +1459,1474,1473,1465,1468,1469,1471,1472,700,8453,8467,8470,8236,8237,8238, +8204,1645,701,224,2693,2565,12354,7843,2448,12574,2320,1237,2704,2576,2632, +1593,65226,65227,65228,515,2504,2376,2760,12450,65393,12623,1488,1575,64304, +65166,1571,65156,1573,65160,1488,64335,1570,65154,1609,65264,65267,65268, +64302,64303,8501,8780,945,940,257,65345,38,65286,63270,13250,12578,12580,3674, +8736,12296,65087,12297,65088,9001,9002,8491,903,2386,2434,2306,2690,261,13056, +9372,1370,700,63743,8784,8776,8786,8773,12686,12685,8978,7834,229,507,7681, +8596,8675,8672,8674,8673,8660,8659,8656,8658,8657,8595,8601,8600,8681,709,706, +707,708,63719,8592,8656,8653,8646,8678,8594,8655,10142,8644,8680,8676,8677, +8593,8597,8616,8616,8598,8645,8599,8679,63718,94,65342,126,65374,593,594, +12353,12449,65383,42,1645,1645,8727,65290,65121,8258,63209,8771,64,227,65312, +65131,592,2452,12576,2324,2708,2580,2519,2636,2508,2380,2764,2365,1377,1506, +64288,1506,98,2476,92,65340,2348,2732,2604,12400,3647,12496,124,65372,12549, +9425,7683,7685,9836,8757,1073,1576,65168,65169,12409,65170,64671,64520,64621, +12505,1378,1489,946,976,64305,64305,1489,64332,2477,2349,2733,2605,595,12403, +12499,664,2562,13105,9679,9670,9660,9668,9664,12304,65083,12305,65084,9699, +9698,9644,9658,9654,9642,9787,9632,9733,9700,9701,9652,9650,9251,7687,9608, +65346,3610,12412,12508,9373,13251,63732,123,63731,63730,65371,65115,63729, +65079,125,63742,63741,65373,65116,63740,65080,91,63728,63727,65339,63726,93, +63739,63738,65341,63737,728,814,774,815,785,865,810,826,166,384,63210,387, +12406,12502,8226,9688,8729,9678,99,1390,2458,263,2330,2714,2586,13192,2433, 
+784,2305,2689,8682,8453,711,812,780,8629,12568,269,231,7689,9426,265,597,267, +267,13253,184,807,162,8451,63199,65504,63394,63200,1401,2459,2331,2715,2587, +12564,1213,10003,1095,1215,1207,1269,1395,1228,1209,967,12919,12823,12905, +12618,12809,3594,3592,3593,3596,392,12918,12822,12904,12616,12808,12828,9675, +8855,8857,8853,12342,9680,9681,710,813,770,8999,450,448,449,451,9827,9827, +9831,13220,65347,13216,1409,58,8353,65306,8353,65109,721,720,44,787,789,63171, +1548,1373,63201,65292,788,701,65104,63202,786,699,9788,8773,8750,8963,6,7,8, +24,13,17,18,19,20,127,16,25,5,4,27,23,3,12,28,29,9,10,21,30,15,14,2,1,26,22, +31,11,169,63721,63193,12300,65378,65089,12301,65379,65090,13183,13255,13254, +9374,8354,663,8911,8910,164,63185,63186,63188,63189,100,1380,2470,1590,2342, +65214,65215,65216,1468,1468,8224,8225,2726,2598,12384,12480,1583,1491,64307, +64307,1491,1491,1491,1491,1491,1491,1491,1491,1491,1491,1491,1491,1491,1491, +1491,1491,1491,1491,1491,1491,1491,65194,1615,1615,1612,1612,2404,1447,1447, +1157,63187,12298,65085,12299,65086,811,8660,8658,2405,63190,783,8748,8215,819, +831,698,8214,782,12553,13256,271,7697,9427,7699,273,2465,2337,2721,2593,1672, +64393,2396,2466,2338,2722,2594,7691,7693,1643,1643,1076,176,1453,12391,1007, +12487,9003,8998,948,397,2552,676,2471,2343,2727,2599,599,901,836,9830,9826, +168,63191,804,776,63192,901,12386,12482,12291,247,8739,8725,1106,9619,7695, +13207,273,65348,9604,3598,3604,12393,12489,36,63203,65284,63268,65129,63204, +8363,13094,729,775,803,803,12539,305,63166,644,8901,9676,64287,64287,798,725, +9375,63211,598,396,12389,12485,499,675,454,677,1249,1109,1119,101,233,9793, +2447,12572,277,2317,2701,2373,2757,283,7709,1381,1415,9428,234,7871,7705,7879, +7873,7875,7877,1108,517,2319,235,279,279,7865,2575,2631,1092,232,2703,1383, +12573,12360,7867,12575,56,1640,2542,9319,10129,2414,9329,9349,9369,2798,2670, +1640,12328,9835,12839,8328,65304,63288,9339,9359,1784,8567,8312,3672,519,1125, 
+12456,65396,2676,12628,1083,8712,9322,9342,9362,8570,8230,8942,275,7703,7701, +1084,8212,65073,65349,1371,8709,12579,1085,8211,65074,1187,331,12581,1189, +1224,8194,281,12627,603,666,604,606,605,9376,949,941,61,65309,65126,8316,8801, +12582,1088,600,1101,1089,1195,643,646,2318,2374,426,645,12359,12455,65386, +8494,63212,951,1384,942,240,7869,7707,1425,1425,1425,1425,477,12641,8364,2503, +2375,2759,33,1372,8252,161,63393,65281,63265,8707,658,495,659,441,442,102, +2398,2654,8457,1614,1614,1611,12552,9429,7711,1601,1414,65234,65235,65236,997, +9792,64256,64259,64260,64257,9326,9346,9366,8210,9632,9644,1498,64314,64314, +1498,1498,1498,1498,1498,1501,1501,1503,1503,1507,1507,1509,1509,713,9673, +1139,53,1637,2539,9316,10126,2411,8541,2795,2667,1637,12325,12836,8325,65301, +63285,9336,9356,1781,8564,8309,3669,64258,402,65350,13209,3615,3613,3663,8704, +52,1636,2538,9315,10125,2410,2794,2666,1636,12324,12835,8324,65300,2551,63284, +9335,9355,1780,8563,8308,9325,9345,9365,3668,715,9377,8260,8355,103,2455,501, +2327,1711,64403,64404,64405,2711,2583,12364,12460,947,611,736,1003,12557,287, +487,291,9430,285,291,289,289,1075,12370,12466,8785,1436,1523,1437,223,1438, +1524,12307,2456,1394,2328,2712,2584,1594,65230,65231,65232,1173,1171,1169, +2394,2650,608,13203,12366,12462,1379,1490,64306,64306,1490,1107,446,660,662, +704,661,705,740,673,674,7713,65351,12372,12468,9378,13228,8711,96,790,768,768, +2387,718,65344,832,62,8805,8923,65310,8819,8823,8807,65125,609,485,12368,171, +187,8249,8250,12464,13080,13257,104,1193,1729,2489,1203,2361,2745,2617,1581, +65186,65187,12399,65188,13098,12495,65418,2637,1569,1569,1569,1569,1569,1569, +1569,1569,1569,12644,1098,8636,8640,13258,1458,1458,1458,1458,1458,1458,1458, +1458,1459,1459,1459,1459,1459,1459,1459,1459,1457,1457,1457,1457,1457,1457, +1457,1457,295,12559,7723,7721,9431,293,7719,7715,7717,1492,9829,9829,9825, +64308,64308,1729,1607,1492,64423,65258,65258,64421,64420,64424,65259,12408, 
+64425,65260,13179,12504,65421,13110,615,13113,1495,1495,614,689,12923,12827, +12909,12622,12813,12402,12498,65419,1460,1460,1460,1460,1460,1460,1460,1460, +7830,65352,1392,3627,12411,12507,65422,1465,1465,1465,1465,1465,1465,1465, +1465,3630,777,777,801,802,13122,1001,8213,795,9832,8962,9379,688,613,12405, +13107,12501,65420,733,779,405,45,63205,65293,65123,63206,8208,105,237,1103, +2439,12583,301,464,9432,238,1110,521,12943,12939,12863,12858,12965,12294, +12289,65380,12855,12963,12847,12861,12957,12864,12950,12854,12843,12850,12964, +12293,12952,12856,12967,12966,12969,12846,12842,12852,12290,12958,12867,12857, +12862,12968,12953,12866,12851,12288,12853,12849,12859,12848,12860,12844,12845, +12295,12942,12938,12948,12944,12940,12941,2311,239,7727,1253,7883,1239,1077, +12917,12821,12903,12615,12807,236,2695,2567,12356,7881,2440,1080,2312,2696, +2568,2624,523,1081,2496,2368,2752,307,12452,65394,12643,732,1452,299,1251, +8787,2623,65353,8710,8734,1387,8747,8993,8993,63733,8992,8992,8745,13061,9688, +9689,9787,1105,303,953,970,912,617,943,9380,2674,12355,12451,65384,2554,616, +63213,12445,12541,297,7725,12585,1102,2495,2367,2751,1141,1143,106,1393,2460, +2332,2716,2588,12560,496,9433,309,669,607,1112,1580,65182,65183,65184,1688, +64395,2461,2333,2717,2589,1403,12292,65354,9381,690,107,1185,2453,7729,1082, +1179,2325,1499,1603,64315,64315,65242,1499,65243,65244,64333,2709,2581,12363, +1220,12459,65398,954,1008,12657,12676,12664,12665,13069,1600,1600,12533,13188, +1616,1613,1183,65392,1181,12558,13193,489,311,9434,311,7731,1412,12369,12465, +65401,1391,12534,312,2454,1093,2326,2710,2582,1582,65190,65191,65192,999,2393, +2649,12920,12824,12906,12619,12810,3586,3589,3587,3588,3675,409,3590,13201, +12365,12461,65399,13077,13078,13076,12910,12814,12896,12593,12800,12595,1116, +7733,13208,13222,65355,13218,12371,13248,3585,12467,65402,13086,1153,12927, +835,9382,13226,1135,13263,670,12367,12463,65400,13240,13246,108,2482,314,2354, 
+2738,2610,3653,65276,65272,65271,65274,65273,65275,65270,65269,1604,955,411, +1500,64316,64316,1500,1500,1500,1500,1500,65246,64714,65247,64713,64715,65010, +65248,64904,64716,65247,65247,9711,410,620,12556,318,316,9435,7741,316,320, +320,7735,7737,794,792,60,8804,8922,65308,8818,8822,8806,65124,622,9612,621, +8356,1388,457,1113,63168,2355,2739,7739,2356,2529,2401,2531,2403,619,65356, +13264,3628,8743,172,8976,8744,3621,383,65102,818,65101,9674,9383,322,8467, +63214,9617,3622,2444,2316,2530,2402,13267,109,2478,175,817,772,717,65507,7743, +2350,2734,2606,1444,1444,12414,63637,63636,3659,63635,63628,63627,3656,63626, +63620,3633,63625,3655,63631,63630,3657,63629,63634,63633,3658,63632,3654, +12510,65423,9794,13127,1470,9794,1455,13187,12551,13268,9436,13221,7745,7747, +1605,65250,65251,65252,64721,64584,13133,12417,13182,12513,65426,1502,64318, +64318,1502,1396,1445,1446,1446,1445,625,13202,65381,183,12914,12818,12900, +12609,12656,12804,12654,12655,12415,12511,65424,8722,800,8854,727,8723,8242, +13130,13129,624,13206,13219,65357,13215,12418,13249,12514,65427,13270,3617, +13223,13224,9384,13227,13235,63215,623,181,181,13186,8811,8810,13196,956, +13197,12416,12512,65425,13205,215,13211,1443,1443,9834,9835,9837,9839,13234, +13238,13244,13241,13239,13247,13245,110,2472,8711,324,2344,2728,2600,12394, +12490,65413,329,13185,12555,160,328,326,9437,7755,326,7749,7751,12397,12493, +65416,8362,13195,2457,2329,2713,2585,3591,12435,626,627,12911,12815,12597, +12897,12598,12596,12648,12801,12647,12646,12395,12491,65414,63641,3661,57, +1641,2543,9320,10130,2415,2799,2671,1641,12329,12840,8329,65305,63289,9340, +9360,1785,8568,8313,9330,9350,9370,3673,460,1114,12531,65437,414,7753,65358, +13210,2467,2339,2723,2595,2345,12398,12494,65417,160,3603,3609,1606,65254, +1722,64415,65255,65255,64722,64587,65256,64725,64590,64653,8716,8713,8713, +8800,8815,8817,8825,8802,8814,8816,8742,8832,8836,8833,8837,1398,9385,13233, 
+8319,241,957,12396,12492,65415,2492,2364,2748,2620,35,65283,65119,884,885, +8470,1504,64320,64320,1504,13237,13243,2462,2334,2718,2590,111,243,3629,629, +1257,1259,2451,12571,335,2321,2705,2377,2761,466,9438,244,7889,7897,7891,7893, +7895,1086,337,525,2323,246,1255,7885,339,12634,731,808,242,2707,1413,12362, +7887,417,7899,7907,7901,7903,7905,337,419,527,12458,65397,12631,1451,333,7763, +7761,2384,969,982,1121,631,1147,1149,974,2768,959,972,65359,49,1633,2535,9312, +10122,2407,8228,8539,63196,2791,2663,1633,189,12321,12832,8321,65297,2548, +63281,9332,9352,1777,188,8560,185,3665,8531,491,493,2579,2635,596,9386,9702, +8997,170,186,8735,2322,2378,248,511,12361,12457,65387,511,63216,1151,245,7757, +7759,12577,8254,65098,773,65097,65100,65099,175,2507,2379,2763,112,13184, +13099,2474,7765,2346,8671,8670,2730,2602,12401,3631,12497,1156,1216,12671,182, +8741,40,64830,63725,63724,8333,65288,65113,8317,63723,65077,41,64831,63736, +63735,8334,65289,65114,8318,63734,65078,8706,1472,1433,13225,1463,1463,1463, +1463,1463,1463,1463,1463,1441,12550,9439,7767,1508,1087,64324,64324,13115, +64323,1662,1402,1508,64343,64344,12410,64345,12506,1191,64334,37,1642,65285, +65130,46,1417,183,65377,63207,65294,65106,63208,834,8869,8240,8359,13194,2475, +2347,2731,2603,966,981,12922,12826,12908,12621,12812,632,3642,981,421,3614, +3612,3616,960,12915,12819,12662,12901,12658,12610,12805,12660,12612,12661, +12663,12659,12404,12500,982,1411,43,799,8853,177,726,65291,65122,8314,65360, +13272,12413,9759,9756,9758,9757,12509,3611,12306,12320,9387,8826,8478,697, +8245,8719,8965,12540,8984,8834,8835,8759,8733,968,1137,1158,13232,12407,12503, +13236,13242,113,2392,1448,1602,65238,65239,65240,1464,1464,1464,1464,1464, +1464,1464,1464,1464,1464,1464,1464,1464,1464,1464,1464,1439,12561,9440,672, +65361,1511,64327,64327,1511,1511,1511,1511,1511,1511,1511,1511,1511,1511,1511, +1511,1511,1511,1511,1511,1511,1511,1511,1511,1511,9388,9833,1467,1467,1467, 
+1467,1467,1467,1467,1467,63,1567,1374,191,63423,894,65311,63295,34,8222,8220, +65282,12318,12317,8221,8216,8219,8219,8217,329,8218,39,65287,114,1404,2480, +341,2352,8730,63717,13230,13231,13229,1471,1471,2736,2608,12425,12521,65431, +2545,2544,612,8758,12566,345,343,9441,343,529,7769,7771,7773,8251,8838,8839, +174,63720,63194,1585,1408,65198,12428,1585,12524,65434,1512,64328,1512,1512, +1512,1512,1512,1512,1512,1512,1512,1512,1512,1512,1512,1512,1512,1512,1512, +1512,1512,1512,1512,8765,1431,1431,8976,638,639,2525,2397,961,637,635,693, +1009,734,12913,12817,12899,12608,12602,12649,12601,12603,12652,12803,12607, +12604,12651,12605,12606,12650,12653,8735,793,8895,12426,12522,65432,730,805, +778,703,1369,796,723,702,825,722,531,13137,7775,636,634,65362,12429,12525, +65435,3619,9389,2524,2353,2652,1681,64397,2528,2400,2784,2500,2372,2756,63217, +9616,633,692,12427,12523,65433,2546,2547,63197,3620,2443,2315,2699,2499,2371, +2755,115,2488,347,7781,1589,2360,65210,65211,65212,2744,2616,12373,12469, +65403,65018,1505,64321,64321,1505,3634,3649,3652,3651,3635,3632,3648,63622, +3637,63621,3636,3650,63624,3639,63623,3638,3640,3641,12569,353,7783,351,601, +1241,1243,602,9442,349,537,7777,7779,7785,828,8243,714,167,1587,65202,65203, +65204,1462,1462,1462,1462,1462,1462,1462,1426,1462,1405,12379,12475,65406,59, +1563,65307,65108,12444,65439,13090,13091,55,1639,2541,9318,10128,2413,8542, +2797,2669,1639,12327,12838,8327,65303,63287,9338,9358,1783,8566,8311,9328, +9348,9368,3671,173,1399,2486,1096,1617,64609,64606,64608,1617,64610,64607, +9618,9619,9617,9618,2358,2742,2614,1427,12565,1097,1588,65206,65207,65208,995, +8362,8362,1456,1456,1456,1456,1456,1456,1456,1456,1456,1211,1005,1513,64329, +64329,64300,64300,64301,64301,1473,1513,64298,64298,64299,64299,642,963,962, +962,1010,12375,12471,65404,1469,1469,8764,1474,12916,12820,12670,12902,12666, +12613,12667,12806,12669,12668,54,1638,2540,9317,10127,2412,2796,2668,1638, 
+12326,12837,8326,65302,63286,9337,9357,1782,8565,8310,9327,2553,9347,9367, +3670,47,65295,383,7835,9786,65363,1475,173,1100,12381,12477,65407,824,823, +3625,3624,3595,3626,32,32,9824,9824,9828,9390,827,13252,13213,9641,9636,13199, +13214,13262,13265,13266,13198,13269,13212,13217,9638,9639,9640,9637,9635, +13275,2487,2359,2743,12617,12677,12672,12594,12645,12611,12614,12600,63218, +163,65505,822,821,8834,8842,8838,8827,8715,12377,12473,65405,1618,8721,9788, +8835,8843,8839,13276,13180,116,2468,8868,8867,2340,2724,2596,1591,65218,65219, +12383,65220,13181,12479,65408,1600,964,1514,64330,64330,64330,1514,359,12554, +357,680,355,1670,64379,64380,64381,64380,9443,7793,355,7831,7787,7789,1090, +1197,1578,65174,64674,64524,65175,12390,64673,64523,1577,65172,65176,64676, +64526,64627,12486,65411,8481,9742,1440,1449,9321,12841,9341,9361,8569,679, +1496,64312,64312,1496,1205,1435,1435,2469,2341,2725,2597,1584,65196,63640, +63639,3660,63638,1579,65178,65179,65180,8707,8756,952,977,977,12921,12825, +12907,12620,12811,9324,9344,9364,3601,429,3602,254,3607,3600,3608,3606,1154, +1644,1644,51,1635,2537,9314,10124,2409,8540,2793,2665,1635,12323,12834,8323, +65299,2550,63283,9334,9354,1779,190,63198,8562,179,3667,13204,12385,12481, +65409,12912,12816,12898,12599,12802,732,816,771,771,864,8764,820,830,8855, +1430,1430,2672,1155,1407,7791,65364,1385,12392,12488,65412,741,745,742,744, +743,445,389,424,900,13095,3599,12308,65117,65081,12309,65118,65082,3605,427, +9391,8482,63722,63195,648,9660,9668,9658,9650,678,1510,64326,64326,1510,1094, +1461,1461,1461,1461,1461,1461,1461,1461,1115,63219,2463,2335,2719,2591,1657, +64359,64360,64361,2464,2336,2720,2592,647,12388,12484,65410,12387,12483,65391, +9323,9343,9363,8571,9331,21316,9351,9371,50,1634,2536,9313,10123,2408,8229, +8229,65072,2792,2664,1634,12322,12833,8322,65298,2549,63282,9333,9353,1778, +8561,443,178,3666,8532,117,250,649,2441,12584,365,468,9444,251,7799,1091,2385, 
+369,533,2313,252,472,7795,474,1265,476,470,7909,249,2697,2569,12358,7911,432, +7913,7921,7915,7917,7919,369,1267,535,12454,65395,1145,12636,363,1263,7803, +2625,65365,95,8215,65343,65075,65103,8746,8704,371,9392,9600,1476,965,971,944, +650,973,797,724,2675,367,1118,12357,12453,65385,1199,1201,361,7801,7797,2442, +2314,2698,2570,2626,2498,2370,2754,2497,2369,2753,118,2357,2741,2613,12535, +1493,64309,64309,64309,1493,64331,64331,1520,1521,9445,7807,1074,1700,64363, +64364,64365,12537,9792,124,781,809,716,712,1406,651,12536,2509,2381,2765,2435, +2307,2691,65366,1400,12446,12542,12443,65438,12538,9393,7805,652,12436,12532, +119,7811,12633,12431,12527,65436,12632,12430,12526,13143,12316,65076,1608, +65262,1572,65158,13277,9446,373,7813,7815,7817,12433,8472,12529,12638,12637, +7809,9702,9675,9689,12302,65091,12303,65092,9671,9672,9663,9661,9667,9665, +12310,12311,9657,9655,9643,9786,9633,9734,9743,12312,12313,9653,9651,12432, +12528,12639,65367,12434,12530,65382,8361,65510,3623,9394,7832,695,653,447,120, +829,12562,9447,7821,7819,1389,958,65368,9395,739,121,13134,2479,253,2351, +12626,2735,2607,12420,12516,65428,12625,3662,12419,12515,65388,1123,9448,375, +255,7823,7925,1610,1746,64431,65266,1574,65162,65163,65164,65267,65268,64733, +64600,64660,1745,12630,165,65509,12629,12678,1450,1450,1099,1273,12673,12675, +12674,1434,7923,436,7927,1397,1111,12642,9775,1410,65369,1497,64313,64313, +1497,1522,64287,12424,12681,12520,65430,12635,12423,12519,65390,1011,12680, +12679,3618,3597,9396,890,837,422,7833,696,7929,654,12422,12684,12518,65429, +12640,1131,1133,1127,1129,12421,12517,65389,12683,12682,2527,2399,122,1382, +378,2395,2651,1592,65222,65223,12374,65224,1586,65200,12470,1429,1428,1432, +1494,64310,64310,1494,12567,382,9449,7825,657,380,380,7827,1079,1177,1247, +12380,12476,48,1632,2534,2406,2790,2662,1632,8320,65296,63280,1776,8304,3664, +65279,8204,8203,950,12563,1386,1218,1078,1175,1245,12376,12472,1454,7829, +65370,12382,12478,9397,656,438,12378,12474, +}; + 
+static const unsigned short agl_dup_offsets[] = { +32,0,124,3,160,6,173,9,175,12,181,15,183,18,266,21,267,24,272,27,273,30, +278,33,279,36,288,39,289,42,290,45,291,48,304,51,310,54,311,57,315,60,316,63, +319,66,320,69,325,72,326,75,329,78,336,81,337,84,342,87,343,90,354,93,355,96, +368,99,369,102,379,105,380,108,383,111,510,114,511,117,700,120,701,123, +732,126,768,129,769,132,771,135,777,138,803,141,901,144,962,147,977,150, +978,153,981,156,982,159,1025,162,1026,165,1027,168,1028,171,1029,174,1030,177, +1031,180,1032,183,1033,186,1034,189,1035,192,1036,195,1038,198,1039,201, +1040,204,1041,207,1042,210,1043,213,1044,216,1045,219,1046,222,1047,225, +1048,228,1049,231,1050,234,1051,237,1052,240,1053,243,1054,246,1055,249, +1056,252,1057,255,1058,258,1059,261,1060,264,1061,267,1062,270,1063,273, +1064,276,1065,279,1066,282,1067,285,1068,288,1069,291,1070,294,1071,297, +1072,300,1073,303,1074,306,1075,309,1076,312,1077,315,1078,318,1079,321, +1080,324,1081,327,1082,330,1083,333,1084,336,1085,339,1086,342,1087,345, +1088,348,1089,351,1090,354,1091,357,1092,360,1093,363,1094,366,1095,369, +1096,372,1097,375,1098,378,1099,381,1100,384,1101,387,1102,390,1103,393, +1105,396,1106,399,1107,402,1108,405,1109,408,1110,411,1111,414,1112,417, +1113,420,1114,423,1115,426,1116,429,1118,432,1119,435,1122,438,1123,441, +1138,444,1139,447,1140,450,1141,453,1168,456,1169,459,1241,462,1425,465, +1430,470,1431,473,1435,476,1443,479,1444,482,1445,485,1446,488,1447,491, +1450,494,1456,497,1457,508,1458,518,1459,528,1460,538,1461,548,1462,558, +1463,568,1464,578,1465,596,1467,606,1468,616,1469,620,1470,624,1471,627, +1472,631,1473,634,1474,637,1475,640,1488,643,1489,647,1490,651,1491,655, +1492,679,1493,683,1494,687,1495,691,1496,695,1497,699,1498,703,1499,711, +1500,715,1501,723,1502,727,1503,731,1504,735,1505,739,1506,743,1507,747, +1508,751,1509,755,1510,759,1511,763,1512,787,1513,811,1514,815,1520,819, +1521,822,1522,825,1548,828,1563,831,1567,834,1569,837,1570,848,1571,851, 
+1572,854,1573,857,1574,860,1575,863,1576,866,1577,869,1578,872,1579,875, +1580,878,1581,881,1582,884,1583,887,1584,890,1585,893,1586,897,1587,900, +1588,903,1589,906,1590,909,1591,912,1592,915,1593,918,1594,921,1600,924, +1601,929,1602,932,1603,935,1604,938,1605,941,1606,944,1607,947,1608,950, +1609,953,1610,956,1611,959,1612,962,1613,966,1614,969,1615,973,1616,977, +1617,980,1618,984,1632,987,1633,991,1634,995,1635,999,1636,1003,1637,1007, +1638,1011,1639,1015,1640,1019,1641,1023,1642,1027,1643,1030,1644,1033, +1645,1036,1657,1040,1662,1043,1670,1046,1672,1049,1681,1052,1688,1055, +1700,1058,1711,1061,1722,1064,1729,1067,1746,1070,8204,1073,8213,1076, +8215,1079,8219,1082,8229,1085,8353,1088,8356,1091,8362,1094,8364,1099, +8453,1102,8467,1105,8470,1108,8486,1111,8616,1114,8656,1117,8658,1120, +8660,1123,8704,1126,8707,1129,8710,1132,8711,1135,8713,1138,8735,1141, +8764,1144,8773,1147,8834,1150,8835,1153,8838,1156,8839,1159,8853,1162, +8855,1165,8976,1168,8992,1171,8993,1174,9617,1177,9618,1180,9619,1183, +9632,1186,9633,1189,9642,1192,9643,1195,9644,1198,9650,1201,9658,1204, +9660,1207,9668,1210,9675,1213,9679,1216,9688,1219,9689,1222,9702,1225, +9786,1228,9787,1231,9788,1234,9792,1237,9794,1240,9824,1243,9827,1246, +9829,1249,9835,1252,64287,1255,64298,1260,64299,1264,64300,1268,64301,1271, +64305,1274,64306,1277,64307,1280,64308,1283,64309,1286,64310,1291,64312,1294, +64313,1297,64314,1300,64315,1303,64316,1306,64318,1309,64320,1312,64321,1315, +64324,1318,64326,1321,64327,1324,64329,1327,64330,1330,64331,1334,64380,1338, +65247,1341,65255,1345,65258,1348,65267,1351,65268,1354, +}; + +static const char *agl_dup_names[] = { +"space","spacehackarabic",0,"bar","verticalbar",0,"nbspace", +"nonbreakingspace",0,"sfthyphen","softhyphen",0,"macron","overscore",0,"mu", +"mu1",0,"middot","periodcentered",0,"Cdot","Cdotaccent",0,"cdot","cdotaccent", +0,"Dcroat","Dslash",0,"dcroat","dmacron",0,"Edot","Edotaccent",0,"edot", 
+"edotaccent",0,"Gdot","Gdotaccent",0,"gdot","gdotaccent",0,"Gcedilla", +"Gcommaaccent",0,"gcedilla","gcommaaccent",0,"Idot","Idotaccent",0,"Kcedilla", +"Kcommaaccent",0,"kcedilla","kcommaaccent",0,"Lcedilla","Lcommaaccent",0, +"lcedilla","lcommaaccent",0,"Ldot","Ldotaccent",0,"ldot","ldotaccent",0, +"Ncedilla","Ncommaaccent",0,"ncedilla","ncommaaccent",0,"napostrophe", +"quoterightn",0,"Odblacute","Ohungarumlaut",0,"odblacute","ohungarumlaut",0, +"Rcedilla","Rcommaaccent",0,"rcedilla","rcommaaccent",0,"Tcedilla", +"Tcommaaccent",0,"tcedilla","tcommaaccent",0,"Udblacute","Uhungarumlaut",0, +"udblacute","uhungarumlaut",0,"Zdot","Zdotaccent",0,"zdot","zdotaccent",0, +"longs","slong",0,"Oslashacute","Ostrokeacute",0,"oslashacute","ostrokeacute", +0,"afii57929","apostrophemod",0,"afii64937","commareversedmod",0,"ilde", +"tilde",0,"gravecmb","gravecomb",0,"acutecmb","acutecomb",0,"tildecmb", +"tildecomb",0,"hookabovecomb","hookcmb",0,"dotbelowcmb","dotbelowcomb",0, +"dialytikatonos","dieresistonos",0,"sigma1","sigmafinal",0,"theta1", +"thetasymbolgreek",0,"Upsilon1","Upsilonhooksymbol",0,"phi1","phisymbolgreek", +0,"omega1","pisymbolgreek",0,"Iocyrillic","afii10023",0,"Djecyrillic", +"afii10051",0,"Gjecyrillic","afii10052",0,"Ecyrillic","afii10053",0, +"Dzecyrillic","afii10054",0,"Icyrillic","afii10055",0,"Yicyrillic", +"afii10056",0,"Jecyrillic","afii10057",0,"Ljecyrillic","afii10058",0, +"Njecyrillic","afii10059",0,"Tshecyrillic","afii10060",0,"Kjecyrillic", +"afii10061",0,"Ushortcyrillic","afii10062",0,"Dzhecyrillic","afii10145",0, +"Acyrillic","afii10017",0,"Becyrillic","afii10018",0,"Vecyrillic","afii10019", +0,"Gecyrillic","afii10020",0,"Decyrillic","afii10021",0,"Iecyrillic", +"afii10022",0,"Zhecyrillic","afii10024",0,"Zecyrillic","afii10025",0, +"Iicyrillic","afii10026",0,"Iishortcyrillic","afii10027",0,"Kacyrillic", +"afii10028",0,"Elcyrillic","afii10029",0,"Emcyrillic","afii10030",0, 
+"Encyrillic","afii10031",0,"Ocyrillic","afii10032",0,"Pecyrillic","afii10033", +0,"Ercyrillic","afii10034",0,"Escyrillic","afii10035",0,"Tecyrillic", +"afii10036",0,"Ucyrillic","afii10037",0,"Efcyrillic","afii10038",0, +"Khacyrillic","afii10039",0,"Tsecyrillic","afii10040",0,"Checyrillic", +"afii10041",0,"Shacyrillic","afii10042",0,"Shchacyrillic","afii10043",0, +"Hardsigncyrillic","afii10044",0,"Yericyrillic","afii10045",0, +"Softsigncyrillic","afii10046",0,"Ereversedcyrillic","afii10047",0, +"IUcyrillic","afii10048",0,"IAcyrillic","afii10049",0,"acyrillic","afii10065", +0,"afii10066","becyrillic",0,"afii10067","vecyrillic",0,"afii10068", +"gecyrillic",0,"afii10069","decyrillic",0,"afii10070","iecyrillic",0, +"afii10072","zhecyrillic",0,"afii10073","zecyrillic",0,"afii10074", +"iicyrillic",0,"afii10075","iishortcyrillic",0,"afii10076","kacyrillic",0, +"afii10077","elcyrillic",0,"afii10078","emcyrillic",0,"afii10079", +"encyrillic",0,"afii10080","ocyrillic",0,"afii10081","pecyrillic",0, +"afii10082","ercyrillic",0,"afii10083","escyrillic",0,"afii10084", +"tecyrillic",0,"afii10085","ucyrillic",0,"afii10086","efcyrillic",0, +"afii10087","khacyrillic",0,"afii10088","tsecyrillic",0,"afii10089", +"checyrillic",0,"afii10090","shacyrillic",0,"afii10091","shchacyrillic",0, +"afii10092","hardsigncyrillic",0,"afii10093","yericyrillic",0,"afii10094", +"softsigncyrillic",0,"afii10095","ereversedcyrillic",0,"afii10096", +"iucyrillic",0,"afii10097","iacyrillic",0,"afii10071","iocyrillic",0, +"afii10099","djecyrillic",0,"afii10100","gjecyrillic",0,"afii10101", +"ecyrillic",0,"afii10102","dzecyrillic",0,"afii10103","icyrillic",0, +"afii10104","yicyrillic",0,"afii10105","jecyrillic",0,"afii10106", +"ljecyrillic",0,"afii10107","njecyrillic",0,"afii10108","tshecyrillic",0, +"afii10109","kjecyrillic",0,"afii10110","ushortcyrillic",0,"afii10193", +"dzhecyrillic",0,"Yatcyrillic","afii10146",0,"afii10194","yatcyrillic",0, 
+"Fitacyrillic","afii10147",0,"afii10195","fitacyrillic",0,"Izhitsacyrillic", +"afii10148",0,"afii10196","izhitsacyrillic",0,"Gheupturncyrillic","afii10050", +0,"afii10098","gheupturncyrillic",0,"afii10846","schwacyrillic",0, +"etnahtafoukhhebrew","etnahtafoukhlefthebrew","etnahtahebrew", +"etnahtalefthebrew",0,"tipehahebrew","tipehalefthebrew",0,"reviahebrew", +"reviamugrashhebrew",0,"tevirhebrew","tevirlefthebrew",0,"munahhebrew", +"munahlefthebrew",0,"mahapakhhebrew","mahapakhlefthebrew",0,"merkhahebrew", +"merkhalefthebrew",0,"merkhakefulahebrew","merkhakefulalefthebrew",0, +"dargahebrew","dargalefthebrew",0,"yerahbenyomohebrew", +"yerahbenyomolefthebrew",0,"afii57799","sheva","sheva115","sheva15","sheva22", +"sheva2e","shevahebrew","shevanarrowhebrew","shevaquarterhebrew", +"shevawidehebrew",0,"afii57801","hatafsegol","hatafsegol17","hatafsegol24", +"hatafsegol30","hatafsegolhebrew","hatafsegolnarrowhebrew", +"hatafsegolquarterhebrew","hatafsegolwidehebrew",0,"afii57800","hatafpatah", +"hatafpatah16","hatafpatah23","hatafpatah2f","hatafpatahhebrew", +"hatafpatahnarrowhebrew","hatafpatahquarterhebrew","hatafpatahwidehebrew",0, +"afii57802","hatafqamats","hatafqamats1b","hatafqamats28","hatafqamats34", +"hatafqamatshebrew","hatafqamatsnarrowhebrew","hatafqamatsquarterhebrew", +"hatafqamatswidehebrew",0,"afii57793","hiriq","hiriq14","hiriq21","hiriq2d", +"hiriqhebrew","hiriqnarrowhebrew","hiriqquarterhebrew","hiriqwidehebrew",0, +"afii57794","tsere","tsere12","tsere1e","tsere2b","tserehebrew", +"tserenarrowhebrew","tserequarterhebrew","tserewidehebrew",0,"afii57795", +"segol","segol13","segol1f","segol2c","segolhebrew","segolnarrowhebrew", +"segolquarterhebrew","segolwidehebrew",0,"afii57798","patah","patah11", +"patah1d","patah2a","patahhebrew","patahnarrowhebrew","patahquarterhebrew", +"patahwidehebrew",0,"afii57797","qamats","qamats10","qamats1a","qamats1c", +"qamats27","qamats29","qamats33","qamatsde","qamatshebrew", 
+"qamatsnarrowhebrew","qamatsqatanhebrew","qamatsqatannarrowhebrew", +"qamatsqatanquarterhebrew","qamatsqatanwidehebrew","qamatsquarterhebrew", +"qamatswidehebrew",0,"afii57806","holam","holam19","holam26","holam32", +"holamhebrew","holamnarrowhebrew","holamquarterhebrew","holamwidehebrew",0, +"afii57796","qubuts","qubuts18","qubuts25","qubuts31","qubutshebrew", +"qubutsnarrowhebrew","qubutsquarterhebrew","qubutswidehebrew",0,"afii57807", +"dagesh","dageshhebrew",0,"afii57839","siluqhebrew","siluqlefthebrew",0, +"afii57645","maqafhebrew",0,"afii57841","rafe","rafehebrew",0,"afii57842", +"paseqhebrew",0,"afii57804","shindothebrew",0,"afii57803","sindothebrew",0, +"afii57658","sofpasuqhebrew",0,"afii57664","alef","alefhebrew",0,"afii57665", +"bet","bethebrew",0,"afii57666","gimel","gimelhebrew",0,"afii57667","dalet", +"dalethatafpatah","dalethatafpatahhebrew","dalethatafsegol", +"dalethatafsegolhebrew","dalethebrew","dalethiriq","dalethiriqhebrew", +"daletholam","daletholamhebrew","daletpatah","daletpatahhebrew","daletqamats", +"daletqamatshebrew","daletqubuts","daletqubutshebrew","daletsegol", +"daletsegolhebrew","daletsheva","daletshevahebrew","dalettsere", +"dalettserehebrew",0,"afii57668","he","hehebrew",0,"afii57669","vav", +"vavhebrew",0,"afii57670","zayin","zayinhebrew",0,"afii57671","het", +"hethebrew",0,"afii57672","tet","tethebrew",0,"afii57673","yod","yodhebrew",0, +"afii57674","finalkaf","finalkafhebrew","finalkafqamats", +"finalkafqamatshebrew","finalkafsheva","finalkafshevahebrew",0,"afii57675", +"kaf","kafhebrew",0,"afii57676","lamed","lamedhebrew","lamedholam", +"lamedholamdagesh","lamedholamdageshhebrew","lamedholamhebrew",0,"afii57677", +"finalmem","finalmemhebrew",0,"afii57678","mem","memhebrew",0,"afii57679", +"finalnun","finalnunhebrew",0,"afii57680","nun","nunhebrew",0,"afii57681", +"samekh","samekhhebrew",0,"afii57682","ayin","ayinhebrew",0,"afii57683", +"finalpe","finalpehebrew",0,"afii57684","pe","pehebrew",0,"afii57685", 
+"finaltsadi","finaltsadihebrew",0,"afii57686","tsadi","tsadihebrew",0, +"afii57687","qof","qofhatafpatah","qofhatafpatahhebrew","qofhatafsegol", +"qofhatafsegolhebrew","qofhebrew","qofhiriq","qofhiriqhebrew","qofholam", +"qofholamhebrew","qofpatah","qofpatahhebrew","qofqamats","qofqamatshebrew", +"qofqubuts","qofqubutshebrew","qofsegol","qofsegolhebrew","qofsheva", +"qofshevahebrew","qoftsere","qoftserehebrew",0,"afii57688","resh", +"reshhatafpatah","reshhatafpatahhebrew","reshhatafsegol", +"reshhatafsegolhebrew","reshhebrew","reshhiriq","reshhiriqhebrew","reshholam", +"reshholamhebrew","reshpatah","reshpatahhebrew","reshqamats", +"reshqamatshebrew","reshqubuts","reshqubutshebrew","reshsegol", +"reshsegolhebrew","reshsheva","reshshevahebrew","reshtsere","reshtserehebrew", +0,"afii57689","shin","shinhebrew",0,"afii57690","tav","tavhebrew",0, +"afii57716","vavvavhebrew",0,"afii57717","vavyodhebrew",0,"afii57718", +"yodyodhebrew",0,"afii57388","commaarabic",0,"afii57403","semicolonarabic",0, +"afii57407","questionarabic",0,"afii57409","hamzaarabic","hamzadammaarabic", +"hamzadammatanarabic","hamzafathaarabic","hamzafathatanarabic", +"hamzalowarabic","hamzalowkasraarabic","hamzalowkasratanarabic", +"hamzasukunarabic",0,"afii57410","alefmaddaabovearabic",0,"afii57411", +"alefhamzaabovearabic",0,"afii57412","wawhamzaabovearabic",0,"afii57413", +"alefhamzabelowarabic",0,"afii57414","yehhamzaabovearabic",0,"afii57415", +"alefarabic",0,"afii57416","beharabic",0,"afii57417","tehmarbutaarabic",0, +"afii57418","teharabic",0,"afii57419","theharabic",0,"afii57420","jeemarabic", +0,"afii57421","haharabic",0,"afii57422","khaharabic",0,"afii57423", +"dalarabic",0,"afii57424","thalarabic",0,"afii57425","reharabic", +"rehyehaleflamarabic",0,"afii57426","zainarabic",0,"afii57427","seenarabic",0, +"afii57428","sheenarabic",0,"afii57429","sadarabic",0,"afii57430","dadarabic", +0,"afii57431","taharabic",0,"afii57432","zaharabic",0,"afii57433","ainarabic", 
+0,"afii57434","ghainarabic",0,"afii57440","kashidaautoarabic", +"kashidaautonosidebearingarabic","tatweelarabic",0,"afii57441","feharabic",0, +"afii57442","qafarabic",0,"afii57443","kafarabic",0,"afii57444","lamarabic",0, +"afii57445","meemarabic",0,"afii57446","noonarabic",0,"afii57470","heharabic", +0,"afii57448","wawarabic",0,"afii57449","alefmaksuraarabic",0,"afii57450", +"yeharabic",0,"afii57451","fathatanarabic",0,"afii57452", +"dammatanaltonearabic","dammatanarabic",0,"afii57453","kasratanarabic",0, +"afii57454","fathaarabic","fathalowarabic",0,"afii57455","dammaarabic", +"dammalowarabic",0,"afii57456","kasraarabic",0,"afii57457","shaddaarabic", +"shaddafathatanarabic",0,"afii57458","sukunarabic",0,"afii57392","zeroarabic", +"zerohackarabic",0,"afii57393","onearabic","onehackarabic",0,"afii57394", +"twoarabic","twohackarabic",0,"afii57395","threearabic","threehackarabic",0, +"afii57396","fourarabic","fourhackarabic",0,"afii57397","fivearabic", +"fivehackarabic",0,"afii57398","sixarabic","sixhackarabic",0,"afii57399", +"sevenarabic","sevenhackarabic",0,"afii57400","eightarabic","eighthackarabic", +0,"afii57401","ninearabic","ninehackarabic",0,"afii57381","percentarabic",0, +"decimalseparatorarabic","decimalseparatorpersian",0, +"thousandsseparatorarabic","thousandsseparatorpersian",0,"afii63167", +"asteriskaltonearabic","asteriskarabic",0,"afii57511","tteharabic",0, +"afii57506","peharabic",0,"afii57507","tcheharabic",0,"afii57512", +"ddalarabic",0,"afii57513","rreharabic",0,"afii57508","jeharabic",0, +"afii57505","veharabic",0,"afii57509","gafarabic",0,"afii57514", +"noonghunnaarabic",0,"haaltonearabic","hehaltonearabic",0,"afii57519", +"yehbarreearabic",0,"afii61664","zerowidthnonjoiner",0,"afii00208", +"horizontalbar",0,"dbllowline","underscoredbl",0,"quoteleftreversed", +"quotereversed",0,"twodotenleader","twodotleader",0,"colonmonetary", +"colonsign",0,"afii08941","lira",0,"afii57636","newsheqelsign","sheqel", 
+"sheqelhebrew",0,"Euro","euro",0,"afii61248","careof",0,"afii61289","lsquare", +0,"afii61352","numero",0,"Ohm","Omega",0,"arrowupdnbse","arrowupdownbase",0, +"arrowdblleft","arrowleftdbl",0,"arrowdblright","dblarrowright",0, +"arrowdblboth","dblarrowleft",0,"forall","universal",0,"existential", +"thereexists",0,"Delta","increment",0,"gradient","nabla",0,"notelement", +"notelementof",0,"orthogonal","rightangle",0,"similar","tildeoperator",0, +"approximatelyequal","congruent",0,"propersubset","subset",0,"propersuperset", +"superset",0,"reflexsubset","subsetorequal",0,"reflexsuperset", +"supersetorequal",0,"circleplus","pluscircle",0,"circlemultiply", +"timescircle",0,"logicalnotreversed","revlogicalnot",0,"integraltop", +"integraltp",0,"integralbottom","integralbt",0,"ltshade","shadelight",0, +"shade","shademedium",0,"dkshade","shadedark",0,"blacksquare","filledbox",0, +"H22073","whitesquare",0,"H18543","blacksmallsquare",0,"H18551", +"whitesmallsquare",0,"blackrectangle","filledrect",0, +"blackuppointingtriangle","triagup",0,"blackrightpointingpointer","triagrt",0, +"blackdownpointingtriangle","triagdn",0,"blackleftpointingpointer","triaglf", +0,"circle","whitecircle",0,"H18533","blackcircle",0,"bulletinverse", +"invbullet",0,"invcircle","whitecircleinverse",0,"openbullet","whitebullet",0, +"smileface","whitesmilingface",0,"blacksmilingface","invsmileface",0, +"compass","sun",0,"female","venus",0,"male","mars",0,"spade","spadesuitblack", +0,"club","clubsuitblack",0,"heart","heartsuitblack",0,"eighthnotebeamed", +"musicalnotedbl",0,"afii57705","doubleyodpatah","doubleyodpatahhebrew", +"yodyodpatahhebrew",0,"afii57694","shinshindot","shinshindothebrew",0, +"afii57695","shinsindot","shinsindothebrew",0,"shindageshshindot", +"shindageshshindothebrew",0,"shindageshsindot","shindageshsindothebrew",0, +"betdagesh","betdageshhebrew",0,"gimeldagesh","gimeldageshhebrew",0, +"daletdagesh","daletdageshhebrew",0,"hedagesh","hedageshhebrew",0,"afii57723", 
+"vavdagesh","vavdagesh65","vavdageshhebrew",0,"zayindagesh", +"zayindageshhebrew",0,"tetdagesh","tetdageshhebrew",0,"yoddagesh", +"yoddageshhebrew",0,"finalkafdagesh","finalkafdageshhebrew",0,"kafdagesh", +"kafdageshhebrew",0,"lameddagesh","lameddageshhebrew",0,"memdagesh", +"memdageshhebrew",0,"nundagesh","nundageshhebrew",0,"samekhdagesh", +"samekhdageshhebrew",0,"pedagesh","pedageshhebrew",0,"tsadidagesh", +"tsadidageshhebrew",0,"qofdagesh","qofdageshhebrew",0,"shindagesh", +"shindageshhebrew",0,"tavdages","tavdagesh","tavdageshhebrew",0,"afii57700", +"vavholam","vavholamhebrew",0,"tchehinitialarabic","tchehmeeminitialarabic",0, +"laminitialarabic","lammeemjeeminitialarabic","lammeemkhahinitialarabic",0, +"noonhehinitialarabic","nooninitialarabic",0,"hehfinalalttwoarabic", +"hehfinalarabic",0,"alefmaksurainitialarabic","yehinitialarabic",0, +"alefmaksuramedialarabic","yehmedialarabic",0, +}; diff --git a/source/pdf/pdf-image.c b/source/pdf/pdf-image.c new file mode 100644 index 00000000..719841d5 --- /dev/null +++ b/source/pdf/pdf-image.c @@ -0,0 +1,285 @@ +#include "mupdf/pdf.h" + +static fz_image *pdf_load_jpx(pdf_document *xref, pdf_obj *dict, int forcemask); + +static fz_image * +pdf_load_image_imp(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask) +{ + fz_stream *stm = NULL; + fz_image *image = NULL; + pdf_obj *obj, *res; + + int w, h, bpc, n; + int imagemask; + int interpolate; + int indexed; + fz_image *mask = NULL; /* explicit mask/soft mask image */ + int usecolorkey = 0; + fz_colorspace *colorspace = NULL; + float decode[FZ_MAX_COLORS * 2]; + int colorkey[FZ_MAX_COLORS * 2]; + + int i; + fz_context *ctx = xref->ctx; + + fz_var(stm); + fz_var(mask); + fz_var(image); + + fz_try(ctx) + { + /* special case for JPEG2000 images */ + if (pdf_is_jpx_image(ctx, dict)) + { + image = pdf_load_jpx(xref, dict, forcemask); + + if (forcemask) + { + fz_pixmap *mask_pixmap; + if (image->n != 2) + fz_throw(ctx, FZ_ERROR_GENERIC, "soft mask 
must be grayscale"); + mask_pixmap = fz_alpha_from_gray(ctx, image->tile, 1); + fz_drop_pixmap(ctx, image->tile); + image->tile = mask_pixmap; + } + break; /* Out of fz_try */ + } + + w = pdf_to_int(pdf_dict_getsa(dict, "Width", "W")); + h = pdf_to_int(pdf_dict_getsa(dict, "Height", "H")); + bpc = pdf_to_int(pdf_dict_getsa(dict, "BitsPerComponent", "BPC")); + if (bpc == 0) + bpc = 8; + imagemask = pdf_to_bool(pdf_dict_getsa(dict, "ImageMask", "IM")); + interpolate = pdf_to_bool(pdf_dict_getsa(dict, "Interpolate", "I")); + + indexed = 0; + usecolorkey = 0; + mask = NULL; + + if (imagemask) + bpc = 1; + + if (w <= 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "image width is zero (or less)"); + if (h <= 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "image height is zero (or less)"); + if (bpc <= 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is zero (or less)"); + if (bpc > 16) + fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is too large: %d", bpc); + if (w > (1 << 16)) + fz_throw(ctx, FZ_ERROR_GENERIC, "image is too wide"); + if (h > (1 << 16)) + fz_throw(ctx, FZ_ERROR_GENERIC, "image is too high"); + + obj = pdf_dict_getsa(dict, "ColorSpace", "CS"); + if (obj && !imagemask && !forcemask) + { + /* colorspace resource lookup is only done for inline images */ + if (pdf_is_name(obj)) + { + res = pdf_dict_get(pdf_dict_gets(rdb, "ColorSpace"), obj); + if (res) + obj = res; + } + + colorspace = pdf_load_colorspace(xref, obj); + + if (!strcmp(colorspace->name, "Indexed")) + indexed = 1; + + n = colorspace->n; + } + else + { + n = 1; + } + + obj = pdf_dict_getsa(dict, "Decode", "D"); + if (obj) + { + for (i = 0; i < n * 2; i++) + decode[i] = pdf_to_real(pdf_array_get(obj, i)); + } + else + { + float maxval = indexed ? (1 << bpc) - 1 : 1; + for (i = 0; i < n * 2; i++) + decode[i] = i & 1 ? 
maxval : 0; + } + + obj = pdf_dict_getsa(dict, "SMask", "Mask"); + if (pdf_is_dict(obj)) + { + /* Not allowed for inline images or soft masks */ + if (cstm) + fz_warn(ctx, "Ignoring invalid inline image soft mask"); + else if (forcemask) + fz_warn(ctx, "Ignoring recursive image soft mask"); + else + mask = (fz_image *)pdf_load_image_imp(xref, rdb, obj, NULL, 1); + } + else if (pdf_is_array(obj)) + { + usecolorkey = 1; + for (i = 0; i < n * 2; i++) + { + if (!pdf_is_int(pdf_array_get(obj, i))) + { + fz_warn(ctx, "invalid value in color key mask"); + usecolorkey = 0; + } + colorkey[i] = pdf_to_int(pdf_array_get(obj, i)); + } + } + + /* Now, do we load a ref, or do we load the actual thing? */ + if (!cstm) + { + /* Just load the compressed image data now and we can + * decode it on demand. */ + int num = pdf_to_num(dict); + int gen = pdf_to_gen(dict); + fz_compressed_buffer *buffer = pdf_load_compressed_stream(xref, num, gen); + image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? colorkey : NULL, buffer, mask); + break; /* Out of fz_try */ + } + + /* We need to decompress the image now */ + if (cstm) + { + int stride = (w * n * bpc + 7) / 8; + stm = pdf_open_inline_stream(xref, dict, stride * h, cstm, NULL); + } + else + { + stm = pdf_open_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); + } + + image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? 
colorkey : NULL, NULL, mask); + image->tile = fz_decomp_image_from_stream(ctx, stm, image, cstm != NULL, indexed, 0, 0); + } + fz_catch(ctx) + { + fz_drop_image(ctx, image); + fz_rethrow(ctx); + } + return image; +} + +fz_image * +pdf_load_inline_image(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *file) +{ + return (fz_image *)pdf_load_image_imp(xref, rdb, dict, file, 0); +} + +int +pdf_is_jpx_image(fz_context *ctx, pdf_obj *dict) +{ + pdf_obj *filter; + int i, n; + + filter = pdf_dict_gets(dict, "Filter"); + if (!strcmp(pdf_to_name(filter), "JPXDecode")) + return 1; + n = pdf_array_len(filter); + for (i = 0; i < n; i++) + if (!strcmp(pdf_to_name(pdf_array_get(filter, i)), "JPXDecode")) + return 1; + return 0; +} + +static fz_image * +pdf_load_jpx(pdf_document *xref, pdf_obj *dict, int forcemask) +{ + fz_buffer *buf = NULL; + fz_colorspace *colorspace = NULL; + fz_pixmap *img = NULL; + pdf_obj *obj; + fz_context *ctx = xref->ctx; + int indexed = 0; + fz_image *mask = NULL; + + fz_var(img); + fz_var(buf); + fz_var(colorspace); + fz_var(mask); + + buf = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); + + /* FIXME: We can't handle decode arrays for indexed images currently */ + fz_try(ctx) + { + obj = pdf_dict_gets(dict, "ColorSpace"); + if (obj) + { + colorspace = pdf_load_colorspace(xref, obj); + indexed = !strcmp(colorspace->name, "Indexed"); + } + + img = fz_load_jpx(ctx, buf->data, buf->len, colorspace, indexed); + + if (img && colorspace == NULL) + colorspace = fz_keep_colorspace(ctx, img->colorspace); + + fz_drop_buffer(ctx, buf); + buf = NULL; + + obj = pdf_dict_getsa(dict, "SMask", "Mask"); + if (pdf_is_dict(obj)) + { + if (forcemask) + fz_warn(ctx, "Ignoring recursive JPX soft mask"); + else + mask = (fz_image *)pdf_load_image_imp(xref, NULL, obj, NULL, 1); + } + + obj = pdf_dict_getsa(dict, "Decode", "D"); + if (obj && !indexed) + { + float decode[FZ_MAX_COLORS * 2]; + int i; + + for (i = 0; i < img->n * 2; i++) + decode[i] = 
pdf_to_real(pdf_array_get(obj, i)); + + fz_decode_tile(img, decode); + } + } + fz_catch(ctx) + { + if (colorspace) + fz_drop_colorspace(ctx, colorspace); + fz_drop_buffer(ctx, buf); + fz_drop_pixmap(ctx, img); + fz_rethrow(ctx); + } + return fz_new_image_from_pixmap(ctx, img, mask); +} + +static int +fz_image_size(fz_context *ctx, fz_image *im) +{ + if (im == NULL) + return 0; + return sizeof(*im) + fz_pixmap_size(ctx, im->tile) + (im->buffer && im->buffer->buffer ? im->buffer->buffer->cap : 0); +} + +fz_image * +pdf_load_image(pdf_document *xref, pdf_obj *dict) +{ + fz_context *ctx = xref->ctx; + fz_image *image; + + if ((image = pdf_find_item(ctx, fz_free_image, dict))) + { + return (fz_image *)image; + } + + image = pdf_load_image_imp(xref, NULL, dict, NULL, 0); + + pdf_store_item(ctx, dict, image, fz_image_size(ctx, image)); + + return (fz_image *)image; +} diff --git a/source/pdf/pdf-interpret.c b/source/pdf/pdf-interpret.c new file mode 100644 index 00000000..43a6d466 --- /dev/null +++ b/source/pdf/pdf-interpret.c @@ -0,0 +1,3111 @@ +#include "mupdf/pdf.h" + +#define TILE + +typedef struct pdf_material_s pdf_material; +typedef struct pdf_gstate_s pdf_gstate; +typedef struct pdf_csi_s pdf_csi; + +enum +{ + PDF_FILL, + PDF_STROKE, +}; + +enum +{ + PDF_MAT_NONE, + PDF_MAT_COLOR, + PDF_MAT_PATTERN, + PDF_MAT_SHADE, +}; + +struct pdf_material_s +{ + int kind; + fz_colorspace *colorspace; + pdf_pattern *pattern; + fz_shade *shade; + int gstate_num; + float alpha; + float v[FZ_MAX_COLORS]; +}; + +struct pdf_gstate_s +{ + fz_matrix ctm; + int clip_depth; + + /* path stroking */ + fz_stroke_state *stroke_state; + + /* materials */ + pdf_material stroke; + pdf_material fill; + + /* text state */ + float char_space; + float word_space; + float scale; + float leading; + pdf_font_desc *font; + float size; + int render; + float rise; + + /* transparency */ + int blendmode; + pdf_xobject *softmask; + fz_matrix softmask_ctm; + float softmask_bc[FZ_MAX_COLORS]; + int 
luminosity; +}; + +struct pdf_csi_s +{ + fz_device *dev; + pdf_document *xref; + + int nested_depth; + + /* usage mode for optional content groups */ + char *event; /* "View", "Print", "Export" */ + + /* interpreter stack */ + pdf_obj *obj; + char name[256]; + unsigned char string[256]; + int string_len; + float stack[32]; + int top; + + int xbalance; + int in_text; + int in_hidden_ocg; + + /* path object state */ + fz_path *path; + int clip; + int clip_even_odd; + + /* text object state */ + fz_text *text; + fz_rect text_bbox; + fz_matrix tlm; + fz_matrix tm; + int text_mode; + int accumulate; + + /* graphics state */ + pdf_gstate *gstate; + int gcap; + int gtop; + int gbot; + int gparent; + + /* cookie support */ + fz_cookie *cookie; +}; + +static void pdf_run_contents_object(pdf_csi *csi, pdf_obj *rdb, pdf_obj *contents); +static void pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, const fz_matrix *transform); +static void pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, pdf_gstate *pat_gstate, const fz_rect *area, int what); + +static int +ocg_intents_include(pdf_ocg_descriptor *desc, char *name) +{ + int i, len; + + if (strcmp(name, "All") == 0) + return 1; + + /* In the absence of a specified intent, it's 'View' */ + if (!desc->intent) + return (strcmp(name, "View") == 0); + + if (pdf_is_name(desc->intent)) + { + char *intent = pdf_to_name(desc->intent); + if (strcmp(intent, "All") == 0) + return 1; + return (strcmp(intent, name) == 0); + } + if (!pdf_is_array(desc->intent)) + return 0; + + len = pdf_array_len(desc->intent); + for (i=0; i < len; i++) + { + char *intent = pdf_to_name(pdf_array_get(desc->intent, i)); + if (strcmp(intent, "All") == 0) + return 1; + if (strcmp(intent, name) == 0) + return 1; + } + return 0; +} + +static int +pdf_is_hidden_ocg(pdf_obj *ocg, pdf_csi *csi, pdf_obj *rdb) +{ + char event_state[16]; + pdf_obj *obj, *obj2; + char *type; + pdf_ocg_descriptor *desc = csi->xref->ocg; + fz_context *ctx = csi->dev->ctx; 
+ + /* Avoid infinite recursions */ + if (pdf_obj_marked(ocg)) + return 0; + + /* If no ocg descriptor, everything is visible */ + if (!desc) + return 0; + + /* If we've been handed a name, look it up in the properties. */ + if (pdf_is_name(ocg)) + { + ocg = pdf_dict_gets(pdf_dict_gets(rdb, "Properties"), pdf_to_name(ocg)); + } + /* If we haven't been given an ocg at all, then we're visible */ + if (!ocg) + return 0; + + fz_strlcpy(event_state, csi->event, sizeof event_state); + fz_strlcat(event_state, "State", sizeof event_state); + + type = pdf_to_name(pdf_dict_gets(ocg, "Type")); + + if (strcmp(type, "OCG") == 0) + { + /* An Optional Content Group */ + int num = pdf_to_num(ocg); + int gen = pdf_to_gen(ocg); + int len = desc->len; + int i; + + for (i = 0; i < len; i++) + { + if (desc->ocgs[i].num == num && desc->ocgs[i].gen == gen) + { + if (desc->ocgs[i].state == 0) + return 1; /* If off, hidden */ + break; + } + } + + /* Check Intents; if our intent is not part of the set given + * by the current config, we should ignore it. */ + obj = pdf_dict_gets(ocg, "Intent"); + if (pdf_is_name(obj)) + { + /* If it doesn't match, it's hidden */ + if (ocg_intents_include(desc, pdf_to_name(obj)) == 0) + return 1; + } + else if (pdf_is_array(obj)) + { + int match = 0; + len = pdf_array_len(obj); + for (i=0; i<len; i++) { + match |= ocg_intents_include(desc, pdf_to_name(pdf_array_get(obj, i))); + if (match) + break; + } + /* If we don't match any, it's hidden */ + if (match == 0) + return 1; + } + else + { + /* If it doesn't match, it's hidden */ + if (ocg_intents_include(desc, "View") == 0) + return 1; + } + + /* FIXME: Currently we do a very simple check whereby we look + * at the Usage object (an Optional Content Usage Dictionary) + * and check to see if the corresponding 'event' key is on + * or off. + * + * Really we should only look at Usage dictionaries that + * correspond to entries in the AS list in the OCG config. 
+ * Given that we don't handle Zoom or User, or Language + * dicts, this is not really a problem. */ + obj = pdf_dict_gets(ocg, "Usage"); + if (!pdf_is_dict(obj)) + return 0; + /* FIXME: Should look at Zoom (and return hidden if out of + * max/min range) */ + /* FIXME: Could provide hooks to the caller to check if + * User is appropriate - if not return hidden. */ + obj2 = pdf_dict_gets(obj, csi->event); + if (strcmp(pdf_to_name(pdf_dict_gets(obj2, event_state)), "OFF") == 0) + { + return 1; + } + return 0; + } + else if (strcmp(type, "OCMD") == 0) + { + /* An Optional Content Membership Dictionary */ + char *name; + int combine, on; + + obj = pdf_dict_gets(ocg, "VE"); + if (pdf_is_array(obj)) { + /* FIXME: Calculate visibility from array */ + return 0; + } + name = pdf_to_name(pdf_dict_gets(ocg, "P")); + /* Set combine; Bit 0 set => AND, Bit 1 set => true means + * Off, otherwise true means On */ + if (strcmp(name, "AllOn") == 0) + { + combine = 1; + } + else if (strcmp(name, "AnyOff") == 0) + { + combine = 2; + } + else if (strcmp(name, "AllOff") == 0) + { + combine = 3; + } + else /* Assume it's the default (AnyOn) */ + { + combine = 0; + } + + if (pdf_obj_mark(ocg)) + return 0; /* Should never happen */ + fz_try(ctx) + { + obj = pdf_dict_gets(ocg, "OCGs"); + on = combine & 1; + if (pdf_is_array(obj)) { + int i, len; + len = pdf_array_len(obj); + for (i = 0; i < len; i++) + { + int hidden; + hidden = pdf_is_hidden_ocg(pdf_array_get(obj, i), csi, rdb); + if ((combine & 1) == 0) + hidden = !hidden; + if (combine & 2) + on &= hidden; + else + on |= hidden; + } + } + else + { + on = pdf_is_hidden_ocg(obj, csi, rdb); + if ((combine & 1) == 0) + on = !on; + } + } + fz_always(ctx) + { + pdf_obj_unmark(ocg); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + return !on; + } + /* No idea what sort of object this is - be visible */ + return 0; +} + +/* + * Emit graphics calls to device. 
+ */ + +typedef struct softmask_save_s softmask_save; + +struct softmask_save_s +{ + pdf_xobject *softmask; + fz_matrix ctm; +}; + +static pdf_gstate * +begin_softmask(pdf_csi * csi, softmask_save *save) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_xobject *softmask = gstate->softmask; + fz_rect mask_bbox; + fz_context *ctx; + fz_matrix save_tm, save_tlm, save_ctm; + int save_in_text; + + save->softmask = softmask; + if (softmask == NULL) + return gstate; + save->ctm = gstate->softmask_ctm; + save_ctm = gstate->ctm; + + mask_bbox = softmask->bbox; + ctx = csi->dev->ctx; + save_tm = csi->tm; + save_tlm = csi->tlm; + save_in_text = csi->in_text; + + csi->in_text = 0; + if (gstate->luminosity) + mask_bbox = fz_infinite_rect; + else + { + fz_transform_rect(&mask_bbox, &softmask->matrix); + fz_transform_rect(&mask_bbox, &gstate->softmask_ctm); + } + gstate->softmask = NULL; + gstate->ctm = gstate->softmask_ctm; + + fz_begin_mask(csi->dev, &mask_bbox, gstate->luminosity, + softmask->colorspace, gstate->softmask_bc); + fz_try(ctx) + { + pdf_run_xobject(csi, NULL, softmask, &fz_identity); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + /* FIXME: Ignore error - nasty, but if we throw from + * here the clip stack would be messed up. 
*/ + if (csi->cookie) + csi->cookie->errors++; + } + + fz_end_mask(csi->dev); + + csi->tm = save_tm; + csi->tlm = save_tlm; + csi->in_text = save_in_text; + + gstate = csi->gstate + csi->gtop; + gstate->ctm = save_ctm; + + return gstate; +} + +static void +end_softmask(pdf_csi *csi, softmask_save *save) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + + if (save->softmask == NULL) + return; + + gstate->softmask = save->softmask; + gstate->softmask_ctm = save->ctm; + fz_pop_clip(csi->dev); +} + +static void +pdf_begin_group(pdf_csi *csi, const fz_rect *bbox, softmask_save *softmask) +{ + pdf_gstate *gstate = begin_softmask(csi, softmask); + + if (gstate->blendmode) + fz_begin_group(csi->dev, bbox, 1, 0, gstate->blendmode, 1); +} + +static void +pdf_end_group(pdf_csi *csi, softmask_save *softmask) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + + if (gstate->blendmode) + fz_end_group(csi->dev); + + end_softmask(csi, softmask); +} + +static void +pdf_show_shade(pdf_csi *csi, fz_shade *shd) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_rect bbox; + softmask_save softmask = { NULL }; + + if (csi->in_hidden_ocg > 0) + return; + + fz_bound_shade(ctx, shd, &gstate->ctm, &bbox); + + pdf_begin_group(csi, &bbox, &softmask); + + /* FIXME: The gstate->ctm in the next line may be wrong; maybe + * it should be the parent gstates ctm? 
*/ + fz_fill_shade(csi->dev, shd, &gstate->ctm, gstate->fill.alpha); + + pdf_end_group(csi, &softmask); +} + +static void +pdf_show_image(pdf_csi *csi, fz_image *image) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_matrix image_ctm; + fz_rect bbox; + softmask_save softmask = { NULL }; + + if (csi->in_hidden_ocg > 0) + return; + + /* PDF has images bottom-up, so flip them right side up here */ + image_ctm = gstate->ctm; + fz_pre_scale(fz_pre_translate(&image_ctm, 0, 1), 1, -1); + + bbox = fz_unit_rect; + fz_transform_rect(&bbox, &image_ctm); + + if (image->mask) + { + /* apply blend group even though we skip the soft mask */ + if (gstate->blendmode) + fz_begin_group(csi->dev, &bbox, 0, 0, gstate->blendmode, 1); + fz_clip_image_mask(csi->dev, image->mask, &bbox, &image_ctm); + } + else + pdf_begin_group(csi, &bbox, &softmask); + + if (!image->colorspace) + { + + switch (gstate->fill.kind) + { + case PDF_MAT_NONE: + break; + case PDF_MAT_COLOR: + fz_fill_image_mask(csi->dev, image, &image_ctm, + gstate->fill.colorspace, gstate->fill.v, gstate->fill.alpha); + break; + case PDF_MAT_PATTERN: + if (gstate->fill.pattern) + { + fz_clip_image_mask(csi->dev, image, &bbox, &image_ctm); + pdf_show_pattern(csi, gstate->fill.pattern, &csi->gstate[gstate->fill.gstate_num], &bbox, PDF_FILL); + fz_pop_clip(csi->dev); + } + break; + case PDF_MAT_SHADE: + if (gstate->fill.shade) + { + fz_clip_image_mask(csi->dev, image, &bbox, &image_ctm); + fz_fill_shade(csi->dev, gstate->fill.shade, &csi->gstate[gstate->fill.gstate_num].ctm, gstate->fill.alpha); + fz_pop_clip(csi->dev); + } + break; + } + } + else + { + fz_fill_image(csi->dev, image, &image_ctm, gstate->fill.alpha); + } + + if (image->mask) + { + fz_pop_clip(csi->dev); + if (gstate->blendmode) + fz_end_group(csi->dev); + } + else + pdf_end_group(csi, &softmask); +} + +static void +pdf_show_path(pdf_csi *csi, int doclose, int dofill, int dostroke, int even_odd) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gstate = 
csi->gstate + csi->gtop; + fz_path *path; + fz_rect bbox; + softmask_save softmask = { NULL }; + + if (dostroke) { + if (csi->dev->flags & (FZ_DEVFLAG_STROKECOLOR_UNDEFINED | FZ_DEVFLAG_LINEJOIN_UNDEFINED | FZ_DEVFLAG_LINEWIDTH_UNDEFINED)) + csi->dev->flags |= FZ_DEVFLAG_UNCACHEABLE; + else if (gstate->stroke_state->dash_len != 0 && csi->dev->flags & (FZ_DEVFLAG_STARTCAP_UNDEFINED | FZ_DEVFLAG_DASHCAP_UNDEFINED | FZ_DEVFLAG_ENDCAP_UNDEFINED)) + csi->dev->flags |= FZ_DEVFLAG_UNCACHEABLE; + else if (gstate->stroke_state->linejoin == FZ_LINEJOIN_MITER && (csi->dev->flags & FZ_DEVFLAG_MITERLIMIT_UNDEFINED)) + csi->dev->flags |= FZ_DEVFLAG_UNCACHEABLE; + } + if (dofill) { + if (csi->dev->flags & FZ_DEVFLAG_FILLCOLOR_UNDEFINED) + csi->dev->flags |= FZ_DEVFLAG_UNCACHEABLE; + } + + path = csi->path; + csi->path = fz_new_path(ctx); + + fz_try(ctx) + { + if (doclose) + fz_closepath(ctx, path); + + fz_bound_path(ctx, path, (dostroke ? gstate->stroke_state : NULL), &gstate->ctm, &bbox); + + if (csi->clip) + { + gstate->clip_depth++; + fz_clip_path(csi->dev, path, NULL, csi->clip_even_odd, &gstate->ctm); + csi->clip = 0; + } + + if (csi->in_hidden_ocg > 0) + dostroke = dofill = 0; + + if (dofill || dostroke) + pdf_begin_group(csi, &bbox, &softmask); + + if (dofill) + { + switch (gstate->fill.kind) + { + case PDF_MAT_NONE: + break; + case PDF_MAT_COLOR: + fz_fill_path(csi->dev, path, even_odd, &gstate->ctm, + gstate->fill.colorspace, gstate->fill.v, gstate->fill.alpha); + break; + case PDF_MAT_PATTERN: + if (gstate->fill.pattern) + { + fz_clip_path(csi->dev, path, NULL, even_odd, &gstate->ctm); + pdf_show_pattern(csi, gstate->fill.pattern, &csi->gstate[gstate->fill.gstate_num], &bbox, PDF_FILL); + fz_pop_clip(csi->dev); + } + break; + case PDF_MAT_SHADE: + if (gstate->fill.shade) + { + fz_clip_path(csi->dev, path, NULL, even_odd, &gstate->ctm); + /* The cluster and page 2 of patterns.pdf shows that fz_fill_shade should NOT be called with gstate->ctm. 
*/ + fz_fill_shade(csi->dev, gstate->fill.shade, &csi->gstate[gstate->fill.gstate_num].ctm, gstate->fill.alpha); + fz_pop_clip(csi->dev); + } + break; + } + } + + if (dostroke) + { + switch (gstate->stroke.kind) + { + case PDF_MAT_NONE: + break; + case PDF_MAT_COLOR: + fz_stroke_path(csi->dev, path, gstate->stroke_state, &gstate->ctm, + gstate->stroke.colorspace, gstate->stroke.v, gstate->stroke.alpha); + break; + case PDF_MAT_PATTERN: + if (gstate->stroke.pattern) + { + fz_clip_stroke_path(csi->dev, path, &bbox, gstate->stroke_state, &gstate->ctm); + pdf_show_pattern(csi, gstate->stroke.pattern, &csi->gstate[gstate->stroke.gstate_num], &bbox, PDF_STROKE); + fz_pop_clip(csi->dev); + } + break; + case PDF_MAT_SHADE: + if (gstate->stroke.shade) + { + fz_clip_stroke_path(csi->dev, path, &bbox, gstate->stroke_state, &gstate->ctm); + fz_fill_shade(csi->dev, gstate->stroke.shade, &csi->gstate[gstate->stroke.gstate_num].ctm, gstate->stroke.alpha); + fz_pop_clip(csi->dev); + } + break; + } + } + + if (dofill || dostroke) + pdf_end_group(csi, &softmask); + } + fz_always(ctx) + { + fz_free_path(ctx, path); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +/* + * Assemble and emit text + */ + +static void +pdf_flush_text(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_text *text; + int dofill; + int dostroke; + int doclip; + int doinvisible; + fz_context *ctx = csi->dev->ctx; + softmask_save softmask = { NULL }; + + if (!csi->text) + return; + text = csi->text; + csi->text = NULL; + + dofill = dostroke = doclip = doinvisible = 0; + switch (csi->text_mode) + { + case 0: dofill = 1; break; + case 1: dostroke = 1; break; + case 2: dofill = dostroke = 1; break; + case 3: doinvisible = 1; break; + case 4: dofill = doclip = 1; break; + case 5: dostroke = doclip = 1; break; + case 6: dofill = dostroke = doclip = 1; break; + case 7: doclip = 1; break; + } + + if (csi->in_hidden_ocg > 0) + dostroke = dofill = 0; + + fz_try(ctx) + { + fz_rect tb = 
csi->text_bbox; + + fz_transform_rect(&tb, &gstate->ctm); + + /* Don't bother sending a text group with nothing in it */ + if (text->len == 0) + break; + + pdf_begin_group(csi, &tb, &softmask); + + if (doinvisible) + fz_ignore_text(csi->dev, text, &gstate->ctm); + + if (dofill) + { + switch (gstate->fill.kind) + { + case PDF_MAT_NONE: + break; + case PDF_MAT_COLOR: + fz_fill_text(csi->dev, text, &gstate->ctm, + gstate->fill.colorspace, gstate->fill.v, gstate->fill.alpha); + break; + case PDF_MAT_PATTERN: + if (gstate->fill.pattern) + { + fz_clip_text(csi->dev, text, &gstate->ctm, 0); + pdf_show_pattern(csi, gstate->fill.pattern, &csi->gstate[gstate->fill.gstate_num], &tb, PDF_FILL); + fz_pop_clip(csi->dev); + } + break; + case PDF_MAT_SHADE: + if (gstate->fill.shade) + { + fz_clip_text(csi->dev, text, &gstate->ctm, 0); + /* Page 2 of patterns.pdf shows that fz_fill_shade should NOT be called with gstate->ctm */ + fz_fill_shade(csi->dev, gstate->fill.shade, &csi->gstate[gstate->fill.gstate_num].ctm, gstate->fill.alpha); + fz_pop_clip(csi->dev); + } + break; + } + } + + if (dostroke) + { + switch (gstate->stroke.kind) + { + case PDF_MAT_NONE: + break; + case PDF_MAT_COLOR: + fz_stroke_text(csi->dev, text, gstate->stroke_state, &gstate->ctm, + gstate->stroke.colorspace, gstate->stroke.v, gstate->stroke.alpha); + break; + case PDF_MAT_PATTERN: + if (gstate->stroke.pattern) + { + fz_clip_stroke_text(csi->dev, text, gstate->stroke_state, &gstate->ctm); + pdf_show_pattern(csi, gstate->stroke.pattern, &csi->gstate[gstate->stroke.gstate_num], &tb, PDF_STROKE); + fz_pop_clip(csi->dev); + } + break; + case PDF_MAT_SHADE: + if (gstate->stroke.shade) + { + fz_clip_stroke_text(csi->dev, text, gstate->stroke_state, &gstate->ctm); + fz_fill_shade(csi->dev, gstate->stroke.shade, &csi->gstate[gstate->stroke.gstate_num].ctm, gstate->stroke.alpha); + fz_pop_clip(csi->dev); + } + break; + } + } + + if (doclip) + { + if (csi->accumulate < 2) + gstate->clip_depth++; + 
fz_clip_text(csi->dev, text, &gstate->ctm, csi->accumulate); + csi->accumulate = 2; + } + + pdf_end_group(csi, &softmask); + } + fz_always(ctx) + { + fz_free_text(ctx, text); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +pdf_show_char(pdf_csi *csi, int cid) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_font_desc *fontdesc = gstate->font; + fz_matrix tsm, trm; + float w0, w1, tx, ty; + pdf_hmtx h; + pdf_vmtx v; + int gid; + int ucsbuf[8]; + int ucslen; + int i; + fz_rect bbox; + int render_direct; + + tsm.a = gstate->size * gstate->scale; + tsm.b = 0; + tsm.c = 0; + tsm.d = gstate->size; + tsm.e = 0; + tsm.f = gstate->rise; + + ucslen = 0; + if (fontdesc->to_unicode) + ucslen = pdf_lookup_cmap_full(fontdesc->to_unicode, cid, ucsbuf); + if (ucslen == 0 && cid < fontdesc->cid_to_ucs_len) + { + ucsbuf[0] = fontdesc->cid_to_ucs[cid]; + ucslen = 1; + } + if (ucslen == 0 || (ucslen == 1 && ucsbuf[0] == 0)) + { + ucsbuf[0] = '?'; + ucslen = 1; + } + + gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); + + if (fontdesc->wmode == 1) + { + v = pdf_lookup_vmtx(ctx, fontdesc, cid); + tsm.e -= v.x * fabsf(gstate->size) * 0.001f; + tsm.f -= v.y * gstate->size * 0.001f; + } + + fz_concat(&trm, &tsm, &csi->tm); + + fz_bound_glyph(ctx, fontdesc->font, gid, &trm, &bbox); + /* Compensate for the glyph cache limited positioning precision */ + bbox.x0 -= 1; + bbox.y0 -= 1; + bbox.x1 += 1; + bbox.y1 += 1; + + /* If we are a type3 font within a type 3 font, or are otherwise + * uncachable, then render direct. 
*/ + render_direct = (!fontdesc->font->ft_face && csi->nested_depth > 0) || !fz_glyph_cacheable(ctx, fontdesc->font, gid); + + /* flush buffered text if face or matrix or rendermode has changed */ + if (!csi->text || + fontdesc->font != csi->text->font || + fontdesc->wmode != csi->text->wmode || + fabsf(trm.a - csi->text->trm.a) > FLT_EPSILON || + fabsf(trm.b - csi->text->trm.b) > FLT_EPSILON || + fabsf(trm.c - csi->text->trm.c) > FLT_EPSILON || + fabsf(trm.d - csi->text->trm.d) > FLT_EPSILON || + gstate->render != csi->text_mode || + render_direct) + { + pdf_flush_text(csi); + + csi->text = fz_new_text(ctx, fontdesc->font, &trm, fontdesc->wmode); + csi->text->trm.e = 0; + csi->text->trm.f = 0; + csi->text_mode = gstate->render; + csi->text_bbox = fz_empty_rect; + } + + if (render_direct) + { + /* Render the glyph stream direct here (only happens for + * type3 glyphs that seem to inherit current graphics + * attributes, or type 3 glyphs within type3 glyphs). */ + fz_matrix composed; + fz_concat(&composed, &trm, &gstate->ctm); + fz_render_t3_glyph_direct(ctx, csi->dev, fontdesc->font, gid, &composed, gstate, csi->nested_depth); + } + else + { + fz_union_rect(&csi->text_bbox, &bbox); + + /* add glyph to textobject */ + fz_add_text(ctx, csi->text, gid, ucsbuf[0], trm.e, trm.f); + + /* add filler glyphs for one-to-many unicode mapping */ + for (i = 1; i < ucslen; i++) + fz_add_text(ctx, csi->text, -1, ucsbuf[i], trm.e, trm.f); + } + + if (fontdesc->wmode == 0) + { + h = pdf_lookup_hmtx(ctx, fontdesc, cid); + w0 = h.w * 0.001f; + tx = (w0 * gstate->size + gstate->char_space) * gstate->scale; + fz_pre_translate(&csi->tm, tx, 0); + } + + if (fontdesc->wmode == 1) + { + w1 = v.w * 0.001f; + ty = w1 * gstate->size + gstate->char_space; + fz_pre_translate(&csi->tm, 0, ty); + } +} + +static void +pdf_show_space(pdf_csi *csi, float tadj) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_font_desc *fontdesc = gstate->font; + + if 
(!fontdesc) + { + fz_warn(ctx, "cannot draw text since font and size not set"); + return; + } + + if (fontdesc->wmode == 0) + fz_pre_translate(&csi->tm, tadj * gstate->scale, 0); + else + fz_pre_translate(&csi->tm, 0, tadj); +} + +static void +pdf_show_string(pdf_csi *csi, unsigned char *buf, int len) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_font_desc *fontdesc = gstate->font; + unsigned char *end = buf + len; + int cpt, cid; + + if (!fontdesc) + { + fz_warn(ctx, "cannot draw text since font and size not set"); + return; + } + + while (buf < end) + { + int w = pdf_decode_cmap(fontdesc->encoding, buf, &cpt); + buf += w; + + cid = pdf_lookup_cmap(fontdesc->encoding, cpt); + if (cid >= 0) + pdf_show_char(csi, cid); + else + fz_warn(ctx, "cannot encode character with code point %#x", cpt); + if (cpt == 32 && w == 1) + pdf_show_space(csi, gstate->word_space); + } +} + +static void +pdf_show_text(pdf_csi *csi, pdf_obj *text) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + int i; + + if (pdf_is_array(text)) + { + int n = pdf_array_len(text); + for (i = 0; i < n; i++) + { + pdf_obj *item = pdf_array_get(text, i); + if (pdf_is_string(item)) + pdf_show_string(csi, (unsigned char *)pdf_to_str_buf(item), pdf_to_str_len(item)); + else + pdf_show_space(csi, - pdf_to_real(item) * gstate->size * 0.001f); + } + } + else if (pdf_is_string(text)) + { + pdf_show_string(csi, (unsigned char *)pdf_to_str_buf(text), pdf_to_str_len(text)); + } +} + +/* + * Interpreter and graphics state stack. 
+ */ + +static void +pdf_init_gstate(fz_context *ctx, pdf_gstate *gs, const fz_matrix *ctm) +{ + gs->ctm = *ctm; + gs->clip_depth = 0; + + gs->stroke_state = fz_new_stroke_state(ctx); + + gs->stroke.kind = PDF_MAT_COLOR; + gs->stroke.colorspace = fz_device_gray(ctx); /* No fz_keep_colorspace as static */ + gs->stroke.v[0] = 0; + gs->stroke.pattern = NULL; + gs->stroke.shade = NULL; + gs->stroke.alpha = 1; + gs->stroke.gstate_num = -1; + + gs->fill.kind = PDF_MAT_COLOR; + gs->fill.colorspace = fz_device_gray(ctx); /* No fz_keep_colorspace as static */ + gs->fill.v[0] = 0; + gs->fill.pattern = NULL; + gs->fill.shade = NULL; + gs->fill.alpha = 1; + gs->fill.gstate_num = -1; + + gs->char_space = 0; + gs->word_space = 0; + gs->scale = 1; + gs->leading = 0; + gs->font = NULL; + gs->size = -1; + gs->render = 0; + gs->rise = 0; + + gs->blendmode = 0; + gs->softmask = NULL; + gs->softmask_ctm = fz_identity; + gs->luminosity = 0; +} + +static pdf_material * +pdf_keep_material(fz_context *ctx, pdf_material *mat) +{ + if (mat->colorspace) + fz_keep_colorspace(ctx, mat->colorspace); + if (mat->pattern) + pdf_keep_pattern(ctx, mat->pattern); + if (mat->shade) + fz_keep_shade(ctx, mat->shade); + return mat; +} + +static pdf_material * +pdf_drop_material(fz_context *ctx, pdf_material *mat) +{ + if (mat->colorspace) + fz_drop_colorspace(ctx, mat->colorspace); + if (mat->pattern) + pdf_drop_pattern(ctx, mat->pattern); + if (mat->shade) + fz_drop_shade(ctx, mat->shade); + return mat; +} + +static void +pdf_keep_gstate(fz_context *ctx, pdf_gstate *gs) +{ + pdf_keep_material(ctx, &gs->stroke); + pdf_keep_material(ctx, &gs->fill); + if (gs->font) + pdf_keep_font(ctx, gs->font); + if (gs->softmask) + pdf_keep_xobject(ctx, gs->softmask); + fz_keep_stroke_state(ctx, gs->stroke_state); +} + +static void +pdf_drop_gstate(fz_context *ctx, pdf_gstate *gs) +{ + pdf_drop_material(ctx, &gs->stroke); + pdf_drop_material(ctx, &gs->fill); + if (gs->font) + pdf_drop_font(ctx, gs->font); + if 
(gs->softmask) + pdf_drop_xobject(ctx, gs->softmask); + fz_drop_stroke_state(ctx, gs->stroke_state); +} + +static void +pdf_copy_gstate(fz_context *ctx, pdf_gstate *gs, pdf_gstate *old) +{ + pdf_drop_gstate(ctx, gs); + *gs = *old; + pdf_keep_gstate(ctx, gs); +} + +static void +pdf_copy_pattern_gstate(fz_context *ctx, pdf_gstate *gs, const pdf_gstate *old) +{ + gs->ctm = old->ctm; + gs->font = old->font; + gs->softmask = old->softmask; + + fz_drop_stroke_state(ctx, gs->stroke_state); + gs->stroke_state = fz_keep_stroke_state(ctx, old->stroke_state); + + if (gs->font) + pdf_keep_font(ctx, gs->font); + if (gs->softmask) + pdf_keep_xobject(ctx, gs->softmask); +} + +static pdf_csi * +pdf_new_csi(pdf_document *xref, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie, pdf_gstate *gstate, int nested) +{ + pdf_csi *csi; + fz_context *ctx = dev->ctx; + + csi = fz_malloc_struct(ctx, pdf_csi); + fz_try(ctx) + { + csi->xref = xref; + csi->dev = dev; + csi->event = event; + + csi->top = 0; + csi->obj = NULL; + csi->name[0] = 0; + csi->string_len = 0; + memset(csi->stack, 0, sizeof csi->stack); + + csi->xbalance = 0; + csi->in_text = 0; + csi->in_hidden_ocg = 0; + + csi->path = fz_new_path(ctx); + csi->clip = 0; + csi->clip_even_odd = 0; + + csi->text = NULL; + csi->tlm = fz_identity; + csi->tm = fz_identity; + csi->text_mode = 0; + csi->accumulate = 1; + + csi->gcap = 64; + csi->gstate = fz_malloc_array(ctx, csi->gcap, sizeof(pdf_gstate)); + + csi->nested_depth = nested; + pdf_init_gstate(ctx, &csi->gstate[0], ctm); + if (gstate) + { + pdf_copy_gstate(ctx, &csi->gstate[0], gstate); + csi->gstate[0].ctm = *ctm; + } + csi->gtop = 0; + csi->gbot = 0; + csi->gparent = 0; + + csi->cookie = cookie; + } + fz_catch(ctx) + { + fz_free_path(ctx, csi->path); + fz_free(ctx, csi); + fz_rethrow(ctx); + } + + return csi; +} + +static void +pdf_clear_stack(pdf_csi *csi) +{ + int i; + + pdf_drop_obj(csi->obj); + csi->obj = NULL; + + csi->name[0] = 0; + csi->string_len = 0; + 
for (i = 0; i < csi->top; i++) + csi->stack[i] = 0; + + csi->top = 0; +} + +static void +pdf_gsave(pdf_csi *csi) +{ + fz_context *ctx = csi->dev->ctx; + + if (csi->gtop == csi->gcap-1) + { + csi->gstate = fz_resize_array(ctx, csi->gstate, csi->gcap*2, sizeof(pdf_gstate)); + csi->gcap *= 2; + } + + memcpy(&csi->gstate[csi->gtop + 1], &csi->gstate[csi->gtop], sizeof(pdf_gstate)); + + csi->gtop++; + pdf_keep_gstate(ctx, &csi->gstate[csi->gtop]); +} + +static void +pdf_grestore(pdf_csi *csi) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gs = csi->gstate + csi->gtop; + int clip_depth = gs->clip_depth; + + if (csi->gtop <= csi->gbot) + { + fz_warn(ctx, "gstate underflow in content stream"); + return; + } + + pdf_drop_gstate(ctx, gs); + csi->gtop --; + + gs = csi->gstate + csi->gtop; + while (clip_depth > gs->clip_depth) + { + fz_try(ctx) + { + fz_pop_clip(csi->dev); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + /* Silently swallow the problem */ + } + clip_depth--; + } +} + +static void +pdf_free_csi(pdf_csi *csi) +{ + fz_context *ctx = csi->dev->ctx; + + while (csi->gtop) + pdf_grestore(csi); + + pdf_drop_material(ctx, &csi->gstate[0].fill); + pdf_drop_material(ctx, &csi->gstate[0].stroke); + if (csi->gstate[0].font) + pdf_drop_font(ctx, csi->gstate[0].font); + if (csi->gstate[0].softmask) + pdf_drop_xobject(ctx, csi->gstate[0].softmask); + fz_drop_stroke_state(ctx, csi->gstate[0].stroke_state); + + while (csi->gstate[0].clip_depth--) + fz_pop_clip(csi->dev); + + if (csi->path) fz_free_path(ctx, csi->path); + if (csi->text) fz_free_text(ctx, csi->text); + + pdf_clear_stack(csi); + + fz_free(ctx, csi->gstate); + + fz_free(ctx, csi); +} + +/* + * Material state + */ + +static void +pdf_set_colorspace(pdf_csi *csi, int what, fz_colorspace *colorspace) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + + pdf_flush_text(csi); + + mat = what == PDF_FILL ? 
&gs->fill : &gs->stroke; + + fz_drop_colorspace(ctx, mat->colorspace); + + mat->kind = PDF_MAT_COLOR; + mat->colorspace = fz_keep_colorspace(ctx, colorspace); + + mat->v[0] = 0; + mat->v[1] = 0; + mat->v[2] = 0; + mat->v[3] = 1; +} + +static void +pdf_set_color(pdf_csi *csi, int what, float *v) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + int i; + + pdf_flush_text(csi); + + mat = what == PDF_FILL ? &gs->fill : &gs->stroke; + + switch (mat->kind) + { + case PDF_MAT_PATTERN: + case PDF_MAT_COLOR: + if (!strcmp(mat->colorspace->name, "Lab")) + { + mat->v[0] = v[0] / 100; + mat->v[1] = (v[1] + 100) / 200; + mat->v[2] = (v[2] + 100) / 200; + } + for (i = 0; i < mat->colorspace->n; i++) + mat->v[i] = v[i]; + break; + default: + fz_warn(ctx, "color incompatible with material"); + } +} + +static void +pdf_set_shade(pdf_csi *csi, int what, fz_shade *shade) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + + pdf_flush_text(csi); + + mat = what == PDF_FILL ? &gs->fill : &gs->stroke; + + if (mat->shade) + fz_drop_shade(ctx, mat->shade); + + mat->kind = PDF_MAT_SHADE; + mat->shade = fz_keep_shade(ctx, shade); +} + +static void +pdf_set_pattern(pdf_csi *csi, int what, pdf_pattern *pat, float *v) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + + pdf_flush_text(csi); + + mat = what == PDF_FILL ? &gs->fill : &gs->stroke; + + if (mat->pattern) + pdf_drop_pattern(ctx, mat->pattern); + + mat->kind = PDF_MAT_PATTERN; + if (pat) + mat->pattern = pdf_keep_pattern(ctx, pat); + else + mat->pattern = NULL; + mat->gstate_num = csi->gparent; + + if (v) + pdf_set_color(csi, what, v); +} + +static void +pdf_unset_pattern(pdf_csi *csi, int what) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gs = csi->gstate + csi->gtop; + pdf_material *mat; + mat = what == PDF_FILL ? 
&gs->fill : &gs->stroke; + if (mat->kind == PDF_MAT_PATTERN) + { + if (mat->pattern) + pdf_drop_pattern(ctx, mat->pattern); + mat->pattern = NULL; + mat->kind = PDF_MAT_COLOR; + } +} + +/* + * Patterns, XObjects and ExtGState + */ + +static void +pdf_show_pattern(pdf_csi *csi, pdf_pattern *pat, pdf_gstate *pat_gstate, const fz_rect *area, int what) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gstate; + int gparent_save; + fz_matrix ptm, invptm, gparent_save_ctm; + int x0, y0, x1, y1; + float fx0, fy0, fx1, fy1; + int oldtop; + fz_rect local_area; + + pdf_gsave(csi); + gstate = csi->gstate + csi->gtop; + /* Patterns are run with the gstate of the parent */ + pdf_copy_pattern_gstate(ctx, gstate, pat_gstate); + + if (pat->ismask) + { + pdf_unset_pattern(csi, PDF_FILL); + pdf_unset_pattern(csi, PDF_STROKE); + if (what == PDF_FILL) + { + pdf_drop_material(ctx, &gstate->stroke); + pdf_keep_material(ctx, &gstate->fill); + gstate->stroke = gstate->fill; + } + if (what == PDF_STROKE) + { + pdf_drop_material(ctx, &gstate->fill); + pdf_keep_material(ctx, &gstate->stroke); + gstate->fill = gstate->stroke; + } + } + else + { + // TODO: unset only the current fill/stroke or both? + pdf_unset_pattern(csi, what); + } + + /* don't apply soft masks to objects in the pattern as well */ + if (gstate->softmask) + { + pdf_drop_xobject(ctx, gstate->softmask); + gstate->softmask = NULL; + } + + fz_concat(&ptm, &pat->matrix, &pat_gstate->ctm); + fz_invert_matrix(&invptm, &ptm); + + /* The parent_ctm is amended with our pattern matrix */ + gparent_save = csi->gparent; + csi->gparent = csi->gtop-1; + gparent_save_ctm = csi->gstate[csi->gparent].ctm; + csi->gstate[csi->gparent].ctm = ptm; + + fz_try(ctx) + { + /* patterns are painted using the parent_ctm. area = bbox of + * shape to be filled in device space. Map it back to pattern + * space. 
*/ + local_area = *area; + fz_transform_rect(&local_area, &invptm); + + fx0 = (local_area.x0 - pat->bbox.x0) / pat->xstep; + fy0 = (local_area.y0 - pat->bbox.y0) / pat->ystep; + fx1 = (local_area.x1 - pat->bbox.x0) / pat->xstep; + fy1 = (local_area.y1 - pat->bbox.y0) / pat->ystep; + if (fx0 > fx1) + { + float t = fx0; fx0 = fx1; fx1 = t; + } + if (fy0 > fy1) + { + float t = fy0; fy0 = fy1; fy1 = t; + } + + oldtop = csi->gtop; + +#ifdef TILE + /* We have tried various formulations in the past, but this one is + * best we've found; only use it as a tile if a whole repeat is + * required in at least one direction. Note, that this allows for + * 'sections' of 4 tiles to be show, but all non-overlapping. */ + if (fx1-fx0 > 1 || fy1-fy0 > 1) +#else + if (0) +#endif + { + fz_begin_tile(csi->dev, &local_area, &pat->bbox, pat->xstep, pat->ystep, &ptm); + gstate->ctm = ptm; + pdf_gsave(csi); + pdf_run_contents_object(csi, pat->resources, pat->contents); + pdf_grestore(csi); + while (oldtop < csi->gtop) + pdf_grestore(csi); + fz_end_tile(csi->dev); + } + else + { + int x, y; + + /* When calculating the number of tiles required, we adjust by + * a small amount to allow for rounding errors. By choosing + * this amount to be smaller than 1/256, we guarantee we won't + * cause problems that will be visible even under our most + * extreme antialiasing. 
*/ + x0 = floorf(fx0 + 0.001); + y0 = floorf(fy0 + 0.001); + x1 = ceilf(fx1 - 0.001); + y1 = ceilf(fy1 - 0.001); + + for (y = y0; y < y1; y++) + { + for (x = x0; x < x1; x++) + { + gstate->ctm = ptm; + fz_pre_translate(&gstate->ctm, x * pat->xstep, y * pat->ystep); + pdf_gsave(csi); + fz_try(ctx) + { + pdf_run_contents_object(csi, pat->resources, pat->contents); + } + fz_always(ctx) + { + pdf_grestore(csi); + while (oldtop < csi->gtop) + pdf_grestore(csi); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot render pattern tile"); + } + } + } + } + } + fz_always(ctx) + { + csi->gstate[csi->gparent].ctm = gparent_save_ctm; + csi->gparent = gparent_save; + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + pdf_grestore(csi); +} + +static void +pdf_run_xobject(pdf_csi *csi, pdf_obj *resources, pdf_xobject *xobj, const fz_matrix *transform) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gstate = NULL; + int oldtop = 0; + fz_matrix local_transform = *transform; + softmask_save softmask = { NULL }; + int gparent_save; + fz_matrix gparent_save_ctm; + + /* Avoid infinite recursion */ + if (xobj == NULL || pdf_obj_mark(xobj->me)) + return; + + fz_var(gstate); + fz_var(oldtop); + + gparent_save = csi->gparent; + csi->gparent = csi->gtop; + + fz_try(ctx) + { + pdf_gsave(csi); + + gstate = csi->gstate + csi->gtop; + oldtop = csi->gtop; + + /* apply xobject's transform matrix */ + fz_concat(&local_transform, &xobj->matrix, &local_transform); + fz_concat(&gstate->ctm, &local_transform, &gstate->ctm); + + /* The gparent is updated with the modified ctm */ + gparent_save_ctm = csi->gstate[csi->gparent].ctm; + csi->gstate[csi->gparent].ctm = gstate->ctm; + + /* apply soft mask, create transparency group and reset state */ + if (xobj->transparency) + { + fz_rect bbox = xobj->bbox; + fz_transform_rect(&bbox, &gstate->ctm); + gstate = begin_softmask(csi, &softmask); + + fz_begin_group(csi->dev, &bbox, + xobj->isolated, xobj->knockout, gstate->blendmode, gstate->fill.alpha); + 
+ gstate->blendmode = 0; + gstate->stroke.alpha = 1; + gstate->fill.alpha = 1; + } + + /* clip to the bounds */ + + fz_moveto(ctx, csi->path, xobj->bbox.x0, xobj->bbox.y0); + fz_lineto(ctx, csi->path, xobj->bbox.x1, xobj->bbox.y0); + fz_lineto(ctx, csi->path, xobj->bbox.x1, xobj->bbox.y1); + fz_lineto(ctx, csi->path, xobj->bbox.x0, xobj->bbox.y1); + fz_closepath(ctx, csi->path); + csi->clip = 1; + pdf_show_path(csi, 0, 0, 0, 0); + + /* run contents */ + + if (xobj->resources) + resources = xobj->resources; + + pdf_run_contents_object(csi, resources, xobj->contents); + } + fz_always(ctx) + { + csi->gstate[csi->gparent].ctm = gparent_save_ctm; + csi->gparent = gparent_save; + + if (gstate) + { + while (oldtop < csi->gtop) + pdf_grestore(csi); + + pdf_grestore(csi); + } + + pdf_obj_unmark(xobj->me); + + /* wrap up transparency stacks */ + if (xobj->transparency) + { + fz_end_group(csi->dev); + end_softmask(csi, &softmask); + } + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + +} + +static void +pdf_run_extgstate(pdf_csi *csi, pdf_obj *rdb, pdf_obj *extgstate) +{ + fz_context *ctx = csi->dev->ctx; + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_colorspace *colorspace; + int i, k, n; + + pdf_flush_text(csi); + + n = pdf_dict_len(extgstate); + for (i = 0; i < n; i++) + { + pdf_obj *key = pdf_dict_get_key(extgstate, i); + pdf_obj *val = pdf_dict_get_val(extgstate, i); + char *s = pdf_to_name(key); + + if (!strcmp(s, "Font")) + { + if (pdf_is_array(val) && pdf_array_len(val) == 2) + { + pdf_obj *font = pdf_array_get(val, 0); + + if (gstate->font) + { + pdf_drop_font(ctx, gstate->font); + gstate->font = NULL; + } + + gstate->font = pdf_load_font(csi->xref, rdb, font, csi->nested_depth); + if (!gstate->font) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find font in store"); + gstate->size = pdf_to_real(pdf_array_get(val, 1)); + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, "malformed /Font dictionary"); + } + + else if (!strcmp(s, "LC")) + { + csi->dev->flags &= 
~(FZ_DEVFLAG_STARTCAP_UNDEFINED | FZ_DEVFLAG_DASHCAP_UNDEFINED | FZ_DEVFLAG_ENDCAP_UNDEFINED); + gstate->stroke_state = fz_unshare_stroke_state(ctx, gstate->stroke_state); + gstate->stroke_state->start_cap = pdf_to_int(val); + gstate->stroke_state->dash_cap = pdf_to_int(val); + gstate->stroke_state->end_cap = pdf_to_int(val); + } + else if (!strcmp(s, "LW")) + { + csi->dev->flags &= ~FZ_DEVFLAG_LINEWIDTH_UNDEFINED; + gstate->stroke_state = fz_unshare_stroke_state(ctx, gstate->stroke_state); + gstate->stroke_state->linewidth = pdf_to_real(val); + } + else if (!strcmp(s, "LJ")) + { + csi->dev->flags &= ~FZ_DEVFLAG_LINEJOIN_UNDEFINED; + gstate->stroke_state = fz_unshare_stroke_state(ctx, gstate->stroke_state); + gstate->stroke_state->linejoin = pdf_to_int(val); + } + else if (!strcmp(s, "ML")) + { + csi->dev->flags &= ~FZ_DEVFLAG_MITERLIMIT_UNDEFINED; + gstate->stroke_state = fz_unshare_stroke_state(ctx, gstate->stroke_state); + gstate->stroke_state->miterlimit = pdf_to_real(val); + } + + else if (!strcmp(s, "D")) + { + if (pdf_is_array(val) && pdf_array_len(val) == 2) + { + pdf_obj *dashes = pdf_array_get(val, 0); + int len = pdf_array_len(dashes); + gstate->stroke_state = fz_unshare_stroke_state_with_len(ctx, gstate->stroke_state, len); + gstate->stroke_state->dash_len = len; + for (k = 0; k < len; k++) + gstate->stroke_state->dash_list[k] = pdf_to_real(pdf_array_get(dashes, k)); + gstate->stroke_state->dash_phase = pdf_to_real(pdf_array_get(val, 1)); + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, "malformed /D"); + } + + else if (!strcmp(s, "CA")) + gstate->stroke.alpha = fz_clamp(pdf_to_real(val), 0, 1); + + else if (!strcmp(s, "ca")) + gstate->fill.alpha = fz_clamp(pdf_to_real(val), 0, 1); + + else if (!strcmp(s, "BM")) + { + if (pdf_is_array(val)) + val = pdf_array_get(val, 0); + gstate->blendmode = fz_lookup_blendmode(pdf_to_name(val)); + } + + else if (!strcmp(s, "SMask")) + { + if (pdf_is_dict(val)) + { + pdf_xobject *xobj; + pdf_obj *group, *luminosity, *bc, 
*tr; + + if (gstate->softmask) + { + pdf_drop_xobject(ctx, gstate->softmask); + gstate->softmask = NULL; + } + + group = pdf_dict_gets(val, "G"); + if (!group) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load softmask xobject (%d %d R)", pdf_to_num(val), pdf_to_gen(val)); + xobj = pdf_load_xobject(csi->xref, group); + + colorspace = xobj->colorspace; + if (!colorspace) + colorspace = fz_device_gray(ctx); + + /* The softmask_ctm no longer has the softmask matrix rolled into it, as this + * causes the softmask matrix to be applied twice. */ + gstate->softmask_ctm = gstate->ctm; + gstate->softmask = xobj; + for (k = 0; k < colorspace->n; k++) + gstate->softmask_bc[k] = 0; + + bc = pdf_dict_gets(val, "BC"); + if (pdf_is_array(bc)) + { + for (k = 0; k < colorspace->n; k++) + gstate->softmask_bc[k] = pdf_to_real(pdf_array_get(bc, k)); + } + + luminosity = pdf_dict_gets(val, "S"); + if (pdf_is_name(luminosity) && !strcmp(pdf_to_name(luminosity), "Luminosity")) + gstate->luminosity = 1; + else + gstate->luminosity = 0; + + tr = pdf_dict_gets(val, "TR"); + if (tr && strcmp(pdf_to_name(tr), "Identity")) + fz_warn(ctx, "ignoring transfer function"); + } + else if (pdf_is_name(val) && !strcmp(pdf_to_name(val), "None")) + { + if (gstate->softmask) + { + pdf_drop_xobject(ctx, gstate->softmask); + gstate->softmask = NULL; + } + } + } + + else if (!strcmp(s, "TR2")) + { + if (strcmp(pdf_to_name(val), "Identity") && strcmp(pdf_to_name(val), "Default")) + fz_warn(ctx, "ignoring transfer function"); + } + + else if (!strcmp(s, "TR")) + { + /* TR is ignored in the presence of TR2 */ + pdf_obj *tr2 = pdf_dict_gets(extgstate, "TR2"); + if (tr2 && strcmp(pdf_to_name(val), "Identity")) + fz_warn(ctx, "ignoring transfer function"); + } + } +} + +/* + * Operators + */ + +static void pdf_run_BDC(pdf_csi *csi, pdf_obj *rdb) +{ + pdf_obj *ocg; + + /* If we are already in a hidden OCG, then we'll still be hidden - + * just increment the depth so we pop back to visibility when we've + * seen 
enough EDCs. */ + if (csi->in_hidden_ocg > 0) + { + csi->in_hidden_ocg++; + return; + } + + ocg = pdf_dict_gets(pdf_dict_gets(rdb, "Properties"), csi->name); + if (!ocg) + { + /* No Properties array, or name not found in the properties + * means visible. */ + return; + } + if (strcmp(pdf_to_name(pdf_dict_gets(ocg, "Type")), "OCG") != 0) + { + /* Wrong type of property */ + return; + } + if (pdf_is_hidden_ocg(ocg, csi, rdb)) + csi->in_hidden_ocg++; +} + +static void pdf_run_BI(pdf_csi *csi, pdf_obj *rdb, fz_stream *file) +{ + fz_context *ctx = csi->dev->ctx; + int ch; + fz_image *img; + pdf_obj *obj; + + obj = pdf_parse_dict(csi->xref, file, &csi->xref->lexbuf.base); + + /* read whitespace after ID keyword */ + ch = fz_read_byte(file); + if (ch == '\r') + if (fz_peek_byte(file) == '\n') + fz_read_byte(file); + + fz_try(ctx) + { + img = pdf_load_inline_image(csi->xref, rdb, obj, file); + } + fz_always(ctx) + { + pdf_drop_obj(obj); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + pdf_show_image(csi, img); + + fz_drop_image(ctx, img); + + /* find EI */ + ch = fz_read_byte(file); + while (ch != 'E' && ch != EOF) + ch = fz_read_byte(file); + ch = fz_read_byte(file); + if (ch != 'I') + fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error after inline image"); +} + +static void pdf_run_B(pdf_csi *csi) +{ + pdf_show_path(csi, 0, 1, 1, 0); +} + +static void pdf_run_BMC(pdf_csi *csi) +{ + /* If we are already in a hidden OCG, then we'll still be hidden - + * just increment the depth so we pop back to visibility when we've + * seen enough EDCs. 
*/ + if (csi->in_hidden_ocg > 0) + { + csi->in_hidden_ocg++; + } +} + +static void pdf_run_BT(pdf_csi *csi) +{ + csi->in_text = 1; + csi->tm = fz_identity; + csi->tlm = fz_identity; +} + +static void pdf_run_BX(pdf_csi *csi) +{ + csi->xbalance ++; +} + +static void pdf_run_Bstar(pdf_csi *csi) +{ + pdf_show_path(csi, 0, 1, 1, 1); +} + +static void pdf_run_cs_imp(pdf_csi *csi, pdf_obj *rdb, int what) +{ + fz_context *ctx = csi->dev->ctx; + fz_colorspace *colorspace; + pdf_obj *obj, *dict; + + if (!strcmp(csi->name, "Pattern")) + { + pdf_set_pattern(csi, what, NULL, NULL); + } + else + { + if (!strcmp(csi->name, "DeviceGray")) + colorspace = fz_device_gray(ctx); /* No fz_keep_colorspace as static */ + else if (!strcmp(csi->name, "DeviceRGB")) + colorspace = fz_device_rgb(ctx); /* No fz_keep_colorspace as static */ + else if (!strcmp(csi->name, "DeviceCMYK")) + colorspace = fz_device_cmyk(ctx); /* No fz_keep_colorspace as static */ + else + { + dict = pdf_dict_gets(rdb, "ColorSpace"); + if (!dict) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find ColorSpace dictionary"); + obj = pdf_dict_gets(dict, csi->name); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find colorspace resource '%s'", csi->name); + colorspace = pdf_load_colorspace(csi->xref, obj); + } + + pdf_set_colorspace(csi, what, colorspace); + + fz_drop_colorspace(ctx, colorspace); + } +} + +static void pdf_run_CS(pdf_csi *csi, pdf_obj *rdb) +{ + csi->dev->flags &= ~FZ_DEVFLAG_STROKECOLOR_UNDEFINED; + + pdf_run_cs_imp(csi, rdb, PDF_STROKE); +} + +static void pdf_run_cs(pdf_csi *csi, pdf_obj *rdb) +{ + csi->dev->flags &= ~FZ_DEVFLAG_FILLCOLOR_UNDEFINED; + + pdf_run_cs_imp(csi, rdb, PDF_FILL); +} + +static void pdf_run_DP(pdf_csi *csi) +{ +} + +static void pdf_run_Do(pdf_csi *csi, pdf_obj *rdb) +{ + fz_context *ctx = csi->dev->ctx; + pdf_obj *dict; + pdf_obj *obj; + pdf_obj *subtype; + + dict = pdf_dict_gets(rdb, "XObject"); + if (!dict) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find XObject dictionary when 
looking for: '%s'", csi->name); + + obj = pdf_dict_gets(dict, csi->name); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xobject resource: '%s'", csi->name); + + subtype = pdf_dict_gets(obj, "Subtype"); + if (!pdf_is_name(subtype)) + fz_throw(ctx, FZ_ERROR_GENERIC, "no XObject subtype specified"); + + if (pdf_is_hidden_ocg(pdf_dict_gets(obj, "OC"), csi, rdb)) + return; + + if (!strcmp(pdf_to_name(subtype), "Form") && pdf_dict_gets(obj, "Subtype2")) + subtype = pdf_dict_gets(obj, "Subtype2"); + + if (!strcmp(pdf_to_name(subtype), "Form")) + { + pdf_xobject *xobj; + + xobj = pdf_load_xobject(csi->xref, obj); + + /* Inherit parent resources, in case this one was empty XXX check where it's loaded */ + if (!xobj->resources) + xobj->resources = pdf_keep_obj(rdb); + + fz_try(ctx) + { + pdf_run_xobject(csi, xobj->resources, xobj, &fz_identity); + } + fz_always(ctx) + { + pdf_drop_xobject(ctx, xobj); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot draw xobject (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); + } + } + + else if (!strcmp(pdf_to_name(subtype), "Image")) + { + if ((csi->dev->hints & FZ_IGNORE_IMAGE) == 0) + { + fz_image *img = pdf_load_image(csi->xref, obj); + + fz_try(ctx) + { + pdf_show_image(csi, img); + } + fz_always(ctx) + { + fz_drop_image(ctx, img); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + } + + else if (!strcmp(pdf_to_name(subtype), "PS")) + { + fz_warn(ctx, "ignoring XObject with subtype PS"); + } + + else + { + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown XObject subtype: '%s'", pdf_to_name(subtype)); + } +} + +static void pdf_run_EMC(pdf_csi *csi) +{ + if (csi->in_hidden_ocg > 0) + csi->in_hidden_ocg--; +} + +static void pdf_run_ET(pdf_csi *csi) +{ + pdf_flush_text(csi); + csi->accumulate = 1; + csi->in_text = 0; +} + +static void pdf_run_EX(pdf_csi *csi) +{ + csi->xbalance --; +} + +static void pdf_run_F(pdf_csi *csi) +{ + pdf_show_path(csi, 0, 1, 0, 0); +} + +static void pdf_run_G(pdf_csi *csi) +{ + csi->dev->flags &= 
~FZ_DEVFLAG_STROKECOLOR_UNDEFINED; + pdf_set_colorspace(csi, PDF_STROKE, fz_device_gray(csi->dev->ctx)); + pdf_set_color(csi, PDF_STROKE, csi->stack); +} + +static void pdf_run_J(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + csi->dev->flags &= ~(FZ_DEVFLAG_STARTCAP_UNDEFINED | FZ_DEVFLAG_DASHCAP_UNDEFINED | FZ_DEVFLAG_ENDCAP_UNDEFINED); + gstate->stroke_state = fz_unshare_stroke_state(csi->dev->ctx, gstate->stroke_state); + gstate->stroke_state->start_cap = csi->stack[0]; + gstate->stroke_state->dash_cap = csi->stack[0]; + gstate->stroke_state->end_cap = csi->stack[0]; +} + +static void pdf_run_K(pdf_csi *csi) +{ + csi->dev->flags &= ~FZ_DEVFLAG_STROKECOLOR_UNDEFINED; + pdf_set_colorspace(csi, PDF_STROKE, fz_device_cmyk(csi->dev->ctx)); + pdf_set_color(csi, PDF_STROKE, csi->stack); +} + +static void pdf_run_M(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + csi->dev->flags &= ~FZ_DEVFLAG_MITERLIMIT_UNDEFINED; + gstate->stroke_state = fz_unshare_stroke_state(csi->dev->ctx, gstate->stroke_state); + gstate->stroke_state->miterlimit = csi->stack[0]; +} + +static void pdf_run_MP(pdf_csi *csi) +{ +} + +static void pdf_run_Q(pdf_csi *csi) +{ + pdf_grestore(csi); +} + +static void pdf_run_RG(pdf_csi *csi) +{ + csi->dev->flags &= ~FZ_DEVFLAG_STROKECOLOR_UNDEFINED; + pdf_set_colorspace(csi, PDF_STROKE, fz_device_rgb(csi->dev->ctx)); + pdf_set_color(csi, PDF_STROKE, csi->stack); +} + +static void pdf_run_S(pdf_csi *csi) +{ + pdf_show_path(csi, 0, 0, 1, 0); +} + +static void pdf_run_SC_imp(pdf_csi *csi, pdf_obj *rdb, int what, pdf_material *mat) +{ + fz_context *ctx = csi->dev->ctx; + pdf_obj *patterntype; + pdf_obj *dict; + pdf_obj *obj; + int kind; + + kind = mat->kind; + if (csi->name[0]) + kind = PDF_MAT_PATTERN; + + switch (kind) + { + case PDF_MAT_NONE: + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot set color in mask objects"); + + case PDF_MAT_COLOR: + pdf_set_color(csi, what, csi->stack); + break; + + case PDF_MAT_PATTERN: + dict = 
pdf_dict_gets(rdb, "Pattern"); + if (!dict) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find Pattern dictionary"); + + obj = pdf_dict_gets(dict, csi->name); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find pattern resource '%s'", csi->name); + + patterntype = pdf_dict_gets(obj, "PatternType"); + + if (pdf_to_int(patterntype) == 1) + { + pdf_pattern *pat; + pat = pdf_load_pattern(csi->xref, obj); + pdf_set_pattern(csi, what, pat, csi->top > 0 ? csi->stack : NULL); + pdf_drop_pattern(ctx, pat); + } + else if (pdf_to_int(patterntype) == 2) + { + fz_shade *shd; + shd = pdf_load_shading(csi->xref, obj); + pdf_set_shade(csi, what, shd); + fz_drop_shade(ctx, shd); + } + else + { + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown pattern type: %d", pdf_to_int(patterntype)); + } + break; + + case PDF_MAT_SHADE: + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot set color in shade objects"); + } + mat->gstate_num = csi->gparent; +} + +static void pdf_run_SC(pdf_csi *csi, pdf_obj *rdb) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + csi->dev->flags &= ~FZ_DEVFLAG_STROKECOLOR_UNDEFINED; + pdf_run_SC_imp(csi, rdb, PDF_STROKE, &gstate->stroke); +} + +static void pdf_run_sc(pdf_csi *csi, pdf_obj *rdb) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + csi->dev->flags &= ~FZ_DEVFLAG_FILLCOLOR_UNDEFINED; + pdf_run_SC_imp(csi, rdb, PDF_FILL, &gstate->fill); +} + +static void pdf_run_Tc(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->char_space = csi->stack[0]; +} + +static void pdf_run_Tw(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->word_space = csi->stack[0]; +} + +static void pdf_run_Tz(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + float a = csi->stack[0] / 100; + pdf_flush_text(csi); + gstate->scale = a; +} + +static void pdf_run_TL(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->leading = csi->stack[0]; +} + +static void pdf_run_Tf(pdf_csi *csi, pdf_obj *rdb) +{ + fz_context 
*ctx = csi->dev->ctx; + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_obj *dict; + pdf_obj *obj; + + gstate->size = csi->stack[0]; + if (gstate->font) + pdf_drop_font(ctx, gstate->font); + gstate->font = NULL; + + dict = pdf_dict_gets(rdb, "Font"); + if (!dict) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find Font dictionary"); + + obj = pdf_dict_gets(dict, csi->name); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find font resource: '%s'", csi->name); + + gstate->font = pdf_load_font(csi->xref, rdb, obj, csi->nested_depth); +} + +static void pdf_run_Tr(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->render = csi->stack[0]; +} + +static void pdf_run_Ts(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + gstate->rise = csi->stack[0]; +} + +static void pdf_run_Td(pdf_csi *csi) +{ + fz_pre_translate(&csi->tlm, csi->stack[0], csi->stack[1]); + csi->tm = csi->tlm; +} + +static void pdf_run_TD(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + + gstate->leading = -csi->stack[1]; + fz_pre_translate(&csi->tlm, csi->stack[0], csi->stack[1]); + csi->tm = csi->tlm; +} + +static void pdf_run_Tm(pdf_csi *csi) +{ + csi->tm.a = csi->stack[0]; + csi->tm.b = csi->stack[1]; + csi->tm.c = csi->stack[2]; + csi->tm.d = csi->stack[3]; + csi->tm.e = csi->stack[4]; + csi->tm.f = csi->stack[5]; + csi->tlm = csi->tm; +} + +static void pdf_run_Tstar(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_pre_translate(&csi->tlm, 0, -gstate->leading); + csi->tm = csi->tlm; +} + +static void pdf_run_Tj(pdf_csi *csi) +{ + if (csi->string_len) + pdf_show_string(csi, csi->string, csi->string_len); + else + pdf_show_text(csi, csi->obj); +} + +static void pdf_run_TJ(pdf_csi *csi) +{ + if (csi->string_len) + pdf_show_string(csi, csi->string, csi->string_len); + else + pdf_show_text(csi, csi->obj); +} + +static void pdf_run_W(pdf_csi *csi) +{ + csi->clip = 1; + csi->clip_even_odd = 0; +} + +static void 
pdf_run_Wstar(pdf_csi *csi) +{ + csi->clip = 1; + csi->clip_even_odd = 1; +} + +static void pdf_run_b(pdf_csi *csi) +{ + pdf_show_path(csi, 1, 1, 1, 0); +} + +static void pdf_run_bstar(pdf_csi *csi) +{ + pdf_show_path(csi, 1, 1, 1, 1); +} + +static void pdf_run_c(pdf_csi *csi) +{ + float a, b, c, d, e, f; + a = csi->stack[0]; + b = csi->stack[1]; + c = csi->stack[2]; + d = csi->stack[3]; + e = csi->stack[4]; + f = csi->stack[5]; + fz_curveto(csi->dev->ctx, csi->path, a, b, c, d, e, f); +} + +static void pdf_run_cm(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + fz_matrix m; + + m.a = csi->stack[0]; + m.b = csi->stack[1]; + m.c = csi->stack[2]; + m.d = csi->stack[3]; + m.e = csi->stack[4]; + m.f = csi->stack[5]; + + fz_concat(&gstate->ctm, &m, &gstate->ctm); +} + +static void pdf_run_d(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_obj *array; + int i; + int len; + + array = csi->obj; + len = pdf_array_len(array); + gstate->stroke_state = fz_unshare_stroke_state_with_len(csi->dev->ctx, gstate->stroke_state, len); + gstate->stroke_state->dash_len = len; + for (i = 0; i < len; i++) + gstate->stroke_state->dash_list[i] = pdf_to_real(pdf_array_get(array, i)); + gstate->stroke_state->dash_phase = csi->stack[0]; +} + +static void pdf_run_d0(pdf_csi *csi) +{ + if (csi->nested_depth > 1) + return; + csi->dev->flags |= FZ_DEVFLAG_COLOR; +} + +static void pdf_run_d1(pdf_csi *csi) +{ + if (csi->nested_depth > 1) + return; + csi->dev->flags |= FZ_DEVFLAG_MASK; + csi->dev->flags &= ~(FZ_DEVFLAG_FILLCOLOR_UNDEFINED | + FZ_DEVFLAG_STROKECOLOR_UNDEFINED | + FZ_DEVFLAG_STARTCAP_UNDEFINED | + FZ_DEVFLAG_DASHCAP_UNDEFINED | + FZ_DEVFLAG_ENDCAP_UNDEFINED | + FZ_DEVFLAG_LINEJOIN_UNDEFINED | + FZ_DEVFLAG_MITERLIMIT_UNDEFINED | + FZ_DEVFLAG_LINEWIDTH_UNDEFINED); +} + +static void pdf_run_f(pdf_csi *csi) +{ + pdf_show_path(csi, 0, 1, 0, 0); +} + +static void pdf_run_fstar(pdf_csi *csi) +{ + pdf_show_path(csi, 0, 1, 0, 1); +} + +static void 
pdf_run_g(pdf_csi *csi) +{ + csi->dev->flags &= ~FZ_DEVFLAG_FILLCOLOR_UNDEFINED; + pdf_set_colorspace(csi, PDF_FILL, fz_device_gray(csi->dev->ctx)); + pdf_set_color(csi, PDF_FILL, csi->stack); +} + +static void pdf_run_gs(pdf_csi *csi, pdf_obj *rdb) +{ + pdf_obj *dict; + pdf_obj *obj; + fz_context *ctx = csi->dev->ctx; + + dict = pdf_dict_gets(rdb, "ExtGState"); + if (!dict) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find ExtGState dictionary"); + + obj = pdf_dict_gets(dict, csi->name); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find extgstate resource '%s'", csi->name); + + pdf_run_extgstate(csi, rdb, obj); +} + +static void pdf_run_h(pdf_csi *csi) +{ + fz_closepath(csi->dev->ctx, csi->path); +} + +static void pdf_run_i(pdf_csi *csi) +{ +} + +static void pdf_run_j(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + csi->dev->flags &= ~FZ_DEVFLAG_LINEJOIN_UNDEFINED; + gstate->stroke_state = fz_unshare_stroke_state(csi->dev->ctx, gstate->stroke_state); + gstate->stroke_state->linejoin = csi->stack[0]; +} + +static void pdf_run_k(pdf_csi *csi) +{ + csi->dev->flags &= ~FZ_DEVFLAG_FILLCOLOR_UNDEFINED; + pdf_set_colorspace(csi, PDF_FILL, fz_device_cmyk(csi->dev->ctx)); + pdf_set_color(csi, PDF_FILL, csi->stack); +} + +static void pdf_run_l(pdf_csi *csi) +{ + float a, b; + a = csi->stack[0]; + b = csi->stack[1]; + fz_lineto(csi->dev->ctx, csi->path, a, b); +} + +static void pdf_run_m(pdf_csi *csi) +{ + float a, b; + a = csi->stack[0]; + b = csi->stack[1]; + fz_moveto(csi->dev->ctx, csi->path, a, b); +} + +static void pdf_run_n(pdf_csi *csi) +{ + pdf_show_path(csi, 0, 0, 0, 0); +} + +static void pdf_run_q(pdf_csi *csi) +{ + pdf_gsave(csi); +} + +static void pdf_run_re(pdf_csi *csi) +{ + fz_context *ctx = csi->dev->ctx; + float x, y, w, h; + + x = csi->stack[0]; + y = csi->stack[1]; + w = csi->stack[2]; + h = csi->stack[3]; + + fz_moveto(ctx, csi->path, x, y); + fz_lineto(ctx, csi->path, x + w, y); + fz_lineto(ctx, csi->path, x + w, y + h); + 
fz_lineto(ctx, csi->path, x, y + h); + fz_closepath(ctx, csi->path); +} + +static void pdf_run_rg(pdf_csi *csi) +{ + csi->dev->flags &= ~FZ_DEVFLAG_FILLCOLOR_UNDEFINED; + pdf_set_colorspace(csi, PDF_FILL, fz_device_rgb(csi->dev->ctx)); + pdf_set_color(csi, PDF_FILL, csi->stack); +} + +static void pdf_run_ri(pdf_csi *csi) +{ +} + +static void pdf_run(pdf_csi *csi) +{ + pdf_show_path(csi, 1, 0, 1, 0); +} + +static void pdf_run_sh(pdf_csi *csi, pdf_obj *rdb) +{ + fz_context *ctx = csi->dev->ctx; + pdf_obj *dict; + pdf_obj *obj; + fz_shade *shd; + + dict = pdf_dict_gets(rdb, "Shading"); + if (!dict) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find shading dictionary"); + + obj = pdf_dict_gets(dict, csi->name); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find shading resource: '%s'", csi->name); + + if ((csi->dev->hints & FZ_IGNORE_SHADE) == 0) + { + shd = pdf_load_shading(csi->xref, obj); + + fz_try(ctx) + { + pdf_show_shade(csi, shd); + } + fz_always(ctx) + { + fz_drop_shade(ctx, shd); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } +} + +static void pdf_run_v(pdf_csi *csi) +{ + float a, b, c, d; + a = csi->stack[0]; + b = csi->stack[1]; + c = csi->stack[2]; + d = csi->stack[3]; + fz_curvetov(csi->dev->ctx, csi->path, a, b, c, d); +} + +static void pdf_run_w(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_flush_text(csi); /* linewidth affects stroked text rendering mode */ + csi->dev->flags &= ~FZ_DEVFLAG_LINEWIDTH_UNDEFINED; + gstate->stroke_state = fz_unshare_stroke_state(csi->dev->ctx, gstate->stroke_state); + gstate->stroke_state->linewidth = csi->stack[0]; +} + +static void pdf_run_y(pdf_csi *csi) +{ + float a, b, c, d; + a = csi->stack[0]; + b = csi->stack[1]; + c = csi->stack[2]; + d = csi->stack[3]; + fz_curvetoy(csi->dev->ctx, csi->path, a, b, c, d); +} + +static void pdf_run_squote(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + + fz_pre_translate(&csi->tlm, 0, -gstate->leading); + csi->tm = csi->tlm; + + 
if (csi->string_len) + pdf_show_string(csi, csi->string, csi->string_len); + else + pdf_show_text(csi, csi->obj); +} + +static void pdf_run_dquote(pdf_csi *csi) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + + gstate->word_space = csi->stack[0]; + gstate->char_space = csi->stack[1]; + + fz_pre_translate(&csi->tlm, 0, -gstate->leading); + csi->tm = csi->tlm; + + if (csi->string_len) + pdf_show_string(csi, csi->string, csi->string_len); + else + pdf_show_text(csi, csi->obj); +} + +#define A(a) (a) +#define B(a,b) (a | b << 8) +#define C(a,b,c) (a | b << 8 | c << 16) + +static int +pdf_run_keyword(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, char *buf) +{ + fz_context *ctx = csi->dev->ctx; + int key; + + key = buf[0]; + if (buf[1]) + { + key |= buf[1] << 8; + if (buf[2]) + { + key |= buf[2] << 16; + if (buf[3]) + key = 0; + } + } + + switch (key) + { + case A('"'): pdf_run_dquote(csi); break; + case A('\''): pdf_run_squote(csi); break; + case A('B'): pdf_run_B(csi); break; + case B('B','*'): pdf_run_Bstar(csi); break; + case C('B','D','C'): pdf_run_BDC(csi, rdb); break; + case B('B','I'): + pdf_run_BI(csi, rdb, file); + break; + case C('B','M','C'): pdf_run_BMC(csi); break; + case B('B','T'): pdf_run_BT(csi); break; + case B('B','X'): pdf_run_BX(csi); break; + case B('C','S'): pdf_run_CS(csi, rdb); break; + case B('D','P'): pdf_run_DP(csi); break; + case B('D','o'): + fz_try(ctx) + { + pdf_run_Do(csi, rdb); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot draw xobject/image"); + } + break; + case C('E','M','C'): pdf_run_EMC(csi); break; + case B('E','T'): pdf_run_ET(csi); break; + case B('E','X'): pdf_run_EX(csi); break; + case A('F'): pdf_run_F(csi); break; + case A('G'): pdf_run_G(csi); break; + case A('J'): pdf_run_J(csi); break; + case A('K'): pdf_run_K(csi); break; + case A('M'): pdf_run_M(csi); break; + case B('M','P'): pdf_run_MP(csi); break; + case A('Q'): pdf_run_Q(csi); break; + case B('R','G'): pdf_run_RG(csi); break; + case A('S'): 
pdf_run_S(csi); break; + case B('S','C'): pdf_run_SC(csi, rdb); break; + case C('S','C','N'): pdf_run_SC(csi, rdb); break; + case B('T','*'): pdf_run_Tstar(csi); break; + case B('T','D'): pdf_run_TD(csi); break; + case B('T','J'): pdf_run_TJ(csi); break; + case B('T','L'): pdf_run_TL(csi); break; + case B('T','c'): pdf_run_Tc(csi); break; + case B('T','d'): pdf_run_Td(csi); break; + case B('T','f'): + fz_try(ctx) + { + pdf_run_Tf(csi, rdb); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot set font"); + } + break; + case B('T','j'): pdf_run_Tj(csi); break; + case B('T','m'): pdf_run_Tm(csi); break; + case B('T','r'): pdf_run_Tr(csi); break; + case B('T','s'): pdf_run_Ts(csi); break; + case B('T','w'): pdf_run_Tw(csi); break; + case B('T','z'): pdf_run_Tz(csi); break; + case A('W'): pdf_run_W(csi); break; + case B('W','*'): pdf_run_Wstar(csi); break; + case A('b'): pdf_run_b(csi); break; + case B('b','*'): pdf_run_bstar(csi); break; + case A('c'): pdf_run_c(csi); break; + case B('c','m'): pdf_run_cm(csi); break; + case B('c','s'): pdf_run_cs(csi, rdb); break; + case A('d'): pdf_run_d(csi); break; + case B('d','0'): pdf_run_d0(csi); break; + case B('d','1'): pdf_run_d1(csi); break; + case A('f'): pdf_run_f(csi); break; + case B('f','*'): pdf_run_fstar(csi); break; + case A('g'): pdf_run_g(csi); break; + case B('g','s'): + fz_try(ctx) + { + pdf_run_gs(csi, rdb); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot set graphics state"); + } + break; + case A('h'): pdf_run_h(csi); break; + case A('i'): pdf_run_i(csi); break; + case A('j'): pdf_run_j(csi); break; + case A('k'): pdf_run_k(csi); break; + case A('l'): pdf_run_l(csi); break; + case A('m'): pdf_run_m(csi); break; + case A('n'): pdf_run_n(csi); break; + case A('q'): pdf_run_q(csi); break; + case B('r','e'): pdf_run_re(csi); break; + case B('r','g'): pdf_run_rg(csi); break; + case B('r','i'): pdf_run_ri(csi); break; + case A('s'): pdf_run(csi); break; + case B('s','c'): pdf_run_sc(csi, rdb); break; 
+ case C('s','c','n'): pdf_run_sc(csi, rdb); break; + case B('s','h'): + fz_try(ctx) + { + pdf_run_sh(csi, rdb); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot draw shading"); + } + break; + case A('v'): pdf_run_v(csi); break; + case A('w'): pdf_run_w(csi); break; + case A('y'): pdf_run_y(csi); break; + default: + if (!csi->xbalance) + { + fz_warn(ctx, "unknown keyword: '%s'", buf); + return 1; + } + break; + } + return 0; +} + +static void +pdf_run_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file, pdf_lexbuf *buf) +{ + fz_context *ctx = csi->dev->ctx; + pdf_token tok = PDF_TOK_ERROR; + int in_array; + int ignoring_errors = 0; + + /* make sure we have a clean slate if we come here from flush_text */ + pdf_clear_stack(csi); + in_array = 0; + + fz_var(in_array); + fz_var(tok); + + if (csi->cookie) + { + csi->cookie->progress_max = -1; + csi->cookie->progress = 0; + } + + do + { + fz_try(ctx) + { + do + { + /* Check the cookie */ + if (csi->cookie) + { + if (csi->cookie->abort) + { + tok = PDF_TOK_EOF; + break; + } + csi->cookie->progress++; + } + + tok = pdf_lex(file, buf); + + if (in_array) + { + if (tok == PDF_TOK_CLOSE_ARRAY) + { + in_array = 0; + } + else if (tok == PDF_TOK_REAL) + { + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_show_space(csi, -buf->f * gstate->size * 0.001f); + } + else if (tok == PDF_TOK_INT) + { + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_show_space(csi, -buf->i * gstate->size * 0.001f); + } + else if (tok == PDF_TOK_STRING) + { + pdf_show_string(csi, (unsigned char *)buf->scratch, buf->len); + } + else if (tok == PDF_TOK_KEYWORD) + { + if (!strcmp(buf->scratch, "Tw") || !strcmp(buf->scratch, "Tc")) + fz_warn(ctx, "ignoring keyword '%s' inside array", buf->scratch); + else + fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in array"); + } + else if (tok == PDF_TOK_EOF) + break; + else + fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in array"); + } + + else switch (tok) + { + case PDF_TOK_ENDSTREAM: + case 
PDF_TOK_EOF: + tok = PDF_TOK_EOF; + break; + + case PDF_TOK_OPEN_ARRAY: + if (!csi->in_text) + { + if (csi->obj) + { + pdf_drop_obj(csi->obj); + csi->obj = NULL; + } + csi->obj = pdf_parse_array(csi->xref, file, buf); + } + else + { + in_array = 1; + } + break; + + case PDF_TOK_OPEN_DICT: + if (csi->obj) + { + pdf_drop_obj(csi->obj); + csi->obj = NULL; + } + csi->obj = pdf_parse_dict(csi->xref, file, buf); + break; + + case PDF_TOK_NAME: + fz_strlcpy(csi->name, buf->scratch, sizeof(csi->name)); + break; + + case PDF_TOK_INT: + if (csi->top < nelem(csi->stack)) { + csi->stack[csi->top] = buf->i; + csi->top ++; + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, "stack overflow"); + break; + + case PDF_TOK_REAL: + if (csi->top < nelem(csi->stack)) { + csi->stack[csi->top] = buf->f; + csi->top ++; + } + else + fz_throw(ctx, FZ_ERROR_GENERIC, "stack overflow"); + break; + + case PDF_TOK_STRING: + if (buf->len <= sizeof(csi->string)) + { + memcpy(csi->string, buf->scratch, buf->len); + csi->string_len = buf->len; + } + else + { + if (csi->obj) + { + pdf_drop_obj(csi->obj); + csi->obj = NULL; + } + csi->obj = pdf_new_string(ctx, buf->scratch, buf->len); + } + break; + + case PDF_TOK_KEYWORD: + if (pdf_run_keyword(csi, rdb, file, buf->scratch)) + { + tok = PDF_TOK_EOF; + } + pdf_clear_stack(csi); + break; + + default: + fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in content stream"); + } + } + while (tok != PDF_TOK_EOF); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + /* Swallow the error */ + if (csi->cookie) + csi->cookie->errors++; + if (!ignoring_errors) + { + fz_warn(ctx, "Ignoring errors during rendering"); + ignoring_errors = 1; + } + /* If we do catch an error, then reset ourselves to a + * base lexing state */ + in_array = 0; + } + } + while (tok != PDF_TOK_EOF); +} + +/* + * Entry points + */ + +static void +pdf_run_contents_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file) +{ + fz_context *ctx = csi->dev->ctx; + pdf_lexbuf *buf; + int save_in_text; + int 
save_gbot; + + fz_var(buf); + + if (file == NULL) + return; + + buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */ + pdf_lexbuf_init(ctx, buf, PDF_LEXBUF_SMALL); + save_in_text = csi->in_text; + csi->in_text = 0; + save_gbot = csi->gbot; + csi->gbot = csi->gtop; + fz_try(ctx) + { + pdf_run_stream(csi, rdb, file, buf); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + fz_warn(ctx, "Content stream parsing error - rendering truncated"); + } + while (csi->gtop > csi->gbot) + pdf_grestore(csi); + csi->gbot = save_gbot; + csi->in_text = save_in_text; + pdf_lexbuf_fin(buf); + fz_free(ctx, buf); +} + +static void +pdf_run_contents_object(pdf_csi *csi, pdf_obj *rdb, pdf_obj *contents) +{ + fz_context *ctx = csi->dev->ctx; + fz_stream *file = NULL; + + if (contents == NULL) + return; + + file = pdf_open_contents_stream(csi->xref, contents); + fz_try(ctx) + { + pdf_run_contents_stream(csi, rdb, file); + } + fz_always(ctx) + { + fz_close(file); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +pdf_run_contents_buffer(pdf_csi *csi, pdf_obj *rdb, fz_buffer *contents) +{ + fz_context *ctx = csi->dev->ctx; + fz_stream *file = NULL; + + if (contents == NULL) + return; + + file = fz_open_buffer(ctx, contents); + fz_try(ctx) + { + pdf_run_contents_stream(csi, rdb, file); + } + fz_always(ctx) + { + fz_close(file); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void pdf_run_page_contents_with_usage(pdf_document *xref, pdf_page *page, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie) +{ + fz_context *ctx = dev->ctx; + pdf_csi *csi; + fz_matrix local_ctm; + + fz_concat(&local_ctm, &page->ctm, ctm); + + if (page->transparency) + { + fz_rect mediabox = page->mediabox; + fz_begin_group(dev, fz_transform_rect(&mediabox, &local_ctm), 1, 0, 0, 1); + } + + csi = pdf_new_csi(xref, dev, &local_ctm, event, cookie, NULL, 0); + fz_try(ctx) + { + /* We need to save an extra level here to allow for level 0 + * to be the 
'parent' gstate level. */ + pdf_gsave(csi); + pdf_run_contents_object(csi, page->resources, page->contents); + } + fz_always(ctx) + { + while (csi->gtop > 0) + pdf_grestore(csi); + pdf_free_csi(csi); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot parse page content stream"); + } + + if (page->transparency) + fz_end_group(dev); +} + +void pdf_run_page_contents(pdf_document *xref, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie) +{ + pdf_run_page_contents_with_usage(xref, page, dev, ctm, "View", cookie); +} + +static void pdf_run_annot_with_usage(pdf_document *xref, pdf_page *page, pdf_annot *annot, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie) +{ + fz_context *ctx = dev->ctx; + pdf_csi *csi; + int flags; + fz_matrix local_ctm; + + fz_concat(&local_ctm, &page->ctm, ctm); + + flags = pdf_to_int(pdf_dict_gets(annot->obj, "F")); + + /* TODO: NoZoom and NoRotate */ + if (flags & (1 << 0)) /* Invisible */ + return; + if (flags & (1 << 1)) /* Hidden */ + return; + if (!strcmp(event, "Print") && !(flags & (1 << 2))) /* Print */ + return; + if (!strcmp(event, "View") && (flags & (1 << 5))) /* NoView */ + return; + + csi = pdf_new_csi(xref, dev, &local_ctm, event, cookie, NULL, 0); + if (!pdf_is_hidden_ocg(pdf_dict_gets(annot->obj, "OC"), csi, page->resources)) + { + fz_try(ctx) + { + /* We need to save an extra level here to allow for level 0 + * to be the 'parent' gstate level. 
*/ + pdf_gsave(csi); + pdf_run_xobject(csi, page->resources, annot->ap, &annot->matrix); + } + fz_catch(ctx) + { + while (csi->gtop > 0) + pdf_grestore(csi); + pdf_free_csi(csi); + fz_rethrow_message(ctx, "cannot parse annotation appearance stream"); + } + } + pdf_free_csi(csi); +} + +void pdf_run_annot(pdf_document *xref, pdf_page *page, pdf_annot *annot, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie) +{ + pdf_run_annot_with_usage(xref, page, annot, dev, ctm, "View", cookie); +} + +static void pdf_run_page_annots_with_usage(pdf_document *xref, pdf_page *page, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie) +{ + pdf_annot *annot; + + if (cookie && cookie->progress_max != -1) + { + int count = 1; + for (annot = page->annots; annot; annot = annot->next) + count++; + cookie->progress_max += count; + } + + for (annot = page->annots; annot; annot = annot->next) + { + /* Check the cookie for aborting */ + if (cookie) + { + if (cookie->abort) + break; + cookie->progress++; + } + + pdf_run_annot_with_usage(xref, page, annot, dev, ctm, event, cookie); + } +} + +void +pdf_run_page_with_usage(pdf_document *xref, pdf_page *page, fz_device *dev, const fz_matrix *ctm, char *event, fz_cookie *cookie) +{ + pdf_run_page_contents_with_usage(xref, page, dev, ctm, event, cookie); + pdf_run_page_annots_with_usage(xref, page, dev, ctm, event, cookie); +} + +void +pdf_run_page(pdf_document *xref, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie) +{ + pdf_run_page_with_usage(xref, page, dev, ctm, "View", cookie); +} + +void +pdf_run_glyph(pdf_document *xref, pdf_obj *resources, fz_buffer *contents, fz_device *dev, const fz_matrix *ctm, void *gstate, int nested_depth) +{ + pdf_csi *csi = pdf_new_csi(xref, dev, ctm, "View", NULL, gstate, nested_depth+1); + fz_context *ctx = xref->ctx; + + fz_try(ctx) + { + if (nested_depth > 10) + fz_throw(ctx, FZ_ERROR_GENERIC, "Too many nestings of Type3 glyphs"); + pdf_run_contents_buffer(csi, 
resources, contents); + } + fz_always(ctx) + { + pdf_free_csi(csi); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot parse glyph content stream"); + } +} diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c new file mode 100644 index 00000000..a8bf9f4b --- /dev/null +++ b/source/pdf/pdf-lex.c @@ -0,0 +1,553 @@ +#include "mupdf/pdf.h" + +#define IS_NUMBER \ + '+':case'-':case'.':case'0':case'1':case'2':case'3':\ + case'4':case'5':case'6':case'7':case'8':case'9' +#define IS_WHITE \ + '\000':case'\011':case'\012':case'\014':case'\015':case'\040' +#define IS_HEX \ + '0':case'1':case'2':case'3':case'4':case'5':case'6':\ + case'7':case'8':case'9':case'A':case'B':case'C':\ + case'D':case'E':case'F':case'a':case'b':case'c':\ + case'd':case'e':case'f' +#define IS_DELIM \ + '(':case')':case'<':case'>':case'[':case']':case'{':\ + case'}':case'/':case'%' + +#define RANGE_0_9 \ + '0':case'1':case'2':case'3':case'4':case'5':\ + case'6':case'7':case'8':case'9' +#define RANGE_a_f \ + 'a':case'b':case'c':case'd':case'e':case'f' +#define RANGE_A_F \ + 'A':case'B':case'C':case'D':case'E':case'F' + +static inline int iswhite(int ch) +{ + return + ch == '\000' || + ch == '\011' || + ch == '\012' || + ch == '\014' || + ch == '\015' || + ch == '\040'; +} + +static inline int unhex(int ch) +{ + if (ch >= '0' && ch <= '9') return ch - '0'; + if (ch >= 'A' && ch <= 'F') return ch - 'A' + 0xA; + if (ch >= 'a' && ch <= 'f') return ch - 'a' + 0xA; + return 0; +} + +static void +lex_white(fz_stream *f) +{ + int c; + do { + c = fz_read_byte(f); + } while ((c <= 32) && (iswhite(c))); + if (c != EOF) + fz_unread_byte(f); +} + +static void +lex_comment(fz_stream *f) +{ + int c; + do { + c = fz_read_byte(f); + } while ((c != '\012') && (c != '\015') && (c != EOF)); +} + +static int +lex_number(fz_stream *f, pdf_lexbuf *buf, int c) +{ + int neg = 0; + int i = 0; + int n; + int d; + float v; + + /* Initially we might have +, -, . 
or a digit */ + switch (c) + { + case '.': + goto loop_after_dot; + case '-': + neg = 1; + break; + case '+': + break; + default: /* Must be a digit */ + i = c - '0'; + break; + } + + while (1) + { + c = fz_read_byte(f); + switch (c) + { + case '.': + goto loop_after_dot; + case RANGE_0_9: + i = 10*i + c - '0'; + /* FIXME: Need overflow check here; do we care? */ + break; + default: + fz_unread_byte(f); + /* Fallthrough */ + case EOF: + if (neg) + i = -i; + buf->i = i; + return PDF_TOK_INT; + } + } + + /* In here, we've seen a dot, so can accept just digits */ +loop_after_dot: + n = 0; + d = 1; + while (1) + { + c = fz_read_byte(f); + switch (c) + { + case RANGE_0_9: + if (d >= INT_MAX/10) + goto underflow; + n = n*10 + (c - '0'); + d *= 10; + break; + default: + fz_unread_byte(f); + /* Fallthrough */ + case EOF: + v = (float)i + ((float)n / (float)d); + if (neg) + v = -v; + buf->f = v; + return PDF_TOK_REAL; + } + } + +underflow: + /* Ignore any digits after here, because they are too small */ + while (1) + { + c = fz_read_byte(f); + switch (c) + { + case RANGE_0_9: + break; + default: + fz_unread_byte(f); + /* Fallthrough */ + case EOF: + v = (float)i + ((float)n / (float)d); + if (neg) + v = -v; + buf->f = v; + return PDF_TOK_REAL; + } + } +} + +static void +lex_name(fz_stream *f, pdf_lexbuf *buf) +{ + char *s = buf->scratch; + int n = buf->size; + + while (n > 1) + { + int c = fz_read_byte(f); + switch (c) + { + case IS_WHITE: + case IS_DELIM: + fz_unread_byte(f); + goto end; + case EOF: + goto end; + case '#': + { + int d; + c = fz_read_byte(f); + switch (c) + { + case RANGE_0_9: + d = (c - '0') << 4; + break; + case RANGE_a_f: + d = (c - 'a' + 10) << 4; + break; + case RANGE_A_F: + d = (c - 'A' + 10) << 4; + break; + default: + fz_unread_byte(f); + /* fallthrough */ + case EOF: + goto end; + } + c = fz_read_byte(f); + switch (c) + { + case RANGE_0_9: + c -= '0'; + break; + case RANGE_a_f: + c -= 'a' - 10; + break; + case RANGE_A_F: + c -= 'A' - 10; + break; + 
default: + fz_unread_byte(f); + /* fallthrough */ + case EOF: + *s++ = d; + n--; + goto end; + } + *s++ = d + c; + n--; + break; + } + default: + *s++ = c; + n--; + break; + } + } +end: + *s = '\0'; + buf->len = s - buf->scratch; +} + +static int +lex_string(fz_stream *f, pdf_lexbuf *lb) +{ + char *s = lb->scratch; + char *e = s + lb->size; + int bal = 1; + int oct; + int c; + + while (1) + { + if (s == e) + { + s += pdf_lexbuf_grow(lb); + e = lb->scratch + lb->size; + } + c = fz_read_byte(f); + switch (c) + { + case EOF: + goto end; + case '(': + bal++; + *s++ = c; + break; + case ')': + bal --; + if (bal == 0) + goto end; + *s++ = c; + break; + case '\\': + c = fz_read_byte(f); + switch (c) + { + case EOF: + goto end; + case 'n': + *s++ = '\n'; + break; + case 'r': + *s++ = '\r'; + break; + case 't': + *s++ = '\t'; + break; + case 'b': + *s++ = '\b'; + break; + case 'f': + *s++ = '\f'; + break; + case '(': + *s++ = '('; + break; + case ')': + *s++ = ')'; + break; + case '\\': + *s++ = '\\'; + break; + case RANGE_0_9: + oct = c - '0'; + c = fz_read_byte(f); + if (c >= '0' && c <= '9') + { + oct = oct * 8 + (c - '0'); + c = fz_read_byte(f); + if (c >= '0' && c <= '9') + oct = oct * 8 + (c - '0'); + else if (c != EOF) + fz_unread_byte(f); + } + else if (c != EOF) + fz_unread_byte(f); + *s++ = oct; + break; + case '\n': + break; + case '\r': + c = fz_read_byte(f); + if ((c != '\n') && (c != EOF)) + fz_unread_byte(f); + break; + default: + *s++ = c; + } + break; + default: + *s++ = c; + break; + } + } +end: + lb->len = s - lb->scratch; + return PDF_TOK_STRING; +} + +static int +lex_hex_string(fz_stream *f, pdf_lexbuf *lb) +{ + char *s = lb->scratch; + char *e = s + lb->size; + int a = 0, x = 0; + int c; + + while (1) + { + if (s == e) + { + s += pdf_lexbuf_grow(lb); + e = lb->scratch + lb->size; + } + c = fz_read_byte(f); + switch (c) + { + case IS_WHITE: + break; + case IS_HEX: + if (x) + { + *s++ = a * 16 + unhex(c); + x = !x; + } + else + { + a = unhex(c); + x = 
!x; + } + break; + case '>': + case EOF: + goto end; + default: + fz_warn(f->ctx, "ignoring invalid character in hex string"); + } + } +end: + lb->len = s - lb->scratch; + return PDF_TOK_STRING; +} + +static pdf_token +pdf_token_from_keyword(char *key) +{ + switch (*key) + { + case 'R': + if (!strcmp(key, "R")) return PDF_TOK_R; + break; + case 't': + if (!strcmp(key, "true")) return PDF_TOK_TRUE; + if (!strcmp(key, "trailer")) return PDF_TOK_TRAILER; + break; + case 'f': + if (!strcmp(key, "false")) return PDF_TOK_FALSE; + break; + case 'n': + if (!strcmp(key, "null")) return PDF_TOK_NULL; + break; + case 'o': + if (!strcmp(key, "obj")) return PDF_TOK_OBJ; + break; + case 'e': + if (!strcmp(key, "endobj")) return PDF_TOK_ENDOBJ; + if (!strcmp(key, "endstream")) return PDF_TOK_ENDSTREAM; + break; + case 's': + if (!strcmp(key, "stream")) return PDF_TOK_STREAM; + if (!strcmp(key, "startxref")) return PDF_TOK_STARTXREF; + break; + case 'x': + if (!strcmp(key, "xref")) return PDF_TOK_XREF; + break; + default: + break; + } + + return PDF_TOK_KEYWORD; +} + +void pdf_lexbuf_init(fz_context *ctx, pdf_lexbuf *lb, int size) +{ + lb->size = lb->base_size = size; + lb->len = 0; + lb->ctx = ctx; + lb->scratch = &lb->buffer[0]; +} + +void pdf_lexbuf_fin(pdf_lexbuf *lb) +{ + if (lb && lb->size != lb->base_size) + fz_free(lb->ctx, lb->scratch); +} + +ptrdiff_t pdf_lexbuf_grow(pdf_lexbuf *lb) +{ + char *old = lb->scratch; + int newsize = lb->size * 2; + if (lb->size == lb->base_size) + { + lb->scratch = fz_malloc(lb->ctx, newsize); + memcpy(lb->scratch, lb->buffer, lb->size); + } + else + { + lb->scratch = fz_resize_array(lb->ctx, lb->scratch, newsize, 1); + } + lb->size = newsize; + return lb->scratch - old; +} + +pdf_token +pdf_lex(fz_stream *f, pdf_lexbuf *buf) +{ + while (1) + { + int c = fz_read_byte(f); + switch (c) + { + case EOF: + return PDF_TOK_EOF; + case IS_WHITE: + lex_white(f); + break; + case '%': + lex_comment(f); + break; + case '/': + lex_name(f, buf); + return 
PDF_TOK_NAME; + case '(': + return lex_string(f, buf); + case ')': + fz_warn(f->ctx, "lexical error (unexpected ')')"); + continue; + case '<': + c = fz_read_byte(f); + if (c == '<') + { + return PDF_TOK_OPEN_DICT; + } + else + { + fz_unread_byte(f); + return lex_hex_string(f, buf); + } + case '>': + c = fz_read_byte(f); + if (c == '>') + { + return PDF_TOK_CLOSE_DICT; + } + fz_warn(f->ctx, "lexical error (unexpected '>')"); + fz_unread_byte(f); + continue; + case '[': + return PDF_TOK_OPEN_ARRAY; + case ']': + return PDF_TOK_CLOSE_ARRAY; + case '{': + return PDF_TOK_OPEN_BRACE; + case '}': + return PDF_TOK_CLOSE_BRACE; + case IS_NUMBER: + return lex_number(f, buf, c); + default: /* isregular: !isdelim && !iswhite && c != EOF */ + fz_unread_byte(f); + lex_name(f, buf); + return pdf_token_from_keyword(buf->scratch); + } + } +} + +void pdf_print_token(fz_context *ctx, fz_buffer *fzbuf, int tok, pdf_lexbuf *buf) +{ + switch (tok) + { + case PDF_TOK_NAME: + fz_buffer_printf(ctx, fzbuf, "/%s", buf->scratch); + break; + case PDF_TOK_STRING: + if (buf->len >= buf->size) + pdf_lexbuf_grow(buf); + buf->scratch[buf->len] = 0; + fz_buffer_cat_pdf_string(ctx, fzbuf, buf->scratch); + break; + case PDF_TOK_OPEN_DICT: + fz_buffer_printf(ctx, fzbuf, "<<"); + break; + case PDF_TOK_CLOSE_DICT: + fz_buffer_printf(ctx, fzbuf, ">>"); + break; + case PDF_TOK_OPEN_ARRAY: + fz_buffer_printf(ctx, fzbuf, "["); + break; + case PDF_TOK_CLOSE_ARRAY: + fz_buffer_printf(ctx, fzbuf, "]"); + break; + case PDF_TOK_OPEN_BRACE: + fz_buffer_printf(ctx, fzbuf, "{"); + break; + case PDF_TOK_CLOSE_BRACE: + fz_buffer_printf(ctx, fzbuf, "}"); + break; + case PDF_TOK_INT: + fz_buffer_printf(ctx, fzbuf, "%d", buf->i); + break; + case PDF_TOK_REAL: + { + char sbuf[256]; + sprintf(sbuf, "%g", buf->f); + if (strchr(sbuf, 'e')) /* bad news! */ + sprintf(sbuf, fabsf(buf->f) > 1 ? 
"%1.1f" : "%1.8f", buf->f); + fz_buffer_printf(ctx, fzbuf, "%s", sbuf); + } + break; + default: + fz_buffer_printf(ctx, fzbuf, "%s", buf->scratch); + break; + } +} diff --git a/source/pdf/pdf-metrics.c b/source/pdf/pdf-metrics.c new file mode 100644 index 00000000..8a4b7d11 --- /dev/null +++ b/source/pdf/pdf-metrics.c @@ -0,0 +1,141 @@ +#include "mupdf/pdf.h" + +void +pdf_set_font_wmode(fz_context *ctx, pdf_font_desc *font, int wmode) +{ + font->wmode = wmode; +} + +void +pdf_set_default_hmtx(fz_context *ctx, pdf_font_desc *font, int w) +{ + font->dhmtx.w = w; +} + +void +pdf_set_default_vmtx(fz_context *ctx, pdf_font_desc *font, int y, int w) +{ + font->dvmtx.y = y; + font->dvmtx.w = w; +} + +void +pdf_add_hmtx(fz_context *ctx, pdf_font_desc *font, int lo, int hi, int w) +{ + if (font->hmtx_len + 1 >= font->hmtx_cap) + { + int new_cap = font->hmtx_cap + 16; + font->hmtx = fz_resize_array(ctx, font->hmtx, new_cap, sizeof(pdf_hmtx)); + font->hmtx_cap = new_cap; + } + + font->hmtx[font->hmtx_len].lo = lo; + font->hmtx[font->hmtx_len].hi = hi; + font->hmtx[font->hmtx_len].w = w; + font->hmtx_len++; +} + +void +pdf_add_vmtx(fz_context *ctx, pdf_font_desc *font, int lo, int hi, int x, int y, int w) +{ + if (font->vmtx_len + 1 >= font->vmtx_cap) + { + int new_cap = font->vmtx_cap + 16; + font->vmtx = fz_resize_array(ctx, font->vmtx, new_cap, sizeof(pdf_vmtx)); + font->vmtx_cap = new_cap; + } + + font->vmtx[font->vmtx_len].lo = lo; + font->vmtx[font->vmtx_len].hi = hi; + font->vmtx[font->vmtx_len].x = x; + font->vmtx[font->vmtx_len].y = y; + font->vmtx[font->vmtx_len].w = w; + font->vmtx_len++; +} + +static int cmph(const void *a0, const void *b0) +{ + pdf_hmtx *a = (pdf_hmtx*)a0; + pdf_hmtx *b = (pdf_hmtx*)b0; + return a->lo - b->lo; +} + +static int cmpv(const void *a0, const void *b0) +{ + pdf_vmtx *a = (pdf_vmtx*)a0; + pdf_vmtx *b = (pdf_vmtx*)b0; + return a->lo - b->lo; +} + +void +pdf_end_hmtx(fz_context *ctx, pdf_font_desc *font) +{ + if (!font->hmtx) + return; + 
qsort(font->hmtx, font->hmtx_len, sizeof(pdf_hmtx), cmph); + font->size += font->hmtx_cap * sizeof(pdf_hmtx); +} + +void +pdf_end_vmtx(fz_context *ctx, pdf_font_desc *font) +{ + if (!font->vmtx) + return; + qsort(font->vmtx, font->vmtx_len, sizeof(pdf_vmtx), cmpv); + font->size += font->vmtx_cap * sizeof(pdf_vmtx); +} + +pdf_hmtx +pdf_lookup_hmtx(fz_context *ctx, pdf_font_desc *font, int cid) +{ + int l = 0; + int r = font->hmtx_len - 1; + int m; + + if (!font->hmtx) + goto notfound; + + while (l <= r) + { + m = (l + r) >> 1; + if (cid < font->hmtx[m].lo) + r = m - 1; + else if (cid > font->hmtx[m].hi) + l = m + 1; + else + return font->hmtx[m]; + } + +notfound: + return font->dhmtx; +} + +pdf_vmtx +pdf_lookup_vmtx(fz_context *ctx, pdf_font_desc *font, int cid) +{ + pdf_hmtx h; + pdf_vmtx v; + int l = 0; + int r = font->vmtx_len - 1; + int m; + + if (!font->vmtx) + goto notfound; + + while (l <= r) + { + m = (l + r) >> 1; + if (cid < font->vmtx[m].lo) + r = m - 1; + else if (cid > font->vmtx[m].hi) + l = m + 1; + else + return font->vmtx[m]; + } + +notfound: + h = pdf_lookup_hmtx(ctx, font, cid); + v = font->dvmtx; + v.x = h.w / 2; + return v; +} diff --git a/source/pdf/pdf-nametree.c b/source/pdf/pdf-nametree.c new file mode 100644 index 00000000..ea386dda --- /dev/null +++ b/source/pdf/pdf-nametree.c @@ -0,0 +1,166 @@ +#include "mupdf/pdf.h" + +static pdf_obj * +pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, pdf_obj *needle) +{ + pdf_obj *kids = pdf_dict_gets(node, "Kids"); + pdf_obj *names = pdf_dict_gets(node, "Names"); + + if (pdf_is_array(kids)) + { + int l = 0; + int r = pdf_array_len(kids) - 1; + + while (l <= r) + { + int m = (l + r) >> 1; + pdf_obj *kid = pdf_array_get(kids, m); + pdf_obj *limits = pdf_dict_gets(kid, "Limits"); + pdf_obj *first = pdf_array_get(limits, 0); + pdf_obj *last = pdf_array_get(limits, 1); + + if (pdf_objcmp(needle, first) < 0) + r = m - 1; + else if (pdf_objcmp(needle, last) > 0) + l = m + 1; + else + { + pdf_obj *obj; + + 
if (pdf_obj_mark(node)) + break; + obj = pdf_lookup_name_imp(ctx, kid, needle); + pdf_obj_unmark(node); + return obj; + } + } + } + + if (pdf_is_array(names)) + { + int l = 0; + int r = (pdf_array_len(names) / 2) - 1; + + while (l <= r) + { + int m = (l + r) >> 1; + int c; + pdf_obj *key = pdf_array_get(names, m * 2); + pdf_obj *val = pdf_array_get(names, m * 2 + 1); + + c = pdf_objcmp(needle, key); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return val; + } + + /* Spec says names should be sorted (hence the binary search, + * above), but Acrobat copes with non-sorted. Drop back to a + * simple search if the binary search fails. */ + r = pdf_array_len(names)/2; + for (l = 0; l < r; l++) + if (!pdf_objcmp(needle, pdf_array_get(names, l * 2))) + return pdf_array_get(names, l * 2 + 1); + } + + return NULL; +} + +pdf_obj * +pdf_lookup_name(pdf_document *xref, char *which, pdf_obj *needle) +{ + fz_context *ctx = xref->ctx; + + pdf_obj *root = pdf_dict_gets(pdf_trailer(xref), "Root"); + pdf_obj *names = pdf_dict_gets(root, "Names"); + pdf_obj *tree = pdf_dict_gets(names, which); + return pdf_lookup_name_imp(ctx, tree, needle); +} + +pdf_obj * +pdf_lookup_dest(pdf_document *xref, pdf_obj *needle) +{ + fz_context *ctx = xref->ctx; + + pdf_obj *root = pdf_dict_gets(pdf_trailer(xref), "Root"); + pdf_obj *dests = pdf_dict_gets(root, "Dests"); + pdf_obj *names = pdf_dict_gets(root, "Names"); + pdf_obj *dest = NULL; + + /* PDF 1.1 has destinations in a dictionary */ + if (dests) + { + if (pdf_is_name(needle)) + return pdf_dict_get(dests, needle); + else + return pdf_dict_gets(dests, pdf_to_str_buf(needle)); + } + + /* PDF 1.2 has destinations in a name tree */ + if (names && !dest) + { + pdf_obj *tree = pdf_dict_gets(names, "Dests"); + return pdf_lookup_name_imp(ctx, tree, needle); + } + + return NULL; +} + +static void +pdf_load_name_tree_imp(pdf_obj *dict, pdf_document *xref, pdf_obj *node) +{ + fz_context *ctx = xref->ctx; + pdf_obj *kids = 
pdf_dict_gets(node, "Kids"); + pdf_obj *names = pdf_dict_gets(node, "Names"); + int i; + + UNUSED(ctx); + + if (kids && !pdf_obj_mark(node)) + { + int len = pdf_array_len(kids); + for (i = 0; i < len; i++) + pdf_load_name_tree_imp(dict, xref, pdf_array_get(kids, i)); + pdf_obj_unmark(node); + } + + if (names) + { + int len = pdf_array_len(names); + for (i = 0; i + 1 < len; i += 2) + { + pdf_obj *key = pdf_array_get(names, i); + pdf_obj *val = pdf_array_get(names, i + 1); + if (pdf_is_string(key)) + { + key = pdf_to_utf8_name(xref, key); + pdf_dict_put(dict, key, val); + pdf_drop_obj(key); + } + else if (pdf_is_name(key)) + { + pdf_dict_put(dict, key, val); + } + } + } +} + +pdf_obj * +pdf_load_name_tree(pdf_document *xref, char *which) +{ + fz_context *ctx = xref->ctx; + + pdf_obj *root = pdf_dict_gets(pdf_trailer(xref), "Root"); + pdf_obj *names = pdf_dict_gets(root, "Names"); + pdf_obj *tree = pdf_dict_gets(names, which); + if (pdf_is_dict(tree)) + { + pdf_obj *dict = pdf_new_dict(ctx, 100); + pdf_load_name_tree_imp(dict, xref, tree); + return dict; + } + return NULL; +} diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c new file mode 100644 index 00000000..8e5cf419 --- /dev/null +++ b/source/pdf/pdf-object.c @@ -0,0 +1,1576 @@ +#include "mupdf/pdf.h" + +typedef enum pdf_objkind_e +{ + PDF_NULL = 0, + PDF_BOOL = 'b', + PDF_INT = 'i', + PDF_REAL = 'f', + PDF_STRING = 's', + PDF_NAME = 'n', + PDF_ARRAY = 'a', + PDF_DICT = 'd', + PDF_INDIRECT = 'r' +} pdf_objkind; + +struct keyval +{ + pdf_obj *k; + pdf_obj *v; +}; + +struct pdf_obj_s +{ + int refs; + char kind; + char marked; + fz_context *ctx; + union + { + int b; + int i; + float f; + struct { + unsigned short len; + char buf[1]; + } s; + char n[1]; + struct { + int len; + int cap; + pdf_obj **items; + } a; + struct { + char sorted; + int len; + int cap; + struct keyval *items; + } d; + struct { + int num; + int gen; + pdf_document *xref; + } r; + } u; +}; + +pdf_obj * +pdf_new_null(fz_context *ctx) 
+{ + pdf_obj *obj; + obj = Memento_label(fz_malloc(ctx, sizeof(pdf_obj)), "pdf_obj(null)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_NULL; + obj->marked = 0; + return obj; +} + +pdf_obj * +pdf_new_bool(fz_context *ctx, int b) +{ + pdf_obj *obj; + obj = Memento_label(fz_malloc(ctx, sizeof(pdf_obj)), "pdf_obj(bool)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_BOOL; + obj->marked = 0; + obj->u.b = b; + return obj; +} + +pdf_obj * +pdf_new_int(fz_context *ctx, int i) +{ + pdf_obj *obj; + obj = Memento_label(fz_malloc(ctx, sizeof(pdf_obj)), "pdf_obj(int)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_INT; + obj->marked = 0; + obj->u.i = i; + return obj; +} + +pdf_obj * +pdf_new_real(fz_context *ctx, float f) +{ + pdf_obj *obj; + obj = Memento_label(fz_malloc(ctx, sizeof(pdf_obj)), "pdf_obj(real)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_REAL; + obj->marked = 0; + obj->u.f = f; + return obj; +} + +pdf_obj * +pdf_new_string(fz_context *ctx, const char *str, int len) +{ + pdf_obj *obj; + obj = Memento_label(fz_malloc(ctx, offsetof(pdf_obj, u.s.buf) + len + 1), "pdf_obj(string)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_STRING; + obj->marked = 0; + obj->u.s.len = len; + memcpy(obj->u.s.buf, str, len); + obj->u.s.buf[len] = '\0'; + return obj; +} + +pdf_obj * +pdf_new_name(fz_context *ctx, const char *str) +{ + pdf_obj *obj; + obj = Memento_label(fz_malloc(ctx, offsetof(pdf_obj, u.n) + strlen(str) + 1), "pdf_obj(name)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_NAME; + obj->marked = 0; + strcpy(obj->u.n, str); + return obj; +} + +pdf_obj * +pdf_new_indirect(fz_context *ctx, int num, int gen, void *xref) +{ + pdf_obj *obj; + obj = Memento_label(fz_malloc(ctx, sizeof(pdf_obj)), "pdf_obj(indirect)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_INDIRECT; + obj->marked = 0; + obj->u.r.num = num; + obj->u.r.gen = gen; + obj->u.r.xref = xref; + return obj; +} + +pdf_obj * +pdf_keep_obj(pdf_obj *obj) +{ 
+ if (obj) + obj->refs ++; + return obj; +} + +int pdf_is_indirect(pdf_obj *obj) +{ + return obj ? obj->kind == PDF_INDIRECT : 0; +} + +#define RESOLVE(obj) \ + do { \ + if (obj && obj->kind == PDF_INDIRECT) \ + {\ + obj = pdf_resolve_indirect(obj); \ + } \ + } while (0) + +int pdf_is_null(pdf_obj *obj) +{ + RESOLVE(obj); + return obj ? obj->kind == PDF_NULL : 0; +} + +int pdf_is_bool(pdf_obj *obj) +{ + RESOLVE(obj); + return obj ? obj->kind == PDF_BOOL : 0; +} + +int pdf_is_int(pdf_obj *obj) +{ + RESOLVE(obj); + return obj ? obj->kind == PDF_INT : 0; +} + +int pdf_is_real(pdf_obj *obj) +{ + RESOLVE(obj); + return obj ? obj->kind == PDF_REAL : 0; +} + +int pdf_is_string(pdf_obj *obj) +{ + RESOLVE(obj); + return obj ? obj->kind == PDF_STRING : 0; +} + +int pdf_is_name(pdf_obj *obj) +{ + RESOLVE(obj); + return obj ? obj->kind == PDF_NAME : 0; +} + +int pdf_is_array(pdf_obj *obj) +{ + RESOLVE(obj); + return obj ? obj->kind == PDF_ARRAY : 0; +} + +int pdf_is_dict(pdf_obj *obj) +{ + RESOLVE(obj); + return obj ? obj->kind == PDF_DICT : 0; +} + +int pdf_to_bool(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj) + return 0; + return obj->kind == PDF_BOOL ? 
obj->u.b : 0; +} + +int pdf_to_int(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj) + return 0; + if (obj->kind == PDF_INT) + return obj->u.i; + if (obj->kind == PDF_REAL) + return (int)(obj->u.f + 0.5f); /* No roundf in MSVC */ + return 0; +} + +float pdf_to_real(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj) + return 0; + if (obj->kind == PDF_REAL) + return obj->u.f; + if (obj->kind == PDF_INT) + return obj->u.i; + return 0; +} + +char *pdf_to_name(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_NAME) + return ""; + return obj->u.n; +} + +char *pdf_to_str_buf(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_STRING) + return ""; + return obj->u.s.buf; +} + +int pdf_to_str_len(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_STRING) + return 0; + return obj->u.s.len; +} + +void pdf_set_int(pdf_obj *obj, int i) +{ + if (!obj || obj->kind != PDF_INT) + return; + obj->u.i = i; +} + +/* for use by pdf_crypt_obj_imp to decrypt AES string in place */ +void pdf_set_str_len(pdf_obj *obj, int newlen) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_STRING) + return; /* This should never happen */ + if (newlen > obj->u.s.len) + return; /* This should never happen */ + obj->u.s.len = newlen; +} + +pdf_obj *pdf_to_dict(pdf_obj *obj) +{ + RESOLVE(obj); + return (obj && obj->kind == PDF_DICT ? 
obj : NULL); +} + +int pdf_to_num(pdf_obj *obj) +{ + if (!obj || obj->kind != PDF_INDIRECT) + return 0; + return obj->u.r.num; +} + +int pdf_to_gen(pdf_obj *obj) +{ + if (!obj || obj->kind != PDF_INDIRECT) + return 0; + return obj->u.r.gen; +} + +pdf_document *pdf_get_indirect_document(pdf_obj *obj) +{ + if (!obj || obj->kind != PDF_INDIRECT) + return NULL; + return obj->u.r.xref; +} + +int +pdf_objcmp(pdf_obj *a, pdf_obj *b) +{ + int i; + + if (a == b) + return 0; + + if (!a || !b) + return 1; + + if (a->kind != b->kind) + return 1; + + switch (a->kind) + { + case PDF_NULL: + return 0; + + case PDF_BOOL: + return a->u.b - b->u.b; + + case PDF_INT: + return a->u.i - b->u.i; + + case PDF_REAL: + if (a->u.f < b->u.f) + return -1; + if (a->u.f > b->u.f) + return 1; + return 0; + + case PDF_STRING: + if (a->u.s.len < b->u.s.len) + { + if (memcmp(a->u.s.buf, b->u.s.buf, a->u.s.len) <= 0) + return -1; + return 1; + } + if (a->u.s.len > b->u.s.len) + { + if (memcmp(a->u.s.buf, b->u.s.buf, b->u.s.len) >= 0) + return 1; + return -1; + } + return memcmp(a->u.s.buf, b->u.s.buf, a->u.s.len); + + case PDF_NAME: + return strcmp(a->u.n, b->u.n); + + case PDF_INDIRECT: + if (a->u.r.num == b->u.r.num) + return a->u.r.gen - b->u.r.gen; + return a->u.r.num - b->u.r.num; + + case PDF_ARRAY: + if (a->u.a.len != b->u.a.len) + return a->u.a.len - b->u.a.len; + for (i = 0; i < a->u.a.len; i++) + if (pdf_objcmp(a->u.a.items[i], b->u.a.items[i])) + return 1; + return 0; + + case PDF_DICT: + if (a->u.d.len != b->u.d.len) + return a->u.d.len - b->u.d.len; + for (i = 0; i < a->u.d.len; i++) + { + if (pdf_objcmp(a->u.d.items[i].k, b->u.d.items[i].k)) + return 1; + if (pdf_objcmp(a->u.d.items[i].v, b->u.d.items[i].v)) + return 1; + } + return 0; + + } + return 1; +} + +static char * +pdf_objkindstr(pdf_obj *obj) +{ + if (!obj) + return "<NULL>"; + switch (obj->kind) + { + case PDF_NULL: return "null"; + case PDF_BOOL: return "boolean"; + case PDF_INT: return "integer"; + case PDF_REAL: return 
"real"; + case PDF_STRING: return "string"; + case PDF_NAME: return "name"; + case PDF_ARRAY: return "array"; + case PDF_DICT: return "dictionary"; + case PDF_INDIRECT: return "reference"; + } + return "<unknown>"; +} + +pdf_obj * +pdf_new_array(fz_context *ctx, int initialcap) +{ + pdf_obj *obj; + int i; + + obj = Memento_label(fz_malloc(ctx, sizeof(pdf_obj)), "pdf_obj(array)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_ARRAY; + obj->marked = 0; + + obj->u.a.len = 0; + obj->u.a.cap = initialcap > 1 ? initialcap : 6; + + fz_try(ctx) + { + obj->u.a.items = Memento_label(fz_malloc_array(ctx, obj->u.a.cap, sizeof(pdf_obj*)), "pdf_obj(array items)"); + } + fz_catch(ctx) + { + fz_free(ctx, obj); + fz_rethrow(ctx); + } + for (i = 0; i < obj->u.a.cap; i++) + obj->u.a.items[i] = NULL; + + return obj; +} + +static void +pdf_array_grow(pdf_obj *obj) +{ + int i; + int new_cap = (obj->u.a.cap * 3) / 2; + + obj->u.a.items = fz_resize_array(obj->ctx, obj->u.a.items, new_cap, sizeof(pdf_obj*)); + obj->u.a.cap = new_cap; + + for (i = obj->u.a.len ; i < obj->u.a.cap; i++) + obj->u.a.items[i] = NULL; +} + +pdf_obj * +pdf_copy_array(fz_context *ctx, pdf_obj *obj) +{ + pdf_obj *arr; + int i; + int n; + + RESOLVE(obj); + if (!obj) + return NULL; /* Can't warn :( */ + if (obj->kind != PDF_ARRAY) + fz_warn(ctx, "assert: not an array (%s)", pdf_objkindstr(obj)); + + arr = pdf_new_array(ctx, pdf_array_len(obj)); + n = pdf_array_len(obj); + for (i = 0; i < n; i++) + pdf_array_push(arr, pdf_array_get(obj, i)); + + return arr; +} + +int +pdf_array_len(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_ARRAY) + return 0; + return obj->u.a.len; +} + +pdf_obj * +pdf_array_get(pdf_obj *obj, int i) +{ + RESOLVE(obj); + + if (!obj || obj->kind != PDF_ARRAY) + return NULL; + + if (i < 0 || i >= obj->u.a.len) + return NULL; + + return obj->u.a.items[i]; +} + +void +pdf_array_put(pdf_obj *obj, int i, pdf_obj *item) +{ + RESOLVE(obj); + + if (!obj) + return; /* Can't warn :( */ 
+ if (obj->kind != PDF_ARRAY) + fz_warn(obj->ctx, "assert: not an array (%s)", pdf_objkindstr(obj)); + else if (i < 0) + fz_warn(obj->ctx, "assert: index %d < 0", i); + else if (i >= obj->u.a.len) + fz_warn(obj->ctx, "assert: index %d > length %d", i, obj->u.a.len); + else + { + pdf_drop_obj(obj->u.a.items[i]); + obj->u.a.items[i] = pdf_keep_obj(item); + } +} + +void +pdf_array_push(pdf_obj *obj, pdf_obj *item) +{ + RESOLVE(obj); + + if (!obj) + return; /* Can't warn :( */ + if (obj->kind != PDF_ARRAY) + fz_warn(obj->ctx, "assert: not an array (%s)", pdf_objkindstr(obj)); + else + { + if (obj->u.a.len + 1 > obj->u.a.cap) + pdf_array_grow(obj); + obj->u.a.items[obj->u.a.len] = pdf_keep_obj(item); + obj->u.a.len++; + } +} + +void +pdf_array_push_drop(pdf_obj *obj, pdf_obj *item) +{ + fz_context *ctx = obj->ctx; + + fz_try(ctx) + { + pdf_array_push(obj, item); + } + fz_always(ctx) + { + pdf_drop_obj(item); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void +pdf_array_insert(pdf_obj *obj, pdf_obj *item) +{ + RESOLVE(obj); + + if (!obj) + return; /* Can't warn :( */ + if (obj->kind != PDF_ARRAY) + fz_warn(obj->ctx, "assert: not an array (%s)", pdf_objkindstr(obj)); + else + { + if (obj->u.a.len + 1 > obj->u.a.cap) + pdf_array_grow(obj); + memmove(obj->u.a.items + 1, obj->u.a.items, obj->u.a.len * sizeof(pdf_obj*)); + obj->u.a.items[0] = pdf_keep_obj(item); + obj->u.a.len++; + } +} + +int +pdf_array_contains(pdf_obj *arr, pdf_obj *obj) +{ + int i, len; + + len = pdf_array_len(arr); + for (i = 0; i < len; i++) + if (!pdf_objcmp(pdf_array_get(arr, i), obj)) + return 1; + + return 0; +} + +pdf_obj *pdf_new_rect(fz_context *ctx, const fz_rect *rect) +{ + pdf_obj *arr = NULL; + pdf_obj *item = NULL; + + fz_var(arr); + fz_var(item); + fz_try(ctx) + { + arr = pdf_new_array(ctx, 4); + + item = pdf_new_real(ctx, rect->x0); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + + item = pdf_new_real(ctx, rect->y0); + pdf_array_push(arr, item); + 
pdf_drop_obj(item); + item = NULL; + + item = pdf_new_real(ctx, rect->x1); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + + item = pdf_new_real(ctx, rect->y1); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + } + fz_catch(ctx) + { + pdf_drop_obj(item); + pdf_drop_obj(arr); + fz_rethrow(ctx); + } + + return arr; +} + +pdf_obj *pdf_new_matrix(fz_context *ctx, const fz_matrix *mtx) +{ + pdf_obj *arr = NULL; + pdf_obj *item = NULL; + + fz_var(arr); + fz_var(item); + fz_try(ctx) + { + arr = pdf_new_array(ctx, 6); + + item = pdf_new_real(ctx, mtx->a); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + + item = pdf_new_real(ctx, mtx->b); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + + item = pdf_new_real(ctx, mtx->c); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + + item = pdf_new_real(ctx, mtx->d); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + + item = pdf_new_real(ctx, mtx->e); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + + item = pdf_new_real(ctx, mtx->f); + pdf_array_push(arr, item); + pdf_drop_obj(item); + item = NULL; + } + fz_catch(ctx) + { + pdf_drop_obj(item); + pdf_drop_obj(arr); + fz_rethrow(ctx); + } + + return arr; +} + +/* dicts may only have names as keys! */ + +static int keyvalcmp(const void *ap, const void *bp) +{ + const struct keyval *a = ap; + const struct keyval *b = bp; + return strcmp(pdf_to_name(a->k), pdf_to_name(b->k)); +} + +pdf_obj * +pdf_new_dict(fz_context *ctx, int initialcap) +{ + pdf_obj *obj; + int i; + + obj = Memento_label(fz_malloc(ctx, sizeof(pdf_obj)), "pdf_obj(dict)"); + obj->ctx = ctx; + obj->refs = 1; + obj->kind = PDF_DICT; + obj->marked = 0; + + obj->u.d.sorted = 0; + obj->u.d.len = 0; + obj->u.d.cap = initialcap > 1 ? 
initialcap : 10; + + fz_try(ctx) + { + obj->u.d.items = Memento_label(fz_malloc_array(ctx, obj->u.d.cap, sizeof(struct keyval)), "pdf_obj(dict items)"); + } + fz_catch(ctx) + { + fz_free(ctx, obj); + fz_rethrow(ctx); + } + for (i = 0; i < obj->u.d.cap; i++) + { + obj->u.d.items[i].k = NULL; + obj->u.d.items[i].v = NULL; + } + + return obj; +} + +static void +pdf_dict_grow(pdf_obj *obj) +{ + int i; + int new_cap = (obj->u.d.cap * 3) / 2; + + obj->u.d.items = fz_resize_array(obj->ctx, obj->u.d.items, new_cap, sizeof(struct keyval)); + obj->u.d.cap = new_cap; + + for (i = obj->u.d.len; i < obj->u.d.cap; i++) + { + obj->u.d.items[i].k = NULL; + obj->u.d.items[i].v = NULL; + } +} + +pdf_obj * +pdf_copy_dict(fz_context *ctx, pdf_obj *obj) +{ + pdf_obj *dict; + int i, n; + + RESOLVE(obj); + if (!obj) + return NULL; /* Can't warn :( */ + if (obj->kind != PDF_DICT) + fz_warn(ctx, "assert: not a dict (%s)", pdf_objkindstr(obj)); + + n = pdf_dict_len(obj); + dict = pdf_new_dict(ctx, n); + for (i = 0; i < n; i++) + pdf_dict_put(dict, pdf_dict_get_key(obj, i), pdf_dict_get_val(obj, i)); + + return dict; +} + +int +pdf_dict_len(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_DICT) + return 0; + return obj->u.d.len; +} + +pdf_obj * +pdf_dict_get_key(pdf_obj *obj, int i) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_DICT) + return NULL; + + if (i < 0 || i >= obj->u.d.len) + return NULL; + + return obj->u.d.items[i].k; +} + +pdf_obj * +pdf_dict_get_val(pdf_obj *obj, int i) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_DICT) + return NULL; + + if (i < 0 || i >= obj->u.d.len) + return NULL; + + return obj->u.d.items[i].v; +} + +static int +pdf_dict_finds(pdf_obj *obj, const char *key, int *location) +{ + if (obj->u.d.sorted && obj->u.d.len > 0) + { + int l = 0; + int r = obj->u.d.len - 1; + + if (strcmp(pdf_to_name(obj->u.d.items[r].k), key) < 0) + { + if (location) + *location = r + 1; + return -1; + } + + while (l <= r) + { + int m = (l + r) >> 1; + int c = 
-strcmp(pdf_to_name(obj->u.d.items[m].k), key); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return m; + + if (location) + *location = l; + } + } + + else + { + int i; + for (i = 0; i < obj->u.d.len; i++) + if (strcmp(pdf_to_name(obj->u.d.items[i].k), key) == 0) + return i; + + if (location) + *location = obj->u.d.len; + } + + return -1; +} + +pdf_obj * +pdf_dict_gets(pdf_obj *obj, const char *key) +{ + int i; + + RESOLVE(obj); + if (!obj || obj->kind != PDF_DICT) + return NULL; + + i = pdf_dict_finds(obj, key, NULL); + if (i >= 0) + return obj->u.d.items[i].v; + + return NULL; +} + +pdf_obj * +pdf_dict_getp(pdf_obj *obj, const char *keys) +{ + char buf[256]; + char *k, *e; + + if (strlen(keys)+1 > 256) + fz_throw(obj->ctx, FZ_ERROR_GENERIC, "buffer overflow in pdf_dict_getp"); + + strcpy(buf, keys); + + e = buf; + while (*e && obj) + { + k = e; + while (*e != '/' && *e != '\0') + e++; + + if (*e == '/') + { + *e = '\0'; + e++; + } + + obj = pdf_dict_gets(obj, k); + } + + return obj; +} + +pdf_obj * +pdf_dict_get(pdf_obj *obj, pdf_obj *key) +{ + if (!key || key->kind != PDF_NAME) + return NULL; + return pdf_dict_gets(obj, pdf_to_name(key)); +} + +pdf_obj * +pdf_dict_getsa(pdf_obj *obj, const char *key, const char *abbrev) +{ + pdf_obj *v; + v = pdf_dict_gets(obj, key); + if (v) + return v; + return pdf_dict_gets(obj, abbrev); +} + +void +pdf_dict_put(pdf_obj *obj, pdf_obj *key, pdf_obj *val) +{ + int location; + char *s; + int i; + + RESOLVE(obj); + if (!obj) + return; /* Can't warn :( */ + if (obj->kind != PDF_DICT) + { + fz_warn(obj->ctx, "assert: not a dict (%s)", pdf_objkindstr(obj)); + return; + } + + RESOLVE(key); + if (!key || key->kind != PDF_NAME) + { + fz_warn(obj->ctx, "assert: key is not a name (%s)", pdf_objkindstr(obj)); + return; + } + else + s = pdf_to_name(key); + + if (!val) + { + fz_warn(obj->ctx, "assert: val does not exist for key (%s)", s); + return; + } + + if (obj->u.d.len > 100 && !obj->u.d.sorted) + pdf_sort_dict(obj); 
+ + i = pdf_dict_finds(obj, s, &location); + if (i >= 0 && i < obj->u.d.len) + { + if (obj->u.d.items[i].v != val) + { + pdf_drop_obj(obj->u.d.items[i].v); + obj->u.d.items[i].v = pdf_keep_obj(val); + } + } + else + { + if (obj->u.d.len + 1 > obj->u.d.cap) + pdf_dict_grow(obj); + + i = location; + if (obj->u.d.sorted && obj->u.d.len > 0) + memmove(&obj->u.d.items[i + 1], + &obj->u.d.items[i], + (obj->u.d.len - i) * sizeof(struct keyval)); + + obj->u.d.items[i].k = pdf_keep_obj(key); + obj->u.d.items[i].v = pdf_keep_obj(val); + obj->u.d.len ++; + } +} + +void +pdf_dict_puts(pdf_obj *obj, const char *key, pdf_obj *val) +{ + fz_context *ctx = obj->ctx; + pdf_obj *keyobj = pdf_new_name(ctx, key); + + fz_try(ctx) + { + pdf_dict_put(obj, keyobj, val); + } + fz_always(ctx) + { + pdf_drop_obj(keyobj); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void +pdf_dict_puts_drop(pdf_obj *obj, const char *key, pdf_obj *val) +{ + fz_context *ctx = obj->ctx; + pdf_obj *keyobj = NULL; + + fz_var(keyobj); + + fz_try(ctx) + { + keyobj = pdf_new_name(ctx, key); + pdf_dict_put(obj, keyobj, val); + } + fz_always(ctx) + { + pdf_drop_obj(keyobj); + pdf_drop_obj(val); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void +pdf_dict_putp(pdf_obj *obj, const char *keys, pdf_obj *val) +{ + fz_context *ctx = obj->ctx; + char buf[256]; + char *k, *e; + pdf_obj *cobj = NULL; + + if (strlen(keys)+1 > 256) + fz_throw(obj->ctx, FZ_ERROR_GENERIC, "buffer overflow in pdf_dict_getp"); + + strcpy(buf, keys); + + e = buf; + while (*e) + { + k = e; + while (*e != '/' && *e != '\0') + e++; + + if (*e == '/') + { + *e = '\0'; + e++; + } + + if (*e) + { + /* Not the last key in the key path. Create subdict if not already there. 
*/ + cobj = pdf_dict_gets(obj, k); + if (cobj == NULL) + { + cobj = pdf_new_dict(ctx, 1); + fz_try(ctx) + { + pdf_dict_puts(obj, k, cobj); + } + fz_always(ctx) + { + pdf_drop_obj(cobj); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + /* Move to subdict */ + obj = cobj; + } + else + { + /* Last key. Use it to store the value */ + /* Use val = NULL to request delete */ + if (val) + pdf_dict_puts(obj, k, val); + else + pdf_dict_dels(obj, k); + } + } +} + +void +pdf_dict_putp_drop(pdf_obj *obj, const char *keys, pdf_obj *val) +{ + fz_context *ctx = obj->ctx; + + fz_try(ctx) + { + pdf_dict_putp(obj, keys, val); + } + fz_always(ctx) + { + pdf_drop_obj(val); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void +pdf_dict_dels(pdf_obj *obj, const char *key) +{ + RESOLVE(obj); + + if (!obj) + return; /* Can't warn :( */ + if (obj->kind != PDF_DICT) + fz_warn(obj->ctx, "assert: not a dict (%s)", pdf_objkindstr(obj)); + else + { + int i = pdf_dict_finds(obj, key, NULL); + if (i >= 0) + { + pdf_drop_obj(obj->u.d.items[i].k); + pdf_drop_obj(obj->u.d.items[i].v); + obj->u.d.sorted = 0; + obj->u.d.items[i] = obj->u.d.items[obj->u.d.len-1]; + obj->u.d.len --; + } + } +} + +void +pdf_dict_del(pdf_obj *obj, pdf_obj *key) +{ + RESOLVE(key); + if (!key || key->kind != PDF_NAME) + fz_warn(obj->ctx, "assert: key is not a name (%s)", pdf_objkindstr(obj)); + else + pdf_dict_dels(obj, key->u.n); +} + +void +pdf_sort_dict(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_DICT) + return; + if (!obj->u.d.sorted) + { + qsort(obj->u.d.items, obj->u.d.len, sizeof(struct keyval), keyvalcmp); + obj->u.d.sorted = 1; + } +} + +int +pdf_obj_marked(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj) + return 0; + return obj->marked; +} + +int +pdf_obj_mark(pdf_obj *obj) +{ + int marked; + RESOLVE(obj); + if (!obj) + return 0; + marked = obj->marked; + obj->marked = 1; + return marked; +} + +void +pdf_obj_unmark(pdf_obj *obj) +{ + RESOLVE(obj); + if (!obj) + return; + obj->marked = 0; 
+} + +static void +pdf_free_array(pdf_obj *obj) +{ + int i; + + for (i = 0; i < obj->u.a.len; i++) + pdf_drop_obj(obj->u.a.items[i]); + + fz_free(obj->ctx, obj->u.a.items); + fz_free(obj->ctx, obj); +} + +static void +pdf_free_dict(pdf_obj *obj) +{ + int i; + + for (i = 0; i < obj->u.d.len; i++) { + pdf_drop_obj(obj->u.d.items[i].k); + pdf_drop_obj(obj->u.d.items[i].v); + } + + fz_free(obj->ctx, obj->u.d.items); + fz_free(obj->ctx, obj); +} + +void +pdf_drop_obj(pdf_obj *obj) +{ + if (!obj) + return; + if (--obj->refs) + return; + if (obj->kind == PDF_ARRAY) + pdf_free_array(obj); + else if (obj->kind == PDF_DICT) + pdf_free_dict(obj); + else + fz_free(obj->ctx, obj); +} + +pdf_obj *pdf_new_obj_from_str(fz_context *ctx, const char *src) +{ + pdf_obj *result; + pdf_lexbuf lexbuf; + fz_stream *stream = fz_open_memory(ctx, (unsigned char *)src, strlen(src)); + + pdf_lexbuf_init(ctx, &lexbuf, PDF_LEXBUF_SMALL); + fz_try(ctx) + { + result = pdf_parse_stm_obj(NULL, stream, &lexbuf); + } + fz_always(ctx) + { + pdf_lexbuf_fin(&lexbuf); + fz_close(stream); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + return NULL; + } + + return result; +} + +/* Pretty printing objects */ + +struct fmt +{ + char *buf; + int cap; + int len; + int indent; + int tight; + int col; + int sep; + int last; +}; + +static void fmt_obj(struct fmt *fmt, pdf_obj *obj); + +static inline int iswhite(int ch) +{ + return + ch == '\000' || + ch == '\011' || + ch == '\012' || + ch == '\014' || + ch == '\015' || + ch == '\040'; +} + +static inline int isdelim(int ch) +{ + return + ch == '(' || ch == ')' || + ch == '<' || ch == '>' || + ch == '[' || ch == ']' || + ch == '{' || ch == '}' || + ch == '/' || + ch == '%'; +} + +static inline void fmt_putc(struct fmt *fmt, int c) +{ + if (fmt->sep && !isdelim(fmt->last) && !isdelim(c)) { + fmt->sep = 0; + fmt_putc(fmt, ' '); + } + fmt->sep = 0; + + if (fmt->buf && fmt->len < fmt->cap) + fmt->buf[fmt->len] = c; + + if (c == '\n') + fmt->col = 0; + else + fmt->col 
++; + + fmt->len ++; + + fmt->last = c; +} + +static inline void fmt_indent(struct fmt *fmt) +{ + int i = fmt->indent; + while (i--) { + fmt_putc(fmt, ' '); + fmt_putc(fmt, ' '); + } +} + +static inline void fmt_puts(struct fmt *fmt, char *s) +{ + while (*s) + fmt_putc(fmt, *s++); +} + +static inline void fmt_sep(struct fmt *fmt) +{ + fmt->sep = 1; +} + +static void fmt_str(struct fmt *fmt, pdf_obj *obj) +{ + char *s = pdf_to_str_buf(obj); + int n = pdf_to_str_len(obj); + int i, c; + + fmt_putc(fmt, '('); + for (i = 0; i < n; i++) + { + c = (unsigned char)s[i]; + if (c == '\n') + fmt_puts(fmt, "\\n"); + else if (c == '\r') + fmt_puts(fmt, "\\r"); + else if (c == '\t') + fmt_puts(fmt, "\\t"); + else if (c == '\b') + fmt_puts(fmt, "\\b"); + else if (c == '\f') + fmt_puts(fmt, "\\f"); + else if (c == '(') + fmt_puts(fmt, "\\("); + else if (c == ')') + fmt_puts(fmt, "\\)"); + else if (c == '\\') + fmt_puts(fmt, "\\\\"); + else if (c < 32 || c >= 127) { + char buf[16]; + fmt_putc(fmt, '\\'); + sprintf(buf, "%03o", c); + fmt_puts(fmt, buf); + } + else + fmt_putc(fmt, c); + } + fmt_putc(fmt, ')'); +} + +static void fmt_hex(struct fmt *fmt, pdf_obj *obj) +{ + char *s = pdf_to_str_buf(obj); + int n = pdf_to_str_len(obj); + int i, b, c; + + fmt_putc(fmt, '<'); + for (i = 0; i < n; i++) { + b = (unsigned char) s[i]; + c = (b >> 4) & 0x0f; + fmt_putc(fmt, c < 0xA ? c + '0' : c + 'A' - 0xA); + c = (b) & 0x0f; + fmt_putc(fmt, c < 0xA ? c + '0' : c + 'A' - 0xA); + } + fmt_putc(fmt, '>'); +} + +static void fmt_name(struct fmt *fmt, pdf_obj *obj) +{ + unsigned char *s = (unsigned char *) pdf_to_name(obj); + int i, c; + + fmt_putc(fmt, '/'); + + for (i = 0; s[i]; i++) + { + if (isdelim(s[i]) || iswhite(s[i]) || + s[i] == '#' || s[i] < 32 || s[i] >= 127) + { + fmt_putc(fmt, '#'); + c = (s[i] >> 4) & 0xf; + fmt_putc(fmt, c < 0xA ? c + '0' : c + 'A' - 0xA); + c = s[i] & 0xf; + fmt_putc(fmt, c < 0xA ? 
c + '0' : c + 'A' - 0xA); + } + else + { + fmt_putc(fmt, s[i]); + } + } +} + +static void fmt_array(struct fmt *fmt, pdf_obj *obj) +{ + int i, n; + + n = pdf_array_len(obj); + if (fmt->tight) { + fmt_putc(fmt, '['); + for (i = 0; i < n; i++) { + fmt_obj(fmt, pdf_array_get(obj, i)); + fmt_sep(fmt); + } + fmt_putc(fmt, ']'); + } + else { + fmt_puts(fmt, "[ "); + for (i = 0; i < n; i++) { + if (fmt->col > 60) { + fmt_putc(fmt, '\n'); + fmt_indent(fmt); + } + fmt_obj(fmt, pdf_array_get(obj, i)); + fmt_putc(fmt, ' '); + } + fmt_putc(fmt, ']'); + fmt_sep(fmt); + } +} + +static void fmt_dict(struct fmt *fmt, pdf_obj *obj) +{ + int i, n; + pdf_obj *key, *val; + + n = pdf_dict_len(obj); + if (fmt->tight) { + fmt_puts(fmt, "<<"); + for (i = 0; i < n; i++) { + fmt_obj(fmt, pdf_dict_get_key(obj, i)); + fmt_sep(fmt); + fmt_obj(fmt, pdf_dict_get_val(obj, i)); + fmt_sep(fmt); + } + fmt_puts(fmt, ">>"); + } + else { + fmt_puts(fmt, "<<\n"); + fmt->indent ++; + for (i = 0; i < n; i++) { + key = pdf_dict_get_key(obj, i); + val = pdf_dict_get_val(obj, i); + fmt_indent(fmt); + fmt_obj(fmt, key); + fmt_putc(fmt, ' '); + if (!pdf_is_indirect(val) && pdf_is_array(val)) + fmt->indent ++; + fmt_obj(fmt, val); + fmt_putc(fmt, '\n'); + if (!pdf_is_indirect(val) && pdf_is_array(val)) + fmt->indent --; + } + fmt->indent --; + fmt_indent(fmt); + fmt_puts(fmt, ">>"); + } +} + +static void fmt_obj(struct fmt *fmt, pdf_obj *obj) +{ + char buf[256]; + + if (!obj) + fmt_puts(fmt, "<NULL>"); + else if (pdf_is_indirect(obj)) + { + sprintf(buf, "%d %d R", pdf_to_num(obj), pdf_to_gen(obj)); + fmt_puts(fmt, buf); + } + else if (pdf_is_null(obj)) + fmt_puts(fmt, "null"); + else if (pdf_is_bool(obj)) + fmt_puts(fmt, pdf_to_bool(obj) ? "true" : "false"); + else if (pdf_is_int(obj)) + { + sprintf(buf, "%d", pdf_to_int(obj)); + fmt_puts(fmt, buf); + } + else if (pdf_is_real(obj)) + { + sprintf(buf, "%1.9g", pdf_to_real(obj)); + if (strchr(buf, 'e')) /* bad news! */ + sprintf(buf, fabsf(pdf_to_real(obj)) > 1 ? 
"%1.1f" : "%1.8f", pdf_to_real(obj)); + fmt_puts(fmt, buf); + } + else if (pdf_is_string(obj)) + { + char *str = pdf_to_str_buf(obj); + int len = pdf_to_str_len(obj); + int added = 0; + int i, c; + for (i = 0; i < len; i++) { + c = (unsigned char)str[i]; + if (strchr("()\\\n\r\t\b\f", c)) + added ++; + else if (c < 32 || c >= 127) + added += 3; + } + if (added < len) + fmt_str(fmt, obj); + else + fmt_hex(fmt, obj); + } + else if (pdf_is_name(obj)) + fmt_name(fmt, obj); + else if (pdf_is_array(obj)) + fmt_array(fmt, obj); + else if (pdf_is_dict(obj)) + fmt_dict(fmt, obj); + else + fmt_puts(fmt, "<unknown object>"); +} + +static int +pdf_sprint_obj(char *s, int n, pdf_obj *obj, int tight) +{ + struct fmt fmt; + + fmt.indent = 0; + fmt.col = 0; + fmt.sep = 0; + fmt.last = 0; + + fmt.tight = tight; + fmt.buf = s; + fmt.cap = n; + fmt.len = 0; + fmt_obj(&fmt, obj); + + if (fmt.buf && fmt.len < fmt.cap) + fmt.buf[fmt.len] = '\0'; + + return fmt.len; +} + +int +pdf_fprint_obj(FILE *fp, pdf_obj *obj, int tight) +{ + char buf[1024]; + char *ptr; + int n; + + n = pdf_sprint_obj(NULL, 0, obj, tight); + if ((n + 1) < sizeof buf) + { + pdf_sprint_obj(buf, sizeof buf, obj, tight); + fputs(buf, fp); + fputc('\n', fp); + } + else + { + ptr = fz_malloc(obj->ctx, n + 1); + pdf_sprint_obj(ptr, n + 1, obj, tight); + fputs(ptr, fp); + fputc('\n', fp); + fz_free(obj->ctx, ptr); + } + return n; +} + +#ifndef NDEBUG +void +pdf_print_obj(pdf_obj *obj) +{ + pdf_fprint_obj(stdout, obj, 0); +} + +void +pdf_print_ref(pdf_obj *ref) +{ + pdf_print_obj(pdf_resolve_indirect(ref)); +} +#endif diff --git a/source/pdf/pdf-outline.c b/source/pdf/pdf-outline.c new file mode 100644 index 00000000..584d60ea --- /dev/null +++ b/source/pdf/pdf-outline.c @@ -0,0 +1,72 @@ +#include "mupdf/pdf.h" + +static fz_outline * +pdf_load_outline_imp(pdf_document *xref, pdf_obj *dict) +{ + fz_context *ctx = xref->ctx; + fz_outline *node, **prev, *first; + pdf_obj *obj; + pdf_obj *odict = dict; + + fz_var(dict); + 
fz_var(first); + + fz_try(ctx) + { + first = NULL; + prev = &first; + while (dict && pdf_is_dict(dict)) + { + if (pdf_obj_mark(dict)) + break; + node = fz_malloc_struct(ctx, fz_outline); + node->title = NULL; + node->dest.kind = FZ_LINK_NONE; + node->down = NULL; + node->next = NULL; + *prev = node; + prev = &node->next; + + obj = pdf_dict_gets(dict, "Title"); + if (obj) + node->title = pdf_to_utf8(xref, obj); + + if ((obj = pdf_dict_gets(dict, "Dest"))) + node->dest = pdf_parse_link_dest(xref, obj); + else if ((obj = pdf_dict_gets(dict, "A"))) + node->dest = pdf_parse_action(xref, obj); + + obj = pdf_dict_gets(dict, "First"); + if (obj) + node->down = pdf_load_outline_imp(xref, obj); + + dict = pdf_dict_gets(dict, "Next"); + } + } + fz_always(ctx) + { + for (dict = odict; dict && pdf_obj_marked(dict); dict = pdf_dict_gets(dict, "Next")) + pdf_obj_unmark(dict); + } + fz_catch(ctx) + { + fz_free_outline(ctx, first); + fz_rethrow(ctx); + } + + return first; +} + +fz_outline * +pdf_load_outline(pdf_document *xref) +{ + pdf_obj *root, *obj, *first; + + root = pdf_dict_gets(pdf_trailer(xref), "Root"); + obj = pdf_dict_gets(root, "Outlines"); + first = pdf_dict_gets(obj, "First"); + if (first) + return pdf_load_outline_imp(xref, first); + + return NULL; +} diff --git a/source/pdf/pdf-page.c b/source/pdf/pdf-page.c new file mode 100644 index 00000000..8a12b67b --- /dev/null +++ b/source/pdf/pdf-page.c @@ -0,0 +1,489 @@ +#include "mupdf/pdf.h" + +struct info +{ + pdf_obj *resources; + pdf_obj *mediabox; + pdf_obj *cropbox; + pdf_obj *rotate; +}; + +static void +put_marker_bool(fz_context *ctx, pdf_obj *rdb, char *marker, int val) +{ + pdf_obj *tmp; + + tmp = pdf_new_bool(ctx, val); + fz_try(ctx) + { + pdf_dict_puts(rdb, marker, tmp); + } + fz_always(ctx) + { + pdf_drop_obj(tmp); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +typedef struct pdf_page_load_s pdf_page_load; + +struct pdf_page_load_s +{ + int max; + int pos; + pdf_obj *node; + pdf_obj *kids; + struct info 
info; +}; + +static void +pdf_load_page_tree_node(pdf_document *xref, pdf_obj *node, struct info info) +{ + pdf_obj *dict, *kids, *count; + pdf_obj *obj; + fz_context *ctx = xref->ctx; + pdf_page_load *stack = NULL; + int stacklen = -1; + int stackmax = 0; + + fz_try(ctx) + { + do + { + if (!node || pdf_obj_mark(node)) + { + /* NULL node, or we've been here before. + * Nothing to do. */ + } + else + { + kids = pdf_dict_gets(node, "Kids"); + count = pdf_dict_gets(node, "Count"); + if (pdf_is_array(kids) && pdf_is_int(count)) + { + /* Push this onto the stack */ + obj = pdf_dict_gets(node, "Resources"); + if (obj) + info.resources = obj; + obj = pdf_dict_gets(node, "MediaBox"); + if (obj) + info.mediabox = obj; + obj = pdf_dict_gets(node, "CropBox"); + if (obj) + info.cropbox = obj; + obj = pdf_dict_gets(node, "Rotate"); + if (obj) + info.rotate = obj; + stacklen++; + if (stacklen == stackmax) + { + stack = fz_resize_array(ctx, stack, stackmax ? stackmax*2 : 10, sizeof(*stack)); + stackmax = stackmax ? 
stackmax*2 : 10; + } + stack[stacklen].kids = kids; + stack[stacklen].node = node; + stack[stacklen].pos = -1; + stack[stacklen].max = pdf_array_len(kids); + stack[stacklen].info = info; + } + else if ((dict = pdf_to_dict(node)) != NULL) + { + if (info.resources && !pdf_dict_gets(dict, "Resources")) + pdf_dict_puts(dict, "Resources", info.resources); + if (info.mediabox && !pdf_dict_gets(dict, "MediaBox")) + pdf_dict_puts(dict, "MediaBox", info.mediabox); + if (info.cropbox && !pdf_dict_gets(dict, "CropBox")) + pdf_dict_puts(dict, "CropBox", info.cropbox); + if (info.rotate && !pdf_dict_gets(dict, "Rotate")) + pdf_dict_puts(dict, "Rotate", info.rotate); + + if (xref->page_len == xref->page_cap) + { + fz_warn(ctx, "found more pages than expected"); + xref->page_refs = fz_resize_array(ctx, xref->page_refs, xref->page_cap+1, sizeof(pdf_obj*)); + xref->page_objs = fz_resize_array(ctx, xref->page_objs, xref->page_cap+1, sizeof(pdf_obj*)); + xref->page_cap ++; + } + + xref->page_refs[xref->page_len] = pdf_keep_obj(node); + xref->page_objs[xref->page_len] = pdf_keep_obj(dict); + xref->page_len ++; + pdf_obj_unmark(node); + } + } + /* Get the next node */ + if (stacklen < 0) + break; + while (++stack[stacklen].pos == stack[stacklen].max) + { + pdf_obj_unmark(stack[stacklen].node); + stacklen--; + if (stacklen < 0) /* No more to pop! 
*/ + break; + node = stack[stacklen].node; + info = stack[stacklen].info; + pdf_obj_unmark(node); /* Unmark it, cos we're about to mark it again */ + } + if (stacklen >= 0) + node = pdf_array_get(stack[stacklen].kids, stack[stacklen].pos); + } + while (stacklen >= 0); + } + fz_always(ctx) + { + while (stacklen >= 0) + pdf_obj_unmark(stack[stacklen--].node); + fz_free(ctx, stack); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +pdf_load_page_tree(pdf_document *xref) +{ + fz_context *ctx = xref->ctx; + pdf_obj *catalog; + pdf_obj *pages; + pdf_obj *count; + struct info info; + + if (xref->page_refs) + return; + + catalog = pdf_dict_gets(pdf_trailer(xref), "Root"); + pages = pdf_dict_gets(catalog, "Pages"); + count = pdf_dict_gets(pages, "Count"); + + if (!pdf_is_dict(pages)) + fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree"); + if (!pdf_is_int(count) || pdf_to_int(count) < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "missing page count"); + + xref->page_cap = pdf_to_int(count); + xref->page_len = 0; + xref->page_refs = fz_malloc_array(ctx, xref->page_cap, sizeof(pdf_obj*)); + xref->page_objs = fz_malloc_array(ctx, xref->page_cap, sizeof(pdf_obj*)); + + info.resources = NULL; + info.mediabox = NULL; + info.cropbox = NULL; + info.rotate = NULL; + + pdf_load_page_tree_node(xref, pages, info); +} + +int +pdf_count_pages(pdf_document *xref) +{ + pdf_load_page_tree(xref); + return xref->page_len; +} + +int +pdf_lookup_page_number(pdf_document *xref, pdf_obj *page) +{ + int i, num = pdf_to_num(page); + + pdf_load_page_tree(xref); + for (i = 0; i < xref->page_len; i++) + if (num == pdf_to_num(xref->page_refs[i])) + return i; + return -1; +} + +/* We need to know whether to install a page-level transparency group */ + +static int pdf_resources_use_blending(fz_context *ctx, pdf_obj *rdb); + +static int +pdf_extgstate_uses_blending(fz_context *ctx, pdf_obj *dict) +{ + pdf_obj *obj = pdf_dict_gets(dict, "BM"); + if (pdf_is_name(obj) && strcmp(pdf_to_name(obj), 
"Normal")) + return 1; + return 0; +} + +static int +pdf_pattern_uses_blending(fz_context *ctx, pdf_obj *dict) +{ + pdf_obj *obj; + obj = pdf_dict_gets(dict, "Resources"); + if (pdf_resources_use_blending(ctx, obj)) + return 1; + obj = pdf_dict_gets(dict, "ExtGState"); + return pdf_extgstate_uses_blending(ctx, obj); +} + +static int +pdf_xobject_uses_blending(fz_context *ctx, pdf_obj *dict) +{ + pdf_obj *obj = pdf_dict_gets(dict, "Resources"); + return pdf_resources_use_blending(ctx, obj); +} + +static int +pdf_resources_use_blending(fz_context *ctx, pdf_obj *rdb) +{ + pdf_obj *obj; + int i, n, useBM = 0; + + if (!rdb) + return 0; + + /* Have we been here before and stashed an answer? */ + obj = pdf_dict_gets(rdb, ".useBM"); + if (obj) + return pdf_to_bool(obj); + + /* stop on cyclic resource dependencies */ + if (pdf_obj_mark(rdb)) + return 0; + + fz_try(ctx) + { + obj = pdf_dict_gets(rdb, "ExtGState"); + n = pdf_dict_len(obj); + for (i = 0; i < n; i++) + if (pdf_extgstate_uses_blending(ctx, pdf_dict_get_val(obj, i))) + goto found; + + obj = pdf_dict_gets(rdb, "Pattern"); + n = pdf_dict_len(obj); + for (i = 0; i < n; i++) + if (pdf_pattern_uses_blending(ctx, pdf_dict_get_val(obj, i))) + goto found; + + obj = pdf_dict_gets(rdb, "XObject"); + n = pdf_dict_len(obj); + for (i = 0; i < n; i++) + if (pdf_xobject_uses_blending(ctx, pdf_dict_get_val(obj, i))) + goto found; + if (0) + { +found: + useBM = 1; + } + } + fz_always(ctx) + { + pdf_obj_unmark(rdb); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + put_marker_bool(ctx, rdb, ".useBM", useBM); + return useBM; +} + +static void +pdf_load_transition(pdf_document *xref, pdf_page *page, pdf_obj *transdict) +{ + char *name; + pdf_obj *obj; + int type; + + obj = pdf_dict_gets(transdict, "D"); + page->transition.duration = (obj ? 
pdf_to_real(obj) : 1); + + page->transition.vertical = (pdf_to_name(pdf_dict_gets(transdict, "Dm"))[0] != 'H'); + page->transition.outwards = (pdf_to_name(pdf_dict_gets(transdict, "M"))[0] != 'I'); + /* FIXME: If 'Di' is None, it should be handled differently, but + * this only affects Fly, and we don't implement that currently. */ + page->transition.direction = (pdf_to_int(pdf_dict_gets(transdict, "Di"))); + /* FIXME: Read SS for Fly when we implement it */ + /* FIXME: Read B for Fly when we implement it */ + + name = pdf_to_name(pdf_dict_gets(transdict, "S")); + if (!strcmp(name, "Split")) + type = FZ_TRANSITION_SPLIT; + else if (!strcmp(name, "Blinds")) + type = FZ_TRANSITION_BLINDS; + else if (!strcmp(name, "Box")) + type = FZ_TRANSITION_BOX; + else if (!strcmp(name, "Wipe")) + type = FZ_TRANSITION_WIPE; + else if (!strcmp(name, "Dissolve")) + type = FZ_TRANSITION_DISSOLVE; + else if (!strcmp(name, "Glitter")) + type = FZ_TRANSITION_GLITTER; + else if (!strcmp(name, "Fly")) + type = FZ_TRANSITION_FLY; + else if (!strcmp(name, "Push")) + type = FZ_TRANSITION_PUSH; + else if (!strcmp(name, "Cover")) + type = FZ_TRANSITION_COVER; + else if (!strcmp(name, "Uncover")) + type = FZ_TRANSITION_UNCOVER; + else if (!strcmp(name, "Fade")) + type = FZ_TRANSITION_FADE; + else + type = FZ_TRANSITION_NONE; + page->transition.type = type; +} + +pdf_page * +pdf_load_page(pdf_document *xref, int number) +{ + fz_context *ctx = xref->ctx; + pdf_page *page; + pdf_annot *annot; + pdf_obj *pageobj, *pageref, *obj; + fz_rect mediabox, cropbox, realbox; + float userunit; + fz_matrix mat; + + pdf_load_page_tree(xref); + if (number < 0 || number >= xref->page_len) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find page %d", number + 1); + + pageobj = xref->page_objs[number]; + pageref = xref->page_refs[number]; + + page = fz_malloc_struct(ctx, pdf_page); + page->resources = NULL; + page->contents = NULL; + page->transparency = 0; + page->links = NULL; + page->annots = NULL; + 
page->deleted_annots = NULL; + page->tmp_annots = NULL; + page->me = pdf_keep_obj(pageobj); + + obj = pdf_dict_gets(pageobj, "UserUnit"); + if (pdf_is_real(obj)) + userunit = pdf_to_real(obj); + else + userunit = 1; + + pdf_to_rect(ctx, pdf_dict_gets(pageobj, "MediaBox"), &mediabox); + if (fz_is_empty_rect(&mediabox)) + { + fz_warn(ctx, "cannot find page size for page %d", number + 1); + mediabox.x0 = 0; + mediabox.y0 = 0; + mediabox.x1 = 612; + mediabox.y1 = 792; + } + + pdf_to_rect(ctx, pdf_dict_gets(pageobj, "CropBox"), &cropbox); + if (!fz_is_empty_rect(&cropbox)) + fz_intersect_rect(&mediabox, &cropbox); + + page->mediabox.x0 = fz_min(mediabox.x0, mediabox.x1) * userunit; + page->mediabox.y0 = fz_min(mediabox.y0, mediabox.y1) * userunit; + page->mediabox.x1 = fz_max(mediabox.x0, mediabox.x1) * userunit; + page->mediabox.y1 = fz_max(mediabox.y0, mediabox.y1) * userunit; + + if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1) + { + fz_warn(ctx, "invalid page size in page %d", number + 1); + page->mediabox = fz_unit_rect; + } + + page->rotate = pdf_to_int(pdf_dict_gets(pageobj, "Rotate")); + /* Snap page->rotate to 0, 90, 180 or 270 */ + if (page->rotate < 0) + page->rotate = 360 - ((-page->rotate) % 360); + if (page->rotate >= 360) + page->rotate = page->rotate % 360; + page->rotate = 90*((page->rotate + 45)/90); + if (page->rotate > 360) + page->rotate = 0; + + fz_pre_rotate(fz_scale(&page->ctm, 1, -1), -page->rotate); + realbox = page->mediabox; + fz_transform_rect(&realbox, &page->ctm); + fz_pre_scale(fz_translate(&mat, -realbox.x0, -realbox.y0), userunit, userunit); + fz_concat(&page->ctm, &page->ctm, &mat); + + obj = pdf_dict_gets(pageobj, "Annots"); + if (obj) + { + page->links = pdf_load_link_annots(xref, obj, &page->ctm); + page->annots = pdf_load_annots(xref, obj, page); + } + + page->duration = pdf_to_real(pdf_dict_gets(pageobj, "Dur")); + + obj = pdf_dict_gets(pageobj, "Trans"); + page->transition_present = (obj 
!= NULL); + if (obj) + { + pdf_load_transition(xref, page, obj); + } + + page->resources = pdf_dict_gets(pageobj, "Resources"); + if (page->resources) + pdf_keep_obj(page->resources); + + obj = pdf_dict_gets(pageobj, "Contents"); + fz_try(ctx) + { + page->contents = pdf_keep_obj(obj); + + if (pdf_resources_use_blending(ctx, page->resources)) + page->transparency = 1; + + for (annot = page->annots; annot && !page->transparency; annot = annot->next) + if (annot->ap && pdf_resources_use_blending(ctx, annot->ap->resources)) + page->transparency = 1; + } + fz_catch(ctx) + { + pdf_free_page(xref, page); + fz_rethrow_message(ctx, "cannot load page %d contents (%d 0 R)", number + 1, pdf_to_num(pageref)); + } + + return page; +} + +fz_rect * +pdf_bound_page(pdf_document *xref, pdf_page *page, fz_rect *bounds) +{ + fz_matrix mtx; + fz_rect mediabox = page->mediabox; + fz_transform_rect(&mediabox, fz_rotate(&mtx, page->rotate)); + bounds->x0 = bounds->y0 = 0; + bounds->x1 = mediabox.x1 - mediabox.x0; + bounds->y1 = mediabox.y1 - mediabox.y0; + return bounds; +} + +fz_link * +pdf_load_links(pdf_document *xref, pdf_page *page) +{ + return fz_keep_link(xref->ctx, page->links); +} + +void +pdf_free_page(pdf_document *xref, pdf_page *page) +{ + if (page == NULL) + return; + pdf_drop_obj(page->resources); + pdf_drop_obj(page->contents); + if (page->links) + fz_drop_link(xref->ctx, page->links); + if (page->annots) + pdf_free_annot(xref->ctx, page->annots); + if (page->deleted_annots) + pdf_free_annot(xref->ctx, page->deleted_annots); + if (page->tmp_annots) + pdf_free_annot(xref->ctx, page->tmp_annots); + /* xref->focus, when not NULL, refers to one of + * the annotations and must be NULLed when the + * annotations are destroyed. xref->focus_obj + * keeps track of the actual annotation object. 
*/ + xref->focus = NULL; + pdf_drop_obj(page->me); + fz_free(xref->ctx, page); +} diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c new file mode 100644 index 00000000..18ab3113 --- /dev/null +++ b/source/pdf/pdf-parse.c @@ -0,0 +1,611 @@ +#include "mupdf/pdf.h" + +fz_rect * +pdf_to_rect(fz_context *ctx, pdf_obj *array, fz_rect *r) +{ + float a = pdf_to_real(pdf_array_get(array, 0)); + float b = pdf_to_real(pdf_array_get(array, 1)); + float c = pdf_to_real(pdf_array_get(array, 2)); + float d = pdf_to_real(pdf_array_get(array, 3)); + r->x0 = fz_min(a, c); + r->y0 = fz_min(b, d); + r->x1 = fz_max(a, c); + r->y1 = fz_max(b, d); + return r; +} + +fz_matrix * +pdf_to_matrix(fz_context *ctx, pdf_obj *array, fz_matrix *m) +{ + m->a = pdf_to_real(pdf_array_get(array, 0)); + m->b = pdf_to_real(pdf_array_get(array, 1)); + m->c = pdf_to_real(pdf_array_get(array, 2)); + m->d = pdf_to_real(pdf_array_get(array, 3)); + m->e = pdf_to_real(pdf_array_get(array, 4)); + m->f = pdf_to_real(pdf_array_get(array, 5)); + return m; +} + +/* Convert Unicode/PdfDocEncoding string into utf-8 */ +char * +pdf_to_utf8(pdf_document *xref, pdf_obj *src) +{ + fz_context *ctx = xref->ctx; + fz_buffer *strmbuf = NULL; + unsigned char *srcptr; + char *dstptr, *dst; + int srclen; + int dstlen = 0; + int ucs; + int i; + + fz_var(strmbuf); + fz_try(ctx) + { + if (pdf_is_string(src)) + { + srcptr = (unsigned char *) pdf_to_str_buf(src); + srclen = pdf_to_str_len(src); + } + else if (pdf_is_stream(xref, pdf_to_num(src), pdf_to_gen(src))) + { + strmbuf = pdf_load_stream(xref, pdf_to_num(src), pdf_to_gen(src)); + srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr); + } + else + { + srclen = 0; + } + + if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) + { + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] << 8 | srcptr[i+1]; + dstlen += fz_runelen(ucs); + } + + dstptr = dst = fz_malloc(ctx, dstlen + 1); + + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] << 
8 | srcptr[i+1]; + dstptr += fz_runetochar(dstptr, ucs); + } + } + else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) + { + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] | srcptr[i+1] << 8; + dstlen += fz_runelen(ucs); + } + + dstptr = dst = fz_malloc(ctx, dstlen + 1); + + for (i = 2; i + 1 < srclen; i += 2) + { + ucs = srcptr[i] | srcptr[i+1] << 8; + dstptr += fz_runetochar(dstptr, ucs); + } + } + else + { + for (i = 0; i < srclen; i++) + dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); + + dstptr = dst = fz_malloc(ctx, dstlen + 1); + + for (i = 0; i < srclen; i++) + { + ucs = pdf_doc_encoding[srcptr[i]]; + dstptr += fz_runetochar(dstptr, ucs); + } + } + } + fz_always(ctx) + { + fz_drop_buffer(ctx, strmbuf); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + *dstptr = '\0'; + return dst; +} + +/* Convert Unicode/PdfDocEncoding string into ucs-2 */ +unsigned short * +pdf_to_ucs2(pdf_document *xref, pdf_obj *src) +{ + fz_context *ctx = xref->ctx; + unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src); + unsigned short *dstptr, *dst; + int srclen = pdf_to_str_len(src); + int i; + + if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) + { + dstptr = dst = fz_malloc_array(ctx, (srclen - 2) / 2 + 1, sizeof(short)); + for (i = 2; i + 1 < srclen; i += 2) + *dstptr++ = srcptr[i] << 8 | srcptr[i+1]; + } + else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) + { + dstptr = dst = fz_malloc_array(ctx, (srclen - 2) / 2 + 1, sizeof(short)); + for (i = 2; i + 1 < srclen; i += 2) + *dstptr++ = srcptr[i] | srcptr[i+1] << 8; + } + else + { + dstptr = dst = fz_malloc_array(ctx, srclen + 1, sizeof(short)); + for (i = 0; i < srclen; i++) + *dstptr++ = pdf_doc_encoding[srcptr[i]]; + } + + *dstptr = '\0'; + return dst; +} + +/* allow to convert to UCS-2 without the need for an fz_context */ +/* (buffer must be at least (fz_to_str_len(src) + 1) * 2 bytes in size) */ +void +pdf_to_ucs2_buf(unsigned short *buffer, pdf_obj *src) +{ + unsigned 
char *srcptr = (unsigned char *) pdf_to_str_buf(src); + unsigned short *dstptr = buffer; + int srclen = pdf_to_str_len(src); + int i; + + if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) + { + for (i = 2; i + 1 < srclen; i += 2) + *dstptr++ = srcptr[i] << 8 | srcptr[i+1]; + } + else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) + { + for (i = 2; i + 1 < srclen; i += 2) + *dstptr++ = srcptr[i] | srcptr[i+1] << 8; + } + else + { + for (i = 0; i < srclen; i++) + *dstptr++ = pdf_doc_encoding[srcptr[i]]; + } + + *dstptr = '\0'; +} + +/* Convert UCS-2 string into PdfDocEncoding for authentication */ +char * +pdf_from_ucs2(pdf_document *xref, unsigned short *src) +{ + fz_context *ctx = xref->ctx; + int i, j, len; + char *docstr; + + len = 0; + while (src[len]) + len++; + + docstr = fz_malloc(ctx, len + 1); + + for (i = 0; i < len; i++) + { + /* shortcut: check if the character has the same code point in both encodings */ + if (0 < src[i] && src[i] < 256 && pdf_doc_encoding[src[i]] == src[i]) { + docstr[i] = src[i]; + continue; + } + + /* search through pdf_docencoding for the character's code point */ + for (j = 0; j < 256; j++) + if (pdf_doc_encoding[j] == src[i]) + break; + docstr[i] = j; + + /* fail, if a character can't be encoded */ + if (!docstr[i]) + { + fz_free(ctx, docstr); + return NULL; + } + } + docstr[len] = '\0'; + + return docstr; +} + +pdf_obj * +pdf_to_utf8_name(pdf_document *xref, pdf_obj *src) +{ + char *buf = pdf_to_utf8(xref, src); + pdf_obj *dst = pdf_new_name(xref->ctx, buf); + fz_free(xref->ctx, buf); + return dst; +} + +pdf_obj * +pdf_parse_array(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_obj *ary = NULL; + pdf_obj *obj = NULL; + int a = 0, b = 0, n = 0; + pdf_token tok; + fz_context *ctx = file->ctx; + pdf_obj *op; + + fz_var(obj); + + ary = pdf_new_array(ctx, 4); + + fz_try(ctx) + { + while (1) + { + tok = pdf_lex(file, buf); + + if (tok != PDF_TOK_INT && tok != PDF_TOK_R) + { + if (n > 0) + { + obj = 
pdf_new_int(ctx, a); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + } + if (n > 1) + { + obj = pdf_new_int(ctx, b); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + } + n = 0; + } + + if (tok == PDF_TOK_INT && n == 2) + { + obj = pdf_new_int(ctx, a); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + a = b; + n --; + } + + switch (tok) + { + case PDF_TOK_CLOSE_ARRAY: + op = ary; + goto end; + + case PDF_TOK_INT: + if (n == 0) + a = buf->i; + if (n == 1) + b = buf->i; + n ++; + break; + + case PDF_TOK_R: + if (n != 2) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse indirect reference in array"); + obj = pdf_new_indirect(ctx, a, b, xref); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + n = 0; + break; + + case PDF_TOK_OPEN_ARRAY: + obj = pdf_parse_array(xref, file, buf); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + break; + + case PDF_TOK_OPEN_DICT: + obj = pdf_parse_dict(xref, file, buf); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + break; + + case PDF_TOK_NAME: + obj = pdf_new_name(ctx, buf->scratch); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + break; + case PDF_TOK_REAL: + obj = pdf_new_real(ctx, buf->f); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + break; + case PDF_TOK_STRING: + obj = pdf_new_string(ctx, buf->scratch, buf->len); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + break; + case PDF_TOK_TRUE: + obj = pdf_new_bool(ctx, 1); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + break; + case PDF_TOK_FALSE: + obj = pdf_new_bool(ctx, 0); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + break; + case PDF_TOK_NULL: + obj = pdf_new_null(ctx); + pdf_array_push(ary, obj); + pdf_drop_obj(obj); + obj = NULL; + break; + + default: + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse token in array"); + } + } +end: + {} + } + fz_catch(ctx) + { + pdf_drop_obj(obj); + 
pdf_drop_obj(ary); + fz_rethrow_message(ctx, "cannot parse array"); + } + return op; +} + +pdf_obj * +pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_obj *dict; + pdf_obj *key = NULL; + pdf_obj *val = NULL; + pdf_token tok; + int a, b; + fz_context *ctx = file->ctx; + + dict = pdf_new_dict(ctx, 8); + + fz_var(key); + fz_var(val); + + fz_try(ctx) + { + while (1) + { + tok = pdf_lex(file, buf); + skip: + if (tok == PDF_TOK_CLOSE_DICT) + break; + + /* for BI .. ID .. EI in content streams */ + if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")) + break; + + if (tok != PDF_TOK_NAME) + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key in dict"); + + key = pdf_new_name(ctx, buf->scratch); + + tok = pdf_lex(file, buf); + + switch (tok) + { + case PDF_TOK_OPEN_ARRAY: + val = pdf_parse_array(xref, file, buf); + break; + + case PDF_TOK_OPEN_DICT: + val = pdf_parse_dict(xref, file, buf); + break; + + case PDF_TOK_NAME: val = pdf_new_name(ctx, buf->scratch); break; + case PDF_TOK_REAL: val = pdf_new_real(ctx, buf->f); break; + case PDF_TOK_STRING: val = pdf_new_string(ctx, buf->scratch, buf->len); break; + case PDF_TOK_TRUE: val = pdf_new_bool(ctx, 1); break; + case PDF_TOK_FALSE: val = pdf_new_bool(ctx, 0); break; + case PDF_TOK_NULL: val = pdf_new_null(ctx); break; + + case PDF_TOK_INT: + /* 64-bit to allow for numbers > INT_MAX and overflow */ + a = buf->i; + tok = pdf_lex(file, buf); + if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME || + (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))) + { + val = pdf_new_int(ctx, a); + pdf_dict_put(dict, key, val); + pdf_drop_obj(val); + val = NULL; + pdf_drop_obj(key); + key = NULL; + goto skip; + } + if (tok == PDF_TOK_INT) + { + b = buf->i; + tok = pdf_lex(file, buf); + if (tok == PDF_TOK_R) + { + val = pdf_new_indirect(ctx, a, b, xref); + break; + } + } + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid indirect reference in dict"); + + default: + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in 
dict"); + } + + pdf_dict_put(dict, key, val); + pdf_drop_obj(val); + val = NULL; + pdf_drop_obj(key); + key = NULL; + } + } + fz_catch(ctx) + { + pdf_drop_obj(dict); + pdf_drop_obj(key); + pdf_drop_obj(val); + fz_rethrow_message(ctx, "cannot parse dict"); + } + return dict; +} + +pdf_obj * +pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf) +{ + pdf_token tok; + fz_context *ctx = file->ctx; + + tok = pdf_lex(file, buf); + + switch (tok) + { + case PDF_TOK_OPEN_ARRAY: + return pdf_parse_array(xref, file, buf); + case PDF_TOK_OPEN_DICT: + return pdf_parse_dict(xref, file, buf); + case PDF_TOK_NAME: return pdf_new_name(ctx, buf->scratch); break; + case PDF_TOK_REAL: return pdf_new_real(ctx, buf->f); break; + case PDF_TOK_STRING: return pdf_new_string(ctx, buf->scratch, buf->len); break; + case PDF_TOK_TRUE: return pdf_new_bool(ctx, 1); break; + case PDF_TOK_FALSE: return pdf_new_bool(ctx, 0); break; + case PDF_TOK_NULL: return pdf_new_null(ctx); break; + case PDF_TOK_INT: return pdf_new_int(ctx, buf->i); break; + default: fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in object stream"); + } + return NULL; /* Stupid MSVC */ +} + +pdf_obj * +pdf_parse_ind_obj(pdf_document *xref, + fz_stream *file, pdf_lexbuf *buf, + int *onum, int *ogen, int *ostmofs) +{ + pdf_obj *obj = NULL; + int num = 0, gen = 0, stm_ofs; + pdf_token tok; + int a, b; + fz_context *ctx = file->ctx; + + fz_var(obj); + + tok = pdf_lex(file, buf); + if (tok != PDF_TOK_INT) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number"); + num = buf->i; + + tok = pdf_lex(file, buf); + if (tok != PDF_TOK_INT) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? 
obj)", num); + gen = buf->i; + + tok = pdf_lex(file, buf); + if (tok != PDF_TOK_OBJ) + fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen); + + tok = pdf_lex(file, buf); + + switch (tok) + { + case PDF_TOK_OPEN_ARRAY: + obj = pdf_parse_array(xref, file, buf); + break; + + case PDF_TOK_OPEN_DICT: + obj = pdf_parse_dict(xref, file, buf); + break; + + case PDF_TOK_NAME: obj = pdf_new_name(ctx, buf->scratch); break; + case PDF_TOK_REAL: obj = pdf_new_real(ctx, buf->f); break; + case PDF_TOK_STRING: obj = pdf_new_string(ctx, buf->scratch, buf->len); break; + case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, 1); break; + case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, 0); break; + case PDF_TOK_NULL: obj = pdf_new_null(ctx); break; + + case PDF_TOK_INT: + a = buf->i; + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ) + { + obj = pdf_new_int(ctx, a); + goto skip; + } + if (tok == PDF_TOK_INT) + { + b = buf->i; + tok = pdf_lex(file, buf); + if (tok == PDF_TOK_R) + { + obj = pdf_new_indirect(ctx, a, b, xref); + break; + } + } + fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen); + + case PDF_TOK_ENDOBJ: + obj = pdf_new_null(ctx); + goto skip; + + default: + fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen); + } + + fz_try(ctx) + { + tok = pdf_lex(file, buf); + } + fz_catch(ctx) + { + pdf_drop_obj(obj); + fz_rethrow_message(ctx, "cannot parse indirect object (%d %d R)", num, gen); + } + +skip: + if (tok == PDF_TOK_STREAM) + { + int c = fz_read_byte(file); + while (c == ' ') + c = fz_read_byte(file); + if (c == '\r') + { + c = fz_peek_byte(file); + if (c != '\n') + fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen); + else + fz_read_byte(file); + } + stm_ofs = fz_tell(file); + } + else if (tok == PDF_TOK_ENDOBJ) + { + stm_ofs = 0; + } + else + { + fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen); + stm_ofs = 0; + } + + if 
(onum) *onum = num; + if (ogen) *ogen = gen; + if (ostmofs) *ostmofs = stm_ofs; + return obj; +} diff --git a/source/pdf/pdf-pattern.c b/source/pdf/pdf-pattern.c new file mode 100644 index 00000000..622705b2 --- /dev/null +++ b/source/pdf/pdf-pattern.c @@ -0,0 +1,83 @@ +#include "mupdf/pdf.h" + +pdf_pattern * +pdf_keep_pattern(fz_context *ctx, pdf_pattern *pat) +{ + return (pdf_pattern *)fz_keep_storable(ctx, &pat->storable); +} + +void +pdf_drop_pattern(fz_context *ctx, pdf_pattern *pat) +{ + fz_drop_storable(ctx, &pat->storable); +} + +static void +pdf_free_pattern_imp(fz_context *ctx, fz_storable *pat_) +{ + pdf_pattern *pat = (pdf_pattern *)pat_; + + if (pat->resources) + pdf_drop_obj(pat->resources); + if (pat->contents) + pdf_drop_obj(pat->contents); + fz_free(ctx, pat); +} + +static unsigned int +pdf_pattern_size(pdf_pattern *pat) +{ + if (pat == NULL) + return 0; + return sizeof(*pat); +} + +pdf_pattern * +pdf_load_pattern(pdf_document *xref, pdf_obj *dict) +{ + pdf_pattern *pat; + pdf_obj *obj; + fz_context *ctx = xref->ctx; + + if ((pat = pdf_find_item(ctx, pdf_free_pattern_imp, dict))) + { + return pat; + } + + pat = fz_malloc_struct(ctx, pdf_pattern); + FZ_INIT_STORABLE(pat, 1, pdf_free_pattern_imp); + pat->resources = NULL; + pat->contents = NULL; + + /* Store pattern now, to avoid possible recursion if objects refer back to this one */ + pdf_store_item(ctx, dict, pat, pdf_pattern_size(pat)); + + pat->ismask = pdf_to_int(pdf_dict_gets(dict, "PaintType")) == 2; + pat->xstep = pdf_to_real(pdf_dict_gets(dict, "XStep")); + pat->ystep = pdf_to_real(pdf_dict_gets(dict, "YStep")); + + obj = pdf_dict_gets(dict, "BBox"); + pdf_to_rect(ctx, obj, &pat->bbox); + + obj = pdf_dict_gets(dict, "Matrix"); + if (obj) + pdf_to_matrix(ctx, obj, &pat->matrix); + else + pat->matrix = fz_identity; + + pat->resources = pdf_dict_gets(dict, "Resources"); + if (pat->resources) + pdf_keep_obj(pat->resources); + + fz_try(ctx) + { + pat->contents = pdf_keep_obj(dict); + } + 
fz_catch(ctx) + { + pdf_remove_item(ctx, pdf_free_pattern_imp, dict); + pdf_drop_pattern(ctx, pat); + fz_rethrow_message(ctx, "cannot load pattern stream (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); + } + return pat; +} diff --git a/source/pdf/pdf-pkcs7.c b/source/pdf/pdf-pkcs7.c new file mode 100644 index 00000000..1bc50b6e --- /dev/null +++ b/source/pdf/pdf-pkcs7.c @@ -0,0 +1,400 @@ +#include "mupdf/pdf.h" // TODO: move this file to pdf module + +#ifdef HAVE_OPENSSL + +#include "openssl/err.h" +#include "openssl/bio.h" +#include "openssl/asn1.h" +#include "openssl/x509.h" +#include "openssl/err.h" +#include "openssl/objects.h" +#include "openssl/pem.h" +#include "openssl/pkcs7.h" + +enum +{ + SEG_START = 0, + SEG_SIZE = 1 +}; + +typedef struct bsegs_struct +{ + int (*seg)[2]; + int nsegs; + int current_seg; + int seg_pos; +} BIO_SEGS_CTX; + +static int bsegs_read(BIO *b, char *buf, int size) +{ + BIO_SEGS_CTX *ctx = (BIO_SEGS_CTX *)b->ptr; + int read = 0; + + while (size > 0 && ctx->current_seg < ctx->nsegs) + { + int nb = ctx->seg[ctx->current_seg][SEG_SIZE] - ctx->seg_pos; + + if (nb > size) + nb = size; + + if (nb > 0) + { + if (ctx->seg_pos == 0) + (void)BIO_seek(b->next_bio, ctx->seg[ctx->current_seg][SEG_START]); + + (void)BIO_read(b->next_bio, buf, nb); + ctx->seg_pos += nb; + read += nb; + buf += nb; + size -= nb; + } + else + { + ctx->current_seg++; + + if (ctx->current_seg < ctx->nsegs) + ctx->seg_pos = 0; + } + } + + return read; +} + +static long bsegs_ctrl(BIO *b, int cmd, long arg1, void *arg2) +{ + return BIO_ctrl(b->next_bio, cmd, arg1, arg2); +} + +static int bsegs_new(BIO *b) +{ + BIO_SEGS_CTX *ctx; + + ctx = (BIO_SEGS_CTX *)malloc(sizeof(BIO_SEGS_CTX)); + if (ctx == NULL) + return 0; + + ctx->current_seg = 0; + ctx->seg_pos = 0; + ctx->seg = NULL; + ctx->nsegs = 0; + + b->init = 1; + b->ptr = (char *)ctx; + b->flags = 0; + b->num = 0; + + return 1; +} + +static int bsegs_free(BIO *b) +{ + if (b == NULL) + return 0; + + free(b->ptr); + 
b->ptr = NULL; + b->init = 0; + b->flags = 0; + + return 1; +} + +static long bsegs_callback_ctrl(BIO *b, int cmd, bio_info_cb *fp) +{ + return BIO_callback_ctrl(b->next_bio, cmd, fp); +} + +static BIO_METHOD methods_bsegs = +{ + 0,"segment reader", + NULL, + bsegs_read, + NULL, + NULL, + bsegs_ctrl, + bsegs_new, + bsegs_free, + bsegs_callback_ctrl, +}; + +static BIO_METHOD *BIO_f_segments(void) +{ + return &methods_bsegs; +} + +static void BIO_set_segments(BIO *b, int (*seg)[2], int nsegs) +{ + BIO_SEGS_CTX *ctx = (BIO_SEGS_CTX *)b->ptr; + + ctx->seg = seg; + ctx->nsegs = nsegs; +} + +typedef struct verify_context_s +{ + X509_STORE_CTX x509_ctx; + char certdesc[256]; + int err; +} verify_context; + +static int verify_callback(int ok, X509_STORE_CTX *ctx) +{ + verify_context *vctx; + X509 *err_cert; + int err, depth; + + vctx = (verify_context *)ctx; + + err_cert = X509_STORE_CTX_get_current_cert(ctx); + err = X509_STORE_CTX_get_error(ctx); + depth = X509_STORE_CTX_get_error_depth(ctx); + + X509_NAME_oneline(X509_get_subject_name(err_cert), vctx->certdesc, sizeof(vctx->certdesc)); + + if (!ok && depth >= 6) + { + X509_STORE_CTX_set_error(ctx, X509_V_ERR_CERT_CHAIN_TOO_LONG); + } + + switch (ctx->error) + { + case X509_V_ERR_INVALID_PURPOSE: + case X509_V_ERR_CERT_HAS_EXPIRED: + case X509_V_ERR_KEYUSAGE_NO_CERTSIGN: + err = X509_V_OK; + X509_STORE_CTX_set_error(ctx, X509_V_OK); + ok = 1; + break; + + case X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT: + /* + In this case, don't reset err to X509_V_OK, so that it can be reported, + although we do return 1, so that the digest will still be checked + */ + ok = 1; + break; + + default: + break; + } + + if (ok && vctx->err == X509_V_OK) + vctx->err = err; + return ok; +} + +static int pk7_verify(X509_STORE *cert_store, PKCS7 *p7, BIO *detached, char *ebuf, int ebufsize) +{ + PKCS7_SIGNER_INFO *si; + verify_context vctx; + BIO *p7bio=NULL; + char readbuf[1024*4]; + int res = 1; + int i; + STACK_OF(PKCS7_SIGNER_INFO) *sk; + + 
vctx.err = X509_V_OK; + ebuf[0] = 0; + + OpenSSL_add_all_algorithms(); + + EVP_add_digest(EVP_md5()); + EVP_add_digest(EVP_sha1()); + + ERR_load_crypto_strings(); + + ERR_clear_error(); + + X509_VERIFY_PARAM_set_flags(cert_store->param, X509_V_FLAG_CB_ISSUER_CHECK); + X509_STORE_set_verify_cb_func(cert_store, verify_callback); + + p7bio = PKCS7_dataInit(p7, detached); + + /* We now have to 'read' from p7bio to calculate digests etc. */ + while (BIO_read(p7bio, readbuf, sizeof(readbuf)) > 0) + ; + + /* We can now verify signatures */ + sk = PKCS7_get_signer_info(p7); + if (sk == NULL) + { + /* there are no signatures on this data */ + res = 0; + strncpy(ebuf, "No signatures", sizeof(ebuf)); + goto exit; + } + + for (i=0; i<sk_PKCS7_SIGNER_INFO_num(sk); i++) + { + int rc; + si = sk_PKCS7_SIGNER_INFO_value(sk, i); + rc = PKCS7_dataVerify(cert_store, &vctx.x509_ctx, p7bio,p7, si); + if (rc <= 0 || vctx.err != X509_V_OK) + { + char tbuf[120]; + + if (rc <= 0) + { + strncpy(ebuf, ERR_error_string(ERR_get_error(), tbuf), ebufsize-1); + } + else + { + /* Error while checking the certificate chain */ + snprintf(ebuf, ebufsize-1, "%s(%d): %s", X509_verify_cert_error_string(vctx.err), vctx.err, vctx.certdesc); + } + + ebuf[ebufsize-1] = 0; + + res = 0; + goto exit; + } + } + +exit: + X509_STORE_CTX_cleanup(&vctx.x509_ctx); + ERR_free_strings(); + + return res; +} + +static unsigned char adobe_ca[] = +{ +#include "gen_adobe_ca.h" +}; + +static int verify_sig(char *sig, int sig_len, char *file, int (*byte_range)[2], int byte_range_len, char *ebuf, int ebufsize) +{ + PKCS7 *pk7sig = NULL; + PKCS7 *pk7cert = NULL; + X509_STORE *st = NULL; + BIO *bsig = NULL; + BIO *bcert = NULL; + BIO *bdata = NULL; + BIO *bsegs = NULL; + STACK_OF(X509) *certs = NULL; + int t; + int res = 0; + + bsig = BIO_new_mem_buf(sig, sig_len); + pk7sig = d2i_PKCS7_bio(bsig, NULL); + if (pk7sig == NULL) + goto exit; + + bdata = BIO_new(BIO_s_file()); + BIO_read_filename(bdata, file); + + bsegs = 
BIO_new(BIO_f_segments()); + if (bsegs == NULL) + goto exit; + + bsegs->next_bio = bdata; + BIO_set_segments(bsegs, byte_range, byte_range_len); + + /* Find the certificates in the pk7 file */ + bcert = BIO_new_mem_buf(adobe_ca, sizeof(adobe_ca)); + pk7cert = d2i_PKCS7_bio(bcert, NULL); + if (pk7cert == NULL) + goto exit; + + t = OBJ_obj2nid(pk7cert->type); + switch (t) + { + case NID_pkcs7_signed: + certs = pk7cert->d.sign->cert; + break; + + case NID_pkcs7_signedAndEnveloped: + certs = pk7cert->d.sign->cert; + break; + + default: + break; + } + + st = X509_STORE_new(); + if (st == NULL) + goto exit; + + /* Add the certificates to the store */ + if (certs != NULL) + { + int i, n = sk_X509_num(certs); + + for (i = 0; i < n; i++) + { + X509 *c = sk_X509_value(certs, i); + X509_STORE_add_cert(st, c); + } + } + + res = pk7_verify(st, pk7sig, bsegs, ebuf, ebufsize); + +exit: + BIO_free(bsig); + BIO_free(bdata); + BIO_free(bsegs); + BIO_free(bcert); + PKCS7_free(pk7sig); + PKCS7_free(pk7cert); + X509_STORE_free(st); + + return res; +} + +int pdf_check_signature(fz_context *ctx, pdf_document *doc, pdf_widget *widget, char *file, char *ebuf, int ebufsize) +{ + int (*byte_range)[2] = NULL; + int byte_range_len; + char *contents = NULL; + int contents_len; + int res = 0; + + fz_var(byte_range); + fz_var(res); + fz_try(ctx); + { + byte_range_len = pdf_signature_widget_byte_range(doc, widget, NULL); + if (byte_range_len) + { + byte_range = fz_calloc(ctx, byte_range_len, sizeof(*byte_range)); + pdf_signature_widget_byte_range(doc, widget, byte_range); + } + + contents_len = pdf_signature_widget_contents(doc, widget, &contents); + if (byte_range && contents) + { + res = verify_sig(contents, contents_len, file, byte_range, byte_range_len, ebuf, ebufsize); + } + else + { + res = 0; + strncpy(ebuf, "Not signed", ebufsize); + } + + } + fz_always(ctx) + { + fz_free(ctx, byte_range); + } + fz_catch(ctx) + { + res = 0; + strncpy(ebuf, fz_caught_message(ctx), ebufsize); + } + + if 
(ebufsize > 0) + ebuf[ebufsize-1] = 0; + + return res; +} + +#else /* HAVE_OPENSSL */ + +int pdf_check_signature(fz_context *ctx, pdf_document *doc, pdf_widget *widget, char *file, char *ebuf, int ebufsize) +{ + strncpy(ebuf, "This version of MuPDF was built without signature support", ebufsize); + + return 0; +} + +#endif /* HAVE_OPENSSL */ diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c new file mode 100644 index 00000000..421696a2 --- /dev/null +++ b/source/pdf/pdf-repair.c @@ -0,0 +1,587 @@ +#include "mupdf/pdf.h" + +/* Scan file for objects and reconstruct xref table */ + +/* Define in PDF 1.7 to be 8388607, but mupdf is more lenient. */ +#define MAX_OBJECT_NUMBER (10 << 20) + +struct entry +{ + int num; + int gen; + int ofs; + int stm_ofs; + int stm_len; +}; + +static int +pdf_repair_obj(fz_stream *file, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, int *tmpofs) +{ + pdf_token tok; + int stm_len; + int n; + fz_context *ctx = file->ctx; + + *stmofsp = 0; + *stmlenp = -1; + + stm_len = 0; + + /* On entry to this function, we know that we've just seen + * '<int> <int> obj'. We expect the next thing we see to be a + * pdf object. Regardless of the type of thing we meet next + * we only need to fully parse it if it is a dictionary. 
*/ + tok = pdf_lex(file, buf); + + if (tok == PDF_TOK_OPEN_DICT) + { + pdf_obj *dict, *obj; + + /* Send NULL xref so we don't try to resolve references */ + fz_try(ctx) + { + dict = pdf_parse_dict(NULL, file, buf); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + /* Don't let a broken object at EOF overwrite a good one */ + if (file->eof) + fz_rethrow_message(ctx, "broken object at EOF ignored"); + /* Silently swallow the error */ + dict = pdf_new_dict(ctx, 2); + } + + obj = pdf_dict_gets(dict, "Type"); + if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "XRef")) + { + obj = pdf_dict_gets(dict, "Encrypt"); + if (obj) + { + pdf_drop_obj(*encrypt); + *encrypt = pdf_keep_obj(obj); + } + + obj = pdf_dict_gets(dict, "ID"); + if (obj) + { + pdf_drop_obj(*id); + *id = pdf_keep_obj(obj); + } + } + + obj = pdf_dict_gets(dict, "Length"); + if (!pdf_is_indirect(obj) && pdf_is_int(obj)) + stm_len = pdf_to_int(obj); + + pdf_drop_obj(dict); + } + + while ( tok != PDF_TOK_STREAM && + tok != PDF_TOK_ENDOBJ && + tok != PDF_TOK_ERROR && + tok != PDF_TOK_EOF && + tok != PDF_TOK_INT ) + { + *tmpofs = fz_tell(file); + if (*tmpofs < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); + tok = pdf_lex(file, buf); + } + + if (tok == PDF_TOK_STREAM) + { + int c = fz_read_byte(file); + if (c == '\r') { + c = fz_peek_byte(file); + if (c == '\n') + fz_read_byte(file); + } + + *stmofsp = fz_tell(file); + if (*stmofsp < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot seek in file"); + + if (stm_len > 0) + { + fz_seek(file, *stmofsp + stm_len, 0); + fz_try(ctx) + { + tok = pdf_lex(file, buf); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + fz_warn(ctx, "cannot find endstream token, falling back to scanning"); + } + if (tok == PDF_TOK_ENDSTREAM) + goto atobjend; + fz_seek(file, *stmofsp, 0); + } + + n = fz_read(file, (unsigned char *) buf->scratch, 9); + if (n < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot read from file"); + + while (memcmp(buf->scratch, "endstream", 9) != 0) + { + c = 
fz_read_byte(file); + if (c == EOF) + break; + memmove(&buf->scratch[0], &buf->scratch[1], 8); + buf->scratch[8] = c; + } + + *stmlenp = fz_tell(file) - *stmofsp - 9; + +atobjend: + *tmpofs = fz_tell(file); + if (*tmpofs < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); + tok = pdf_lex(file, buf); + if (tok != PDF_TOK_ENDOBJ) + fz_warn(ctx, "object missing 'endobj' token"); + else + { + /* Read another token as we always return the next one */ + *tmpofs = fz_tell(file); + if (*tmpofs < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); + tok = pdf_lex(file, buf); + } + } + return tok; +} + +static void +pdf_repair_obj_stm(pdf_document *xref, int num, int gen) +{ + pdf_obj *obj; + fz_stream *stm = NULL; + pdf_token tok; + int i, n, count; + fz_context *ctx = xref->ctx; + pdf_lexbuf buf; + + fz_var(stm); + + pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); + + fz_try(ctx) + { + obj = pdf_load_object(xref, num, gen); + + count = pdf_to_int(pdf_dict_gets(obj, "N")); + + pdf_drop_obj(obj); + + stm = pdf_open_stream(xref, num, gen); + + for (i = 0; i < count; i++) + { + pdf_xref_entry *entry; + + tok = pdf_lex(stm, &buf); + if (tok != PDF_TOK_INT) + fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen); + + n = buf.i; + if (n < 0) + { + fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i); + continue; + } + else if (n > MAX_OBJECT_NUMBER) + { + fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i); + continue; + } + + entry = pdf_get_populating_xref_entry(xref, n); + entry->ofs = num; + entry->gen = i; + entry->stm_ofs = 0; + pdf_drop_obj(entry->obj); + entry->obj = NULL; + entry->type = 'o'; + + tok = pdf_lex(stm, &buf); + if (tok != PDF_TOK_INT) + fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen); + } + } + fz_always(ctx) + { + fz_close(stm); + pdf_lexbuf_fin(&buf); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot load object stream object (%d %d 
R)", num, gen); + } +} + +/* Entered with file locked, remains locked throughout. */ +void +pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) +{ + pdf_obj *dict, *obj = NULL; + pdf_obj *length; + + pdf_obj *encrypt = NULL; + pdf_obj *id = NULL; + pdf_obj *root = NULL; + pdf_obj *info = NULL; + + struct entry *list = NULL; + int listlen; + int listcap; + int maxnum = 0; + + int num = 0; + int gen = 0; + int tmpofs, numofs = 0, genofs = 0; + int stm_len, stm_ofs = 0; + pdf_token tok; + int next; + int i, n, c; + fz_context *ctx = xref->ctx; + + fz_var(encrypt); + fz_var(id); + fz_var(root); + fz_var(info); + fz_var(list); + fz_var(obj); + + xref->dirty = 1; + + fz_seek(xref->file, 0, 0); + + fz_try(ctx) + { + pdf_xref_entry *entry; + listlen = 0; + listcap = 1024; + list = fz_malloc_array(ctx, listcap, sizeof(struct entry)); + + /* look for '%PDF' version marker within first kilobyte of file */ + n = fz_read(xref->file, (unsigned char *)buf->scratch, fz_mini(buf->size, 1024)); + if (n < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot read from file"); + + fz_seek(xref->file, 0, 0); + for (i = 0; i < n - 4; i++) + { + if (memcmp(&buf->scratch[i], "%PDF", 4) == 0) + { + fz_seek(xref->file, i + 8, 0); /* skip "%PDF-X.Y" */ + break; + } + } + + /* skip comment line after version marker since some generators + * forget to terminate the comment with a newline */ + c = fz_read_byte(xref->file); + while (c >= 0 && (c == ' ' || c == '%')) + c = fz_read_byte(xref->file); + fz_unread_byte(xref->file); + + while (1) + { + tmpofs = fz_tell(xref->file); + if (tmpofs < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); + + fz_try(ctx) + { + tok = pdf_lex(xref->file, buf); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + fz_warn(ctx, "ignoring the rest of the file"); + break; + } + + /* If we have the next token already, then we'll jump + * back here, rather than going through the top of + * the loop. 
*/ + have_next_token: + + if (tok == PDF_TOK_INT) + { + numofs = genofs; + num = gen; + genofs = tmpofs; + gen = buf->i; + } + + else if (tok == PDF_TOK_OBJ) + { + fz_try(ctx) + { + tok = pdf_repair_obj(xref->file, buf, &stm_ofs, &stm_len, &encrypt, &id, &tmpofs); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + /* If we haven't seen a root yet, there is nothing + * we can do, but give up. Otherwise, we'll make + * do. */ + if (!root) + fz_rethrow(ctx); + fz_warn(ctx, "cannot parse object (%d %d R) - ignoring rest of file", num, gen); + break; + } + + if (num <= 0) + { + fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", num, gen); + continue; + } + else if (num > MAX_OBJECT_NUMBER) + { + fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", num, gen); + continue; + } + + gen = fz_clampi(gen, 0, 65535); + + if (listlen + 1 == listcap) + { + listcap = (listcap * 3) / 2; + list = fz_resize_array(ctx, list, listcap, sizeof(struct entry)); + } + + list[listlen].num = num; + list[listlen].gen = gen; + list[listlen].ofs = numofs; + list[listlen].stm_ofs = stm_ofs; + list[listlen].stm_len = stm_len; + listlen ++; + + if (num > maxnum) + maxnum = num; + + goto have_next_token; + } + + /* trailer dictionary */ + else if (tok == PDF_TOK_OPEN_DICT) + { + fz_try(ctx) + { + dict = pdf_parse_dict(xref, xref->file, buf); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + /* If we haven't seen a root yet, there is nothing + * we can do, but give up. Otherwise, we'll make + * do. 
*/ + if (!root) + fz_rethrow(ctx); + fz_warn(ctx, "cannot parse trailer dictionary - ignoring rest of file"); + break; + } + + obj = pdf_dict_gets(dict, "Encrypt"); + if (obj) + { + pdf_drop_obj(encrypt); + encrypt = pdf_keep_obj(obj); + } + + obj = pdf_dict_gets(dict, "ID"); + if (obj) + { + pdf_drop_obj(id); + id = pdf_keep_obj(obj); + } + + obj = pdf_dict_gets(dict, "Root"); + if (obj) + { + pdf_drop_obj(root); + root = pdf_keep_obj(obj); + } + + obj = pdf_dict_gets(dict, "Info"); + if (obj) + { + pdf_drop_obj(info); + info = pdf_keep_obj(obj); + } + + pdf_drop_obj(dict); + obj = NULL; + } + + else if (tok == PDF_TOK_ERROR) + fz_read_byte(xref->file); + + else if (tok == PDF_TOK_EOF) + break; + } + + /* make xref reasonable */ + + /* + Dummy access to entry to assure sufficient space in the xref table + and avoid repeated reallocs in the loop + */ + (void)pdf_get_populating_xref_entry(xref, maxnum); + + for (i = 0; i < listlen; i++) + { + entry = pdf_get_populating_xref_entry(xref, list[i].num); + entry->type = 'n'; + entry->ofs = list[i].ofs; + entry->gen = list[i].gen; + + entry->stm_ofs = list[i].stm_ofs; + + /* correct stream length for unencrypted documents */ + if (!encrypt && list[i].stm_len >= 0) + { + dict = pdf_load_object(xref, list[i].num, list[i].gen); + + length = pdf_new_int(ctx, list[i].stm_len); + pdf_dict_puts(dict, "Length", length); + pdf_drop_obj(length); + + pdf_drop_obj(dict); + } + } + + entry = pdf_get_populating_xref_entry(xref, 0); + entry->type = 'f'; + entry->ofs = 0; + entry->gen = 65535; + entry->stm_ofs = 0; + entry->obj = NULL; + + next = 0; + for (i = pdf_xref_len(xref) - 1; i >= 0; i--) + { + entry = pdf_get_populating_xref_entry(xref, i); + if (entry->type == 'f') + { + entry->ofs = next; + if (entry->gen < 65535) + entry->gen ++; + next = i; + } + } + + /* create a repaired trailer, Root will be added later */ + + obj = pdf_new_dict(ctx, 5); + /* During repair there is only a single xref section */ + 
pdf_set_populating_xref_trailer(xref, obj); + pdf_drop_obj(obj); + obj = NULL; + + obj = pdf_new_int(ctx, maxnum + 1); + pdf_dict_puts(pdf_trailer(xref), "Size", obj); + pdf_drop_obj(obj); + obj = NULL; + + if (root) + { + pdf_dict_puts(pdf_trailer(xref), "Root", root); + pdf_drop_obj(root); + root = NULL; + } + if (info) + { + pdf_dict_puts(pdf_trailer(xref), "Info", info); + pdf_drop_obj(info); + info = NULL; + } + + if (encrypt) + { + if (pdf_is_indirect(encrypt)) + { + /* create new reference with non-NULL xref pointer */ + obj = pdf_new_indirect(ctx, pdf_to_num(encrypt), pdf_to_gen(encrypt), xref); + pdf_drop_obj(encrypt); + encrypt = obj; + obj = NULL; + } + pdf_dict_puts(pdf_trailer(xref), "Encrypt", encrypt); + pdf_drop_obj(encrypt); + encrypt = NULL; + } + + if (id) + { + if (pdf_is_indirect(id)) + { + /* create new reference with non-NULL xref pointer */ + obj = pdf_new_indirect(ctx, pdf_to_num(id), pdf_to_gen(id), xref); + pdf_drop_obj(id); + id = obj; + obj = NULL; + } + pdf_dict_puts(pdf_trailer(xref), "ID", id); + pdf_drop_obj(id); + id = NULL; + } + + fz_free(ctx, list); + } + fz_catch(ctx) + { + pdf_drop_obj(encrypt); + pdf_drop_obj(id); + pdf_drop_obj(root); + pdf_drop_obj(obj); + pdf_drop_obj(info); + fz_free(ctx, list); + fz_rethrow(ctx); + } +} + +void +pdf_repair_obj_stms(pdf_document *xref) +{ + fz_context *ctx = xref->ctx; + pdf_obj *dict; + int i; + int xref_len = pdf_xref_len(xref); + + for (i = 0; i < xref_len; i++) + { + pdf_xref_entry *entry = pdf_get_populating_xref_entry(xref, i); + + if (entry->stm_ofs) + { + dict = pdf_load_object(xref, i, 0); + fz_try(ctx) + { + if (!strcmp(pdf_to_name(pdf_dict_gets(dict, "Type")), "ObjStm")) + pdf_repair_obj_stm(xref, i, 0); + } + fz_always(ctx) + { + pdf_drop_obj(dict); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + } + } + + /* Ensure that streamed objects reside inside a known non-streamed object */ + for (i = 0; i < xref_len; i++) + { + pdf_xref_entry *entry = 
pdf_get_populating_xref_entry(xref, i); + + if (entry->type == 'o' && pdf_get_populating_xref_entry(xref, entry->ofs)->type != 'n') + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)", entry->ofs, i); + } +} diff --git a/source/pdf/pdf-shade.c b/source/pdf/pdf-shade.c new file mode 100644 index 00000000..41ddcc1a --- /dev/null +++ b/source/pdf/pdf-shade.c @@ -0,0 +1,498 @@ +#include "mupdf/pdf.h" + +/* FIXME: Remove this somehow */ +#define FUNSEGS 32 /* size of sampled mesh for function-based shadings */ + +/* Sample various functions into lookup tables */ + +static void +pdf_sample_composite_shade_function(fz_context *ctx, fz_shade *shade, fz_function *func, float t0, float t1) +{ + int i; + float t; + + for (i = 0; i < 256; i++) + { + t = t0 + (i / 255.0f) * (t1 - t0); + fz_eval_function(ctx, func, &t, 1, shade->function[i], shade->colorspace->n); + shade->function[i][shade->colorspace->n] = 1; + } +} + +static void +pdf_sample_component_shade_function(fz_context *ctx, fz_shade *shade, int funcs, fz_function **func, float t0, float t1) +{ + int i, k; + float t; + + for (i = 0; i < 256; i++) + { + t = t0 + (i / 255.0f) * (t1 - t0); + for (k = 0; k < funcs; k++) + fz_eval_function(ctx, func[k], &t, 1, &shade->function[i][k], 1); + shade->function[i][k] = 1; + } +} + +static void +pdf_sample_shade_function(fz_context *ctx, fz_shade *shade, int funcs, fz_function **func, float t0, float t1) +{ + shade->use_function = 1; + if (funcs == 1) + pdf_sample_composite_shade_function(ctx, shade, func[0], t0, t1); + else + pdf_sample_component_shade_function(ctx, shade, funcs, func, t0, t1); +} + +/* Type 1-3 -- Function-based, linear and radial shadings */ + +static void +pdf_load_function_based_shading(fz_shade *shade, pdf_document *xref, pdf_obj *dict, fz_function *func) +{ + pdf_obj *obj; + float x0, y0, x1, y1; + float fv[2]; + fz_matrix matrix; + int xx, yy; + fz_context *ctx = xref->ctx; + float *p; + + x0 = y0 = 0; + x1 = y1 = 
1; + obj = pdf_dict_gets(dict, "Domain"); + if (obj) + { + x0 = pdf_to_real(pdf_array_get(obj, 0)); + x1 = pdf_to_real(pdf_array_get(obj, 1)); + y0 = pdf_to_real(pdf_array_get(obj, 2)); + y1 = pdf_to_real(pdf_array_get(obj, 3)); + } + + obj = pdf_dict_gets(dict, "Matrix"); + if (obj) + pdf_to_matrix(ctx, obj, &matrix); + else + matrix = fz_identity; + shade->u.f.matrix = matrix; + shade->u.f.xdivs = FUNSEGS; + shade->u.f.ydivs = FUNSEGS; + shade->u.f.fn_vals = fz_malloc(ctx, (FUNSEGS+1)*(FUNSEGS+1)*shade->colorspace->n*sizeof(float)); + shade->u.f.domain[0][0] = x0; + shade->u.f.domain[0][1] = y0; + shade->u.f.domain[1][0] = x1; + shade->u.f.domain[1][1] = y1; + + p = shade->u.f.fn_vals; + for (yy = 0; yy <= FUNSEGS; yy++) + { + fv[1] = y0 + (y1 - y0) * yy / FUNSEGS; + + for (xx = 0; xx <= FUNSEGS; xx++) + { + fv[0] = x0 + (x1 - x0) * xx / FUNSEGS; + + fz_eval_function(ctx, func, fv, 2, p, shade->colorspace->n); + p += shade->colorspace->n; + } + } +} + +static void +pdf_load_linear_shading(fz_shade *shade, pdf_document *xref, pdf_obj *dict, int funcs, fz_function **func) +{ + pdf_obj *obj; + float d0, d1; + int e0, e1; + fz_context *ctx = xref->ctx; + + obj = pdf_dict_gets(dict, "Coords"); + shade->u.l_or_r.coords[0][0] = pdf_to_real(pdf_array_get(obj, 0)); + shade->u.l_or_r.coords[0][1] = pdf_to_real(pdf_array_get(obj, 1)); + shade->u.l_or_r.coords[1][0] = pdf_to_real(pdf_array_get(obj, 2)); + shade->u.l_or_r.coords[1][1] = pdf_to_real(pdf_array_get(obj, 3)); + + d0 = 0; + d1 = 1; + obj = pdf_dict_gets(dict, "Domain"); + if (obj) + { + d0 = pdf_to_real(pdf_array_get(obj, 0)); + d1 = pdf_to_real(pdf_array_get(obj, 1)); + } + + e0 = e1 = 0; + obj = pdf_dict_gets(dict, "Extend"); + if (obj) + { + e0 = pdf_to_bool(pdf_array_get(obj, 0)); + e1 = pdf_to_bool(pdf_array_get(obj, 1)); + } + + pdf_sample_shade_function(ctx, shade, funcs, func, d0, d1); + + shade->u.l_or_r.extend[0] = e0; + shade->u.l_or_r.extend[1] = e1; +} + +static void +pdf_load_radial_shading(fz_shade 
*shade, pdf_document *xref, pdf_obj *dict, int funcs, fz_function **func) +{ + pdf_obj *obj; + float d0, d1; + int e0, e1; + fz_context *ctx = xref->ctx; + + obj = pdf_dict_gets(dict, "Coords"); + shade->u.l_or_r.coords[0][0] = pdf_to_real(pdf_array_get(obj, 0)); + shade->u.l_or_r.coords[0][1] = pdf_to_real(pdf_array_get(obj, 1)); + shade->u.l_or_r.coords[0][2] = pdf_to_real(pdf_array_get(obj, 2)); + shade->u.l_or_r.coords[1][0] = pdf_to_real(pdf_array_get(obj, 3)); + shade->u.l_or_r.coords[1][1] = pdf_to_real(pdf_array_get(obj, 4)); + shade->u.l_or_r.coords[1][2] = pdf_to_real(pdf_array_get(obj, 5)); + + d0 = 0; + d1 = 1; + obj = pdf_dict_gets(dict, "Domain"); + if (obj) + { + d0 = pdf_to_real(pdf_array_get(obj, 0)); + d1 = pdf_to_real(pdf_array_get(obj, 1)); + } + + e0 = e1 = 0; + obj = pdf_dict_gets(dict, "Extend"); + if (obj) + { + e0 = pdf_to_bool(pdf_array_get(obj, 0)); + e1 = pdf_to_bool(pdf_array_get(obj, 1)); + } + + pdf_sample_shade_function(ctx, shade, funcs, func, d0, d1); + + shade->u.l_or_r.extend[0] = e0; + shade->u.l_or_r.extend[1] = e1; +} + +/* Type 4-7 -- Triangle and patch mesh shadings */ + +struct mesh_params +{ + int vprow; + int bpflag; + int bpcoord; + int bpcomp; + float x0, x1; + float y0, y1; + float c0[FZ_MAX_COLORS]; + float c1[FZ_MAX_COLORS]; +}; + +static void +pdf_load_mesh_params(fz_shade *shade, pdf_document *xref, pdf_obj *dict) +{ + fz_context *ctx = xref->ctx; + pdf_obj *obj; + int i, n; + + shade->u.m.x0 = shade->u.m.y0 = 0; + shade->u.m.x1 = shade->u.m.y1 = 1; + for (i = 0; i < FZ_MAX_COLORS; i++) + { + shade->u.m.c0[i] = 0; + shade->u.m.c1[i] = 1; + } + + shade->u.m.vprow = pdf_to_int(pdf_dict_gets(dict, "VerticesPerRow")); + shade->u.m.bpflag = pdf_to_int(pdf_dict_gets(dict, "BitsPerFlag")); + shade->u.m.bpcoord = pdf_to_int(pdf_dict_gets(dict, "BitsPerCoordinate")); + shade->u.m.bpcomp = pdf_to_int(pdf_dict_gets(dict, "BitsPerComponent")); + + obj = pdf_dict_gets(dict, "Decode"); + if (pdf_array_len(obj) >= 6) + { + n = 
(pdf_array_len(obj) - 4) / 2; + shade->u.m.x0 = pdf_to_real(pdf_array_get(obj, 0)); + shade->u.m.x1 = pdf_to_real(pdf_array_get(obj, 1)); + shade->u.m.y0 = pdf_to_real(pdf_array_get(obj, 2)); + shade->u.m.y1 = pdf_to_real(pdf_array_get(obj, 3)); + for (i = 0; i < n; i++) + { + shade->u.m.c0[i] = pdf_to_real(pdf_array_get(obj, 4 + i * 2)); + shade->u.m.c1[i] = pdf_to_real(pdf_array_get(obj, 5 + i * 2)); + } + } + + if (shade->u.m.vprow < 2 && shade->type == 5) + { + fz_warn(ctx, "Too few vertices per row (%d)", shade->u.m.vprow); + shade->u.m.vprow = 2; + } + + if (shade->u.m.bpflag != 2 && shade->u.m.bpflag != 4 && shade->u.m.bpflag != 8 && + shade->type != 5) + { + fz_warn(ctx, "Invalid number of bits per flag (%d)", shade->u.m.bpflag); + shade->u.m.bpflag = 8; + } + + if (shade->u.m.bpcoord != 1 && shade->u.m.bpcoord != 2 && shade->u.m.bpcoord != 4 && + shade->u.m.bpcoord != 8 && shade->u.m.bpcoord != 12 && shade->u.m.bpcoord != 16 && + shade->u.m.bpcoord != 24 && shade->u.m.bpcoord != 32) + { + fz_warn(ctx, "Invalid number of bits per coordinate (%d)", shade->u.m.bpcoord); + shade->u.m.bpcoord = 8; + } + + if (shade->u.m.bpcomp != 1 && shade->u.m.bpcomp != 2 && shade->u.m.bpcomp != 4 && + shade->u.m.bpcomp != 8 && shade->u.m.bpcomp != 12 && shade->u.m.bpcomp != 16) + { + fz_warn(ctx, "Invalid number of bits per component (%d)", shade->u.m.bpcomp); + shade->u.m.bpcomp = 8; + } +} + +static void +pdf_load_type4_shade(fz_shade *shade, pdf_document *xref, pdf_obj *dict, + int funcs, fz_function **func) +{ + fz_context *ctx = xref->ctx; + + pdf_load_mesh_params(shade, xref, dict); + + if (funcs > 0) + pdf_sample_shade_function(ctx, shade, funcs, func, shade->u.m.c0[0], shade->u.m.c1[0]); + + shade->buffer = pdf_load_compressed_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); +} + +static void +pdf_load_type5_shade(fz_shade *shade, pdf_document *xref, pdf_obj *dict, + int funcs, fz_function **func) +{ + fz_context *ctx = xref->ctx; + + pdf_load_mesh_params(shade, 
xref, dict); + + if (funcs > 0) + pdf_sample_shade_function(ctx, shade, funcs, func, shade->u.m.c0[0], shade->u.m.c1[0]); + + shade->buffer = pdf_load_compressed_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); +} + +/* Type 6 & 7 -- Patch mesh shadings */ + +static void +pdf_load_type6_shade(fz_shade *shade, pdf_document *xref, pdf_obj *dict, + int funcs, fz_function **func) +{ + fz_context *ctx = xref->ctx; + + pdf_load_mesh_params(shade, xref, dict); + + if (funcs > 0) + pdf_sample_shade_function(ctx, shade, funcs, func, shade->u.m.c0[0], shade->u.m.c1[0]); + + shade->buffer = pdf_load_compressed_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); +} + +static void +pdf_load_type7_shade(fz_shade *shade, pdf_document *xref, pdf_obj *dict, + int funcs, fz_function **func) +{ + fz_context *ctx = xref->ctx; + + pdf_load_mesh_params(shade, xref, dict); + + if (funcs > 0) + pdf_sample_shade_function(ctx, shade, funcs, func, shade->u.m.c0[0], shade->u.m.c1[0]); + + shade->buffer = pdf_load_compressed_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); +} + +/* Load all of the shading dictionary parameters, then switch on the shading type. 
*/ + +static fz_shade * +pdf_load_shading_dict(pdf_document *xref, pdf_obj *dict, const fz_matrix *transform) +{ + fz_shade *shade = NULL; + fz_function *func[FZ_MAX_COLORS] = { NULL }; + pdf_obj *obj; + int funcs = 0; + int type = 0; + int i, in, out; + fz_context *ctx = xref->ctx; + + fz_var(shade); + fz_var(func); + fz_var(funcs); + fz_var(type); + + fz_try(ctx) + { + shade = fz_malloc_struct(ctx, fz_shade); + FZ_INIT_STORABLE(shade, 1, fz_free_shade_imp); + shade->type = FZ_MESH_TYPE4; + shade->use_background = 0; + shade->use_function = 0; + shade->matrix = *transform; + shade->bbox = fz_infinite_rect; + + shade->colorspace = NULL; + + funcs = 0; + + obj = pdf_dict_gets(dict, "ShadingType"); + type = pdf_to_int(obj); + + obj = pdf_dict_gets(dict, "ColorSpace"); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "shading colorspace is missing"); + shade->colorspace = pdf_load_colorspace(xref, obj); + + obj = pdf_dict_gets(dict, "Background"); + if (obj) + { + shade->use_background = 1; + for (i = 0; i < shade->colorspace->n; i++) + shade->background[i] = pdf_to_real(pdf_array_get(obj, i)); + } + + obj = pdf_dict_gets(dict, "BBox"); + if (pdf_is_array(obj)) + pdf_to_rect(ctx, obj, &shade->bbox); + + obj = pdf_dict_gets(dict, "Function"); + if (pdf_is_dict(obj)) + { + funcs = 1; + + if (type == 1) + in = 2; + else + in = 1; + out = shade->colorspace->n; + + func[0] = pdf_load_function(xref, obj, in, out); + if (!func[0]) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load shading function (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); + } + else if (pdf_is_array(obj)) + { + funcs = pdf_array_len(obj); + if (funcs != 1 && funcs != shade->colorspace->n) + { + funcs = 0; + fz_throw(ctx, FZ_ERROR_GENERIC, "incorrect number of shading functions"); + } + if (funcs > FZ_MAX_COLORS) + { + funcs = 0; + fz_throw(ctx, FZ_ERROR_GENERIC, "too many shading functions"); + } + + if (type == 1) + in = 2; + else + in = 1; + out = 1; + + for (i = 0; i < funcs; i++) + { + func[i] = 
pdf_load_function(xref, pdf_array_get(obj, i), in, out); + if (!func[i]) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load shading function (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); + } + } + else if (type < 4) + { + /* Functions are compulsory for types 1,2,3 */ + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load shading function (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); + } + + shade->type = type; + switch (type) + { + case 1: pdf_load_function_based_shading(shade, xref, dict, func[0]); break; + case 2: pdf_load_linear_shading(shade, xref, dict, funcs, func); break; + case 3: pdf_load_radial_shading(shade, xref, dict, funcs, func); break; + case 4: pdf_load_type4_shade(shade, xref, dict, funcs, func); break; + case 5: pdf_load_type5_shade(shade, xref, dict, funcs, func); break; + case 6: pdf_load_type6_shade(shade, xref, dict, funcs, func); break; + case 7: pdf_load_type7_shade(shade, xref, dict, funcs, func); break; + default: + fz_throw(ctx, FZ_ERROR_GENERIC, "unknown shading type: %d", type); + } + } + fz_always(ctx) + { + for (i = 0; i < funcs; i++) + if (func[i]) + fz_drop_function(ctx, func[i]); + } + fz_catch(ctx) + { + fz_drop_shade(ctx, shade); + + fz_rethrow_message(ctx, "cannot load shading type %d (%d %d R)", type, pdf_to_num(dict), pdf_to_gen(dict)); + } + return shade; +} + +static unsigned int +fz_shade_size(fz_shade *s) +{ + if (s == NULL) + return 0; + if (s->type == FZ_FUNCTION_BASED) + return sizeof(*s) + sizeof(float) * s->u.f.xdivs * s->u.f.ydivs * s->colorspace->n; + return sizeof(*s) + fz_compressed_buffer_size(s->buffer); +} + +fz_shade * +pdf_load_shading(pdf_document *xref, pdf_obj *dict) +{ + fz_matrix mat; + pdf_obj *obj; + fz_context *ctx = xref->ctx; + fz_shade *shade; + + if ((shade = pdf_find_item(ctx, fz_free_shade_imp, dict))) + { + return shade; + } + + /* Type 2 pattern dictionary */ + if (pdf_dict_gets(dict, "PatternType")) + { + obj = pdf_dict_gets(dict, "Matrix"); + if (obj) + pdf_to_matrix(ctx, obj, &mat); + else + mat = 
fz_identity; + + obj = pdf_dict_gets(dict, "ExtGState"); + if (obj) + { + if (pdf_dict_gets(obj, "CA") || pdf_dict_gets(obj, "ca")) + { + fz_warn(ctx, "shading with alpha not supported"); + } + } + + obj = pdf_dict_gets(dict, "Shading"); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing shading dictionary"); + + shade = pdf_load_shading_dict(xref, obj, &mat); + } + + /* Naked shading dictionary */ + else + { + shade = pdf_load_shading_dict(xref, dict, &fz_identity); + } + + pdf_store_item(ctx, dict, shade, fz_shade_size(shade)); + + return shade; +} diff --git a/source/pdf/pdf-store.c b/source/pdf/pdf-store.c new file mode 100644 index 00000000..d3873e76 --- /dev/null +++ b/source/pdf/pdf-store.c @@ -0,0 +1,76 @@ +#include "mupdf/pdf.h" + +static int +pdf_make_hash_key(fz_store_hash *hash, void *key_) +{ + pdf_obj *key = (pdf_obj *)key_; + + if (!pdf_is_indirect(key)) + return 0; + hash->u.i.i0 = pdf_to_num(key); + hash->u.i.i1 = pdf_to_gen(key); + return 1; +} + +static void * +pdf_keep_key(fz_context *ctx, void *key) +{ + return (void *)pdf_keep_obj((pdf_obj *)key); +} + +static void +pdf_drop_key(fz_context *ctx, void *key) +{ + pdf_drop_obj((pdf_obj *)key); +} + +static int +pdf_cmp_key(void *k0, void *k1) +{ + return pdf_objcmp((pdf_obj *)k0, (pdf_obj *)k1); +} + +#ifndef NDEBUG +static void +pdf_debug_key(FILE *out, void *key_) +{ + pdf_obj *key = (pdf_obj *)key_; + + if (pdf_is_indirect(key)) + { + fprintf(out, "(%d %d R) ", pdf_to_num(key), pdf_to_gen(key)); + } else + pdf_fprint_obj(out, key, 0); +} +#endif + +static fz_store_type pdf_obj_store_type = +{ + pdf_make_hash_key, + pdf_keep_key, + pdf_drop_key, + pdf_cmp_key, +#ifndef NDEBUG + pdf_debug_key +#endif +}; + +void +pdf_store_item(fz_context *ctx, pdf_obj *key, void *val, unsigned int itemsize) +{ + void *existing; + existing = fz_store_item(ctx, key, val, itemsize, &pdf_obj_store_type); + assert(existing == NULL); +} + +void * +pdf_find_item(fz_context *ctx, fz_store_free_fn 
*free, pdf_obj *key) +{ + return fz_find_item(ctx, free, key, &pdf_obj_store_type); +} + +void +pdf_remove_item(fz_context *ctx, fz_store_free_fn *free, pdf_obj *key) +{ + fz_remove_item(ctx, free, key, &pdf_obj_store_type); +} diff --git a/source/pdf/pdf-stream.c b/source/pdf/pdf-stream.c new file mode 100644 index 00000000..7e74b666 --- /dev/null +++ b/source/pdf/pdf-stream.c @@ -0,0 +1,564 @@ +#include "mupdf/pdf.h" + +/* + * Check if an object is a stream or not. + */ +int +pdf_is_stream(pdf_document *xref, int num, int gen) +{ + pdf_xref_entry *entry; + + if (num < 0 || num >= pdf_xref_len(xref)) + return 0; + + pdf_cache_object(xref, num, gen); + + entry = pdf_get_xref_entry(xref, num); + return entry->stm_ofs != 0 || entry->stm_buf; +} + +/* + * Scan stream dictionary for an explicit /Crypt filter + */ +static int +pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm) +{ + pdf_obj *filters; + pdf_obj *obj; + int i; + + filters = pdf_dict_getsa(stm, "Filter", "F"); + if (filters) + { + if (!strcmp(pdf_to_name(filters), "Crypt")) + return 1; + if (pdf_is_array(filters)) + { + int n = pdf_array_len(filters); + for (i = 0; i < n; i++) + { + obj = pdf_array_get(filters, i); + if (!strcmp(pdf_to_name(obj), "Crypt")) + return 1; + } + } + } + return 0; +} + +/* + * Create a filter given a name and param dictionary. 
+ */ +static fz_stream * +build_filter(fz_stream *chain, pdf_document * xref, pdf_obj * f, pdf_obj * p, int num, int gen, fz_compression_params *params) +{ + fz_context *ctx = chain->ctx; + char *s = pdf_to_name(f); + + int predictor = pdf_to_int(pdf_dict_gets(p, "Predictor")); + pdf_obj *columns_obj = pdf_dict_gets(p, "Columns"); + int columns = pdf_to_int(columns_obj); + int colors = pdf_to_int(pdf_dict_gets(p, "Colors")); + int bpc = pdf_to_int(pdf_dict_gets(p, "BitsPerComponent")); + + if (!strcmp(s, "ASCIIHexDecode") || !strcmp(s, "AHx")) + return fz_open_ahxd(chain); + + else if (!strcmp(s, "ASCII85Decode") || !strcmp(s, "A85")) + return fz_open_a85d(chain); + + else if (!strcmp(s, "CCITTFaxDecode") || !strcmp(s, "CCF")) + { + pdf_obj *k = pdf_dict_gets(p, "K"); + pdf_obj *eol = pdf_dict_gets(p, "EndOfLine"); + pdf_obj *eba = pdf_dict_gets(p, "EncodedByteAlign"); + pdf_obj *rows = pdf_dict_gets(p, "Rows"); + pdf_obj *eob = pdf_dict_gets(p, "EndOfBlock"); + pdf_obj *bi1 = pdf_dict_gets(p, "BlackIs1"); + if (params) + { + /* We will shortstop here */ + params->type = FZ_IMAGE_FAX; + params->u.fax.k = (k ? pdf_to_int(k) : 0); + params->u.fax.end_of_line = (eol ? pdf_to_bool(eol) : 0); + params->u.fax.encoded_byte_align = (eba ? pdf_to_bool(eba) : 0); + params->u.fax.columns = (columns_obj ? columns : 1728); + params->u.fax.rows = (rows ? pdf_to_int(rows) : 0); + params->u.fax.end_of_block = (eob ? pdf_to_bool(eob) : 1); + params->u.fax.black_is_1 = (bi1 ? pdf_to_bool(bi1) : 0); + return chain; + } + return fz_open_faxd(chain, + k ? pdf_to_int(k) : 0, + eol ? pdf_to_bool(eol) : 0, + eba ? pdf_to_bool(eba) : 0, + columns_obj ? columns : 1728, + rows ? pdf_to_int(rows) : 0, + eob ? pdf_to_bool(eob) : 1, + bi1 ? 
pdf_to_bool(bi1) : 0); + } + + else if (!strcmp(s, "DCTDecode") || !strcmp(s, "DCT")) + { + pdf_obj *ct = pdf_dict_gets(p, "ColorTransform"); + if (params) + { + /* We will shortstop here */ + params->type = FZ_IMAGE_JPEG; + params->u.jpeg.color_transform = (ct ? pdf_to_int(ct) : -1); + return chain; + } + return fz_open_dctd(chain, ct ? pdf_to_int(ct) : -1); + } + + else if (!strcmp(s, "RunLengthDecode") || !strcmp(s, "RL")) + { + if (params) + { + /* We will shortstop here */ + params->type = FZ_IMAGE_RLD; + return chain; + } + return fz_open_rld(chain); + } + else if (!strcmp(s, "FlateDecode") || !strcmp(s, "Fl")) + { + if (params) + { + /* We will shortstop here */ + params->type = FZ_IMAGE_FLATE; + params->u.flate.predictor = predictor; + params->u.flate.columns = columns; + params->u.flate.colors = colors; + params->u.flate.bpc = bpc; + return chain; + } + chain = fz_open_flated(chain); + if (predictor > 1) + chain = fz_open_predict(chain, predictor, columns, colors, bpc); + return chain; + } + + else if (!strcmp(s, "LZWDecode") || !strcmp(s, "LZW")) + { + pdf_obj *ec = pdf_dict_gets(p, "EarlyChange"); + if (params) + { + /* We will shortstop here */ + params->type = FZ_IMAGE_LZW; + params->u.lzw.predictor = predictor; + params->u.lzw.columns = columns; + params->u.lzw.colors = colors; + params->u.lzw.bpc = bpc; + params->u.lzw.early_change = (ec ? pdf_to_int(ec) : 1); + return chain; + } + chain = fz_open_lzwd(chain, ec ? 
pdf_to_int(ec) : 1); + if (predictor > 1) + chain = fz_open_predict(chain, predictor, columns, colors, bpc); + return chain; + } + + else if (!strcmp(s, "JBIG2Decode")) + { + fz_buffer *globals = NULL; + pdf_obj *obj = pdf_dict_gets(p, "JBIG2Globals"); + if (obj) + globals = pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); + /* fz_open_jbig2d takes possession of globals */ + return fz_open_jbig2d(chain, globals); + } + + else if (!strcmp(s, "JPXDecode")) + return chain; /* JPX decoding is special cased in the image loading code */ + + else if (!strcmp(s, "Crypt")) + { + pdf_obj *name; + + if (!xref->crypt) + { + fz_warn(ctx, "crypt filter in unencrypted document"); + return chain; + } + + name = pdf_dict_gets(p, "Name"); + if (pdf_is_name(name)) + return pdf_open_crypt_with_filter(chain, xref->crypt, pdf_to_name(name), num, gen); + + return chain; + } + + fz_warn(ctx, "unknown filter name (%s)", s); + return chain; +} + +/* + * Build a chain of filters given filter names and param dicts. + * If head is given, start filter chain with it. + * Assume ownership of head. + */ +static fz_stream * +build_filter_chain(fz_stream *chain, pdf_document *xref, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params) +{ + pdf_obj *f; + pdf_obj *p; + int i, n; + + n = pdf_array_len(fs); + for (i = 0; i < n; i++) + { + f = pdf_array_get(fs, i); + p = pdf_array_get(ps, i); + chain = build_filter(chain, xref, f, p, num, gen, (i == n-1 ? params : NULL)); + } + + return chain; +} + +/* + * Build a filter for reading raw stream data. + * This is a null filter to constrain reading to the stream length (and to + * allow for other people accessing the file), followed by a decryption + * filter. + * + * orig_num and orig_gen are used purely to seed the encryption. 
+ */ +static fz_stream * +pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int orig_num, int orig_gen, int offset) +{ + fz_context *ctx = chain->ctx; + int hascrypt; + int len; + + if (num > 0 && num < pdf_xref_len(xref)) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, num); + if (entry->stm_buf) + return fz_open_buffer(ctx, entry->stm_buf); + } + + /* don't close chain when we close this filter */ + fz_keep_stream(chain); + + len = pdf_to_int(pdf_dict_gets(stmobj, "Length")); + chain = fz_open_null(chain, len, offset); + + fz_try(ctx) + { + hascrypt = pdf_stream_has_crypt(ctx, stmobj); + if (xref->crypt && !hascrypt) + chain = pdf_open_crypt(chain, xref->crypt, orig_num, orig_gen); + } + fz_catch(ctx) + { + fz_close(chain); + fz_rethrow(ctx); + } + + return chain; +} + +/* + * Construct a filter to decode a stream, constraining + * to stream length and decrypting. + */ +static fz_stream * +pdf_open_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, int offset, fz_compression_params *imparams) +{ + pdf_obj *filters; + pdf_obj *params; + + filters = pdf_dict_getsa(stmobj, "Filter", "F"); + params = pdf_dict_getsa(stmobj, "DecodeParms", "DP"); + + chain = pdf_open_raw_filter(chain, xref, stmobj, num, num, gen, offset); + + fz_try(xref->ctx) + { + if (pdf_is_name(filters)) + chain = build_filter(chain, xref, filters, params, num, gen, imparams); + else if (pdf_array_len(filters) > 0) + chain = build_filter_chain(chain, xref, filters, params, num, gen, imparams); + } + fz_catch(xref->ctx) + { + fz_close(chain); + fz_rethrow(xref->ctx); + } + + return chain; +} + +/* + * Construct a filter to decode a stream, without + * constraining to stream length, and without decryption. 
+ */ +fz_stream * +pdf_open_inline_stream(pdf_document *xref, pdf_obj *stmobj, int length, fz_stream *chain, fz_compression_params *imparams) +{ + pdf_obj *filters; + pdf_obj *params; + + filters = pdf_dict_getsa(stmobj, "Filter", "F"); + params = pdf_dict_getsa(stmobj, "DecodeParms", "DP"); + + /* don't close chain when we close this filter */ + fz_keep_stream(chain); + + if (pdf_is_name(filters)) + return build_filter(chain, xref, filters, params, 0, 0, imparams); + if (pdf_array_len(filters) > 0) + return build_filter_chain(chain, xref, filters, params, 0, 0, imparams); + + return fz_open_null(chain, length, fz_tell(chain)); +} + +/* + * Open a stream for reading the raw (compressed but decrypted) data. + */ +fz_stream * +pdf_open_raw_stream(pdf_document *xref, int num, int gen) +{ + return pdf_open_raw_renumbered_stream(xref, num, gen, num, gen); +} + +fz_stream * +pdf_open_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen) +{ + pdf_xref_entry *x; + + if (num < 0 || num >= pdf_xref_len(xref)) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "object id out of range (%d %d R)", num, gen); + + x = pdf_get_xref_entry(xref, num); + + pdf_cache_object(xref, num, gen); + + if (x->stm_ofs == 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "object is not a stream"); + + return pdf_open_raw_filter(xref->file, xref, x->obj, num, orig_num, orig_gen, x->stm_ofs); +} + +static fz_stream * +pdf_open_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params) +{ + pdf_xref_entry *x; + + if (num < 0 || num >= pdf_xref_len(xref)) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "object id out of range (%d %d R)", num, gen); + + x = pdf_get_xref_entry(xref, num); + + pdf_cache_object(xref, num, gen); + + if (x->stm_ofs == 0 && x->stm_buf == NULL) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "object is not a stream"); + + return pdf_open_filter(xref->file, xref, x->obj, orig_num, orig_gen, x->stm_ofs, params); +} + +/* + * 
Open a stream for reading uncompressed data. + * Put the opened file in xref->stream. + * Using xref->file while a stream is open is a Bad idea. + */ +fz_stream * +pdf_open_stream(pdf_document *xref, int num, int gen) +{ + return pdf_open_image_stream(xref, num, gen, num, gen, NULL); +} + +fz_stream * +pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, pdf_obj *dict, int stm_ofs) +{ + if (stm_ofs == 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "object is not a stream"); + + return pdf_open_filter(xref->file, xref, dict, num, gen, stm_ofs, NULL); +} + +/* + * Load raw (compressed but decrypted) contents of a stream into buf. + */ +fz_buffer * +pdf_load_raw_stream(pdf_document *xref, int num, int gen) +{ + return pdf_load_raw_renumbered_stream(xref, num, gen, num, gen); +} + +fz_buffer * +pdf_load_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen) +{ + fz_stream *stm; + pdf_obj *dict; + int len; + fz_buffer *buf; + + if (num > 0 && num < pdf_xref_len(xref)) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, num); + if (entry->stm_buf) + return fz_keep_buffer(xref->ctx, entry->stm_buf); + } + + dict = pdf_load_object(xref, num, gen); + + len = pdf_to_int(pdf_dict_gets(dict, "Length")); + + pdf_drop_obj(dict); + + stm = pdf_open_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen); + + buf = fz_read_all(stm, len); + + fz_close(stm); + return buf; +} + +static int +pdf_guess_filter_length(int len, char *filter) +{ + if (!strcmp(filter, "ASCIIHexDecode")) + return len / 2; + if (!strcmp(filter, "ASCII85Decode")) + return len * 4 / 5; + if (!strcmp(filter, "FlateDecode")) + return len * 3; + if (!strcmp(filter, "RunLengthDecode")) + return len * 3; + if (!strcmp(filter, "LZWDecode")) + return len * 2; + return len; +} + +static fz_buffer * +pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params, int *truncated) +{ + fz_context *ctx = xref->ctx; + 
fz_stream *stm = NULL; + pdf_obj *dict, *obj; + int i, len, n; + fz_buffer *buf; + + fz_var(buf); + + if (num > 0 && num < pdf_xref_len(xref)) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, num); + if (entry->stm_buf) + return fz_keep_buffer(xref->ctx, entry->stm_buf); + } + + dict = pdf_load_object(xref, num, gen); + + len = pdf_to_int(pdf_dict_gets(dict, "Length")); + obj = pdf_dict_gets(dict, "Filter"); + len = pdf_guess_filter_length(len, pdf_to_name(obj)); + n = pdf_array_len(obj); + for (i = 0; i < n; i++) + len = pdf_guess_filter_length(len, pdf_to_name(pdf_array_get(obj, i))); + + pdf_drop_obj(dict); + + stm = pdf_open_image_stream(xref, num, gen, orig_num, orig_gen, params); + + fz_try(ctx) + { + if (truncated) + buf = fz_read_best(stm, len, truncated); + else + buf = fz_read_all(stm, len); + } + fz_always(ctx) + { + fz_close(stm); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot read raw stream (%d %d R)", num, gen); + } + + return buf; +} + +/* + * Load uncompressed contents of a stream into buf. 
+ */ +fz_buffer * +pdf_load_stream(pdf_document *xref, int num, int gen) +{ + return pdf_load_image_stream(xref, num, gen, num, gen, NULL, NULL); +} + +fz_buffer * +pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, int *truncated) +{ + return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL, truncated); +} + +fz_compressed_buffer * +pdf_load_compressed_stream(pdf_document *xref, int num, int gen) +{ + fz_context *ctx = xref->ctx; + fz_compressed_buffer *bc = fz_malloc_struct(ctx, fz_compressed_buffer); + + fz_try(ctx) + { + bc->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &bc->params, NULL); + } + fz_catch(ctx) + { + fz_free(ctx, bc); + fz_rethrow(ctx); + } + return bc; +} + +static fz_stream * +pdf_open_object_array(pdf_document *xref, pdf_obj *list) +{ + int i, n; + fz_context *ctx = xref->ctx; + fz_stream *stm; + + n = pdf_array_len(list); + stm = fz_open_concat(ctx, n, 1); + + fz_var(i); /* Workaround Mac compiler bug */ + for (i = 0; i < n; i++) + { + pdf_obj *obj = pdf_array_get(list, i); + fz_try(ctx) + { + fz_concat_push(stm, pdf_open_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))); + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n); + continue; + } + } + + return stm; +} + +fz_stream * +pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj) +{ + fz_context *ctx = xref->ctx; + int num, gen; + + if (pdf_is_array(obj)) + return pdf_open_object_array(xref, obj); + + num = pdf_to_num(obj); + gen = pdf_to_gen(obj); + if (pdf_is_stream(xref, num, gen)) + return pdf_open_image_stream(xref, num, gen, num, gen, NULL); + + fz_warn(ctx, "pdf object stream missing (%d %d R)", num, gen); + return NULL; +} diff --git a/source/pdf/pdf-type3.c b/source/pdf/pdf-type3.c new file mode 100644 index 00000000..d85d9f11 --- /dev/null +++ b/source/pdf/pdf-type3.c @@ -0,0 +1,190 @@ +#include "mupdf/pdf.h" + +static void +pdf_run_glyph_func(void 
*doc, void *rdb, fz_buffer *contents, fz_device *dev, const fz_matrix *ctm, void *gstate, int nested_depth) +{ + pdf_run_glyph(doc, (pdf_obj *)rdb, contents, dev, ctm, gstate, nested_depth); +} + +static void +pdf_t3_free_resources(void *doc, void *rdb_) +{ + pdf_obj *rdb = (pdf_obj *)rdb_; + pdf_drop_obj(rdb); +} + +pdf_font_desc * +pdf_load_type3_font(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict) +{ + char buf[256]; + char *estrings[256]; + pdf_font_desc *fontdesc = NULL; + pdf_obj *encoding; + pdf_obj *widths; + pdf_obj *charprocs; + pdf_obj *obj; + int first, last; + int i, k, n; + fz_rect bbox; + fz_matrix matrix; + fz_context *ctx = xref->ctx; + + fz_var(fontdesc); + + fz_try(ctx) + { + obj = pdf_dict_gets(dict, "Name"); + if (pdf_is_name(obj)) + fz_strlcpy(buf, pdf_to_name(obj), sizeof buf); + else + sprintf(buf, "Unnamed-T3"); + + fontdesc = pdf_new_font_desc(ctx); + + obj = pdf_dict_gets(dict, "FontMatrix"); + pdf_to_matrix(ctx, obj, &matrix); + + obj = pdf_dict_gets(dict, "FontBBox"); + fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix); + + fontdesc->font = fz_new_type3_font(ctx, buf, &matrix); + fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); + + fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); + + /* Encoding */ + + for (i = 0; i < 256; i++) + estrings[i] = NULL; + + encoding = pdf_dict_gets(dict, "Encoding"); + if (!encoding) + { + fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding"); + } + + if (pdf_is_name(encoding)) + pdf_load_encoding(estrings, pdf_to_name(encoding)); + + if (pdf_is_dict(encoding)) + { + pdf_obj *base, *diff, *item; + + base = pdf_dict_gets(encoding, "BaseEncoding"); + if (pdf_is_name(base)) + pdf_load_encoding(estrings, pdf_to_name(base)); + + diff = pdf_dict_gets(encoding, "Differences"); + if (pdf_is_array(diff)) + { + n = pdf_array_len(diff); + k = 0; + for (i = 0; i < n; i++) + { + item = pdf_array_get(diff, i); + if (pdf_is_int(item)) + 
k = pdf_to_int(item); + if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) + estrings[k++] = pdf_to_name(item); + } + } + } + + fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); + fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); + + pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); + + /* Widths */ + + pdf_set_default_hmtx(ctx, fontdesc, 0); + + first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); + last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); + + if (first < 0 || last > 255 || first > last) + first = last = 0; + + widths = pdf_dict_gets(dict, "Widths"); + if (!widths) + { + fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths"); + } + + for (i = first; i <= last; i++) + { + float w = pdf_to_real(pdf_array_get(widths, i - first)); + w = fontdesc->font->t3matrix.a * w * 1000; + fontdesc->font->t3widths[i] = w * 0.001f; + pdf_add_hmtx(ctx, fontdesc, i, i, w); + } + + pdf_end_hmtx(ctx, fontdesc); + + /* Resources -- inherit page resources if the font doesn't have its own */ + + fontdesc->font->t3freeres = pdf_t3_free_resources; + fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources"); + if (!fontdesc->font->t3resources) + fontdesc->font->t3resources = rdb; + if (fontdesc->font->t3resources) + pdf_keep_obj(fontdesc->font->t3resources); + if (!fontdesc->font->t3resources) + fz_warn(ctx, "no resource dictionary for type 3 font!"); + + fontdesc->font->t3doc = xref; + fontdesc->font->t3run = pdf_run_glyph_func; + + /* CharProcs */ + + charprocs = pdf_dict_gets(dict, "CharProcs"); + if (!charprocs) + { + fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs"); + } + + for (i = 0; i < 256; i++) + { + if (estrings[i]) + { + obj = pdf_dict_gets(charprocs, estrings[i]); + if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))) + { + fontdesc->font->t3procs[i] = pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); + fontdesc->size += 
fontdesc->font->t3procs[i]->cap; + fontdesc->size += 0; // TODO: display list size calculation + } + } + } + } + fz_catch(ctx) + { + if (fontdesc) + pdf_drop_font(ctx, fontdesc); + fz_rethrow_message(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); + } + return fontdesc; +} + +void pdf_load_type3_glyphs(pdf_document *xref, pdf_font_desc *fontdesc, int nested_depth) +{ + int i; + fz_context *ctx = xref->ctx; + + fz_try(ctx) + { + for (i = 0; i < 256; i++) + { + if (fontdesc->font->t3procs[i]) + { + fz_prepare_t3_glyph(ctx, fontdesc->font, i, nested_depth); + fontdesc->size += 0; // TODO: display list size calculation + } + } + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + fz_warn(ctx, "Type3 glyph load failed: %s", fz_caught_message(ctx)); + } +} diff --git a/source/pdf/pdf-unicode.c b/source/pdf/pdf-unicode.c new file mode 100644 index 00000000..694cbac6 --- /dev/null +++ b/source/pdf/pdf-unicode.c @@ -0,0 +1,77 @@ +#include "mupdf/pdf.h" + +/* Load or synthesize ToUnicode map for fonts */ + +void +pdf_load_to_unicode(pdf_document *xref, pdf_font_desc *font, + char **strings, char *collection, pdf_obj *cmapstm) +{ + pdf_cmap *cmap; + int cid; + int ucsbuf[8]; + int ucslen; + int i; + fz_context *ctx = xref->ctx; + + if (pdf_is_stream(xref, pdf_to_num(cmapstm), pdf_to_gen(cmapstm))) + { + cmap = pdf_load_embedded_cmap(xref, cmapstm); + + font->to_unicode = pdf_new_cmap(ctx); + + for (i = 0; i < (strings ? 
256 : 65536); i++) + { + cid = pdf_lookup_cmap(font->encoding, i); + if (cid >= 0) + { + ucslen = pdf_lookup_cmap_full(cmap, i, ucsbuf); + if (ucslen == 1) + pdf_map_range_to_range(ctx, font->to_unicode, cid, cid, ucsbuf[0]); + if (ucslen > 1) + pdf_map_one_to_many(ctx, font->to_unicode, cid, ucsbuf, ucslen); + } + } + + pdf_sort_cmap(ctx, font->to_unicode); + + pdf_drop_cmap(ctx, cmap); + font->size += pdf_cmap_size(ctx, font->to_unicode); + } + + else if (collection) + { + if (!strcmp(collection, "Adobe-CNS1")) + font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); + else if (!strcmp(collection, "Adobe-GB1")) + font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); + else if (!strcmp(collection, "Adobe-Japan1")) + font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); + else if (!strcmp(collection, "Adobe-Korea1")) + font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); + + return; + } + + if (strings) + { + /* TODO one-to-many mappings */ + + font->cid_to_ucs_len = 256; + font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof(unsigned short)); + font->size += 256 * sizeof(unsigned short); + + for (i = 0; i < 256; i++) + { + if (strings[i]) + font->cid_to_ucs[i] = pdf_lookup_agl(strings[i]); + else + font->cid_to_ucs[i] = '?'; + } + } + + if (!font->to_unicode && !font->cid_to_ucs) + { + /* TODO: synthesize a ToUnicode if it's a freetype font with + * cmap and/or post tables or if it has glyph names. */ + } +} diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c new file mode 100644 index 00000000..a423edef --- /dev/null +++ b/source/pdf/pdf-write.c @@ -0,0 +1,2363 @@ +#include "mupdf/pdf.h" + +/* #define DEBUG_LINEARIZATION */ +/* #define DEBUG_HEAP_SORT */ +/* #define DEBUG_WRITING */ + +typedef struct pdf_write_options_s pdf_write_options; + +/* + As part of linearization, we need to keep a list of what objects are used + by what page. 
We do this by recording the objects used in a given page + in a page_objects structure. We have a list of these structures (one per + page) in the page_objects_list structure. + + The page_objects structure maintains a heap in the object array, so + insertion takes log n time, and we can heapsort and dedupe at the end for + a total worse case n log n time. + + The magic heap invariant is that: + entry[n] >= entry[(n+1)*2-1] & entry[n] >= entry[(n+1)*2] + or equivalently: + entry[(n-1)>>1] >= entry[n] + + For a discussion of the heap data structure (and heapsort) see Kingston, + "Algorithms and Data Structures". +*/ + +typedef struct { + int num_shared; + int page_object_number; + int num_objects; + int min_ofs; + int max_ofs; + /* Extensible list of objects used on this page */ + int cap; + int len; + int object[1]; +} page_objects; + +typedef struct { + int cap; + int len; + page_objects *page[1]; +} page_objects_list; + +struct pdf_write_options_s +{ + FILE *out; + int do_ascii; + int do_expand; + int do_garbage; + int do_linear; + int *use_list; + int *ofs_list; + int *gen_list; + int *renumber_map; + int continue_on_error; + int *errors; + /* The following extras are required for linearization */ + int *rev_renumber_map; + int *rev_gen_list; + int start; + int first_xref_offset; + int main_xref_offset; + int first_xref_entry_offset; + int file_len; + int hints_shared_offset; + int hintstream_len; + pdf_obj *linear_l; + pdf_obj *linear_h0; + pdf_obj *linear_h1; + pdf_obj *linear_o; + pdf_obj *linear_e; + pdf_obj *linear_n; + pdf_obj *linear_t; + pdf_obj *hints_s; + pdf_obj *hints_length; + int page_count; + page_objects_list *page_object_lists; +}; + +/* + * Constants for use with use_list. + * + * If use_list[num] = 0, then object num is unused. + * If use_list[num] & PARAMS, then object num is the linearisation params obj. + * If use_list[num] & CATALOGUE, then object num is used by the catalogue. 
+ * If use_list[num] & PAGE1, then object num is used by page 1. + * If use_list[num] & SHARED, then object num is shared between pages. + * If use_list[num] & PAGE_OBJECT then this must be the first object in a page. + * If use_list[num] & OTHER_OBJECTS then this must should appear in section 9. + * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT). + */ +enum +{ + USE_CATALOGUE = 2, + USE_PAGE1 = 4, + USE_SHARED = 8, + USE_PARAMS = 16, + USE_HINTS = 32, + USE_PAGE_OBJECT = 64, + USE_OTHER_OBJECTS = 128, + USE_PAGE_MASK = ~255, + USE_PAGE_SHIFT = 8 +}; + +/* + * page_objects and page_object_list handling functions + */ +static page_objects_list * +page_objects_list_create(fz_context *ctx) +{ + page_objects_list *pol = fz_calloc(ctx, 1, sizeof(*pol)); + + pol->cap = 1; + pol->len = 0; + return pol; +} + +static void +page_objects_list_destroy(fz_context *ctx, page_objects_list *pol) +{ + int i; + + if (!pol) + return; + for (i = 0; i < pol->len; i++) + { + fz_free(ctx, pol->page[i]); + } + fz_free(ctx, pol); +} + +static void +page_objects_list_ensure(fz_context *ctx, page_objects_list **pol, int newcap) +{ + int oldcap = (*pol)->cap; + if (newcap <= oldcap) + return; + *pol = fz_resize_array(ctx, *pol, 1, sizeof(page_objects_list) + (newcap-1)*sizeof(page_objects *)); + memset(&(*pol)->page[oldcap], 0, (newcap-oldcap)*sizeof(page_objects *)); + (*pol)->cap = newcap; +} + +static page_objects * +page_objects_create(fz_context *ctx) +{ + int initial_cap = 8; + page_objects *po = fz_calloc(ctx, 1, sizeof(*po) + (initial_cap-1) * sizeof(int)); + + po->cap = initial_cap; + po->len = 0; + return po; + +} + +static void +page_objects_insert(fz_context *ctx, page_objects **ppo, int i) +{ + page_objects *po; + + /* Make a page_objects if we don't have one */ + if (*ppo == NULL) + *ppo = page_objects_create(ctx); + + po = *ppo; + /* page_objects insertion: extend the page_objects by 1, and put us on the end */ + if (po->len == po->cap) + { + po = 
fz_resize_array(ctx, po, 1, sizeof(page_objects) + (po->cap*2 - 1)*sizeof(int)); + po->cap *= 2; + *ppo = po; + } + po->object[po->len++] = i; +} + +static void +page_objects_list_insert(fz_context *ctx, pdf_write_options *opts, int page, int object) +{ + page_objects_list_ensure(ctx, &opts->page_object_lists, page+1); + if (opts->page_object_lists->len < page+1) + opts->page_object_lists->len = page+1; + page_objects_insert(ctx, &opts->page_object_lists->page[page], object); +} + +static void +page_objects_list_set_page_object(fz_context *ctx, pdf_write_options *opts, int page, int object) +{ + page_objects_list_ensure(ctx, &opts->page_object_lists, page+1); + opts->page_object_lists->page[page]->page_object_number = object; +} + +static void +page_objects_sort(fz_context *ctx, page_objects *po) +{ + int i, j; + int n = po->len; + + /* Step 1: Make a heap */ + /* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */ + for (i = 1; i < n; i++) + { + /* Now bubble backwards to maintain heap invariant */ + j = i; + while (j != 0) + { + int tmp; + int k = (j-1)>>1; + if (po->object[k] >= po->object[j]) + break; + tmp = po->object[k]; + po->object[k] = po->object[j]; + po->object[j] = tmp; + j = k; + } + } + + /* Step 2: Heap sort */ + /* Invariant: valid heap in [0..i), sorted list in [i..n) */ + /* Initially: i = n */ + for (i = n-1; i > 0; i--) + { + /* Swap the maximum (0th) element from the page_objects into its place + * in the sorted list (position i). */ + int tmp = po->object[0]; + po->object[0] = po->object[i]; + po->object[i] = tmp; + /* Now, the page_objects is invalid because the 0th element is out + * of place. Bubble it until the page_objects is valid. 
*/ + j = 0; + while (1) + { + /* Children are k and k+1 */ + int k = (j+1)*2-1; + /* If both children out of the page_objects, we're done */ + if (k > i-1) + break; + /* If both are in the page_objects, pick the larger one */ + if (k < i-1 && po->object[k] < po->object[k+1]) + k++; + /* If j is bigger than k (i.e. both of it's children), + * we're done */ + if (po->object[j] > po->object[k]) + break; + tmp = po->object[k]; + po->object[k] = po->object[j]; + po->object[j] = tmp; + j = k; + } + } +} + +static int +order_ge(int ui, int uj) +{ + /* + For linearization, we need to order the sections as follows: + + Remaining pages (Part 7) + Shared objects (Part 8) + Objects not associated with any page (Part 9) + Any "other" objects + (Header)(Part 1) + (Linearization params) (Part 2) + (1st page Xref/Trailer) (Part 3) + Catalogue (and other document level objects) (Part 4) + First page (Part 6) + (Primary Hint stream) (*) (Part 5) + Any free objects + + Note, this is NOT the same order they appear in + the final file! + + (*) The PDF reference gives us the option of putting the hint stream + after the first page, and we take it, for simplicity. + */ + + /* If the 2 objects are in the same section, then page object comes first. */ + if (((ui ^ uj) & ~USE_PAGE_OBJECT) == 0) + return ((ui & USE_PAGE_OBJECT) == 0); + /* Put unused objects last */ + else if (ui == 0) + return 1; + else if (uj == 0) + return 0; + /* Put the hint stream before that... */ + else if (ui & USE_HINTS) + return 1; + else if (uj & USE_HINTS) + return 0; + /* Put page 1 before that... */ + else if (ui & USE_PAGE1) + return 1; + else if (uj & USE_PAGE1) + return 0; + /* Put the catalogue before that... */ + else if (ui & USE_CATALOGUE) + return 1; + else if (uj & USE_CATALOGUE) + return 0; + /* Put the linearization params before that... 
*/ + else if (ui & USE_PARAMS) + return 1; + else if (uj & USE_PARAMS) + return 0; + /* Put other objects before that */ + else if (ui & USE_OTHER_OBJECTS) + return 1; + else if (uj & USE_OTHER_OBJECTS) + return 0; + /* Put objects not associated with any page (anything + * not touched by the catalogue) before that... */ + else if (ui == 0) + return 1; + else if (uj == 0) + return 0; + /* Put shared objects before that... */ + else if (ui & USE_SHARED) + return 1; + else if (uj & USE_SHARED) + return 0; + /* And otherwise, order by the page number on which + * they are used. */ + return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT); +} + +static void +heap_sort(int *list, int n, const int *val, int (*ge)(int, int)) +{ + int i, j; + +#ifdef DEBUG_HEAP_SORT + fprintf(stderr, "Initially:\n"); + for (i=0; i < n; i++) + { + fprintf(stderr, "%d: %d %x\n", i, list[i], val[list[i]]); + } +#endif + /* Step 1: Make a heap */ + /* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */ + for (i = 1; i < n; i++) + { + /* Now bubble backwards to maintain heap invariant */ + j = i; + while (j != 0) + { + int tmp; + int k = (j-1)>>1; + if (ge(val[list[k]], val[list[j]])) + break; + tmp = list[k]; + list[k] = list[j]; + list[j] = tmp; + j = k; + } + } +#ifdef DEBUG_HEAP_SORT + fprintf(stderr, "Valid heap:\n"); + for (i=0; i < n; i++) + { + int k; + fprintf(stderr, "%d: %d %x ", i, list[i], val[list[i]]); + k = (i+1)*2-1; + if (k < n) + { + if (ge(val[list[i]], val[list[k]])) + fprintf(stderr, "OK "); + else + fprintf(stderr, "BAD "); + } + if (k+1 < n) + { + if (ge(val[list[i]], val[list[k+1]])) + fprintf(stderr, "OK\n"); + else + fprintf(stderr, "BAD\n"); + } + else + fprintf(stderr, "\n"); + } +#endif + + /* Step 2: Heap sort */ + /* Invariant: valid heap in [0..i), sorted list in [i..n) */ + /* Initially: i = n */ + for (i = n-1; i > 0; i--) + { + /* Swap the maximum (0th) element from the page_objects into its place + * in the sorted list (position i). 
*/ + int tmp = list[0]; + list[0] = list[i]; + list[i] = tmp; + /* Now, the page_objects is invalid because the 0th element is out + * of place. Bubble it until the page_objects is valid. */ + j = 0; + while (1) + { + /* Children are k and k+1 */ + int k = (j+1)*2-1; + /* If both children out of the page_objects, we're done */ + if (k > i-1) + break; + /* If both are in the page_objects, pick the larger one */ + if (k < i-1 && ge(val[list[k+1]], val[list[k]])) + k++; + /* If j is bigger than k (i.e. both of it's children), + * we're done */ + if (ge(val[list[j]], val[list[k]])) + break; + tmp = list[k]; + list[k] = list[j]; + list[j] = tmp; + j = k; + } + } +#ifdef DEBUG_HEAP_SORT + fprintf(stderr, "Sorted:\n"); + for (i=0; i < n; i++) + { + fprintf(stderr, "%d: %d %x ", i, list[i], val[list[i]]); + if (i+1 < n) + { + if (ge(val[list[i+1]], val[list[i]])) + fprintf(stderr, "OK"); + else + fprintf(stderr, "BAD"); + } + fprintf(stderr, "\n"); + } +#endif +} + +static void +page_objects_dedupe(fz_context *ctx, page_objects *po) +{ + int i, j; + int n = po->len-1; + + for (i = 0; i < n; i++) + { + if (po->object[i] == po->object[i+1]) + break; + } + j = i; /* j points to the last valid one */ + i++; /* i points to the first one we haven't looked at */ + for (; i < n; i++) + { + if (po->object[j] != po->object[i]) + po->object[++j] = po->object[i]; + } + po->len = j+1; +} + +static void +page_objects_list_sort_and_dedupe(fz_context *ctx, page_objects_list *pol) +{ + int i; + int n = pol->len; + + for (i = 0; i < n; i++) + { + page_objects_sort(ctx, pol->page[i]); + page_objects_dedupe(ctx, pol->page[i]); + } +} + +#ifdef DEBUG_LINEARIZATION +static void +page_objects_dump(pdf_write_options *opts) +{ + page_objects_list *pol = opts->page_object_lists; + int i, j; + + for (i = 0; i < pol->len; i++) + { + page_objects *p = pol->page[i]; + fprintf(stderr, "Page %d\n", i+1); + for (j = 0; j < p->len; j++) + { + int o = p->object[j]; + fprintf(stderr, "\tObject %d: use=%x\n", 
o, opts->use_list[o]); + } + fprintf(stderr, "Byte range=%d->%d\n", p->min_ofs, p->max_ofs); + fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", p->num_objects, p->num_shared); + fprintf(stderr, "Page object number=%d\n", p->page_object_number); + } +} + +static void +objects_dump(pdf_document *xref, pdf_write_options *opts) +{ + int i; + + for (i=0; i < pdf_xref_len(xref); i++) + { + fprintf(stderr, "Object %d use=%x offset=%d\n", i, opts->use_list[i], opts->ofs_list[i]); + } +} +#endif + +/* + * Garbage collect objects not reachable from the trailer. + */ + +static pdf_obj *sweepref(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj) +{ + int num = pdf_to_num(obj); + int gen = pdf_to_gen(obj); + fz_context *ctx = xref->ctx; + + if (num < 0 || num >= pdf_xref_len(xref)) + return NULL; + if (opts->use_list[num]) + return NULL; + + opts->use_list[num] = 1; + + /* Bake in /Length in stream objects */ + fz_try(ctx) + { + if (pdf_is_stream(xref, num, gen)) + { + pdf_obj *len = pdf_dict_gets(obj, "Length"); + if (pdf_is_indirect(len)) + { + opts->use_list[pdf_to_num(len)] = 0; + len = pdf_resolve_indirect(len); + pdf_dict_puts(obj, "Length", len); + } + } + } + fz_catch(ctx) + { + /* FIXME: TryLater */ + /* Leave broken */ + } + + return pdf_resolve_indirect(obj); +} + +static void sweepobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj) +{ + int i; + + if (pdf_is_indirect(obj)) + obj = sweepref(xref, opts, obj); + + if (pdf_is_dict(obj)) + { + int n = pdf_dict_len(obj); + for (i = 0; i < n; i++) + sweepobj(xref, opts, pdf_dict_get_val(obj, i)); + } + + else if (pdf_is_array(obj)) + { + int n = pdf_array_len(obj); + for (i = 0; i < n; i++) + sweepobj(xref, opts, pdf_array_get(obj, i)); + } +} + +/* + * Scan for and remove duplicate objects (slow) + */ + +static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts) +{ + int num, other; + fz_context *ctx = xref->ctx; + int xref_len = pdf_xref_len(xref); + + for (num = 
1; num < xref_len; num++) + { + /* Only compare an object to objects preceding it */ + for (other = 1; other < num; other++) + { + pdf_obj *a, *b; + int differ, newnum, streama, streamb; + + if (num == other || !opts->use_list[num] || !opts->use_list[other]) + continue; + + /* + * Comparing stream objects data contents would take too long. + * + * pdf_is_stream calls pdf_cache_object and ensures + * that the xref table has the objects loaded. + */ + fz_try(ctx) + { + streama = pdf_is_stream(xref, num, 0); + streamb = pdf_is_stream(xref, other, 0); + differ = streama || streamb; + if (streama && streamb && opts->do_garbage >= 4) + differ = 0; + } + fz_catch(ctx) + { + /* Assume different */ + differ = 1; + } + if (differ) + continue; + + a = pdf_get_xref_entry(xref, num)->obj; + b = pdf_get_xref_entry(xref, other)->obj; + + a = pdf_resolve_indirect(a); + b = pdf_resolve_indirect(b); + + if (pdf_objcmp(a, b)) + continue; + + if (streama && streamb) + { + /* Check to see if streams match too. 
*/ + fz_buffer *sa = NULL; + fz_buffer *sb = NULL; + + fz_var(sa); + fz_var(sb); + + differ = 1; + fz_try(ctx) + { + unsigned char *dataa, *datab; + int lena, lenb; + sa = pdf_load_raw_renumbered_stream(xref, num, 0, num, 0); + sb = pdf_load_raw_renumbered_stream(xref, other, 0, other, 0); + lena = fz_buffer_storage(ctx, sa, &dataa); + lenb = fz_buffer_storage(ctx, sb, &datab); + if (lena == lenb && memcmp(dataa, datab, lena) == 0) + differ = 0; + } + fz_always(ctx) + { + fz_drop_buffer(ctx, sa); + fz_drop_buffer(ctx, sb); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + if (differ) + continue; + } + + /* Keep the lowest numbered object */ + newnum = fz_mini(num, other); + opts->renumber_map[num] = newnum; + opts->renumber_map[other] = newnum; + opts->rev_renumber_map[newnum] = num; /* Either will do */ + opts->use_list[fz_maxi(num, other)] = 0; + + /* One duplicate was found, do not look for another */ + break; + } + } +} + +/* + * Renumber objects sequentially so the xref is more compact + * + * This code assumes that any opts->renumber_map[n] <= n for all n. + */ + +static void compactxref(pdf_document *xref, pdf_write_options *opts) +{ + int num, newnum; + int xref_len = pdf_xref_len(xref); + + /* + * Update renumber_map in-place, clustering all used + * objects together at low object ids. Objects that + * already should be renumbered will have their new + * object ids be updated to reflect the compaction. + */ + + newnum = 1; + for (num = 1; num < xref_len; num++) + { + /* If it's not used, map it to zero */ + if (!opts->use_list[opts->renumber_map[num]]) + { + opts->renumber_map[num] = 0; + } + /* If it's not moved, compact it. */ + else if (opts->renumber_map[num] == num) + { + opts->rev_renumber_map[newnum] = opts->rev_renumber_map[num]; + opts->rev_gen_list[newnum] = opts->rev_gen_list[num]; + opts->renumber_map[num] = newnum++; + } + /* Otherwise it's used, and moved. 
We know that it must have + * moved down, so the place it's moved to will be in the right + * place already. */ + else + { + opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]]; + } + } +} + +/* + * Update indirect objects according to renumbering established when + * removing duplicate objects and compacting the xref. + */ + +static void renumberobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj) +{ + int i; + fz_context *ctx = xref->ctx; + + if (pdf_is_dict(obj)) + { + int n = pdf_dict_len(obj); + for (i = 0; i < n; i++) + { + pdf_obj *key = pdf_dict_get_key(obj, i); + pdf_obj *val = pdf_dict_get_val(obj, i); + if (pdf_is_indirect(val)) + { + val = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(val)], 0, xref); + pdf_dict_put(obj, key, val); + pdf_drop_obj(val); + } + else + { + renumberobj(xref, opts, val); + } + } + } + + else if (pdf_is_array(obj)) + { + int n = pdf_array_len(obj); + for (i = 0; i < n; i++) + { + pdf_obj *val = pdf_array_get(obj, i); + if (pdf_is_indirect(val)) + { + val = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(val)], 0, xref); + pdf_array_put(obj, i, val); + pdf_drop_obj(val); + } + else + { + renumberobj(xref, opts, val); + } + } + } +} + +static void renumberobjs(pdf_document *xref, pdf_write_options *opts) +{ + pdf_xref_entry *newxref = NULL; + int newlen; + int num; + fz_context *ctx = xref->ctx; + int *new_use_list; + int xref_len = pdf_xref_len(xref); + + new_use_list = fz_calloc(ctx, pdf_xref_len(xref)+3, sizeof(int)); + + fz_var(newxref); + fz_try(ctx) + { + /* Apply renumber map to indirect references in all objects in xref */ + renumberobj(xref, opts, pdf_trailer(xref)); + for (num = 0; num < xref_len; num++) + { + pdf_obj *obj = pdf_get_xref_entry(xref, num)->obj; + + if (pdf_is_indirect(obj)) + { + obj = pdf_new_indirect(ctx, opts->renumber_map[pdf_to_num(obj)], 0, xref); + pdf_update_object(xref, num, obj); + pdf_drop_obj(obj); + } + else + { + renumberobj(xref, opts, obj); + } + } + 
+ /* Create new table for the reordered, compacted xref */ + newxref = fz_malloc_array(ctx, xref_len + 3, sizeof(pdf_xref_entry)); + newxref[0] = *pdf_get_xref_entry(xref, 0); + + /* Move used objects into the new compacted xref */ + newlen = 0; + for (num = 1; num < xref_len; num++) + { + if (opts->use_list[num]) + { + if (newlen < opts->renumber_map[num]) + newlen = opts->renumber_map[num]; + newxref[opts->renumber_map[num]] = *pdf_get_xref_entry(xref, num); + new_use_list[opts->renumber_map[num]] = opts->use_list[num]; + } + else + { + pdf_drop_obj(pdf_get_xref_entry(xref, num)->obj); + } + } + + pdf_replace_xref(xref, newxref, newlen + 1); + newxref = NULL; + } + fz_catch(ctx) + { + fz_free(ctx, newxref); + fz_free(ctx, new_use_list); + fz_rethrow(ctx); + } + fz_free(ctx, opts->use_list); + opts->use_list = new_use_list; + + for (num = 1; num < xref_len; num++) + { + opts->renumber_map[num] = num; + } +} + +static void page_objects_list_renumber(pdf_write_options *opts) +{ + int i, j; + + for (i = 0; i < opts->page_object_lists->len; i++) + { + page_objects *po = opts->page_object_lists->page[i]; + for (j = 0; j < po->len; j++) + { + po->object[j] = opts->renumber_map[po->object[j]]; + } + po->page_object_number = opts->renumber_map[po->page_object_number]; + } +} + +static void +mark_all(pdf_document *xref, pdf_write_options *opts, pdf_obj *val, int flag, int page) +{ + fz_context *ctx = xref->ctx; + + if (pdf_obj_mark(val)) + return; + + fz_try(ctx) + { + if (pdf_is_indirect(val)) + { + int num = pdf_to_num(val); + if (opts->use_list[num] & USE_PAGE_MASK) + /* Already used */ + opts->use_list[num] |= USE_SHARED; + else + opts->use_list[num] |= flag; + if (page >= 0) + page_objects_list_insert(ctx, opts, page, num); + } + + if (pdf_is_dict(val)) + { + int i, n = pdf_dict_len(val); + + for (i = 0; i < n; i++) + { + mark_all(xref, opts, pdf_dict_get_val(val, i), flag, page); + } + } + else if (pdf_is_array(val)) + { + int i, n = pdf_array_len(val); + + for (i = 
0; i < n; i++) + { + mark_all(xref, opts, pdf_array_get(val, i), flag, page); + } + } + } + fz_always(ctx) + { + pdf_obj_unmark(val); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static int +mark_pages(pdf_document *xref, pdf_write_options *opts, pdf_obj *val, int pagenum) +{ + fz_context *ctx = xref->ctx; + + if (pdf_obj_mark(val)) + return pagenum; + + fz_try(ctx) + { + if (pdf_is_dict(val)) + { + if (!strcmp("Page", pdf_to_name(pdf_dict_gets(val, "Type")))) + { + int num = pdf_to_num(val); + pdf_obj_unmark(val); + mark_all(xref, opts, val, pagenum == 0 ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum); + page_objects_list_set_page_object(ctx, opts, pagenum, num); + pagenum++; + opts->use_list[num] |= USE_PAGE_OBJECT; + } + else + { + int i, n = pdf_dict_len(val); + + for (i = 0; i < n; i++) + { + pdf_obj *key = pdf_dict_get_key(val, i); + pdf_obj *obj = pdf_dict_get_val(val, i); + + if (!strcmp("Kids", pdf_to_name(key))) + pagenum = mark_pages(xref, opts, obj, pagenum); + else + mark_all(xref, opts, obj, USE_CATALOGUE, -1); + } + + if (pdf_is_indirect(val)) + { + int num = pdf_to_num(val); + opts->use_list[num] |= USE_CATALOGUE; + } + } + } + else if (pdf_is_array(val)) + { + int i, n = pdf_array_len(val); + + for (i = 0; i < n; i++) + { + pagenum = mark_pages(xref, opts, pdf_array_get(val, i), pagenum); + } + if (pdf_is_indirect(val)) + { + int num = pdf_to_num(val); + opts->use_list[num] |= USE_CATALOGUE; + } + } + } + fz_always(ctx) + { + pdf_obj_unmark(val); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + return pagenum; +} + +static void +mark_root(pdf_document *xref, pdf_write_options *opts, pdf_obj *dict) +{ + fz_context *ctx = xref->ctx; + int i, n = pdf_dict_len(dict); + + if (pdf_obj_mark(dict)) + return; + + fz_try(ctx) + { + if (pdf_is_indirect(dict)) + { + int num = pdf_to_num(dict); + opts->use_list[num] |= USE_CATALOGUE; + } + + for (i = 0; i < n; i++) + { + char *key = pdf_to_name(pdf_dict_get_key(dict, i)); + pdf_obj *val = 
pdf_dict_get_val(dict, i); + + if (!strcmp("Pages", key)) + opts->page_count = mark_pages(xref, opts, val, 0); + else if (!strcmp("Names", key)) + mark_all(xref, opts, val, USE_OTHER_OBJECTS, -1); + else if (!strcmp("Dests", key)) + mark_all(xref, opts, val, USE_OTHER_OBJECTS, -1); + else if (!strcmp("Outlines", key)) + { + int section; + /* Look at PageMode to decide whether to + * USE_OTHER_OBJECTS or USE_PAGE1 here. */ + if (strcmp(pdf_to_name(pdf_dict_gets(dict, "PageMode")), "UseOutlines") == 0) + section = USE_PAGE1; + else + section = USE_OTHER_OBJECTS; + mark_all(xref, opts, val, section, -1); + } + else + mark_all(xref, opts, val, USE_CATALOGUE, -1); + } + } + fz_always(ctx) + { + pdf_obj_unmark(dict); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +mark_trailer(pdf_document *xref, pdf_write_options *opts, pdf_obj *dict) +{ + fz_context *ctx = xref->ctx; + int i, n = pdf_dict_len(dict); + + if (pdf_obj_mark(dict)) + return; + + fz_try(ctx) + { + for (i = 0; i < n; i++) + { + char *key = pdf_to_name(pdf_dict_get_key(dict, i)); + pdf_obj *val = pdf_dict_get_val(dict, i); + + if (!strcmp("Root", key)) + mark_root(xref, opts, val); + else + mark_all(xref, opts, val, USE_CATALOGUE, -1); + } + } + fz_always(ctx) + { + pdf_obj_unmark(dict); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +add_linearization_objs(pdf_document *xref, pdf_write_options *opts) +{ + pdf_obj *params_obj = NULL; + pdf_obj *params_ref = NULL; + pdf_obj *hint_obj = NULL; + pdf_obj *hint_ref = NULL; + pdf_obj *o = NULL; + int params_num, hint_num; + fz_context *ctx = xref->ctx; + + fz_var(params_obj); + fz_var(params_ref); + fz_var(hint_obj); + fz_var(hint_ref); + fz_var(o); + + fz_try(ctx) + { + /* Linearization params */ + params_obj = pdf_new_dict(ctx, 10); + params_ref = pdf_new_ref(xref, params_obj); + params_num = pdf_to_num(params_ref); + + opts->use_list[params_num] = USE_PARAMS; + opts->renumber_map[params_num] = params_num; + 
opts->rev_renumber_map[params_num] = params_num; + opts->gen_list[params_num] = 0; + opts->rev_gen_list[params_num] = 0; + pdf_dict_puts_drop(params_obj, "Linearized", pdf_new_real(ctx, 1.0)); + opts->linear_l = pdf_new_int(ctx, INT_MIN); + pdf_dict_puts(params_obj, "L", opts->linear_l); + opts->linear_h0 = pdf_new_int(ctx, INT_MIN); + o = pdf_new_array(ctx, 2); + pdf_array_push(o, opts->linear_h0); + opts->linear_h1 = pdf_new_int(ctx, INT_MIN); + pdf_array_push(o, opts->linear_h1); + pdf_dict_puts_drop(params_obj, "H", o); + o = NULL; + opts->linear_o = pdf_new_int(ctx, INT_MIN); + pdf_dict_puts(params_obj, "O", opts->linear_o); + opts->linear_e = pdf_new_int(ctx, INT_MIN); + pdf_dict_puts(params_obj, "E", opts->linear_e); + opts->linear_n = pdf_new_int(ctx, INT_MIN); + pdf_dict_puts(params_obj, "N", opts->linear_n); + opts->linear_t = pdf_new_int(ctx, INT_MIN); + pdf_dict_puts(params_obj, "T", opts->linear_t); + + /* Primary hint stream */ + hint_obj = pdf_new_dict(ctx, 10); + hint_ref = pdf_new_ref(xref, hint_obj); + hint_num = pdf_to_num(hint_ref); + + opts->use_list[hint_num] = USE_HINTS; + opts->renumber_map[hint_num] = hint_num; + opts->rev_renumber_map[hint_num] = hint_num; + opts->gen_list[hint_num] = 0; + opts->rev_gen_list[hint_num] = 0; + pdf_dict_puts_drop(hint_obj, "P", pdf_new_int(ctx, 0)); + opts->hints_s = pdf_new_int(ctx, INT_MIN); + pdf_dict_puts(hint_obj, "S", opts->hints_s); + /* FIXME: Do we have thumbnails? Do a T entry */ + /* FIXME: Do we have outlines? Do an O entry */ + /* FIXME: Do we have article threads? Do an A entry */ + /* FIXME: Do we have named destinations? Do a E entry */ + /* FIXME: Do we have interactive forms? Do a V entry */ + /* FIXME: Do we have document information? Do an I entry */ + /* FIXME: Do we have logical structure heirarchy? 
Do a C entry */ + /* FIXME: Do L, Page Label hint table */ + pdf_dict_puts_drop(hint_obj, "Filter", pdf_new_name(ctx, "FlateDecode")); + opts->hints_length = pdf_new_int(ctx, INT_MIN); + pdf_dict_puts(hint_obj, "Length", opts->hints_length); + pdf_get_xref_entry(xref, hint_num)->stm_ofs = -1; + } + fz_always(ctx) + { + pdf_drop_obj(params_obj); + pdf_drop_obj(params_ref); + pdf_drop_obj(hint_ref); + pdf_drop_obj(hint_obj); + pdf_drop_obj(o); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +static void +lpr_inherit_res_contents(fz_context *ctx, pdf_obj *res, pdf_obj *dict, char *text) +{ + pdf_obj *o, *r; + int i, n; + + /* If the parent node doesn't have an entry of this type, give up. */ + o = pdf_dict_gets(dict, text); + if (!o) + return; + + /* If the resources dict we are building doesn't have an entry of this + * type yet, then just copy it (ensuring it's not a reference) */ + r = pdf_dict_gets(res, text); + if (r == NULL) + { + o = pdf_resolve_indirect(o); + if (pdf_is_dict(o)) + o = pdf_copy_dict(ctx, o); + else if (pdf_is_array(o)) + o = pdf_copy_array(ctx, o); + else + o = NULL; + if (o) + pdf_dict_puts(res, text, o); + return; + } + + /* Otherwise we need to merge o into r */ + if (pdf_is_dict(o)) + { + n = pdf_dict_len(o); + for (i = 0; i < n; i++) + { + pdf_obj *key = pdf_dict_get_key(o, i); + pdf_obj *val = pdf_dict_get_val(o, i); + + if (pdf_dict_gets(res, pdf_to_name(key))) + continue; + pdf_dict_puts(res, pdf_to_name(key), val); + } + } +} + +static void +lpr_inherit_res(fz_context *ctx, pdf_obj *node, int depth, pdf_obj *dict) +{ + while (1) + { + pdf_obj *o; + + node = pdf_dict_gets(node, "Parent"); + depth--; + if (!node || depth < 0) + break; + + o = pdf_dict_gets(node, "Resources"); + if (o) + { + lpr_inherit_res_contents(ctx, dict, o, "ExtGState"); + lpr_inherit_res_contents(ctx, dict, o, "ColorSpace"); + lpr_inherit_res_contents(ctx, dict, o, "Pattern"); + lpr_inherit_res_contents(ctx, dict, o, "Shading"); + lpr_inherit_res_contents(ctx, 
dict, o, "XObject"); + lpr_inherit_res_contents(ctx, dict, o, "Font"); + lpr_inherit_res_contents(ctx, dict, o, "ProcSet"); + lpr_inherit_res_contents(ctx, dict, o, "Properties"); + } + } +} + +static pdf_obj * +lpr_inherit(fz_context *ctx, pdf_obj *node, char *text, int depth) +{ + do + { + pdf_obj *o = pdf_dict_gets(node, text); + + if (o) + return pdf_resolve_indirect(o); + node = pdf_dict_gets(node, "Parent"); + depth--; + } + while (depth >= 0 && node); + + return NULL; +} + +static int +lpr(fz_context *ctx, pdf_obj *node, int depth, int page) +{ + pdf_obj *kids; + pdf_obj *o = NULL; + int i, n; + + if (pdf_obj_mark(node)) + return page; + + fz_var(o); + + fz_try(ctx) + { + if (!strcmp("Page", pdf_to_name(pdf_dict_gets(node, "Type")))) + { + pdf_obj *r; /* r is deliberately not cleaned up */ + + /* Copy resources down to the child */ + o = pdf_keep_obj(pdf_dict_gets(node, "Resources")); + if (!o) + { + o = pdf_keep_obj(pdf_new_dict(ctx, 2)); + pdf_dict_puts(node, "Resources", o); + } + lpr_inherit_res(ctx, node, depth, o); + r = lpr_inherit(ctx, node, "MediaBox", depth); + if (r) + pdf_dict_puts(node, "MediaBox", r); + r = lpr_inherit(ctx, node, "CropBox", depth); + if (r) + pdf_dict_puts(node, "CropBox", r); + r = lpr_inherit(ctx, node, "BleedBox", depth); + if (r) + pdf_dict_puts(node, "BleedBox", r); + r = lpr_inherit(ctx, node, "TrimBox", depth); + if (r) + pdf_dict_puts(node, "TrimBox", r); + r = lpr_inherit(ctx, node, "ArtBox", depth); + if (r) + pdf_dict_puts(node, "ArtBox", r); + r = lpr_inherit(ctx, node, "Rotate", depth); + if (r) + pdf_dict_puts(node, "Rotate", r); + page++; + } + else + { + kids = pdf_dict_gets(node, "Kids"); + n = pdf_array_len(kids); + for(i = 0; i < n; i++) + { + page = lpr(ctx, pdf_array_get(kids, i), depth+1, page); + } + pdf_dict_dels(node, "Resources"); + pdf_dict_dels(node, "MediaBox"); + pdf_dict_dels(node, "CropBox"); + pdf_dict_dels(node, "BleedBox"); + pdf_dict_dels(node, "TrimBox"); + pdf_dict_dels(node, "ArtBox"); + 
pdf_dict_dels(node, "Rotate"); + } + } + fz_always(ctx) + { + pdf_drop_obj(o); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + pdf_obj_unmark(node); + + return page; +} + +void +pdf_localise_page_resources(pdf_document *xref) +{ + fz_context *ctx = xref->ctx; + + if (xref->resources_localised) + return; + + lpr(ctx, pdf_dict_getp(pdf_trailer(xref), "Root/Pages"), 0, 0); + + xref->resources_localised = 1; +} + +static void +linearize(pdf_document *xref, pdf_write_options *opts) +{ + int i; + int n = pdf_xref_len(xref) + 2; + int *reorder; + int *rev_renumber_map; + int *rev_gen_list; + fz_context *ctx = xref->ctx; + + opts->page_object_lists = page_objects_list_create(ctx); + + /* Ensure that every page has local references of its resources */ + /* FIXME: We could 'thin' the resources according to what is actually + * required for each page, but this would require us to run the page + * content streams. */ + pdf_localise_page_resources(xref); + + /* Walk the objects for each page, marking which ones are used, where */ + memset(opts->use_list, 0, n * sizeof(int)); + mark_trailer(xref, opts, pdf_trailer(xref)); + + /* Add new objects required for linearization */ + add_linearization_objs(xref, opts); + +#ifdef DEBUG_WRITING + fprintf(stderr, "Usage calculated:\n"); + for (i=0; i < pdf_xref_len(xref); i++) + { + fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]); + } +#endif + + /* Allocate/init the structures used for renumbering the objects */ + reorder = fz_calloc(ctx, n, sizeof(int)); + rev_renumber_map = fz_calloc(ctx, n, sizeof(int)); + rev_gen_list = fz_calloc(ctx, n, sizeof(int)); + for (i = 0; i < n; i++) + { + reorder[i] = i; + } + + /* Heap sort the reordering */ + heap_sort(reorder+1, n-1, opts->use_list, &order_ge); + +#ifdef DEBUG_WRITING + fprintf(stderr, "Reordered:\n"); + for (i=1; i < pdf_xref_len(xref); i++) + { + fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]); + } +#endif + + /* Find the split point */ + for (i = 1; 
(opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++); + opts->start = i; + + /* Roll the reordering into the renumber_map */ + for (i = 0; i < n; i++) + { + opts->renumber_map[reorder[i]] = i; + rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]]; + rev_gen_list[i] = opts->rev_gen_list[reorder[i]]; + } + fz_free(ctx, opts->rev_renumber_map); + fz_free(ctx, opts->rev_gen_list); + opts->rev_renumber_map = rev_renumber_map; + opts->rev_gen_list = rev_gen_list; + fz_free(ctx, reorder); + + /* Apply the renumber_map */ + page_objects_list_renumber(opts); + renumberobjs(xref, opts); + + page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists); +} + +static void +update_linearization_params(pdf_document *xref, pdf_write_options *opts) +{ + int offset; + pdf_set_int(opts->linear_l, opts->file_len); + /* Primary hint stream offset (of object, not stream!) */ + pdf_set_int(opts->linear_h0, opts->ofs_list[pdf_xref_len(xref)-1]); + /* Primary hint stream length (of object, not stream!) */ + offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len); + pdf_set_int(opts->linear_h1, offset - opts->ofs_list[pdf_xref_len(xref)-1]); + /* Object number of first pages page object (the first object of page 0) */ + pdf_set_int(opts->linear_o, opts->page_object_lists->page[0]->object[0]); + /* Offset of end of first page (first page is followed by primary + * hint stream (object n-1) then remaining pages (object 1...). The + * primary hint stream counts as part of the first pages data, I think. + */ + offset = (opts->start == 1 ? 
/*
 * Classify a byte value as binary (1) or text (0).
 *
 * Newline, carriage return and tab are text. Apart from those, values
 * below 32 or above 127 are binary; everything from 32 to 127 inclusive
 * is text.
 */
static inline int isbinary(int c)
{
	switch (c)
	{
	case '\n':
	case '\r':
	case '\t':
		return 0;
	default:
		break;
	}
	return !(c >= 32 && c <= 127);
}
dp = pdf_dict_gets(dict, "DecodeParms"); + + if (pdf_is_name(f)) + { + newf = pdf_new_array(ctx, 2); + pdf_array_push(newf, ahx); + pdf_array_push(newf, f); + f = newf; + if (pdf_is_dict(dp)) + { + newdp = pdf_new_array(ctx, 2); + pdf_array_push(newdp, nullobj); + pdf_array_push(newdp, dp); + dp = newdp; + } + } + else if (pdf_is_array(f)) + { + pdf_array_insert(f, ahx); + if (pdf_is_array(dp)) + pdf_array_insert(dp, nullobj); + } + else + f = ahx; + + pdf_dict_puts(dict, "Filter", f); + if (dp) + pdf_dict_puts(dict, "DecodeParms", dp); + + pdf_drop_obj(ahx); + pdf_drop_obj(nullobj); + pdf_drop_obj(newf); + pdf_drop_obj(newdp); +} + +static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen) +{ + fz_buffer *buf, *tmp; + pdf_obj *newlen; + pdf_obj *obj; + fz_context *ctx = xref->ctx; + int orig_num = opts->rev_renumber_map[num]; + int orig_gen = opts->rev_gen_list[num]; + + buf = pdf_load_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen); + + obj = pdf_copy_dict(ctx, obj_orig); + if (opts->do_ascii && isbinarystream(buf)) + { + tmp = hexbuf(ctx, buf->data, buf->len); + fz_drop_buffer(ctx, buf); + buf = tmp; + + addhexfilter(xref, obj); + + newlen = pdf_new_int(ctx, buf->len); + pdf_dict_puts(obj, "Length", newlen); + pdf_drop_obj(newlen); + } + + fprintf(opts->out, "%d %d obj\n", num, gen); + pdf_fprint_obj(opts->out, obj, opts->do_expand == 0); + fprintf(opts->out, "stream\n"); + fwrite(buf->data, 1, buf->len, opts->out); + fprintf(opts->out, "endstream\nendobj\n\n"); + + fz_drop_buffer(ctx, buf); + pdf_drop_obj(obj); +} + +static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen) +{ + fz_buffer *buf, *tmp; + pdf_obj *newlen; + pdf_obj *obj; + fz_context *ctx = xref->ctx; + int orig_num = opts->rev_renumber_map[num]; + int orig_gen = opts->rev_gen_list[num]; + int truncated = 0; + + buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen, 
/*
 * Return 1 if s is the name (long or abbreviated form) of one of the
 * filters listed below, 0 otherwise. The comparison is exact and case
 * sensitive.
 */
static int is_image_filter(char *s)
{
	static const char *const image_filters[] = {
		"CCITTFaxDecode", "CCF",
		"DCTDecode", "DCT",
		"RunLengthDecode", "RL",
		"JBIG2Decode",
		"JPXDecode"
	};
	size_t k;

	for (k = 0; k < sizeof(image_filters) / sizeof(image_filters[0]); k++)
	{
		if (strcmp(s, image_filters[k]) == 0)
			return 1;
	}
	return 0;
}
*/ + if (opts->continue_on_error) + { + fprintf(opts->out, "%d %d obj\nnull\nendobj\n", num, gen); + if (opts->errors) + (*opts->errors)++; + fz_warn(ctx, "%s", fz_caught_message(ctx)); + return; + } + else + fz_rethrow(ctx); + } + + /* skip ObjStm and XRef objects */ + if (pdf_is_dict(obj)) + { + type = pdf_dict_gets(obj, "Type"); + if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "ObjStm")) + { + opts->use_list[num] = 0; + pdf_drop_obj(obj); + return; + } + if (pdf_is_name(type) && !strcmp(pdf_to_name(type), "XRef")) + { + opts->use_list[num] = 0; + pdf_drop_obj(obj); + return; + } + } + + entry = pdf_get_xref_entry(xref, num); + if (!pdf_is_stream(xref, num, gen)) + { + fprintf(opts->out, "%d %d obj\n", num, gen); + pdf_fprint_obj(opts->out, obj, opts->do_expand == 0); + fprintf(opts->out, "endobj\n\n"); + } + else if (entry->stm_ofs < 0 && entry->stm_buf == NULL) + { + fprintf(opts->out, "%d %d obj\n", num, gen); + pdf_fprint_obj(opts->out, obj, opts->do_expand == 0); + fprintf(opts->out, "stream\nendstream\nendobj\n\n"); + } + else + { + int dontexpand = 0; + if (opts->do_expand != 0 && opts->do_expand != fz_expand_all) + { + pdf_obj *o; + + if ((o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "XObject")) && + (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "Image"))) + dontexpand = !(opts->do_expand & fz_expand_images); + if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "Font")) + dontexpand = !(opts->do_expand & fz_expand_fonts); + if (o = pdf_dict_gets(obj, "Type"), !strcmp(pdf_to_name(o), "FontDescriptor")) + dontexpand = !(opts->do_expand & fz_expand_fonts); + if ((o = pdf_dict_gets(obj, "Length1")) != NULL) + dontexpand = !(opts->do_expand & fz_expand_fonts); + if ((o = pdf_dict_gets(obj, "Length2")) != NULL) + dontexpand = !(opts->do_expand & fz_expand_fonts); + if ((o = pdf_dict_gets(obj, "Length3")) != NULL) + dontexpand = !(opts->do_expand & fz_expand_fonts); + if (o = pdf_dict_gets(obj, "Subtype"), 
!strcmp(pdf_to_name(o), "Type1C")) + dontexpand = !(opts->do_expand & fz_expand_fonts); + if (o = pdf_dict_gets(obj, "Subtype"), !strcmp(pdf_to_name(o), "CIDFontType0C")) + dontexpand = !(opts->do_expand & fz_expand_fonts); + if (o = pdf_dict_gets(obj, "Filter"), filter_implies_image(xref, o)) + dontexpand = !(opts->do_expand & fz_expand_images); + if (pdf_dict_gets(obj, "Width") != NULL && pdf_dict_gets(obj, "Height") != NULL) + dontexpand = !(opts->do_expand & fz_expand_images); + } + fz_try(ctx) + { + if (opts->do_expand && !dontexpand && !pdf_is_jpx_image(ctx, obj)) + expandstream(xref, opts, obj, num, gen); + else + copystream(xref, opts, obj, num, gen); + } + fz_catch(ctx) + { + /* FIXME: TryLater ? */ + if (opts->continue_on_error) + { + fprintf(opts->out, "%d %d obj\nnull\nendobj\n", num, gen); + if (opts->errors) + (*opts->errors)++; + fz_warn(ctx, "%s", fz_caught_message(ctx)); + } + else + { + pdf_drop_obj(obj); + fz_rethrow(ctx); + } + } + } + + pdf_drop_obj(obj); +} + +static void writexref(pdf_document *xref, pdf_write_options *opts, int from, int to, int first, int main_xref_offset, int startxref) +{ + pdf_obj *trailer = NULL; + pdf_obj *obj; + pdf_obj *nobj = NULL; + int num; + fz_context *ctx = xref->ctx; + + fprintf(opts->out, "xref\n%d %d\n", from, to - from); + opts->first_xref_entry_offset = ftell(opts->out); + for (num = from; num < to; num++) + { + if (opts->use_list[num]) + fprintf(opts->out, "%010d %05d n \n", opts->ofs_list[num], opts->gen_list[num]); + else + fprintf(opts->out, "%010d %05d f \n", opts->ofs_list[num], opts->gen_list[num]); + } + fprintf(opts->out, "\n"); + + fz_var(trailer); + fz_var(nobj); + + fz_try(ctx) + { + trailer = pdf_new_dict(ctx, 5); + + nobj = pdf_new_int(ctx, to); + pdf_dict_puts(trailer, "Size", nobj); + pdf_drop_obj(nobj); + nobj = NULL; + + if (first) + { + obj = pdf_dict_gets(pdf_trailer(xref), "Info"); + if (obj) + pdf_dict_puts(trailer, "Info", obj); + + obj = pdf_dict_gets(pdf_trailer(xref), "Root"); + 
if (obj) + pdf_dict_puts(trailer, "Root", obj); + + obj = pdf_dict_gets(pdf_trailer(xref), "ID"); + if (obj) + pdf_dict_puts(trailer, "ID", obj); + } + if (main_xref_offset != 0) + { + nobj = pdf_new_int(ctx, main_xref_offset); + pdf_dict_puts(trailer, "Prev", nobj); + pdf_drop_obj(nobj); + nobj = NULL; + } + } + fz_always(ctx) + { + pdf_drop_obj(nobj); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } + + fprintf(opts->out, "trailer\n"); + pdf_fprint_obj(opts->out, trailer, opts->do_expand == 0); + fprintf(opts->out, "\n"); + + pdf_drop_obj(trailer); + + fprintf(opts->out, "startxref\n%d\n%%%%EOF\n", startxref); +} + +static void +padto(FILE *file, int target) +{ + int pos = ftell(file); + + assert(pos <= target); + while (pos < target) + { + fputc('\n', file); + pos++; + } +} + +static void +dowriteobject(pdf_document *xref, pdf_write_options *opts, int num, int pass) +{ + pdf_xref_entry *entry = pdf_get_xref_entry(xref, num); + if (entry->type == 'f') + opts->gen_list[num] = entry->gen; + if (entry->type == 'n') + opts->gen_list[num] = entry->gen; + if (entry->type == 'o') + opts->gen_list[num] = 0; + + /* If we are renumbering, then make sure all generation numbers are + * zero (except object 0 which must be free, and have a gen number of + * 65535). Changing the generation numbers (and indeed object numbers) + * will break encryption - so only do this if we are renumbering + * anyway. */ + if (opts->do_garbage >= 2) + opts->gen_list[num] = (num == 0 ? 
65535 : 0); + + if (opts->do_garbage && !opts->use_list[num]) + return; + + if (entry->type == 'n' || entry->type == 'o') + { + if (pass > 0) + padto(opts->out, opts->ofs_list[num]); + opts->ofs_list[num] = ftell(opts->out); + writeobject(xref, opts, num, opts->gen_list[num]); + } + else + opts->use_list[num] = 0; +} + +static void +writeobjects(pdf_document *xref, pdf_write_options *opts, int pass) +{ + int num; + int xref_len = pdf_xref_len(xref); + + fprintf(opts->out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10); + fprintf(opts->out, "%%\316\274\341\277\246\n\n"); + + dowriteobject(xref, opts, opts->start, pass); + + if (opts->do_linear) + { + /* Write first xref */ + if (pass == 0) + opts->first_xref_offset = ftell(opts->out); + else + padto(opts->out, opts->first_xref_offset); + writexref(xref, opts, opts->start, pdf_xref_len(xref), 1, opts->main_xref_offset, 0); + } + + for (num = opts->start+1; num < xref_len; num++) + dowriteobject(xref, opts, num, pass); + if (opts->do_linear && pass == 1) + { + int offset = (opts->start == 1 ? 
/*
 * Return the number of bits needed to represent x, i.e. the smallest i
 * such that (1 << i) > x. Zero and negative values need no bits and
 * yield 0.
 *
 * The count is done on an unsigned copy of x so that no signed shift can
 * overflow; values of 2^30 and above correctly yield 31.
 */
static int
my_log2(int x)
{
	unsigned int v;
	int bits = 0;

	if (x <= 0)
		return 0;

	for (v = (unsigned int)x; v != 0; v >>= 1)
		bits++;

	return bits;
}
max - min; + } + else if (opts->use_list[i] == 0) + page = -1; + else + page = opts->use_list[i]>>USE_PAGE_SHIFT; + + if (page >= 0) + { + pop[page]->num_objects++; + if (pop[page]->min_ofs > min) + pop[page]->min_ofs = min; + if (pop[page]->max_ofs < max) + pop[page]->max_ofs = max; + } + } + + min_objs_per_page = max_objs_per_page = pop[0]->num_objects; + min_page_length = max_page_length = pop[0]->max_ofs - pop[0]->min_ofs; + for (i=1; i < opts->page_count; i++) + { + int tmp; + if (min_objs_per_page > pop[i]->num_objects) + min_objs_per_page = pop[i]->num_objects; + if (max_objs_per_page < pop[i]->num_objects) + max_objs_per_page = pop[i]->num_objects; + tmp = pop[i]->max_ofs - pop[i]->min_ofs; + if (tmp < min_page_length) + min_page_length = tmp; + if (tmp > max_page_length) + max_page_length = tmp; + } + + for (i=0; i < opts->page_count; i++) + { + int count = 0; + page_objects *po = opts->page_object_lists->page[i]; + for (j = 0; j < po->len; j++) + { + if (i == 0 && opts->use_list[po->object[j]] & USE_PAGE1) + count++; + else if (i != 0 && opts->use_list[po->object[j]] & USE_SHARED) + count++; + } + po->num_shared = count; + if (i == 0 || count > max_shared_object_refs) + max_shared_object_refs = count; + } + if (min_shared_object > max_shared_object) + min_shared_object = max_shared_object = 0; + + /* Table F.3 - Header */ + /* Header Item 1: Least number of objects in a page */ + fz_write_buffer_bits(ctx, buf, min_objs_per_page, 32); + /* Header Item 2: Location of first pages page object */ + fz_write_buffer_bits(ctx, buf, opts->ofs_list[pop[0]->page_object_number], 32); + /* Header Item 3: Number of bits required to represent the difference + * between the greatest and least number of objects in a page. */ + objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page); + fz_write_buffer_bits(ctx, buf, objs_per_page_bits, 16); + /* Header Item 4: Least length of a page. 
*/ + fz_write_buffer_bits(ctx, buf, min_page_length, 32); + /* Header Item 5: Number of bits needed to represent the difference + * between the greatest and least length of a page. */ + page_len_bits = my_log2(max_page_length - min_page_length); + fz_write_buffer_bits(ctx, buf, page_len_bits, 16); + /* Header Item 6: Least offset to start of content stream (Acrobat + * sets this to always be 0) */ + fz_write_buffer_bits(ctx, buf, 0, 32); + /* Header Item 7: Number of bits needed to represent the difference + * between the greatest and least offset to content stream (Acrobat + * sets this to always be 0) */ + fz_write_buffer_bits(ctx, buf, 0, 16); + /* Header Item 8: Least content stream length. (Acrobat + * sets this to always be 0) */ + fz_write_buffer_bits(ctx, buf, 0, 32); + /* Header Item 9: Number of bits needed to represent the difference + * between the greatest and least content stream length (Acrobat + * sets this to always be the same as item 5) */ + fz_write_buffer_bits(ctx, buf, page_len_bits, 16); + /* Header Item 10: Number of bits needed to represent the greatest + * number of shared object references. */ + shared_object_bits = my_log2(max_shared_object_refs); + fz_write_buffer_bits(ctx, buf, shared_object_bits, 16); + /* Header Item 11: Number of bits needed to represent the greatest + * shared object identifier. */ + shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[0]->num_shared); + fz_write_buffer_bits(ctx, buf, shared_object_id_bits, 16); + /* Header Item 12: Number of bits needed to represent the numerator + * of the fractions. We always send 0. */ + fz_write_buffer_bits(ctx, buf, 0, 16); + /* Header Item 13: Number of bits needed to represent the denominator + * of the fractions. We always send 0. 
*/ + fz_write_buffer_bits(ctx, buf, 0, 16); + + /* Table F.4 - Page offset hint table (per page) */ + /* Item 1: A number that, when added to the least number of objects + * on a page, gives the number of objects in the page. */ + for (i = 0; i < opts->page_count; i++) + { + fz_write_buffer_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits); + } + fz_write_buffer_pad(ctx, buf); + /* Item 2: A number that, when added to the least page length, gives + * the length of the page in bytes. */ + for (i = 0; i < opts->page_count; i++) + { + fz_write_buffer_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits); + } + fz_write_buffer_pad(ctx, buf); + /* Item 3: The number of shared objects referenced from the page. */ + for (i = 0; i < opts->page_count; i++) + { + fz_write_buffer_bits(ctx, buf, pop[i]->num_shared, shared_object_bits); + } + fz_write_buffer_pad(ctx, buf); + /* Item 4: Shared object id for each shared object ref in every page. + * Spec says "not for page 1", but acrobat does send page 1's - all + * as zeros. */ + for (i = 0; i < opts->page_count; i++) + { + for (j = 0; j < pop[i]->len; j++) + { + int o = pop[i]->object[j]; + if (i == 0 && opts->use_list[o] & USE_PAGE1) + fz_write_buffer_bits(ctx, buf, 0 /* o - pop[0]->page_object_number */, shared_object_id_bits); + if (i != 0 && opts->use_list[o] & USE_SHARED) + fz_write_buffer_bits(ctx, buf, o - min_shared_object + pop[0]->num_shared, shared_object_id_bits); + } + } + fz_write_buffer_pad(ctx, buf); + /* Item 5: Numerator of fractional position for each shared object reference. */ + /* We always send 0 in 0 bits */ + /* Item 6: A number that, when added to the least offset to the start + * of the content stream (F.3 Item 6), gives the offset in bytes of + * start of the pages content stream object relative to the beginning + * of the page. Always 0 in 0 bits. 
*/ + /* Item 7: A number that, when added to the least content stream length + * (F.3 Item 8), gives the length of the pages content stream object. + * Always == Item 2 as least content stream length = least page stream + * length. + */ + for (i = 0; i < opts->page_count; i++) + { + fz_write_buffer_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits); + } + + /* Pad, and then do shared object hint table */ + fz_write_buffer_pad(ctx, buf); + opts->hints_shared_offset = buf->len; + + /* Table F.5: */ + /* Header Item 1: Object number of the first object in the shared + * objects section. */ + fz_write_buffer_bits(ctx, buf, min_shared_object, 32); + /* Header Item 2: Location of first object in the shared objects + * section. */ + fz_write_buffer_bits(ctx, buf, opts->ofs_list[min_shared_object], 32); + /* Header Item 3: The number of shared object entries for the first + * page. */ + fz_write_buffer_bits(ctx, buf, pop[0]->num_shared, 32); + /* Header Item 4: The number of shared object entries for the shared + * objects section + first page. */ + fz_write_buffer_bits(ctx, buf, max_shared_object - min_shared_object + pop[0]->num_shared, 32); + /* Header Item 5: The number of bits needed to represent the greatest + * number of objects in a shared object group (Always 0). */ + fz_write_buffer_bits(ctx, buf, 0, 16); + /* Header Item 6: The least length of a shared object group in bytes. */ + fz_write_buffer_bits(ctx, buf, min_shared_length, 32); + /* Header Item 7: The number of bits required to represent the + * difference between the greatest and least length of a shared object + * group. 
*/ + shared_length_bits = my_log2(max_shared_length - min_shared_length); + fz_write_buffer_bits(ctx, buf, shared_length_bits, 16); + + /* Table F.6 */ + /* Item 1: Shared object group length (page 1 objects) */ + for (j = 0; j < pop[0]->len; j++) + { + int o = pop[0]->object[j]; + int min, max; + min = opts->ofs_list[o]; + if (o == opts->start-1) + max = opts->main_xref_offset; + else if (o < xref_len-1) + max = opts->ofs_list[o+1]; + else + max = opts->ofs_list[1]; + if (opts->use_list[o] & USE_PAGE1) + fz_write_buffer_bits(ctx, buf, max - min - min_shared_length, shared_length_bits); + } + /* Item 1: Shared object group length (shared objects) */ + for (i = min_shared_object; i <= max_shared_object; i++) + { + int min, max; + min = opts->ofs_list[i]; + if (i == opts->start-1) + max = opts->main_xref_offset; + else if (i < xref_len-1) + max = opts->ofs_list[i+1]; + else + max = opts->ofs_list[1]; + fz_write_buffer_bits(ctx, buf, max - min - min_shared_length, shared_length_bits); + } + fz_write_buffer_pad(ctx, buf); + + /* Item 2: MD5 presence flags */ + for (i = max_shared_object - min_shared_object + pop[0]->num_shared; i > 0; i--) + { + fz_write_buffer_bits(ctx, buf, 0, 1); + } + fz_write_buffer_pad(ctx, buf); + /* Item 3: MD5 sums (not present) */ + fz_write_buffer_pad(ctx, buf); + /* Item 4: Number of objects in the group (not present) */ +} + +static void +make_hint_stream(pdf_document *xref, pdf_write_options *opts) +{ + fz_context *ctx = xref->ctx; + fz_buffer *buf = fz_new_buffer(ctx, 100); + + fz_try(ctx) + { + make_page_offset_hints(xref, opts, buf); + pdf_update_stream(xref, pdf_xref_len(xref)-1, buf); + opts->hintstream_len = buf->len; + fz_drop_buffer(ctx, buf); + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, buf); + fz_rethrow(ctx); + } +} + +#ifdef DEBUG_WRITING +static void dump_object_details(pdf_document *xref, pdf_write_options *opts) +{ + int i; + + for (i = 0; i < pdf_xref_len(xref); i++) + { + fprintf(stderr, "%d@%d: use=%d\n", i, 
opts->ofs_list[i], opts->use_list[i]); + } +} +#endif + +void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz_opts) +{ + int lastfree; + int num; + pdf_write_options opts = { 0 }; + fz_context *ctx; + int xref_len = pdf_xref_len(xref); + + if (!xref) + return; + + ctx = xref->ctx; + + opts.out = fopen(filename, "wb"); + if (!opts.out) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open output file '%s'", filename); + + fz_try(ctx) + { + opts.do_expand = fz_opts ? fz_opts->do_expand : 0; + opts.do_garbage = fz_opts ? fz_opts->do_garbage : 0; + opts.do_ascii = fz_opts ? fz_opts->do_ascii: 0; + opts.do_linear = fz_opts ? fz_opts->do_linear: 0; + opts.start = 0; + opts.main_xref_offset = INT_MIN; + /* We deliberately make these arrays long enough to cope with + * 1 to n access rather than 0..n-1, and add space for 2 new + * extra entries that may be required for linearization. */ + opts.use_list = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.ofs_list = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.gen_list = fz_calloc(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.renumber_map = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.rev_renumber_map = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.rev_gen_list = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.continue_on_error = fz_opts->continue_on_error; + opts.errors = fz_opts->errors; + + for (num = 0; num < xref_len; num++) + { + opts.use_list[num] = 0; + opts.ofs_list[num] = 0; + opts.renumber_map[num] = num; + opts.rev_renumber_map[num] = num; + opts.rev_gen_list[num] = pdf_get_xref_entry(xref, num)->gen; + } + + /* Make sure any objects hidden in compressed streams have been loaded */ + preloadobjstms(xref); + + /* Sweep & mark objects from the trailer */ + if (opts.do_garbage >= 1) + sweepobj(xref, &opts, pdf_trailer(xref)); + else + for (num = 0; num < xref_len; num++) + opts.use_list[num] = 1; + 
+ /* Coalesce and renumber duplicate objects */ + if (opts.do_garbage >= 3) + removeduplicateobjs(xref, &opts); + + /* Compact xref by renumbering and removing unused objects */ + if (opts.do_garbage >= 2 || opts.do_linear) + compactxref(xref, &opts); + + /* Make renumbering affect all indirect references and update xref */ + if (opts.do_garbage >= 2 || opts.do_linear) + renumberobjs(xref, &opts); + + if (opts.do_linear) + { + linearize(xref, &opts); + } + + writeobjects(xref, &opts, 0); + +#ifdef DEBUG_WRITING + dump_object_details(xref, &opts); +#endif + + /* Construct linked list of free object slots */ + lastfree = 0; + for (num = 0; num < xref_len; num++) + { + if (!opts.use_list[num]) + { + opts.gen_list[num]++; + opts.ofs_list[lastfree] = num; + lastfree = num; + } + } + + if (opts.do_linear) + { + opts.main_xref_offset = ftell(opts.out); + writexref(xref, &opts, 0, opts.start, 0, 0, opts.first_xref_offset); + opts.file_len = ftell(opts.out); + + make_hint_stream(xref, &opts); + opts.file_len += opts.hintstream_len; + opts.main_xref_offset += opts.hintstream_len; + update_linearization_params(xref, &opts); + fseek(opts.out, 0, 0); + writeobjects(xref, &opts, 1); + + padto(opts.out, opts.main_xref_offset); + writexref(xref, &opts, 0, opts.start, 0, 0, opts.first_xref_offset); + } + else + { + opts.first_xref_offset = ftell(opts.out); + writexref(xref, &opts, 0, xref_len, 1, 0, opts.first_xref_offset); + } + + xref->dirty = 0; + } + fz_always(ctx) + { +#ifdef DEBUG_LINEARIZATION + page_objects_dump(&opts); + objects_dump(xref, &opts); +#endif + fz_free(ctx, opts.use_list); + fz_free(ctx, opts.ofs_list); + fz_free(ctx, opts.gen_list); + fz_free(ctx, opts.renumber_map); + fz_free(ctx, opts.rev_renumber_map); + fz_free(ctx, opts.rev_gen_list); + pdf_drop_obj(opts.linear_l); + pdf_drop_obj(opts.linear_h0); + pdf_drop_obj(opts.linear_h1); + pdf_drop_obj(opts.linear_o); + pdf_drop_obj(opts.linear_e); + pdf_drop_obj(opts.linear_n); + pdf_drop_obj(opts.linear_t); + 
pdf_drop_obj(opts.hints_s); + pdf_drop_obj(opts.hints_length); + page_objects_list_destroy(ctx, opts.page_object_lists); + fclose(opts.out); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} diff --git a/source/pdf/pdf-xobject.c b/source/pdf/pdf-xobject.c new file mode 100644 index 00000000..61fc876a --- /dev/null +++ b/source/pdf/pdf-xobject.c @@ -0,0 +1,232 @@ +#include "mupdf/pdf.h" + +pdf_xobject * +pdf_keep_xobject(fz_context *ctx, pdf_xobject *xobj) +{ + return (pdf_xobject *)fz_keep_storable(ctx, &xobj->storable); +} + +void +pdf_drop_xobject(fz_context *ctx, pdf_xobject *xobj) +{ + fz_drop_storable(ctx, &xobj->storable); +} + +static void +pdf_free_xobject_imp(fz_context *ctx, fz_storable *xobj_) +{ + pdf_xobject *xobj = (pdf_xobject *)xobj_; + + if (xobj->colorspace) + fz_drop_colorspace(ctx, xobj->colorspace); + pdf_drop_obj(xobj->resources); + pdf_drop_obj(xobj->contents); + pdf_drop_obj(xobj->me); + fz_free(ctx, xobj); +} + +static unsigned int +pdf_xobject_size(pdf_xobject *xobj) +{ + if (xobj == NULL) + return 0; + return sizeof(*xobj) + (xobj->colorspace ? 
xobj->colorspace->size : 0); +} + +pdf_xobject * +pdf_load_xobject(pdf_document *xref, pdf_obj *dict) +{ + pdf_xobject *form; + pdf_obj *obj; + fz_context *ctx = xref->ctx; + + if ((form = pdf_find_item(ctx, pdf_free_xobject_imp, dict))) + { + return form; + } + + form = fz_malloc_struct(ctx, pdf_xobject); + FZ_INIT_STORABLE(form, 1, pdf_free_xobject_imp); + form->resources = NULL; + form->contents = NULL; + form->colorspace = NULL; + form->me = NULL; + form->iteration = 0; + + /* Store item immediately, to avoid possible recursion if objects refer back to this one */ + pdf_store_item(ctx, dict, form, pdf_xobject_size(form)); + + fz_try(ctx) + { + obj = pdf_dict_gets(dict, "BBox"); + pdf_to_rect(ctx, obj, &form->bbox); + + obj = pdf_dict_gets(dict, "Matrix"); + if (obj) + pdf_to_matrix(ctx, obj, &form->matrix); + else + form->matrix = fz_identity; + + form->isolated = 0; + form->knockout = 0; + form->transparency = 0; + + obj = pdf_dict_gets(dict, "Group"); + if (obj) + { + pdf_obj *attrs = obj; + + form->isolated = pdf_to_bool(pdf_dict_gets(attrs, "I")); + form->knockout = pdf_to_bool(pdf_dict_gets(attrs, "K")); + + obj = pdf_dict_gets(attrs, "S"); + if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "Transparency")) + form->transparency = 1; + + obj = pdf_dict_gets(attrs, "CS"); + if (obj) + { + form->colorspace = pdf_load_colorspace(xref, obj); + if (!form->colorspace) + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load xobject colorspace"); + } + } + + form->resources = pdf_dict_gets(dict, "Resources"); + if (form->resources) + pdf_keep_obj(form->resources); + + form->contents = pdf_keep_obj(dict); + } + fz_catch(ctx) + { + pdf_remove_item(ctx, pdf_free_xobject_imp, dict); + pdf_drop_xobject(ctx, form); + fz_rethrow_message(ctx, "cannot load xobject content stream (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); + } + form->me = pdf_keep_obj(dict); + + return form; +} + +pdf_obj * +pdf_new_xobject(pdf_document *xref, const fz_rect *bbox, const fz_matrix *mat) +{ + 
int idict_num; + pdf_obj *idict = NULL; + pdf_obj *dict = NULL; + pdf_xobject *form = NULL; + pdf_obj *obj = NULL; + pdf_obj *res = NULL; + pdf_obj *procset = NULL; + fz_context *ctx = xref->ctx; + + fz_var(idict); + fz_var(dict); + fz_var(form); + fz_var(obj); + fz_var(res); + fz_var(procset); + fz_try(ctx) + { + dict = pdf_new_dict(ctx, 0); + + obj = pdf_new_rect(ctx, bbox); + pdf_dict_puts(dict, "BBox", obj); + pdf_drop_obj(obj); + obj = NULL; + + obj = pdf_new_int(ctx, 1); + pdf_dict_puts(dict, "FormType", obj); + pdf_drop_obj(obj); + obj = NULL; + + obj = pdf_new_int(ctx, 0); + pdf_dict_puts(dict, "Length", obj); + pdf_drop_obj(obj); + obj = NULL; + + obj = pdf_new_matrix(ctx, mat); + pdf_dict_puts(dict, "Matrix", obj); + pdf_drop_obj(obj); + obj = NULL; + + res = pdf_new_dict(ctx, 0); + procset = pdf_new_array(ctx, 2); + obj = pdf_new_name(ctx, "PDF"); + pdf_array_push(procset, obj); + pdf_drop_obj(obj); + obj = NULL; + obj = pdf_new_name(ctx, "Text"); + pdf_array_push(procset, obj); + pdf_drop_obj(obj); + obj = NULL; + pdf_dict_puts(res, "ProcSet", procset); + pdf_drop_obj(procset); + procset = NULL; + pdf_dict_puts(dict, "Resources", res); + + obj = pdf_new_name(ctx, "Form"); + pdf_dict_puts(dict, "Subtype", obj); + pdf_drop_obj(obj); + obj = NULL; + + obj = pdf_new_name(ctx, "XObject"); + pdf_dict_puts(dict, "Type", obj); + pdf_drop_obj(obj); + obj = NULL; + + form = fz_malloc_struct(ctx, pdf_xobject); + FZ_INIT_STORABLE(form, 1, pdf_free_xobject_imp); + form->resources = NULL; + form->contents = NULL; + form->colorspace = NULL; + form->me = NULL; + form->iteration = 0; + + form->bbox = *bbox; + + form->matrix = *mat; + + form->isolated = 0; + form->knockout = 0; + form->transparency = 0; + + form->resources = res; + res = NULL; + + idict_num = pdf_create_object(xref); + pdf_update_object(xref, idict_num, dict); + idict = pdf_new_indirect(ctx, idict_num, 0, xref); + pdf_drop_obj(dict); + dict = NULL; + + pdf_store_item(ctx, idict, form, 
pdf_xobject_size(form)); + + form->contents = pdf_keep_obj(idict); + form->me = pdf_keep_obj(idict); + + pdf_drop_xobject(ctx, form); + form = NULL; + } + fz_catch(ctx) + { + pdf_drop_obj(procset); + pdf_drop_obj(res); + pdf_drop_obj(obj); + pdf_drop_obj(dict); + pdf_drop_obj(idict); + pdf_drop_xobject(ctx, form); + fz_rethrow_message(ctx, "failed to create xobject)"); + } + + return idict; +} + +void pdf_update_xobject_contents(pdf_document *xref, pdf_xobject *form, fz_buffer *buffer) +{ + pdf_dict_dels(form->contents, "Filter"); + pdf_dict_puts_drop(form->contents, "Length", pdf_new_int(xref->ctx, buffer->len)); + pdf_update_stream(xref, pdf_to_num(form->contents), buffer); + form->iteration ++; +} diff --git a/source/pdf/pdf-xref-aux.c b/source/pdf/pdf-xref-aux.c new file mode 100644 index 00000000..48634374 --- /dev/null +++ b/source/pdf/pdf-xref-aux.c @@ -0,0 +1,39 @@ +#include "mupdf/pdf.h" + +/* + These functions have been split out of pdf_xref.c to allow tools + to be linked without pulling in the interpreter. The interpreter + references the built-in font and cmap resources which are quite + big. Not linking those into the tools saves roughly 6MB in the + resulting executables. 
+*/ + +static void pdf_run_page_contents_shim(fz_document *doc, fz_page *page, fz_device *dev, const fz_matrix *transform, fz_cookie *cookie) +{ + pdf_run_page_contents((pdf_document*)doc, (pdf_page*)page, dev, transform, cookie); +} + +static void pdf_run_annot_shim(fz_document *doc, fz_page *page, fz_annot *annot, fz_device *dev, const fz_matrix *transform, fz_cookie *cookie) +{ + pdf_run_annot((pdf_document*)doc, (pdf_page*)page, (pdf_annot *)annot, dev, transform, cookie); +} + +pdf_document * +pdf_open_document_with_stream(fz_context *ctx, fz_stream *file) +{ + pdf_document *doc = pdf_open_document_no_run_with_stream(ctx, file); + doc->super.run_page_contents = pdf_run_page_contents_shim; + doc->super.run_annot = pdf_run_annot_shim; + doc->update_appearance = pdf_update_appearance; + return doc; +} + +pdf_document * +pdf_open_document(fz_context *ctx, const char *filename) +{ + pdf_document *doc = pdf_open_document_no_run(ctx, filename); + doc->super.run_page_contents = pdf_run_page_contents_shim; + doc->super.run_annot = pdf_run_annot_shim; + doc->update_appearance = pdf_update_appearance; + return doc; +} diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c new file mode 100644 index 00000000..9224d515 --- /dev/null +++ b/source/pdf/pdf-xref.c @@ -0,0 +1,1552 @@ +#include "mupdf/pdf.h" + +static inline int iswhite(int ch) +{ + return + ch == '\000' || ch == '\011' || ch == '\012' || + ch == '\014' || ch == '\015' || ch == '\040'; +} + +/* + * xref tables + */ + +static void pdf_free_xref_sections(pdf_document *doc) +{ + fz_context *ctx = doc->ctx; + int x, e; + + for (x = 0; x < doc->num_xref_sections; x++) + { + pdf_xref *xref = &doc->xref_sections[x]; + + for (e = 0; e < xref->len; e++) + { + pdf_xref_entry *entry = &xref->table[e]; + + if (entry->obj) + { + pdf_drop_obj(entry->obj); + fz_drop_buffer(ctx, entry->stm_buf); + } + } + + fz_free(ctx, xref->table); + pdf_drop_obj(xref->trailer); + } + + fz_free(ctx, doc->xref_sections); + 
doc->xref_sections = NULL; + doc->num_xref_sections = 0; +} + +static void pdf_resize_xref(fz_context *ctx, pdf_xref *xref, int newlen) +{ + int i; + + xref->table = fz_resize_array(ctx, xref->table, newlen, sizeof(pdf_xref_entry)); + for (i = xref->len; i < newlen; i++) + { + xref->table[i].type = 0; + xref->table[i].ofs = 0; + xref->table[i].gen = 0; + xref->table[i].stm_ofs = 0; + xref->table[i].stm_buf = NULL; + xref->table[i].obj = NULL; + } + xref->len = newlen; +} + +static void pdf_populate_next_xref_level(pdf_document *doc) +{ + pdf_xref *xref; + doc->xref_sections = fz_resize_array(doc->ctx, doc->xref_sections, doc->num_xref_sections + 1, sizeof(pdf_xref)); + doc->num_xref_sections++; + + xref = &doc->xref_sections[doc->num_xref_sections - 1]; + xref->len = 0; + xref->table = NULL; + xref->trailer = NULL; +} + +pdf_obj *pdf_trailer(pdf_document *doc) +{ + /* Return the document's final trailer */ + pdf_xref *xref = &doc->xref_sections[0]; + + return xref->trailer; +} + +void pdf_set_populating_xref_trailer(pdf_document *doc, pdf_obj *trailer) +{ + /* Update the trailer of the xref section being populated */ + pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections - 1]; + pdf_drop_obj(xref->trailer); + xref->trailer = pdf_keep_obj(trailer); +} + +int pdf_xref_len(pdf_document *doc) +{ + /* Return the length of the document's final xref section */ + pdf_xref *xref = &doc->xref_sections[0]; + + return xref->len; +} + +/* Used while reading the individual xref sections from a file */ +pdf_xref_entry *pdf_get_populating_xref_entry(pdf_document *doc, int i) +{ + /* Return an entry within the xref currently being populated */ + pdf_xref *xref; + + if (doc->num_xref_sections == 0) + { + doc->xref_sections = fz_calloc(doc->ctx, 1, sizeof(pdf_xref)); + doc->num_xref_sections = 1; + } + + xref = &doc->xref_sections[doc->num_xref_sections - 1]; + + if (i >= xref->len) + pdf_resize_xref(doc->ctx, xref, i+1); + + return &xref->table[i]; +} + +/* Used after loading 
a document to access entries */ +pdf_xref_entry *pdf_get_xref_entry(pdf_document *doc, int i) +{ + int j; + + /* Find the first xref section where the entry is defined. */ + for (j = 0; j < doc->num_xref_sections; j++) + { + pdf_xref *xref = &doc->xref_sections[j]; + + if (i >= 0 && i < xref->len) + { + pdf_xref_entry *entry = &xref->table[i]; + + if (entry->type) + return entry; + } + } + + /* + Didn't find the entry in any section. Return the entry from the final + section. + */ + return &doc->xref_sections[0].table[i]; +} + +/* Used when altering a document */ +static pdf_xref_entry *pdf_get_new_xref_entry(pdf_document *doc, int i) +{ + fz_context *ctx = doc->ctx; + pdf_xref *xref; + + /* Make a new final xref section if we haven't already */ + if (!doc->xref_altered) + { + doc->xref_sections = fz_resize_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, sizeof(pdf_xref)); + memmove(&doc->xref_sections[1], &doc->xref_sections[0], doc->num_xref_sections * sizeof(pdf_xref)); + doc->num_xref_sections++; + xref = &doc->xref_sections[0]; + xref->len = 0; + xref->table = NULL; + xref->trailer = pdf_keep_obj(doc->xref_sections[1].trailer); + doc->xref_altered = 1; + } + + xref = &doc->xref_sections[0]; + if (i >= xref->len) + pdf_resize_xref(ctx, xref, i + 1); + + return &xref->table[i]; +} + +void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n) +{ + fz_context *ctx = doc->ctx; + pdf_xref *xref; + pdf_obj *trailer = pdf_keep_obj(pdf_trailer(doc)); + + /* The new table completely replaces the previous separate sections */ + pdf_free_xref_sections(doc); + + fz_var(trailer); + fz_try(ctx) + { + xref = fz_calloc(ctx, 1, sizeof(pdf_xref)); + xref->table = entries; + xref->len = n; + xref->trailer = trailer; + trailer = NULL; + + doc->xref_sections = xref; + doc->num_xref_sections = 1; + } + fz_catch(ctx) + { + pdf_drop_obj(trailer); + fz_rethrow(ctx); + } +} + +/* + * magic version tag and startxref + */ + +static void 
+pdf_load_version(pdf_document *xref) +{ + char buf[20]; + + fz_seek(xref->file, 0, SEEK_SET); + fz_read_line(xref->file, buf, sizeof buf); + if (memcmp(buf, "%PDF-", 5) != 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "cannot recognize version marker"); + + xref->version = atoi(buf + 5) * 10 + atoi(buf + 7); +} + +static void +pdf_read_start_xref(pdf_document *xref) +{ + unsigned char buf[1024]; + int t, n; + int i; + + fz_seek(xref->file, 0, SEEK_END); + + xref->file_size = fz_tell(xref->file); + + t = fz_maxi(0, xref->file_size - (int)sizeof buf); + fz_seek(xref->file, t, SEEK_SET); + + n = fz_read(xref->file, buf, sizeof buf); + if (n < 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "cannot read from file"); + + for (i = n - 9; i >= 0; i--) + { + if (memcmp(buf + i, "startxref", 9) == 0) + { + i += 9; + while (iswhite(buf[i]) && i < n) + i ++; + xref->startxref = atoi((char*)(buf + i)); + if (xref->startxref != 0) + return; + break; + } + } + + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "cannot find startxref"); +} + +/* + * trailer dictionary + */ + +static int +pdf_xref_size_from_old_trailer(pdf_document *xref, pdf_lexbuf *buf) +{ + int len; + char *s; + int t; + pdf_token tok; + int c; + int size; + int ofs; + + /* Record the current file read offset so that we can reinstate it */ + ofs = fz_tell(xref->file); + + fz_read_line(xref->file, buf->scratch, buf->size); + if (strncmp(buf->scratch, "xref", 4) != 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "cannot find xref marker"); + + while (1) + { + c = fz_peek_byte(xref->file); + if (!(c >= '0' && c <= '9')) + break; + + fz_read_line(xref->file, buf->scratch, buf->size); + s = buf->scratch; + fz_strsep(&s, " "); /* ignore ofs */ + if (!s) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "invalid range marker in xref"); + len = fz_atoi(fz_strsep(&s, " ")); + + /* broken pdfs where the section is not on a separate line */ + if (s && *s != '\0') + fz_seek(xref->file, -(2 + (int)strlen(s)), SEEK_CUR); + + t = fz_tell(xref->file); + if (t 
< 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "cannot tell in file"); + + fz_seek(xref->file, t + 20 * len, SEEK_SET); + } + + fz_try(xref->ctx) + { + pdf_obj *trailer; + tok = pdf_lex(xref->file, buf); + if (tok != PDF_TOK_TRAILER) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "expected trailer marker"); + + tok = pdf_lex(xref->file, buf); + if (tok != PDF_TOK_OPEN_DICT) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "expected trailer dictionary"); + + trailer = pdf_parse_dict(xref, xref->file, buf); + + size = pdf_to_int(pdf_dict_gets(trailer, "Size")); + if (!size) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "trailer missing Size entry"); + + pdf_drop_obj(trailer); + } + fz_catch(xref->ctx) + { + fz_rethrow_message(xref->ctx, "cannot parse trailer"); + } + + fz_seek(xref->file, ofs, SEEK_SET); + + return size; +} + +pdf_obj * +pdf_new_ref(pdf_document *xref, pdf_obj *obj) +{ + int num = pdf_create_object(xref); + pdf_update_object(xref, num, obj); + return pdf_new_indirect(xref->ctx, num, 0, xref); +} + +static pdf_obj * +pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf) +{ + int ofs, len; + char *s; + int n; + pdf_token tok; + int i; + int c; + pdf_obj *trailer; + int xref_len = pdf_xref_size_from_old_trailer(xref, buf); + + /* Access last entry to ensure xref size up front and avoid reallocs */ + (void)pdf_get_populating_xref_entry(xref, xref_len - 1); + + fz_read_line(xref->file, buf->scratch, buf->size); + if (strncmp(buf->scratch, "xref", 4) != 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "cannot find xref marker"); + + while (1) + { + c = fz_peek_byte(xref->file); + if (!(c >= '0' && c <= '9')) + break; + + fz_read_line(xref->file, buf->scratch, buf->size); + s = buf->scratch; + ofs = fz_atoi(fz_strsep(&s, " ")); + len = fz_atoi(fz_strsep(&s, " ")); + + /* broken pdfs where the section is not on a separate line */ + if (s && *s != '\0') + { + fz_warn(xref->ctx, "broken xref section. 
proceeding anyway."); + fz_seek(xref->file, -(2 + (int)strlen(s)), SEEK_CUR); + } + + if (ofs < 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "out of range object num in xref: %d", ofs); + + /* broken pdfs where size in trailer undershoots entries in xref sections */ + if (ofs + len > xref_len) + { + fz_warn(xref->ctx, "broken xref section, proceeding anyway."); + /* Access last entry to ensure size */ + (void)pdf_get_populating_xref_entry(xref, ofs + len - 1); + } + + for (i = ofs; i < ofs + len; i++) + { + pdf_xref_entry *entry = pdf_get_populating_xref_entry(xref, i); + n = fz_read(xref->file, (unsigned char *) buf->scratch, 20); + if (n < 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "cannot read xref table"); + if (!entry->type) + { + s = buf->scratch; + + /* broken pdfs where line start with white space */ + while (*s != '\0' && iswhite(*s)) + s++; + + entry->ofs = atoi(s); + entry->gen = atoi(s + 11); + entry->type = s[17]; + if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o') + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], i, entry->gen); + } + } + } + + fz_try(xref->ctx) + { + tok = pdf_lex(xref->file, buf); + if (tok != PDF_TOK_TRAILER) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "expected trailer marker"); + + tok = pdf_lex(xref->file, buf); + if (tok != PDF_TOK_OPEN_DICT) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "expected trailer dictionary"); + + trailer = pdf_parse_dict(xref, xref->file, buf); + } + fz_catch(xref->ctx) + { + fz_rethrow_message(xref->ctx, "cannot parse trailer"); + } + return trailer; +} + +static void +pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2) +{ + int i, n; + + if (i0 < 0 || i1 < 0) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "negative xref stream entry index"); + if (i0 + i1 > pdf_xref_len(xref)) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "xref stream has too many entries"); + + for (i = i0; i < i0 + i1; i++) + { + pdf_xref_entry *entry = 
pdf_get_populating_xref_entry(xref, i); + int a = 0; + int b = 0; + int c = 0; + + if (fz_is_eof(stm)) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "truncated xref stream"); + + for (n = 0; n < w0; n++) + a = (a << 8) + fz_read_byte(stm); + for (n = 0; n < w1; n++) + b = (b << 8) + fz_read_byte(stm); + for (n = 0; n < w2; n++) + c = (c << 8) + fz_read_byte(stm); + + if (!entry->type) + { + int t = w0 ? a : 1; + entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; + entry->ofs = w1 ? b : 0; + entry->gen = w2 ? c : 0; + } + } +} + +/* Entered with file locked, remains locked throughout. */ +static pdf_obj * +pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) +{ + fz_stream *stm = NULL; + pdf_obj *trailer = NULL; + pdf_obj *index = NULL; + pdf_obj *obj = NULL; + int num, gen, stm_ofs; + int size, w0, w1, w2; + int t; + fz_context *ctx = xref->ctx; + + fz_var(trailer); + fz_var(stm); + + fz_try(ctx) + { + pdf_xref_entry *entry; + int ofs = fz_tell(xref->file); + trailer = pdf_parse_ind_obj(xref, xref->file, buf, &num, &gen, &stm_ofs); + entry = pdf_get_populating_xref_entry(xref, num); + entry->ofs = ofs; + entry->gen = gen; + entry->stm_ofs = stm_ofs; + pdf_drop_obj(entry->obj); + entry->obj = pdf_keep_obj(trailer); + entry->type = 'n'; + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot parse compressed xref stream object"); + } + + fz_try(ctx) + { + obj = pdf_dict_gets(trailer, "Size"); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing Size entry (%d %d R)", num, gen); + + size = pdf_to_int(obj); + /* Access xref entry to assure table size */ + (void)pdf_get_populating_xref_entry(xref, size-1); + + if (num < 0 || num >= pdf_xref_len(xref)) + fz_throw(ctx, FZ_ERROR_GENERIC, "object id (%d %d R) out of range (0..%d)", num, gen, pdf_xref_len(xref) - 1); + + obj = pdf_dict_gets(trailer, "W"); + if (!obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing W entry (%d %d R)", num, gen); + w0 = pdf_to_int(pdf_array_get(obj, 0)); + w1 = 
pdf_to_int(pdf_array_get(obj, 1)); + w2 = pdf_to_int(pdf_array_get(obj, 2)); + + if (w0 < 0) + fz_warn(ctx, "xref stream objects have corrupt type"); + if (w1 < 0) + fz_warn(ctx, "xref stream objects have corrupt offset"); + if (w2 < 0) + fz_warn(ctx, "xref stream objects have corrupt generation"); + + w0 = w0 < 0 ? 0 : w0; + w1 = w1 < 0 ? 0 : w1; + w2 = w2 < 0 ? 0 : w2; + + index = pdf_dict_gets(trailer, "Index"); + + stm = pdf_open_stream_with_offset(xref, num, gen, trailer, stm_ofs); + + if (!index) + { + pdf_read_new_xref_section(xref, stm, 0, size, w0, w1, w2); + } + else + { + int n = pdf_array_len(index); + for (t = 0; t < n; t += 2) + { + int i0 = pdf_to_int(pdf_array_get(index, t + 0)); + int i1 = pdf_to_int(pdf_array_get(index, t + 1)); + pdf_read_new_xref_section(xref, stm, i0, i1, w0, w1, w2); + } + } + } + fz_always(ctx) + { + fz_close(stm); + } + fz_catch(ctx) + { + pdf_drop_obj(trailer); + fz_rethrow(ctx); + } + + return trailer; +} + +/* File is locked on entry, and exit (but may be dropped in the middle) */ +static pdf_obj * +pdf_read_xref(pdf_document *xref, int ofs, pdf_lexbuf *buf) +{ + int c; + fz_context *ctx = xref->ctx; + pdf_obj *trailer; + + fz_seek(xref->file, ofs, SEEK_SET); + + while (iswhite(fz_peek_byte(xref->file))) + fz_read_byte(xref->file); + + fz_try(ctx) + { + c = fz_peek_byte(xref->file); + if (c == 'x') + trailer = pdf_read_old_xref(xref, buf); + else if (c >= '0' && c <= '9') + trailer = pdf_read_new_xref(xref, buf); + else + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize xref format"); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot read xref (ofs=%d)", ofs); + } + return trailer; +} + +typedef struct ofs_list_s ofs_list; + +struct ofs_list_s +{ + int max; + int len; + int *list; +}; + +static int +read_xref_section(pdf_document *xref, int ofs, pdf_lexbuf *buf, ofs_list *offsets) +{ + pdf_obj *trailer = NULL; + fz_context *ctx = xref->ctx; + int xrefstmofs = 0; + int prevofs = 0; + + fz_var(trailer); + + 
fz_try(ctx) + { + int i; + /* Avoid potential infinite recursion */ + for (i = 0; i < offsets->len; i ++) + { + if (offsets->list[i] == ofs) + break; + } + if (i < offsets->len) + { + fz_warn(ctx, "ignoring xref recursion with offset %d", ofs); + return 0; + } + if (offsets->len == offsets->max) + { + offsets->list = fz_resize_array(ctx, offsets->list, offsets->max*2, sizeof(int)); + offsets->max *= 2; + } + offsets->list[offsets->len++] = ofs; + + trailer = pdf_read_xref(xref, ofs, buf); + + pdf_set_populating_xref_trailer(xref, trailer); + + /* FIXME: do we overwrite free entries properly? */ + xrefstmofs = pdf_to_int(pdf_dict_gets(trailer, "XRefStm")); + if (xrefstmofs) + { + if (xrefstmofs < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset"); + + /* + Read the XRefStm stream, but throw away the resulting trailer. We do not + follow any Prev tag therein, as specified on Page 108 of the PDF reference + 1.7 + */ + pdf_drop_obj(pdf_read_xref(xref, xrefstmofs, buf)); + } + + prevofs = pdf_to_int(pdf_dict_gets(trailer, "Prev")); + if (prevofs < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset for previous xref stream"); + } + fz_always(ctx) + { + pdf_drop_obj(trailer); + trailer = NULL; + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot read xref at offset %d", ofs); + } + + return prevofs; +} + +static void +pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf) +{ + fz_context *ctx = xref->ctx; + ofs_list list; + + list.len = 0; + list.max = 10; + list.list = fz_malloc_array(ctx, 10, sizeof(int)); + fz_try(ctx) + { + while(ofs) + { + pdf_populate_next_xref_level(xref); + ofs = read_xref_section(xref, ofs, buf, &list); + } + } + fz_always(ctx) + { + fz_free(ctx, list.list); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +/* + * load xref tables from pdf + * + * File locked on entry, throughout and on exit. 
+ */ + +static void +pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) +{ + int i; + int xref_len; + fz_context *ctx = xref->ctx; + + pdf_load_version(xref); + + pdf_read_start_xref(xref); + + pdf_read_xref_sections(xref, xref->startxref, buf); + + /* broken pdfs where first object is not free */ + if (pdf_get_xref_entry(xref, 0)->type != 'f') + fz_throw(ctx, FZ_ERROR_GENERIC, "first object in xref is not free"); + + /* broken pdfs where object offsets are out of range */ + xref_len = pdf_xref_len(xref); + for (i = 0; i < xref_len; i++) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + if (entry->type == 'n') + { + /* Special case code: "0000000000 * n" means free, + * according to some producers (inc Quartz) */ + if (entry->ofs == 0) + entry->type = 'f'; + else if (entry->ofs <= 0 || entry->ofs >= xref->file_size) + fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", entry->ofs, i); + } + if (entry->type == 'o') + if (entry->ofs <= 0 || entry->ofs >= xref_len || pdf_get_xref_entry(xref, entry->ofs)->type != 'n') + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", entry->ofs, i); + } +} + +void +pdf_ocg_set_config(pdf_document *xref, int config) +{ + int i, j, len, len2; + pdf_ocg_descriptor *desc = xref->ocg; + pdf_obj *obj, *cobj; + char *name; + + obj = pdf_dict_gets(pdf_dict_gets(pdf_trailer(xref), "Root"), "OCProperties"); + if (!obj) + { + if (config == 0) + return; + else + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "Unknown OCG config (None known!)"); + } + if (config == 0) + { + cobj = pdf_dict_gets(obj, "D"); + if (!cobj) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "No default OCG config"); + } + else + { + cobj = pdf_array_get(pdf_dict_gets(obj, "Configs"), config); + if (!cobj) + fz_throw(xref->ctx, FZ_ERROR_GENERIC, "Illegal OCG config"); + } + + pdf_drop_obj(desc->intent); + desc->intent = pdf_dict_gets(cobj, "Intent"); + if (desc->intent) + pdf_keep_obj(desc->intent); + + len 
= desc->len; + name = pdf_to_name(pdf_dict_gets(cobj, "BaseState")); + if (strcmp(name, "Unchanged") == 0) + { + /* Do nothing */ + } + else if (strcmp(name, "OFF") == 0) + { + for (i = 0; i < len; i++) + { + desc->ocgs[i].state = 0; + } + } + else /* Default to ON */ + { + for (i = 0; i < len; i++) + { + desc->ocgs[i].state = 1; + } + } + + obj = pdf_dict_gets(cobj, "ON"); + len2 = pdf_array_len(obj); + for (i = 0; i < len2; i++) + { + pdf_obj *o = pdf_array_get(obj, i); + int n = pdf_to_num(o); + int g = pdf_to_gen(o); + for (j=0; j < len; j++) + { + if (desc->ocgs[j].num == n && desc->ocgs[j].gen == g) + { + desc->ocgs[j].state = 1; + break; + } + } + } + + obj = pdf_dict_gets(cobj, "OFF"); + len2 = pdf_array_len(obj); + for (i = 0; i < len2; i++) + { + pdf_obj *o = pdf_array_get(obj, i); + int n = pdf_to_num(o); + int g = pdf_to_gen(o); + for (j=0; j < len; j++) + { + if (desc->ocgs[j].num == n && desc->ocgs[j].gen == g) + { + desc->ocgs[j].state = 0; + break; + } + } + } + + /* FIXME: Should make 'num configs' available in the descriptor. */ + /* FIXME: Should copy out 'Intent' here into the descriptor, and remove + * csi->intent in favour of that. */ + /* FIXME: Should copy 'AS' into the descriptor, and visibility + * decisions should respect it. 
*/ + /* FIXME: Make 'Order' available via the descriptor (when we have an + * app that needs it) */ + /* FIXME: Make 'ListMode' available via the descriptor (when we have + * an app that needs it) */ + /* FIXME: Make 'RBGroups' available via the descriptor (when we have + * an app that needs it) */ + /* FIXME: Make 'Locked' available via the descriptor (when we have + * an app that needs it) */ +} + +static void +pdf_read_ocg(pdf_document *xref) +{ + pdf_obj *obj, *ocg; + int len, i; + pdf_ocg_descriptor *desc; + fz_context *ctx = xref->ctx; + + fz_var(desc); + + obj = pdf_dict_gets(pdf_dict_gets(pdf_trailer(xref), "Root"), "OCProperties"); + if (!obj) + return; + ocg = pdf_dict_gets(obj, "OCGs"); + if (!ocg || !pdf_is_array(ocg)) + /* Not ever supposed to happen, but live with it. */ + return; + len = pdf_array_len(ocg); + fz_try(ctx) + { + desc = fz_calloc(ctx, 1, sizeof(*desc)); + desc->len = len; + desc->ocgs = fz_calloc(ctx, len, sizeof(*desc->ocgs)); + desc->intent = NULL; + for (i=0; i < len; i++) + { + pdf_obj *o = pdf_array_get(ocg, i); + desc->ocgs[i].num = pdf_to_num(o); + desc->ocgs[i].gen = pdf_to_gen(o); + desc->ocgs[i].state = 0; + } + xref->ocg = desc; + } + fz_catch(ctx) + { + if (desc) + fz_free(ctx, desc->ocgs); + fz_free(ctx, desc); + fz_rethrow(ctx); + } + + pdf_ocg_set_config(xref, 0); +} + +static void +pdf_free_ocg(fz_context *ctx, pdf_ocg_descriptor *desc) +{ + if (!desc) + return; + + pdf_drop_obj(desc->intent); + fz_free(ctx, desc->ocgs); + fz_free(ctx, desc); +} + +/* + * Initialize and load xref tables. + * If password is not null, try to decrypt. + */ + +static void +pdf_init_document(pdf_document *xref) +{ + fz_context *ctx = xref->ctx; + pdf_obj *encrypt, *id; + pdf_obj *dict = NULL; + pdf_obj *obj; + pdf_obj *nobj = NULL; + int i, repaired = 0; + + fz_var(dict); + fz_var(nobj); + + fz_try(ctx) + { + pdf_load_xref(xref, &xref->lexbuf.base); + } + fz_catch(ctx) + { + /* FIXME: TryLater ? 
*/ + pdf_free_xref_sections(xref); + fz_warn(xref->ctx, "trying to repair broken xref"); + repaired = 1; + } + + fz_try(ctx) + { + int hasroot, hasinfo; + + if (repaired) + pdf_repair_xref(xref, &xref->lexbuf.base); + + encrypt = pdf_dict_gets(pdf_trailer(xref), "Encrypt"); + id = pdf_dict_gets(pdf_trailer(xref), "ID"); + if (pdf_is_dict(encrypt)) + xref->crypt = pdf_new_crypt(ctx, encrypt, id); + + /* Allow lazy clients to read encrypted files with a blank password */ + pdf_authenticate_password(xref, ""); + + if (repaired) + { + int xref_len = pdf_xref_len(xref); + pdf_repair_obj_stms(xref); + + hasroot = (pdf_dict_gets(pdf_trailer(xref), "Root") != NULL); + hasinfo = (pdf_dict_gets(pdf_trailer(xref), "Info") != NULL); + + for (i = 1; i < xref_len; i++) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + if (entry->type == 0 || entry->type == 'f') + continue; + + fz_try(ctx) + { + dict = pdf_load_object(xref, i, 0); + } + fz_catch(ctx) + { + /* FIXME: TryLater ? */ + fz_warn(ctx, "ignoring broken object (%d 0 R)", i); + continue; + } + + if (!hasroot) + { + obj = pdf_dict_gets(dict, "Type"); + if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "Catalog")) + { + nobj = pdf_new_indirect(ctx, i, 0, xref); + pdf_dict_puts(pdf_trailer(xref), "Root", nobj); + pdf_drop_obj(nobj); + nobj = NULL; + } + } + + if (!hasinfo) + { + if (pdf_dict_gets(dict, "Creator") || pdf_dict_gets(dict, "Producer")) + { + nobj = pdf_new_indirect(ctx, i, 0, xref); + pdf_dict_puts(pdf_trailer(xref), "Info", nobj); + pdf_drop_obj(nobj); + nobj = NULL; + } + } + + pdf_drop_obj(dict); + dict = NULL; + } + } + xref->js = pdf_new_js(xref); + pdf_js_load_document_level(xref->js); + } + fz_catch(ctx) + { + pdf_drop_obj(dict); + pdf_drop_obj(nobj); + pdf_close_document(xref); + fz_rethrow_message(ctx, "cannot open document"); + } + + fz_try(ctx) + { + pdf_read_ocg(xref); + } + fz_catch(ctx) + { + /* FIXME: TryLater ? 
*/ + fz_warn(ctx, "Ignoring Broken Optional Content"); + } +} + +void +pdf_close_document(pdf_document *xref) +{ + int i; + fz_context *ctx; + + if (!xref) + return; + ctx = xref->ctx; + + pdf_drop_js(xref->js); + + pdf_free_xref_sections(xref); + + if (xref->page_objs) + { + for (i = 0; i < xref->page_len; i++) + pdf_drop_obj(xref->page_objs[i]); + fz_free(ctx, xref->page_objs); + } + + if (xref->page_refs) + { + for (i = 0; i < xref->page_len; i++) + pdf_drop_obj(xref->page_refs[i]); + fz_free(ctx, xref->page_refs); + } + + if (xref->focus_obj) + pdf_drop_obj(xref->focus_obj); + if (xref->file) + fz_close(xref->file); + if (xref->crypt) + pdf_free_crypt(ctx, xref->crypt); + + pdf_free_ocg(ctx, xref->ocg); + + fz_empty_store(ctx); + + pdf_lexbuf_fin(&xref->lexbuf.base); + + fz_free(ctx, xref); +} + +void +pdf_print_xref(pdf_document *xref) +{ + int i; + int xref_len = pdf_xref_len(xref); + printf("xref\n0 %d\n", pdf_xref_len(xref)); + for (i = 0; i < xref_len; i++) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i, + entry->ofs, + entry->gen, + entry->type ? 
entry->type : '-', + entry->stm_ofs, + entry->stm_buf); + } +} + +/* + * compressed object streams + */ + +static void +pdf_load_obj_stm(pdf_document *xref, int num, int gen, pdf_lexbuf *buf) +{ + fz_stream *stm = NULL; + pdf_obj *objstm = NULL; + int *numbuf = NULL; + int *ofsbuf = NULL; + + pdf_obj *obj; + int first; + int count; + int i; + pdf_token tok; + fz_context *ctx = xref->ctx; + + fz_var(numbuf); + fz_var(ofsbuf); + fz_var(objstm); + fz_var(stm); + + fz_try(ctx) + { + objstm = pdf_load_object(xref, num, gen); + + count = pdf_to_int(pdf_dict_gets(objstm, "N")); + first = pdf_to_int(pdf_dict_gets(objstm, "First")); + + if (count < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "negative number of objects in object stream"); + if (first < 0) + fz_throw(ctx, FZ_ERROR_GENERIC, "first object in object stream resides outside stream"); + + numbuf = fz_calloc(ctx, count, sizeof(int)); + ofsbuf = fz_calloc(ctx, count, sizeof(int)); + + stm = pdf_open_stream(xref, num, gen); + for (i = 0; i < count; i++) + { + tok = pdf_lex(stm, buf); + if (tok != PDF_TOK_INT) + fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen); + numbuf[i] = buf->i; + + tok = pdf_lex(stm, buf); + if (tok != PDF_TOK_INT) + fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen); + ofsbuf[i] = buf->i; + } + + fz_seek(stm, first, SEEK_SET); + + for (i = 0; i < count; i++) + { + int xref_len = pdf_xref_len(xref); + pdf_xref_entry *entry; + fz_seek(stm, first + ofsbuf[i], SEEK_SET); + + obj = pdf_parse_stm_obj(xref, stm, buf); + + if (numbuf[i] < 1 || numbuf[i] >= xref_len) + { + pdf_drop_obj(obj); + fz_throw(ctx, FZ_ERROR_GENERIC, "object id (%d 0 R) out of range (0..%d)", numbuf[i], xref_len - 1); + } + + entry = pdf_get_xref_entry(xref, numbuf[i]); + + if (entry->type == 'o' && entry->ofs == num) + { + /* If we already have an entry for this object, + * we'd like to drop it and use the new one - + * but this means that anyone currently holding + * a pointer to 
the old one will be left with a + * stale pointer. Instead, we drop the new one + * and trust that the old one is correct. */ + if (entry->obj) { + if (pdf_objcmp(entry->obj, obj)) + fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]); + pdf_drop_obj(obj); + } else + entry->obj = obj; + } + else + { + pdf_drop_obj(obj); + } + } + } + fz_always(ctx) + { + fz_close(stm); + fz_free(xref->ctx, ofsbuf); + fz_free(xref->ctx, numbuf); + pdf_drop_obj(objstm); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot open object stream (%d %d R)", num, gen); + } +} + +/* + * object loading + */ + +void +pdf_cache_object(pdf_document *xref, int num, int gen) +{ + pdf_xref_entry *x; + int rnum, rgen; + fz_context *ctx = xref->ctx; + + if (num < 0 || num >= pdf_xref_len(xref)) + fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(xref)); + + x = pdf_get_xref_entry(xref, num); + + if (x->obj) + return; + + if (x->type == 'f') + { + x->obj = pdf_new_null(ctx); + return; + } + else if (x->type == 'n') + { + fz_seek(xref->file, x->ofs, SEEK_SET); + + fz_try(ctx) + { + x->obj = pdf_parse_ind_obj(xref, xref->file, &xref->lexbuf.base, + &rnum, &rgen, &x->stm_ofs); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot parse object (%d %d R)", num, gen); + } + + if (rnum != num) + { + pdf_drop_obj(x->obj); + x->obj = NULL; + fz_rethrow_message(ctx, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen); + } + + if (xref->crypt) + pdf_crypt_obj(ctx, xref->crypt, x->obj, num, gen); + } + else if (x->type == 'o') + { + if (!x->obj) + { + fz_try(ctx) + { + pdf_load_obj_stm(xref, x->ofs, 0, &xref->lexbuf.base); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot load object stream containing object (%d %d R)", num, gen); + } + if (!x->obj) + fz_throw(ctx, FZ_ERROR_GENERIC, "object (%d %d R) was not found in its object stream", num, gen); + } + } + else + { + fz_throw(ctx, 
FZ_ERROR_GENERIC, "cannot find object in xref (%d %d R)", num, gen); + } +} + +pdf_obj * +pdf_load_object(pdf_document *xref, int num, int gen) +{ + fz_context *ctx = xref->ctx; + pdf_xref_entry *entry; + + fz_try(ctx) + { + pdf_cache_object(xref, num, gen); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot load object (%d %d R) into cache", num, gen); + } + + entry = pdf_get_xref_entry(xref, num); + + assert(entry->obj); + + return pdf_keep_obj(entry->obj); +} + +pdf_obj * +pdf_resolve_indirect(pdf_obj *ref) +{ + int sanity = 10; + int num; + int gen; + fz_context *ctx = NULL; /* Avoid warning for stupid compilers */ + pdf_document *xref; + pdf_xref_entry *entry; + + while (pdf_is_indirect(ref)) + { + if (--sanity == 0) + { + fz_warn(ctx, "Too many indirections (possible indirection cycle involving %d %d R)", num, gen); + return NULL; + } + xref = pdf_get_indirect_document(ref); + if (!xref) + return NULL; + ctx = xref->ctx; + num = pdf_to_num(ref); + gen = pdf_to_gen(ref); + fz_try(ctx) + { + pdf_cache_object(xref, num, gen); + } + fz_catch(ctx) + { + /* FIXME: TryLater ? 
*/ + fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen); + return NULL; + } + entry = pdf_get_xref_entry(xref, num); + if (!entry->obj) + return NULL; + ref = entry->obj; + } + + return ref; +} + +int +pdf_count_objects(pdf_document *doc) +{ + return pdf_xref_len(doc); +} + +int +pdf_create_object(pdf_document *xref) +{ + /* TODO: reuse free object slots by properly linking free object chains in the ofs field */ + pdf_xref_entry *entry; + int num = pdf_xref_len(xref); + entry = pdf_get_new_xref_entry(xref, num); + entry->type = 'f'; + entry->ofs = -1; + entry->gen = 0; + entry->stm_ofs = 0; + entry->stm_buf = NULL; + entry->obj = NULL; + return num; +} + +void +pdf_delete_object(pdf_document *xref, int num) +{ + pdf_xref_entry *x; + + if (num < 0 || num >= pdf_xref_len(xref)) + { + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); + return; + } + + x = pdf_get_new_xref_entry(xref, num); + + fz_drop_buffer(xref->ctx, x->stm_buf); + pdf_drop_obj(x->obj); + + x->type = 'f'; + x->ofs = 0; + x->gen = 0; + x->stm_ofs = 0; + x->stm_buf = NULL; + x->obj = NULL; +} + +void +pdf_update_object(pdf_document *xref, int num, pdf_obj *newobj) +{ + pdf_xref_entry *x; + + if (num < 0 || num >= pdf_xref_len(xref)) + { + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); + return; + } + + x = pdf_get_new_xref_entry(xref, num); + + pdf_drop_obj(x->obj); + + x->type = 'n'; + x->ofs = 0; + x->obj = pdf_keep_obj(newobj); +} + +void +pdf_update_stream(pdf_document *xref, int num, fz_buffer *newbuf) +{ + pdf_xref_entry *x; + + if (num < 0 || num >= pdf_xref_len(xref)) + { + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); + return; + } + + x = pdf_get_xref_entry(xref, num); + + fz_drop_buffer(xref->ctx, x->stm_buf); + x->stm_buf = fz_keep_buffer(xref->ctx, newbuf); +} + +int +pdf_meta(pdf_document *doc, int key, void *ptr, int size) +{ + switch (key) + { 
+ /* + ptr: Pointer to block (uninitialised on entry) + size: Size of block (at least 64 bytes) + Returns: Document format as a brief text string. + */ + case FZ_META_FORMAT_INFO: + sprintf((char *)ptr, "PDF %d.%d", doc->version/10, doc->version % 10); + return FZ_META_OK; + case FZ_META_CRYPT_INFO: + if (doc->crypt) + sprintf((char *)ptr, "Standard V%d R%d %d-bit %s", + pdf_crypt_version(doc), + pdf_crypt_revision(doc), + pdf_crypt_length(doc), + pdf_crypt_method(doc)); + else + sprintf((char *)ptr, "None"); + return FZ_META_OK; + case FZ_META_HAS_PERMISSION: + { + int i; + switch (size) + { + case FZ_PERMISSION_PRINT: + i = PDF_PERM_PRINT; + break; + case FZ_PERMISSION_CHANGE: + i = PDF_PERM_CHANGE; + break; + case FZ_PERMISSION_COPY: + i = PDF_PERM_COPY; + break; + case FZ_PERMISSION_NOTES: + i = PDF_PERM_NOTES; + break; + default: + return 0; + } + return pdf_has_permission(doc, i); + } + case FZ_META_INFO: + { + pdf_obj *info = pdf_dict_gets(pdf_trailer(doc), "Info"); + if (!info) + { + if (ptr) + *(char *)ptr = 0; + return 0; + } + info = pdf_dict_gets(info, *(char **)ptr); + if (!info) + { + if (ptr) + *(char *)ptr = 0; + return 0; + } + if (info && ptr && size) + { + char *utf8 = pdf_to_utf8(doc, info); + fz_strlcpy(ptr, utf8, size); + fz_free(doc->ctx, utf8); + } + return 1; + } + default: + return FZ_META_UNKNOWN_KEY; + } +} + +fz_transition * +pdf_page_presentation(pdf_document *doc, pdf_page *page, float *duration) +{ + *duration = page->duration; + if (!page->transition_present) + return NULL; + return &page->transition; +} + +/* + Initializers for the fz_document interface. + + The functions are split across two files to allow calls to a + version of the constructor that does not link in the interpreter. + The interpreter references the built-in font and cmap resources + which are quite big. Not linking those into the mubusy binary + saves roughly 6MB of space. 
+*/ + +static pdf_document * +pdf_new_document(fz_context *ctx, fz_stream *file) +{ + pdf_document *doc = fz_malloc_struct(ctx, pdf_document); + + doc->super.close = (void*)pdf_close_document; + doc->super.needs_password = (void*)pdf_needs_password; + doc->super.authenticate_password = (void*)pdf_authenticate_password; + doc->super.load_outline = (void*)pdf_load_outline; + doc->super.count_pages = (void*)pdf_count_pages; + doc->super.load_page = (void*)pdf_load_page; + doc->super.load_links = (void*)pdf_load_links; + doc->super.bound_page = (void*)pdf_bound_page; + doc->super.first_annot = (void*)pdf_first_annot; + doc->super.next_annot = (void*)pdf_next_annot; + doc->super.bound_annot = (void*)pdf_bound_annot; + doc->super.run_page_contents = NULL; /* see pdf_xref_aux.c */ + doc->super.run_annot = NULL; /* see pdf_xref_aux.c */ + doc->super.free_page = (void*)pdf_free_page; + doc->super.meta = (void*)pdf_meta; + doc->super.page_presentation = (void*)pdf_page_presentation; + doc->super.write = (void*)pdf_write_document; + + pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE); + doc->file = fz_keep_stream(file); + doc->ctx = ctx; + + return doc; +} + +pdf_document * +pdf_open_document_no_run_with_stream(fz_context *ctx, fz_stream *file) +{ + pdf_document *doc = pdf_new_document(ctx, file); + pdf_init_document(doc); + return doc; +} + +pdf_document * +pdf_open_document_no_run(fz_context *ctx, const char *filename) +{ + fz_stream *file = NULL; + pdf_document *doc; + + fz_var(file); + + fz_try(ctx) + { + file = fz_open_file(ctx, filename); + doc = pdf_new_document(ctx, file); + pdf_init_document(doc); + } + fz_always(ctx) + { + fz_close(file); + } + fz_catch(ctx) + { + fz_rethrow_message(ctx, "cannot load document '%s'", filename); + } + return doc; +} + +pdf_document *pdf_specifics(fz_document *doc) +{ + return (pdf_document *)(doc->close == (void *)pdf_close_document ? doc : NULL); +} |