summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2017-12-07 18:31:37 +0000
committerRobin Watts <robin.watts@artifex.com>2018-01-05 11:47:08 +0000
commitfd0bf575229a79bc22901b0bd8ba4dbd356faa22 (patch)
tree3041da3cb23e3df8fda8f7f9cb0f8fdcdb94a626
parent25593f4f9df0c4a9b9adaa84aaa33fe2a89087f6 (diff)
downloadmupdf-fd0bf575229a79bc22901b0bd8ba4dbd356faa22.tar.xz
Enable saving of encrypted PDF files.
We need both RC4 and AES encryption. RC4 is a straight reversible stream, and our AES library knows how to encrypt as well as decrypt, so it's "just" a matter of calling them correctly. We therefore expose a generic "encrypt this data" routine (and a matching "how long will the data be once encrypted" routine) within pdf-crypt.c. We then extend our PDF object output routines to call these. This is enough to get encrypted data preserved over calls to mutool clean. Unfortunately the created files aren't readable, due to 2 further problems, also fixed here. Firstly, mutool clean does not preserve the Encrypt entry in the trailer. This is a simple fix. Secondly, we are required NOT to encrypt the Encrypt entry. This requires us to spot the crypt entry and to special-case it.
-rw-r--r--include/mupdf/pdf/crypt.h4
-rw-r--r--include/mupdf/pdf/document.h1
-rw-r--r--include/mupdf/pdf/object.h3
-rw-r--r--source/pdf/pdf-crypt.c87
-rw-r--r--source/pdf/pdf-object.c88
-rw-r--r--source/pdf/pdf-write.c37
6 files changed, 187 insertions, 33 deletions
diff --git a/include/mupdf/pdf/crypt.h b/include/mupdf/pdf/crypt.h
index cb8eba41..f66fe0d2 100644
--- a/include/mupdf/pdf/crypt.h
+++ b/include/mupdf/pdf/crypt.h
@@ -59,4 +59,8 @@ int pdf_check_signature(fz_context *ctx, pdf_document *doc, pdf_widget *widget,
*/
void pdf_sign_signature(fz_context *ctx, pdf_document *doc, pdf_widget *widget, const char *sigfile, const char *password);
+void pdf_encrypt_data(fz_context *ctx, pdf_crypt *crypt, int num, int gen, void (*fmt_str_out)(fz_context *, void *, const unsigned char *, int), void *arg, const unsigned char *s, int n);
+
+int pdf_encrypted_len(fz_context *ctx, pdf_crypt *crypt, int num, int gen, int len);
+
#endif
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h
index 63f94bcd..ebd04401 100644
--- a/include/mupdf/pdf/document.h
+++ b/include/mupdf/pdf/document.h
@@ -4,7 +4,6 @@
typedef struct pdf_lexbuf_s pdf_lexbuf;
typedef struct pdf_lexbuf_large_s pdf_lexbuf_large;
typedef struct pdf_xref_s pdf_xref;
-typedef struct pdf_crypt_s pdf_crypt;
typedef struct pdf_ocg_descriptor_s pdf_ocg_descriptor;
typedef struct pdf_portfolio_s pdf_portfolio;
diff --git a/include/mupdf/pdf/object.h b/include/mupdf/pdf/object.h
index 4177112b..306ab21b 100644
--- a/include/mupdf/pdf/object.h
+++ b/include/mupdf/pdf/object.h
@@ -2,6 +2,7 @@
#define MUPDF_PDF_OBJECT_H
typedef struct pdf_document_s pdf_document;
+typedef struct pdf_crypt_s pdf_crypt;
/* Defined in PDF 1.7 according to Acrobat limit. */
#define PDF_MAX_OBJECT_NUMBER 8388607
@@ -142,7 +143,9 @@ int pdf_obj_refs(fz_context *ctx, pdf_obj *ref);
int pdf_obj_parent_num(fz_context *ctx, pdf_obj *obj);
int pdf_sprint_obj(fz_context *ctx, char *s, int n, pdf_obj *obj, int tight);
+int pdf_sprint_encrypted_obj(fz_context *ctx, char *s, int n, pdf_obj *obj, int tight, pdf_crypt *crypt, int num, int gen);
int pdf_print_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight);
+int pdf_print_encrypted_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight, pdf_crypt *crypt, int num, int gen);
char *pdf_to_utf8(fz_context *ctx, pdf_obj *src);
char *pdf_load_stream_as_utf8(fz_context *ctx, pdf_obj *src);
diff --git a/source/pdf/pdf-crypt.c b/source/pdf/pdf-crypt.c
index 9f3b05e9..c427e85e 100644
--- a/source/pdf/pdf-crypt.c
+++ b/source/pdf/pdf-crypt.c
@@ -1070,3 +1070,90 @@ pdf_print_crypt(fz_context *ctx, fz_output *out, pdf_crypt *crypt)
fz_write_printf(ctx, out, "}\n");
}
+
+/*
+	pdf_encrypt_data: Encrypt 'n' bytes at 's' for object (num, gen) and
+	hand the result to 'write_data' in pieces.
+
+	crypt: Encryption state; NULL means the data is passed through
+	untouched in a single call to 'write_data'.
+
+	write_data, arg: Output callback and its opaque argument. The
+	callback may be invoked several times, or not at all, per call.
+
+	RC4 output has the same length as the input. AES output is a random
+	16 byte IV followed by the CBC encrypted data, padded with 1 to 16
+	bytes that each hold the pad count; empty AES input produces no
+	output at all. Any other method copies the data unchanged.
+
+	Throws if the AES key cannot be set up.
+
+	NOTE(review): the key and method are always taken from crypt->strf,
+	including when the caller passes stream data; confirm this is right
+	for documents whose stream and string filters differ.
+*/
+void pdf_encrypt_data(fz_context *ctx, pdf_crypt *crypt, int num, int gen, void (*write_data)(fz_context *ctx, void *, const unsigned char *, int), void *arg, const unsigned char *s, int n)
+{
+	unsigned char buffer[256];
+	unsigned char key[32];
+	int keylen;
+
+	if (crypt == NULL)
+	{
+		write_data(ctx, arg, s, n);
+		return;
+	}
+
+	keylen = pdf_compute_object_key(crypt, &crypt->strf, num, gen, key, 32);
+
+	if (crypt->strf.method == PDF_CRYPT_RC4)
+	{
+		fz_arc4 arc4;
+		fz_arc4_init(&arc4, key, keylen);
+		/* Stream the data through the cipher one buffer-full at a time. */
+		while (n > 0)
+		{
+			int len = n;
+			if (len > (int)sizeof(buffer))
+				len = (int)sizeof(buffer);
+			fz_arc4_encrypt(&arc4, buffer, s, len);
+			write_data(ctx, arg, buffer, len);
+			s += len;
+			n -= len;
+		}
+		return;
+	}
+
+	if (crypt->strf.method == PDF_CRYPT_AESV2 || crypt->strf.method == PDF_CRYPT_AESV3)
+	{
+		fz_aes aes;
+		unsigned char iv[16];
+
+		/* Empty strings can be represented by empty strings */
+		if (n == 0)
+			return;
+
+		if (fz_aes_setkey_enc(&aes, key, keylen * 8))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "AES key init failed (keylen=%d)", keylen * 8);
+
+		/* The IV is sent in the clear ahead of the ciphertext. */
+		fz_memrnd(ctx, iv, 16);
+		write_data(ctx, arg, iv, 16);
+
+		/* buffer[0..15] holds the plaintext block, buffer[16..31] the ciphertext. */
+		while (n > 0)
+		{
+			int len = n;
+			if (len > 16)
+				len = 16;
+			memcpy(buffer, s, len);
+			/* A short final block is filled with pad bytes holding the pad count. */
+			if (len != 16)
+				memset(&buffer[len], 16-len, 16-len);
+			fz_aes_crypt_cbc(&aes, FZ_AES_ENCRYPT, 16, iv, buffer, buffer+16);
+			write_data(ctx, arg, buffer+16, 16);
+			/* Step only over bytes that exist, so 's' never leaves the input. */
+			s += len;
+			/* 'n' deliberately goes negative after a short final block. */
+			n -= 16;
+		}
+		/* Input was a whole number of blocks: append a block made only of padding. */
+		if (n == 0) {
+			memset(buffer, 16, 16);
+			fz_aes_crypt_cbc(&aes, FZ_AES_ENCRYPT, 16, iv, buffer, buffer+16);
+			write_data(ctx, arg, buffer+16, 16);
+		}
+		return;
+	}
+
+	/* Should never happen, but... */
+	write_data(ctx, arg, s, n);
+}
+
+/*
+	pdf_encrypted_len: Return the number of bytes pdf_encrypt_data emits
+	for 'len' bytes of plaintext.
+
+	crypt: Encryption state; NULL means no encryption, so 'len' is
+	returned unchanged.
+
+	num, gen: Object number and generation; unused here, accepted so the
+	signature mirrors pdf_encrypt_data.
+
+	Only the AES methods change the length. Every other method returns
+	'len' as is.
+*/
+int pdf_encrypted_len(fz_context *ctx, pdf_crypt *crypt, int num, int gen, int len)
+{
+	if (crypt == NULL)
+		return len;
+
+	if (crypt->strf.method == PDF_CRYPT_AESV2 || crypt->strf.method == PDF_CRYPT_AESV3)
+	{
+		/* pdf_encrypt_data writes neither IV nor padding for empty input. */
+		if (len == 0)
+			return 0;
+
+		len += 16; /* 16 for IV */
+		if ((len & 15) == 0)
+			len += 16; /* Another 16 if our last block is full anyway */
+		len = (len + 15) & ~15; /* And pad to the block */
+	}
+
+	return len;
+}
diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c
index 816d7e3d..b2ff3629 100644
--- a/source/pdf/pdf-object.c
+++ b/source/pdf/pdf-object.c
@@ -1795,6 +1795,9 @@ struct fmt
int col;
int sep;
int last;
+ pdf_crypt *crypt;
+ int num;
+ int gen;
};
static void fmt_obj(fz_context *ctx, struct fmt *fmt, pdf_obj *obj);
@@ -1862,13 +1865,11 @@ static inline void fmt_sep(fz_context *ctx, struct fmt *fmt)
fmt->sep = 1;
}
-static void fmt_str(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
+static void fmt_str_out(fz_context *ctx, void *fmt_, const unsigned char *s, int n)
{
- char *s = pdf_to_str_buf(ctx, obj);
- int n = pdf_to_str_len(ctx, obj);
+ struct fmt *fmt = (struct fmt *)fmt_;
int i, c;
- fmt_putc(ctx, fmt, '(');
for (i = 0; i < n; i++)
{
c = (unsigned char)s[i];
@@ -1897,16 +1898,23 @@ static void fmt_str(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
else
fmt_putc(ctx, fmt, c);
}
- fmt_putc(ctx, fmt, ')');
}
-static void fmt_hex(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
+static void fmt_str(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
{
- char *s = pdf_to_str_buf(ctx, obj);
+ unsigned char *s = (unsigned char *)pdf_to_str_buf(ctx, obj);
int n = pdf_to_str_len(ctx, obj);
+
+ fmt_putc(ctx, fmt, '(');
+ pdf_encrypt_data(ctx, fmt->crypt, fmt->num, fmt->gen, fmt_str_out, fmt, s, n);
+ fmt_putc(ctx, fmt, ')');
+}
+
+static void fmt_hex_out(fz_context *ctx, void *arg, const unsigned char *s, int n)
+{
+ struct fmt *fmt = (struct fmt *)arg;
int i, b, c;
- fmt_putc(ctx, fmt, '<');
for (i = 0; i < n; i++) {
b = (unsigned char) s[i];
c = (b >> 4) & 0x0f;
@@ -1914,6 +1922,15 @@ static void fmt_hex(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
c = (b) & 0x0f;
fmt_putc(ctx, fmt, c < 0xA ? c + '0' : c + 'A' - 0xA);
}
+}
+
+static void fmt_hex(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
+{
+ unsigned char *s = (unsigned char *)pdf_to_str_buf(ctx, obj);
+ int n = pdf_to_str_len(ctx, obj);
+
+ fmt_putc(ctx, fmt, '<');
+ pdf_encrypt_data(ctx, fmt->crypt, fmt->num, fmt->gen, fmt_hex_out, fmt, s, n);
fmt_putc(ctx, fmt, '>');
}
@@ -2012,6 +2029,23 @@ static void fmt_dict(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
}
}
+/*
+	Output callback that, instead of writing, adds to the int pointed to
+	by 'arg' the number of extra characters needed to escape the 'len'
+	bytes at 'str': one for each byte that takes a backslash escape and
+	three for each other byte outside the range 32..126.
+*/
+static void count_encrypted_data(fz_context *ctx, void *arg, const unsigned char *str, int len)
+{
+	int *total = (int *)arg;
+	int extra = 0;
+	int k;
+
+	for (k = 0; k < len; k++)
+	{
+		switch (str[k])
+		{
+		case '(': case ')': case '\\':
+		case '\n': case '\r': case '\t': case '\b': case '\f':
+			extra += 1;
+			break;
+		default:
+			if (str[k] < 32 || str[k] >= 127)
+				extra += 3;
+			break;
+		}
+	}
+
+	*total += extra;
+}
+
static void fmt_obj(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
{
char buf[256];
@@ -2039,18 +2073,12 @@ static void fmt_obj(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
}
else if (pdf_is_string(ctx, obj))
{
- char *str = pdf_to_str_buf(ctx, obj);
+ unsigned char *str = (unsigned char *)pdf_to_str_buf(ctx, obj);
int len = pdf_to_str_len(ctx, obj);
- int added = 0;
- int i, c;
- for (i = 0; i < len; i++) {
- c = (unsigned char)str[i];
- if (c != 0 && strchr("()\\\n\r\t\b\f", c))
- added ++;
- else if (c < 32 || c >= 127)
- added += 3;
- }
- if (added < len)
+ int encoded_len = 0;
+
+ pdf_encrypt_data(ctx, fmt->crypt, fmt->num, fmt->gen, count_encrypted_data, &encoded_len, str, len);
+ if (encoded_len < 2*len)
fmt_str(ctx, fmt, obj);
else
fmt_hex(ctx, fmt, obj);
@@ -2066,7 +2094,7 @@ static void fmt_obj(fz_context *ctx, struct fmt *fmt, pdf_obj *obj)
}
int
-pdf_sprint_obj(fz_context *ctx, char *s, int n, pdf_obj *obj, int tight)
+pdf_sprint_encrypted_obj(fz_context *ctx, char *s, int n, pdf_obj *obj, int tight, pdf_crypt *crypt, int num, int gen)
{
struct fmt fmt;
@@ -2079,6 +2107,9 @@ pdf_sprint_obj(fz_context *ctx, char *s, int n, pdf_obj *obj, int tight)
fmt.buf = s;
fmt.cap = n;
fmt.len = 0;
+ fmt.crypt = crypt;
+ fmt.num = num;
+ fmt.gen = gen;
fmt_obj(ctx, &fmt, obj);
if (fmt.buf && fmt.len < fmt.cap)
@@ -2087,7 +2118,13 @@ pdf_sprint_obj(fz_context *ctx, char *s, int n, pdf_obj *obj, int tight)
return fmt.len;
}
-int pdf_print_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight)
+/* Unencrypted form of pdf_sprint_encrypted_obj: same call with no crypt and object 0 0. */
+int
+pdf_sprint_obj(fz_context *ctx, char *s, int n, pdf_obj *obj, int tight)
+{
+	pdf_crypt *no_crypt = NULL;
+
+	return pdf_sprint_encrypted_obj(ctx, s, n, obj, tight, no_crypt, 0, 0);
+}
+
+int pdf_print_encrypted_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight, pdf_crypt *crypt, int num, int gen)
{
char buf[1024];
char *ptr;
@@ -2096,19 +2133,24 @@ int pdf_print_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight)
n = pdf_sprint_obj(ctx, NULL, 0, obj, tight);
if ((n + 1) < sizeof buf)
{
- pdf_sprint_obj(ctx, buf, sizeof buf, obj, tight);
+ pdf_sprint_encrypted_obj(ctx, buf, sizeof buf, obj, tight, crypt, num, gen);
fz_write_data(ctx, out, buf, n);
}
else
{
ptr = fz_malloc(ctx, n + 1);
- pdf_sprint_obj(ctx, ptr, n + 1, obj, tight);
+ pdf_sprint_encrypted_obj(ctx, ptr, n + 1, obj, tight, crypt, num, gen);
fz_write_data(ctx, out, ptr, n);
fz_free(ctx, ptr);
}
return n;
}
+/* Unencrypted form of pdf_print_encrypted_obj: same call with no crypt and object 0 0. */
+int pdf_print_obj(fz_context *ctx, fz_output *out, pdf_obj *obj, int tight)
+{
+	pdf_crypt *no_crypt = NULL;
+
+	return pdf_print_encrypted_obj(ctx, out, obj, tight, no_crypt, 0, 0);
+}
+
int pdf_obj_refs(fz_context *ctx, pdf_obj *ref)
{
return (ref >= PDF_OBJ__LIMIT ? ref->refs : 0);
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index f8b6a5d1..b13970b9 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -93,6 +93,7 @@ struct pdf_write_state_s
pdf_obj *hints_length;
int page_count;
page_objects_list *page_object_lists;
+ int crypt_object_number;
};
/*
@@ -1657,6 +1658,11 @@ static fz_buffer *deflatebuf(fz_context *ctx, const unsigned char *p, size_t n)
return buf;
}
+/* Output callback for pdf_encrypt_data: 'arg' is the fz_output that receives 'len' bytes of 'data'. */
+static void write_data(fz_context *ctx, void *arg, const unsigned char *data, int len)
+{
+	fz_output *out = (fz_output *)arg;
+
+	fz_write_data(ctx, out, data, len);
+}
+
static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate)
{
fz_buffer *buf, *tmp;
@@ -1701,13 +1707,13 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts
addhexfilter(ctx, doc, obj);
}
- newlen = pdf_new_int(ctx, doc, (int)len);
+ newlen = pdf_new_int(ctx, doc, pdf_encrypted_len(ctx, doc->crypt, num, gen, (int)len));
pdf_dict_put_drop(ctx, obj, PDF_NAME_Length, newlen);
fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
- pdf_print_obj(ctx, opts->out, obj, opts->do_tight);
+ pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, doc->crypt, num, gen);
fz_write_string(ctx, opts->out, "\nstream\n");
- fz_write_data(ctx, opts->out, data, len);
+ pdf_encrypt_data(ctx, doc->crypt, num, gen, write_data, opts->out, data, len);
fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
fz_drop_buffer(ctx, buf);
@@ -1767,7 +1773,7 @@ static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *op
pdf_dict_put_drop(ctx, obj, PDF_NAME_Length, newlen);
fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
- pdf_print_obj(ctx, opts->out, obj, opts->do_tight);
+ pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, doc->crypt, num, gen);
fz_write_string(ctx, opts->out, "\nstream\n");
fz_write_data(ctx, opts->out, data, len);
fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
@@ -1845,7 +1851,7 @@ static int is_xml_metadata(fz_context *ctx, pdf_obj *obj)
return 0;
}
-static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs)
+static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs, int unenc)
{
pdf_xref_entry *entry;
pdf_obj *obj;
@@ -1892,13 +1898,13 @@ static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opt
if (!pdf_obj_num_is_stream(ctx, doc, num))
{
fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
- pdf_print_obj(ctx, opts->out, obj, opts->do_tight);
+ pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, unenc ? NULL : doc->crypt, num, gen);
fz_write_string(ctx, opts->out, "\nendobj\n\n");
}
else if (entry->stm_ofs < 0 && entry->stm_buf == NULL)
{
fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
- pdf_print_obj(ctx, opts->out, obj, opts->do_tight);
+ pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, doc->crypt, num, gen);
fz_write_string(ctx, opts->out, "\nstream\nendstream\nendobj\n\n");
}
else
@@ -2018,6 +2024,10 @@ static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts,
obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_ID);
if (obj)
pdf_dict_put(ctx, trailer, PDF_NAME_ID, obj);
+
+ obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Encrypt);
+ if (obj)
+ pdf_dict_put(ctx, trailer, PDF_NAME_Encrypt, obj);
}
if (main_xref_offset != 0)
{
@@ -2027,6 +2037,7 @@ static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts,
}
fz_write_string(ctx, opts->out, "trailer\n");
+ /* Trailer is NOT encrypted */
pdf_print_obj(ctx, opts->out, trailer, opts->do_tight);
fz_write_string(ctx, opts->out, "\n");
@@ -2155,7 +2166,7 @@ static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_state
pdf_update_stream(ctx, doc, dict, fzbuf, 0);
- writeobject(ctx, doc, opts, num, 0, 0);
+ writeobject(ctx, doc, opts, num, 0, 0, 0);
fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);
}
fz_always(ctx)
@@ -2212,7 +2223,7 @@ dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num
if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
{
opts->ofs_list[num] = fz_tell_output(ctx, opts->out);
- writeobject(ctx, doc, opts, num, opts->gen_list[num], 1);
+ writeobject(ctx, doc, opts, num, opts->gen_list[num], 1, num == opts->crypt_object_number);
}
}
else
@@ -2932,6 +2943,14 @@ do_pdf_save_document(fz_context *ctx, pdf_document *doc, pdf_write_state *opts,
if (opts->do_garbage >= 2 || opts->do_linear)
compactxref(ctx, doc, opts);
+ opts->crypt_object_number = 0;
+ if (doc->crypt)
+ {
+ pdf_obj *crypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Encrypt);
+ int crypt_num = pdf_to_num(ctx, crypt);
+ opts->crypt_object_number = opts->renumber_map[crypt_num];
+ }
+
/* Make renumbering affect all indirect references and update xref */
if (opts->do_garbage >= 2 || opts->do_linear)
renumberobjs(ctx, doc, opts);