10 files changed, 158 insertions, 38 deletions
diff --git a/pdf/mupdf.h b/pdf/mupdf.h
index f2d235b9..16b0f58b 100644
--- a/pdf/mupdf.h
+++ b/pdf/mupdf.h
@@ -17,7 +17,7 @@ pdf_obj *pdf_new_null(fz_context *ctx);
 pdf_obj *pdf_new_bool(fz_context *ctx, int b);
 pdf_obj *pdf_new_int(fz_context *ctx, int i);
 pdf_obj *pdf_new_real(fz_context *ctx, float f);
-pdf_obj *fz_new_name(fz_context *ctx, char *str);
+pdf_obj *pdf_new_name(fz_context *ctx, char *str);
 pdf_obj *pdf_new_string(fz_context *ctx, char *str, int len);
 pdf_obj *pdf_new_indirect(fz_context *ctx, int num, int gen, void *doc);
 pdf_obj *pdf_new_array(fz_context *ctx, int initialcap);
diff --git a/pdf/pdf_annot.c b/pdf/pdf_annot.c
index 5a463336..18a0c947 100644
--- a/pdf/pdf_annot.c
+++ b/pdf/pdf_annot.c
@@ -198,6 +198,8 @@ pdf_parse_action(pdf_document *xref, pdf_obj *action)
 	pdf_obj *obj, *dest;
 	fz_context *ctx = xref->ctx;
 
+	UNUSED(ctx);
+
 	ld.kind = FZ_LINK_NONE;
 
 	if (!action)
diff --git a/pdf/pdf_crypt.c b/pdf/pdf_crypt.c
index 5329e845..ec6f792b 100644
--- a/pdf/pdf_crypt.c
+++ b/pdf/pdf_crypt.c
@@ -103,8 +103,8 @@ pdf_new_crypt(fz_context *ctx, pdf_obj *dict, pdf_obj *id)
 	obj = pdf_dict_gets(dict, "O");
 	if (pdf_is_string(obj) && pdf_to_str_len(obj) == 32)
 		memcpy(crypt->o, pdf_to_str_buf(obj), 32);
-	/* /O and /U are supposed to be 48 bytes long for revision 5, they're often longer, though */
-	else if (crypt->r == 5 && pdf_is_string(obj) && pdf_to_str_len(obj) >= 48)
+	/* /O and /U are supposed to be 48 bytes long for revision 5 and 6, they're often longer, though */
+	else if (crypt->r >= 5 && pdf_is_string(obj) && pdf_to_str_len(obj) >= 48)
 		memcpy(crypt->o, pdf_to_str_buf(obj), 48);
 	else
 	{
@@ -115,8 +115,8 @@ pdf_new_crypt(fz_context *ctx, pdf_obj *dict, pdf_obj *id)
 	obj = pdf_dict_gets(dict, "U");
 	if (pdf_is_string(obj) && pdf_to_str_len(obj) == 32)
 		memcpy(crypt->u, pdf_to_str_buf(obj), 32);
-	/* /O and /U are supposed to be 48 bytes long for revision 5, they're often longer, though */
-	else if (crypt->r == 5 && pdf_is_string(obj) && pdf_to_str_len(obj) >= 48)
+	/* /O and /U are supposed to be 48 bytes long for revision 5 and 6, they're often longer, though */
+	else if (crypt->r >= 5 && pdf_is_string(obj) && pdf_to_str_len(obj) >= 48)
 		memcpy(crypt->u, pdf_to_str_buf(obj), 48);
 	else if (pdf_is_string(obj) && pdf_to_str_len(obj) < 32)
 	{
@@ -138,7 +138,7 @@ pdf_new_crypt(fz_context *ctx, pdf_obj *dict, pdf_obj *id)
 		crypt->p = 0xfffffffc;
 	}
 
-	if (crypt->r == 5)
+	if (crypt->r == 5 || crypt->r == 6)
 	{
 		obj = pdf_dict_gets(dict, "OE");
 		if (!pdf_is_string(obj) || pdf_to_str_len(obj) != 32)
@@ -315,10 +315,9 @@ pdf_parse_crypt_filter(fz_context *ctx, pdf_crypt_filter *cf, pdf_crypt *crypt,
 		fz_throw(ctx, "invalid key length: %d", cf->length);
 
 	if ((crypt->r == 1 || crypt->r == 2 || crypt->r == 4) &&
-		(cf->length < 0 || cf->length > 256))
+		(cf->length < 0 || cf->length > 128))
 		fz_throw(ctx, "invalid key length: %d", cf->length);
-	if (crypt->r == 5 &&
-		(cf->length != 128 && cf->length != 192 && cf->length == 256))
+	if ((crypt->r == 5 || crypt->r == 6) && cf->length != 256)
 		fz_throw(ctx, "invalid key length: %d", cf->length);
 }
 
@@ -445,6 +444,101 @@ pdf_compute_encryption_key_r5(pdf_crypt *crypt, unsigned char *password, int pwl
 }
 
 /*
+ * Compute an encryption key (PDF 1.7 ExtensionLevel 8 algorithm)
+ *
+ * Adobe has not yet released the details, so the algorithm reference is:
+ * http://esec-lab.sogeti.com/post/The-undocumented-password-validation-algorithm-of-Adobe-Reader-X
+ */
+
+static void
+pdf_compute_hardened_hash_r6(unsigned char *password, int pwlen, unsigned char salt[8], unsigned char *ownerkey, unsigned char hash[32])
+{
+	/* Hash password, 8 salt bytes and the optional 48 byte ownerkey (may be NULL) into hash; pwlen must fit data[] (<= 128). */
+	unsigned char data[(128 + 64 + 48) * 64];
+	unsigned char block[64];
+	int block_size = 32;
+	int data_len = 0;
+	int i, j, sum;
+	fz_sha256 sha256;
+	fz_sha384 sha384;
+	fz_sha512 sha512;
+	fz_aes aes;
+
+	/* Step 1: calculate initial data block */
+	fz_sha256_init(&sha256);
+	fz_sha256_update(&sha256, password, pwlen);
+	fz_sha256_update(&sha256, salt, 8);
+	if (ownerkey)
+		fz_sha256_update(&sha256, ownerkey, 48);
+	fz_sha256_final(&sha256, block);
+
+	for (i = 0; i < 64 || i < data[data_len * 64 - 1] + 32; i++) /* data[] is only read once i >= 64, i.e. after it was filled */
+	{
+		/* Step 2: repeat password and data block 64 times */
+		memcpy(data, password, pwlen);
+		memcpy(data + pwlen, block, block_size);
+		if (ownerkey) /* memcpy from a NULL source is undefined, even for zero bytes */
+			memcpy(data + pwlen + block_size, ownerkey, 48);
+		data_len = pwlen + block_size + (ownerkey ? 48 : 0);
+		for (j = 1; j < 64; j++)
+			memcpy(data + j * data_len, data, data_len);
+
+		/* Step 3: encrypt data using data block as key and iv */
+		aes_setkey_enc(&aes, block, 128);
+		aes_crypt_cbc(&aes, AES_ENCRYPT, data_len * 64, block + 16, data, data);
+
+		/* Step 4: determine SHA-2 hash size for this round */
+		for (j = 0, sum = 0; j < 16; j++)
+			sum += data[j];
+
+		/* Step 5: calculate data block for next round */
+		block_size = 32 + (sum % 3) * 16;
+		switch (block_size)
+		{
+		case 32:
+			fz_sha256_init(&sha256);
+			fz_sha256_update(&sha256, data, data_len * 64);
+			fz_sha256_final(&sha256, block);
+			break;
+		case 48:
+			fz_sha384_init(&sha384);
+			fz_sha384_update(&sha384, data, data_len * 64);
+			fz_sha384_final(&sha384, block);
+			break;
+		case 64:
+			fz_sha512_init(&sha512);
+			fz_sha512_update(&sha512, data, data_len * 64);
+			fz_sha512_final(&sha512, block);
+			break;
+		}
+	}
+	memset(data, 0, sizeof(data));
+	memcpy(hash, block, 32);
+}
+
+static void
+pdf_compute_encryption_key_r6(pdf_crypt *crypt, unsigned char *password, int pwlen, int ownerkey, unsigned char *validationkey)
+{
+	/* Derive crypt->key from password; validationkey receives the 32 byte hash callers compare with crypt->u or crypt->o. */
+	unsigned char *salts = (ownerkey ? crypt->o : crypt->u) + 32;	/* 8 byte validation salt, followed by the 8 byte key salt */
+	unsigned char *udata = ownerkey ? crypt->u : NULL;	/* owner hashes additionally cover the 48 byte user string */
+	unsigned char hash[32];
+	unsigned char iv[16];
+	fz_aes aes;
+
+	if (pwlen > 127)
+		pwlen = 127;
+
+	pdf_compute_hardened_hash_r6(password, pwlen, salts, udata, validationkey);
+	/* The intermediate key must use the same key string and extra data as the validation hash, or oe decrypts to garbage. */
+	pdf_compute_hardened_hash_r6(password, pwlen, salts + 8, udata, hash);
+
+	memset(iv, 0, sizeof(iv));
+	aes_setkey_dec(&aes, hash, 256);
+	aes_crypt_cbc(&aes, AES_DECRYPT, 32, iv, ownerkey ? crypt->oe : crypt->ue, crypt->key);
+}
+
+/*
  * Computing the user password (PDF 1.7 algorithm 3.4 and 3.5)
  * Also save the generated key for decrypting objects and streams in crypt->key.
  */
@@ -496,6 +590,11 @@ pdf_compute_user_password(pdf_crypt *crypt, unsigned char *password, int pwlen,
 	{
 		pdf_compute_encryption_key_r5(crypt, password, pwlen, 0, output);
 	}
+
+	if (crypt->r == 6)
+	{
+		pdf_compute_encryption_key_r6(crypt, password, pwlen, 0, output);
+	}
 }
 
 /*
@@ -510,7 +609,7 @@ pdf_authenticate_user_password(pdf_crypt *crypt, unsigned char *password, int pw
 {
 	unsigned char output[32];
 	pdf_compute_user_password(crypt, password, pwlen, output);
-	if (crypt->r == 2 || crypt->r == 5)
+	if (crypt->r == 2 || crypt->r == 5 || crypt->r == 6)
 		return memcmp(output, crypt->u, 32) == 0;
 	if (crypt->r == 3 || crypt->r == 4)
 		return memcmp(output, crypt->u, 16) == 0;
@@ -538,9 +637,13 @@ pdf_authenticate_owner_password(pdf_crypt *crypt, unsigned char *ownerpass, int
 	if (crypt->r == 5)
 	{
 		/* PDF 1.7 ExtensionLevel 3 algorithm 3.12 */
-
 		pdf_compute_encryption_key_r5(crypt, ownerpass, pwlen, 1, key);
-
+		return !memcmp(key, crypt->o, 32);
+	}
+	else if (crypt->r == 6)
+	{
+		/* PDF 1.7 ExtensionLevel 8 algorithm */
+		pdf_compute_encryption_key_r6(crypt, ownerpass, pwlen, 1, key);
 		return !memcmp(key, crypt->o, 32);
 	}
 
diff --git a/pdf/pdf_form.c b/pdf/pdf_form.c
index cb92956f..1a902df3 100644
--- a/pdf/pdf_form.c
+++ b/pdf/pdf_form.c
@@ -1452,7 +1452,7 @@ static void reset_field(pdf_document *doc, pdf_obj *obj)
 			fz_var(name);
 			fz_try(ctx)
 			{
-				name = fz_new_name(ctx, "Off");
+				name = pdf_new_name(ctx, "Off");
 				pdf_dict_puts(obj, "AS", name);
 			}
 			fz_always(ctx)
@@ -1612,7 +1612,7 @@ static void check_off(fz_context *ctx, pdf_obj *obj)
 	fz_var(off);
 	fz_try(ctx);
 	{
-		off = fz_new_name(ctx, "Off");
+		off = pdf_new_name(ctx, "Off");
 		pdf_dict_puts(obj, "AS", off);
 	}
 	fz_always(ctx)
@@ -1636,9 +1636,9 @@ static void set_check(fz_context *ctx, pdf_obj *chk, char *name)
 		/* If name is a possible value of this check
 		* box then use it, otherwise use "Off" */
 		if (pdf_dict_gets(n, name))
-			val = fz_new_name(ctx, name);
+			val = pdf_new_name(ctx, name);
 		else
-			val = fz_new_name(ctx, "Off");
+			val = pdf_new_name(ctx, "Off");
 
 		pdf_dict_puts(chk, "AS", val);
 	}
@@ -1947,15 +1947,15 @@ void pdf_field_setBorderStyle(pdf_document *doc, pdf_obj *field, char *text)
 	pdf_obj *val = NULL;
 
 	if (!strcmp(text, "Solid"))
-		val = fz_new_name(ctx, "S");
+		val = pdf_new_name(ctx, "S");
 	else if (!strcmp(text, "Dashed"))
-		val = fz_new_name(ctx, "D");
+		val = pdf_new_name(ctx, "D");
 	else if (!strcmp(text, "Beveled"))
-		val = fz_new_name(ctx, "B");
+		val = pdf_new_name(ctx, "B");
 	else if (!strcmp(text, "Inset"))
-		val = fz_new_name(ctx, "I");
+		val = pdf_new_name(ctx, "I");
 	else if (!strcmp(text, "Underline"))
-		val = fz_new_name(ctx, "U");
+		val = pdf_new_name(ctx, "U");
 	else
 		return;
 
diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c
index 704f6e78..f596b0ed 100644
--- a/pdf/pdf_interpret.c
+++ b/pdf/pdf_interpret.c
@@ -1538,7 +1538,7 @@ pdf_run_extgstate(pdf_csi *csi, pdf_obj *rdb, pdf_obj *extgstate)
 			if (pdf_is_dict(val))
 			{
 				pdf_xobject *xobj;
-				pdf_obj *group, *luminosity, *bc;
+				pdf_obj *group, *luminosity, *bc, *tr;
 
 				if (gstate->softmask)
 				{
@@ -1572,6 +1572,19 @@ pdf_run_extgstate(pdf_csi *csi, pdf_obj *rdb, pdf_obj *extgstate)
 					gstate->luminosity = 1;
 				else
 					gstate->luminosity = 0;
+
+				tr = pdf_dict_gets(val, "TR2");
+				if (tr)
+				{
+					if (strcmp(pdf_to_name(tr), "Identity") && strcmp(pdf_to_name(tr), "Default"))
+						fz_warn(ctx, "ignoring transfer function");
+				}
+				else
+				{
+					tr = pdf_dict_gets(val, "TR");
+					if (tr && strcmp(pdf_to_name(tr), "Identity")) /* no TR entry at all: nothing is being ignored, so stay quiet */
+						fz_warn(ctx, "ignoring transfer function");
+				}
 			}
 			else if (pdf_is_name(val) && !strcmp(pdf_to_name(val), "None"))
 			{
diff --git a/pdf/pdf_nametree.c b/pdf/pdf_nametree.c
index 25fced52..ab3244d3 100644
--- a/pdf/pdf_nametree.c
+++ b/pdf/pdf_nametree.c
@@ -118,6 +118,8 @@ pdf_load_name_tree_imp(pdf_obj *dict, pdf_document *xref, pdf_obj *node)
 	pdf_obj *names = pdf_dict_gets(node, "Names");
 	int i;
 
+	UNUSED(ctx);
+
 	if (kids && !pdf_dict_mark(node))
 	{
 		int len = pdf_array_len(kids);
diff --git a/pdf/pdf_object.c b/pdf/pdf_object.c
index fb53d9b8..3a1e7655 100644
--- a/pdf/pdf_object.c
+++ b/pdf/pdf_object.c
@@ -117,7 +117,7 @@ pdf_new_string(fz_context *ctx, char *str, int len)
 }
 
 pdf_obj *
-fz_new_name(fz_context *ctx, char *str)
+pdf_new_name(fz_context *ctx, char *str)
 {
 	pdf_obj *obj;
 	obj = Memento_label(fz_malloc(ctx, offsetof(pdf_obj, u.n) + strlen(str) + 1), "pdf_obj(name)");
@@ -588,12 +588,12 @@ pdf_obj *pdf_new_rect(fz_context *ctx, fz_rect *rect)
 		pdf_drop_obj(item);
 		item = NULL;
 
-		item = pdf_new_real(ctx, rect->x1 - rect->x0);
+		item = pdf_new_real(ctx, rect->x1);
 		pdf_array_push(arr, item);
 		pdf_drop_obj(item);
 		item = NULL;
 
-		item = pdf_new_real(ctx, rect->y1 - rect->y0);
+		item = pdf_new_real(ctx, rect->y1);
 		pdf_array_push(arr, item);
 		pdf_drop_obj(item);
 		item = NULL;
@@ -945,7 +945,7 @@ pdf_dict_put(pdf_obj *obj, pdf_obj *key, pdf_obj *val)
 void
 pdf_dict_puts(pdf_obj *obj, char *key, pdf_obj *val)
 {
-	pdf_obj *keyobj = fz_new_name(obj->ctx, key);
+	pdf_obj *keyobj = pdf_new_name(obj->ctx, key);
 	pdf_dict_put(obj, keyobj, val);
 	pdf_drop_obj(keyobj);
 }
diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c
index 4a7e421f..ed7889ed 100644
--- a/pdf/pdf_parse.c
+++ b/pdf/pdf_parse.c
@@ -196,7 +196,7 @@ pdf_obj *
 pdf_to_utf8_name(pdf_document *xref, pdf_obj *src)
 {
 	char *buf = pdf_to_utf8(xref, src);
-	pdf_obj *dst = fz_new_name(xref->ctx, buf);
+	pdf_obj *dst = pdf_new_name(xref->ctx, buf);
 	fz_free(xref->ctx, buf);
 	return dst;
 }
@@ -289,7 +289,7 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
 				break;
 
 			case PDF_TOK_NAME:
-				obj = fz_new_name(ctx, buf->scratch);
+				obj = pdf_new_name(ctx, buf->scratch);
 				pdf_array_push(ary, obj);
 				pdf_drop_obj(obj);
 				obj = NULL;
@@ -372,7 +372,7 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
 			if (tok != PDF_TOK_NAME)
 				fz_throw(ctx, "invalid key in dict");
 
-			key = fz_new_name(ctx, buf->scratch);
+			key = pdf_new_name(ctx, buf->scratch);
 
 			tok = pdf_lex(file, buf);
 
@@ -386,7 +386,7 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
 				val = pdf_parse_dict(xref, file, buf);
 				break;
 
-			case PDF_TOK_NAME: val = fz_new_name(ctx, buf->scratch); break;
+			case PDF_TOK_NAME: val = pdf_new_name(ctx, buf->scratch); break;
 			case PDF_TOK_REAL: val = pdf_new_real(ctx, buf->f); break;
 			case PDF_TOK_STRING: val = pdf_new_string(ctx, buf->scratch, buf->len); break;
 			case PDF_TOK_TRUE: val = pdf_new_bool(ctx, 1); break;
@@ -455,7 +455,7 @@ pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
 		return pdf_parse_array(xref, file, buf);
 	case PDF_TOK_OPEN_DICT:
 		return pdf_parse_dict(xref, file, buf);
-	case PDF_TOK_NAME: return fz_new_name(ctx, buf->scratch); break;
+	case PDF_TOK_NAME: return pdf_new_name(ctx, buf->scratch); break;
 	case PDF_TOK_REAL: return pdf_new_real(ctx, buf->f); break;
 	case PDF_TOK_STRING: return pdf_new_string(ctx, buf->scratch, buf->len); break;
 	case PDF_TOK_TRUE: return pdf_new_bool(ctx, 1); break;
@@ -506,7 +506,7 @@ pdf_parse_ind_obj(pdf_document *xref,
 		obj = pdf_parse_dict(xref, file, buf);
 		break;
 
-	case PDF_TOK_NAME: obj = fz_new_name(ctx, buf->scratch); break;
+	case PDF_TOK_NAME: obj = pdf_new_name(ctx, buf->scratch); break;
 	case PDF_TOK_REAL: obj = pdf_new_real(ctx, buf->f); break;
 	case PDF_TOK_STRING: obj = pdf_new_string(ctx, buf->scratch, buf->len); break;
 	case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, 1); break;
diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c
index 5d03ffe3..24d328d6 100644
--- a/pdf/pdf_write.c
+++ b/pdf/pdf_write.c
@@ -1054,7 +1054,7 @@ add_linearization_objs(pdf_document *xref, pdf_write_options *opts)
 		/* FIXME: Do we have document information? Do an I entry */
 		/* FIXME: Do we have logical structure heirarchy? Do a C entry */
 		/* FIXME: Do L, Page Label hint table */
-		o = fz_new_name(ctx, "FlateDecode");
+		o = pdf_new_name(ctx, "FlateDecode");
 		pdf_dict_puts(hint_obj, "Filter", o);
 		pdf_drop_obj(o);
 		o = NULL;
@@ -1426,7 +1426,7 @@ static void addhexfilter(pdf_document *xref, pdf_obj *dict)
 	pdf_obj *ahx, *nullobj;
 	fz_context *ctx = xref->ctx;
 
-	ahx = fz_new_name(ctx, "ASCIIHexDecode");
+	ahx = pdf_new_name(ctx, "ASCIIHexDecode");
 	nullobj = pdf_new_null(ctx);
 	newf = newdp = NULL;
 
diff --git a/pdf/pdf_xobject.c b/pdf/pdf_xobject.c
index afa86527..19de0363 100644
--- a/pdf/pdf_xobject.c
+++ b/pdf/pdf_xobject.c
@@ -153,11 +153,11 @@ pdf_new_xobject(pdf_document *xref, fz_rect *bbox, fz_matrix *mat)
 
 		res = pdf_new_dict(ctx, 0);
 		procset = pdf_new_array(ctx, 2);
-		obj = fz_new_name(ctx, "PDF");
+		obj = pdf_new_name(ctx, "PDF");
 		pdf_array_push(procset, obj);
 		pdf_drop_obj(obj);
 		obj = NULL;
-		obj = fz_new_name(ctx, "Text");
+		obj = pdf_new_name(ctx, "Text");
 		pdf_array_push(procset, obj);
 		pdf_drop_obj(obj);
 		obj = NULL;
@@ -166,12 +166,12 @@ pdf_new_xobject(pdf_document *xref, fz_rect *bbox, fz_matrix *mat)
 		procset = NULL;
 		pdf_dict_puts(dict, "Resources", res);
 
-		obj = fz_new_name(ctx, "Form");
+		obj = pdf_new_name(ctx, "Form");
 		pdf_dict_puts(dict, "Subtype", obj);
 		pdf_drop_obj(obj);
 		obj = NULL;
 
-		obj = fz_new_name(ctx, "XObject");
+		obj = pdf_new_name(ctx, "XObject");
 		pdf_dict_puts(dict, "Type", obj);
 		pdf_drop_obj(obj);
 		obj = NULL;