From 54a4214c86bc790cc2a3ae454b1aa709e868fa1a Mon Sep 17 00:00:00 2001 From: Jane Liu Date: Mon, 24 Jul 2017 16:40:54 -0400 Subject: Basic APIs and tests for adding attachments 1. Added APIs for adding attachments, setting attachment files, and modifying attachment dictionary entries. * Added two embedder tests covering all new APIs. Bug=pdfium:174 Change-Id: I65f43cd6ca4887b71f9f7bcee64a87ba6b7e2706 Reviewed-on: https://pdfium-review.googlesource.com/8671 Commit-Queue: Jane Liu Reviewed-by: Lei Zhang Reviewed-by: dsinclair --- BUILD.gn | 4 +- core/fxcrt/cfx_datetime.cpp | 2 +- fpdfsdk/fpdfattachment.cpp | 146 ++++++++++++++++++++++++++++++- fpdfsdk/fpdfattachment_embeddertest.cpp | 150 ++++++++++++++++++++++++++++++++ fpdfsdk/fpdfview_c_api_test.c | 3 + public/fpdf_attachment.h | 48 +++++++++- 6 files changed, 347 insertions(+), 6 deletions(-) diff --git a/BUILD.gn b/BUILD.gn index 9cc4500503..ebe6c0dc46 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -796,6 +796,8 @@ static_library("fxcrt") { sources = [ "core/fxcrt/cfx_bytestring.cpp", "core/fxcrt/cfx_bytestring.h", + "core/fxcrt/cfx_datetime.cpp", + "core/fxcrt/cfx_datetime.h", "core/fxcrt/cfx_maybe_owned.h", "core/fxcrt/cfx_memorystream.cpp", "core/fxcrt/cfx_memorystream.h", @@ -874,8 +876,6 @@ static_library("fxcrt") { "core/fxcrt/cfx_checksumcontext.h", "core/fxcrt/cfx_crtfileaccess.cpp", "core/fxcrt/cfx_crtfileaccess.h", - "core/fxcrt/cfx_datetime.cpp", - "core/fxcrt/cfx_datetime.h", "core/fxcrt/cfx_decimal.cpp", "core/fxcrt/cfx_decimal.h", "core/fxcrt/cfx_seekablestreamproxy.cpp", diff --git a/core/fxcrt/cfx_datetime.cpp b/core/fxcrt/cfx_datetime.cpp index 833fe58907..b59aabcc94 100644 --- a/core/fxcrt/cfx_datetime.cpp +++ b/core/fxcrt/cfx_datetime.cpp @@ -109,7 +109,7 @@ void CFX_DateTime::Now() { utLocal.wMinute = st.tm_min; utLocal.wSecond = st.tm_sec; utLocal.wMillisecond = curTime.tv_usec / 1000; -#endif // _FX_OS_ == _FX_WIN32_DESKTOP_ || _FX_OS_ == _FX_WIN32_MOBILE_ || \ +#endif // _FX_OS_ == 
_FX_WIN32_DESKTOP_ || _FX_OS_ == _FX_WIN32_MOBILE_ || // _FX_OS_ == _FX_WIN64_ year_ = utLocal.wYear; diff --git a/fpdfsdk/fpdfattachment.cpp b/fpdfsdk/fpdfattachment.cpp index 7d8cce736b..724664b05d 100644 --- a/fpdfsdk/fpdfattachment.cpp +++ b/fpdfsdk/fpdfattachment.cpp @@ -4,14 +4,45 @@ #include "public/fpdf_attachment.h" +#include "core/fdrm/crypto/fx_crypt.h" #include "core/fpdfapi/page/cpdf_streamparser.h" +#include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_name.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_string.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fpdfdoc/cpdf_filespec.h" #include "core/fpdfdoc/cpdf_nametree.h" +#include "core/fxcrt/cfx_datetime.h" +#include "core/fxcrt/fx_extension.h" #include "fpdfsdk/fsdk_define.h" +namespace { + +CFX_ByteString CFXByteStringHexDecode(const CFX_ByteString& bsHex) { + uint8_t* result = nullptr; + uint32_t size = 0; + HexDecode(bsHex.raw_str(), bsHex.GetLength(), &result, &size); + CFX_ByteString bsDecoded(result, size); + FX_Free(result); + return bsDecoded; +} + +CFX_ByteString GenerateMD5Base16(const void* contents, + const unsigned long len) { + uint8_t digest[16]; + CRYPT_MD5Generate(reinterpret_cast(contents), len, digest); + char buf[32]; + for (int i = 0; i < 16; ++i) + FXSYS_IntToTwoHexChars(digest[i], &buf[i * 2]); + + return CFX_ByteString(buf, 32); +} + +} // namespace + DLLEXPORT int STDCALL FPDFDoc_GetAttachmentCount(FPDF_DOCUMENT document) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc) @@ -20,6 +51,50 @@ DLLEXPORT int STDCALL FPDFDoc_GetAttachmentCount(FPDF_DOCUMENT document) { return CPDF_NameTree(pDoc, "EmbeddedFiles").GetCount(); } +DLLEXPORT FPDF_ATTACHMENT FPDFDoc_AddAttachment(FPDF_DOCUMENT document, + FPDF_WIDESTRING name) { + CPDF_Document* pDoc = 
CPDFDocumentFromFPDFDocument(document); + CFX_WideString wsName = + CFX_WideString::FromUTF16LE(name, CFX_WideString::WStringLength(name)); + if (!pDoc || wsName.IsEmpty()) + return nullptr; + + CPDF_Dictionary* pRoot = pDoc->GetRoot(); + if (!pRoot) + return nullptr; + + // Retrieve the document's Names dictionary; create it if missing. + CPDF_Dictionary* pNames = pRoot->GetDictFor("Names"); + if (!pNames) { + pNames = pDoc->NewIndirect(); + pRoot->SetNewFor("Names", pDoc, pNames->GetObjNum()); + } + + // Create the EmbeddedFiles dictionary if missing. + if (!pNames->GetDictFor("EmbeddedFiles")) { + CPDF_Dictionary* pFiles = pDoc->NewIndirect(); + pFiles->SetNewFor("Names"); + pNames->SetNewFor("EmbeddedFiles", pDoc, + pFiles->GetObjNum()); + } + + // Set up the basic entries in the filespec dictionary. + CPDF_Dictionary* pFile = pDoc->NewIndirect(); + pFile->SetNewFor("Type", "Filespec"); + pFile->SetNewFor("UF", wsName); + pFile->SetNewFor("F", wsName); + + // Add the new attachment name and filespec into the document's EmbeddedFiles. 
+ CPDF_NameTree nameTree(pDoc, "EmbeddedFiles"); + if (!nameTree.AddValueAndName( + pdfium::MakeUnique(pDoc, pFile->GetObjNum()), + wsName)) { + return nullptr; + } + + return pFile; +} + DLLEXPORT FPDF_ATTACHMENT STDCALL FPDFDoc_GetAttachment(FPDF_DOCUMENT document, int index) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); @@ -73,6 +148,28 @@ FPDFAttachment_GetValueType(FPDF_ATTACHMENT attachment, FPDF_WIDESTRING key) { return pObj->GetType(); } +DLLEXPORT FPDF_BOOL STDCALL +FPDFAttachment_SetStringValue(FPDF_ATTACHMENT attachment, + FPDF_WIDESTRING key, + FPDF_WIDESTRING value) { + CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment); + if (!pFile) + return false; + + CPDF_Dictionary* pParamsDict = CPDF_FileSpec(pFile).GetParamsDict(); + if (!pParamsDict) + return false; + + CFX_ByteString bsKey = CFXByteStringFromFPDFWideString(key); + CFX_ByteString bsValue = CFXByteStringFromFPDFWideString(value); + bool bEncodedAsHex = bsKey == "CheckSum"; + if (bEncodedAsHex) + bsValue = CFXByteStringHexDecode(bsValue); + + pParamsDict->SetNewFor(bsKey, bsValue, bEncodedAsHex); + return true; +} + DLLEXPORT unsigned long STDCALL FPDFAttachment_GetStringValue(FPDF_ATTACHMENT attachment, FPDF_WIDESTRING key, @@ -88,7 +185,7 @@ FPDFAttachment_GetStringValue(FPDF_ATTACHMENT attachment, CFX_ByteString bsKey = CFXByteStringFromFPDFWideString(key); CFX_WideString value = pParamsDict->GetUnicodeTextFor(bsKey); - if (bsKey == "CheckSum") { + if (bsKey == "CheckSum" && !value.IsEmpty()) { CPDF_String* stringValue = pParamsDict->GetObjectFor(bsKey)->AsString(); if (stringValue->IsHex()) { value = @@ -101,6 +198,53 @@ FPDFAttachment_GetStringValue(FPDF_ATTACHMENT attachment, return Utf16EncodeMaybeCopyAndReturnLength(value, buffer, buflen); } +DLLEXPORT FPDF_BOOL FPDFAttachment_SetFile(FPDF_ATTACHMENT attachment, + FPDF_DOCUMENT document, + const void* contents, + const unsigned long len) { + CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment); + 
CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); + if (!pFile || !pFile->IsDictionary() || !pDoc || len > INT_MAX) + return false; + + // An empty content must have a zero length. + if (!contents && len != 0) + return false; + + // Create a dictionary for the new embedded file stream. + auto pFileStreamDict = pdfium::MakeUnique(); + CPDF_Dictionary* pParamsDict = + pFileStreamDict->SetNewFor("Params"); + + // Set the size of the new file in the dictionary. + pFileStreamDict->SetNewFor("DL", static_cast(len)); + pParamsDict->SetNewFor("Size", static_cast(len)); + + // Set the creation date of the new attachment in the dictionary. + CFX_DateTime dateTime; + dateTime.Now(); + CFX_ByteString bsDateTime; + bsDateTime.Format("D:%d%02d%02d%02d%02d%02d", dateTime.GetYear(), + dateTime.GetMonth(), dateTime.GetDay(), dateTime.GetHour(), + dateTime.GetMinute(), dateTime.GetSecond()); + pParamsDict->SetNewFor("CreationDate", bsDateTime, false); + + // Set the checksum of the new attachment in the dictionary. + pParamsDict->SetNewFor( + "CheckSum", CFXByteStringHexDecode(GenerateMD5Base16(contents, len)), + true); + + // Create the file stream and have the filespec dictionary link to it. 
+ std::unique_ptr stream(FX_Alloc(uint8_t, len)); + memcpy(stream.get(), contents, len); + CPDF_Stream* pFileStream = pDoc->NewIndirect( + std::move(stream), len, std::move(pFileStreamDict)); + CPDF_Dictionary* pEFDict = + pFile->AsDictionary()->SetNewFor("EF"); + pEFDict->SetNewFor("F", pDoc, pFileStream->GetObjNum()); + return true; +} + DLLEXPORT unsigned long STDCALL FPDFAttachment_GetFile(FPDF_ATTACHMENT attachment, void* buffer, diff --git a/fpdfsdk/fpdfattachment_embeddertest.cpp b/fpdfsdk/fpdfattachment_embeddertest.cpp index d873d9b5ce..f4d0bfcfae 100644 --- a/fpdfsdk/fpdfattachment_embeddertest.cpp +++ b/fpdfsdk/fpdfattachment_embeddertest.cpp @@ -85,3 +85,153 @@ TEST_F(FPDFAttachmentEmbeddertest, ExtractAttachments) { EXPECT_EQ(kCheckSumW, GetPlatformWString(reinterpret_cast(buf.data()))); } + +TEST_F(FPDFAttachmentEmbeddertest, AddAttachments) { + // Open a file with two attachments. + ASSERT_TRUE(OpenDocument("embedded_attachments.pdf")); + EXPECT_EQ(2, FPDFDoc_GetAttachmentCount(document())); + + // Check that adding an attachment with an empty name would fail. + EXPECT_FALSE(FPDFDoc_AddAttachment(document(), nullptr)); + + // Add an attachment to the beginning of the embedded file list. + std::unique_ptr file_name = + GetFPDFWideString(L"0.txt"); + FPDF_ATTACHMENT attachment = + FPDFDoc_AddAttachment(document(), file_name.get()); + + // Check that writing to a file with nullptr but non-zero bytes would fail. + EXPECT_FALSE(FPDFAttachment_SetFile(attachment, document(), nullptr, 10)); + + // Set the new attachment's file. + constexpr char kContents1[] = "Hello!"; + EXPECT_TRUE(FPDFAttachment_SetFile(attachment, document(), kContents1, + strlen(kContents1))); + + // Verify the name of the new attachment (i.e. the first attachment). 
+ attachment = FPDFDoc_GetAttachment(document(), 0); + ASSERT_TRUE(attachment); + unsigned long len = FPDFAttachment_GetName(attachment, nullptr, 0); + std::vector buf(len); + EXPECT_EQ(12u, FPDFAttachment_GetName(attachment, buf.data(), len)); + EXPECT_STREQ(L"0.txt", + GetPlatformWString(reinterpret_cast(buf.data())) + .c_str()); + + // Verify the content of the new attachment (i.e. the first attachment). + len = FPDFAttachment_GetFile(attachment, nullptr, 0); + buf.clear(); + buf.resize(len); + ASSERT_EQ(6u, FPDFAttachment_GetFile(attachment, buf.data(), len)); + EXPECT_EQ(std::string(kContents1), std::string(buf.data(), 6)); + + // Add an attachment to the end of the embedded file list and set its file. + file_name = GetFPDFWideString(L"z.txt"); + attachment = FPDFDoc_AddAttachment(document(), file_name.get()); + constexpr char kContents2[] = "World!"; + EXPECT_TRUE(FPDFAttachment_SetFile(attachment, document(), kContents2, + strlen(kContents2))); + EXPECT_EQ(4, FPDFDoc_GetAttachmentCount(document())); + + // Verify the name of the new attachment (i.e. the fourth attachment). + attachment = FPDFDoc_GetAttachment(document(), 3); + ASSERT_TRUE(attachment); + len = FPDFAttachment_GetName(attachment, nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(12u, FPDFAttachment_GetName(attachment, buf.data(), len)); + EXPECT_STREQ(L"z.txt", + GetPlatformWString(reinterpret_cast(buf.data())) + .c_str()); + + // Verify the content of the new attachment (i.e. the fourth attachment). + len = FPDFAttachment_GetFile(attachment, nullptr, 0); + buf.clear(); + buf.resize(len); + ASSERT_EQ(6u, FPDFAttachment_GetFile(attachment, buf.data(), len)); + EXPECT_EQ(std::string(kContents2), std::string(buf.data(), 6)); +} + +TEST_F(FPDFAttachmentEmbeddertest, AddAttachmentsWithParams) { + // Open a file with two attachments. 
+ ASSERT_TRUE(OpenDocument("embedded_attachments.pdf")); + EXPECT_EQ(2, FPDFDoc_GetAttachmentCount(document())); + + // Add an attachment to the embedded file list. + std::unique_ptr file_name = + GetFPDFWideString(L"5.txt"); + FPDF_ATTACHMENT attachment = + FPDFDoc_AddAttachment(document(), file_name.get()); + constexpr char kContents[] = "Hello World!"; + EXPECT_TRUE(FPDFAttachment_SetFile(attachment, document(), kContents, + strlen(kContents))); + + // Set the date to be an arbitrary value. + std::unique_ptr date_key = + GetFPDFWideString(L"CreationDate"); + constexpr wchar_t kDateW[] = L"D:20170720161527-04'00'"; + std::unique_ptr ws_date = + GetFPDFWideString(kDateW); + EXPECT_TRUE( + FPDFAttachment_SetStringValue(attachment, date_key.get(), ws_date.get())); + + // Set the checksum to be an arbitrary value. + std::unique_ptr checksum_key = + GetFPDFWideString(L"CheckSum"); + constexpr wchar_t kCheckSumW[] = L""; + std::unique_ptr ws_checksum = + GetFPDFWideString(kCheckSumW); + EXPECT_TRUE(FPDFAttachment_SetStringValue(attachment, checksum_key.get(), + ws_checksum.get())); + + // Verify the name of the new attachment (i.e. the second attachment). + attachment = FPDFDoc_GetAttachment(document(), 1); + ASSERT_TRUE(attachment); + unsigned long len = FPDFAttachment_GetName(attachment, nullptr, 0); + std::vector buf(len); + EXPECT_EQ(12u, FPDFAttachment_GetName(attachment, buf.data(), len)); + EXPECT_STREQ(L"5.txt", + GetPlatformWString(reinterpret_cast(buf.data())) + .c_str()); + + // Verify the content of the new attachment. + len = FPDFAttachment_GetFile(attachment, nullptr, 0); + buf.clear(); + buf.resize(len); + ASSERT_EQ(12u, FPDFAttachment_GetFile(attachment, buf.data(), len)); + EXPECT_EQ(std::string(kContents), std::string(buf.data(), 12)); + + // Verify the creation date of the new attachment. 
+ len = FPDFAttachment_GetStringValue(attachment, date_key.get(), nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(48u, FPDFAttachment_GetStringValue(attachment, date_key.get(), + buf.data(), len)); + EXPECT_STREQ(kDateW, + GetPlatformWString(reinterpret_cast(buf.data())) + .c_str()); + + // Verify the checksum of the new attachment. + len = + FPDFAttachment_GetStringValue(attachment, checksum_key.get(), nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(70u, FPDFAttachment_GetStringValue(attachment, checksum_key.get(), + buf.data(), len)); + EXPECT_STREQ(kCheckSumW, + GetPlatformWString(reinterpret_cast(buf.data())) + .c_str()); + + // Overwrite the existing file with empty content, and check that the checksum + // gets updated to the correct value. + EXPECT_TRUE(FPDFAttachment_SetFile(attachment, document(), nullptr, 0)); + EXPECT_EQ(0u, FPDFAttachment_GetFile(attachment, nullptr, 0)); + len = + FPDFAttachment_GetStringValue(attachment, checksum_key.get(), nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(70u, FPDFAttachment_GetStringValue(attachment, checksum_key.get(), + buf.data(), len)); + EXPECT_EQ(L"", + GetPlatformWString(reinterpret_cast(buf.data()))); +} \ No newline at end of file diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c index 97bacbba54..95a0175999 100644 --- a/fpdfsdk/fpdfview_c_api_test.c +++ b/fpdfsdk/fpdfview_c_api_test.c @@ -66,11 +66,14 @@ int CheckPDFiumCApi() { // fpdf_attachment.h CHK(FPDFDoc_GetAttachmentCount); + CHK(FPDFDoc_AddAttachment); CHK(FPDFDoc_GetAttachment); CHK(FPDFAttachment_GetName); CHK(FPDFAttachment_HasKey); CHK(FPDFAttachment_GetValueType); + CHK(FPDFAttachment_SetStringValue); CHK(FPDFAttachment_GetStringValue); + CHK(FPDFAttachment_SetFile); CHK(FPDFAttachment_GetFile); // fpdf_dataavail.h diff --git a/public/fpdf_attachment.h b/public/fpdf_attachment.h index 2c40992760..6a23970c45 100644 --- a/public/fpdf_attachment.h +++ b/public/fpdf_attachment.h @@ -20,6 
+20,19 @@ extern "C" { // Returns the number of embedded files in |document|. DLLEXPORT int STDCALL FPDFDoc_GetAttachmentCount(FPDF_DOCUMENT document); +// Experimental API. +// Add an embedded file with |name| in |document|. If |name| is empty, or if +// |name| is the name of an existing embedded file in |document|, or if +// |document|'s embedded file name tree is too deep (i.e. |document| has too +// many embedded files already), then a new attachment will not be added. +// +// document - handle to a document. +// name - name of the new attachment. +// +// Returns a handle to the new attachment object, or NULL on failure. +DLLEXPORT FPDF_ATTACHMENT FPDFDoc_AddAttachment(FPDF_DOCUMENT document, + FPDF_WIDESTRING name); + // Experimental API. // Get the embedded attachment at |index| in |document|. Note that the returned // attachment handle is only valid while |document| is open. @@ -67,6 +80,21 @@ DLLEXPORT FPDF_BOOL STDCALL FPDFAttachment_HasKey(FPDF_ATTACHMENT attachment, DLLEXPORT FPDF_OBJECT_TYPE STDCALL FPDFAttachment_GetValueType(FPDF_ATTACHMENT attachment, FPDF_WIDESTRING key); +// Experimental API. +// Set the string value corresponding to |key| in the params dictionary of the +// embedded file |attachment|, overwriting the existing value if any. The value +// type should be FPDF_OBJECT_STRING after this function call succeeds. +// +// attachment - handle to an attachment. +// key - the key to the dictionary entry, encoded in UTF16-LE. +// value - the string value to be set, encoded in UTF16-LE. +// +// Returns true if successful. +DLLEXPORT FPDF_BOOL STDCALL +FPDFAttachment_SetStringValue(FPDF_ATTACHMENT attachment, + FPDF_WIDESTRING key, + FPDF_WIDESTRING value); + // Experimental API. // Get the string value corresponding to |key| in the params dictionary of the // embedded file |attachment|. 
|buffer| is only modified if |buflen| is longer @@ -79,7 +107,7 @@ FPDFAttachment_GetValueType(FPDF_ATTACHMENT attachment, FPDF_WIDESTRING key); // // attachment - handle to an attachment. // key - the key to the requested string value. -// buffer - buffer for holding the file's date string encoded in UTF16-LE. +// buffer - buffer for holding the string value encoded in UTF16-LE. // buflen - length of the buffer. // // Returns the length of the dictionary value string. @@ -89,13 +117,29 @@ FPDFAttachment_GetStringValue(FPDF_ATTACHMENT attachment, void* buffer, unsigned long buflen); +// Experimental API. +// Set the file data of |attachment|, overwriting the existing file data if any. +// The creation date and checksum will be updated, while all other dictionary +// entries will be deleted. Note that only contents with |len| no larger than +// INT_MAX are supported. +// +// attachment - handle to an attachment. +// contents - buffer holding the file data to be written in raw bytes. +// len - length of file data. +// +// Returns true if successful. +DLLEXPORT FPDF_BOOL FPDFAttachment_SetFile(FPDF_ATTACHMENT attachment, + FPDF_DOCUMENT document, + const void* contents, + const unsigned long len); + // Experimental API. // Get the file data of |attachment|. |buffer| is only modified if |buflen| is // longer than the length of the file. On errors, |buffer| is unmodified and the // returned length is 0. // // attachment - handle to an attachment. -// buffer - buffer for holding the file's data in raw bytes. +// buffer - buffer for holding the file data in raw bytes. // buflen - length of the buffer. // // Returns the length of the file. -- cgit v1.2.3