From 18ae06d9ae493276b3ddcd37eb19de7aeba1a0e8 Mon Sep 17 00:00:00 2001 From: Jane Liu Date: Tue, 18 Jul 2017 10:15:16 -0400 Subject: Basic APIs and tests for extracting attachments 1. Added API for extracting attachment properties and data. * Expanded the embedder test to cover all the new APIs. Bug=pdfium:174 Change-Id: I09bffd412410e9aea45faca442d2b72eefafef4e Reviewed-on: https://pdfium-review.googlesource.com/7790 Reviewed-by: dsinclair Commit-Queue: dsinclair --- fpdfsdk/fpdfannot.cpp | 15 +--- fpdfsdk/fpdfattachment.cpp | 120 ++++++++++++++++++++++++++++---- fpdfsdk/fpdfattachment_embeddertest.cpp | 69 +++++++++++++++++- fpdfsdk/fpdfdoc.cpp | 10 --- fpdfsdk/fpdfview.cpp | 20 ++++++ fpdfsdk/fpdfview_c_api_test.c | 7 +- fpdfsdk/fsdk_define.h | 8 +++ 7 files changed, 211 insertions(+), 38 deletions(-) (limited to 'fpdfsdk') diff --git a/fpdfsdk/fpdfannot.cpp b/fpdfsdk/fpdfannot.cpp index 412c80b5de..1c4345ab52 100644 --- a/fpdfsdk/fpdfannot.cpp +++ b/fpdfsdk/fpdfannot.cpp @@ -170,10 +170,6 @@ bool HasAPStream(const CPDF_Dictionary* pAnnotDict) { return !!FPDFDOC_GetAnnotAP(pAnnotDict, CPDF_Annot::AppearanceMode::Normal); } -CFX_ByteString CFXByteStringFromFPDFWideString(FPDF_WIDESTRING text) { - return CFX_WideString::FromUTF16LE(text, CFX_WideString::WStringLength(text)) - .UTF8Encode(); -} void UpdateContentStream(CPDF_Form* pForm, CPDF_Stream* pStream) { ASSERT(pForm); ASSERT(pStream); @@ -760,14 +756,9 @@ DLLEXPORT unsigned long STDCALL FPDFAnnot_GetStringValue(FPDF_ANNOTATION annot, if (!pAnnotDict) return 0; - CFX_ByteString contents = - pAnnotDict->GetUnicodeTextFor(CFXByteStringFromFPDFWideString(key)) - .UTF16LE_Encode(); - unsigned long len = contents.GetLength(); - if (buffer && buflen >= len) - memcpy(buffer, contents.c_str(), len); - - return len; + return Utf16EncodeMaybeCopyAndReturnLength( + pAnnotDict->GetUnicodeTextFor(CFXByteStringFromFPDFWideString(key)), + buffer, buflen); } DLLEXPORT int STDCALL FPDFAnnot_GetFlags(FPDF_ANNOTATION annot) { 
diff --git a/fpdfsdk/fpdfattachment.cpp b/fpdfsdk/fpdfattachment.cpp index e07d15b0c8..337ab35e0f 100644 --- a/fpdfsdk/fpdfattachment.cpp +++ b/fpdfsdk/fpdfattachment.cpp @@ -4,7 +4,10 @@ #include "public/fpdf_attachment.h" +#include "core/fpdfapi/page/cpdf_streamparser.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fpdfdoc/cpdf_filespec.h" #include "core/fpdfdoc/cpdf_nametree.h" #include "fpdfsdk/fsdk_define.h" @@ -17,28 +20,119 @@ DLLEXPORT int STDCALL FPDFDoc_GetAttachmentCount(FPDF_DOCUMENT document) { return CPDF_NameTree(pDoc, "EmbeddedFiles").GetCount(); } -DLLEXPORT unsigned long STDCALL -FPDFDoc_GetAttachmentName(FPDF_DOCUMENT document, - int index, - void* buffer, - unsigned long buflen) { +DLLEXPORT FPDF_ATTACHMENT STDCALL FPDFDoc_GetAttachment(FPDF_DOCUMENT document, + int index) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc || index < 0) - return 0; + return nullptr; CPDF_NameTree nameTree(pDoc, "EmbeddedFiles"); if (static_cast<size_t>(index) >= nameTree.GetCount()) - return 0; + return nullptr; CFX_ByteString csName; - CPDF_Object* pFile = nameTree.LookupValueAndName(index, &csName); + return nameTree.LookupValueAndName(index, &csName); +} + +DLLEXPORT unsigned long STDCALL +FPDFAttachment_GetName(FPDF_ATTACHMENT attachment, + void* buffer, + unsigned long buflen) { + CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment); + if (!pFile) + return 0; + + return Utf16EncodeMaybeCopyAndReturnLength(CPDF_FileSpec(pFile).GetFileName(), + buffer, buflen); +} + +DLLEXPORT FPDF_BOOL STDCALL FPDFAttachment_HasKey(FPDF_ATTACHMENT attachment, + FPDF_WIDESTRING key) { + CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment); + if (!pFile) + return 0; + + CPDF_Dictionary* pParamsDict = CPDF_FileSpec(pFile).GetParamsDict(); + if (!pParamsDict) + return 0; + + return 
pParamsDict->KeyExist(CFXByteStringFromFPDFWideString(key)); +} + +DLLEXPORT FPDF_OBJECT_TYPE STDCALL +FPDFAttachment_GetValueType(FPDF_ATTACHMENT attachment, FPDF_WIDESTRING key) { + if (!FPDFAttachment_HasKey(attachment, key)) + return FPDF_OBJECT_UNKNOWN; + + CPDF_Object* pObj = CPDF_FileSpec(CPDFObjectFromFPDFAttachment(attachment)) + .GetParamsDict() + ->GetObjectFor(CFXByteStringFromFPDFWideString(key)); + if (!pObj) + return FPDF_OBJECT_UNKNOWN; + + return pObj->GetType(); +} + +DLLEXPORT unsigned long STDCALL +FPDFAttachment_GetStringValue(FPDF_ATTACHMENT attachment, + FPDF_WIDESTRING key, + void* buffer, + unsigned long buflen) { + CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment); + if (!pFile) + return 0; + + CPDF_Dictionary* pParamsDict = CPDF_FileSpec(pFile).GetParamsDict(); + if (!pParamsDict) + return 0; + + CFX_ByteString bsKey = CFXByteStringFromFPDFWideString(key); + CFX_WideString value = pParamsDict->GetUnicodeTextFor(bsKey); + if (bsKey == "CheckSum") { + CPDF_String* stringValue = pParamsDict->GetObjectFor(bsKey)->AsString(); + if (stringValue->IsHex()) { + value = + CPDF_String(nullptr, PDF_EncodeString(stringValue->GetString(), true), + false) + .GetUnicodeText(); + } + } + + return Utf16EncodeMaybeCopyAndReturnLength(value, buffer, buflen); +} + +DLLEXPORT unsigned long STDCALL +FPDFAttachment_GetFile(FPDF_ATTACHMENT attachment, + void* buffer, + unsigned long buflen) { + CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment); if (!pFile) return 0; - CFX_ByteString name = CPDF_FileSpec(pFile).GetFileName().UTF16LE_Encode(); - unsigned long len = name.GetLength(); - if (buffer && buflen >= len) - memcpy(buffer, name.c_str(), len); + CPDF_Stream* pFileStream = CPDF_FileSpec(pFile).GetFileStream(); + if (!pFileStream) + return 0; + + uint8_t* data = pFileStream->GetRawData(); + uint32_t len = pFileStream->GetRawSize(); + CPDF_Dictionary* pFileDict = pFileStream->GetDict(); + if (!pFileDict || 
pFileDict->GetStringFor("Filter").IsEmpty()) { + if (buffer && buflen >= len) + memcpy(buffer, data, len); + + return len; + } + + // Decode the stream if a stream filter is specified. + uint8_t* decodedData = nullptr; + uint32_t decodedLen = 0; + CPDF_StreamParser::DecodeInlineStream( + data, len, pFileDict->GetIntegerFor("Width"), + pFileDict->GetIntegerFor("Height"), pFileDict->GetStringFor("Filter"), + pFileDict->GetDictFor("DecodeParms"), &decodedData, &decodedLen); + if (buffer && buflen >= decodedLen) + memcpy(buffer, decodedData, decodedLen); - return len; + FX_Free(decodedData); + return decodedLen; } diff --git a/fpdfsdk/fpdfattachment_embeddertest.cpp b/fpdfsdk/fpdfattachment_embeddertest.cpp index 2cbda8a429..d873d9b5ce 100644 --- a/fpdfsdk/fpdfattachment_embeddertest.cpp +++ b/fpdfsdk/fpdfattachment_embeddertest.cpp @@ -3,6 +3,7 @@ // found in the LICENSE file. #include "public/fpdf_attachment.h" +#include "public/fpdfview.h" #include "testing/embedder_test.h" class FPDFAttachmentEmbeddertest : public EmbedderTest {}; @@ -12,11 +13,75 @@ TEST_F(FPDFAttachmentEmbeddertest, ExtractAttachments) { ASSERT_TRUE(OpenDocument("embedded_attachments.pdf")); EXPECT_EQ(2, FPDFDoc_GetAttachmentCount(document())); + // Retrieve the first attachment. + FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(document(), 0); + ASSERT_TRUE(attachment); + // Check that the name of the first attachment is correct. - unsigned long len = FPDFDoc_GetAttachmentName(document(), 0, nullptr, 0); + unsigned long len = FPDFAttachment_GetName(attachment, nullptr, 0); std::vector<char> buf(len); - EXPECT_EQ(12u, FPDFDoc_GetAttachmentName(document(), 0, buf.data(), len)); + EXPECT_EQ(12u, FPDFAttachment_GetName(attachment, buf.data(), len)); EXPECT_STREQ(L"1.txt", GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data())) .c_str()); + + // Check that the content of the first attachment is correct. 
+ len = FPDFAttachment_GetFile(attachment, nullptr, 0); + buf.clear(); + buf.resize(len); + ASSERT_EQ(4u, FPDFAttachment_GetFile(attachment, buf.data(), len)); + EXPECT_EQ(std::string("test"), std::string(buf.data(), 4)); + + // Check that a non-existent key does not exist. + EXPECT_FALSE( + FPDFAttachment_HasKey(attachment, GetFPDFWideString(L"none").get())); + + // Check that the string value of a non-string dictionary entry is empty. + std::unique_ptr<unsigned short, pdfium::FreeDeleter> size_key = + GetFPDFWideString(L"Size"); + EXPECT_EQ(FPDF_OBJECT_NUMBER, + FPDFAttachment_GetValueType(attachment, size_key.get())); + EXPECT_EQ(2u, FPDFAttachment_GetStringValue(attachment, size_key.get(), + nullptr, 0)); + + // Check that the creation date of the first attachment is correct. + std::unique_ptr<unsigned short, pdfium::FreeDeleter> date_key = + GetFPDFWideString(L"CreationDate"); + len = FPDFAttachment_GetStringValue(attachment, date_key.get(), nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(48u, FPDFAttachment_GetStringValue(attachment, date_key.get(), + buf.data(), len)); + EXPECT_STREQ(L"D:20170712214438-07'00'", + GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data())) + .c_str()); + + // Retrieve the second attachment. + attachment = FPDFDoc_GetAttachment(document(), 1); + ASSERT_TRUE(attachment); + + // Retrieve the second attachment file. + len = FPDFAttachment_GetFile(attachment, nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(5869u, FPDFAttachment_GetFile(attachment, buf.data(), len)); + + // Check that the calculated checksum of the file data matches expectation. + const char kCheckSum[] = "72afcddedf554dda63c0c88e06f1ce18"; + const wchar_t kCheckSumW[] = L"<72AFCDDEDF554DDA63C0C88E06F1CE18>"; + const std::string generated_checksum = + GenerateMD5Base16(reinterpret_cast<uint8_t*>(buf.data()), len); + EXPECT_EQ(kCheckSum, generated_checksum); + + // Check that the stored checksum matches expectation. 
+ std::unique_ptr<unsigned short, pdfium::FreeDeleter> checksum_key = + GetFPDFWideString(L"CheckSum"); + len = + FPDFAttachment_GetStringValue(attachment, checksum_key.get(), nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(70u, FPDFAttachment_GetStringValue(attachment, checksum_key.get(), + buf.data(), len)); + EXPECT_EQ(kCheckSumW, + GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data()))); } diff --git a/fpdfsdk/fpdfdoc.cpp b/fpdfsdk/fpdfdoc.cpp index 7be53a6554..b6088604fd 100644 --- a/fpdfsdk/fpdfdoc.cpp +++ b/fpdfsdk/fpdfdoc.cpp @@ -60,16 +60,6 @@ CPDF_LinkList* GetLinkList(CPDF_Page* page) { return pHolder->get(); } -unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text, - void* buffer, - unsigned long buflen) { - CFX_ByteString encodedText = text.UTF16LE_Encode(); - unsigned long len = encodedText.GetLength(); - if (buffer && len <= buflen) - memcpy(buffer, encodedText.c_str(), len); - return len; -} - } // namespace DLLEXPORT FPDF_BOOKMARK STDCALL diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp index 2e52ad6487..06e72b3a71 100644 --- a/fpdfsdk/fpdfview.cpp +++ b/fpdfsdk/fpdfview.cpp @@ -320,10 +320,30 @@ CPDF_PageObject* CPDFPageObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object) { return static_cast<CPDF_PageObject*>(page_object); } +CPDF_Object* CPDFObjectFromFPDFAttachment(FPDF_ATTACHMENT attachment) { + return static_cast<CPDF_Object*>(attachment); +} + +CFX_ByteString CFXByteStringFromFPDFWideString(FPDF_WIDESTRING wide_string) { + return CFX_WideString::FromUTF16LE(wide_string, + CFX_WideString::WStringLength(wide_string)) + .UTF8Encode(); +} + CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap) { return static_cast<CFX_DIBitmap*>(bitmap); } +unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text, + void* buffer, + unsigned long buflen) { + CFX_ByteString encodedText = text.UTF16LE_Encode(); + unsigned long len = encodedText.GetLength(); + if (buffer && len <= buflen) + memcpy(buffer, encodedText.c_str(), len); + return len; +} + CFX_RetainPtr<IFX_SeekableReadStream> MakeSeekableReadStream( 
FPDF_FILEACCESS* pFileAccess) { return pdfium::MakeRetain<CPDF_CustomAccess>(pFileAccess); diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c index 2fcaf7455f..6753e66c52 100644 --- a/fpdfsdk/fpdfview_c_api_test.c +++ b/fpdfsdk/fpdfview_c_api_test.c @@ -64,7 +64,12 @@ int CheckPDFiumCApi() { // fpdf_attachment.h CHK(FPDFDoc_GetAttachmentCount); - CHK(FPDFDoc_GetAttachmentName); + CHK(FPDFDoc_GetAttachment); + CHK(FPDFAttachment_GetName); + CHK(FPDFAttachment_HasKey); + CHK(FPDFAttachment_GetValueType); + CHK(FPDFAttachment_GetStringValue); + CHK(FPDFAttachment_GetFile); // fpdf_dataavail.h CHK(FPDFAvail_Create); diff --git a/fpdfsdk/fsdk_define.h b/fpdfsdk/fsdk_define.h index 4cfe3442ac..610b854a9d 100644 --- a/fpdfsdk/fsdk_define.h +++ b/fpdfsdk/fsdk_define.h @@ -67,8 +67,16 @@ CPDF_PathObject* CPDFPathObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object); CPDF_PageObject* CPDFPageObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object); +CPDF_Object* CPDFObjectFromFPDFAttachment(FPDF_ATTACHMENT attachment); + +CFX_ByteString CFXByteStringFromFPDFWideString(FPDF_WIDESTRING wide_string); + CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap); +unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text, + void* buffer, + unsigned long buflen); + void FSDK_SetSandBoxPolicy(FPDF_DWORD policy, FPDF_BOOL enable); FPDF_BOOL FSDK_IsSandBoxPolicyEnabled(FPDF_DWORD policy); void FPDF_RenderPage_Retail(CPDF_PageRenderContext* pContext, -- cgit v1.2.3