From 548334e57cae1039824d3db97bab5348fbe674e2 Mon Sep 17 00:00:00 2001 From: Jane Liu Date: Thu, 3 Aug 2017 16:33:40 -0400 Subject: APIs and tests for retrieving raw/decoded data from image objects Added FPDFImageObj_GetImageDataDecoded() for retrieving the uncompressed data of an image, and FPDFImageObj_GetImageDataRaw() for retrieving the raw data of an image. * Refactored out DecodeStreamMaybeCopyAndReturnLength(), which is used to decode both attachment data and image data. * Within DecodeStreamMaybeCopyAndReturnLength(), used a different decoder function which takes care of multiple filters if exist. As a result, CPDF_StreamParser::DecodeInlineStream() which was made static previously is now moved back into namespace. Bug=pdfium:677 Change-Id: I22a22c99acaca98ef8c15f88911f2646a2c854d5 Reviewed-on: https://pdfium-review.googlesource.com/9811 Commit-Queue: Jane Liu Reviewed-by: Lei Zhang --- core/fpdfapi/page/cpdf_streamparser.cpp | 21 +++++++------- core/fpdfapi/page/cpdf_streamparser.h | 9 ------ fpdfsdk/fpdfattachment.cpp | 24 +--------------- fpdfsdk/fpdfedit_embeddertest.cpp | 51 +++++++++++++++++++++++++++++++++ fpdfsdk/fpdfeditimg.cpp | 42 +++++++++++++++++++++++++++ fpdfsdk/fpdfview.cpp | 44 ++++++++++++++++++++++++++-- fpdfsdk/fpdfview_c_api_test.c | 2 ++ fpdfsdk/fsdk_define.h | 5 ++++ public/fpdf_edit.h | 29 +++++++++++++++++++ 9 files changed, 181 insertions(+), 46 deletions(-) diff --git a/core/fpdfapi/page/cpdf_streamparser.cpp b/core/fpdfapi/page/cpdf_streamparser.cpp index 964d600b3d..9d7dd1ed55 100644 --- a/core/fpdfapi/page/cpdf_streamparser.cpp +++ b/core/fpdfapi/page/cpdf_streamparser.cpp @@ -60,17 +60,14 @@ uint32_t DecodeAllScanlines(std::unique_ptr pDecoder, return pDecoder->GetSrcOffset(); } -} // namespace - -// Static -uint32_t CPDF_StreamParser::DecodeInlineStream(const uint8_t* src_buf, - uint32_t limit, - int width, - int height, - const CFX_ByteString& decoder, - CPDF_Dictionary* pParam, - uint8_t** dest_buf, - uint32_t* dest_size) { 
+uint32_t DecodeInlineStream(const uint8_t* src_buf, + uint32_t limit, + int width, + int height, + const CFX_ByteString& decoder, + CPDF_Dictionary* pParam, + uint8_t** dest_buf, + uint32_t* dest_size) { if (decoder == "CCITTFaxDecode" || decoder == "CCF") { std::unique_ptr pDecoder = FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); @@ -102,6 +99,8 @@ uint32_t CPDF_StreamParser::DecodeInlineStream(const uint8_t* src_buf, return 0xFFFFFFFF; } +} // namespace + CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize) : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(nullptr) {} diff --git a/core/fpdfapi/page/cpdf_streamparser.h b/core/fpdfapi/page/cpdf_streamparser.h index 9f9a8eaf12..fdc418c634 100644 --- a/core/fpdfapi/page/cpdf_streamparser.h +++ b/core/fpdfapi/page/cpdf_streamparser.h @@ -21,15 +21,6 @@ class CPDF_StreamParser { public: enum SyntaxType { EndOfData, Number, Keyword, Name, Others }; - static uint32_t DecodeInlineStream(const uint8_t* src_buf, - uint32_t limit, - int width, - int height, - const CFX_ByteString& decoder, - CPDF_Dictionary* pParam, - uint8_t** dest_buf, - uint32_t* dest_size); - CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize); CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize, diff --git a/fpdfsdk/fpdfattachment.cpp b/fpdfsdk/fpdfattachment.cpp index 0cb623f81c..5bdb3bd4a2 100644 --- a/fpdfsdk/fpdfattachment.cpp +++ b/fpdfsdk/fpdfattachment.cpp @@ -8,7 +8,6 @@ #include #include "core/fdrm/crypto/fx_crypt.h" -#include "core/fpdfapi/page/cpdf_streamparser.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_name.h" @@ -273,26 +272,5 @@ FPDFAttachment_GetFile(FPDF_ATTACHMENT attachment, if (!pFileStream) return 0; - uint8_t* data = pFileStream->GetRawData(); - uint32_t len = pFileStream->GetRawSize(); - CPDF_Dictionary* pFileDict = pFileStream->GetDict(); - if (!pFileDict || 
pFileDict->GetStringFor("Filter").IsEmpty()) { - if (buffer && buflen >= len) - memcpy(buffer, data, len); - - return len; - } - - // Decode the stream if a stream filter is specified. - uint8_t* decodedData = nullptr; - uint32_t decodedLen = 0; - CPDF_StreamParser::DecodeInlineStream( - data, len, pFileDict->GetIntegerFor("Width"), - pFileDict->GetIntegerFor("Height"), pFileDict->GetStringFor("Filter"), - pFileDict->GetDictFor("DecodeParms"), &decodedData, &decodedLen); - if (buffer && buflen >= decodedLen) - memcpy(buffer, decodedData, decodedLen); - - FX_Free(decodedData); - return decodedLen; + return DecodeStreamMaybeCopyAndReturnLength(pFileStream, buffer, buflen); } diff --git a/fpdfsdk/fpdfedit_embeddertest.cpp b/fpdfsdk/fpdfedit_embeddertest.cpp index dcaeb945d7..f1bbb87422 100644 --- a/fpdfsdk/fpdfedit_embeddertest.cpp +++ b/fpdfsdk/fpdfedit_embeddertest.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "core/fpdfapi/font/cpdf_font.h" #include "core/fpdfapi/page/cpdf_page.h" @@ -979,3 +980,53 @@ TEST_F(FPDFEditEmbeddertest, ExtractImageBitmap) { FPDFBitmap_Destroy(bitmap); UnloadPage(page); } + +TEST_F(FPDFEditEmbeddertest, GetImageData) { + EXPECT_TRUE(OpenDocument("embedded_images.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + ASSERT_EQ(39, FPDFPage_CountObject(page)); + + // Retrieve an image object with flate-encoded data stream. + FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, 33); + ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj)); + + // Check that the raw image data has the correct length and hash value. + unsigned long len = FPDFImageObj_GetImageDataRaw(obj, nullptr, 0); + std::vector buf(len); + EXPECT_EQ(4091u, FPDFImageObj_GetImageDataRaw(obj, buf.data(), len)); + EXPECT_EQ("f73802327d2e88e890f653961bcda81a", + GenerateMD5Base16(reinterpret_cast(buf.data()), len)); + + // Check that the decoded image data has the correct length and hash value. 
+ len = FPDFImageObj_GetImageDataDecoded(obj, nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(28776u, FPDFImageObj_GetImageDataDecoded(obj, buf.data(), len)); + EXPECT_EQ("cb3637934bb3b95a6e4ae1ea9eb9e56e", + GenerateMD5Base16(reinterpret_cast(buf.data()), len)); + + // Retrieve an image object with DCTDecode-encoded data stream. + obj = FPDFPage_GetObject(page, 37); + ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj)); + + // Check that the raw image data has the correct length and hash value. + len = FPDFImageObj_GetImageDataRaw(obj, nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(4370u, FPDFImageObj_GetImageDataRaw(obj, buf.data(), len)); + EXPECT_EQ("6aae1f3710335023a9e12191be66b64b", + GenerateMD5Base16(reinterpret_cast(buf.data()), len)); + + // Check that the decoded image data has the correct length and hash value, + // which should be the same as those of the raw data, since this image is + // encoded by a single DCTDecode filter and decoding is a noop. 
+ len = FPDFImageObj_GetImageDataDecoded(obj, nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(4370u, FPDFImageObj_GetImageDataDecoded(obj, buf.data(), len)); + EXPECT_EQ("6aae1f3710335023a9e12191be66b64b", + GenerateMD5Base16(reinterpret_cast(buf.data()), len)); + + UnloadPage(page); +} diff --git a/fpdfsdk/fpdfeditimg.cpp b/fpdfsdk/fpdfeditimg.cpp index bfd12b2441..0d0c54604b 100644 --- a/fpdfsdk/fpdfeditimg.cpp +++ b/fpdfsdk/fpdfeditimg.cpp @@ -137,3 +137,45 @@ FPDFImageObj_GetBitmap(FPDF_PAGEOBJECT image_object) { return pBitmap.Leak(); } + +DLLEXPORT unsigned long STDCALL +FPDFImageObj_GetImageDataDecoded(FPDF_PAGEOBJECT image_object, + void* buffer, + unsigned long buflen) { + CPDF_PageObject* pObj = CPDFPageObjectFromFPDFPageObject(image_object); + if (!pObj || !pObj->IsImage()) + return 0; + + CFX_RetainPtr pImg = pObj->AsImage()->GetImage(); + if (!pImg) + return 0; + + CPDF_Stream* pImgStream = pImg->GetStream(); + if (!pImgStream) + return 0; + + return DecodeStreamMaybeCopyAndReturnLength(pImgStream, buffer, buflen); +} + +DLLEXPORT unsigned long STDCALL +FPDFImageObj_GetImageDataRaw(FPDF_PAGEOBJECT image_object, + void* buffer, + unsigned long buflen) { + CPDF_PageObject* pObj = CPDFPageObjectFromFPDFPageObject(image_object); + if (!pObj || !pObj->IsImage()) + return 0; + + CFX_RetainPtr pImg = pObj->AsImage()->GetImage(); + if (!pImg) + return 0; + + CPDF_Stream* pImgStream = pImg->GetStream(); + if (!pImgStream) + return 0; + + uint32_t len = pImgStream->GetRawSize(); + if (buffer && buflen >= len) + memcpy(buffer, pImgStream->GetRawData(), len); + + return len; +} diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp index 5aa80139ae..57e4806d39 100644 --- a/fpdfsdk/fpdfview.cpp +++ b/fpdfsdk/fpdfview.cpp @@ -357,10 +357,48 @@ CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap) { unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text, void* buffer, unsigned long buflen) { - CFX_ByteString encodedText = 
text.UTF16LE_Encode(); - unsigned long len = encodedText.GetLength(); + CFX_ByteString encoded_text = text.UTF16LE_Encode(); + unsigned long len = encoded_text.GetLength(); if (buffer && len <= buflen) - memcpy(buffer, encodedText.c_str(), len); + memcpy(buffer, encoded_text.c_str(), len); + return len; +} + +unsigned long DecodeStreamMaybeCopyAndReturnLength(const CPDF_Stream* stream, + void* buffer, + unsigned long buflen) { + ASSERT(stream); + uint8_t* data = stream->GetRawData(); + uint32_t len = stream->GetRawSize(); + CPDF_Dictionary* dict = stream->GetDict(); + CPDF_Object* decoder = dict ? dict->GetDirectObjectFor("Filter") : nullptr; + if (decoder && (decoder->IsArray() || decoder->IsName())) { + // Decode the stream if one or more stream filters are specified. + uint8_t* decoded_data = nullptr; + uint32_t decoded_len = 0; + CFX_ByteString dummy_last_decoder; + CPDF_Dictionary* dummy_last_param; + if (PDF_DataDecode(data, len, dict, dict->GetIntegerFor("DL"), false, + &decoded_data, &decoded_len, &dummy_last_decoder, + &dummy_last_param)) { + if (buffer && buflen >= decoded_len) + memcpy(buffer, decoded_data, decoded_len); + + // Free the buffer for the decoded data if it was allocated by + // PDF_DataDecode(). Note that for images with a single image-specific + // filter, |decoded_data| is directly assigned to be |data|, so + // |decoded_data| does not need to be freed. + if (decoded_data != data) + FX_Free(decoded_data); + + return decoded_len; + } + } + // Copy the raw data and return its length if there is no valid filter + // specified or if decoding failed. 
+ if (buffer && buflen >= len) + memcpy(buffer, data, len); + return len; } diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c index e47f4d172c..d40437c278 100644 --- a/fpdfsdk/fpdfview_c_api_test.c +++ b/fpdfsdk/fpdfview_c_api_test.c @@ -133,6 +133,8 @@ int CheckPDFiumCApi() { CHK(FPDFImageObj_SetMatrix); CHK(FPDFImageObj_SetBitmap); CHK(FPDFImageObj_GetBitmap); + CHK(FPDFImageObj_GetImageDataDecoded); + CHK(FPDFImageObj_GetImageDataRaw); CHK(FPDFPageObj_CreateNewPath); CHK(FPDFPageObj_CreateNewRect); CHK(FPDFPath_SetStrokeColor); diff --git a/fpdfsdk/fsdk_define.h b/fpdfsdk/fsdk_define.h index 610b854a9d..91efc27f1a 100644 --- a/fpdfsdk/fsdk_define.h +++ b/fpdfsdk/fsdk_define.h @@ -26,6 +26,7 @@ class CPDF_Page; class CPDF_PageObject; class CPDF_PageRenderContext; class CPDF_PathObject; +class CPDF_Stream; class IFSDK_PAUSE_Adapter; // Layering prevents fxcrt from knowing about FPDF_FILEACCESS, so this can't @@ -77,6 +78,10 @@ unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text, void* buffer, unsigned long buflen); +unsigned long DecodeStreamMaybeCopyAndReturnLength(const CPDF_Stream* stream, + void* buffer, + unsigned long buflen); + void FSDK_SetSandBoxPolicy(FPDF_DWORD policy, FPDF_BOOL enable); FPDF_BOOL FSDK_IsSandBoxPolicyEnabled(FPDF_DWORD policy); void FPDF_RenderPage_Retail(CPDF_PageRenderContext* pContext, diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h index e5607d1716..dc710b5f52 100644 --- a/public/fpdf_edit.h +++ b/public/fpdf_edit.h @@ -298,6 +298,35 @@ DLLEXPORT FPDF_BOOL STDCALL FPDFImageObj_SetBitmap(FPDF_PAGE* pages, DLLEXPORT FPDF_BITMAP STDCALL FPDFImageObj_GetBitmap(FPDF_PAGEOBJECT image_object); +// Get the decoded image data of |image_object|. The decoded data is the +// uncompressed image data, i.e. the raw image data after having all filters +// applied. |buffer| is only modified if |buflen| is at least the length of +// the decoded image data. 
+// +// image_object - handle to an image object. +// buffer - buffer for holding the decoded image data in raw bytes. +// buflen - length of the buffer. +// +// Returns the length of the decoded image data. +DLLEXPORT unsigned long STDCALL +FPDFImageObj_GetImageDataDecoded(FPDF_PAGEOBJECT image_object, + void* buffer, + unsigned long buflen); + +// Get the raw image data of |image_object|. The raw data is the image data as +// stored in the PDF without applying any filters. |buffer| is only modified if +// |buflen| is at least the length of the raw image data. +// +// image_object - handle to an image object. +// buffer - buffer for holding the raw image data in raw bytes. +// buflen - length of the buffer. +// +// Returns the length of the raw image data. +DLLEXPORT unsigned long STDCALL +FPDFImageObj_GetImageDataRaw(FPDF_PAGEOBJECT image_object, + void* buffer, + unsigned long buflen); + // Create a new path object at an initial position. // // x - initial horizontal position. -- cgit v1.2.3