summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jane Liu <janeliulwq@google.com> 2017-08-03 16:33:40 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-08-03 21:20:23 +0000
commit: 548334e57cae1039824d3db97bab5348fbe674e2 (patch)
tree: 5c547cc35c48fe5703fde77afd208f1bd1d01029
parent: 6a5b7872c838ba9e24ea6e1f9a306bb95a80ae6c (diff)
download: pdfium-548334e57cae1039824d3db97bab5348fbe674e2.tar.xz
APIs and tests for retrieving raw/decoded data from image objects

Added FPDFImageObj_GetImageDataDecoded() for retrieving the uncompressed data of an image, and FPDFImageObj_GetImageDataRaw() for retrieving the raw data of an image.
 * Refactored out DecodeStreamMaybeCopyAndReturnLength(), which is used to decode both attachment data and image data.
 * Within DecodeStreamMaybeCopyAndReturnLength(), used a different decoder function which takes care of multiple filters if they exist. As a result, CPDF_StreamParser::DecodeInlineStream(), which was previously made static, is now moved back into the anonymous namespace.

Bug=pdfium:677
Change-Id: I22a22c99acaca98ef8c15f88911f2646a2c854d5
Reviewed-on: https://pdfium-review.googlesource.com/9811
Commit-Queue: Jane Liu <janeliulwq@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>

-rw-r--r--  core/fpdfapi/page/cpdf_streamparser.cpp  21
-rw-r--r--  core/fpdfapi/page/cpdf_streamparser.h  9
-rw-r--r--  fpdfsdk/fpdfattachment.cpp  24
-rw-r--r--  fpdfsdk/fpdfedit_embeddertest.cpp  51
-rw-r--r--  fpdfsdk/fpdfeditimg.cpp  42
-rw-r--r--  fpdfsdk/fpdfview.cpp  44
-rw-r--r--  fpdfsdk/fpdfview_c_api_test.c  2
-rw-r--r--  fpdfsdk/fsdk_define.h  5
-rw-r--r--  public/fpdf_edit.h  29
9 files changed, 181 insertions, 46 deletions
diff --git a/core/fpdfapi/page/cpdf_streamparser.cpp b/core/fpdfapi/page/cpdf_streamparser.cpp
index 964d600b3d..9d7dd1ed55 100644
--- a/core/fpdfapi/page/cpdf_streamparser.cpp
+++ b/core/fpdfapi/page/cpdf_streamparser.cpp
@@ -60,17 +60,14 @@ uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,
return pDecoder->GetSrcOffset();
}
-} // namespace
-
-// Static
-uint32_t CPDF_StreamParser::DecodeInlineStream(const uint8_t* src_buf,
- uint32_t limit,
- int width,
- int height,
- const CFX_ByteString& decoder,
- CPDF_Dictionary* pParam,
- uint8_t** dest_buf,
- uint32_t* dest_size) {
+uint32_t DecodeInlineStream(const uint8_t* src_buf,
+ uint32_t limit,
+ int width,
+ int height,
+ const CFX_ByteString& decoder,
+ CPDF_Dictionary* pParam,
+ uint8_t** dest_buf,
+ uint32_t* dest_size) {
if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
@@ -102,6 +99,8 @@ uint32_t CPDF_StreamParser::DecodeInlineStream(const uint8_t* src_buf,
return 0xFFFFFFFF;
}
+} // namespace
+
CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
: m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(nullptr) {}
diff --git a/core/fpdfapi/page/cpdf_streamparser.h b/core/fpdfapi/page/cpdf_streamparser.h
index 9f9a8eaf12..fdc418c634 100644
--- a/core/fpdfapi/page/cpdf_streamparser.h
+++ b/core/fpdfapi/page/cpdf_streamparser.h
@@ -21,15 +21,6 @@ class CPDF_StreamParser {
public:
enum SyntaxType { EndOfData, Number, Keyword, Name, Others };
- static uint32_t DecodeInlineStream(const uint8_t* src_buf,
- uint32_t limit,
- int width,
- int height,
- const CFX_ByteString& decoder,
- CPDF_Dictionary* pParam,
- uint8_t** dest_buf,
- uint32_t* dest_size);
-
CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize);
CPDF_StreamParser(const uint8_t* pData,
uint32_t dwSize,
diff --git a/fpdfsdk/fpdfattachment.cpp b/fpdfsdk/fpdfattachment.cpp
index 0cb623f81c..5bdb3bd4a2 100644
--- a/fpdfsdk/fpdfattachment.cpp
+++ b/fpdfsdk/fpdfattachment.cpp
@@ -8,7 +8,6 @@
#include <utility>
#include "core/fdrm/crypto/fx_crypt.h"
-#include "core/fpdfapi/page/cpdf_streamparser.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
@@ -273,26 +272,5 @@ FPDFAttachment_GetFile(FPDF_ATTACHMENT attachment,
if (!pFileStream)
return 0;
- uint8_t* data = pFileStream->GetRawData();
- uint32_t len = pFileStream->GetRawSize();
- CPDF_Dictionary* pFileDict = pFileStream->GetDict();
- if (!pFileDict || pFileDict->GetStringFor("Filter").IsEmpty()) {
- if (buffer && buflen >= len)
- memcpy(buffer, data, len);
-
- return len;
- }
-
- // Decode the stream if a stream filter is specified.
- uint8_t* decodedData = nullptr;
- uint32_t decodedLen = 0;
- CPDF_StreamParser::DecodeInlineStream(
- data, len, pFileDict->GetIntegerFor("Width"),
- pFileDict->GetIntegerFor("Height"), pFileDict->GetStringFor("Filter"),
- pFileDict->GetDictFor("DecodeParms"), &decodedData, &decodedLen);
- if (buffer && buflen >= decodedLen)
- memcpy(buffer, decodedData, decodedLen);
-
- FX_Free(decodedData);
- return decodedLen;
+ return DecodeStreamMaybeCopyAndReturnLength(pFileStream, buffer, buflen);
}
diff --git a/fpdfsdk/fpdfedit_embeddertest.cpp b/fpdfsdk/fpdfedit_embeddertest.cpp
index dcaeb945d7..f1bbb87422 100644
--- a/fpdfsdk/fpdfedit_embeddertest.cpp
+++ b/fpdfsdk/fpdfedit_embeddertest.cpp
@@ -5,6 +5,7 @@
#include <memory>
#include <string>
#include <utility>
+#include <vector>
#include "core/fpdfapi/font/cpdf_font.h"
#include "core/fpdfapi/page/cpdf_page.h"
@@ -979,3 +980,53 @@ TEST_F(FPDFEditEmbeddertest, ExtractImageBitmap) {
FPDFBitmap_Destroy(bitmap);
UnloadPage(page);
}
+
+TEST_F(FPDFEditEmbeddertest, GetImageData) {
+ EXPECT_TRUE(OpenDocument("embedded_images.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+ ASSERT_EQ(39, FPDFPage_CountObject(page));
+
+ // Retrieve an image object with flate-encoded data stream.
+ FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, 33);
+ ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+
+ // Check that the raw image data has the correct length and hash value.
+ unsigned long len = FPDFImageObj_GetImageDataRaw(obj, nullptr, 0);
+ std::vector<char> buf(len);
+ EXPECT_EQ(4091u, FPDFImageObj_GetImageDataRaw(obj, buf.data(), len));
+ EXPECT_EQ("f73802327d2e88e890f653961bcda81a",
+ GenerateMD5Base16(reinterpret_cast<uint8_t*>(buf.data()), len));
+
+ // Check that the decoded image data has the correct length and hash value.
+ len = FPDFImageObj_GetImageDataDecoded(obj, nullptr, 0);
+ buf.clear();
+ buf.resize(len);
+ EXPECT_EQ(28776u, FPDFImageObj_GetImageDataDecoded(obj, buf.data(), len));
+ EXPECT_EQ("cb3637934bb3b95a6e4ae1ea9eb9e56e",
+ GenerateMD5Base16(reinterpret_cast<uint8_t*>(buf.data()), len));
+
+ // Retrieve an image object with DCTDecode-encoded data stream.
+ obj = FPDFPage_GetObject(page, 37);
+ ASSERT_EQ(FPDF_PAGEOBJ_IMAGE, FPDFPageObj_GetType(obj));
+
+ // Check that the raw image data has the correct length and hash value.
+ len = FPDFImageObj_GetImageDataRaw(obj, nullptr, 0);
+ buf.clear();
+ buf.resize(len);
+ EXPECT_EQ(4370u, FPDFImageObj_GetImageDataRaw(obj, buf.data(), len));
+ EXPECT_EQ("6aae1f3710335023a9e12191be66b64b",
+ GenerateMD5Base16(reinterpret_cast<uint8_t*>(buf.data()), len));
+
+ // Check that the decoded image data has the correct length and hash value,
+ // which should be the same as those of the raw data, since this image is
+ // encoded by a single DCTDecode filter and decoding is a noop.
+ len = FPDFImageObj_GetImageDataDecoded(obj, nullptr, 0);
+ buf.clear();
+ buf.resize(len);
+ EXPECT_EQ(4370u, FPDFImageObj_GetImageDataDecoded(obj, buf.data(), len));
+ EXPECT_EQ("6aae1f3710335023a9e12191be66b64b",
+ GenerateMD5Base16(reinterpret_cast<uint8_t*>(buf.data()), len));
+
+ UnloadPage(page);
+}
diff --git a/fpdfsdk/fpdfeditimg.cpp b/fpdfsdk/fpdfeditimg.cpp
index bfd12b2441..0d0c54604b 100644
--- a/fpdfsdk/fpdfeditimg.cpp
+++ b/fpdfsdk/fpdfeditimg.cpp
@@ -137,3 +137,45 @@ FPDFImageObj_GetBitmap(FPDF_PAGEOBJECT image_object) {
return pBitmap.Leak();
}
+
+DLLEXPORT unsigned long STDCALL
+FPDFImageObj_GetImageDataDecoded(FPDF_PAGEOBJECT image_object,
+ void* buffer,
+ unsigned long buflen) {
+ CPDF_PageObject* pObj = CPDFPageObjectFromFPDFPageObject(image_object);
+ if (!pObj || !pObj->IsImage())
+ return 0;
+
+ CFX_RetainPtr<CPDF_Image> pImg = pObj->AsImage()->GetImage();
+ if (!pImg)
+ return 0;
+
+ CPDF_Stream* pImgStream = pImg->GetStream();
+ if (!pImgStream)
+ return 0;
+
+ return DecodeStreamMaybeCopyAndReturnLength(pImgStream, buffer, buflen);
+}
+
+DLLEXPORT unsigned long STDCALL
+FPDFImageObj_GetImageDataRaw(FPDF_PAGEOBJECT image_object,
+ void* buffer,
+ unsigned long buflen) {
+ CPDF_PageObject* pObj = CPDFPageObjectFromFPDFPageObject(image_object);
+ if (!pObj || !pObj->IsImage())
+ return 0;
+
+ CFX_RetainPtr<CPDF_Image> pImg = pObj->AsImage()->GetImage();
+ if (!pImg)
+ return 0;
+
+ CPDF_Stream* pImgStream = pImg->GetStream();
+ if (!pImgStream)
+ return 0;
+
+ uint32_t len = pImgStream->GetRawSize();
+ if (buffer && buflen >= len)
+ memcpy(buffer, pImgStream->GetRawData(), len);
+
+ return len;
+}
diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp
index 5aa80139ae..57e4806d39 100644
--- a/fpdfsdk/fpdfview.cpp
+++ b/fpdfsdk/fpdfview.cpp
@@ -357,10 +357,48 @@ CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap) {
unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text,
void* buffer,
unsigned long buflen) {
- CFX_ByteString encodedText = text.UTF16LE_Encode();
- unsigned long len = encodedText.GetLength();
+ CFX_ByteString encoded_text = text.UTF16LE_Encode();
+ unsigned long len = encoded_text.GetLength();
if (buffer && len <= buflen)
- memcpy(buffer, encodedText.c_str(), len);
+ memcpy(buffer, encoded_text.c_str(), len);
+ return len;
+}
+
+unsigned long DecodeStreamMaybeCopyAndReturnLength(const CPDF_Stream* stream,
+ void* buffer,
+ unsigned long buflen) {
+ ASSERT(stream);
+ uint8_t* data = stream->GetRawData();
+ uint32_t len = stream->GetRawSize();
+ CPDF_Dictionary* dict = stream->GetDict();
+ CPDF_Object* decoder = dict ? dict->GetDirectObjectFor("Filter") : nullptr;
+ if (decoder && (decoder->IsArray() || decoder->IsName())) {
+ // Decode the stream if one or more stream filters are specified.
+ uint8_t* decoded_data = nullptr;
+ uint32_t decoded_len = 0;
+ CFX_ByteString dummy_last_decoder;
+ CPDF_Dictionary* dummy_last_param;
+ if (PDF_DataDecode(data, len, dict, dict->GetIntegerFor("DL"), false,
+ &decoded_data, &decoded_len, &dummy_last_decoder,
+ &dummy_last_param)) {
+ if (buffer && buflen >= decoded_len)
+ memcpy(buffer, decoded_data, decoded_len);
+
+ // Free the buffer for the decoded data if it was allocated by
+ // PDF_DataDecode(). Note that for images with a single image-specific
+ // filter, |decoded_data| is directly assigned to be |data|, so
+ // |decoded_data| does not need to be freed.
+ if (decoded_data != data)
+ FX_Free(decoded_data);
+
+ return decoded_len;
+ }
+ }
+ // Copy the raw data and return its length if there is no valid filter
+ // specified or if decoding failed.
+ if (buffer && buflen >= len)
+ memcpy(buffer, data, len);
+
return len;
}
diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c
index e47f4d172c..d40437c278 100644
--- a/fpdfsdk/fpdfview_c_api_test.c
+++ b/fpdfsdk/fpdfview_c_api_test.c
@@ -133,6 +133,8 @@ int CheckPDFiumCApi() {
CHK(FPDFImageObj_SetMatrix);
CHK(FPDFImageObj_SetBitmap);
CHK(FPDFImageObj_GetBitmap);
+ CHK(FPDFImageObj_GetImageDataDecoded);
+ CHK(FPDFImageObj_GetImageDataRaw);
CHK(FPDFPageObj_CreateNewPath);
CHK(FPDFPageObj_CreateNewRect);
CHK(FPDFPath_SetStrokeColor);
diff --git a/fpdfsdk/fsdk_define.h b/fpdfsdk/fsdk_define.h
index 610b854a9d..91efc27f1a 100644
--- a/fpdfsdk/fsdk_define.h
+++ b/fpdfsdk/fsdk_define.h
@@ -26,6 +26,7 @@ class CPDF_Page;
class CPDF_PageObject;
class CPDF_PageRenderContext;
class CPDF_PathObject;
+class CPDF_Stream;
class IFSDK_PAUSE_Adapter;
// Layering prevents fxcrt from knowing about FPDF_FILEACCESS, so this can't
@@ -77,6 +78,10 @@ unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text,
void* buffer,
unsigned long buflen);
+unsigned long DecodeStreamMaybeCopyAndReturnLength(const CPDF_Stream* stream,
+ void* buffer,
+ unsigned long buflen);
+
void FSDK_SetSandBoxPolicy(FPDF_DWORD policy, FPDF_BOOL enable);
FPDF_BOOL FSDK_IsSandBoxPolicyEnabled(FPDF_DWORD policy);
void FPDF_RenderPage_Retail(CPDF_PageRenderContext* pContext,
diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h
index e5607d1716..dc710b5f52 100644
--- a/public/fpdf_edit.h
+++ b/public/fpdf_edit.h
@@ -298,6 +298,35 @@ DLLEXPORT FPDF_BOOL STDCALL FPDFImageObj_SetBitmap(FPDF_PAGE* pages,
DLLEXPORT FPDF_BITMAP STDCALL
FPDFImageObj_GetBitmap(FPDF_PAGEOBJECT image_object);
+// Get the decoded image data of |image_object|. The decoded data is the
+// uncompressed image data, i.e. the raw image data after having all filters
+// applied. |buffer| is only modified if |buflen| is at least the length of
+// the decoded image data.
+//
+// image_object - handle to an image object.
+// buffer - buffer for holding the decoded image data in raw bytes.
+// buflen - length of the buffer.
+//
+// Returns the length of the decoded image data.
+DLLEXPORT unsigned long STDCALL
+FPDFImageObj_GetImageDataDecoded(FPDF_PAGEOBJECT image_object,
+ void* buffer,
+ unsigned long buflen);
+
+// Get the raw image data of |image_object|. The raw data is the image data as
+// stored in the PDF without applying any filters. |buffer| is only modified if
+// |buflen| is at least the length of the raw image data.
+//
+// image_object - handle to an image object.
+// buffer - buffer for holding the raw image data in raw bytes.
+// buflen - length of the buffer.
+//
+// Returns the length of the raw image data.
+DLLEXPORT unsigned long STDCALL
+FPDFImageObj_GetImageDataRaw(FPDF_PAGEOBJECT image_object,
+ void* buffer,
+ unsigned long buflen);
+
// Create a new path object at an initial position.
//
// x - initial horizontal position.