summary refs log tree commit diff
path: root/core/src/fpdfapi/fpdf_parser
diff options
context:
space:
mode:
author Wei Li <weili@chromium.org> 2016-02-09 11:38:47 -0800
committer Wei Li <weili@chromium.org> 2016-02-09 11:38:47 -0800
commit 0db900952c2aa76db801c9198923ce1b3d7c017d (patch)
tree 4ec9636e7785c5b0c5ac67e7f71df90dae1516b8 /core/src/fpdfapi/fpdf_parser
parent c74acf4552944c5485b1175c008708d19b57d322 (diff)
download pdfium-0db900952c2aa76db801c9198923ce1b3d7c017d.tar.xz
R=thestig@chromium.org Review URL: https://codereview.chromium.org/1666663004 .
Diffstat (limited to 'core/src/fpdfapi/fpdf_parser')
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp 78
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp 64
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp 110
3 files changed, 139 insertions, 113 deletions
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp
index 5fbcf63b57..d3ef4d738a 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp
@@ -50,34 +50,34 @@ FX_DWORD A85Decode(const uint8_t* src_buf,
FX_DWORD& dest_size) {
dest_size = 0;
dest_buf = nullptr;
- if (src_size == 0) {
+ if (src_size == 0)
return 0;
- }
+
+ // Count legal characters and zeros.
FX_DWORD zcount = 0;
FX_DWORD pos = 0;
while (pos < src_size) {
uint8_t ch = src_buf[pos];
- if (ch < '!' && ch != '\n' && ch != '\r' && ch != ' ' && ch != '\t') {
- break;
- }
if (ch == 'z') {
zcount++;
- } else if (ch > 'u') {
+ } else if ((ch < '!' || ch > 'u') && !PDFCharIsLineEnding(ch) &&
+ ch != ' ' && ch != '\t') {
break;
}
pos++;
}
- if (pos == 0) {
+ // No content to decode.
+ if (pos == 0)
return 0;
- }
- if (zcount > UINT_MAX / 4) {
- return (FX_DWORD)-1;
- }
- if (zcount * 4 > UINT_MAX - (pos - zcount)) {
+
+ // Count the space needed to contain non-zero characters. The encoding ratio
+ // of Ascii85 is 4:5.
+ FX_DWORD space_for_non_zeroes = (pos - zcount) / 5 * 4 + 4;
+ if (zcount > (UINT_MAX - space_for_non_zeroes) / 4) {
return (FX_DWORD)-1;
}
- dest_buf = FX_Alloc(uint8_t, zcount * 4 + (pos - zcount));
- int state = 0;
+ dest_buf = FX_Alloc(uint8_t, zcount * 4 + space_for_non_zeroes);
+ size_t state = 0;
uint32_t res = 0;
pos = dest_size = 0;
while (pos < src_size) {
@@ -90,46 +90,49 @@ FX_DWORD A85Decode(const uint8_t* src_buf,
state = 0;
res = 0;
dest_size += 4;
- } else {
- if (ch < '!' || ch > 'u') {
- break;
- }
+ } else if (ch >= '!' && ch <= 'u') {
res = res * 85 + ch - 33;
state++;
if (state == 5) {
- for (int i = 0; i < 4; i++) {
+ for (size_t i = 0; i < 4; i++) {
dest_buf[dest_size++] = (uint8_t)(res >> (3 - i) * 8);
}
state = 0;
res = 0;
}
+ } else {
+ // The end or illegal character.
+ break;
}
}
+ // Handle partial group.
if (state) {
- int i;
- for (i = state; i < 5; i++) {
+ for (size_t i = state; i < 5; i++)
res = res * 85 + 84;
- }
- for (i = 0; i < state - 1; i++) {
+ for (size_t i = 0; i < state - 1; i++)
dest_buf[dest_size++] = (uint8_t)(res >> (3 - i) * 8);
- }
}
- if (pos < src_size && src_buf[pos] == '>') {
+ if (pos < src_size && src_buf[pos] == '>')
pos++;
- }
return pos;
}
+
FX_DWORD HexDecode(const uint8_t* src_buf,
FX_DWORD src_size,
uint8_t*& dest_buf,
FX_DWORD& dest_size) {
- FX_DWORD i;
- for (i = 0; i < src_size; i++)
- if (src_buf[i] == '>') {
- break;
- }
- dest_buf = FX_Alloc(uint8_t, i / 2 + 1);
dest_size = 0;
+ if (src_size == 0) {
+ dest_buf = nullptr;
+ return 0;
+ }
+
+ FX_DWORD i = 0;
+ // Find the end of data.
+ while (i < src_size && src_buf[i] != '>')
+ i++;
+
+ dest_buf = FX_Alloc(uint8_t, i / 2 + 1);
bool bFirst = true;
for (i = 0; i < src_size; i++) {
uint8_t ch = src_buf[i];
@@ -218,6 +221,7 @@ FX_DWORD RunLengthDecode(const uint8_t* src_buf,
}
return ret;
}
+
ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(
const uint8_t* src_buf,
FX_DWORD src_size,
@@ -248,6 +252,7 @@ ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(
src_buf, src_size, width, height, K, EndOfLine, ByteAlign, BlackIs1,
Columns, Rows);
}
+
static FX_BOOL CheckFlateDecodeParams(int Colors,
int BitsPerComponent,
int Columns) {
@@ -269,6 +274,7 @@ static FX_BOOL CheckFlateDecodeParams(int Colors,
}
return TRUE;
}
+
ICodec_ScanlineDecoder* FPDFAPI_CreateFlateDecoder(
const uint8_t* src_buf,
FX_DWORD src_size,
@@ -292,6 +298,7 @@ ICodec_ScanlineDecoder* FPDFAPI_CreateFlateDecoder(
src_buf, src_size, width, height, nComps, bpc, predictor, Colors,
BitsPerComponent, Columns);
}
+
FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW,
const uint8_t* src_buf,
FX_DWORD src_size,
@@ -316,6 +323,7 @@ FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW,
bLZW, src_buf, src_size, bEarlyChange, predictor, Colors,
BitsPerComponent, Columns, estimated_size, dest_buf, dest_size);
}
+
FX_BOOL PDF_DataDecode(const uint8_t* src_buf,
FX_DWORD src_size,
const CPDF_Dictionary* pDict,
@@ -417,6 +425,7 @@ FX_BOOL PDF_DataDecode(const uint8_t* src_buf,
dest_size = last_size;
return TRUE;
}
+
CFX_WideString PDF_DecodeText(const uint8_t* src_data,
FX_DWORD src_len,
CFX_CharMap* pCharMap) {
@@ -464,6 +473,7 @@ CFX_WideString PDF_DecodeText(const uint8_t* src_data,
}
return result;
}
+
CFX_ByteString PDF_EncodeText(const FX_WCHAR* pString,
int len,
CFX_CharMap* pCharMap) {
@@ -509,6 +519,7 @@ CFX_ByteString PDF_EncodeText(const FX_WCHAR* pString,
result.ReleaseBuffer(encLen);
return result;
}
+
CFX_ByteString PDF_EncodeString(const CFX_ByteString& src, FX_BOOL bHex) {
CFX_ByteTextBuf result;
int srclen = src.GetLength();
@@ -538,6 +549,7 @@ CFX_ByteString PDF_EncodeString(const CFX_ByteString& src, FX_BOOL bHex) {
result.AppendChar(')');
return result.GetByteString();
}
+
void FlateEncode(const uint8_t* src_buf,
FX_DWORD src_size,
uint8_t*& dest_buf,
@@ -547,6 +559,7 @@ void FlateEncode(const uint8_t* src_buf,
pEncoders->GetFlateModule()->Encode(src_buf, src_size, dest_buf, dest_size);
}
}
+
void FlateEncode(const uint8_t* src_buf,
FX_DWORD src_size,
int predictor,
@@ -562,6 +575,7 @@ void FlateEncode(const uint8_t* src_buf,
dest_size);
}
}
+
FX_DWORD FlateDecode(const uint8_t* src_buf,
FX_DWORD src_size,
uint8_t*& dest_buf,
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp
index c80770366b..4b2e686015 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp
@@ -10,30 +10,20 @@
#include "testing/embedder_test.h"
#include "testing/fx_string_testhelpers.h"
#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/test_support.h"
class FPDFParserDecodeEmbeddertest : public EmbedderTest {};
// NOTE: python's zlib.compress() and zlib.decompress() may be useful for
// external validation of the FlateEncode/FlateDecode test cases.
-#define TEST_CASE(input_literal, expected_literal) \
- { \
- (const unsigned char*) input_literal, sizeof(input_literal) - 1, \
- (const unsigned char*)expected_literal, sizeof(expected_literal) - 1 \
- }
-
TEST_F(FPDFParserDecodeEmbeddertest, FlateEncode) {
- struct FlateEncodeCase {
- const unsigned char* input;
- unsigned int input_size;
- const unsigned char* expected;
- unsigned int expected_size;
- } flate_encode_cases[] = {
- TEST_CASE("", "\x78\x9c\x03\x00\x00\x00\x00\x01"),
- TEST_CASE(" ", "\x78\x9c\x53\x00\x00\x00\x21\x00\x21"),
- TEST_CASE("123", "\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97"),
- TEST_CASE("\x00\xff", "\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00"),
- TEST_CASE(
+ pdfium::StrFuncTestData flate_encode_cases[] = {
+ STR_TEST_CASE("", "\x78\x9c\x03\x00\x00\x00\x00\x01"),
+ STR_TEST_CASE(" ", "\x78\x9c\x53\x00\x00\x00\x21\x00\x21"),
+ STR_TEST_CASE("123", "\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97"),
+ STR_TEST_CASE("\x00\xff", "\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00"),
+ STR_TEST_CASE(
"1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n"
"W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n"
"0 0 693 917 re\nf\nQ\nQ\n",
@@ -46,12 +36,12 @@ TEST_F(FPDFParserDecodeEmbeddertest, FlateEncode) {
};
for (size_t i = 0; i < FX_ArraySize(flate_encode_cases); ++i) {
- FlateEncodeCase* ptr = &flate_encode_cases[i];
+ const pdfium::StrFuncTestData& data = flate_encode_cases[i];
unsigned char* result;
unsigned int result_size;
- FlateEncode(ptr->input, ptr->input_size, result, result_size);
+ FlateEncode(data.input, data.input_size, result, result_size);
ASSERT_TRUE(result);
- EXPECT_EQ(std::string((const char*)ptr->expected, ptr->expected_size),
+ EXPECT_EQ(std::string((const char*)data.expected, data.expected_size),
std::string((const char*)result, result_size))
<< " for case " << i;
FX_Free(result);
@@ -59,18 +49,16 @@ TEST_F(FPDFParserDecodeEmbeddertest, FlateEncode) {
}
TEST_F(FPDFParserDecodeEmbeddertest, FlateDecode) {
- struct FlateDecodeCase {
- const unsigned char* input;
- unsigned int input_size;
- const unsigned char* expected;
- unsigned int expected_size;
- } flate_decode_cases[] = {
- TEST_CASE("", ""), TEST_CASE("preposterous nonsense", ""),
- TEST_CASE("\x78\x9c\x03\x00\x00\x00\x00\x01", ""),
- TEST_CASE("\x78\x9c\x53\x00\x00\x00\x21\x00\x21", " "),
- TEST_CASE("\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97", "123"),
- TEST_CASE("\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00", "\x00\xff"),
- TEST_CASE(
+ pdfium::DecodeTestData flate_decode_cases[] = {
+ DECODE_TEST_CASE("", "", 0),
+ DECODE_TEST_CASE("preposterous nonsense", "", 2),
+ DECODE_TEST_CASE("\x78\x9c\x03\x00\x00\x00\x00\x01", "", 8),
+ DECODE_TEST_CASE("\x78\x9c\x53\x00\x00\x00\x21\x00\x21", " ", 9),
+ DECODE_TEST_CASE("\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97", "123",
+ 11),
+ DECODE_TEST_CASE("\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00", "\x00\xff",
+ 10),
+ DECODE_TEST_CASE(
"\x78\x9c\x33\x54\x30\x00\x42\x5d\x43\x05\x23\x4b\x05\x73\x33\x63"
"\x85\xe4\x5c\x2e\x90\x80\xa9\xa9\xa9\x82\xb9\xb1\xa9\x42\x51\x2a"
"\x57\xb8\x42\x1e\x57\x21\x92\xa0\x89\x9e\xb1\xa5\x09\x92\x84\x9e"
@@ -79,16 +67,19 @@ TEST_F(FPDFParserDecodeEmbeddertest, FlateDecode) {
"\x2b\x58\x1a\x9a\x83\x8c\x49\xe3\x0a\x04\x42\x00\x37\x4c\x1b\x42",
"1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n"
"W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n"
- "0 0 693 917 re\nf\nQ\nQ\n"),
+ "0 0 693 917 re\nf\nQ\nQ\n",
+ 96),
};
for (size_t i = 0; i < FX_ArraySize(flate_decode_cases); ++i) {
- FlateDecodeCase* ptr = &flate_decode_cases[i];
+ const pdfium::DecodeTestData& data = flate_decode_cases[i];
unsigned char* result;
unsigned int result_size;
- FlateDecode(ptr->input, ptr->input_size, result, result_size);
+ EXPECT_EQ(data.processed_size,
+ FlateDecode(data.input, data.input_size, result, result_size))
+ << " for case " << i;
ASSERT_TRUE(result);
- EXPECT_EQ(std::string((const char*)ptr->expected, ptr->expected_size),
+ EXPECT_EQ(std::string((const char*)data.expected, data.expected_size),
std::string((const char*)result, result_size))
<< " for case " << i;
FX_Free(result);
@@ -115,4 +106,3 @@ TEST_F(FPDFParserDecodeEmbeddertest, Bug_555784) {
UnloadPage(page);
}
-#undef TEST_CASE
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp
index 3318bcdfe7..3064a24b7a 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp
@@ -4,52 +4,74 @@
#include "core/include/fpdfapi/fpdf_parser.h"
#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/test_support.h"
-TEST(fpdf_parser_decode, HexDecode) {
- {
- // Empty src string.
- uint8_t* dest = nullptr;
- FX_DWORD dest_size;
- uint8_t src[] = "";
- EXPECT_EQ(0, HexDecode(src, 0, dest, dest_size));
- EXPECT_EQ(0, dest_size);
- EXPECT_EQ('\0', dest[0]);
- FX_Free(dest);
- }
-
- {
- // Regular conversion.
- uint8_t* dest = nullptr;
- FX_DWORD dest_size;
- uint8_t src[] = "12Ac>zzz";
- EXPECT_EQ(5, HexDecode(src, 8, dest, dest_size));
- EXPECT_EQ(2, dest_size);
- EXPECT_EQ(18, dest[0]);
- EXPECT_EQ(172, dest[1]);
- FX_Free(dest);
- }
-
- {
- // Non-multiple length.
- uint8_t* dest = nullptr;
- FX_DWORD dest_size;
- uint8_t src[] = "12A>zzz";
- EXPECT_EQ(4, HexDecode(src, 8, dest, dest_size));
- EXPECT_EQ(2, dest_size);
- EXPECT_EQ(18, dest[0]);
- EXPECT_EQ(160, dest[1]);
- FX_Free(dest);
+TEST(fpdf_parser_decode, A85Decode) {
+ pdfium::DecodeTestData test_data[] = {
+ // Empty src string.
+ DECODE_TEST_CASE("", "", 0),
+ // Empty content in src string.
+ DECODE_TEST_CASE("~>", "", 0),
+ // Regular conversion.
+ DECODE_TEST_CASE("FCfN8~>", "test", 7),
+ // End at the ending mark.
+ DECODE_TEST_CASE("FCfN8~>FCfN8", "test", 7),
+ // Skip whitespaces.
+ DECODE_TEST_CASE("\t F C\r\n \tf N 8 ~>", "test", 17),
+ // No ending mark.
+ DECODE_TEST_CASE("@3B0)DJj_BF*)>@Gp#-s", "a funny story :)", 20),
+ // Non-multiple length.
+ DECODE_TEST_CASE("12A", "2k", 3),
+ // Stop at unknown characters.
+ DECODE_TEST_CASE("FCfN8FCfN8vw", "testtest", 11),
+ };
+ for (size_t i = 0; i < FX_ArraySize(test_data); ++i) {
+ pdfium::DecodeTestData* ptr = &test_data[i];
+ uint8_t* result = nullptr;
+ FX_DWORD result_size;
+ EXPECT_EQ(ptr->processed_size,
+ A85Decode(ptr->input, ptr->input_size, result, result_size))
+ << "for case " << i;
+ ASSERT_EQ(ptr->expected_size, result_size);
+ for (size_t j = 0; j < result_size; ++j) {
+ EXPECT_EQ(ptr->expected[j], result[j]) << "for case " << i << " char "
+ << j;
+ }
+ FX_Free(result);
}
+}
- {
- // Skips unknown characters.
- uint8_t* dest = nullptr;
- FX_DWORD dest_size;
- uint8_t src[] = "12tk  \tAc>zzz";
- EXPECT_EQ(10, HexDecode(src, 13, dest, dest_size));
- EXPECT_EQ(2, dest_size);
- EXPECT_EQ(18, dest[0]);
- EXPECT_EQ(172, dest[1]);
- FX_Free(dest);
+TEST(fpdf_parser_decode, HexDecode) {
+ pdfium::DecodeTestData test_data[] = {
+ // Empty src string.
+ DECODE_TEST_CASE("", "", 0),
+ // Empty content in src string.
+ DECODE_TEST_CASE(">", "", 1),
+ // Only whitespaces in src string.
+ DECODE_TEST_CASE("\t   \r\n>", "", 7),
+ // Regular conversion.
+ DECODE_TEST_CASE("12Ac>zzz", "\x12\xac", 5),
+ // Skip whitespaces.
+ DECODE_TEST_CASE("12 Ac\t02\r\nBF>zzz>", "\x12\xac\x02\xbf", 13),
+ // Non-multiple length.
+ DECODE_TEST_CASE("12A>zzz", "\x12\xa0", 4),
+ // Skips unknown characters.
+ DECODE_TEST_CASE("12tk  \tAc>zzz", "\x12\xac", 10),
+ // No ending mark.
+ DECODE_TEST_CASE("12AcED3c3456", "\x12\xac\xed\x3c\x34\x56", 12),
+ };
+ for (size_t i = 0; i < FX_ArraySize(test_data); ++i) {
+ pdfium::DecodeTestData* ptr = &test_data[i];
+ uint8_t* result = nullptr;
+ FX_DWORD result_size;
+ EXPECT_EQ(ptr->processed_size,
+ HexDecode(ptr->input, ptr->input_size, result, result_size))
+ << "for case " << i;
+ ASSERT_EQ(ptr->expected_size, result_size);
+ for (size_t j = 0; j < result_size; ++j) {
+ EXPECT_EQ(ptr->expected[j], result[j]) << "for case " << i << " char "
+ << j;
+ }
+ FX_Free(result);
}
}