diff options
Diffstat (limited to 'core/src/fpdfapi/fpdf_parser')
3 files changed, 139 insertions, 113 deletions
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp index 5fbcf63b57..d3ef4d738a 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp @@ -50,34 +50,34 @@ FX_DWORD A85Decode(const uint8_t* src_buf, FX_DWORD& dest_size) { dest_size = 0; dest_buf = nullptr; - if (src_size == 0) { + if (src_size == 0) return 0; - } + + // Count legal characters and zeros. FX_DWORD zcount = 0; FX_DWORD pos = 0; while (pos < src_size) { uint8_t ch = src_buf[pos]; - if (ch < '!' && ch != '\n' && ch != '\r' && ch != ' ' && ch != '\t') { - break; - } if (ch == 'z') { zcount++; - } else if (ch > 'u') { + } else if ((ch < '!' || ch > 'u') && !PDFCharIsLineEnding(ch) && + ch != ' ' && ch != '\t') { break; } pos++; } - if (pos == 0) { + // No content to decode. + if (pos == 0) return 0; - } - if (zcount > UINT_MAX / 4) { - return (FX_DWORD)-1; - } - if (zcount * 4 > UINT_MAX - (pos - zcount)) { + + // Count the space needed to contain non-zero characters. The encoding ratio + // of Ascii85 is 4:5. + FX_DWORD space_for_non_zeroes = (pos - zcount) / 5 * 4 + 4; + if (zcount > (UINT_MAX - space_for_non_zeroes) / 4) { return (FX_DWORD)-1; } - dest_buf = FX_Alloc(uint8_t, zcount * 4 + (pos - zcount)); - int state = 0; + dest_buf = FX_Alloc(uint8_t, zcount * 4 + space_for_non_zeroes); + size_t state = 0; uint32_t res = 0; pos = dest_size = 0; while (pos < src_size) { @@ -90,46 +90,49 @@ FX_DWORD A85Decode(const uint8_t* src_buf, state = 0; res = 0; dest_size += 4; - } else { - if (ch < '!' || ch > 'u') { - break; - } + } else if (ch >= '!' && ch <= 'u') { res = res * 85 + ch - 33; state++; if (state == 5) { - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { dest_buf[dest_size++] = (uint8_t)(res >> (3 - i) * 8); } state = 0; res = 0; } + } else { + // The end or illegal character. + break; } } + // Handle partial group. if (state) { - int i; - for (i = state; i < 5; i++) { + for (size_t i = state; i < 5; i++) res = res * 85 + 84; - } - for (i = 0; i < state - 1; i++) { + for (size_t i = 0; i < state - 1; i++) dest_buf[dest_size++] = (uint8_t)(res >> (3 - i) * 8); - } } - if (pos < src_size && src_buf[pos] == '>') { + if (pos < src_size && src_buf[pos] == '>') pos++; - } return pos; } + FX_DWORD HexDecode(const uint8_t* src_buf, FX_DWORD src_size, uint8_t*& dest_buf, FX_DWORD& dest_size) { - FX_DWORD i; - for (i = 0; i < src_size; i++) - if (src_buf[i] == '>') { - break; - } - dest_buf = FX_Alloc(uint8_t, i / 2 + 1); dest_size = 0; + if (src_size == 0) { + dest_buf = nullptr; + return 0; + } + + FX_DWORD i = 0; + // Find the end of data. + while (i < src_size && src_buf[i] != '>') + i++; + + dest_buf = FX_Alloc(uint8_t, i / 2 + 1); bool bFirst = true; for (i = 0; i < src_size; i++) { uint8_t ch = src_buf[i]; @@ -218,6 +221,7 @@ FX_DWORD RunLengthDecode(const uint8_t* src_buf, } return ret; } + ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder( const uint8_t* src_buf, FX_DWORD src_size, @@ -248,6 +252,7 @@ ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder( src_buf, src_size, width, height, K, EndOfLine, ByteAlign, BlackIs1, Columns, Rows); } + static FX_BOOL CheckFlateDecodeParams(int Colors, int BitsPerComponent, int Columns) { @@ -269,6 +274,7 @@ static FX_BOOL CheckFlateDecodeParams(int Colors, } return TRUE; } + ICodec_ScanlineDecoder* FPDFAPI_CreateFlateDecoder( const uint8_t* src_buf, FX_DWORD src_size, @@ -292,6 +298,7 @@ ICodec_ScanlineDecoder* FPDFAPI_CreateFlateDecoder( src_buf, src_size, width, height, nComps, bpc, predictor, Colors, BitsPerComponent, Columns); } + FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW, const uint8_t* src_buf, FX_DWORD src_size, @@ -316,6 +323,7 @@ FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW, bLZW, src_buf, src_size, bEarlyChange, predictor, Colors, BitsPerComponent, Columns, estimated_size, dest_buf, dest_size); } + FX_BOOL PDF_DataDecode(const uint8_t* src_buf, FX_DWORD src_size, const CPDF_Dictionary* pDict, @@ -417,6 +425,7 @@ FX_BOOL PDF_DataDecode(const uint8_t* src_buf, dest_size = last_size; return TRUE; } + CFX_WideString PDF_DecodeText(const uint8_t* src_data, FX_DWORD src_len, CFX_CharMap* pCharMap) { @@ -464,6 +473,7 @@ CFX_WideString PDF_DecodeText(const uint8_t* src_data, } return result; } + CFX_ByteString PDF_EncodeText(const FX_WCHAR* pString, int len, CFX_CharMap* pCharMap) { @@ -509,6 +519,7 @@ CFX_ByteString PDF_EncodeText(const FX_WCHAR* pString, result.ReleaseBuffer(encLen); return result; } + CFX_ByteString PDF_EncodeString(const CFX_ByteString& src, FX_BOOL bHex) { CFX_ByteTextBuf result; int srclen = src.GetLength(); @@ -538,6 +549,7 @@ CFX_ByteString PDF_EncodeString(const CFX_ByteString& src, FX_BOOL bHex) { result.AppendChar(')'); return result.GetByteString(); } + void FlateEncode(const uint8_t* src_buf, FX_DWORD src_size, uint8_t*& dest_buf, @@ -547,6 +559,7 @@ void FlateEncode(const uint8_t* src_buf, pEncoders->GetFlateModule()->Encode(src_buf, src_size, dest_buf, dest_size); } } + void FlateEncode(const uint8_t* src_buf, FX_DWORD src_size, int predictor, @@ -562,6 +575,7 @@ void FlateEncode(const uint8_t* src_buf, dest_size); } } + FX_DWORD FlateDecode(const uint8_t* src_buf, FX_DWORD src_size, uint8_t*& dest_buf, diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp index c80770366b..4b2e686015 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp @@ -10,30 +10,20 @@ #include "testing/embedder_test.h" #include "testing/fx_string_testhelpers.h" #include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" class FPDFParserDecodeEmbeddertest : public EmbedderTest {}; // NOTE: python's zlib.compress() and zlib.decompress() may be useful for // external validation of the FlateEncode/FlateDecode test cases. -#define TEST_CASE(input_literal, expected_literal) \ - { \ - (const unsigned char*) input_literal, sizeof(input_literal) - 1, \ - (const unsigned char*)expected_literal, sizeof(expected_literal) - 1 \ - } - TEST_F(FPDFParserDecodeEmbeddertest, FlateEncode) { - struct FlateEncodeCase { - const unsigned char* input; - unsigned int input_size; - const unsigned char* expected; - unsigned int expected_size; - } flate_encode_cases[] = { - TEST_CASE("", "\x78\x9c\x03\x00\x00\x00\x00\x01"), - TEST_CASE(" ", "\x78\x9c\x53\x00\x00\x00\x21\x00\x21"), - TEST_CASE("123", "\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97"), - TEST_CASE("\x00\xff", "\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00"), - TEST_CASE( + pdfium::StrFuncTestData flate_encode_cases[] = { + STR_TEST_CASE("", "\x78\x9c\x03\x00\x00\x00\x00\x01"), + STR_TEST_CASE(" ", "\x78\x9c\x53\x00\x00\x00\x21\x00\x21"), + STR_TEST_CASE("123", "\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97"), + STR_TEST_CASE("\x00\xff", "\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00"), + STR_TEST_CASE( "1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n" "W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n" "0 0 693 917 re\nf\nQ\nQ\n", @@ -46,12 +36,12 @@ TEST_F(FPDFParserDecodeEmbeddertest, FlateEncode) { }; for (size_t i = 0; i < FX_ArraySize(flate_encode_cases); ++i) { - FlateEncodeCase* ptr = &flate_encode_cases[i]; + const pdfium::StrFuncTestData& data = flate_encode_cases[i]; unsigned char* result; unsigned int result_size; - FlateEncode(ptr->input, ptr->input_size, result, result_size); + FlateEncode(data.input, data.input_size, result, result_size); ASSERT_TRUE(result); - EXPECT_EQ(std::string((const char*)ptr->expected, ptr->expected_size), + EXPECT_EQ(std::string((const char*)data.expected, data.expected_size), std::string((const char*)result, result_size)) << " for case " << i; FX_Free(result); @@ -59,18 +49,16 @@ TEST_F(FPDFParserDecodeEmbeddertest, FlateEncode) { } TEST_F(FPDFParserDecodeEmbeddertest, FlateDecode) { - struct FlateDecodeCase { - const unsigned char* input; - unsigned int input_size; - const unsigned char* expected; - unsigned int expected_size; - } flate_decode_cases[] = { - TEST_CASE("", ""), TEST_CASE("preposterous nonsense", ""), - TEST_CASE("\x78\x9c\x03\x00\x00\x00\x00\x01", ""), - TEST_CASE("\x78\x9c\x53\x00\x00\x00\x21\x00\x21", " "), - TEST_CASE("\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97", "123"), - TEST_CASE("\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00", "\x00\xff"), - TEST_CASE( + pdfium::DecodeTestData flate_decode_cases[] = { + DECODE_TEST_CASE("", "", 0), + DECODE_TEST_CASE("preposterous nonsense", "", 2), + DECODE_TEST_CASE("\x78\x9c\x03\x00\x00\x00\x00\x01", "", 8), + DECODE_TEST_CASE("\x78\x9c\x53\x00\x00\x00\x21\x00\x21", " ", 9), + DECODE_TEST_CASE("\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97", "123", + 11), + DECODE_TEST_CASE("\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00", "\x00\xff", + 10), + DECODE_TEST_CASE( "\x78\x9c\x33\x54\x30\x00\x42\x5d\x43\x05\x23\x4b\x05\x73\x33\x63" "\x85\xe4\x5c\x2e\x90\x80\xa9\xa9\xa9\x82\xb9\xb1\xa9\x42\x51\x2a" "\x57\xb8\x42\x1e\x57\x21\x92\xa0\x89\x9e\xb1\xa5\x09\x92\x84\x9e" @@ -79,16 +67,19 @@ TEST_F(FPDFParserDecodeEmbeddertest, FlateDecode) { "\x2b\x58\x1a\x9a\x83\x8c\x49\xe3\x0a\x04\x42\x00\x37\x4c\x1b\x42", "1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n" "W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n" - "0 0 693 917 re\nf\nQ\nQ\n"), + "0 0 693 917 re\nf\nQ\nQ\n", + 96), }; for (size_t i = 0; i < FX_ArraySize(flate_decode_cases); ++i) { - FlateDecodeCase* ptr = &flate_decode_cases[i]; + const pdfium::DecodeTestData& data = flate_decode_cases[i]; unsigned char* result; unsigned int result_size; - FlateDecode(ptr->input, ptr->input_size, result, result_size); + EXPECT_EQ(data.processed_size, + FlateDecode(data.input, data.input_size, result, result_size)) + << " for case " << i; ASSERT_TRUE(result); - EXPECT_EQ(std::string((const char*)ptr->expected, ptr->expected_size), + EXPECT_EQ(std::string((const char*)data.expected, data.expected_size), std::string((const char*)result, result_size)) << " for case " << i; FX_Free(result); @@ -115,4 +106,3 @@ TEST_F(FPDFParserDecodeEmbeddertest, Bug_555784) { UnloadPage(page); } -#undef TEST_CASE diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp index 3318bcdfe7..3064a24b7a 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp @@ -4,52 +4,74 @@ #include "core/include/fpdfapi/fpdf_parser.h" #include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" -TEST(fpdf_parser_decode, HexDecode) { - { - // Empty src string. - uint8_t* dest = nullptr; - FX_DWORD dest_size; - uint8_t src[] = ""; - EXPECT_EQ(0, HexDecode(src, 0, dest, dest_size)); - EXPECT_EQ(0, dest_size); - EXPECT_EQ('\0', dest[0]); - FX_Free(dest); - } - - { - // Regular conversion. - uint8_t* dest = nullptr; - FX_DWORD dest_size; - uint8_t src[] = "12Ac>zzz"; - EXPECT_EQ(5, HexDecode(src, 8, dest, dest_size)); - EXPECT_EQ(2, dest_size); - EXPECT_EQ(18, dest[0]); - EXPECT_EQ(172, dest[1]); - FX_Free(dest); - } - - { - // Non-multiple length. - uint8_t* dest = nullptr; - FX_DWORD dest_size; - uint8_t src[] = "12A>zzz"; - EXPECT_EQ(4, HexDecode(src, 8, dest, dest_size)); - EXPECT_EQ(2, dest_size); - EXPECT_EQ(18, dest[0]); - EXPECT_EQ(160, dest[1]); - FX_Free(dest); +TEST(fpdf_parser_decode, A85Decode) { + pdfium::DecodeTestData test_data[] = { + // Empty src string. + DECODE_TEST_CASE("", "", 0), + // Empty content in src string. + DECODE_TEST_CASE("~>", "", 0), + // Regular conversion. + DECODE_TEST_CASE("FCfN8~>", "test", 7), + // End at the ending mark. + DECODE_TEST_CASE("FCfN8~>FCfN8", "test", 7), + // Skip whitespaces. + DECODE_TEST_CASE("\t F C\r\n \tf N 8 ~>", "test", 17), + // No ending mark. + DECODE_TEST_CASE("@3B0)DJj_BF*)>@Gp#-s", "a funny story :)", 20), + // Non-multiple length. + DECODE_TEST_CASE("12A", "2k", 3), + // Stop at unknown characters. + DECODE_TEST_CASE("FCfN8FCfN8vw", "testtest", 11), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + pdfium::DecodeTestData* ptr = &test_data[i]; + uint8_t* result = nullptr; + FX_DWORD result_size; + EXPECT_EQ(ptr->processed_size, + A85Decode(ptr->input, ptr->input_size, result, result_size)) + << "for case " << i; + ASSERT_EQ(ptr->expected_size, result_size); + for (size_t j = 0; j < result_size; ++j) { + EXPECT_EQ(ptr->expected[j], result[j]) << "for case " << i << " char " + << j; + } + FX_Free(result); } +} - { - // Skips unknown characters. - uint8_t* dest = nullptr; - FX_DWORD dest_size; - uint8_t src[] = "12tk \tAc>zzz"; - EXPECT_EQ(10, HexDecode(src, 13, dest, dest_size)); - EXPECT_EQ(2, dest_size); - EXPECT_EQ(18, dest[0]); - EXPECT_EQ(172, dest[1]); - FX_Free(dest); +TEST(fpdf_parser_decode, HexDecode) { + pdfium::DecodeTestData test_data[] = { + // Empty src string. + DECODE_TEST_CASE("", "", 0), + // Empty content in src string. + DECODE_TEST_CASE(">", "", 1), + // Only whitespaces in src string. + DECODE_TEST_CASE("\t \r\n>", "", 7), + // Regular conversion. + DECODE_TEST_CASE("12Ac>zzz", "\x12\xac", 5), + // Skip whitespaces. + DECODE_TEST_CASE("12 Ac\t02\r\nBF>zzz>", "\x12\xac\x02\xbf", 13), + // Non-multiple length. + DECODE_TEST_CASE("12A>zzz", "\x12\xa0", 4), + // Skips unknown characters. + DECODE_TEST_CASE("12tk \tAc>zzz", "\x12\xac", 10), + // No ending mark. + DECODE_TEST_CASE("12AcED3c3456", "\x12\xac\xed\x3c\x34\x56", 12), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + pdfium::DecodeTestData* ptr = &test_data[i]; + uint8_t* result = nullptr; + FX_DWORD result_size; + EXPECT_EQ(ptr->processed_size, + HexDecode(ptr->input, ptr->input_size, result, result_size)) + << "for case " << i; + ASSERT_EQ(ptr->expected_size, result_size); + for (size_t j = 0; j < result_size; ++j) { + EXPECT_EQ(ptr->expected[j], result[j]) << "for case " << i << " char " + << j; + } + FX_Free(result); } } |