diff options
author | Wei Li <weili@chromium.org> | 2016-02-16 14:26:22 -0800 |
---|---|---|
committer | Wei Li <weili@chromium.org> | 2016-02-16 14:26:22 -0800 |
commit | 970c11e2225d73234009ccdc6f656abd01ae4026 (patch) | |
tree | c184a4d3b62785ee5431cc2c6d7199103b78986f /core/src/fpdfapi/fpdf_parser | |
parent | 4f7f4eea92b607ca3864df63e4b277abd5e5af97 (diff) | |
download | pdfium-970c11e2225d73234009ccdc6f656abd01ae4026.tar.xz |
Refactor CPDF_SimpleParser and add unit tests.
Remove unused member functions and simplify calls to find tag parameters.
R=thestig@chromium.org
Review URL: https://codereview.chromium.org/1681403007 .
Diffstat (limited to 'core/src/fpdfapi/fpdf_parser')
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp | 69 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp | 96 |
2 files changed, 112 insertions, 53 deletions
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp index e6e2f2aabe..d1a7231d45 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp @@ -65,11 +65,13 @@ CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) { m_dwSize = dwSize; m_dwCurPos = 0; } + CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) { m_pData = str.GetPtr(); m_dwSize = str.GetLength(); m_dwCurPos = 0; } + void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { pStart = NULL; dwSize = 0; @@ -146,6 +148,7 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { dwSize++; } } + CFX_ByteStringC CPDF_SimpleParser::GetWord() { const uint8_t* pStart; FX_DWORD dwSize; @@ -190,60 +193,14 @@ CFX_ByteStringC CPDF_SimpleParser::GetWord() { } return CFX_ByteStringC(pStart, dwSize); } -FX_BOOL CPDF_SimpleParser::SearchToken(const CFX_ByteStringC& token) { - int token_len = token.GetLength(); - while (m_dwCurPos < m_dwSize - token_len) { - if (FXSYS_memcmp(m_pData + m_dwCurPos, token.GetPtr(), token_len) == 0) { - break; - } - m_dwCurPos++; - } - if (m_dwCurPos == m_dwSize - token_len) { - return FALSE; - } - m_dwCurPos += token_len; - return TRUE; -} -FX_BOOL CPDF_SimpleParser::SkipWord(const CFX_ByteStringC& token) { - while (1) { - CFX_ByteStringC word = GetWord(); - if (word.IsEmpty()) { - return FALSE; - } - if (word == token) { - return TRUE; - } - } - return FALSE; -} -FX_BOOL CPDF_SimpleParser::FindTagPair(const CFX_ByteStringC& start_token, - const CFX_ByteStringC& end_token, - FX_DWORD& start_pos, - FX_DWORD& end_pos) { - if (!start_token.IsEmpty()) { - if (!SkipWord(start_token)) { - return FALSE; - } - start_pos = m_dwCurPos; - } - while (1) { - end_pos = m_dwCurPos; - CFX_ByteStringC word = GetWord(); - if (word.IsEmpty()) { - return FALSE; - } - if (word == end_token) { - return TRUE; - } - } - return FALSE; -} -FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token, - int nParams) { + +bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token, + int nParams) { nParams++; FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); int buf_index = 0; int buf_count = 0; + m_dwCurPos = 0; while (1) { pBuf[buf_index++] = m_dwCurPos; if (buf_index == nParams) { @@ -256,7 +213,7 @@ FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token, CFX_ByteStringC word = GetWord(); if (word.IsEmpty()) { FX_Free(pBuf); - return FALSE; + return false; } if (word == token) { if (buf_count < nParams) { @@ -264,10 +221,10 @@ FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token, } m_dwCurPos = pBuf[buf_index]; FX_Free(pBuf); - return TRUE; + return true; } } - return FALSE; + return false; } CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { @@ -291,12 +248,14 @@ CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart)); return result; } + CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) { if (!FXSYS_memchr(orig.c_str(), '#', orig.GetLength())) { return orig; } return PDF_NameDecode(CFX_ByteStringC(orig)); } + CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { uint8_t* src_buf = (uint8_t*)orig.c_str(); int src_len = orig.GetLength(); @@ -332,6 +291,7 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { res.ReleaseBuffer(); return res; } + CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) { if (!pObj) { buf << " null"; @@ -402,6 +362,7 @@ CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) { } return buf; } + FX_FLOAT PDF_ClipFloat(FX_FLOAT f) { if (f < 0) { return 0; @@ -411,6 +372,7 @@ FX_FLOAT PDF_ClipFloat(FX_FLOAT f) { } return f; } + static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num) { CPDF_Array* pLimits = pNode->GetArrayBy("Limits"); if (pLimits && @@ -447,6 +409,7 @@ static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num) { } return NULL; } + CPDF_Object* CPDF_NumberTree::LookupValue(int num) { return SearchNumberNode(m_pRoot, num); } diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp new file mode 100644 index 0000000000..1673798139 --- /dev/null +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp @@ -0,0 +1,96 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/include/fpdfapi/fpdf_parser.h" + +#include <string> + +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" + +TEST(SimpleParserTest, GetWord) { + pdfium::StrFuncTestData test_data[] = { + // Empty src string. + STR_TEST_CASE("", ""), + // Content with whitespaces only. + STR_TEST_CASE(" \t \0 \n", ""), + // Content with comments only. + STR_TEST_CASE("%this is a test case\r\n%2nd line", ""), + // Mixed whitespaces and comments. + STR_TEST_CASE(" \t \0%try()%haha\n %another line \aa", ""), + // Name. + STR_TEST_CASE(" /Tester ", "/Tester"), + // String. + STR_TEST_CASE("\t(nice day)!\n ", "(nice day)"), + // String with nested braces. + STR_TEST_CASE("\t(It is a (long) day)!\n ", "(It is a (long) day)"), + // String with escaped chars. + STR_TEST_CASE("\t(It is a \\(long\\) day!)hi\n ", + "(It is a \\(long\\) day!)"), + // Hex string. + STR_TEST_CASE(" \n<4545acdfedertt>abc ", "<4545acdfedertt>"), + STR_TEST_CASE(" \n<4545a<ed>ertt>abc ", "<4545a<ed>"), + // Dictionary. + STR_TEST_CASE("<</oc 234 /color 2 3 R>>", "<<"), + STR_TEST_CASE("\t\t<< /abc>>", "<<"), + // Handling ending delimiters. + STR_TEST_CASE("> little bear", ">"), + STR_TEST_CASE(") another bear", ")"), + STR_TEST_CASE(">> end ", ">>"), + // No ending delimiters. + STR_TEST_CASE("(sdfgfgbcv", "(sdfgfgbcv"), + // Regular cases. + STR_TEST_CASE("apple pear", "apple"), + STR_TEST_CASE(" pi=3.1415 ", "pi=3.1415"), + STR_TEST_CASE(" p t x c ", "p"), + STR_TEST_CASE(" pt\0xc ", "pt"), + STR_TEST_CASE(" $^&&*\t\0sdff ", "$^&&*"), + STR_TEST_CASE("\n\r+3.5656 -11.0", "+3.5656"), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + const pdfium::StrFuncTestData& data = test_data[i]; + CPDF_SimpleParser parser(data.input, data.input_size); + CFX_ByteStringC word = parser.GetWord(); + EXPECT_EQ(std::string(reinterpret_cast<const char*>(data.expected), + data.expected_size), + std::string(word.GetCStr(), word.GetLength())) + << " for case " << i; + } +} + +TEST(SimpleParserTest, FindTagParamFromStart) { + struct FindTagTestStruct { + const unsigned char* input; + unsigned int input_size; + const char* token; + int num_params; + bool result; + unsigned int result_pos; + } test_data[] = { + // Empty strings. + STR_IN_TEST_CASE("", "Tj", 1, false, 0), + STR_IN_TEST_CASE("", "", 1, false, 0), + // Empty token. + STR_IN_TEST_CASE(" T j", "", 1, false, 5), + // No parameter. + STR_IN_TEST_CASE("Tj", "Tj", 1, false, 2), + STR_IN_TEST_CASE("(Tj", "Tj", 1, false, 3), + // Partial token match. + STR_IN_TEST_CASE("\r12\t34 56 78Tj", "Tj", 1, false, 15), + // Regular cases with various parameters. + STR_IN_TEST_CASE("\r\0abd Tj", "Tj", 1, true, 0), + STR_IN_TEST_CASE("12 4 Tj 3 46 Tj", "Tj", 1, true, 2), + STR_IN_TEST_CASE("er^ 2 (34) (5667) Tj", "Tj", 2, true, 5), + STR_IN_TEST_CASE("<344> (232)\t343.4\n12 45 Tj", "Tj", 3, true, 11), + STR_IN_TEST_CASE("1 2 3 4 5 6 7 8 cm", "cm", 6, true, 3), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + const FindTagTestStruct& data = test_data[i]; + CPDF_SimpleParser parser(data.input, data.input_size); + EXPECT_EQ(data.result, + parser.FindTagParamFromStart(data.token, data.num_params)) + << " for case " << i; + EXPECT_EQ(data.result_pos, parser.GetCurPos()) << " for case " << i; + } +} |