From 970c11e2225d73234009ccdc6f656abd01ae4026 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Tue, 16 Feb 2016 14:26:22 -0800 Subject: Refactor CPDF_SimpleParser and add unit tests. Remove unused member functions and simplify calls to find tag parameters. R=thestig@chromium.org Review URL: https://codereview.chromium.org/1681403007 . --- core/include/fpdfapi/fpdf_parser.h | 22 ++--- .../fpdfapi/fpdf_parser/fpdf_parser_utility.cpp | 69 ++++------------ .../fpdf_parser/fpdf_parser_utility_unittest.cpp | 96 ++++++++++++++++++++++ core/src/fpdfdoc/doc_ap.cpp | 11 +-- core/src/fpdfdoc/doc_formfield.cpp | 2 +- core/src/fpdfdoc/doc_utils.cpp | 44 ++++------ 6 files changed, 140 insertions(+), 104 deletions(-) create mode 100644 core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp (limited to 'core') diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index 18a942e4e8..de32b1fb37 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -212,28 +212,16 @@ class CPDF_Document : public CFX_PrivateData, public CPDF_IndirectObjectHolder { class CPDF_SimpleParser { public: CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize); - CPDF_SimpleParser(const CFX_ByteStringC& str); CFX_ByteStringC GetWord(); - FX_BOOL SearchToken(const CFX_ByteStringC& token); - - FX_BOOL SkipWord(const CFX_ByteStringC& token); - - FX_BOOL FindTagPair(const CFX_ByteStringC& start_token, - const CFX_ByteStringC& end_token, - FX_DWORD& start_pos, - FX_DWORD& end_pos); + // Find the token and its |nParams| parameters from the start of data, + // and move the current position to the start of those parameters. + bool FindTagParamFromStart(const CFX_ByteStringC& token, int nParams); - FX_BOOL FindTagParam(const CFX_ByteStringC& token, int nParams); - - FX_DWORD GetPos() { return m_dwCurPos; } - - void SetPos(FX_DWORD pos) { - ASSERT(pos <= m_dwSize); - m_dwCurPos = pos; - } + // For testing only. 
+ FX_DWORD GetCurPos() const { return m_dwCurPos; } private: void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize); diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp index e6e2f2aabe..d1a7231d45 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp @@ -65,11 +65,13 @@ CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) { m_dwSize = dwSize; m_dwCurPos = 0; } + CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) { m_pData = str.GetPtr(); m_dwSize = str.GetLength(); m_dwCurPos = 0; } + void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { pStart = NULL; dwSize = 0; @@ -146,6 +148,7 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { dwSize++; } } + CFX_ByteStringC CPDF_SimpleParser::GetWord() { const uint8_t* pStart; FX_DWORD dwSize; @@ -190,60 +193,14 @@ CFX_ByteStringC CPDF_SimpleParser::GetWord() { } return CFX_ByteStringC(pStart, dwSize); } -FX_BOOL CPDF_SimpleParser::SearchToken(const CFX_ByteStringC& token) { - int token_len = token.GetLength(); - while (m_dwCurPos < m_dwSize - token_len) { - if (FXSYS_memcmp(m_pData + m_dwCurPos, token.GetPtr(), token_len) == 0) { - break; - } - m_dwCurPos++; - } - if (m_dwCurPos == m_dwSize - token_len) { - return FALSE; - } - m_dwCurPos += token_len; - return TRUE; -} -FX_BOOL CPDF_SimpleParser::SkipWord(const CFX_ByteStringC& token) { - while (1) { - CFX_ByteStringC word = GetWord(); - if (word.IsEmpty()) { - return FALSE; - } - if (word == token) { - return TRUE; - } - } - return FALSE; -} -FX_BOOL CPDF_SimpleParser::FindTagPair(const CFX_ByteStringC& start_token, - const CFX_ByteStringC& end_token, - FX_DWORD& start_pos, - FX_DWORD& end_pos) { - if (!start_token.IsEmpty()) { - if (!SkipWord(start_token)) { - return FALSE; - } - start_pos = m_dwCurPos; - } - while (1) { - end_pos = m_dwCurPos; 
- CFX_ByteStringC word = GetWord(); - if (word.IsEmpty()) { - return FALSE; - } - if (word == end_token) { - return TRUE; - } - } - return FALSE; -} -FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token, - int nParams) { + +bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token, + int nParams) { nParams++; FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); int buf_index = 0; int buf_count = 0; + m_dwCurPos = 0; while (1) { pBuf[buf_index++] = m_dwCurPos; if (buf_index == nParams) { @@ -256,7 +213,7 @@ FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token, CFX_ByteStringC word = GetWord(); if (word.IsEmpty()) { FX_Free(pBuf); - return FALSE; + return false; } if (word == token) { if (buf_count < nParams) { @@ -264,10 +221,10 @@ FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token, } m_dwCurPos = pBuf[buf_index]; FX_Free(pBuf); - return TRUE; + return true; } } - return FALSE; + return false; } CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { @@ -291,12 +248,14 @@ CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart)); return result; } + CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) { if (!FXSYS_memchr(orig.c_str(), '#', orig.GetLength())) { return orig; } return PDF_NameDecode(CFX_ByteStringC(orig)); } + CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { uint8_t* src_buf = (uint8_t*)orig.c_str(); int src_len = orig.GetLength(); @@ -332,6 +291,7 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { res.ReleaseBuffer(); return res; } + CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) { if (!pObj) { buf << " null"; @@ -402,6 +362,7 @@ CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) { } return buf; } + FX_FLOAT PDF_ClipFloat(FX_FLOAT f) { if (f < 0) { return 0; @@ -411,6 +372,7 @@ FX_FLOAT PDF_ClipFloat(FX_FLOAT f) { } return f; } + static CPDF_Object* 
SearchNumberNode(CPDF_Dictionary* pNode, int num) { CPDF_Array* pLimits = pNode->GetArrayBy("Limits"); if (pLimits && @@ -447,6 +409,7 @@ static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num) { } return NULL; } + CPDF_Object* CPDF_NumberTree::LookupValue(int num) { return SearchNumberNode(m_pRoot, num); } diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp new file mode 100644 index 0000000000..1673798139 --- /dev/null +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp @@ -0,0 +1,96 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/include/fpdfapi/fpdf_parser.h" + +#include <string> + +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" + +TEST(SimpleParserTest, GetWord) { + pdfium::StrFuncTestData test_data[] = { + // Empty src string. + STR_TEST_CASE("", ""), + // Content with whitespaces only. + STR_TEST_CASE(" \t \0 \n", ""), + // Content with comments only. + STR_TEST_CASE("%this is a test case\r\n%2nd line", ""), + // Mixed whitespaces and comments. + STR_TEST_CASE(" \t \0%try()%haha\n %another line \aa", ""), + // Name. + STR_TEST_CASE(" /Tester ", "/Tester"), + // String. + STR_TEST_CASE("\t(nice day)!\n ", "(nice day)"), + // String with nested braces. + STR_TEST_CASE("\t(It is a (long) day)!\n ", "(It is a (long) day)"), + // String with escaped chars. + STR_TEST_CASE("\t(It is a \\(long\\) day!)hi\n ", + "(It is a \\(long\\) day!)"), + // Hex string. + STR_TEST_CASE(" \n<4545acdfedertt>abc ", "<4545acdfedertt>"), + STR_TEST_CASE(" \n<4545aertt>abc ", "<4545a"), + // Dictionary. + STR_TEST_CASE("<>", "<<"), + STR_TEST_CASE("\t\t<< /abc>>", "<<"), + // Handling ending delimiters. 
+ STR_TEST_CASE("> little bear", ">"), + STR_TEST_CASE(") another bear", ")"), + STR_TEST_CASE(">> end ", ">>"), + // No ending delimiters. + STR_TEST_CASE("(sdfgfgbcv", "(sdfgfgbcv"), + // Regular cases. + STR_TEST_CASE("apple pear", "apple"), + STR_TEST_CASE(" pi=3.1415 ", "pi=3.1415"), + STR_TEST_CASE(" p t x c ", "p"), + STR_TEST_CASE(" pt\0xc ", "pt"), + STR_TEST_CASE(" $^&&*\t\0sdff ", "$^&&*"), + STR_TEST_CASE("\n\r+3.5656 -11.0", "+3.5656"), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + const pdfium::StrFuncTestData& data = test_data[i]; + CPDF_SimpleParser parser(data.input, data.input_size); + CFX_ByteStringC word = parser.GetWord(); + EXPECT_EQ(std::string(reinterpret_cast<const char*>(data.expected), + data.expected_size), + std::string(word.GetCStr(), word.GetLength())) + << " for case " << i; + } +} + +TEST(SimpleParserTest, FindTagParamFromStart) { + struct FindTagTestStruct { + const unsigned char* input; + unsigned int input_size; + const char* token; + int num_params; + bool result; + unsigned int result_pos; + } test_data[] = { + // Empty strings. + STR_IN_TEST_CASE("", "Tj", 1, false, 0), + STR_IN_TEST_CASE("", "", 1, false, 0), + // Empty token. + STR_IN_TEST_CASE(" T j", "", 1, false, 5), + // No parameter. + STR_IN_TEST_CASE("Tj", "Tj", 1, false, 2), + STR_IN_TEST_CASE("(Tj", "Tj", 1, false, 3), + // Partial token match. + STR_IN_TEST_CASE("\r12\t34 56 78Tj", "Tj", 1, false, 15), + // Regular cases with various parameters. 
+ STR_IN_TEST_CASE("\r\0abd Tj", "Tj", 1, true, 0), + STR_IN_TEST_CASE("12 4 Tj 3 46 Tj", "Tj", 1, true, 2), + STR_IN_TEST_CASE("er^ 2 (34) (5667) Tj", "Tj", 2, true, 5), + STR_IN_TEST_CASE("<344> (232)\t343.4\n12 45 Tj", "Tj", 3, true, 11), + STR_IN_TEST_CASE("1 2 3 4 5 6 7 8 cm", "cm", 6, true, 3), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + const FindTagTestStruct& data = test_data[i]; + CPDF_SimpleParser parser(data.input, data.input_size); + EXPECT_EQ(data.result, + parser.FindTagParamFromStart(data.token, data.num_params)) + << " for case " << i; + EXPECT_EQ(data.result_pos, parser.GetCurPos()) << " for case " << i; + } +} diff --git a/core/src/fpdfdoc/doc_ap.cpp b/core/src/fpdfdoc/doc_ap.cpp index c84a36d499..b6134927cf 100644 --- a/core/src/fpdfdoc/doc_ap.cpp +++ b/core/src/fpdfdoc/doc_ap.cpp @@ -228,19 +228,16 @@ static CFX_ByteString GetFontSetString(IPVT_FontMap* pFontMap, } static CPVT_Color ParseColor(const CFX_ByteString& str) { CPDF_SimpleParser syntax(str); - syntax.SetPos(0); - if (syntax.FindTagParam("g", 1)) { + if (syntax.FindTagParamFromStart("g", 1)) { return CPVT_Color(CPVT_Color::kGray, FX_atof(syntax.GetWord())); } - syntax.SetPos(0); - if (syntax.FindTagParam("rg", 3)) { + if (syntax.FindTagParamFromStart("rg", 3)) { FX_FLOAT f1 = FX_atof(syntax.GetWord()); FX_FLOAT f2 = FX_atof(syntax.GetWord()); FX_FLOAT f3 = FX_atof(syntax.GetWord()); return CPVT_Color(CPVT_Color::kRGB, f1, f2, f3); } - syntax.SetPos(0); - if (syntax.FindTagParam("k", 4)) { + if (syntax.FindTagParamFromStart("k", 4)) { FX_FLOAT f1 = FX_atof(syntax.GetWord()); FX_FLOAT f2 = FX_atof(syntax.GetWord()); FX_FLOAT f3 = FX_atof(syntax.GetWord()); @@ -288,7 +285,7 @@ static FX_BOOL GenerateWidgetAP(CPDF_Document* pDoc, return FALSE; } CPDF_SimpleParser syntax(DA); - syntax.FindTagParam("Tf", 2); + syntax.FindTagParamFromStart("Tf", 2); CFX_ByteString sFontName = syntax.GetWord(); sFontName = PDF_NameDecode(sFontName); if (sFontName.IsEmpty()) { diff --git 
a/core/src/fpdfdoc/doc_formfield.cpp b/core/src/fpdfdoc/doc_formfield.cpp index 31557b490d..274459aec0 100644 --- a/core/src/fpdfdoc/doc_formfield.cpp +++ b/core/src/fpdfdoc/doc_formfield.cpp @@ -1075,7 +1075,7 @@ void CPDF_FormField::LoadDA() { return; } CPDF_SimpleParser syntax(DA); - syntax.FindTagParam("Tf", 2); + syntax.FindTagParamFromStart("Tf", 2); CFX_ByteString font_name = syntax.GetWord(); CPDF_Dictionary* pFontDict = NULL; if (m_pForm->m_pFormDict && m_pForm->m_pFormDict->GetDictBy("DR") && diff --git a/core/src/fpdfdoc/doc_utils.cpp b/core/src/fpdfdoc/doc_utils.cpp index a2c0454bc9..db905c880f 100644 --- a/core/src/fpdfdoc/doc_utils.cpp +++ b/core/src/fpdfdoc/doc_utils.cpp @@ -32,7 +32,7 @@ FX_BOOL CPDF_DefaultAppearance::HasFont() { return FALSE; } CPDF_SimpleParser syntax(m_csDA); - return syntax.FindTagParam("Tf", 2); + return syntax.FindTagParamFromStart("Tf", 2); } CFX_ByteString CPDF_DefaultAppearance::GetFontString() { CFX_ByteString csFont; @@ -40,7 +40,7 @@ CFX_ByteString CPDF_DefaultAppearance::GetFontString() { return csFont; } CPDF_SimpleParser syntax(m_csDA); - if (syntax.FindTagParam("Tf", 2)) { + if (syntax.FindTagParamFromStart("Tf", 2)) { csFont += (CFX_ByteString)syntax.GetWord(); csFont += " "; csFont += (CFX_ByteString)syntax.GetWord(); @@ -57,7 +57,7 @@ void CPDF_DefaultAppearance::GetFont(CFX_ByteString& csFontNameTag, return; } CPDF_SimpleParser syntax(m_csDA); - if (syntax.FindTagParam("Tf", 2)) { + if (syntax.FindTagParamFromStart("Tf", 2)) { csFontNameTag = (CFX_ByteString)syntax.GetWord(); csFontNameTag.Delete(0, 1); fFontSize = FX_atof((CFX_ByteString)syntax.GetWord()); @@ -69,15 +69,13 @@ FX_BOOL CPDF_DefaultAppearance::HasColor(FX_BOOL bStrokingOperation) { return FALSE; } CPDF_SimpleParser syntax(m_csDA); - if (syntax.FindTagParam(bStrokingOperation ? "G" : "g", 1)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "G" : "g", 1)) { return TRUE; } - syntax.SetPos(0); - if (syntax.FindTagParam(bStrokingOperation ? 
"RG" : "rg", 3)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "RG" : "rg", 3)) { return TRUE; } - syntax.SetPos(0); - return syntax.FindTagParam(bStrokingOperation ? "K" : "k", 4); + return syntax.FindTagParamFromStart(bStrokingOperation ? "K" : "k", 4); } CFX_ByteString CPDF_DefaultAppearance::GetColorString( FX_BOOL bStrokingOperation) { @@ -86,14 +84,13 @@ CFX_ByteString CPDF_DefaultAppearance::GetColorString( return csColor; } CPDF_SimpleParser syntax(m_csDA); - if (syntax.FindTagParam(bStrokingOperation ? "G" : "g", 1)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "G" : "g", 1)) { csColor += (CFX_ByteString)syntax.GetWord(); csColor += " "; csColor += (CFX_ByteString)syntax.GetWord(); return csColor; } - syntax.SetPos(0); - if (syntax.FindTagParam(bStrokingOperation ? "RG" : "rg", 3)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "RG" : "rg", 3)) { csColor += (CFX_ByteString)syntax.GetWord(); csColor += " "; csColor += (CFX_ByteString)syntax.GetWord(); @@ -103,8 +100,7 @@ CFX_ByteString CPDF_DefaultAppearance::GetColorString( csColor += (CFX_ByteString)syntax.GetWord(); return csColor; } - syntax.SetPos(0); - if (syntax.FindTagParam(bStrokingOperation ? "K" : "k", 4)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "K" : "k", 4)) { csColor += (CFX_ByteString)syntax.GetWord(); csColor += " "; csColor += (CFX_ByteString)syntax.GetWord(); @@ -128,21 +124,19 @@ void CPDF_DefaultAppearance::GetColor(int& iColorType, return; } CPDF_SimpleParser syntax(m_csDA); - if (syntax.FindTagParam(bStrokingOperation ? "G" : "g", 1)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "G" : "g", 1)) { iColorType = COLORTYPE_GRAY; fc[0] = FX_atof((CFX_ByteString)syntax.GetWord()); return; } - syntax.SetPos(0); - if (syntax.FindTagParam(bStrokingOperation ? "RG" : "rg", 3)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? 
"RG" : "rg", 3)) { iColorType = COLORTYPE_RGB; fc[0] = FX_atof((CFX_ByteString)syntax.GetWord()); fc[1] = FX_atof((CFX_ByteString)syntax.GetWord()); fc[2] = FX_atof((CFX_ByteString)syntax.GetWord()); return; } - syntax.SetPos(0); - if (syntax.FindTagParam(bStrokingOperation ? "K" : "k", 4)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "K" : "k", 4)) { iColorType = COLORTYPE_CMYK; fc[0] = FX_atof((CFX_ByteString)syntax.GetWord()); fc[1] = FX_atof((CFX_ByteString)syntax.GetWord()); @@ -159,14 +153,13 @@ void CPDF_DefaultAppearance::GetColor(FX_ARGB& color, return; } CPDF_SimpleParser syntax(m_csDA); - if (syntax.FindTagParam(bStrokingOperation ? "G" : "g", 1)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "G" : "g", 1)) { iColorType = COLORTYPE_GRAY; FX_FLOAT g = FX_atof((CFX_ByteString)syntax.GetWord()) * 255 + 0.5f; color = ArgbEncode(255, (int)g, (int)g, (int)g); return; } - syntax.SetPos(0); - if (syntax.FindTagParam(bStrokingOperation ? "RG" : "rg", 3)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? "RG" : "rg", 3)) { iColorType = COLORTYPE_RGB; FX_FLOAT r = FX_atof((CFX_ByteString)syntax.GetWord()) * 255 + 0.5f; FX_FLOAT g = FX_atof((CFX_ByteString)syntax.GetWord()) * 255 + 0.5f; @@ -174,8 +167,7 @@ void CPDF_DefaultAppearance::GetColor(FX_ARGB& color, color = ArgbEncode(255, (int)r, (int)g, (int)b); return; } - syntax.SetPos(0); - if (syntax.FindTagParam(bStrokingOperation ? "K" : "k", 4)) { + if (syntax.FindTagParamFromStart(bStrokingOperation ? 
"K" : "k", 4)) { iColorType = COLORTYPE_CMYK; FX_FLOAT c = FX_atof((CFX_ByteString)syntax.GetWord()); FX_FLOAT m = FX_atof((CFX_ByteString)syntax.GetWord()); @@ -193,7 +185,7 @@ FX_BOOL CPDF_DefaultAppearance::HasTextMatrix() { return FALSE; } CPDF_SimpleParser syntax(m_csDA); - return syntax.FindTagParam("Tm", 6); + return syntax.FindTagParamFromStart("Tm", 6); } CFX_ByteString CPDF_DefaultAppearance::GetTextMatrixString() { CFX_ByteString csTM; @@ -201,7 +193,7 @@ CFX_ByteString CPDF_DefaultAppearance::GetTextMatrixString() { return csTM; } CPDF_SimpleParser syntax(m_csDA); - if (syntax.FindTagParam("Tm", 6)) { + if (syntax.FindTagParamFromStart("Tm", 6)) { for (int i = 0; i < 6; i++) { csTM += (CFX_ByteString)syntax.GetWord(); csTM += " "; @@ -216,7 +208,7 @@ CFX_Matrix CPDF_DefaultAppearance::GetTextMatrix() { return tm; } CPDF_SimpleParser syntax(m_csDA); - if (syntax.FindTagParam("Tm", 6)) { + if (syntax.FindTagParamFromStart("Tm", 6)) { FX_FLOAT f[6]; for (int i = 0; i < 6; i++) { f[i] = FX_atof((CFX_ByteString)syntax.GetWord()); -- cgit v1.2.3