summary | refs | log | tree | commit | diff
path: root/core/src/fpdfapi/fpdf_parser
diff options
context:
space:
mode:
author: Wei Li <weili@chromium.org>, 2016-02-16 14:26:22 -0800
committer: Wei Li <weili@chromium.org>, 2016-02-16 14:26:22 -0800
commit: 970c11e2225d73234009ccdc6f656abd01ae4026 (patch)
tree: c184a4d3b62785ee5431cc2c6d7199103b78986f /core/src/fpdfapi/fpdf_parser
parent: 4f7f4eea92b607ca3864df63e4b277abd5e5af97 (diff)
download: pdfium-970c11e2225d73234009ccdc6f656abd01ae4026.tar.xz
Refactor CPDF_SimpleParser and add unit tests.
Remove unused member functions and simplify calls to find tag parameters. R=thestig@chromium.org Review URL: https://codereview.chromium.org/1681403007 .
Diffstat (limited to 'core/src/fpdfapi/fpdf_parser')
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp 69
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp 96
2 files changed, 112 insertions, 53 deletions
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
index e6e2f2aabe..d1a7231d45 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
@@ -65,11 +65,13 @@ CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) {
m_dwSize = dwSize;
m_dwCurPos = 0;
}
+
// Builds a parser over the bytes viewed by |str|. Only the pointer and length
// reported by |str| are stored (nothing is copied here), and the read
// position starts at offset 0.
CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str)
    : m_pData(str.GetPtr()), m_dwSize(str.GetLength()), m_dwCurPos(0) {}
+
void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) {
pStart = NULL;
dwSize = 0;
@@ -146,6 +148,7 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) {
dwSize++;
}
}
+
CFX_ByteStringC CPDF_SimpleParser::GetWord() {
const uint8_t* pStart;
FX_DWORD dwSize;
@@ -190,60 +193,14 @@ CFX_ByteStringC CPDF_SimpleParser::GetWord() {
}
return CFX_ByteStringC(pStart, dwSize);
}
-FX_BOOL CPDF_SimpleParser::SearchToken(const CFX_ByteStringC& token) {
- int token_len = token.GetLength();
- while (m_dwCurPos < m_dwSize - token_len) {
- if (FXSYS_memcmp(m_pData + m_dwCurPos, token.GetPtr(), token_len) == 0) {
- break;
- }
- m_dwCurPos++;
- }
- if (m_dwCurPos == m_dwSize - token_len) {
- return FALSE;
- }
- m_dwCurPos += token_len;
- return TRUE;
-}
-FX_BOOL CPDF_SimpleParser::SkipWord(const CFX_ByteStringC& token) {
- while (1) {
- CFX_ByteStringC word = GetWord();
- if (word.IsEmpty()) {
- return FALSE;
- }
- if (word == token) {
- return TRUE;
- }
- }
- return FALSE;
-}
-FX_BOOL CPDF_SimpleParser::FindTagPair(const CFX_ByteStringC& start_token,
- const CFX_ByteStringC& end_token,
- FX_DWORD& start_pos,
- FX_DWORD& end_pos) {
- if (!start_token.IsEmpty()) {
- if (!SkipWord(start_token)) {
- return FALSE;
- }
- start_pos = m_dwCurPos;
- }
- while (1) {
- end_pos = m_dwCurPos;
- CFX_ByteStringC word = GetWord();
- if (word.IsEmpty()) {
- return FALSE;
- }
- if (word == end_token) {
- return TRUE;
- }
- }
- return FALSE;
-}
-FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token,
- int nParams) {
+
+bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token,
+ int nParams) {
nParams++;
FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);
int buf_index = 0;
int buf_count = 0;
+ m_dwCurPos = 0;
while (1) {
pBuf[buf_index++] = m_dwCurPos;
if (buf_index == nParams) {
@@ -256,7 +213,7 @@ FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token,
CFX_ByteStringC word = GetWord();
if (word.IsEmpty()) {
FX_Free(pBuf);
- return FALSE;
+ return false;
}
if (word == token) {
if (buf_count < nParams) {
@@ -264,10 +221,10 @@ FX_BOOL CPDF_SimpleParser::FindTagParam(const CFX_ByteStringC& token,
}
m_dwCurPos = pBuf[buf_index];
FX_Free(pBuf);
- return TRUE;
+ return true;
}
}
- return FALSE;
+ return false;
}
CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {
@@ -291,12 +248,14 @@ CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {
result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart));
return result;
}
+
// Overload of PDF_NameDecode() for an owning byte string. A name that holds
// no '#' byte is handed back as-is; any other name is forwarded to the
// CFX_ByteStringC overload, whose result is returned.
CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) {
  const bool has_hash =
      FXSYS_memchr(orig.c_str(), '#', orig.GetLength()) != NULL;
  if (has_hash) {
    return PDF_NameDecode(CFX_ByteStringC(orig));
  }
  return orig;
}
+
CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) {
uint8_t* src_buf = (uint8_t*)orig.c_str();
int src_len = orig.GetLength();
@@ -332,6 +291,7 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) {
res.ReleaseBuffer();
return res;
}
+
CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) {
if (!pObj) {
buf << " null";
@@ -402,6 +362,7 @@ CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) {
}
return buf;
}
+
FX_FLOAT PDF_ClipFloat(FX_FLOAT f) {
if (f < 0) {
return 0;
@@ -411,6 +372,7 @@ FX_FLOAT PDF_ClipFloat(FX_FLOAT f) {
}
return f;
}
+
static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num) {
CPDF_Array* pLimits = pNode->GetArrayBy("Limits");
if (pLimits &&
@@ -447,6 +409,7 @@ static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num) {
}
return NULL;
}
+
// Looks up |num| in this number tree by delegating to SearchNumberNode(),
// starting the search at m_pRoot, and returns that helper's result unchanged.
// NOTE(review): the visible tail of SearchNumberNode() returns NULL, so callers
// presumably need to handle a NULL result -- confirm against the full helper.
CPDF_Object* CPDF_NumberTree::LookupValue(int num) {
return SearchNumberNode(m_pRoot, num);
}
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp
new file mode 100644
index 0000000000..1673798139
--- /dev/null
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp
@@ -0,0 +1,96 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/include/fpdfapi/fpdf_parser.h"
+
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/test_support.h"
+
+TEST(SimpleParserTest, GetWord) {  // Table-driven check of the first word GetWord() returns.
+ pdfium::StrFuncTestData test_data[] = {  // Each entry: (input, expected first word). NOTE(review): STR_TEST_CASE presumably records sizes so bytes after an embedded \0 are kept -- confirm in testing/test_support.h.
+ // Empty input: the expected word is empty.
+ STR_TEST_CASE("", ""),
+ // Input made only of whitespace bytes: the expected word is empty.
+ STR_TEST_CASE(" \t \0 \n", ""),
+ // Input made only of '%' comments: the expected word is empty.
+ STR_TEST_CASE("%this is a test case\r\n%2nd line", ""),
+ // Whitespace mixed with '%' comments: the expected word is still empty.
+ STR_TEST_CASE(" \t \0%try()%haha\n %another line \aa", ""),
+ // Name: the leading '/' is part of the returned word.
+ STR_TEST_CASE(" /Tester ", "/Tester"),
+ // Parenthesized string: returned with its parentheses; the trailing '!' is left out.
+ STR_TEST_CASE("\t(nice day)!\n ", "(nice day)"),
+ // String with nested parentheses: the word runs to the matching outer ')'.
+ STR_TEST_CASE("\t(It is a (long) day)!\n ", "(It is a (long) day)"),
+ // String with backslash-escaped parentheses: the escapes are returned as written.
+ STR_TEST_CASE("\t(It is a \\(long\\) day!)hi\n ",
+ "(It is a \\(long\\) day!)"),
+ // Hex string: the word ends at the first '>', even if another '<' appeared before it.
+ STR_TEST_CASE(" \n<4545acdfedertt>abc ", "<4545acdfedertt>"),
+ STR_TEST_CASE(" \n<4545a<ed>ertt>abc ", "<4545a<ed>"),
+ // Dictionary: only the opening "<<" is returned as the first word.
+ STR_TEST_CASE("<</oc 234 /color 2 3 R>>", "<<"),
+ STR_TEST_CASE("\t\t<< /abc>>", "<<"),
+ // Closing delimiters at the start of the input are returned as words of their own.
+ STR_TEST_CASE("> little bear", ">"),
+ STR_TEST_CASE(") another bear", ")"),
+ STR_TEST_CASE(">> end ", ">>"),
+ // Unterminated string: the expected word is the whole remaining input.
+ STR_TEST_CASE("(sdfgfgbcv", "(sdfgfgbcv"),
+ // Plain tokens: only the first one is expected back.
+ STR_TEST_CASE("apple pear", "apple"),
+ STR_TEST_CASE(" pi=3.1415 ", "pi=3.1415"),
+ STR_TEST_CASE(" p t x c ", "p"),
+ STR_TEST_CASE(" pt\0xc ", "pt"),
+ STR_TEST_CASE(" $^&&*\t\0sdff ", "$^&&*"),
+ STR_TEST_CASE("\n\r+3.5656 -11.0", "+3.5656"),
+ };
+ for (size_t i = 0; i < FX_ArraySize(test_data); ++i) {  // One fresh parser per case; only its first GetWord() result is checked.
+ const pdfium::StrFuncTestData& data = test_data[i];
+ CPDF_SimpleParser parser(data.input, data.input_size);
+ CFX_ByteStringC word = parser.GetWord();
+ EXPECT_EQ(std::string(reinterpret_cast<const char*>(data.expected),  // Both sides become std::string built from pointer + length.
+ data.expected_size),
+ std::string(word.GetCStr(), word.GetLength()))
+ << " for case " << i;  // The table index is appended to any failure message.
+ }
+}
+
+TEST(SimpleParserTest, FindTagParamFromStart) {  // Table-driven check of the return value and the resulting GetCurPos().
+ struct FindTagTestStruct {
+ const unsigned char* input;  // Bytes handed to the CPDF_SimpleParser constructor.
+ unsigned int input_size;  // Length passed to the constructor together with input.
+ const char* token;  // First argument to FindTagParamFromStart().
+ int num_params;  // Second argument to FindTagParamFromStart().
+ bool result;  // Expected return value of the call.
+ unsigned int result_pos;  // Expected GetCurPos() after the call.
+ } test_data[] = {  // NOTE(review): STR_IN_TEST_CASE presumably expands its first argument into input + input_size -- confirm in testing/test_support.h.
+ // Empty input: expected to fail with the position left at 0.
+ STR_IN_TEST_CASE("", "Tj", 1, false, 0),
+ STR_IN_TEST_CASE("", "", 1, false, 0),
+ // Empty token on non-empty input: expected to fail.
+ STR_IN_TEST_CASE(" T j", "", 1, false, 5),
+ // No usable parameter word ahead of the token: both cases expect failure.
+ STR_IN_TEST_CASE("Tj", "Tj", 1, false, 2),
+ STR_IN_TEST_CASE("(Tj", "Tj", 1, false, 3),
+ // Token text only appears glued to another word ("78Tj"): expected not to match.
+ STR_IN_TEST_CASE("\r12\t34 56 78Tj", "Tj", 1, false, 15),
+ // Successful lookups requesting 1, 2, 3 and 6 parameters.
+ STR_IN_TEST_CASE("\r\0abd Tj", "Tj", 1, true, 0),
+ STR_IN_TEST_CASE("12 4 Tj 3 46 Tj", "Tj", 1, true, 2),
+ STR_IN_TEST_CASE("er^ 2 (34) (5667) Tj", "Tj", 2, true, 5),
+ STR_IN_TEST_CASE("<344> (232)\t343.4\n12 45 Tj", "Tj", 3, true, 11),
+ STR_IN_TEST_CASE("1 2 3 4 5 6 7 8 cm", "cm", 6, true, 3),
+ };
+ for (size_t i = 0; i < FX_ArraySize(test_data); ++i) {  // One fresh parser per case.
+ const FindTagTestStruct& data = test_data[i];
+ CPDF_SimpleParser parser(data.input, data.input_size);
+ EXPECT_EQ(data.result,
+ parser.FindTagParamFromStart(data.token, data.num_params))
+ << " for case " << i;
+ EXPECT_EQ(data.result_pos, parser.GetCurPos()) << " for case " << i;  // The position is checked for failing cases as well.
+ }
+}