From dec08c8d3fbc4e89748f2d655b32727cfab373ed Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Tue, 8 May 2018 15:20:27 +0000 Subject: [fm2js] Fail transpiling if lexer has left over data If there is remaining data after the lexer has said it's complete then something has gone wrong while lexing the formcalc data. This CL changes the transpiler to return an error in the case of the lexer having extra data. Bug: chromium:834575 Change-Id: I8a1288a7f01cc69faf2033829d68246d815258de Reviewed-on: https://pdfium-review.googlesource.com/32130 Commit-Queue: dsinclair Reviewed-by: Henrique Nakashima --- xfa/fxfa/fm2js/cxfa_fmlexer.cpp | 10 +++++----- xfa/fxfa/fm2js/cxfa_fmlexer.h | 1 + xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp | 24 ++++++++++++++++++++++++ xfa/fxfa/fm2js/cxfa_fmparser.cpp | 19 ++++++++++++++----- 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp index 72fe0f2a01..32e29575c5 100644 --- a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp +++ b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp @@ -133,13 +133,13 @@ CXFA_FMLexer::CXFA_FMLexer(const WideStringView& wsFormCalc) m_end(m_cursor + wsFormCalc.GetLength()), m_lexer_error(false) {} -CXFA_FMLexer::~CXFA_FMLexer() {} +CXFA_FMLexer::~CXFA_FMLexer() = default; CXFA_FMToken CXFA_FMLexer::NextToken() { if (m_lexer_error) return CXFA_FMToken(); - while (m_cursor < m_end && *m_cursor) { + while (!IsComplete() && *m_cursor) { if (!IsFormCalcCharacter(*m_cursor)) { RaiseError(); return CXFA_FMToken(); @@ -323,7 +323,7 @@ CXFA_FMToken CXFA_FMLexer::AdvanceForString() { const wchar_t* start = m_cursor; ++m_cursor; - while (m_cursor < m_end && *m_cursor) { + while (!IsComplete() && *m_cursor) { if (!IsFormCalcCharacter(*m_cursor)) break; @@ -357,7 +357,7 @@ CXFA_FMToken CXFA_FMLexer::AdvanceForString() { CXFA_FMToken CXFA_FMLexer::AdvanceForIdentifier() { const wchar_t* start = m_cursor; ++m_cursor; - while (m_cursor < m_end && *m_cursor) { + while (!IsComplete() && 
*m_cursor) { if (!IsFormCalcCharacter(*m_cursor)) { RaiseError(); return CXFA_FMToken(); @@ -377,7 +377,7 @@ CXFA_FMToken CXFA_FMLexer::AdvanceForIdentifier() { void CXFA_FMLexer::AdvanceForComment() { m_cursor++; - while (m_cursor < m_end && *m_cursor) { + while (!IsComplete() && *m_cursor) { if (!IsFormCalcCharacter(*m_cursor)) { RaiseError(); return; diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.h b/xfa/fxfa/fm2js/cxfa_fmlexer.h index 3864abb0eb..58b193e0ec 100644 --- a/xfa/fxfa/fm2js/cxfa_fmlexer.h +++ b/xfa/fxfa/fm2js/cxfa_fmlexer.h @@ -109,6 +109,7 @@ class CXFA_FMLexer { ~CXFA_FMLexer(); CXFA_FMToken NextToken(); + bool IsComplete() const { return m_cursor >= m_end; } private: CXFA_FMToken AdvanceForNumber(); diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp index 248b9fead6..cefc1cb992 100644 --- a/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp +++ b/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp @@ -15,12 +15,14 @@ TEST(CXFA_FMLexerTest, NullString) { CXFA_FMLexer lexer(null_string); CXFA_FMToken token = lexer.NextToken(); EXPECT_EQ(TOKeof, token.m_type); + EXPECT_TRUE(lexer.IsComplete()); } TEST(CXFA_FMLexerTest, EmptyString) { CXFA_FMLexer lexer(L""); CXFA_FMToken token = lexer.NextToken(); EXPECT_EQ(TOKeof, token.m_type); + EXPECT_TRUE(lexer.IsComplete()); } TEST(CXFA_FMLexerTest, Numbers) { @@ -67,6 +69,7 @@ TEST(CXFA_FMLexerTest, Numbers) { // prior to the exponent. // EXPECT_EQ(L"100000000000000000", token.m_string); EXPECT_EQ(L"99999999999999999", token.m_string); + EXPECT_TRUE(lexer->IsComplete()); } // The quotes are stripped in CXFA_FMStringExpression::ToJavaScript. @@ -99,6 +102,7 @@ TEST(CXFA_FMLexerTest, Strings) { EXPECT_EQ( L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"", token.m_string); + EXPECT_TRUE(lexer->IsComplete()); } // Note, 'this' is a keyword but is not matched by the lexer. 
@@ -170,6 +174,7 @@ TEST(CXFA_FMLexerTest, OperatorsAndKeywords) { auto lexer = pdfium::MakeUnique(op[i].op); CXFA_FMToken token = lexer->NextToken(); EXPECT_EQ(op[i].token, token.m_type); + EXPECT_TRUE(lexer->IsComplete()); } } @@ -213,6 +218,7 @@ TEST(CXFA_FMLexerTest, Comments) { token = lexer->NextToken(); EXPECT_EQ(TOKeof, token.m_type); + EXPECT_TRUE(lexer->IsComplete()); } TEST(CXFA_FMLexerTest, ValidIdentifiers) { @@ -223,6 +229,7 @@ TEST(CXFA_FMLexerTest, ValidIdentifiers) { CXFA_FMToken token = lexer->NextToken(); EXPECT_EQ(TOKidentifier, token.m_type); EXPECT_EQ(ident, token.m_string); + EXPECT_TRUE(lexer->IsComplete()); } } @@ -248,6 +255,7 @@ TEST(CXFA_FMLexerTest, InvalidIdentifiers) { EXPECT_NE(TOKreserver, token.m_type); token = lexer->NextToken(); EXPECT_EQ(TOKreserver, token.m_type); + EXPECT_FALSE(lexer->IsComplete()); } TEST(CXFA_FMLexerTest, Whitespace) { @@ -266,4 +274,20 @@ TEST(CXFA_FMLexerTest, Whitespace) { token = lexer->NextToken(); EXPECT_EQ(TOKeof, token.m_type); + EXPECT_TRUE(lexer->IsComplete()); +} + +TEST(CXFA_FMLexerTest, NullData) { + auto lexer = pdfium::MakeUnique( + WideStringView(L"\x2d\x32\x00\x2d\x32", 5)); + CXFA_FMToken token = lexer->NextToken(); + EXPECT_EQ(TOKminus, token.m_type); + + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token.m_type); + EXPECT_EQ(L"2", token.m_string); + + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token.m_type); + EXPECT_FALSE(lexer->IsComplete()); } diff --git a/xfa/fxfa/fm2js/cxfa_fmparser.cpp b/xfa/fxfa/fm2js/cxfa_fmparser.cpp index fb4a7f4cf9..be0a31b519 100644 --- a/xfa/fxfa/fm2js/cxfa_fmparser.cpp +++ b/xfa/fxfa/fm2js/cxfa_fmparser.cpp @@ -21,18 +21,26 @@ constexpr unsigned int kMaxPostExpressions = 256; } // namespace CXFA_FMParser::CXFA_FMParser(const WideStringView& wsFormcalc) - : m_error(false), m_parse_depth(0), m_max_parse_depth(kMaxParseDepth) { - m_lexer = pdfium::MakeUnique(wsFormcalc); - m_token = m_lexer->NextToken(); -} + : 
m_lexer(pdfium::MakeUnique(wsFormcalc)), + m_error(false), + m_parse_depth(0), + m_max_parse_depth(kMaxParseDepth) {} -CXFA_FMParser::~CXFA_FMParser() {} +CXFA_FMParser::~CXFA_FMParser() = default; std::unique_ptr CXFA_FMParser::Parse() { + m_token = m_lexer->NextToken(); + if (HasError()) + return nullptr; + auto expressions = ParseExpressionList(); if (HasError()) return nullptr; + // We failed to parse all of the input so something has gone wrong. + if (!m_lexer->IsComplete()) + return nullptr; + return pdfium::MakeUnique(std::move(expressions)); } @@ -66,6 +74,7 @@ CXFA_FMParser::ParseExpressionList() { AutoRestorer restorer(&m_parse_depth); if (HasError() || !IncrementParseDepthAndCheck()) return std::vector>(); + std::vector> expressions; while (!HasError()) { if (m_token.m_type == TOKeof || m_token.m_type == TOKendfunc || -- cgit v1.2.3