[fm2js] Fail transpiling if lexer has left over data

If there is remaining data after the lexer has said it's complete then something has gone wrong while lexing the formcalc data. This CL changes the transpiler to return an error in the case of the lexer having extra data. Bug: chromium:834575 Change-Id: I8a1288a7f01cc69faf2033829d68246d815258de Reviewed-on: https://pdfium-review.googlesource.com/32130 Commit-Queue: dsinclair <dsinclair@chromium.org> Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
author: Dan Sinclair <dsinclair@chromium.org> 2018-05-08 15:20:27 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-05-08 15:20:27 +0000
commit: dec08c8d3fbc4e89748f2d655b32727cfab373ed (patch)
tree: 9aac943ee5b8471322be5747d02ed8927a5ffa05 /xfa/fxfa
parent: bda113c645673fd152bb9ca3eaddd3c34920223e (diff)
download: pdfium-dec08c8d3fbc4e89748f2d655b32727cfab373ed.tar.xz
4 files changed, 44 insertions, 10 deletions
diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
index 72fe0f2a01..32e29575c5 100644
--- a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
@@ -133,13 +133,13 @@ CXFA_FMLexer::CXFA_FMLexer(const WideStringView& wsFormCalc)
       m_end(m_cursor + wsFormCalc.GetLength()),
       m_lexer_error(false) {}
 
-CXFA_FMLexer::~CXFA_FMLexer() {}
+CXFA_FMLexer::~CXFA_FMLexer() = default;
 
 CXFA_FMToken CXFA_FMLexer::NextToken() {
   if (m_lexer_error)
     return CXFA_FMToken();
 
-  while (m_cursor < m_end && *m_cursor) {
+  while (!IsComplete() && *m_cursor) {
     if (!IsFormCalcCharacter(*m_cursor)) {
       RaiseError();
       return CXFA_FMToken();
@@ -323,7 +323,7 @@ CXFA_FMToken CXFA_FMLexer::AdvanceForString() {
 
   const wchar_t* start = m_cursor;
   ++m_cursor;
-  while (m_cursor < m_end && *m_cursor) {
+  while (!IsComplete() && *m_cursor) {
     if (!IsFormCalcCharacter(*m_cursor))
       break;
 
@@ -357,7 +357,7 @@ CXFA_FMToken CXFA_FMLexer::AdvanceForString() {
 CXFA_FMToken CXFA_FMLexer::AdvanceForIdentifier() {
   const wchar_t* start = m_cursor;
   ++m_cursor;
-  while (m_cursor < m_end && *m_cursor) {
+  while (!IsComplete() && *m_cursor) {
     if (!IsFormCalcCharacter(*m_cursor)) {
       RaiseError();
       return CXFA_FMToken();
@@ -377,7 +377,7 @@ CXFA_FMToken CXFA_FMLexer::AdvanceForIdentifier() {
 
 void CXFA_FMLexer::AdvanceForComment() {
   m_cursor++;
-  while (m_cursor < m_end && *m_cursor) {
+  while (!IsComplete() && *m_cursor) {
     if (!IsFormCalcCharacter(*m_cursor)) {
       RaiseError();
       return;
diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.h b/xfa/fxfa/fm2js/cxfa_fmlexer.h
index 3864abb0eb..58b193e0ec 100644
--- a/xfa/fxfa/fm2js/cxfa_fmlexer.h
+++ b/xfa/fxfa/fm2js/cxfa_fmlexer.h
@@ -109,6 +109,7 @@ class CXFA_FMLexer {
   ~CXFA_FMLexer();
 
   CXFA_FMToken NextToken();
+  bool IsComplete() const { return m_cursor >= m_end; }
 
  private:
   CXFA_FMToken AdvanceForNumber();
diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp
index 248b9fead6..cefc1cb992 100644
--- a/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp
@@ -15,12 +15,14 @@ TEST(CXFA_FMLexerTest, NullString) {
   CXFA_FMLexer lexer(null_string);
   CXFA_FMToken token = lexer.NextToken();
   EXPECT_EQ(TOKeof, token.m_type);
+  EXPECT_TRUE(lexer.IsComplete());
 }
 
 TEST(CXFA_FMLexerTest, EmptyString) {
   CXFA_FMLexer lexer(L"");
   CXFA_FMToken token = lexer.NextToken();
   EXPECT_EQ(TOKeof, token.m_type);
+  EXPECT_TRUE(lexer.IsComplete());
 }
 
 TEST(CXFA_FMLexerTest, Numbers) {
@@ -67,6 +69,7 @@ TEST(CXFA_FMLexerTest, Numbers) {
   // prior to the exponent.
   // EXPECT_EQ(L"100000000000000000", token.m_string);
   EXPECT_EQ(L"99999999999999999", token.m_string);
+  EXPECT_TRUE(lexer->IsComplete());
 }
 
 // The quotes are stripped in CXFA_FMStringExpression::ToJavaScript.
@@ -99,6 +102,7 @@ TEST(CXFA_FMLexerTest, Strings) {
   EXPECT_EQ(
       L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"",
       token.m_string);
+  EXPECT_TRUE(lexer->IsComplete());
 }
 
 // Note, 'this' is a keyword but is not matched by the lexer.
@@ -170,6 +174,7 @@ TEST(CXFA_FMLexerTest, OperatorsAndKeywords) {
     auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(op[i].op);
     CXFA_FMToken token = lexer->NextToken();
     EXPECT_EQ(op[i].token, token.m_type);
+    EXPECT_TRUE(lexer->IsComplete());
   }
 }
 
@@ -213,6 +218,7 @@ TEST(CXFA_FMLexerTest, Comments) {
 
   token = lexer->NextToken();
   EXPECT_EQ(TOKeof, token.m_type);
+  EXPECT_TRUE(lexer->IsComplete());
 }
 
 TEST(CXFA_FMLexerTest, ValidIdentifiers) {
@@ -223,6 +229,7 @@ TEST(CXFA_FMLexerTest, ValidIdentifiers) {
     CXFA_FMToken token = lexer->NextToken();
     EXPECT_EQ(TOKidentifier, token.m_type);
     EXPECT_EQ(ident, token.m_string);
+    EXPECT_TRUE(lexer->IsComplete());
   }
 }
 
@@ -248,6 +255,7 @@ TEST(CXFA_FMLexerTest, InvalidIdentifiers) {
   EXPECT_NE(TOKreserver, token.m_type);
   token = lexer->NextToken();
   EXPECT_EQ(TOKreserver, token.m_type);
+  EXPECT_FALSE(lexer->IsComplete());
 }
 
 TEST(CXFA_FMLexerTest, Whitespace) {
@@ -266,4 +274,20 @@ TEST(CXFA_FMLexerTest, Whitespace) {
 
   token = lexer->NextToken();
   EXPECT_EQ(TOKeof, token.m_type);
+  EXPECT_TRUE(lexer->IsComplete());
+}
+
+TEST(CXFA_FMLexerTest, NullData) {
+  auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(
+      WideStringView(L"\x2d\x32\x00\x2d\x32", 5));
+  CXFA_FMToken token = lexer->NextToken();
+  EXPECT_EQ(TOKminus, token.m_type);
+
+  token = lexer->NextToken();
+  EXPECT_EQ(TOKnumber, token.m_type);
+  EXPECT_EQ(L"2", token.m_string);
+
+  token = lexer->NextToken();
+  EXPECT_EQ(TOKeof, token.m_type);
+  EXPECT_FALSE(lexer->IsComplete());
 }
diff --git a/xfa/fxfa/fm2js/cxfa_fmparser.cpp b/xfa/fxfa/fm2js/cxfa_fmparser.cpp
index fb4a7f4cf9..be0a31b519 100644
--- a/xfa/fxfa/fm2js/cxfa_fmparser.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fmparser.cpp
@@ -21,18 +21,26 @@ constexpr unsigned int kMaxPostExpressions = 256;
 }  // namespace
 
 CXFA_FMParser::CXFA_FMParser(const WideStringView& wsFormcalc)
-    : m_error(false), m_parse_depth(0), m_max_parse_depth(kMaxParseDepth) {
-  m_lexer = pdfium::MakeUnique<CXFA_FMLexer>(wsFormcalc);
-  m_token = m_lexer->NextToken();
-}
+    : m_lexer(pdfium::MakeUnique<CXFA_FMLexer>(wsFormcalc)),
+      m_error(false),
+      m_parse_depth(0),
+      m_max_parse_depth(kMaxParseDepth) {}
 
-CXFA_FMParser::~CXFA_FMParser() {}
+CXFA_FMParser::~CXFA_FMParser() = default;
 
 std::unique_ptr<CXFA_FMAST> CXFA_FMParser::Parse() {
+  m_token = m_lexer->NextToken();
+  if (HasError())
+    return nullptr;
+
   auto expressions = ParseExpressionList();
   if (HasError())
     return nullptr;
 
+  // We failed to parse all of the input so something has gone wrong.
+  if (!m_lexer->IsComplete())
+    return nullptr;
+
   return pdfium::MakeUnique<CXFA_FMAST>(std::move(expressions));
 }
 
@@ -66,6 +74,7 @@ CXFA_FMParser::ParseExpressionList() {
   AutoRestorer<unsigned long> restorer(&m_parse_depth);
   if (HasError() || !IncrementParseDepthAndCheck())
     return std::vector<std::unique_ptr<CXFA_FMExpression>>();
+
   std::vector<std::unique_ptr<CXFA_FMExpression>> expressions;
   while (!HasError()) {
     if (m_token.m_type == TOKeof || m_token.m_type == TOKendfunc ||
author	Dan Sinclair <dsinclair@chromium.org>	2018-05-08 15:20:27 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	2018-05-08 15:20:27 +0000
commit	dec08c8d3fbc4e89748f2d655b32727cfab373ed (patch)
tree	9aac943ee5b8471322be5747d02ed8927a5ffa05 /xfa/fxfa
parent	bda113c645673fd152bb9ca3eaddd3c34920223e (diff)
download	pdfium-dec08c8d3fbc4e89748f2d655b32727cfab373ed.tar.xz