Correct lexer handling of FormCalc identifiers

This makes the lexer stricter about which characters are valid in identifiers, so that it conforms to the grammar in the FormCalc spec. This should eliminate a class of inputs, generated by ClusterFuzz, that break later stages of the transpile.

BUG: chromium:736234, pdfium:783, pdfium:784
Change-Id: I3987d6778a82b71d768fa751035993c0af2577ee
Reviewed-on: https://pdfium-review.googlesource.com/8010
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
author: Ryan Harrison <rharrison@chromium.org> 2017-07-18 10:27:00 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-07-18 14:48:37 +0000
commit: 756023071d1c4574fcb433c4bc7f13e7b763f763 (patch)
tree: 3388aed81fc470ddfff8afa7def359070c2ad086 /xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp
parent: 574366b637c1e937efc7b1becb1d151c3599f7af (diff)
download: pdfium-756023071d1c4574fcb433c4bc7f13e7b763f763.tar.xz
1 files changed, 34 insertions, 1 deletions
diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp
index 7ca12d2fad..5a8139d416 100644
--- a/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp
@@ -208,7 +208,7 @@ TEST(CXFA_FMLexerTest, Comments) {
   EXPECT_EQ(TOKeof, token->m_type);
 }
 
-TEST(CXFA_FMLexerTest, Identifiers) {
+TEST(CXFA_FMLexerTest, ValidIdentifiers) {
   std::vector<const wchar_t*> identifiers = {
       L"a", L"an_identifier", L"_ident", L"$ident", L"!ident", L"GetAddr"};
   for (const auto* ident : identifiers) {
@@ -219,6 +219,39 @@ TEST(CXFA_FMLexerTest, Identifiers) {
   }
 }
 
+TEST(CXFA_FMLexerTest, InvalidIdentifiers) {
+  auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"#a");
+  lexer->NextToken();
+  // TODO(rharrison): Also expect NextToken() to return nullptr here.
+  // See https://crbug.com/pdfium/814
+  EXPECT_TRUE(lexer->HasError());
+
+  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1a");
+  lexer->NextToken();
+  // TODO(rharrison): Also expect NextToken() to return nullptr here.
+  // See https://crbug.com/pdfium/814
+  EXPECT_TRUE(lexer->HasError());
+
+  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"an@identifier");
+  lexer->NextToken();
+  EXPECT_FALSE(lexer->HasError());
+  lexer->NextToken();
+  // TODO(rharrison): Also expect NextToken() to return nullptr here.
+  // See https://crbug.com/pdfium/814
+  EXPECT_TRUE(lexer->HasError());
+  // TODO(rharrison): Add a test that a further call to NextToken() retains
+  // the error state instead of continuing the parse.
+  // See https://crbug.com/pdfium/814
+
+  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"_ident@");
+  lexer->NextToken();
+  EXPECT_FALSE(lexer->HasError());
+  lexer->NextToken();
+  // TODO(rharrison): Also expect NextToken() to return nullptr here.
+  // See https://crbug.com/pdfium/814
+  EXPECT_TRUE(lexer->HasError());
+}
+
 TEST(CXFA_FMLexerTest, Whitespace) {
   auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L" \t\xc\x9\xb");
   CXFA_FMToken* token = lexer->NextToken();
author	Ryan Harrison <rharrison@chromium.org>	2017-07-18 10:27:00 -0400
committer	Chromium commit bot <commit-bot@chromium.org>	2017-07-18 14:48:37 +0000
commit	756023071d1c4574fcb433c4bc7f13e7b763f763 (patch)
tree	3388aed81fc470ddfff8afa7def359070c2ad086 /xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp
parent	574366b637c1e937efc7b1becb1d151c3599f7af (diff)
download	pdfium-756023071d1c4574fcb433c4bc7f13e7b763f763.tar.xz