summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--BUILD.gn1
-rw-r--r--xfa/fxfa/fm2js/xfa_lexer.cpp57
-rw-r--r--xfa/fxfa/fm2js/xfa_lexer_unittest.cpp239
3 files changed, 267 insertions, 30 deletions
diff --git a/BUILD.gn b/BUILD.gn
index b7b3ff6e0b..25ff2c6fd3 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -1908,6 +1908,7 @@ test("pdfium_unittests") {
"xfa/fxfa/app/cxfa_textparser_unittest.cpp",
"xfa/fxfa/app/xfa_ffbarcode_unittest.cpp",
"xfa/fxfa/cxfa_ffapp_unittest.cpp",
+ "xfa/fxfa/fm2js/xfa_lexer_unittest.cpp",
"xfa/fxfa/fm2js/xfa_simpleexpression_unittest.cpp",
"xfa/fxfa/parser/xfa_utils_unittest.cpp",
]
diff --git a/xfa/fxfa/fm2js/xfa_lexer.cpp b/xfa/fxfa/fm2js/xfa_lexer.cpp
index dfac51ab2c..bdffa7e998 100644
--- a/xfa/fxfa/fm2js/xfa_lexer.cpp
+++ b/xfa/fxfa/fm2js/xfa_lexer.cpp
@@ -124,9 +124,13 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() {
}
while (1) {
- // Make sure we don't walk off the end of the string.
- if (m_ptr > m_end)
+ // Make sure we don't walk off the end of the string. If we don't currently
+ // have a token type then mark it EOF.
+ if (m_ptr > m_end) {
+ if (p->m_type == TOKreserver)
+ p->m_type = TOKeof;
return p;
+ }
ch = *m_ptr;
if (!IsValid(m_ptr)) {
@@ -172,7 +176,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() {
case '=':
++m_ptr;
if (m_ptr > m_end) {
- Error(kFMErrEndOfInput);
+ p->m_type = TOKassign;
return p;
}
@@ -192,7 +196,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() {
case '<':
++m_ptr;
if (m_ptr > m_end) {
- Error(kFMErrEndOfInput);
+ p->m_type = TOKlt;
return p;
}
@@ -215,7 +219,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() {
case '>':
++m_ptr;
if (m_ptr > m_end) {
- Error(kFMErrEndOfInput);
+ p->m_type = TOKgt;
return p;
}
@@ -275,7 +279,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() {
case '/': {
++m_ptr;
if (m_ptr > m_end) {
- Error(kFMErrEndOfInput);
+ p->m_type = TOKdiv;
return p;
}
@@ -295,7 +299,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() {
case '.':
++m_ptr;
if (m_ptr > m_end) {
- Error(kFMErrEndOfInput);
+ p->m_type = TOKdot;
return p;
}
@@ -369,15 +373,18 @@ const wchar_t* CXFA_FMLexer::String(CXFA_FMToken* t, const wchar_t* p) {
}
++p;
- if (p > m_end) {
- Error(kFMErrEndOfInput);
- return p;
- }
-
if (ch != '"') {
+ // The input ended on a non-quote character; report end-of-input.
+ if (p > m_end) {
+ Error(kFMErrEndOfInput);
+ return p;
+ }
ch = *p;
continue;
}
+ // We've hit the end of the input, return the string.
+ if (p > m_end)
+ break;
if (!IsValid(p)) {
ch = *p;
@@ -405,7 +412,8 @@ const wchar_t* CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const wchar_t* p) {
uint16_t ch = *p;
++p;
if (p > m_end) {
- Error(kFMErrEndOfInput);
+ t->m_wstring = CFX_WideStringC(pStart, (p - pStart));
+ t->m_type = IsKeyword(t->m_wstring);
return p;
}
@@ -432,10 +440,8 @@ const wchar_t* CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const wchar_t* p) {
break;
}
++p;
- if (p > m_end) {
- Error(kFMErrEndOfInput);
- return p;
- }
+ if (p > m_end)
+ break;
}
t->m_wstring = CFX_WideStringC(pStart, (p - pStart));
t->m_type = IsKeyword(t->m_wstring);
@@ -445,29 +451,20 @@ const wchar_t* CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const wchar_t* p) {
const wchar_t* CXFA_FMLexer::Comment(const wchar_t* p) {
++p;
- if (p > m_end) {
- Error(kFMErrEndOfInput);
+ if (p > m_end)
return p;
- }
unsigned ch = *p;
while (ch) {
- if (ch == L'\r') {
- ++p;
- if (p > m_end)
- Error(kFMErrEndOfInput);
- return p;
- }
-
++p;
- if (p > m_end) {
- Error(kFMErrEndOfInput);
+ if (ch == L'\r')
return p;
- }
if (ch == L'\n') {
++m_uCurrentLine;
return p;
}
+ if (p > m_end)
+ return p;
ch = *p;
}
return p;
diff --git a/xfa/fxfa/fm2js/xfa_lexer_unittest.cpp b/xfa/fxfa/fm2js/xfa_lexer_unittest.cpp
new file mode 100644
index 0000000000..fac0c9ac55
--- /dev/null
+++ b/xfa/fxfa/fm2js/xfa_lexer_unittest.cpp
@@ -0,0 +1,239 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "xfa/fxfa/fm2js/xfa_lexer.h"
+
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/test_support.h"
+#include "third_party/base/ptr_util.h"
+
+TEST(CXFA_FMLexerTest, EmptyString) {  // No input: first token must be EOF.
+ CXFA_FMLexer lexer(L"", nullptr);
+ CXFA_FMToken* token = lexer.NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);
+}
+
+TEST(CXFA_FMLexerTest, Numbers) {  // Number tokens carry their source text.
+ auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"-12", nullptr);
+ CXFA_FMToken* token = lexer->NextToken();
+ // TODO(dsinclair): Should this return -12 instead of two tokens?
+ EXPECT_EQ(TOKminus, token->m_type);  // '-' is expected as its own token.
+ token = lexer->NextToken();
+ EXPECT_EQ(L"12", token->m_wstring);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.5362", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);
+ EXPECT_EQ(L"1.5362", token->m_wstring);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"0.875", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);
+ EXPECT_EQ(L"0.875", token->m_wstring);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"5.56e-2", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);  // Signed exponent is one number.
+ EXPECT_EQ(L"5.56e-2", token->m_wstring);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.234E10", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);  // Upper-case 'E' exponent as well.
+ EXPECT_EQ(L"1.234E10", token->m_wstring);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123456789.012345678", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);
+ // TODO(dsinclair): This should round as per IEEE 64-bit values.
+ // EXPECT_EQ(L"123456789.01234567", token->m_wstring);
+ EXPECT_EQ(L"123456789.012345678", token->m_wstring);  // Pinned: unrounded.
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"99999999999999999", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);
+ // TODO(dsinclair): This is spec'd as rounding when > 16 significant digits
+ // prior to the exponent.
+ // EXPECT_EQ(L"100000000000000000", token->m_wstring);
+ EXPECT_EQ(L"99999999999999999", token->m_wstring);  // Pinned: unrounded.
+}
+
+// String tokens keep their quotes here; the quotes are stripped in CXFA_FMStringExpression::ToJavaScript.
+TEST(CXFA_FMLexerTest, Strings) {
+ auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(
+ L"\"The cat jumped over the fence.\"", nullptr);
+ CXFA_FMToken* token = lexer->NextToken();
+ EXPECT_EQ(TOKstring, token->m_type);
+ EXPECT_EQ(L"\"The cat jumped over the fence.\"", token->m_wstring);
+
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"\"\"", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKstring, token->m_type);  // Empty string is still a string.
+ EXPECT_EQ(L"\"\"", token->m_wstring);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(
+ L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKstring, token->m_type);  // Doubled quotes do not end the token.
+ EXPECT_EQ(L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"",
+ token->m_wstring);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(
+ L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"",
+ nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKstring, token->m_type);
+ EXPECT_EQ(
+ L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"",
+ token->m_wstring);  // \uXXXX sequences are expected verbatim, not decoded.
+}
+
+// Note, 'this' is a keyword but is not matched by the lexer.
+TEST(CXFA_FMLexerTest, OperatorsAndKeywords) {
+ struct {  // Source text paired with the token type expected for it.
+ const wchar_t* op;
+ XFA_FM_TOKEN token;
+ } op[] = {{L"+", TOKplus},
+ {L"/", TOKdiv},
+ {L"-", TOKminus},
+ {L"&", TOKand},
+ {L"|", TOKor},
+ {L"*", TOKmul},
+ {L"<", TOKlt},
+ {L">", TOKgt},
+ {L"==", TOKeq},
+ {L"<>", TOKne},
+ {L"<=", TOKle},
+ {L">=", TOKge},
+ {L"and", TOKksand},
+ {L"break", TOKbreak},
+ {L"continue", TOKcontinue},
+ {L"do", TOKdo},
+ {L"downto", TOKdownto},
+ {L"else", TOKelse},
+ {L"elseif", TOKelseif},
+ {L"end", TOKend},
+ {L"endfor", TOKendfor},
+ {L"endfunc", TOKendfunc},
+ {L"endif", TOKendif},
+ {L"endwhile", TOKendwhile},
+ {L"eq", TOKkseq},
+ {L"exit", TOKexit},
+ {L"for", TOKfor},
+ {L"foreach", TOKforeach},
+ {L"func", TOKfunc},
+ {L"ge", TOKksge},
+ {L"gt", TOKksgt},
+ {L"if", TOKif},
+ {L"in", TOKin},
+ {L"infinity", TOKinfinity},
+ {L"le", TOKksle},
+ {L"lt", TOKkslt},
+ {L"nan", TOKnan},
+ {L"ne", TOKksne},
+ {L"not", TOKksnot},
+ {L"null", TOKnull},
+ {L"or", TOKksor},
+ {L"return", TOKreturn},
+ {L"step", TOKstep},
+ {L"then", TOKthen},
+ {L"throw", TOKthrow},
+ {L"upto", TOKupto},
+ {L"var", TOKvar},
+ {L"while", TOKwhile},
+
+ // The following are defined but aren't in the spec.
+ {L"(", TOKlparen},
+ {L")", TOKrparen},
+ {L",", TOKcomma},
+ {L".", TOKdot},
+ {L"[", TOKlbracket},
+ {L"]", TOKrbracket},
+ {L"..", TOKdotdot},
+ {L".#", TOKdotscream},
+ {L".*", TOKdotstar}};
+
+ for (size_t i = 0; i < FX_ArraySize(op); ++i) {  // Each entry is lexed alone.
+ auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(op[i].op, nullptr);
+ CXFA_FMToken* token = lexer->NextToken();
+ EXPECT_EQ(op[i].token, token->m_type);  // Only the first token is checked.
+ }
+}
+
+TEST(CXFA_FMLexerTest, Comments) {  // Covers "//" and ";" comment starters.
+ auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"// Empty.", nullptr);
+ CXFA_FMToken* token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);  // Comment-only input yields no tokens.
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"//", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);  // Comment starter with no body.
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 // Empty.\n\"str\"", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);
+ EXPECT_EQ(L"123", token->m_wstring);
+
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKstring, token->m_type);  // Lexing resumes after the '\n'.
+ EXPECT_EQ(L"\"str\"", token->m_wstring);
+
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L";", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);  // A lone ';' is expected to be a comment.
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"; Empty.", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 ;Empty.\n\"str\"", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);
+ EXPECT_EQ(L"123", token->m_wstring);
+
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKstring, token->m_type);  // Same resume-after-'\n' check for ';'.
+ EXPECT_EQ(L"\"str\"", token->m_wstring);
+
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);
+}
+
+TEST(CXFA_FMLexerTest, Identifiers) {  // Each sample must lex as one identifier.
+ std::vector<const wchar_t*> identifiers = {
+ L"a", L"an_identifier", L"_ident", L"$ident", L"!ident", L"GetAddr"};  // Includes leading '_', '$' and '!'.
+ for (const auto* ident : identifiers) {
+ auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(ident, nullptr);
+ CXFA_FMToken* token = lexer->NextToken();
+ EXPECT_EQ(TOKidentifier, token->m_type);
+ EXPECT_EQ(ident, token->m_wstring);  // Token text must equal the whole input.
+ }
+}
+
+TEST(CXFA_FMLexerTest, Whitespace) {  // Whitespace alone produces no tokens.
+ auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L" \t\xc\x9\xb", nullptr);  // space, tab, 0x0c (FF), 0x09 (tab), 0x0b (VT)
+ CXFA_FMToken* token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);
+
+ lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 \t\xc\x9\xb 456", nullptr);
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKnumber, token->m_type);
+ EXPECT_EQ(L"123", token->m_wstring);
+
+ token = lexer->NextToken();  // The whitespace run only separates the numbers.
+ EXPECT_EQ(TOKnumber, token->m_type);
+ EXPECT_EQ(L"456", token->m_wstring);
+
+ token = lexer->NextToken();
+ EXPECT_EQ(TOKeof, token->m_type);
+}