From 9774984f96946eb96eed29abfcbe824cb5858bbb Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Tue, 16 May 2017 12:59:10 -0400 Subject: Add formcalc lexer tests. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL adds tests for CXFA_FMLexer. Change-Id: I4cb7000212dda6d2b32211005a1c22deabb813ae Reviewed-on: https://pdfium-review.googlesource.com/5554 Commit-Queue: dsinclair Reviewed-by: Nicolás Peña --- BUILD.gn | 1 + xfa/fxfa/fm2js/xfa_lexer.cpp | 57 ++++---- xfa/fxfa/fm2js/xfa_lexer_unittest.cpp | 239 ++++++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+), 30 deletions(-) create mode 100644 xfa/fxfa/fm2js/xfa_lexer_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index b7b3ff6e0b..25ff2c6fd3 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -1908,6 +1908,7 @@ test("pdfium_unittests") { "xfa/fxfa/app/cxfa_textparser_unittest.cpp", "xfa/fxfa/app/xfa_ffbarcode_unittest.cpp", "xfa/fxfa/cxfa_ffapp_unittest.cpp", + "xfa/fxfa/fm2js/xfa_lexer_unittest.cpp", "xfa/fxfa/fm2js/xfa_simpleexpression_unittest.cpp", "xfa/fxfa/parser/xfa_utils_unittest.cpp", ] diff --git a/xfa/fxfa/fm2js/xfa_lexer.cpp b/xfa/fxfa/fm2js/xfa_lexer.cpp index dfac51ab2c..bdffa7e998 100644 --- a/xfa/fxfa/fm2js/xfa_lexer.cpp +++ b/xfa/fxfa/fm2js/xfa_lexer.cpp @@ -124,9 +124,13 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() { } while (1) { - // Make sure we don't walk off the end of the string. - if (m_ptr > m_end) + // Make sure we don't walk off the end of the string. If we don't currently + // have a token type then mark it EOF. 
+ if (m_ptr > m_end) { + if (p->m_type == TOKreserver) + p->m_type = TOKeof; return p; + } ch = *m_ptr; if (!IsValid(m_ptr)) { @@ -172,7 +176,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() { case '=': ++m_ptr; if (m_ptr > m_end) { - Error(kFMErrEndOfInput); + p->m_type = TOKassign; return p; } @@ -192,7 +196,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() { case '<': ++m_ptr; if (m_ptr > m_end) { - Error(kFMErrEndOfInput); + p->m_type = TOKlt; return p; } @@ -215,7 +219,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() { case '>': ++m_ptr; if (m_ptr > m_end) { - Error(kFMErrEndOfInput); + p->m_type = TOKgt; return p; } @@ -275,7 +279,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() { case '/': { ++m_ptr; if (m_ptr > m_end) { - Error(kFMErrEndOfInput); + p->m_type = TOKdiv; return p; } @@ -295,7 +299,7 @@ std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() { case '.': ++m_ptr; if (m_ptr > m_end) { - Error(kFMErrEndOfInput); + p->m_type = TOKdot; return p; } @@ -369,15 +373,18 @@ const wchar_t* CXFA_FMLexer::String(CXFA_FMToken* t, const wchar_t* p) { } ++p; - if (p > m_end) { - Error(kFMErrEndOfInput); - return p; - } - if (ch != '"') { + // We've hit the end of the input, return the string. + if (p > m_end) { + Error(kFMErrEndOfInput); + return p; + } ch = *p; continue; } + // We've hit the end of the input, return the string. 
+ if (p > m_end) + break; if (!IsValid(p)) { ch = *p; @@ -405,7 +412,8 @@ const wchar_t* CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const wchar_t* p) { uint16_t ch = *p; ++p; if (p > m_end) { - Error(kFMErrEndOfInput); + t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); + t->m_type = IsKeyword(t->m_wstring); return p; } @@ -432,10 +440,8 @@ const wchar_t* CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const wchar_t* p) { break; } ++p; - if (p > m_end) { - Error(kFMErrEndOfInput); - return p; - } + if (p > m_end) + break; } t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); t->m_type = IsKeyword(t->m_wstring); @@ -445,29 +451,20 @@ const wchar_t* CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const wchar_t* p) { const wchar_t* CXFA_FMLexer::Comment(const wchar_t* p) { ++p; - if (p > m_end) { - Error(kFMErrEndOfInput); + if (p > m_end) return p; - } unsigned ch = *p; while (ch) { - if (ch == L'\r') { - ++p; - if (p > m_end) - Error(kFMErrEndOfInput); - return p; - } - ++p; - if (p > m_end) { - Error(kFMErrEndOfInput); + if (ch == L'\r') return p; - } if (ch == L'\n') { ++m_uCurrentLine; return p; } + if (p > m_end) + return p; ch = *p; } return p; diff --git a/xfa/fxfa/fm2js/xfa_lexer_unittest.cpp b/xfa/fxfa/fm2js/xfa_lexer_unittest.cpp new file mode 100644 index 0000000000..fac0c9ac55 --- /dev/null +++ b/xfa/fxfa/fm2js/xfa_lexer_unittest.cpp @@ -0,0 +1,239 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "xfa/fxfa/fm2js/xfa_lexer.h" + +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" +#include "third_party/base/ptr_util.h" + +TEST(CXFA_FMLexerTest, EmptyString) { + CXFA_FMLexer lexer(L"", nullptr); + CXFA_FMToken* token = lexer.NextToken(); + EXPECT_EQ(TOKeof, token->m_type); +} + +TEST(CXFA_FMLexerTest, Numbers) { + auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"-12", nullptr); + CXFA_FMToken* token = lexer->NextToken(); + // TODO(dsinclair): Should this return -12 instead of two tokens? + EXPECT_EQ(TOKminus, token->m_type); + token = lexer->NextToken(); + EXPECT_EQ(L"12", token->m_wstring); + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.5362", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + EXPECT_EQ(L"1.5362", token->m_wstring); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"0.875", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + EXPECT_EQ(L"0.875", token->m_wstring); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"5.56e-2", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + EXPECT_EQ(L"5.56e-2", token->m_wstring); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.234E10", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + EXPECT_EQ(L"1.234E10", token->m_wstring); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123456789.012345678", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + // TODO(dsinclair): This should round as per IEEE 64-bit values. + // EXPECT_EQ(L"123456789.01234567", token->m_wstring); + EXPECT_EQ(L"123456789.012345678", token->m_wstring); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"99999999999999999", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + // TODO(dsinclair): This is spec'd as rounding when > 16 significant digits + // prior to the exponent. 
+ // EXPECT_EQ(L"100000000000000000", token->m_wstring); + EXPECT_EQ(L"99999999999999999", token->m_wstring); +} + +// The quotes are stripped in CXFA_FMStringExpression::ToJavaScript. +TEST(CXFA_FMLexerTest, Strings) { + auto lexer = pdfium::MakeUnique<CXFA_FMLexer>( + L"\"The cat jumped over the fence.\"", nullptr); + CXFA_FMToken* token = lexer->NextToken(); + EXPECT_EQ(TOKstring, token->m_type); + EXPECT_EQ(L"\"The cat jumped over the fence.\"", token->m_wstring); + + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"\"\"", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKstring, token->m_type); + EXPECT_EQ(L"\"\"", token->m_wstring); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>( + L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKstring, token->m_type); + EXPECT_EQ(L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"", + token->m_wstring); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>( + L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"", + nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKstring, token->m_type); + EXPECT_EQ( + L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"", + token->m_wstring); +} + +// Note, 'this' is a keyword but is not matched by the lexer. 
+TEST(CXFA_FMLexerTest, OperatorsAndKeywords) { + struct { + const wchar_t* op; + XFA_FM_TOKEN token; + } op[] = {{L"+", TOKplus}, + {L"/", TOKdiv}, + {L"-", TOKminus}, + {L"&", TOKand}, + {L"|", TOKor}, + {L"*", TOKmul}, + {L"<", TOKlt}, + {L">", TOKgt}, + {L"==", TOKeq}, + {L"<>", TOKne}, + {L"<=", TOKle}, + {L">=", TOKge}, + {L"and", TOKksand}, + {L"break", TOKbreak}, + {L"continue", TOKcontinue}, + {L"do", TOKdo}, + {L"downto", TOKdownto}, + {L"else", TOKelse}, + {L"elseif", TOKelseif}, + {L"end", TOKend}, + {L"endfor", TOKendfor}, + {L"endfunc", TOKendfunc}, + {L"endif", TOKendif}, + {L"endwhile", TOKendwhile}, + {L"eq", TOKkseq}, + {L"exit", TOKexit}, + {L"for", TOKfor}, + {L"foreach", TOKforeach}, + {L"func", TOKfunc}, + {L"ge", TOKksge}, + {L"gt", TOKksgt}, + {L"if", TOKif}, + {L"in", TOKin}, + {L"infinity", TOKinfinity}, + {L"le", TOKksle}, + {L"lt", TOKkslt}, + {L"nan", TOKnan}, + {L"ne", TOKksne}, + {L"not", TOKksnot}, + {L"null", TOKnull}, + {L"or", TOKksor}, + {L"return", TOKreturn}, + {L"step", TOKstep}, + {L"then", TOKthen}, + {L"throw", TOKthrow}, + {L"upto", TOKupto}, + {L"var", TOKvar}, + {L"while", TOKwhile}, + + // The following are defined but aren't in the spec. 
+ {L"(", TOKlparen}, + {L")", TOKrparen}, + {L",", TOKcomma}, + {L".", TOKdot}, + {L"[", TOKlbracket}, + {L"]", TOKrbracket}, + {L"..", TOKdotdot}, + {L".#", TOKdotscream}, + {L".*", TOKdotstar}}; + + for (size_t i = 0; i < FX_ArraySize(op); ++i) { + auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(op[i].op, nullptr); + CXFA_FMToken* token = lexer->NextToken(); + EXPECT_EQ(op[i].token, token->m_type); + } +} + +TEST(CXFA_FMLexerTest, Comments) { + auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"// Empty.", nullptr); + CXFA_FMToken* token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"//", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 // Empty.\n\"str\"", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + EXPECT_EQ(L"123", token->m_wstring); + + token = lexer->NextToken(); + EXPECT_EQ(TOKstring, token->m_type); + EXPECT_EQ(L"\"str\"", token->m_wstring); + + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L";", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"; Empty.", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 ;Empty.\n\"str\"", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + EXPECT_EQ(L"123", token->m_wstring); + + token = lexer->NextToken(); + EXPECT_EQ(TOKstring, token->m_type); + EXPECT_EQ(L"\"str\"", token->m_wstring); + + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); +} + +TEST(CXFA_FMLexerTest, Identifiers) { + std::vector<const wchar_t*> identifiers = { + L"a", L"an_identifier", L"_ident", L"$ident", L"!ident", L"GetAddr"}; + for (const auto* ident : identifiers) { + auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(ident, nullptr); + CXFA_FMToken* token = lexer->NextToken(); + EXPECT_EQ(TOKidentifier, token->m_type); + EXPECT_EQ(ident, token->m_wstring); + } 
+} + +TEST(CXFA_FMLexerTest, Whitespace) { + auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L" \t\xc\x9\xb", nullptr); + CXFA_FMToken* token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); + + lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 \t\xc\x9\xb 456", nullptr); + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + EXPECT_EQ(L"123", token->m_wstring); + + token = lexer->NextToken(); + EXPECT_EQ(TOKnumber, token->m_type); + EXPECT_EQ(L"456", token->m_wstring); + + token = lexer->NextToken(); + EXPECT_EQ(TOKeof, token->m_type); +} -- cgit v1.2.3