summaryrefslogtreecommitdiff
path: root/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
diff options
context:
space:
mode:
author Dan Sinclair <dsinclair@chromium.org> 2017-05-16 15:14:02 -0400
committer Chromium commit bot <commit-bot@chromium.org> 2017-05-17 03:08:06 +0000
commit 2e9d47ac2c9a4ebf0821d10c766fafa85e6d3cb9 (patch)
tree 212ca832851067eb5f40cf49c1dc2ffe81a24ada /xfa/fxfa/fm2js/cxfa_fmlexer.cpp
parent 7876609b3540137663d48282ad94ba42a3749e73 (diff)
download pdfium-2e9d47ac2c9a4ebf0821d10c766fafa85e6d3cb9.tar.xz
Rename formcalc files to better match contents
Most files match the contents. The expression files are named to match their base type even though they contain all the expression subclasses. Change-Id: I3b7705c7b206a9fa1afae8b677f765e8b788e84d Reviewed-on: https://pdfium-review.googlesource.com/5492 Commit-Queue: dsinclair <dsinclair@chromium.org> Reviewed-by: Nicolás Peña <npm@chromium.org> Reviewed-by: Tom Sepez <tsepez@chromium.org>
Diffstat (limited to 'xfa/fxfa/fm2js/cxfa_fmlexer.cpp')
-rw-r--r-- xfa/fxfa/fm2js/cxfa_fmlexer.cpp 501
1 files changed, 501 insertions, 0 deletions
diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
new file mode 100644
index 0000000000..18f915f198
--- /dev/null
+++ b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
@@ -0,0 +1,501 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "xfa/fxfa/fm2js/cxfa_fmlexer.h"
+
+#include "core/fxcrt/fx_extension.h"
+#include "third_party/base/ptr_util.h"
+
+namespace {
+
+// Returns true when |*p| is a character the lexer accepts: the NUL
+// terminator, 0x09-0x0D, 0x20-0xD7FF, or 0xE000-0xFFFD.
+bool IsValid(const wchar_t* p) {
+  const wchar_t ch = *p;
+  if (ch == 0)
+    return true;
+  if (ch >= 0x09 && ch <= 0x0D)
+    return true;
+  if (ch >= 0x20 && ch <= 0xd7FF)
+    return true;
+  return ch >= 0xE000 && ch <= 0xFFFD;
+}
+
+// Table of FormCalc operators and keywords. Each entry lists a token type, a
+// hash value and the literal spelling. Entries appear in ascending hash order:
+// IsKeyword() binary-searches the TOKdo..TOKendif portion of this table by
+// hash, and XFA_FM_KeywordToString() indexes the table directly with a token
+// value.
+// NOTE(review): both uses presumably rely on each XFA_FM_TOKEN enumerator
+// having the same numeric value as its entry's position here, and on the hash
+// column matching FX_HashCode_GetW(spelling, true) -- confirm against the enum
+// declaration and the hash function before inserting or reordering entries.
+const XFA_FMKeyword keyWords[] = {
+ {TOKand, 0x00000026, L"&"},
+ {TOKlparen, 0x00000028, L"("},
+ {TOKrparen, 0x00000029, L")"},
+ {TOKmul, 0x0000002a, L"*"},
+ {TOKplus, 0x0000002b, L"+"},
+ {TOKcomma, 0x0000002c, L","},
+ {TOKminus, 0x0000002d, L"-"},
+ {TOKdot, 0x0000002e, L"."},
+ {TOKdiv, 0x0000002f, L"/"},
+ {TOKlt, 0x0000003c, L"<"},
+ {TOKassign, 0x0000003d, L"="},
+ {TOKgt, 0x0000003e, L">"},
+ {TOKlbracket, 0x0000005b, L"["},
+ {TOKrbracket, 0x0000005d, L"]"},
+ {TOKor, 0x0000007c, L"|"},
+ {TOKdotscream, 0x0000ec11, L".#"},
+ {TOKdotstar, 0x0000ec18, L".*"},
+ {TOKdotdot, 0x0000ec1c, L".."},
+ {TOKle, 0x000133f9, L"<="},
+ {TOKne, 0x000133fa, L"<>"},
+ {TOKeq, 0x0001391a, L"=="},
+ {TOKge, 0x00013e3b, L">="},
+ {TOKdo, 0x00020153, L"do"},
+ {TOKkseq, 0x00020676, L"eq"},
+ {TOKksge, 0x000210ac, L"ge"},
+ {TOKksgt, 0x000210bb, L"gt"},
+ {TOKif, 0x00021aef, L"if"},
+ {TOKin, 0x00021af7, L"in"},
+ {TOKksle, 0x00022a51, L"le"},
+ {TOKkslt, 0x00022a60, L"lt"},
+ {TOKksne, 0x00023493, L"ne"},
+ {TOKksor, 0x000239c1, L"or"},
+ {TOKnull, 0x052931bb, L"null"},
+ {TOKbreak, 0x05518c25, L"break"},
+ {TOKksand, 0x09f9db33, L"and"},
+ {TOKend, 0x0a631437, L"end"},
+ {TOKeof, 0x0a63195a, L"eof"},
+ {TOKfor, 0x0a7d67a7, L"for"},
+ {TOKnan, 0x0b4f91dd, L"nan"},
+ {TOKksnot, 0x0b4fd9b1, L"not"},
+ {TOKvar, 0x0c2203e9, L"var"},
+ {TOKthen, 0x2d5738cf, L"then"},
+ {TOKelse, 0x45f65ee9, L"else"},
+ {TOKexit, 0x4731d6ba, L"exit"},
+ {TOKdownto, 0x4caadc3b, L"downto"},
+ {TOKreturn, 0x4db8bd60, L"return"},
+ {TOKinfinity, 0x5c0a010a, L"infinity"},
+ {TOKendwhile, 0x5c64bff0, L"endwhile"},
+ {TOKforeach, 0x67e31f38, L"foreach"},
+ {TOKendfunc, 0x68f984a3, L"endfunc"},
+ {TOKelseif, 0x78253218, L"elseif"},
+ {TOKwhile, 0x84229259, L"while"},
+ {TOKendfor, 0x8ab49d7e, L"endfor"},
+ {TOKthrow, 0x8db05c94, L"throw"},
+ {TOKstep, 0xa7a7887c, L"step"},
+ {TOKupto, 0xb5155328, L"upto"},
+ {TOKcontinue, 0xc0340685, L"continue"},
+ {TOKfunc, 0xcdce60ec, L"func"},
+ {TOKendif, 0xe0e8fee6, L"endif"},
+};
+
+// First and last word-style keyword tokens. They serve as the token-value
+// bounds checked by XFA_FM_KeywordToString() and as the inclusive index range
+// of |keyWords| that IsKeyword() searches.
+const XFA_FM_TOKEN KEYWORD_START = TOKdo;
+const XFA_FM_TOKEN KEYWORD_END = TOKendif;
+
+} // namespace
+
+// Returns the spelling stored in |keyWords| for keyword token |op|, or an
+// empty string when |op| lies outside [KEYWORD_START, KEYWORD_END].
+const wchar_t* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
+  const bool bIsKeyword = op >= KEYWORD_START && op <= KEYWORD_END;
+  return bIsKeyword ? keyWords[op].m_keyword : L"";
+}
+
+// Default token: type TOKreserver, recorded on line 1.
+CXFA_FMToken::CXFA_FMToken() : CXFA_FMToken(1) {}
+
+// Token of type TOKreserver recorded on line |uLineNum|. Scan() treats
+// TOKreserver as "no token type assigned yet".
+CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum)
+    : m_type(TOKreserver), m_uLinenum(uLineNum) {}
+
+// Sets up a lexer over the text viewed by |wsFormCalc|. |m_ptr| starts at the
+// first character and |m_end| addresses the last character (inclusive), so an
+// empty view leaves |m_end| one position before |m_ptr|. Line counting starts
+// at 1 and errors are recorded into |pErrorInfo|.
+CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc,
+                           CXFA_FMErrorInfo* pErrorInfo)
+    : m_ptr(wsFormCalc.c_str()),
+      m_end(wsFormCalc.c_str() + wsFormCalc.GetLength() - 1),
+      m_uCurrentLine(1),
+      m_pErrorInfo(pErrorInfo) {}
+
+// Nothing to release explicitly; members clean up after themselves.
+CXFA_FMLexer::~CXFA_FMLexer() = default;
+
+// Produces the next token, stores it in |m_pToken| and returns a raw pointer
+// to it. The lexer keeps ownership; each call replaces the stored token.
+CXFA_FMToken* CXFA_FMLexer::NextToken() {
+  // While input remains, let Scan() do the work.
+  if (m_ptr <= m_end) {
+    m_pToken = Scan();
+    return m_pToken.get();
+  }
+
+  // Past the end of the input: synthesize an EOF token rather than reading
+  // beyond |m_end|.
+  m_pToken = pdfium::MakeUnique<CXFA_FMToken>(m_uCurrentLine);
+  m_pToken->m_type = TOKeof;
+  return m_pToken.get();
+}
+
+// Scans forward from |m_ptr| and returns the next token.
+//
+// Whitespace (tab, VT, FF, space, CR, LF) and comments (introduced by ';' or
+// "//") are skipped. Line feeds advance |m_uCurrentLine|, and the pending
+// token's line number is kept in sync so it reflects the line the token
+// actually starts on. Strings, numbers and identifiers are delegated to
+// String(), Number() and Identifiers(); operators are recognised inline with
+// one character of lookahead where needed.
+//
+// When an unsupported character is met, Error() is called and the token is
+// returned as-is (its type is still TOKreserver unless already assigned).
+// Reaching the end of the input or a NUL yields TOKeof.
+std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() {
+  auto p = pdfium::MakeUnique<CXFA_FMToken>(m_uCurrentLine);
+
+  while (1) {
+    // Make sure we don't walk off the end of the string. If we don't currently
+    // have a token type then mark it EOF. This check comes before any
+    // dereference of |m_ptr|.
+    if (m_ptr > m_end) {
+      if (p->m_type == TOKreserver)
+        p->m_type = TOKeof;
+      return p;
+    }
+
+    uint16_t ch = *m_ptr;
+    if (!IsValid(m_ptr)) {
+      Error(kFMErrUnsupportedChar, ch);
+      return p;
+    }
+
+    switch (ch) {
+      case 0:
+        p->m_type = TOKeof;
+        return p;
+      case 0x0A:
+        ++m_uCurrentLine;
+        p->m_uLinenum = m_uCurrentLine;
+        ++m_ptr;
+        break;
+      case 0x0D:
+        ++m_ptr;
+        break;
+      case ';':
+        m_ptr = Comment(m_ptr);
+        // Comment() may have consumed a line feed and advanced the line
+        // counter; keep the pending token's line number in step, exactly as
+        // the plain 0x0A case above does.
+        p->m_uLinenum = m_uCurrentLine;
+        break;
+      case '"':
+        p->m_type = TOKstring;
+        m_ptr = String(p.get(), m_ptr);
+        return p;
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+      case '8':
+      case '9':
+        p->m_type = TOKnumber;
+        m_ptr = Number(p.get(), m_ptr);
+        return p;
+      case '=':
+        // '=' or '=='.
+        ++m_ptr;
+        if (m_ptr > m_end) {
+          p->m_type = TOKassign;
+          return p;
+        }
+        ch = *m_ptr;
+        if (!IsValid(m_ptr)) {
+          Error(kFMErrUnsupportedChar, ch);
+          return p;
+        }
+        if (ch == '=') {
+          p->m_type = TOKeq;
+          ++m_ptr;
+        } else {
+          p->m_type = TOKassign;
+        }
+        return p;
+      case '<':
+        // '<', '<=' or '<>'.
+        ++m_ptr;
+        if (m_ptr > m_end) {
+          p->m_type = TOKlt;
+          return p;
+        }
+        ch = *m_ptr;
+        if (!IsValid(m_ptr)) {
+          Error(kFMErrUnsupportedChar, ch);
+          return p;
+        }
+        if (ch == '=') {
+          p->m_type = TOKle;
+          ++m_ptr;
+        } else if (ch == '>') {
+          p->m_type = TOKne;
+          ++m_ptr;
+        } else {
+          p->m_type = TOKlt;
+        }
+        return p;
+      case '>':
+        // '>' or '>='.
+        ++m_ptr;
+        if (m_ptr > m_end) {
+          p->m_type = TOKgt;
+          return p;
+        }
+        ch = *m_ptr;
+        if (!IsValid(m_ptr)) {
+          Error(kFMErrUnsupportedChar, ch);
+          return p;
+        }
+        if (ch == '=') {
+          p->m_type = TOKge;
+          ++m_ptr;
+        } else {
+          p->m_type = TOKgt;
+        }
+        return p;
+      case ',':
+        p->m_type = TOKcomma;
+        ++m_ptr;
+        return p;
+      case '(':
+        p->m_type = TOKlparen;
+        ++m_ptr;
+        return p;
+      case ')':
+        p->m_type = TOKrparen;
+        ++m_ptr;
+        return p;
+      case '[':
+        p->m_type = TOKlbracket;
+        ++m_ptr;
+        return p;
+      case ']':
+        p->m_type = TOKrbracket;
+        ++m_ptr;
+        return p;
+      case '&':
+        p->m_type = TOKand;
+        ++m_ptr;
+        return p;
+      case '|':
+        p->m_type = TOKor;
+        ++m_ptr;
+        return p;
+      case '+':
+        p->m_type = TOKplus;
+        ++m_ptr;
+        return p;
+      case '-':
+        p->m_type = TOKminus;
+        ++m_ptr;
+        return p;
+      case '*':
+        p->m_type = TOKmul;
+        ++m_ptr;
+        return p;
+      case '/':
+        // Either a division operator or the start of a "//" comment.
+        ++m_ptr;
+        if (m_ptr > m_end) {
+          p->m_type = TOKdiv;
+          return p;
+        }
+        ch = *m_ptr;
+        if (!IsValid(m_ptr)) {
+          Error(kFMErrUnsupportedChar, ch);
+          return p;
+        }
+        if (ch != '/') {
+          p->m_type = TOKdiv;
+          return p;
+        }
+        m_ptr = Comment(m_ptr);
+        // See the ';' case: resync the token's line after the comment.
+        p->m_uLinenum = m_uCurrentLine;
+        break;
+      case '.':
+        // '.', '..', '.*', '.#', or a number with a leading decimal point.
+        ++m_ptr;
+        if (m_ptr > m_end) {
+          p->m_type = TOKdot;
+          return p;
+        }
+        ch = *m_ptr;
+        if (!IsValid(m_ptr)) {
+          Error(kFMErrUnsupportedChar, ch);
+          return p;
+        }
+        if (ch == '.') {
+          p->m_type = TOKdotdot;
+          ++m_ptr;
+        } else if (ch == '*') {
+          p->m_type = TOKdotstar;
+          ++m_ptr;
+        } else if (ch == '#') {
+          p->m_type = TOKdotscream;
+          ++m_ptr;
+        } else if (ch <= '9' && ch >= '0') {
+          p->m_type = TOKnumber;
+          // Step back so Number() sees the leading '.'.
+          --m_ptr;
+          m_ptr = Number(p.get(), m_ptr);
+        } else {
+          p->m_type = TOKdot;
+        }
+        return p;
+      case 0x09:
+      case 0x0B:
+      case 0x0C:
+      case 0x20:
+        ++m_ptr;
+        break;
+      default:
+        m_ptr = Identifiers(p.get(), m_ptr);
+        return p;
+    }
+  }
+}
+
+// Lexes the numeric literal that starts at |p|.
+//
+// On success |t->m_wstring| is set to the literal's text (a view into the
+// original input) and the position just past the literal is returned. If the
+// literal is immediately followed by an alphabetic character,
+// kFMErrBadSuffixNumber is reported, |t->m_wstring| is left untouched and the
+// position of that character is returned.
+const wchar_t* CXFA_FMLexer::Number(CXFA_FMToken* t, const wchar_t* p) {
+  // Nothing to parse for a null cursor or one already past the input.
+  if (!p || p > m_end) {
+    t->m_wstring = CFX_WideStringC(p, 0);
+    return p;
+  }
+
+  // wcstod() keeps reading until it meets a character that cannot belong to
+  // a number, so it needs NUL-terminated storage. The input view is only
+  // known to be readable up to |m_end|, so parse a terminated copy of the
+  // remaining input instead of the view itself. This costs one copy of the
+  // remaining input per numeric literal.
+  const CFX_WideString wsRemaining(p, m_end - p + 1);
+  const wchar_t* pCopy = wsRemaining.c_str();
+  wchar_t* pCopyEnd = nullptr;
+  wcstod(pCopy, &pCopyEnd);
+
+  // Translate the consumed length back into the original buffer; the token
+  // text must not point into the temporary copy.
+  const wchar_t* pEnd = p + (pCopyEnd - pCopy);
+
+  // Only inspect the following character when it is still inside the input.
+  if (pEnd <= m_end && FXSYS_iswalpha(*pEnd)) {
+    Error(kFMErrBadSuffixNumber);
+    return pEnd;
+  }
+
+  t->m_wstring = CFX_WideStringC(p, (pEnd - p));
+  return pEnd;
+}
+
+// Lexes the string literal whose opening quote is at |p|.
+//
+// A doubled quote ("") inside the literal is treated as part of the string.
+// On success |t->m_wstring| covers the literal including both quotes and the
+// position after the closing quote is returned. Running out of input before
+// the closing quote reports kFMErrEndOfInput; an unsupported character
+// reports kFMErrUnsupportedChar after storing the text scanned so far.
+const wchar_t* CXFA_FMLexer::String(CXFA_FMToken* t, const wchar_t* p) {
+  const wchar_t* const pBegin = p;
+  const wchar_t* pCur = p + 1;  // Step over the opening quote.
+  if (pCur > m_end) {
+    Error(kFMErrEndOfInput);
+    return pCur;
+  }
+
+  // Shared failure path for an unsupported character at |pCur|.
+  auto fail_unsupported = [&]() -> const wchar_t* {
+    const uint16_t bad = *pCur;
+    t->m_wstring = CFX_WideStringC(pBegin, (pCur - pBegin));
+    Error(kFMErrUnsupportedChar, bad);
+    return pCur;
+  };
+
+  // |c| is always the (16-bit truncated) character at |pCur| on loop entry.
+  for (uint16_t c = *pCur; c; c = *pCur) {
+    if (!IsValid(pCur))
+      return fail_unsupported();
+
+    ++pCur;
+    if (c != '"') {
+      // Ordinary character: there must be more input to find the close quote.
+      if (pCur > m_end) {
+        Error(kFMErrEndOfInput);
+        return pCur;
+      }
+      continue;
+    }
+
+    // We just passed a quote. If the input ends here it was the closing one.
+    if (pCur > m_end)
+      break;
+
+    if (!IsValid(pCur))
+      return fail_unsupported();
+
+    // Anything other than a second quote means the literal is finished.
+    if (*pCur != '"')
+      break;
+
+    // Doubled quote: skip the second one and keep scanning.
+    ++pCur;
+    if (pCur > m_end) {
+      Error(kFMErrEndOfInput);
+      return pCur;
+    }
+  }
+
+  t->m_wstring = CFX_WideStringC(pBegin, (pCur - pBegin));
+  return pCur;
+}
+
+// Lexes an identifier or keyword starting at |p|.
+//
+// The character at |p| is always consumed. Scanning then continues until the
+// end of the input, a NUL, whitespace, or one of the operator/punctuation
+// characters that start another token. |t->m_wstring| is set to the scanned
+// text and |t->m_type| to the result of IsKeyword(). If an unsupported
+// character is met, the text scanned so far is stored,
+// kFMErrUnsupportedChar is reported for that character, and its position is
+// returned with |t->m_type| left unchanged.
+const wchar_t* CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const wchar_t* p) {
+  const wchar_t* const pStart = p;
+  ++p;
+
+  while (p <= m_end) {
+    // Read the character under the cursor before validating it, so an error
+    // names the offending character rather than an earlier one.
+    const uint16_t ch = *p;
+    if (!IsValid(p)) {
+      t->m_wstring = CFX_WideStringC(pStart, (p - pStart));
+      Error(kFMErrUnsupportedChar, ch);
+      return p;
+    }
+
+    if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B ||
+        ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' ||
+        ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' ||
+        ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' ||
+        ch == '+' || ch == '-' || ch == '*' || ch == '/') {
+      break;
+    }
+    ++p;
+  }
+
+  t->m_wstring = CFX_WideStringC(pStart, (p - pStart));
+  t->m_type = IsKeyword(t->m_wstring);
+  return p;
+}
+
+// Skips a comment. |p| addresses the comment introducer, which is stepped
+// over first. Returns the position just past the terminating CR or LF (an LF
+// also bumps |m_uCurrentLine|), the position of a NUL if one is met, or the
+// first position beyond |m_end| if the input runs out.
+const wchar_t* CXFA_FMLexer::Comment(const wchar_t* p) {
+  const wchar_t* pCur = p + 1;
+  while (pCur <= m_end) {
+    const unsigned c = *pCur;
+    if (!c)
+      break;
+
+    ++pCur;
+    if (c == L'\r')
+      return pCur;
+    if (c == L'\n') {
+      ++m_uCurrentLine;
+      return pCur;
+    }
+  }
+  return pCur;
+}
+
+// Classifies |str| as a keyword or a plain identifier. The hash of |str| is
+// binary-searched in the KEYWORD_START..KEYWORD_END slice of |keyWords|; a
+// hash match returns that entry's token type (the spelling itself is not
+// compared), otherwise TOKidentifier is returned.
+XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) {
+  const uint32_t uTarget = FX_HashCode_GetW(str, true);
+  int32_t iLow = KEYWORD_START;
+  int32_t iHigh = KEYWORD_END;
+  for (;;) {
+    const int32_t iProbe = (iLow + iHigh) / 2;
+    const XFA_FMKeyword& entry = keyWords[iProbe];
+    if (entry.m_uHash == uTarget)
+      return entry.m_type;
+
+    if (uTarget < entry.m_uHash)
+      iHigh = iProbe - 1;
+    else
+      iLow = iProbe + 1;
+
+    if (iLow > iHigh)
+      break;
+  }
+  return TOKidentifier;
+}
+
+// Records an error in |m_pErrorInfo|: formats |msg| with the variadic
+// arguments into the message and stamps it with the current line number.
+void CXFA_FMLexer::Error(const wchar_t* msg, ...) {
+  va_list args;
+  va_start(args, msg);
+  m_pErrorInfo->message.FormatV(msg, args);
+  va_end(args);
+
+  m_pErrorInfo->linenum = m_uCurrentLine;
+  // HasError() keys off a non-empty message, so formatting must yield text.
+  ASSERT(!m_pErrorInfo->message.IsEmpty());
+}
+
+// True once an error message has been recorded in |m_pErrorInfo|.
+bool CXFA_FMLexer::HasError() const {
+  const bool bNoMessage = m_pErrorInfo->message.IsEmpty();
+  return !bNoMessage;
+}