diff options
author | Dan Sinclair <dsinclair@chromium.org> | 2017-05-16 15:14:02 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-05-17 03:08:06 +0000 |
commit | 2e9d47ac2c9a4ebf0821d10c766fafa85e6d3cb9 (patch) | |
tree | 212ca832851067eb5f40cf49c1dc2ffe81a24ada /xfa/fxfa/fm2js/cxfa_fmlexer.cpp | |
parent | 7876609b3540137663d48282ad94ba42a3749e73 (diff) | |
download | pdfium-2e9d47ac2c9a4ebf0821d10c766fafa85e6d3cb9.tar.xz |
Rename formcalc files to better match contents
Most files match the contents. The expression files are named to match
their base type even though they contain all the expression
subclasses.
Change-Id: I3b7705c7b206a9fa1afae8b677f765e8b788e84d
Reviewed-on: https://pdfium-review.googlesource.com/5492
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Nicolás Peña <npm@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Diffstat (limited to 'xfa/fxfa/fm2js/cxfa_fmlexer.cpp')
-rw-r--r-- | xfa/fxfa/fm2js/cxfa_fmlexer.cpp | 501 |
1 files changed, 501 insertions, 0 deletions
diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp new file mode 100644 index 0000000000..18f915f198 --- /dev/null +++ b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp @@ -0,0 +1,501 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fxfa/fm2js/cxfa_fmlexer.h" + +#include "core/fxcrt/fx_extension.h" +#include "third_party/base/ptr_util.h" + +namespace { + +bool IsValid(const wchar_t* p) { + return *p == 0 || (*p >= 0x09 && *p <= 0x0D) || + (*p >= 0x20 && *p <= 0xd7FF) || (*p >= 0xE000 && *p <= 0xFFFD); +} + +const XFA_FMKeyword keyWords[] = { + {TOKand, 0x00000026, L"&"}, + {TOKlparen, 0x00000028, L"("}, + {TOKrparen, 0x00000029, L")"}, + {TOKmul, 0x0000002a, L"*"}, + {TOKplus, 0x0000002b, L"+"}, + {TOKcomma, 0x0000002c, L","}, + {TOKminus, 0x0000002d, L"-"}, + {TOKdot, 0x0000002e, L"."}, + {TOKdiv, 0x0000002f, L"/"}, + {TOKlt, 0x0000003c, L"<"}, + {TOKassign, 0x0000003d, L"="}, + {TOKgt, 0x0000003e, L">"}, + {TOKlbracket, 0x0000005b, L"["}, + {TOKrbracket, 0x0000005d, L"]"}, + {TOKor, 0x0000007c, L"|"}, + {TOKdotscream, 0x0000ec11, L".#"}, + {TOKdotstar, 0x0000ec18, L".*"}, + {TOKdotdot, 0x0000ec1c, L".."}, + {TOKle, 0x000133f9, L"<="}, + {TOKne, 0x000133fa, L"<>"}, + {TOKeq, 0x0001391a, L"=="}, + {TOKge, 0x00013e3b, L">="}, + {TOKdo, 0x00020153, L"do"}, + {TOKkseq, 0x00020676, L"eq"}, + {TOKksge, 0x000210ac, L"ge"}, + {TOKksgt, 0x000210bb, L"gt"}, + {TOKif, 0x00021aef, L"if"}, + {TOKin, 0x00021af7, L"in"}, + {TOKksle, 0x00022a51, L"le"}, + {TOKkslt, 0x00022a60, L"lt"}, + {TOKksne, 0x00023493, L"ne"}, + {TOKksor, 0x000239c1, L"or"}, + {TOKnull, 0x052931bb, L"null"}, + {TOKbreak, 0x05518c25, L"break"}, + {TOKksand, 0x09f9db33, L"and"}, + {TOKend, 0x0a631437, L"end"}, + {TOKeof, 0x0a63195a, L"eof"}, + {TOKfor, 0x0a7d67a7, L"for"}, + {TOKnan, 0x0b4f91dd, L"nan"}, + {TOKksnot, 0x0b4fd9b1, L"not"}, + {TOKvar, 0x0c2203e9, L"var"}, + {TOKthen, 0x2d5738cf, L"then"}, + {TOKelse, 0x45f65ee9, L"else"}, + {TOKexit, 0x4731d6ba, L"exit"}, + {TOKdownto, 0x4caadc3b, L"downto"}, + {TOKreturn, 0x4db8bd60, L"return"}, + {TOKinfinity, 0x5c0a010a, L"infinity"}, + {TOKendwhile, 0x5c64bff0, L"endwhile"}, + {TOKforeach, 0x67e31f38, L"foreach"}, + {TOKendfunc, 0x68f984a3, L"endfunc"}, + {TOKelseif, 0x78253218, L"elseif"}, + {TOKwhile, 0x84229259, L"while"}, + {TOKendfor, 0x8ab49d7e, L"endfor"}, + {TOKthrow, 0x8db05c94, L"throw"}, + {TOKstep, 0xa7a7887c, L"step"}, + {TOKupto, 0xb5155328, L"upto"}, + {TOKcontinue, 0xc0340685, L"continue"}, + {TOKfunc, 0xcdce60ec, L"func"}, + {TOKendif, 0xe0e8fee6, L"endif"}, +}; + +const XFA_FM_TOKEN KEYWORD_START = TOKdo; +const XFA_FM_TOKEN KEYWORD_END = TOKendif; + +} // namespace + +const wchar_t* XFA_FM_KeywordToString(XFA_FM_TOKEN op) { + if (op < KEYWORD_START || op > KEYWORD_END) + return L""; + return keyWords[op].m_keyword; +} + +CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {} + +CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum) + : m_type(TOKreserver), m_uLinenum(uLineNum) {} + +CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc, + CXFA_FMErrorInfo* pErrorInfo) + : m_ptr(wsFormCalc.c_str()), + m_end(m_ptr + wsFormCalc.GetLength() - 1), + m_uCurrentLine(1), + m_pErrorInfo(pErrorInfo) {} + +CXFA_FMLexer::~CXFA_FMLexer() {} + +CXFA_FMToken* CXFA_FMLexer::NextToken() { + // Make sure we don't walk off the end of the string. + if (m_ptr > m_end) { + m_pToken = pdfium::MakeUnique<CXFA_FMToken>(m_uCurrentLine); + m_pToken->m_type = TOKeof; + } else { + m_pToken = Scan(); + } + return m_pToken.get(); +} + +std::unique_ptr<CXFA_FMToken> CXFA_FMLexer::Scan() { + uint16_t ch = 0; + auto p = pdfium::MakeUnique<CXFA_FMToken>(m_uCurrentLine); + if (!IsValid(m_ptr)) { + ch = *m_ptr; + Error(kFMErrUnsupportedChar, ch); + return p; + } + + while (1) { + // Make sure we don't walk off the end of the string. If we don't currently + // have a token type then mark it EOF. + if (m_ptr > m_end) { + if (p->m_type == TOKreserver) + p->m_type = TOKeof; + return p; + } + + ch = *m_ptr; + if (!IsValid(m_ptr)) { + Error(kFMErrUnsupportedChar, ch); + return p; + } + + switch (ch) { + case 0: + p->m_type = TOKeof; + return p; + case 0x0A: + ++m_uCurrentLine; + p->m_uLinenum = m_uCurrentLine; + ++m_ptr; + break; + case 0x0D: + ++m_ptr; + break; + case ';': { + m_ptr = Comment(m_ptr); + break; + } + case '"': { + p->m_type = TOKstring; + m_ptr = String(p.get(), m_ptr); + return p; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + p->m_type = TOKnumber; + m_ptr = Number(p.get(), m_ptr); + return p; + } + case '=': + ++m_ptr; + if (m_ptr > m_end) { + p->m_type = TOKassign; + return p; + } + + if (IsValid(m_ptr)) { + ch = *m_ptr; + if (ch == '=') { + p->m_type = TOKeq; + ++m_ptr; + } else { + p->m_type = TOKassign; + } + } else { + ch = *m_ptr; + Error(kFMErrUnsupportedChar, ch); + } + return p; + case '<': + ++m_ptr; + if (m_ptr > m_end) { + p->m_type = TOKlt; + return p; + } + + if (IsValid(m_ptr)) { + ch = *m_ptr; + if (ch == '=') { + p->m_type = TOKle; + ++m_ptr; + } else if (ch == '>') { + p->m_type = TOKne; + ++m_ptr; + } else { + p->m_type = TOKlt; + } + } else { + ch = *m_ptr; + Error(kFMErrUnsupportedChar, ch); + } + return p; + case '>': + ++m_ptr; + if (m_ptr > m_end) { + p->m_type = TOKgt; + return p; + } + + if (IsValid(m_ptr)) { + ch = *m_ptr; + if (ch == '=') { + p->m_type = TOKge; + ++m_ptr; + } else { + p->m_type = TOKgt; + } + } else { + ch = *m_ptr; + Error(kFMErrUnsupportedChar, ch); + } + return p; + case ',': + p->m_type = TOKcomma; + ++m_ptr; + return p; + case '(': + p->m_type = TOKlparen; + ++m_ptr; + return p; + case ')': + p->m_type = TOKrparen; + ++m_ptr; + return p; + case '[': + p->m_type = TOKlbracket; + ++m_ptr; + return p; + case ']': + p->m_type = TOKrbracket; + ++m_ptr; + return p; + case '&': + ++m_ptr; + p->m_type = TOKand; + return p; + case '|': + ++m_ptr; + p->m_type = TOKor; + return p; + case '+': + ++m_ptr; + p->m_type = TOKplus; + return p; + case '-': + ++m_ptr; + p->m_type = TOKminus; + return p; + case '*': + ++m_ptr; + p->m_type = TOKmul; + return p; + case '/': { + ++m_ptr; + if (m_ptr > m_end) { + p->m_type = TOKdiv; + return p; + } + + if (!IsValid(m_ptr)) { + ch = *m_ptr; + Error(kFMErrUnsupportedChar, ch); + return p; + } + ch = *m_ptr; + if (ch != '/') { + p->m_type = TOKdiv; + return p; + } + m_ptr = Comment(m_ptr); + break; + } + case '.': + ++m_ptr; + if (m_ptr > m_end) { + p->m_type = TOKdot; + return p; + } + + if (IsValid(m_ptr)) { + ch = *m_ptr; + if (ch == '.') { + p->m_type = TOKdotdot; + ++m_ptr; + } else if (ch == '*') { + p->m_type = TOKdotstar; + ++m_ptr; + } else if (ch == '#') { + p->m_type = TOKdotscream; + ++m_ptr; + } else if (ch <= '9' && ch >= '0') { + p->m_type = TOKnumber; + --m_ptr; + m_ptr = Number(p.get(), m_ptr); + } else { + p->m_type = TOKdot; + } + } else { + ch = *m_ptr; + Error(kFMErrUnsupportedChar, ch); + } + return p; + case 0x09: + case 0x0B: + case 0x0C: + case 0x20: + ++m_ptr; + break; + default: { + m_ptr = Identifiers(p.get(), m_ptr); + return p; + } + } + } +} + +const wchar_t* CXFA_FMLexer::Number(CXFA_FMToken* t, const wchar_t* p) { + // This will set pEnd to the character after the end of the number. + wchar_t* pEnd = nullptr; + if (p) + wcstod(const_cast<wchar_t*>(p), &pEnd); + if (pEnd && FXSYS_iswalpha(*pEnd)) { + Error(kFMErrBadSuffixNumber); + return pEnd; + } + + t->m_wstring = CFX_WideStringC(p, (pEnd - p)); + return pEnd; +} + +const wchar_t* CXFA_FMLexer::String(CXFA_FMToken* t, const wchar_t* p) { + const wchar_t* pStart = p; + + ++p; + if (p > m_end) { + Error(kFMErrEndOfInput); + return p; + } + + uint16_t ch = *p; + while (ch) { + if (!IsValid(p)) { + ch = *p; + t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); + Error(kFMErrUnsupportedChar, ch); + return p; + } + + ++p; + if (ch != '"') { + // We've hit the end of the input, return the string. + if (p > m_end) { + Error(kFMErrEndOfInput); + return p; + } + ch = *p; + continue; + } + // We've hit the end of the input, return the string. + if (p > m_end) + break; + + if (!IsValid(p)) { + ch = *p; + t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); + Error(kFMErrUnsupportedChar, ch); + return p; + } + ch = *p; + if (ch != '"') + break; + + ++p; + if (p > m_end) { + Error(kFMErrEndOfInput); + return p; + } + ch = *p; + } + t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); + return p; +} + +const wchar_t* CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const wchar_t* p) { + const wchar_t* pStart = p; + uint16_t ch = *p; + ++p; + if (p > m_end) { + t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); + t->m_type = IsKeyword(t->m_wstring); + return p; + } + + if (!IsValid(p)) { + t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); + Error(kFMErrUnsupportedChar, ch); + return p; + } + + ch = *p; + while (ch) { + if (!IsValid(p)) { + t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); + Error(kFMErrUnsupportedChar, ch); + return p; + } + + ch = *p; + if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B || + ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' || + ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' || + ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' || + ch == '+' || ch == '-' || ch == '*' || ch == '/') { + break; + } + ++p; + if (p > m_end) + break; + } + t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); + t->m_type = IsKeyword(t->m_wstring); + return p; +} + +const wchar_t* CXFA_FMLexer::Comment(const wchar_t* p) { + ++p; + + if (p > m_end) + return p; + + unsigned ch = *p; + while (ch) { + ++p; + if (ch == L'\r') + return p; + if (ch == L'\n') { + ++m_uCurrentLine; + return p; + } + if (p > m_end) + return p; + ch = *p; + } + return p; +} + +XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) { + uint32_t uHash = FX_HashCode_GetW(str, true); + int32_t iStart = KEYWORD_START; + int32_t iEnd = KEYWORD_END; + do { + int32_t iMid = (iStart + iEnd) / 2; + XFA_FMKeyword keyword = keyWords[iMid]; + if (uHash == keyword.m_uHash) + return keyword.m_type; + if (uHash < keyword.m_uHash) + iEnd = iMid - 1; + else + iStart = iMid + 1; + } while (iStart <= iEnd); + return TOKidentifier; +} + +void CXFA_FMLexer::Error(const wchar_t* msg, ...) { + m_pErrorInfo->linenum = m_uCurrentLine; + va_list ap; + va_start(ap, msg); + m_pErrorInfo->message.FormatV(msg, ap); + va_end(ap); + ASSERT(!m_pErrorInfo->message.IsEmpty()); +} + +bool CXFA_FMLexer::HasError() const { + return !m_pErrorInfo->message.IsEmpty(); +} |