diff options
Diffstat (limited to 'xfa/src/fxfa/fm2js/xfa_lexer.cpp')
-rw-r--r-- | xfa/src/fxfa/fm2js/xfa_lexer.cpp | 552 |
1 files changed, 0 insertions, 552 deletions
diff --git a/xfa/src/fxfa/fm2js/xfa_lexer.cpp b/xfa/src/fxfa/fm2js/xfa_lexer.cpp deleted file mode 100644 index f93fa839d3..0000000000 --- a/xfa/src/fxfa/fm2js/xfa_lexer.cpp +++ /dev/null @@ -1,552 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "xfa/src/fxfa/fm2js/xfa_lexer.h" - -#include "core/include/fxcrt/fx_ext.h" - -namespace { - -struct XFA_FMDChar { - static const FX_WCHAR* inc(const FX_WCHAR*& p) { - ++p; - return p; - } - static const FX_WCHAR* dec(const FX_WCHAR*& p) { - --p; - return p; - } - static uint16_t get(const FX_WCHAR* p) { return *p; } - static FX_BOOL isWhiteSpace(const FX_WCHAR* p) { - return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20; - } - static FX_BOOL isLineTerminator(const FX_WCHAR* p) { - return *p == 0x0A || *p == 0x0D; - } - static FX_BOOL isBinary(const FX_WCHAR* p) { - return (*p) >= '0' && (*p) <= '1'; - } - static FX_BOOL isOctal(const FX_WCHAR* p) { - return (*p) >= '0' && (*p) <= '7'; - } - static FX_BOOL isDigital(const FX_WCHAR* p) { - return (*p) >= '0' && (*p) <= '9'; - } - static FX_BOOL isHex(const FX_WCHAR* p) { - return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') || - ((*p) >= 'A' && (*p) <= 'F'); - } - static FX_BOOL isAlpha(const FX_WCHAR* p) { - return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A'); - } - static FX_BOOL isAvalid(const FX_WCHAR* p, FX_BOOL flag = 0); - static FX_BOOL string2number(const FX_WCHAR* s, - FX_DOUBLE* pValue, - const FX_WCHAR*& pEnd); - static FX_BOOL isUnicodeAlpha(uint16_t ch); -}; - -inline FX_BOOL XFA_FMDChar::isAvalid(const FX_WCHAR* p, FX_BOOL flag) { - if (*p == 0) { - return 1; - } - if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D || - (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) { - return 1; - } - if (!flag) { - if (*p == 0x0B || *p == 0x0C) { - return 1; - } - } - return 0; -} - -inline FX_BOOL XFA_FMDChar::string2number(const FX_WCHAR* s, - FX_DOUBLE* pValue, - const FX_WCHAR*& pEnd) { - if (s) { - *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd); - } - return 0; -} - -inline FX_BOOL XFA_FMDChar::isUnicodeAlpha(uint16_t ch) { - if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B || - ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' || - ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' || - ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' || - ch == '+' || ch == '-' || ch == '*' || ch == '/') { - return FALSE; - } - return TRUE; -} - -const XFA_FMKeyword keyWords[] = { - {TOKand, 0x00000026, L"&"}, - {TOKlparen, 0x00000028, L"("}, - {TOKrparen, 0x00000029, L")"}, - {TOKmul, 0x0000002a, L"*"}, - {TOKplus, 0x0000002b, L"+"}, - {TOKcomma, 0x0000002c, L","}, - {TOKminus, 0x0000002d, L"-"}, - {TOKdot, 0x0000002e, L"."}, - {TOKdiv, 0x0000002f, L"/"}, - {TOKlt, 0x0000003c, L"<"}, - {TOKassign, 0x0000003d, L"="}, - {TOKgt, 0x0000003e, L">"}, - {TOKlbracket, 0x0000005b, L"["}, - {TOKrbracket, 0x0000005d, L"]"}, - {TOKor, 0x0000007c, L"|"}, - {TOKdotscream, 0x0000ec11, L".#"}, - {TOKdotstar, 0x0000ec18, L".*"}, - {TOKdotdot, 0x0000ec1c, L".."}, - {TOKle, 0x000133f9, L"<="}, - {TOKne, 0x000133fa, L"<>"}, - {TOKeq, 0x0001391a, L"=="}, - {TOKge, 0x00013e3b, L">="}, - {TOKdo, 0x00020153, L"do"}, - {TOKkseq, 0x00020676, L"eq"}, - {TOKksge, 0x000210ac, L"ge"}, - {TOKksgt, 0x000210bb, L"gt"}, - {TOKif, 0x00021aef, L"if"}, - {TOKin, 0x00021af7, L"in"}, - {TOKksle, 0x00022a51, L"le"}, - {TOKkslt, 0x00022a60, L"lt"}, - {TOKksne, 0x00023493, L"ne"}, - {TOKksor, 0x000239c1, L"or"}, - {TOKnull, 0x052931bb, L"null"}, - {TOKbreak, 0x05518c25, L"break"}, - {TOKksand, 0x09f9db33, L"and"}, - {TOKend, 0x0a631437, L"end"}, - {TOKeof, 0x0a63195a, L"eof"}, - {TOKfor, 0x0a7d67a7, L"for"}, - {TOKnan, 0x0b4f91dd, L"nan"}, - {TOKksnot, 0x0b4fd9b1, L"not"}, - {TOKvar, 0x0c2203e9, L"var"}, - {TOKthen, 0x2d5738cf, L"then"}, - {TOKelse, 0x45f65ee9, L"else"}, - {TOKexit, 0x4731d6ba, L"exit"}, - {TOKdownto, 0x4caadc3b, L"downto"}, - {TOKreturn, 0x4db8bd60, L"return"}, - {TOKinfinity, 0x5c0a010a, L"infinity"}, - {TOKendwhile, 0x5c64bff0, L"endwhile"}, - {TOKforeach, 0x67e31f38, L"foreach"}, - {TOKendfunc, 0x68f984a3, L"endfunc"}, - {TOKelseif, 0x78253218, L"elseif"}, - {TOKwhile, 0x84229259, L"while"}, - {TOKendfor, 0x8ab49d7e, L"endfor"}, - {TOKthrow, 0x8db05c94, L"throw"}, - {TOKstep, 0xa7a7887c, L"step"}, - {TOKupto, 0xb5155328, L"upto"}, - {TOKcontinue, 0xc0340685, L"continue"}, - {TOKfunc, 0xcdce60ec, L"func"}, - {TOKendif, 0xe0e8fee6, L"endif"}, -}; - -const XFA_FM_TOKEN KEYWORD_START = TOKdo; -const XFA_FM_TOKEN KEYWORD_END = TOKendif; - -} // namespace - -const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) { - if (op < KEYWORD_START || op > KEYWORD_END) - return L""; - return keyWords[op].m_keyword; -} - -CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {} - -CXFA_FMToken::CXFA_FMToken(FX_DWORD uLineNum) - : m_type(TOKreserver), m_uLinenum(uLineNum) {} - -CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc, - CXFA_FMErrorInfo* pErrorInfo) - : m_ptr(wsFormCalc.GetPtr()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {} - -CXFA_FMToken* CXFA_FMLexer::NextToken() { - m_pToken.reset(Scan()); - return m_pToken.get(); -} - -CXFA_FMToken* CXFA_FMLexer::Scan() { - uint16_t ch = 0; - CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine); - if (!XFA_FMDChar::isAvalid(m_ptr)) { - ch = XFA_FMDChar::get(m_ptr); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return p; - } - int iRet = 0; - while (1) { - if (!XFA_FMDChar::isAvalid(m_ptr)) { - ch = XFA_FMDChar::get(m_ptr); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return p; - } - ch = XFA_FMDChar::get(m_ptr); - switch (ch) { - case 0: - p->m_type = TOKeof; - return p; - case 0x0A: - ++m_uCurrentLine; - p->m_uLinenum = m_uCurrentLine; - XFA_FMDChar::inc(m_ptr); - break; - case 0x0D: - XFA_FMDChar::inc(m_ptr); - break; - case ';': { - const FX_WCHAR* pTemp = 0; - Comment(m_ptr, pTemp); - m_ptr = pTemp; - } break; - case '"': { - const FX_WCHAR* pTemp = 0; - p->m_type = TOKstring; - iRet = String(p, m_ptr, pTemp); - m_ptr = pTemp; - } - return p; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - p->m_type = TOKnumber; - const FX_WCHAR* pTemp = 0; - iRet = Number(p, m_ptr, pTemp); - m_ptr = pTemp; - if (iRet) { - Error(FMERR_BAD_SUFFIX_NUMBER); - return p; - } - } - return p; - case '=': - XFA_FMDChar::inc(m_ptr); - if (XFA_FMDChar::isAvalid(m_ptr)) { - ch = XFA_FMDChar::get(m_ptr); - if (ch == '=') { - p->m_type = TOKeq; - XFA_FMDChar::inc(m_ptr); - return p; - } else { - p->m_type = TOKassign; - return p; - } - } else { - ch = XFA_FMDChar::get(m_ptr); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return p; - } - break; - case '<': - XFA_FMDChar::inc(m_ptr); - if (XFA_FMDChar::isAvalid(m_ptr)) { - ch = XFA_FMDChar::get(m_ptr); - if (ch == '=') { - p->m_type = TOKle; - XFA_FMDChar::inc(m_ptr); - return p; - } else if (ch == '>') { - p->m_type = TOKne; - XFA_FMDChar::inc(m_ptr); - return p; - } else { - p->m_type = TOKlt; - return p; - } - } else { - ch = XFA_FMDChar::get(m_ptr); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return p; - } - break; - case '>': - XFA_FMDChar::inc(m_ptr); - if (XFA_FMDChar::isAvalid(m_ptr)) { - ch = XFA_FMDChar::get(m_ptr); - if (ch == '=') { - p->m_type = TOKge; - XFA_FMDChar::inc(m_ptr); - return p; - } else { - p->m_type = TOKgt; - return p; - } - } else { - ch = XFA_FMDChar::get(m_ptr); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return p; - } - break; - case ',': - p->m_type = TOKcomma; - XFA_FMDChar::inc(m_ptr); - return p; - case '(': - p->m_type = TOKlparen; - XFA_FMDChar::inc(m_ptr); - return p; - case ')': - p->m_type = TOKrparen; - XFA_FMDChar::inc(m_ptr); - return p; - case '[': - p->m_type = TOKlbracket; - XFA_FMDChar::inc(m_ptr); - return p; - case ']': - p->m_type = TOKrbracket; - XFA_FMDChar::inc(m_ptr); - return p; - case '&': - XFA_FMDChar::inc(m_ptr); - p->m_type = TOKand; - return p; - case '|': - XFA_FMDChar::inc(m_ptr); - p->m_type = TOKor; - return p; - case '+': - XFA_FMDChar::inc(m_ptr); - p->m_type = TOKplus; - return p; - case '-': - XFA_FMDChar::inc(m_ptr); - p->m_type = TOKminus; - return p; - case '*': - XFA_FMDChar::inc(m_ptr); - p->m_type = TOKmul; - return p; - case '/': - XFA_FMDChar::inc(m_ptr); - if (XFA_FMDChar::isAvalid(m_ptr)) { - ch = XFA_FMDChar::get(m_ptr); - if (ch == '/') { - const FX_WCHAR* pTemp = 0; - Comment(m_ptr, pTemp); - m_ptr = pTemp; - break; - } else { - p->m_type = TOKdiv; - return p; - } - } else { - ch = XFA_FMDChar::get(m_ptr); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return p; - } - break; - case '.': - XFA_FMDChar::inc(m_ptr); - if (XFA_FMDChar::isAvalid(m_ptr)) { - ch = XFA_FMDChar::get(m_ptr); - if (ch == '.') { - p->m_type = TOKdotdot; - XFA_FMDChar::inc(m_ptr); - return p; - } else if (ch == '*') { - p->m_type = TOKdotstar; - XFA_FMDChar::inc(m_ptr); - return p; - } else if (ch == '#') { - p->m_type = TOKdotscream; - XFA_FMDChar::inc(m_ptr); - return p; - } else if (ch <= '9' && ch >= '0') { - p->m_type = TOKnumber; - const FX_WCHAR* pTemp = 0; - XFA_FMDChar::dec(m_ptr); - iRet = Number(p, m_ptr, pTemp); - m_ptr = pTemp; - if (iRet) { - Error(FMERR_BAD_SUFFIX_NUMBER); - } - return p; - } else { - p->m_type = TOKdot; - return p; - } - } else { - ch = XFA_FMDChar::get(m_ptr); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return p; - } - case 0x09: - case 0x0B: - case 0x0C: - case 0x20: - XFA_FMDChar::inc(m_ptr); - break; - default: { - const FX_WCHAR* pTemp = 0; - iRet = Identifiers(p, m_ptr, pTemp); - m_ptr = pTemp; - if (iRet) { - return p; - } - p->m_type = IsKeyword(p->m_wstring); - } - return p; - } - } -} - -FX_DWORD CXFA_FMLexer::Number(CXFA_FMToken* t, - const FX_WCHAR* p, - const FX_WCHAR*& pEnd) { - FX_DOUBLE number = 0; - if (XFA_FMDChar::string2number(p, &number, pEnd)) { - return 1; - } - if (pEnd && XFA_FMDChar::isAlpha(pEnd)) { - return 1; - } - t->m_wstring = CFX_WideStringC(p, (pEnd - p)); - return 0; -} - -FX_DWORD CXFA_FMLexer::String(CXFA_FMToken* t, - const FX_WCHAR* p, - const FX_WCHAR*& pEnd) { - const FX_WCHAR* pStart = p; - uint16_t ch = 0; - XFA_FMDChar::inc(p); - ch = XFA_FMDChar::get(p); - while (ch) { - if (!XFA_FMDChar::isAvalid(p)) { - ch = XFA_FMDChar::get(p); - pEnd = p; - t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return 1; - } - if (ch == '"') { - XFA_FMDChar::inc(p); - if (!XFA_FMDChar::isAvalid(p)) { - ch = XFA_FMDChar::get(p); - pEnd = p; - t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return 1; - } - ch = XFA_FMDChar::get(p); - if (ch == '"') { - goto NEXT; - } else { - break; - } - } - NEXT: - XFA_FMDChar::inc(p); - ch = XFA_FMDChar::get(p); - } - pEnd = p; - t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); - return 0; -} - -FX_DWORD CXFA_FMLexer::Identifiers(CXFA_FMToken* t, - const FX_WCHAR* p, - const FX_WCHAR*& pEnd) { - const FX_WCHAR* pStart = p; - uint16_t ch = 0; - ch = XFA_FMDChar::get(p); - XFA_FMDChar::inc(p); - if (!XFA_FMDChar::isAvalid(p)) { - pEnd = p; - t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return 1; - } - ch = XFA_FMDChar::get(p); - while (ch) { - if (!XFA_FMDChar::isAvalid(p)) { - pEnd = p; - t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); - Error(FMERR_UNSUPPORTED_CHAR, ch); - return 1; - } - ch = XFA_FMDChar::get(p); - if (XFA_FMDChar::isUnicodeAlpha(ch)) { - XFA_FMDChar::inc(p); - } else { - pEnd = p; - t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); - return 0; - } - } - pEnd = p; - t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); - return 0; -} - -void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) { - unsigned ch = 0; - XFA_FMDChar::inc(p); - ch = XFA_FMDChar::get(p); - while (ch) { - if (ch == 0x0D) { - XFA_FMDChar::inc(p); - pEnd = p; - return; - } - if (ch == 0x0A) { - ++m_uCurrentLine; - XFA_FMDChar::inc(p); - pEnd = p; - return; - } - XFA_FMDChar::inc(p); - ch = XFA_FMDChar::get(p); - } - pEnd = p; -} - -XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) { - int32_t iLength = str.GetLength(); - uint32_t uHash = FX_HashCode_String_GetW(str.GetPtr(), iLength, TRUE); - int32_t iStart = KEYWORD_START, iEnd = KEYWORD_END; - int32_t iMid = (iStart + iEnd) / 2; - XFA_FMKeyword keyword; - do { - iMid = (iStart + iEnd) / 2; - keyword = keyWords[iMid]; - if (uHash == keyword.m_uHash) { - return keyword.m_type; - } else if (uHash < keyword.m_uHash) { - iEnd = iMid - 1; - } else { - iStart = iMid + 1; - } - } while (iStart <= iEnd); - return TOKidentifier; -} - -void CXFA_FMLexer::Error(XFA_FM_ERRMSG msg, ...) { - m_pErrorInfo->linenum = m_uCurrentLine; - const FX_WCHAR* lpMessageInfo = XFA_FM_ErrorMsg(msg); - va_list ap; - va_start(ap, msg); - m_pErrorInfo->message.FormatV(lpMessageInfo, ap); - va_end(ap); -} - -FX_BOOL CXFA_FMLexer::HasError() const { - if (m_pErrorInfo->message.IsEmpty()) { - return FALSE; - } - return TRUE; -} |