// Copyright 2014 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "xfa/fxfa/fm2js/xfa_lexer.h" #include "core/fxcrt/include/fx_ext.h" namespace { struct XFA_FMDChar { static const FX_WCHAR* inc(const FX_WCHAR*& p) { ++p; return p; } static const FX_WCHAR* dec(const FX_WCHAR*& p) { --p; return p; } static uint16_t get(const FX_WCHAR* p) { return *p; } static FX_BOOL isWhiteSpace(const FX_WCHAR* p) { return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20; } static FX_BOOL isLineTerminator(const FX_WCHAR* p) { return *p == 0x0A || *p == 0x0D; } static FX_BOOL isBinary(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '1'; } static FX_BOOL isOctal(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '7'; } static FX_BOOL isDigital(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '9'; } static FX_BOOL isHex(const FX_WCHAR* p) { return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') || ((*p) >= 'A' && (*p) <= 'F'); } static FX_BOOL isAlpha(const FX_WCHAR* p) { return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A'); } static FX_BOOL isAvalid(const FX_WCHAR* p, FX_BOOL flag = 0); static FX_BOOL string2number(const FX_WCHAR* s, FX_DOUBLE* pValue, const FX_WCHAR*& pEnd); static FX_BOOL isUnicodeAlpha(uint16_t ch); }; inline FX_BOOL XFA_FMDChar::isAvalid(const FX_WCHAR* p, FX_BOOL flag) { if (*p == 0) { return 1; } if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D || (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) { return 1; } if (!flag) { if (*p == 0x0B || *p == 0x0C) { return 1; } } return 0; } inline FX_BOOL XFA_FMDChar::string2number(const FX_WCHAR* s, FX_DOUBLE* pValue, const FX_WCHAR*& pEnd) { if (s) { *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd); } return 0; } inline FX_BOOL XFA_FMDChar::isUnicodeAlpha(uint16_t ch) { if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B || ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' || ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' || ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' || ch == '+' || ch == '-' || ch == '*' || ch == '/') { return FALSE; } return TRUE; } const XFA_FMKeyword keyWords[] = { {TOKand, 0x00000026, L"&"}, {TOKlparen, 0x00000028, L"("}, {TOKrparen, 0x00000029, L")"}, {TOKmul, 0x0000002a, L"*"}, {TOKplus, 0x0000002b, L"+"}, {TOKcomma, 0x0000002c, L","}, {TOKminus, 0x0000002d, L"-"}, {TOKdot, 0x0000002e, L"."}, {TOKdiv, 0x0000002f, L"/"}, {TOKlt, 0x0000003c, L"<"}, {TOKassign, 0x0000003d, L"="}, {TOKgt, 0x0000003e, L">"}, {TOKlbracket, 0x0000005b, L"["}, {TOKrbracket, 0x0000005d, L"]"}, {TOKor, 0x0000007c, L"|"}, {TOKdotscream, 0x0000ec11, L".#"}, {TOKdotstar, 0x0000ec18, L".*"}, {TOKdotdot, 0x0000ec1c, L".."}, {TOKle, 0x000133f9, L"<="}, {TOKne, 0x000133fa, L"<>"}, {TOKeq, 0x0001391a, L"=="}, {TOKge, 0x00013e3b, L">="}, {TOKdo, 0x00020153, L"do"}, {TOKkseq, 0x00020676, L"eq"}, {TOKksge, 0x000210ac, L"ge"}, {TOKksgt, 0x000210bb, L"gt"}, {TOKif, 0x00021aef, L"if"}, {TOKin, 0x00021af7, L"in"}, {TOKksle, 0x00022a51, L"le"}, {TOKkslt, 0x00022a60, L"lt"}, {TOKksne, 0x00023493, L"ne"}, {TOKksor, 0x000239c1, L"or"}, {TOKnull, 0x052931bb, L"null"}, {TOKbreak, 0x05518c25, L"break"}, {TOKksand, 0x09f9db33, L"and"}, {TOKend, 0x0a631437, L"end"}, {TOKeof, 0x0a63195a, L"eof"}, {TOKfor, 0x0a7d67a7, L"for"}, {TOKnan, 0x0b4f91dd, L"nan"}, {TOKksnot, 0x0b4fd9b1, L"not"}, {TOKvar, 0x0c2203e9, L"var"}, {TOKthen, 0x2d5738cf, L"then"}, {TOKelse, 0x45f65ee9, L"else"}, {TOKexit, 0x4731d6ba, L"exit"}, {TOKdownto, 0x4caadc3b, L"downto"}, {TOKreturn, 0x4db8bd60, L"return"}, {TOKinfinity, 0x5c0a010a, L"infinity"}, {TOKendwhile, 0x5c64bff0, L"endwhile"}, {TOKforeach, 0x67e31f38, L"foreach"}, {TOKendfunc, 0x68f984a3, L"endfunc"}, {TOKelseif, 0x78253218, L"elseif"}, {TOKwhile, 0x84229259, L"while"}, {TOKendfor, 0x8ab49d7e, L"endfor"}, {TOKthrow, 0x8db05c94, L"throw"}, {TOKstep, 0xa7a7887c, L"step"}, {TOKupto, 0xb5155328, L"upto"}, {TOKcontinue, 0xc0340685, L"continue"}, {TOKfunc, 0xcdce60ec, L"func"}, {TOKendif, 0xe0e8fee6, L"endif"}, }; const XFA_FM_TOKEN KEYWORD_START = TOKdo; const XFA_FM_TOKEN KEYWORD_END = TOKendif; } // namespace const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) { if (op < KEYWORD_START || op > KEYWORD_END) return L""; return keyWords[op].m_keyword; } CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {} CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum) : m_type(TOKreserver), m_uLinenum(uLineNum) {} CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc, CXFA_FMErrorInfo* pErrorInfo) : m_ptr(wsFormCalc.c_str()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {} CXFA_FMToken* CXFA_FMLexer::NextToken() { m_pToken.reset(Scan()); return m_pToken.get(); } CXFA_FMToken* CXFA_FMLexer::Scan() { uint16_t ch = 0; CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine); if (!XFA_FMDChar::isAvalid(m_ptr)) { ch = XFA_FMDChar::get(m_ptr); Error(kFMErrUnsupportedChar, ch); return p; } int iRet = 0; while (1) { if (!XFA_FMDChar::isAvalid(m_ptr)) { ch = XFA_FMDChar::get(m_ptr); Error(kFMErrUnsupportedChar, ch); return p; } ch = XFA_FMDChar::get(m_ptr); switch (ch) { case 0: p->m_type = TOKeof; return p; case 0x0A: ++m_uCurrentLine; p->m_uLinenum = m_uCurrentLine; XFA_FMDChar::inc(m_ptr); break; case 0x0D: XFA_FMDChar::inc(m_ptr); break; case ';': { const FX_WCHAR* pTemp = 0; Comment(m_ptr, pTemp); m_ptr = pTemp; } break; case '"': { const FX_WCHAR* pTemp = 0; p->m_type = TOKstring; iRet = String(p, m_ptr, pTemp); m_ptr = pTemp; } return p; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { p->m_type = TOKnumber; const FX_WCHAR* pTemp = 0; iRet = Number(p, m_ptr, pTemp); m_ptr = pTemp; if (iRet) { Error(kFMErrBadSuffixNumber); return p; } } return p; case '=': XFA_FMDChar::inc(m_ptr); if (XFA_FMDChar::isAvalid(m_ptr)) { ch = XFA_FMDChar::get(m_ptr); if (ch == '=') { p->m_type = TOKeq; XFA_FMDChar::inc(m_ptr); return p; } else { p->m_type = TOKassign; return p; } } else { ch = XFA_FMDChar::get(m_ptr); Error(kFMErrUnsupportedChar, ch); return p; } break; case '<': XFA_FMDChar::inc(m_ptr); if (XFA_FMDChar::isAvalid(m_ptr)) { ch = XFA_FMDChar::get(m_ptr); if (ch == '=') { p->m_type = TOKle; XFA_FMDChar::inc(m_ptr); return p; } else if (ch == '>') { p->m_type = TOKne; XFA_FMDChar::inc(m_ptr); return p; } else { p->m_type = TOKlt; return p; } } else { ch = XFA_FMDChar::get(m_ptr); Error(kFMErrUnsupportedChar, ch); return p; } break; case '>': XFA_FMDChar::inc(m_ptr); if (XFA_FMDChar::isAvalid(m_ptr)) { ch = XFA_FMDChar::get(m_ptr); if (ch == '=') { p->m_type = TOKge; XFA_FMDChar::inc(m_ptr); return p; } else { p->m_type = TOKgt; return p; } } else { ch = XFA_FMDChar::get(m_ptr); Error(kFMErrUnsupportedChar, ch); return p; } break; case ',': p->m_type = TOKcomma; XFA_FMDChar::inc(m_ptr); return p; case '(': p->m_type = TOKlparen; XFA_FMDChar::inc(m_ptr); return p; case ')': p->m_type = TOKrparen; XFA_FMDChar::inc(m_ptr); return p; case '[': p->m_type = TOKlbracket; XFA_FMDChar::inc(m_ptr); return p; case ']': p->m_type = TOKrbracket; XFA_FMDChar::inc(m_ptr); return p; case '&': XFA_FMDChar::inc(m_ptr); p->m_type = TOKand; return p; case '|': XFA_FMDChar::inc(m_ptr); p->m_type = TOKor; return p; case '+': XFA_FMDChar::inc(m_ptr); p->m_type = TOKplus; return p; case '-': XFA_FMDChar::inc(m_ptr); p->m_type = TOKminus; return p; case '*': XFA_FMDChar::inc(m_ptr); p->m_type = TOKmul; return p; case '/': XFA_FMDChar::inc(m_ptr); if (XFA_FMDChar::isAvalid(m_ptr)) { ch = XFA_FMDChar::get(m_ptr); if (ch == '/') { const FX_WCHAR* pTemp = 0; Comment(m_ptr, pTemp); m_ptr = pTemp; break; } else { p->m_type = TOKdiv; return p; } } else { ch = XFA_FMDChar::get(m_ptr); Error(kFMErrUnsupportedChar, ch); return p; } break; case '.': XFA_FMDChar::inc(m_ptr); if (XFA_FMDChar::isAvalid(m_ptr)) { ch = XFA_FMDChar::get(m_ptr); if (ch == '.') { p->m_type = TOKdotdot; XFA_FMDChar::inc(m_ptr); return p; } else if (ch == '*') { p->m_type = TOKdotstar; XFA_FMDChar::inc(m_ptr); return p; } else if (ch == '#') { p->m_type = TOKdotscream; XFA_FMDChar::inc(m_ptr); return p; } else if (ch <= '9' && ch >= '0') { p->m_type = TOKnumber; const FX_WCHAR* pTemp = 0; XFA_FMDChar::dec(m_ptr); iRet = Number(p, m_ptr, pTemp); m_ptr = pTemp; if (iRet) { Error(kFMErrBadSuffixNumber); } return p; } else { p->m_type = TOKdot; return p; } } else { ch = XFA_FMDChar::get(m_ptr); Error(kFMErrUnsupportedChar, ch); return p; } case 0x09: case 0x0B: case 0x0C: case 0x20: XFA_FMDChar::inc(m_ptr); break; default: { const FX_WCHAR* pTemp = 0; iRet = Identifiers(p, m_ptr, pTemp); m_ptr = pTemp; if (iRet) { return p; } p->m_type = IsKeyword(p->m_wstring); } return p; } } } uint32_t CXFA_FMLexer::Number(CXFA_FMToken* t, const FX_WCHAR* p, const FX_WCHAR*& pEnd) { FX_DOUBLE number = 0; if (XFA_FMDChar::string2number(p, &number, pEnd)) { return 1; } if (pEnd && XFA_FMDChar::isAlpha(pEnd)) { return 1; } t->m_wstring = CFX_WideStringC(p, (pEnd - p)); return 0; } uint32_t CXFA_FMLexer::String(CXFA_FMToken* t, const FX_WCHAR* p, const FX_WCHAR*& pEnd) { const FX_WCHAR* pStart = p; uint16_t ch = 0; XFA_FMDChar::inc(p); ch = XFA_FMDChar::get(p); while (ch) { if (!XFA_FMDChar::isAvalid(p)) { ch = XFA_FMDChar::get(p); pEnd = p; t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); Error(kFMErrUnsupportedChar, ch); return 1; } if (ch == '"') { XFA_FMDChar::inc(p); if (!XFA_FMDChar::isAvalid(p)) { ch = XFA_FMDChar::get(p); pEnd = p; t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); Error(kFMErrUnsupportedChar, ch); return 1; } ch = XFA_FMDChar::get(p); if (ch == '"') { goto NEXT; } else { break; } } NEXT: XFA_FMDChar::inc(p); ch = XFA_FMDChar::get(p); } pEnd = p; t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); return 0; } uint32_t CXFA_FMLexer::Identifiers(CXFA_FMToken* t, const FX_WCHAR* p, const FX_WCHAR*& pEnd) { const FX_WCHAR* pStart = p; uint16_t ch = 0; ch = XFA_FMDChar::get(p); XFA_FMDChar::inc(p); if (!XFA_FMDChar::isAvalid(p)) { pEnd = p; t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); Error(kFMErrUnsupportedChar, ch); return 1; } ch = XFA_FMDChar::get(p); while (ch) { if (!XFA_FMDChar::isAvalid(p)) { pEnd = p; t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); Error(kFMErrUnsupportedChar, ch); return 1; } ch = XFA_FMDChar::get(p); if (XFA_FMDChar::isUnicodeAlpha(ch)) { XFA_FMDChar::inc(p); } else { pEnd = p; t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); return 0; } } pEnd = p; t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); return 0; } void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) { unsigned ch = 0; XFA_FMDChar::inc(p); ch = XFA_FMDChar::get(p); while (ch) { if (ch == 0x0D) { XFA_FMDChar::inc(p); pEnd = p; return; } if (ch == 0x0A) { ++m_uCurrentLine; XFA_FMDChar::inc(p); pEnd = p; return; } XFA_FMDChar::inc(p); ch = XFA_FMDChar::get(p); } pEnd = p; } XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) { uint32_t uHash = FX_HashCode_GetW(str, true); int32_t iStart = KEYWORD_START; int32_t iEnd = KEYWORD_END; do { int32_t iMid = (iStart + iEnd) / 2; XFA_FMKeyword keyword = keyWords[iMid]; if (uHash == keyword.m_uHash) return keyword.m_type; if (uHash < keyword.m_uHash) iEnd = iMid - 1; else iStart = iMid + 1; } while (iStart <= iEnd); return TOKidentifier; } void CXFA_FMLexer::Error(const FX_WCHAR* msg, ...) { m_pErrorInfo->linenum = m_uCurrentLine; va_list ap; va_start(ap, msg); m_pErrorInfo->message.FormatV(msg, ap); va_end(ap); } FX_BOOL CXFA_FMLexer::HasError() const { if (m_pErrorInfo->message.IsEmpty()) { return FALSE; } return TRUE; }