// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "xfa/fxfa/fm2js/xfa_lexer.h"

#include "core/fxcrt/include/fx_ext.h"

namespace {

// Stateless helpers for stepping through, reading and classifying the
// characters of the lexer's input. Every predicate inspects only the single
// character that |p| points at.
struct XFA_FMDChar {
  // Advances |p| by one character and returns the updated position.
  static const FX_WCHAR* inc(const FX_WCHAR*& p) { return ++p; }
  // Moves |p| back by one character and returns the updated position.
  static const FX_WCHAR* dec(const FX_WCHAR*& p) { return --p; }
  // Reads the character at |p|, converted to uint16_t.
  static uint16_t get(const FX_WCHAR* p) { return *p; }
  // True for tab, vertical tab, form feed and space.
  static FX_BOOL isWhiteSpace(const FX_WCHAR* p) {
    const FX_WCHAR ch = *p;
    return ch == 0x09 || ch == 0x0b || ch == 0x0c || ch == 0x20;
  }
  // True for line feed and carriage return.
  static FX_BOOL isLineTerminator(const FX_WCHAR* p) {
    const FX_WCHAR ch = *p;
    return ch == 0x0A || ch == 0x0D;
  }
  // True for '0' and '1'.
  static FX_BOOL isBinary(const FX_WCHAR* p) {
    const FX_WCHAR ch = *p;
    return ch >= '0' && ch <= '1';
  }
  // True for '0' through '7'.
  static FX_BOOL isOctal(const FX_WCHAR* p) {
    const FX_WCHAR ch = *p;
    return ch >= '0' && ch <= '7';
  }
  // True for '0' through '9'.
  static FX_BOOL isDigital(const FX_WCHAR* p) {
    const FX_WCHAR ch = *p;
    return ch >= '0' && ch <= '9';
  }
  // True for decimal digits and the letters a-f / A-F.
  static FX_BOOL isHex(const FX_WCHAR* p) {
    const FX_WCHAR ch = *p;
    return isDigital(p) || (ch >= 'a' && ch <= 'f') ||
           (ch >= 'A' && ch <= 'F');
  }
  // True for the ASCII letters a-z and A-Z.
  static FX_BOOL isAlpha(const FX_WCHAR* p) {
    const FX_WCHAR ch = *p;
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
  }
  static FX_BOOL isAvalid(const FX_WCHAR* p, FX_BOOL flag = 0);
  static FX_BOOL string2number(const FX_WCHAR* s,
                               FX_DOUBLE* pValue,
                               const FX_WCHAR*& pEnd);
  static FX_BOOL isUnicodeAlpha(uint16_t ch);
};

// Reports whether the character at |p| is acceptable lexer input.
// NUL, tab (0x09), LF (0x0A), CR (0x0D), 0x20..0xD7FF and 0xE000..0xFFFD are
// always accepted. Vertical tab (0x0B) and form feed (0x0C) are accepted only
// when |flag| is zero. Returns 1 when accepted, 0 otherwise.
inline FX_BOOL XFA_FMDChar::isAvalid(const FX_WCHAR* p, FX_BOOL flag) {
  const FX_WCHAR ch = *p;
  const bool bAlwaysAccepted =
      ch == 0 || ch == 0x09 || ch == 0x0A || ch == 0x0D ||
      (ch >= 0x20 && ch <= 0xd7ff) || (ch >= 0xe000 && ch <= 0xfffd);
  if (bAlwaysAccepted) {
    return 1;
  }
  const bool bAcceptedWhenUnflagged = ch == 0x0B || ch == 0x0C;
  return (!flag && bAcceptedWhenUnflagged) ? 1 : 0;
}

// Converts the text at |s| to a floating-point value with wcstod().
// When |s| is non-null, |*pValue| receives the converted value and |pEnd| the
// position at which wcstod() stopped; when |s| is null neither is modified.
// Always returns 0.
inline FX_BOOL XFA_FMDChar::string2number(const FX_WCHAR* s,
                                          FX_DOUBLE* pValue,
                                          const FX_WCHAR*& pEnd) {
  if (!s) {
    return 0;
  }
  // wcstod() wants a mutable end pointer; collect it locally and hand it back
  // through the const-qualified out parameter.
  wchar_t* pStop = nullptr;
  *pValue = wcstod(s, &pStop);
  pEnd = pStop;
  return 0;
}

// Returns FALSE for NUL, the whitespace / line-break characters and the
// punctuation characters listed below, and TRUE for every other value of
// |ch|. Identifiers() uses it to decide where an identifier ends.
inline FX_BOOL XFA_FMDChar::isUnicodeAlpha(uint16_t ch) {
  switch (ch) {
    case 0:
    case 0x09:
    case 0x0A:
    case 0x0B:
    case 0x0C:
    case 0x0D:
    case 0x20:
    case '.':
    case ';':
    case '"':
    case '=':
    case '<':
    case '>':
    case ',':
    case '(':
    case ')':
    case ']':
    case '[':
    case '&':
    case '|':
    case '+':
    case '-':
    case '*':
    case '/':
      return FALSE;
    default:
      return TRUE;
  }
}

// Table of operator and keyword tokens: {token, hash value, spelling}.
//
// Two users in this file depend on the order of the entries:
//  - CXFA_FMLexer::IsKeyword() binary-searches the TOKdo..TOKendif slice by
//    hash value, so entries must stay in ascending hash order.
//  - XFA_FM_KeywordToString() indexes this table directly with a token value,
//    so each entry's position must equal the numeric value of its token
//    (presumably the declaration order of XFA_FM_TOKEN - confirm against the
//    header before adding or reordering entries).
const XFA_FMKeyword keyWords[] = {
    {TOKand, 0x00000026, L"&"},
    {TOKlparen, 0x00000028, L"("},
    {TOKrparen, 0x00000029, L")"},
    {TOKmul, 0x0000002a, L"*"},
    {TOKplus, 0x0000002b, L"+"},
    {TOKcomma, 0x0000002c, L","},
    {TOKminus, 0x0000002d, L"-"},
    {TOKdot, 0x0000002e, L"."},
    {TOKdiv, 0x0000002f, L"/"},
    {TOKlt, 0x0000003c, L"<"},
    {TOKassign, 0x0000003d, L"="},
    {TOKgt, 0x0000003e, L">"},
    {TOKlbracket, 0x0000005b, L"["},
    {TOKrbracket, 0x0000005d, L"]"},
    {TOKor, 0x0000007c, L"|"},
    {TOKdotscream, 0x0000ec11, L".#"},
    {TOKdotstar, 0x0000ec18, L".*"},
    {TOKdotdot, 0x0000ec1c, L".."},
    {TOKle, 0x000133f9, L"<="},
    {TOKne, 0x000133fa, L"<>"},
    {TOKeq, 0x0001391a, L"=="},
    {TOKge, 0x00013e3b, L">="},
    {TOKdo, 0x00020153, L"do"},
    {TOKkseq, 0x00020676, L"eq"},
    {TOKksge, 0x000210ac, L"ge"},
    {TOKksgt, 0x000210bb, L"gt"},
    {TOKif, 0x00021aef, L"if"},
    {TOKin, 0x00021af7, L"in"},
    {TOKksle, 0x00022a51, L"le"},
    {TOKkslt, 0x00022a60, L"lt"},
    {TOKksne, 0x00023493, L"ne"},
    {TOKksor, 0x000239c1, L"or"},
    {TOKnull, 0x052931bb, L"null"},
    {TOKbreak, 0x05518c25, L"break"},
    {TOKksand, 0x09f9db33, L"and"},
    {TOKend, 0x0a631437, L"end"},
    {TOKeof, 0x0a63195a, L"eof"},
    {TOKfor, 0x0a7d67a7, L"for"},
    {TOKnan, 0x0b4f91dd, L"nan"},
    {TOKksnot, 0x0b4fd9b1, L"not"},
    {TOKvar, 0x0c2203e9, L"var"},
    {TOKthen, 0x2d5738cf, L"then"},
    {TOKelse, 0x45f65ee9, L"else"},
    {TOKexit, 0x4731d6ba, L"exit"},
    {TOKdownto, 0x4caadc3b, L"downto"},
    {TOKreturn, 0x4db8bd60, L"return"},
    {TOKinfinity, 0x5c0a010a, L"infinity"},
    {TOKendwhile, 0x5c64bff0, L"endwhile"},
    {TOKforeach, 0x67e31f38, L"foreach"},
    {TOKendfunc, 0x68f984a3, L"endfunc"},
    {TOKelseif, 0x78253218, L"elseif"},
    {TOKwhile, 0x84229259, L"while"},
    {TOKendfor, 0x8ab49d7e, L"endfor"},
    {TOKthrow, 0x8db05c94, L"throw"},
    {TOKstep, 0xa7a7887c, L"step"},
    {TOKupto, 0xb5155328, L"upto"},
    {TOKcontinue, 0xc0340685, L"continue"},
    {TOKfunc, 0xcdce60ec, L"func"},
    {TOKendif, 0xe0e8fee6, L"endif"},
};

// First and last tokens of the keyword range. XFA_FM_KeywordToString() only
// answers for tokens within these bounds, and CXFA_FMLexer::IsKeyword() uses
// them as the index bounds of its search in keyWords.
const XFA_FM_TOKEN KEYWORD_START = TOKdo;
const XFA_FM_TOKEN KEYWORD_END = TOKendif;

}  // namespace

// Returns the spelling stored in keyWords at index |op| when |op| lies within
// [KEYWORD_START, KEYWORD_END]; any other token yields an empty string.
const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
  const bool bInKeywordRange = op >= KEYWORD_START && op <= KEYWORD_END;
  return bInKeywordRange ? keyWords[op].m_keyword : L"";
}

// Default token: type TOKreserver on line 1, built by delegating to the
// line-number constructor.
CXFA_FMToken::CXFA_FMToken() : CXFA_FMToken(1) {}

// Creates a token of type TOKreserver whose line number is |uLineNum|.
CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum)
    : m_type(TOKreserver), m_uLinenum(uLineNum) {}

// Sets up a lexer that reads from the start of |wsFormCalc|, counts lines
// from 1 and reports problems through |pErrorInfo|.
// NOTE(review): only the start pointer of |wsFormCalc| is kept, not its
// length; Scan() stops solely on a NUL character, so the underlying buffer
// is assumed to be NUL-terminated - confirm at call sites.
// NOTE(review): |pErrorInfo| is dereferenced by Error() and HasError()
// without a null check, so it is assumed to be non-null.
CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc,
                           CXFA_FMErrorInfo* pErrorInfo)
    : m_ptr(wsFormCalc.GetPtr()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {}

// Scans the next token, hands ownership of it to |m_pToken| (replacing the
// previously held token) and returns a non-owning pointer to it.
CXFA_FMToken* CXFA_FMLexer::NextToken() {
  CXFA_FMToken* pScanned = Scan();
  m_pToken.reset(pScanned);
  return pScanned;
}

// Produces the next token from the input at |m_ptr| and advances |m_ptr|
// past it.
//
// Spaces, tabs, vertical tabs, form feeds, line breaks and comments
// (introduced by ';' or "//") are skipped. The returned token is allocated
// with new and ownership passes to the caller. A NUL character yields TOKeof.
//
// When an unsupported character is met, or Number() rejects a literal, the
// problem is recorded through Error() and the token filled in so far is
// returned; callers are expected to consult HasError().
//
// The token's line number is kept in step with |m_uCurrentLine| while leading
// line breaks are skipped, including line feeds consumed by Comment(), so it
// names the line on which the token itself starts.
CXFA_FMToken* CXFA_FMLexer::Scan() {
  CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine);
  while (1) {
    // Every character is validated before it is dispatched on.
    uint16_t ch = XFA_FMDChar::get(m_ptr);
    if (!XFA_FMDChar::isAvalid(m_ptr)) {
      Error(FMERR_UNSUPPORTED_CHAR, ch);
      return p;
    }
    switch (ch) {
      case 0:
        p->m_type = TOKeof;
        return p;
      case 0x0A:
        ++m_uCurrentLine;
        p->m_uLinenum = m_uCurrentLine;
        XFA_FMDChar::inc(m_ptr);
        break;
      case 0x0D:
      case 0x09:
      case 0x0B:
      case 0x0C:
      case 0x20:
        // Carriage return and blank characters carry no meaning here.
        XFA_FMDChar::inc(m_ptr);
        break;
      case ';': {
        const FX_WCHAR* pTemp = nullptr;
        Comment(m_ptr, pTemp);
        m_ptr = pTemp;
        // Comment() advances |m_uCurrentLine| when it consumes a line feed;
        // keep the pending token's line number in step with it.
        p->m_uLinenum = m_uCurrentLine;
        break;
      }
      case '"': {
        const FX_WCHAR* pTemp = nullptr;
        p->m_type = TOKstring;
        // String() records any error itself; the token is returned either way.
        String(p, m_ptr, pTemp);
        m_ptr = pTemp;
        return p;
      }
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9': {
        p->m_type = TOKnumber;
        const FX_WCHAR* pTemp = nullptr;
        const uint32_t uRet = Number(p, m_ptr, pTemp);
        m_ptr = pTemp;
        if (uRet) {
          Error(FMERR_BAD_SUFFIX_NUMBER);
        }
        return p;
      }
      case '=':
        // "==" or "=".
        XFA_FMDChar::inc(m_ptr);
        ch = XFA_FMDChar::get(m_ptr);
        if (!XFA_FMDChar::isAvalid(m_ptr)) {
          Error(FMERR_UNSUPPORTED_CHAR, ch);
          return p;
        }
        if (ch == '=') {
          p->m_type = TOKeq;
          XFA_FMDChar::inc(m_ptr);
        } else {
          p->m_type = TOKassign;
        }
        return p;
      case '<':
        // "<=", "<>" or "<".
        XFA_FMDChar::inc(m_ptr);
        ch = XFA_FMDChar::get(m_ptr);
        if (!XFA_FMDChar::isAvalid(m_ptr)) {
          Error(FMERR_UNSUPPORTED_CHAR, ch);
          return p;
        }
        if (ch == '=') {
          p->m_type = TOKle;
          XFA_FMDChar::inc(m_ptr);
        } else if (ch == '>') {
          p->m_type = TOKne;
          XFA_FMDChar::inc(m_ptr);
        } else {
          p->m_type = TOKlt;
        }
        return p;
      case '>':
        // ">=" or ">".
        XFA_FMDChar::inc(m_ptr);
        ch = XFA_FMDChar::get(m_ptr);
        if (!XFA_FMDChar::isAvalid(m_ptr)) {
          Error(FMERR_UNSUPPORTED_CHAR, ch);
          return p;
        }
        if (ch == '=') {
          p->m_type = TOKge;
          XFA_FMDChar::inc(m_ptr);
        } else {
          p->m_type = TOKgt;
        }
        return p;
      case ',':
        p->m_type = TOKcomma;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case '(':
        p->m_type = TOKlparen;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case ')':
        p->m_type = TOKrparen;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case '[':
        p->m_type = TOKlbracket;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case ']':
        p->m_type = TOKrbracket;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case '&':
        p->m_type = TOKand;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case '|':
        p->m_type = TOKor;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case '+':
        p->m_type = TOKplus;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case '-':
        p->m_type = TOKminus;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case '*':
        p->m_type = TOKmul;
        XFA_FMDChar::inc(m_ptr);
        return p;
      case '/': {
        // "//" starts a comment; a single '/' is the division operator.
        XFA_FMDChar::inc(m_ptr);
        ch = XFA_FMDChar::get(m_ptr);
        if (!XFA_FMDChar::isAvalid(m_ptr)) {
          Error(FMERR_UNSUPPORTED_CHAR, ch);
          return p;
        }
        if (ch != '/') {
          p->m_type = TOKdiv;
          return p;
        }
        const FX_WCHAR* pTemp = nullptr;
        Comment(m_ptr, pTemp);
        m_ptr = pTemp;
        // See the ';' case: resync after a line feed consumed by Comment().
        p->m_uLinenum = m_uCurrentLine;
        break;
      }
      case '.': {
        // "..", ".*", ".#", a number such as ".5", or a plain '.'.
        XFA_FMDChar::inc(m_ptr);
        ch = XFA_FMDChar::get(m_ptr);
        if (!XFA_FMDChar::isAvalid(m_ptr)) {
          Error(FMERR_UNSUPPORTED_CHAR, ch);
          return p;
        }
        if (ch == '.') {
          p->m_type = TOKdotdot;
          XFA_FMDChar::inc(m_ptr);
        } else if (ch == '*') {
          p->m_type = TOKdotstar;
          XFA_FMDChar::inc(m_ptr);
        } else if (ch == '#') {
          p->m_type = TOKdotscream;
          XFA_FMDChar::inc(m_ptr);
        } else if (ch <= '9' && ch >= '0') {
          p->m_type = TOKnumber;
          const FX_WCHAR* pTemp = nullptr;
          // Step back so that Number() sees the leading '.'.
          XFA_FMDChar::dec(m_ptr);
          const uint32_t uRet = Number(p, m_ptr, pTemp);
          m_ptr = pTemp;
          if (uRet) {
            Error(FMERR_BAD_SUFFIX_NUMBER);
          }
        } else {
          p->m_type = TOKdot;
        }
        return p;
      }
      default: {
        // Anything else starts an identifier, which may be a keyword.
        const FX_WCHAR* pTemp = nullptr;
        const uint32_t uRet = Identifiers(p, m_ptr, pTemp);
        m_ptr = pTemp;
        if (!uRet) {
          p->m_type = IsKeyword(p->m_wstring);
        }
        return p;
      }
    }
  }
}

// Lexes a numeric literal that starts at |p|. |pEnd| receives the position at
// which XFA_FMDChar::string2number() stopped reading.
// Returns 1, leaving |t| untouched, when the conversion reports failure or
// the literal is immediately followed by an ASCII letter. Otherwise stores
// the text [p, pEnd) in |t->m_wstring| and returns 0.
uint32_t CXFA_FMLexer::Number(CXFA_FMToken* t,
                              const FX_WCHAR* p,
                              const FX_WCHAR*& pEnd) {
  // The parsed value is not needed; only the end position matters here.
  FX_DOUBLE dParsed = 0;
  const bool bRejected = XFA_FMDChar::string2number(p, &dParsed, pEnd) ||
                         (pEnd && XFA_FMDChar::isAlpha(pEnd));
  if (bRejected) {
    return 1;
  }
  t->m_wstring = CFX_WideStringC(p, (pEnd - p));
  return 0;
}

// Lexes a string literal whose opening '"' is at |p|. Two consecutive quote
// characters inside the literal are treated as an escaped quote and do not
// end it. |pEnd| receives the position just past the closing quote, the
// position of the NUL if the input ends first, or the position of an
// unsupported character. In every case |t->m_wstring| is set to the text
// [p, pEnd), which includes the surrounding quotes.
// Returns 1 after reporting FMERR_UNSUPPORTED_CHAR, otherwise 0.
uint32_t CXFA_FMLexer::String(CXFA_FMToken* t,
                              const FX_WCHAR* p,
                              const FX_WCHAR*& pEnd) {
  const FX_WCHAR* const pBegin = p;
  bool bUnsupported = false;
  // Skip the opening quote, then walk until the closing quote or NUL.
  XFA_FMDChar::inc(p);
  for (uint16_t ch = XFA_FMDChar::get(p); ch != 0; ch = XFA_FMDChar::get(p)) {
    if (!XFA_FMDChar::isAvalid(p)) {
      bUnsupported = true;
      break;
    }
    if (ch == '"') {
      // Look at the character after the quote to tell "" from the end.
      XFA_FMDChar::inc(p);
      if (!XFA_FMDChar::isAvalid(p)) {
        bUnsupported = true;
        break;
      }
      if (XFA_FMDChar::get(p) != '"') {
        break;  // That was the closing quote.
      }
    }
    XFA_FMDChar::inc(p);
  }
  pEnd = p;
  t->m_wstring = CFX_WideStringC(pBegin, (pEnd - pBegin));
  if (!bUnsupported) {
    return 0;
  }
  const uint16_t uBad = XFA_FMDChar::get(p);
  Error(FMERR_UNSUPPORTED_CHAR, uBad);
  return 1;
}

// Lexes an identifier that starts at |p|. The first character is accepted as
// is; following characters are consumed for as long as
// XFA_FMDChar::isUnicodeAlpha() accepts them. |pEnd| receives the position of
// the first character that is not part of the identifier, and
// |t->m_wstring| is set to the text [p, pEnd).
// Returns 0 on success. If a character rejected by XFA_FMDChar::isAvalid() is
// met, FMERR_UNSUPPORTED_CHAR is reported for that character and 1 is
// returned, with |pEnd| pointing at it.
uint32_t CXFA_FMLexer::Identifiers(CXFA_FMToken* t,
                                   const FX_WCHAR* p,
                                   const FX_WCHAR*& pEnd) {
  const FX_WCHAR* pStart = p;
  // Step over the first character without classifying it.
  XFA_FMDChar::inc(p);
  while (1) {
    const uint16_t ch = XFA_FMDChar::get(p);
    if (!XFA_FMDChar::isAvalid(p)) {
      pEnd = p;
      t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
      // Report the offending character itself, as Scan() and String() do,
      // not the identifier character that precedes it.
      Error(FMERR_UNSUPPORTED_CHAR, ch);
      return 1;
    }
    // NUL is rejected by isUnicodeAlpha(), so end of input also ends here.
    if (!XFA_FMDChar::isUnicodeAlpha(ch)) {
      break;
    }
    XFA_FMDChar::inc(p);
  }
  pEnd = p;
  t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
  return 0;
}

// Skips a comment whose introducing character is at |p|. Scanning stops just
// after the first carriage return or line feed, or at a NUL character;
// |pEnd| receives that position. |m_uCurrentLine| is incremented only when
// the terminator consumed is a line feed.
void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) {
  for (XFA_FMDChar::inc(p); XFA_FMDChar::get(p) != 0; XFA_FMDChar::inc(p)) {
    const unsigned ch = XFA_FMDChar::get(p);
    if (ch != 0x0D && ch != 0x0A) {
      continue;
    }
    if (ch == 0x0A) {
      ++m_uCurrentLine;
    }
    // Consume the terminator itself and stop.
    XFA_FMDChar::inc(p);
    break;
  }
  pEnd = p;
}

// Classifies |str| as a keyword or a plain identifier. The hash of |str| is
// binary-searched among the keyWords entries from index KEYWORD_START to
// KEYWORD_END; a matching hash yields that entry's token, otherwise
// TOKidentifier is returned. Only hash values are compared, not the text.
// NOTE(review): the TRUE argument presumably selects case-insensitive
// hashing - confirm against FX_HashCode_String_GetW.
XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) {
  const int32_t iLen = str.GetLength();
  const uint32_t uKey = FX_HashCode_String_GetW(str.GetPtr(), iLen, TRUE);
  int32_t iLow = KEYWORD_START;
  int32_t iHigh = KEYWORD_END;
  do {
    const int32_t iProbe = (iLow + iHigh) / 2;
    const XFA_FMKeyword& entry = keyWords[iProbe];
    if (uKey == entry.m_uHash) {
      return entry.m_type;
    }
    if (uKey < entry.m_uHash) {
      iHigh = iProbe - 1;
    } else {
      iLow = iProbe + 1;
    }
  } while (iLow <= iHigh);
  return TOKidentifier;
}

// Records an error in |m_pErrorInfo|: the message is the template returned by
// XFA_FM_ErrorMsg(msg) formatted with the trailing variadic arguments, and
// the line number is the lexer's current line. Any previously stored message
// and line number are overwritten.
void CXFA_FMLexer::Error(XFA_FM_ERRMSG msg, ...) {
  va_list args;
  va_start(args, msg);
  m_pErrorInfo->message.FormatV(XFA_FM_ErrorMsg(msg), args);
  va_end(args);
  m_pErrorInfo->linenum = m_uCurrentLine;
}

// Returns TRUE when an error message has been stored in |m_pErrorInfo|,
// FALSE while the message is still empty.
FX_BOOL CXFA_FMLexer::HasError() const {
  return m_pErrorInfo->message.IsEmpty() ? FALSE : TRUE;
}