summary | refs | log | tree | commit | diff
path: root/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
diff options
context:
space:
mode:
author: Ryan Harrison <rharrison@chromium.org> 2017-07-25 11:10:15 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-07-25 15:26:28 +0000
commit: 952477dbee761a6e38ce675f2095bbfc9cfd7450 (patch)
tree: c248f649135de60d81d87d9b0afebc153a589d69 /xfa/fxfa/fm2js/cxfa_fmlexer.cpp
parent: 364d18b13575a2b569e9fc175cb0dd60106fa954 (diff)
download: pdfium-952477dbee761a6e38ce675f2095bbfc9cfd7450.tar.xz
Clean up data passing in FormCalc Lexer [chromium/3167]
This CL removes the pattern used in the lexer of passing the lexing member variables around as arguments to methods. Instead, the methods now access those member variables directly. This CL also renames variables and functions to remove unneeded details or make the names more precise.

BUG=pdfium:814

Change-Id: Id4c592338db9ff462835314252d39ab3b4b2b2ab
Reviewed-on: https://pdfium-review.googlesource.com/8850
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'xfa/fxfa/fm2js/cxfa_fmlexer.cpp')
-rw-r--r-- xfa/fxfa/fm2js/cxfa_fmlexer.cpp 382
1 file changed, 190 insertions, 192 deletions
diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
index 04db1dbe1c..c8a064fad0 100644
--- a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
@@ -14,17 +14,17 @@
namespace {
-bool IsValidFormCalcCharacter(wchar_t c) {
+bool IsFormCalcCharacter(wchar_t c) {
return c == 0 || (c >= 0x09 && c <= 0x0D) || (c >= 0x20 && c <= 0xd7FF) ||
(c >= 0xE000 && c <= 0xFFFD);
}
-bool IsValidIdentifierCharacter(wchar_t c) {
+bool IsIdentifierCharacter(wchar_t c) {
return u_isalnum(c) || c == 0x005F || // '_'
c == 0x0024; // '$'
}
-bool IsValidInitialIdentifierCharacter(wchar_t c) {
+bool IsInitialIdentifierCharacter(wchar_t c) {
return u_isalpha(c) || c == 0x005F || // '_'
c == 0x0024 || // '$'
c == 0x0021; // '!'
@@ -101,9 +101,9 @@ XFA_FM_TOKEN TokenizeIdentifier(const CFX_WideStringC& str) {
const XFA_FMKeyword* result =
std::lower_bound(std::begin(keyWords) + KEYWORD_START, std::end(keyWords),
key, [](const XFA_FMKeyword& iter, const uint32_t& val) {
- return iter.m_uHash < val;
+ return iter.m_hash < val;
});
- if (result != std::end(keyWords) && result->m_uHash == key) {
+ if (result != std::end(keyWords) && result->m_hash == key) {
return result->m_type;
}
return TOKidentifier;
@@ -117,46 +117,46 @@ const wchar_t* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
return keyWords[op].m_keyword;
}
-CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {}
+CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_line_num(1) {}
-CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum)
- : m_type(TOKreserver), m_uLinenum(uLineNum) {}
+CXFA_FMToken::CXFA_FMToken(uint32_t line_num)
+ : m_type(TOKreserver), m_line_num(line_num) {}
CXFA_FMToken::~CXFA_FMToken() {}
CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc)
- : m_ptr(wsFormCalc.unterminated_c_str()),
- m_end(m_ptr + wsFormCalc.GetLength() - 1),
- m_uCurrentLine(1),
- m_LexerError(false) {}
+ : m_cursor(wsFormCalc.unterminated_c_str()),
+ m_end(m_cursor + wsFormCalc.GetLength() - 1),
+ m_current_line(1),
+ m_lexer_error(false) {}
CXFA_FMLexer::~CXFA_FMLexer() {}
CXFA_FMToken* CXFA_FMLexer::NextToken() {
- m_pToken = pdfium::MakeUnique<CXFA_FMToken>(m_uCurrentLine);
- while (m_ptr <= m_end && *m_ptr) {
- if (!IsValidFormCalcCharacter(*m_ptr)) {
- m_LexerError = true;
- return m_pToken.get();
+ m_token = pdfium::MakeUnique<CXFA_FMToken>(m_current_line);
+ while (m_cursor <= m_end && *m_cursor) {
+ if (!IsFormCalcCharacter(*m_cursor)) {
+ m_lexer_error = true;
+ return m_token.get();
}
- switch (*m_ptr) {
+ switch (*m_cursor) {
case 0x0A:
- ++m_uCurrentLine;
- m_pToken->m_uLinenum = m_uCurrentLine;
- ++m_ptr;
+ ++m_current_line;
+ m_token->m_line_num = m_current_line;
+ ++m_cursor;
break;
case 0x0D:
- ++m_ptr;
+ ++m_cursor;
break;
case ';': {
- m_ptr = AdvanceForComment(m_ptr);
+ AdvanceForComment();
break;
}
case '"': {
- m_pToken->m_type = TOKstring;
- m_ptr = AdvanceForString(m_pToken.get(), m_ptr);
- return m_pToken.get();
+ m_token->m_type = TOKstring;
+ AdvanceForString();
+ return m_token.get();
}
case '0':
case '1':
@@ -168,257 +168,255 @@ CXFA_FMToken* CXFA_FMLexer::NextToken() {
case '7':
case '8':
case '9': {
- m_pToken->m_type = TOKnumber;
- m_ptr = AdvanceForNumber(m_pToken.get(), m_ptr);
- return m_pToken.get();
+ m_token->m_type = TOKnumber;
+ AdvanceForNumber();
+ return m_token.get();
}
case '=':
- ++m_ptr;
- if (m_ptr > m_end) {
- m_pToken->m_type = TOKassign;
- return m_pToken.get();
+ ++m_cursor;
+ if (m_cursor > m_end) {
+ m_token->m_type = TOKassign;
+ return m_token.get();
}
- if (IsValidFormCalcCharacter(*m_ptr)) {
- if (*m_ptr == '=') {
- m_pToken->m_type = TOKeq;
- ++m_ptr;
+ if (IsFormCalcCharacter(*m_cursor)) {
+ if (*m_cursor == '=') {
+ m_token->m_type = TOKeq;
+ ++m_cursor;
} else {
- m_pToken->m_type = TOKassign;
+ m_token->m_type = TOKassign;
}
} else {
- m_LexerError = true;
+ m_lexer_error = true;
}
- return m_pToken.get();
+ return m_token.get();
case '<':
- ++m_ptr;
- if (m_ptr > m_end) {
- m_pToken->m_type = TOKlt;
- return m_pToken.get();
+ ++m_cursor;
+ if (m_cursor > m_end) {
+ m_token->m_type = TOKlt;
+ return m_token.get();
}
- if (IsValidFormCalcCharacter(*m_ptr)) {
- if (*m_ptr == '=') {
- m_pToken->m_type = TOKle;
- ++m_ptr;
- } else if (*m_ptr == '>') {
- m_pToken->m_type = TOKne;
- ++m_ptr;
+ if (IsFormCalcCharacter(*m_cursor)) {
+ if (*m_cursor == '=') {
+ m_token->m_type = TOKle;
+ ++m_cursor;
+ } else if (*m_cursor == '>') {
+ m_token->m_type = TOKne;
+ ++m_cursor;
} else {
- m_pToken->m_type = TOKlt;
+ m_token->m_type = TOKlt;
}
} else {
- m_LexerError = true;
+ m_lexer_error = true;
}
- return m_pToken.get();
+ return m_token.get();
case '>':
- ++m_ptr;
- if (m_ptr > m_end) {
- m_pToken->m_type = TOKgt;
- return m_pToken.get();
+ ++m_cursor;
+ if (m_cursor > m_end) {
+ m_token->m_type = TOKgt;
+ return m_token.get();
}
- if (IsValidFormCalcCharacter(*m_ptr)) {
- if (*m_ptr == '=') {
- m_pToken->m_type = TOKge;
- ++m_ptr;
+ if (IsFormCalcCharacter(*m_cursor)) {
+ if (*m_cursor == '=') {
+ m_token->m_type = TOKge;
+ ++m_cursor;
} else {
- m_pToken->m_type = TOKgt;
+ m_token->m_type = TOKgt;
}
} else {
- m_LexerError = true;
+ m_lexer_error = true;
}
- return m_pToken.get();
+ return m_token.get();
case ',':
- m_pToken->m_type = TOKcomma;
- ++m_ptr;
- return m_pToken.get();
+ m_token->m_type = TOKcomma;
+ ++m_cursor;
+ return m_token.get();
case '(':
- m_pToken->m_type = TOKlparen;
- ++m_ptr;
- return m_pToken.get();
+ m_token->m_type = TOKlparen;
+ ++m_cursor;
+ return m_token.get();
case ')':
- m_pToken->m_type = TOKrparen;
- ++m_ptr;
- return m_pToken.get();
+ m_token->m_type = TOKrparen;
+ ++m_cursor;
+ return m_token.get();
case '[':
- m_pToken->m_type = TOKlbracket;
- ++m_ptr;
- return m_pToken.get();
+ m_token->m_type = TOKlbracket;
+ ++m_cursor;
+ return m_token.get();
case ']':
- m_pToken->m_type = TOKrbracket;
- ++m_ptr;
- return m_pToken.get();
+ m_token->m_type = TOKrbracket;
+ ++m_cursor;
+ return m_token.get();
case '&':
- ++m_ptr;
- m_pToken->m_type = TOKand;
- return m_pToken.get();
+ ++m_cursor;
+ m_token->m_type = TOKand;
+ return m_token.get();
case '|':
- ++m_ptr;
- m_pToken->m_type = TOKor;
- return m_pToken.get();
+ ++m_cursor;
+ m_token->m_type = TOKor;
+ return m_token.get();
case '+':
- ++m_ptr;
- m_pToken->m_type = TOKplus;
- return m_pToken.get();
+ ++m_cursor;
+ m_token->m_type = TOKplus;
+ return m_token.get();
case '-':
- ++m_ptr;
- m_pToken->m_type = TOKminus;
- return m_pToken.get();
+ ++m_cursor;
+ m_token->m_type = TOKminus;
+ return m_token.get();
case '*':
- ++m_ptr;
- m_pToken->m_type = TOKmul;
- return m_pToken.get();
+ ++m_cursor;
+ m_token->m_type = TOKmul;
+ return m_token.get();
case '/': {
- ++m_ptr;
- if (m_ptr > m_end) {
- m_pToken->m_type = TOKdiv;
- return m_pToken.get();
+ ++m_cursor;
+ if (m_cursor > m_end) {
+ m_token->m_type = TOKdiv;
+ return m_token.get();
}
- if (!IsValidFormCalcCharacter(*m_ptr)) {
- m_LexerError = true;
- return m_pToken.get();
+ if (!IsFormCalcCharacter(*m_cursor)) {
+ m_lexer_error = true;
+ return m_token.get();
}
- if (*m_ptr != '/') {
- m_pToken->m_type = TOKdiv;
- return m_pToken.get();
+ if (*m_cursor != '/') {
+ m_token->m_type = TOKdiv;
+ return m_token.get();
}
- m_ptr = AdvanceForComment(m_ptr);
+ AdvanceForComment();
break;
}
case '.':
- ++m_ptr;
- if (m_ptr > m_end) {
- m_pToken->m_type = TOKdot;
- return m_pToken.get();
+ ++m_cursor;
+ if (m_cursor > m_end) {
+ m_token->m_type = TOKdot;
+ return m_token.get();
}
- if (IsValidFormCalcCharacter(*m_ptr)) {
- if (*m_ptr == '.') {
- m_pToken->m_type = TOKdotdot;
- ++m_ptr;
- } else if (*m_ptr == '*') {
- m_pToken->m_type = TOKdotstar;
- ++m_ptr;
- } else if (*m_ptr == '#') {
- m_pToken->m_type = TOKdotscream;
- ++m_ptr;
- } else if (*m_ptr <= '9' && *m_ptr >= '0') {
- m_pToken->m_type = TOKnumber;
- --m_ptr;
- m_ptr = AdvanceForNumber(m_pToken.get(), m_ptr);
- } else {
- m_pToken->m_type = TOKdot;
- }
+ if (!IsFormCalcCharacter(*m_cursor)) {
+ m_lexer_error = true;
+ return m_token.get();
+ }
+
+ if (*m_cursor == '.') {
+ m_token->m_type = TOKdotdot;
+ ++m_cursor;
+ } else if (*m_cursor == '*') {
+ m_token->m_type = TOKdotstar;
+ ++m_cursor;
+ } else if (*m_cursor == '#') {
+ m_token->m_type = TOKdotscream;
+ ++m_cursor;
+ } else if (*m_cursor <= '9' && *m_cursor >= '0') {
+ m_token->m_type = TOKnumber;
+ --m_cursor;
+ AdvanceForNumber();
} else {
- m_LexerError = true;
+ m_token->m_type = TOKdot;
}
- return m_pToken.get();
+ return m_token.get();
case 0x09:
case 0x0B:
case 0x0C:
case 0x20:
- ++m_ptr;
+ ++m_cursor;
break;
default: {
- if (!IsValidInitialIdentifierCharacter(*m_ptr)) {
- m_LexerError = true;
- return m_pToken.get();
+ if (!IsInitialIdentifierCharacter(*m_cursor)) {
+ m_lexer_error = true;
+ return m_token.get();
}
- m_ptr = AdvanceForIdentifier(m_pToken.get(), m_ptr);
- return m_pToken.get();
+ AdvanceForIdentifier();
+ return m_token.get();
}
}
}
// If there isn't currently a token type then mark it EOF.
- if (m_pToken->m_type == TOKreserver)
- m_pToken->m_type = TOKeof;
- return m_pToken.get();
+ if (m_token->m_type == TOKreserver)
+ m_token->m_type = TOKeof;
+ return m_token.get();
}
-const wchar_t* CXFA_FMLexer::AdvanceForNumber(CXFA_FMToken* t,
- const wchar_t* p) {
- // This will set pEnd to the character after the end of the AdvanceForNumber.
- wchar_t* pEnd = nullptr;
- if (p)
- wcstod(const_cast<wchar_t*>(p), &pEnd);
- if (pEnd && FXSYS_iswalpha(*pEnd)) {
- m_LexerError = true;
- return pEnd;
+void CXFA_FMLexer::AdvanceForNumber() {
+ // This will set end to the character after the end of the number.
+ wchar_t* end = nullptr;
+ if (m_cursor)
+ wcstod(const_cast<wchar_t*>(m_cursor), &end);
+ if (end && FXSYS_iswalpha(*end)) {
+ m_lexer_error = true;
+ return;
}
- t->m_wstring = CFX_WideStringC(p, (pEnd - p));
- return pEnd;
+ m_token->m_string = CFX_WideStringC(m_cursor, (end - m_cursor));
+ m_cursor = end;
}
-const wchar_t* CXFA_FMLexer::AdvanceForString(CXFA_FMToken* t,
- const wchar_t* p) {
- const wchar_t* start = p;
- ++p;
- while (p <= m_end && *p) {
- if (!IsValidFormCalcCharacter(*p))
+void CXFA_FMLexer::AdvanceForString() {
+ const wchar_t* start = m_cursor;
+ ++m_cursor;
+ while (m_cursor <= m_end && *m_cursor) {
+ if (!IsFormCalcCharacter(*m_cursor))
break;
- if (*p == '"') {
+ if (*m_cursor == '"') {
// Check for escaped "s, i.e. "".
- ++p;
+ ++m_cursor;
// If the end of the input has been reached it was not escaped.
- if (p > m_end) {
- t->m_wstring = CFX_WideStringC(start, (p - start));
- return p;
+ if (m_cursor > m_end) {
+ m_token->m_string = CFX_WideStringC(start, (m_cursor - start));
+ return;
}
// If the next character is not a " then the end of the string has been
// found.
- if (*p != '"') {
- if (!IsValidFormCalcCharacter(*p)) {
+ if (*m_cursor != '"') {
+ if (!IsFormCalcCharacter(*m_cursor)) {
break;
}
- t->m_wstring = CFX_WideStringC(start, (p - start));
- return p;
+ m_token->m_string = CFX_WideStringC(start, (m_cursor - start));
+ return;
}
}
- ++p;
+ ++m_cursor;
}
// Didn't find the end of the string.
- t->m_wstring = CFX_WideStringC(start, (p - start));
- m_LexerError = true;
- return p;
+ m_token->m_string = CFX_WideStringC(start, (m_cursor - start));
+ m_lexer_error = true;
}
-const wchar_t* CXFA_FMLexer::AdvanceForIdentifier(CXFA_FMToken* t,
- const wchar_t* p) {
- const wchar_t* pStart = p;
- ++p;
- while (p <= m_end && *p) {
- if (!IsValidFormCalcCharacter(*p)) {
- t->m_wstring = CFX_WideStringC(pStart, (p - pStart));
- m_LexerError = true;
- return p;
+void CXFA_FMLexer::AdvanceForIdentifier() {
+ const wchar_t* start = m_cursor;
+ ++m_cursor;
+ while (m_cursor <= m_end && *m_cursor) {
+ if (!IsFormCalcCharacter(*m_cursor)) {
+ m_token->m_string = CFX_WideStringC(start, (m_cursor - start));
+ m_lexer_error = true;
+ return;
}
- if (!IsValidIdentifierCharacter(*p)) {
+ if (!IsIdentifierCharacter(*m_cursor)) {
break;
}
- ++p;
+ ++m_cursor;
}
- t->m_wstring = CFX_WideStringC(pStart, (p - pStart));
- t->m_type = TokenizeIdentifier(t->m_wstring);
- return p;
+ m_token->m_string = CFX_WideStringC(start, (m_cursor - start));
+ m_token->m_type = TokenizeIdentifier(m_token->m_string);
}
-const wchar_t* CXFA_FMLexer::AdvanceForComment(const wchar_t* p) {
- p++;
- while (p <= m_end && *p) {
- if (*p == L'\r')
- return ++p;
- if (*p == L'\n') {
- ++m_uCurrentLine;
- return ++p;
+void CXFA_FMLexer::AdvanceForComment() {
+ m_cursor++;
+ while (m_cursor <= m_end && *m_cursor) {
+ if (*m_cursor == L'\r') {
+ ++m_cursor;
+ return;
+ }
+ if (*m_cursor == L'\n') {
+ ++m_current_line;
+ ++m_cursor;
+ return;
}
- ++p;
+ ++m_cursor;
}
- return p;
}