diff options
author | Ryan Harrison <rharrison@chromium.org> | 2017-07-25 11:10:15 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-07-25 15:26:28 +0000 |
commit | 952477dbee761a6e38ce675f2095bbfc9cfd7450 (patch) | |
tree | c248f649135de60d81d87d9b0afebc153a589d69 /xfa/fxfa/fm2js/cxfa_fmlexer.cpp | |
parent | 364d18b13575a2b569e9fc175cb0dd60106fa954 (diff) | |
download | pdfium-952477dbee761a6e38ce675f2095bbfc9cfd7450.tar.xz |
Clean up data passing in FormCalc Lexerchromium/3167
This CL removes the pattern used in the lexer of passing the lexing
member variables around as args to methods. Instead it uses the fact
that they are member variables in the methods.
This CL also includes renaming of variable and function names to
remove unneeded details or make them more precise.
BUG=pdfium:814
Change-Id: Id4c592338db9ff462835314252d39ab3b4b2b2ab
Reviewed-on: https://pdfium-review.googlesource.com/8850
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'xfa/fxfa/fm2js/cxfa_fmlexer.cpp')
-rw-r--r-- | xfa/fxfa/fm2js/cxfa_fmlexer.cpp | 382 |
1 files changed, 190 insertions, 192 deletions
diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp index 04db1dbe1c..c8a064fad0 100644 --- a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp +++ b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp @@ -14,17 +14,17 @@ namespace { -bool IsValidFormCalcCharacter(wchar_t c) { +bool IsFormCalcCharacter(wchar_t c) { return c == 0 || (c >= 0x09 && c <= 0x0D) || (c >= 0x20 && c <= 0xd7FF) || (c >= 0xE000 && c <= 0xFFFD); } -bool IsValidIdentifierCharacter(wchar_t c) { +bool IsIdentifierCharacter(wchar_t c) { return u_isalnum(c) || c == 0x005F || // '_' c == 0x0024; // '$' } -bool IsValidInitialIdentifierCharacter(wchar_t c) { +bool IsInitialIdentifierCharacter(wchar_t c) { return u_isalpha(c) || c == 0x005F || // '_' c == 0x0024 || // '$' c == 0x0021; // '!' @@ -101,9 +101,9 @@ XFA_FM_TOKEN TokenizeIdentifier(const CFX_WideStringC& str) { const XFA_FMKeyword* result = std::lower_bound(std::begin(keyWords) + KEYWORD_START, std::end(keyWords), key, [](const XFA_FMKeyword& iter, const uint32_t& val) { - return iter.m_uHash < val; + return iter.m_hash < val; }); - if (result != std::end(keyWords) && result->m_uHash == key) { + if (result != std::end(keyWords) && result->m_hash == key) { return result->m_type; } return TOKidentifier; @@ -117,46 +117,46 @@ const wchar_t* XFA_FM_KeywordToString(XFA_FM_TOKEN op) { return keyWords[op].m_keyword; } -CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {} +CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_line_num(1) {} -CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum) - : m_type(TOKreserver), m_uLinenum(uLineNum) {} +CXFA_FMToken::CXFA_FMToken(uint32_t line_num) + : m_type(TOKreserver), m_line_num(line_num) {} CXFA_FMToken::~CXFA_FMToken() {} CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc) - : m_ptr(wsFormCalc.unterminated_c_str()), - m_end(m_ptr + wsFormCalc.GetLength() - 1), - m_uCurrentLine(1), - m_LexerError(false) {} + : m_cursor(wsFormCalc.unterminated_c_str()), + m_end(m_cursor + wsFormCalc.GetLength() - 1), + m_current_line(1), + m_lexer_error(false) {} CXFA_FMLexer::~CXFA_FMLexer() {} CXFA_FMToken* CXFA_FMLexer::NextToken() { - m_pToken = pdfium::MakeUnique<CXFA_FMToken>(m_uCurrentLine); - while (m_ptr <= m_end && *m_ptr) { - if (!IsValidFormCalcCharacter(*m_ptr)) { - m_LexerError = true; - return m_pToken.get(); + m_token = pdfium::MakeUnique<CXFA_FMToken>(m_current_line); + while (m_cursor <= m_end && *m_cursor) { + if (!IsFormCalcCharacter(*m_cursor)) { + m_lexer_error = true; + return m_token.get(); } - switch (*m_ptr) { + switch (*m_cursor) { case 0x0A: - ++m_uCurrentLine; - m_pToken->m_uLinenum = m_uCurrentLine; - ++m_ptr; + ++m_current_line; + m_token->m_line_num = m_current_line; + ++m_cursor; break; case 0x0D: - ++m_ptr; + ++m_cursor; break; case ';': { - m_ptr = AdvanceForComment(m_ptr); + AdvanceForComment(); break; } case '"': { - m_pToken->m_type = TOKstring; - m_ptr = AdvanceForString(m_pToken.get(), m_ptr); - return m_pToken.get(); + m_token->m_type = TOKstring; + AdvanceForString(); + return m_token.get(); } case '0': case '1': @@ -168,257 +168,255 @@ CXFA_FMToken* CXFA_FMLexer::NextToken() { case '7': case '8': case '9': { - m_pToken->m_type = TOKnumber; - m_ptr = AdvanceForNumber(m_pToken.get(), m_ptr); - return m_pToken.get(); + m_token->m_type = TOKnumber; + AdvanceForNumber(); + return m_token.get(); } case '=': - ++m_ptr; - if (m_ptr > m_end) { - m_pToken->m_type = TOKassign; - return m_pToken.get(); + ++m_cursor; + if (m_cursor > m_end) { + m_token->m_type = TOKassign; + return m_token.get(); } - if (IsValidFormCalcCharacter(*m_ptr)) { - if (*m_ptr == '=') { - m_pToken->m_type = TOKeq; - ++m_ptr; + if (IsFormCalcCharacter(*m_cursor)) { + if (*m_cursor == '=') { + m_token->m_type = TOKeq; + ++m_cursor; } else { - m_pToken->m_type = TOKassign; + m_token->m_type = TOKassign; } } else { - m_LexerError = true; + m_lexer_error = true; } - return m_pToken.get(); + return m_token.get(); case '<': - ++m_ptr; - if (m_ptr > m_end) { - m_pToken->m_type = TOKlt; - return m_pToken.get(); + ++m_cursor; + if (m_cursor > m_end) { + m_token->m_type = TOKlt; + return m_token.get(); } - if (IsValidFormCalcCharacter(*m_ptr)) { - if (*m_ptr == '=') { - m_pToken->m_type = TOKle; - ++m_ptr; - } else if (*m_ptr == '>') { - m_pToken->m_type = TOKne; - ++m_ptr; + if (IsFormCalcCharacter(*m_cursor)) { + if (*m_cursor == '=') { + m_token->m_type = TOKle; + ++m_cursor; + } else if (*m_cursor == '>') { + m_token->m_type = TOKne; + ++m_cursor; } else { - m_pToken->m_type = TOKlt; + m_token->m_type = TOKlt; } } else { - m_LexerError = true; + m_lexer_error = true; } - return m_pToken.get(); + return m_token.get(); case '>': - ++m_ptr; - if (m_ptr > m_end) { - m_pToken->m_type = TOKgt; - return m_pToken.get(); + ++m_cursor; + if (m_cursor > m_end) { + m_token->m_type = TOKgt; + return m_token.get(); } - if (IsValidFormCalcCharacter(*m_ptr)) { - if (*m_ptr == '=') { - m_pToken->m_type = TOKge; - ++m_ptr; + if (IsFormCalcCharacter(*m_cursor)) { + if (*m_cursor == '=') { + m_token->m_type = TOKge; + ++m_cursor; } else { - m_pToken->m_type = TOKgt; + m_token->m_type = TOKgt; } } else { - m_LexerError = true; + m_lexer_error = true; } - return m_pToken.get(); + return m_token.get(); case ',': - m_pToken->m_type = TOKcomma; - ++m_ptr; - return m_pToken.get(); + m_token->m_type = TOKcomma; + ++m_cursor; + return m_token.get(); case '(': - m_pToken->m_type = TOKlparen; - ++m_ptr; - return m_pToken.get(); + m_token->m_type = TOKlparen; + ++m_cursor; + return m_token.get(); case ')': - m_pToken->m_type = TOKrparen; - ++m_ptr; - return m_pToken.get(); + m_token->m_type = TOKrparen; + ++m_cursor; + return m_token.get(); case '[': - m_pToken->m_type = TOKlbracket; - ++m_ptr; - return m_pToken.get(); + m_token->m_type = TOKlbracket; + ++m_cursor; + return m_token.get(); case ']': - m_pToken->m_type = TOKrbracket; - ++m_ptr; - return m_pToken.get(); + m_token->m_type = TOKrbracket; + ++m_cursor; + return m_token.get(); case '&': - ++m_ptr; - m_pToken->m_type = TOKand; - return m_pToken.get(); + ++m_cursor; + m_token->m_type = TOKand; + return m_token.get(); case '|': - ++m_ptr; - m_pToken->m_type = TOKor; - return m_pToken.get(); + ++m_cursor; + m_token->m_type = TOKor; + return m_token.get(); case '+': - ++m_ptr; - m_pToken->m_type = TOKplus; - return m_pToken.get(); + ++m_cursor; + m_token->m_type = TOKplus; + return m_token.get(); case '-': - ++m_ptr; - m_pToken->m_type = TOKminus; - return m_pToken.get(); + ++m_cursor; + m_token->m_type = TOKminus; + return m_token.get(); case '*': - ++m_ptr; - m_pToken->m_type = TOKmul; - return m_pToken.get(); + ++m_cursor; + m_token->m_type = TOKmul; + return m_token.get(); case '/': { - ++m_ptr; - if (m_ptr > m_end) { - m_pToken->m_type = TOKdiv; - return m_pToken.get(); + ++m_cursor; + if (m_cursor > m_end) { + m_token->m_type = TOKdiv; + return m_token.get(); } - if (!IsValidFormCalcCharacter(*m_ptr)) { - m_LexerError = true; - return m_pToken.get(); + if (!IsFormCalcCharacter(*m_cursor)) { + m_lexer_error = true; + return m_token.get(); } - if (*m_ptr != '/') { - m_pToken->m_type = TOKdiv; - return m_pToken.get(); + if (*m_cursor != '/') { + m_token->m_type = TOKdiv; + return m_token.get(); } - m_ptr = AdvanceForComment(m_ptr); + AdvanceForComment(); break; } case '.': - ++m_ptr; - if (m_ptr > m_end) { - m_pToken->m_type = TOKdot; - return m_pToken.get(); + ++m_cursor; + if (m_cursor > m_end) { + m_token->m_type = TOKdot; + return m_token.get(); } - if (IsValidFormCalcCharacter(*m_ptr)) { - if (*m_ptr == '.') { - m_pToken->m_type = TOKdotdot; - ++m_ptr; - } else if (*m_ptr == '*') { - m_pToken->m_type = TOKdotstar; - ++m_ptr; - } else if (*m_ptr == '#') { - m_pToken->m_type = TOKdotscream; - ++m_ptr; - } else if (*m_ptr <= '9' && *m_ptr >= '0') { - m_pToken->m_type = TOKnumber; - --m_ptr; - m_ptr = AdvanceForNumber(m_pToken.get(), m_ptr); - } else { - m_pToken->m_type = TOKdot; - } + if (!IsFormCalcCharacter(*m_cursor)) { + m_lexer_error = true; + return m_token.get(); + } + + if (*m_cursor == '.') { + m_token->m_type = TOKdotdot; + ++m_cursor; + } else if (*m_cursor == '*') { + m_token->m_type = TOKdotstar; + ++m_cursor; + } else if (*m_cursor == '#') { + m_token->m_type = TOKdotscream; + ++m_cursor; + } else if (*m_cursor <= '9' && *m_cursor >= '0') { + m_token->m_type = TOKnumber; + --m_cursor; + AdvanceForNumber(); } else { - m_LexerError = true; + m_token->m_type = TOKdot; } - return m_pToken.get(); + return m_token.get(); case 0x09: case 0x0B: case 0x0C: case 0x20: - ++m_ptr; + ++m_cursor; break; default: { - if (!IsValidInitialIdentifierCharacter(*m_ptr)) { - m_LexerError = true; - return m_pToken.get(); + if (!IsInitialIdentifierCharacter(*m_cursor)) { + m_lexer_error = true; + return m_token.get(); } - m_ptr = AdvanceForIdentifier(m_pToken.get(), m_ptr); - return m_pToken.get(); + AdvanceForIdentifier(); + return m_token.get(); } } } // If there isn't currently a token type then mark it EOF. - if (m_pToken->m_type == TOKreserver) - m_pToken->m_type = TOKeof; - return m_pToken.get(); + if (m_token->m_type == TOKreserver) + m_token->m_type = TOKeof; + return m_token.get(); } -const wchar_t* CXFA_FMLexer::AdvanceForNumber(CXFA_FMToken* t, - const wchar_t* p) { - // This will set pEnd to the character after the end of the AdvanceForNumber. - wchar_t* pEnd = nullptr; - if (p) - wcstod(const_cast<wchar_t*>(p), &pEnd); - if (pEnd && FXSYS_iswalpha(*pEnd)) { - m_LexerError = true; - return pEnd; +void CXFA_FMLexer::AdvanceForNumber() { + // This will set end to the character after the end of the number. + wchar_t* end = nullptr; + if (m_cursor) + wcstod(const_cast<wchar_t*>(m_cursor), &end); + if (end && FXSYS_iswalpha(*end)) { + m_lexer_error = true; + return; } - t->m_wstring = CFX_WideStringC(p, (pEnd - p)); - return pEnd; + m_token->m_string = CFX_WideStringC(m_cursor, (end - m_cursor)); + m_cursor = end; } -const wchar_t* CXFA_FMLexer::AdvanceForString(CXFA_FMToken* t, - const wchar_t* p) { - const wchar_t* start = p; - ++p; - while (p <= m_end && *p) { - if (!IsValidFormCalcCharacter(*p)) +void CXFA_FMLexer::AdvanceForString() { + const wchar_t* start = m_cursor; + ++m_cursor; + while (m_cursor <= m_end && *m_cursor) { + if (!IsFormCalcCharacter(*m_cursor)) break; - if (*p == '"') { + if (*m_cursor == '"') { // Check for escaped "s, i.e. "". - ++p; + ++m_cursor; // If the end of the input has been reached it was not escaped. - if (p > m_end) { - t->m_wstring = CFX_WideStringC(start, (p - start)); - return p; + if (m_cursor > m_end) { + m_token->m_string = CFX_WideStringC(start, (m_cursor - start)); + return; } // If the next character is not a " then the end of the string has been // found. - if (*p != '"') { - if (!IsValidFormCalcCharacter(*p)) { + if (*m_cursor != '"') { + if (!IsFormCalcCharacter(*m_cursor)) { break; } - t->m_wstring = CFX_WideStringC(start, (p - start)); - return p; + m_token->m_string = CFX_WideStringC(start, (m_cursor - start)); + return; } } - ++p; + ++m_cursor; } // Didn't find the end of the string. - t->m_wstring = CFX_WideStringC(start, (p - start)); - m_LexerError = true; - return p; + m_token->m_string = CFX_WideStringC(start, (m_cursor - start)); + m_lexer_error = true; } -const wchar_t* CXFA_FMLexer::AdvanceForIdentifier(CXFA_FMToken* t, - const wchar_t* p) { - const wchar_t* pStart = p; - ++p; - while (p <= m_end && *p) { - if (!IsValidFormCalcCharacter(*p)) { - t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); - m_LexerError = true; - return p; +void CXFA_FMLexer::AdvanceForIdentifier() { + const wchar_t* start = m_cursor; + ++m_cursor; + while (m_cursor <= m_end && *m_cursor) { + if (!IsFormCalcCharacter(*m_cursor)) { + m_token->m_string = CFX_WideStringC(start, (m_cursor - start)); + m_lexer_error = true; + return; } - if (!IsValidIdentifierCharacter(*p)) { + if (!IsIdentifierCharacter(*m_cursor)) { break; } - ++p; + ++m_cursor; } - t->m_wstring = CFX_WideStringC(pStart, (p - pStart)); - t->m_type = TokenizeIdentifier(t->m_wstring); - return p; + m_token->m_string = CFX_WideStringC(start, (m_cursor - start)); + m_token->m_type = TokenizeIdentifier(m_token->m_string); } -const wchar_t* CXFA_FMLexer::AdvanceForComment(const wchar_t* p) { - p++; - while (p <= m_end && *p) { - if (*p == L'\r') - return ++p; - if (*p == L'\n') { - ++m_uCurrentLine; - return ++p; +void CXFA_FMLexer::AdvanceForComment() { + m_cursor++; + while (m_cursor <= m_end && *m_cursor) { + if (*m_cursor == L'\r') { + ++m_cursor; + return; + } + if (*m_cursor == L'\n') { + ++m_current_line; + ++m_cursor; + return; } - ++p; + ++m_cursor; } - return p; } |