From a169364e46956875db35fb1baacc4a0a1ee17f08 Mon Sep 17 00:00:00 2001 From: Ryan Harrison Date: Wed, 16 Aug 2017 13:56:12 -0400 Subject: Add parse depth limit to FormCalc parser Due to the recursive nature of the FormCalc parser, deeply nested expressions can lead to memory being exhausted. This check is being added to have the parser exit early instead of running out of memory. This should reduce the number of false positives about addressing issues being found by fuzzers. BUG=chromium:752433 Change-Id: I511ecfb07e32073555e1fd1658f3b8b47f1a5a91 Reviewed-on: https://pdfium-review.googlesource.com/11170 Commit-Queue: Ryan Harrison Reviewed-by: Tom Sepez --- xfa/fxfa/fm2js/cxfa_fmparser.cpp | 74 ++++++++++++++++++++----------- xfa/fxfa/fm2js/cxfa_fmparser.h | 7 +++ xfa/fxfa/fm2js/cxfa_fmparser_unittest.cpp | 7 +++ 3 files changed, 63 insertions(+), 25 deletions(-) diff --git a/xfa/fxfa/fm2js/cxfa_fmparser.cpp b/xfa/fxfa/fm2js/cxfa_fmparser.cpp index 4cd9a747a9..42a65e5838 100644 --- a/xfa/fxfa/fm2js/cxfa_fmparser.cpp +++ b/xfa/fxfa/fm2js/cxfa_fmparser.cpp @@ -14,12 +14,13 @@ namespace { -const int kMaxAssignmentChainLength = 12; +const unsigned int kMaxAssignmentChainLength = 12; +const unsigned int kMaxParseDepth = 2000; } // namespace CXFA_FMParser::CXFA_FMParser(const CFX_WideStringC& wsFormcalc) - : m_error(false) { + : m_error(false), m_parse_depth(0), m_max_parse_depth(kMaxParseDepth) { m_lexer = pdfium::MakeUnique(wsFormcalc); m_token = m_lexer->NextToken(); } @@ -56,13 +57,18 @@ bool CXFA_FMParser::CheckThenNext(XFA_FM_TOKEN op) { return NextToken(); } +bool CXFA_FMParser::IncrementParseDepthAndCheck() { + return ++m_parse_depth < m_max_parse_depth; +} + std::vector> CXFA_FMParser::ParseTopExpression() { + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) + return std::vector>(); + std::unique_ptr expr; std::vector> expressions; - if (HasError()) - return expressions; - while (!HasError()) { if (m_token->m_type == TOKeof || m_token->m_type == TOKendfunc || m_token->m_type == TOKendif || m_token->m_type == TOKelseif || @@ -73,18 +79,16 @@ CXFA_FMParser::ParseTopExpression() { expr = m_token->m_type == TOKfunc ? ParseFunction() : ParseExpression(); if (!expr) { m_error = true; - expressions.clear(); break; } expressions.push_back(std::move(expr)); } - if (HasError()) - expressions.clear(); - return expressions; + return std::vector>(); } std::unique_ptr CXFA_FMParser::ParseFunction() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; CFX_WideStringC ident; @@ -146,7 +150,8 @@ std::unique_ptr CXFA_FMParser::ParseFunction() { } std::unique_ptr CXFA_FMParser::ParseExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; std::unique_ptr expr; @@ -198,7 +203,8 @@ std::unique_ptr CXFA_FMParser::ParseExpression() { } std::unique_ptr CXFA_FMParser::ParseVarExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; CFX_WideStringC ident; @@ -254,7 +260,8 @@ CXFA_FMParser::ParseSimpleExpression() { } std::unique_ptr CXFA_FMParser::ParseExpExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -266,7 +273,8 @@ std::unique_ptr CXFA_FMParser::ParseExpExpression() { std::unique_ptr CXFA_FMParser::ParseLogicalOrExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -300,7 +308,8 @@ CXFA_FMParser::ParseLogicalOrExpression() { std::unique_ptr CXFA_FMParser::ParseLogicalAndExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -333,7 +342,8 @@ CXFA_FMParser::ParseLogicalAndExpression() { std::unique_ptr CXFA_FMParser::ParseEqualityExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -377,7 +387,8 @@ CXFA_FMParser::ParseEqualityExpression() { std::unique_ptr CXFA_FMParser::ParseRelationalExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -446,7 +457,8 @@ CXFA_FMParser::ParseRelationalExpression() { std::unique_ptr CXFA_FMParser::ParseAddtiveExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -489,7 +501,8 @@ CXFA_FMParser::ParseAddtiveExpression() { std::unique_ptr CXFA_FMParser::ParseMultiplicativeExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -531,7 +544,8 @@ CXFA_FMParser::ParseMultiplicativeExpression() { } std::unique_ptr CXFA_FMParser::ParseUnaryExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; std::unique_ptr expr; @@ -578,7 +592,8 @@ std::unique_ptr CXFA_FMParser::ParseUnaryExpression() { std::unique_ptr CXFA_FMParser::ParsePrimaryExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; std::unique_ptr expr; @@ -661,6 +676,7 @@ std::unique_ptr CXFA_FMParser::ParsePostExpression( ParseSimpleExpression(); if (!simple_expr) return nullptr; + expressions.push_back(std::move(simple_expr)); if (m_token->m_type == TOKcomma) { if (!NextToken()) @@ -834,7 +850,8 @@ std::unique_ptr CXFA_FMParser::ParsePostExpression( } std::unique_ptr CXFA_FMParser::ParseIndexExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -876,6 +893,10 @@ std::unique_ptr CXFA_FMParser::ParseIndexExpression() { } std::unique_ptr CXFA_FMParser::ParseParenExpression() { + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) + return nullptr; + if (!CheckThenNext(TOKlparen)) return nullptr; @@ -951,7 +972,8 @@ std::unique_ptr CXFA_FMParser::ParseBlockExpression() { } std::unique_ptr CXFA_FMParser::ParseIfExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -1021,7 +1043,8 @@ std::unique_ptr CXFA_FMParser::ParseIfExpression() { } std::unique_ptr CXFA_FMParser::ParseWhileExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; uint32_t line = m_token->m_line_num; @@ -1055,7 +1078,8 @@ CXFA_FMParser::ParseSubassignmentInForExpression() { } std::unique_ptr CXFA_FMParser::ParseForExpression() { - if (HasError()) + CFX_AutoRestorer restorer(&m_parse_depth); + if (HasError() || !IncrementParseDepthAndCheck()) return nullptr; CFX_WideStringC wsVariant; diff --git a/xfa/fxfa/fm2js/cxfa_fmparser.h b/xfa/fxfa/fm2js/cxfa_fmparser.h index 66eb1f8392..ddfaa1aaa6 100644 --- a/xfa/fxfa/fm2js/cxfa_fmparser.h +++ b/xfa/fxfa/fm2js/cxfa_fmparser.h @@ -21,9 +21,14 @@ class CXFA_FMParser { std::unique_ptr Parse(); bool HasError() const; + void SetMaxParseDepthForTest(unsigned long max_depth) { + m_max_parse_depth = max_depth; + } + private: bool NextToken(); bool CheckThenNext(XFA_FM_TOKEN op); + bool IncrementParseDepthAndCheck(); std::vector> ParseTopExpression(); std::unique_ptr ParseFunction(); @@ -54,6 +59,8 @@ class CXFA_FMParser { std::unique_ptr m_lexer; std::unique_ptr m_token; bool m_error; + unsigned long m_parse_depth; + unsigned long m_max_parse_depth; }; #endif // XFA_FXFA_FM2JS_CXFA_FMPARSER_H_ diff --git a/xfa/fxfa/fm2js/cxfa_fmparser_unittest.cpp b/xfa/fxfa/fm2js/cxfa_fmparser_unittest.cpp index 9907890e83..214fd4aec3 100644 --- a/xfa/fxfa/fm2js/cxfa_fmparser_unittest.cpp +++ b/xfa/fxfa/fm2js/cxfa_fmparser_unittest.cpp @@ -107,3 +107,10 @@ TEST(CXFA_FMParserTest, Parse) { EXPECT_TRUE(ast->ToJavaScript(buf)); EXPECT_EQ(ret, buf.AsStringC()); } + +TEST(CXFA_FMParserTest, MaxParseDepth) { + auto parser = pdfium::MakeUnique(L"foo(bar[baz(fizz[0])])"); + parser->SetMaxParseDepthForTest(5); + EXPECT_EQ(nullptr, parser->Parse()); + EXPECT_TRUE(parser->HasError()); +} -- cgit v1.2.3