diff options
author | dsinclair <dsinclair@chromium.org> | 2016-08-11 07:50:08 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-08-11 07:50:08 -0700 |
commit | 22eeccb34f91f9932f7cec295bcaf641ba249e3a (patch) | |
tree | 9775d10c30ff05cabd4d8238a037d3865588ec94 | |
parent | b4d1b576bccb5ca6cebe29288af014bd0f512af1 (diff) | |
download | pdfium-22eeccb34f91f9932f7cec295bcaf641ba249e3a.tar.xz |
Guard against undefined shift.
This Cl fixes the CFDE_XMLSyntaxParser::ParseTextChar() to handle entities
where the value goes negative. Currently this could cause an undefined-shift
as due to the (ch << 4) calls. Instead, detect if the value has gone negative
and return a space character.
BUG=chromium:603489
Review-Url: https://codereview.chromium.org/2223823003
-rw-r--r-- | xfa/fde/xml/fde_xml_imp.cpp | 23 | ||||
-rw-r--r-- | xfa/fde/xml/fde_xml_imp_unittest.cpp | 123 |
2 files changed, 137 insertions, 9 deletions
diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp index 4c6dcf989c..8e14f021ce 100644 --- a/xfa/fde/xml/fde_xml_imp.cpp +++ b/xfa/fde/xml/fde_xml_imp.cpp @@ -14,6 +14,8 @@ namespace { +const uint32_t kMaxCharRange = 0x10ffff; + const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, {0x0A, 0x0A}, {0x0D, 0x0D}, @@ -1838,23 +1840,23 @@ FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { return m_iParsedBytes + nDstLen; } -void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR ch) { +void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR character) { if (m_iIndexInBlock == m_iAllocStep) { m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); if (!m_pCurrentBlock) { return; } } - m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_pCurrentBlock[m_iIndexInBlock++] = character; m_iDataLength++; - if (m_iEntityStart > -1 && ch == L';') { + if (m_iEntityStart > -1 && character == L';') { CFX_WideString csEntity; m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, (m_iDataLength - 1) - m_iEntityStart - 1); int32_t iLen = csEntity.GetLength(); if (iLen > 0) { if (csEntity[0] == L'#') { - ch = 0; + uint32_t ch = 0; FX_WCHAR w; if (iLen > 1 && csEntity[1] == L'x') { for (int32_t i = 2; i < iLen; i++) { @@ -1872,14 +1874,17 @@ void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR ch) { } else { for (int32_t i = 1; i < iLen; i++) { w = csEntity[i]; - if (w < L'0' || w > L'9') { + if (w < L'0' || w > L'9') break; - } ch = ch * 10 + w - L'0'; } } - if (ch != 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, ch); + if (ch > kMaxCharRange) + ch = ' '; + + character = static_cast<FX_WCHAR>(ch); + if (character != 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, character); m_iEntityStart++; } } else { @@ -1905,7 +1910,7 @@ void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR ch) { m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); m_iEntityStart = -1; } else { - if (m_iEntityStart < 0 && ch == L'&') { + if (m_iEntityStart < 0 && character == L'&') { m_iEntityStart = m_iDataLength - 1; } } diff --git a/xfa/fde/xml/fde_xml_imp_unittest.cpp b/xfa/fde/xml/fde_xml_imp_unittest.cpp index 0db63e4965..03cc426d32 100644 --- a/xfa/fde/xml/fde_xml_imp_unittest.cpp +++ b/xfa/fde/xml/fde_xml_imp_unittest.cpp @@ -520,3 +520,126 @@ TEST(CFDE_XMLSyntaxParser, CommentTwoDash) { EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } + +TEST(CFDE_XMLSyntaxParser, Entities) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">" + L"B" + L"T" + L"H" + L"ꭈ" + L"�" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"BTH\xab48", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, EntityOverflowHex) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">" + L"�" + L"�" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L" ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, EntityOverflowDecimal) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">" + L"�" + L"�" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L" ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} |