summaryrefslogtreecommitdiff
path: root/xfa
diff options
context:
space:
mode:
Diffstat (limited to 'xfa')
-rw-r--r--xfa/fde/xml/fde_xml_imp.cpp23
-rw-r--r--xfa/fde/xml/fde_xml_imp_unittest.cpp123
2 files changed, 137 insertions, 9 deletions
diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp
index 4c6dcf989c..8e14f021ce 100644
--- a/xfa/fde/xml/fde_xml_imp.cpp
+++ b/xfa/fde/xml/fde_xml_imp.cpp
@@ -14,6 +14,8 @@
namespace {
+const uint32_t kMaxCharRange = 0x10ffff;
+
const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09},
{0x0A, 0x0A},
{0x0D, 0x0D},
@@ -1838,23 +1840,23 @@ FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const {
return m_iParsedBytes + nDstLen;
}
-void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR ch) {
+void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR character) {
if (m_iIndexInBlock == m_iAllocStep) {
m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
if (!m_pCurrentBlock) {
return;
}
}
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_pCurrentBlock[m_iIndexInBlock++] = character;
m_iDataLength++;
- if (m_iEntityStart > -1 && ch == L';') {
+ if (m_iEntityStart > -1 && character == L';') {
CFX_WideString csEntity;
m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1,
(m_iDataLength - 1) - m_iEntityStart - 1);
int32_t iLen = csEntity.GetLength();
if (iLen > 0) {
if (csEntity[0] == L'#') {
- ch = 0;
+ uint32_t ch = 0;
FX_WCHAR w;
if (iLen > 1 && csEntity[1] == L'x') {
for (int32_t i = 2; i < iLen; i++) {
@@ -1872,14 +1874,17 @@ void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR ch) {
} else {
for (int32_t i = 1; i < iLen; i++) {
w = csEntity[i];
- if (w < L'0' || w > L'9') {
+ if (w < L'0' || w > L'9')
break;
- }
ch = ch * 10 + w - L'0';
}
}
- if (ch != 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, ch);
+ if (ch > kMaxCharRange)
+ ch = ' ';
+
+ character = static_cast<FX_WCHAR>(ch);
+ if (character != 0) {
+ m_BlockBuffer.SetTextChar(m_iEntityStart, character);
m_iEntityStart++;
}
} else {
@@ -1905,7 +1910,7 @@ void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR ch) {
m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
m_iEntityStart = -1;
} else {
- if (m_iEntityStart < 0 && ch == L'&') {
+ if (m_iEntityStart < 0 && character == L'&') {
m_iEntityStart = m_iDataLength - 1;
}
}
diff --git a/xfa/fde/xml/fde_xml_imp_unittest.cpp b/xfa/fde/xml/fde_xml_imp_unittest.cpp
index 0db63e4965..03cc426d32 100644
--- a/xfa/fde/xml/fde_xml_imp_unittest.cpp
+++ b/xfa/fde/xml/fde_xml_imp_unittest.cpp
@@ -520,3 +520,126 @@ TEST(CFDE_XMLSyntaxParser, CommentTwoDash) {
EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
+
+TEST(CFDE_XMLSyntaxParser, Entities) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">"
+ L"&#66;"
+ L"&#x54;"
+ L"&#x00000000000000000048;"
+ L"&#x0000000000000000AB48;"
+ L"&#x0000000000000000000;"
+ L"</script>";
+
+ // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR.
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"BTH\xab48", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, EntityOverflowHex) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">"
+ L"&#xaDBDFFFFF;"
+ L"&#xafffffffffffffffffffffffffffffffff;"
+ L"</script>";
+
+ // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR.
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L" ", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, EntityOverflowDecimal) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">"
+ L"&#2914910205;"
+ L"&#29149102052342342134521341234512351234213452315;"
+ L"</script>";
+
+ // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR.
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L" ", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
+}