summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: dsinclair <dsinclair@chromium.org> 2016-03-31 09:45:20 -0700
committer: Commit bot <commit-bot@chromium.org> 2016-03-31 09:45:20 -0700
commit: 11ac93cfdb9f4f25eee2ba60b947f992ab40ec54 (patch)
tree: 9738d127550a1aaa47e8828c3890c842da7dcf6f
parent: 5a839e938bad5b766a928fb545f0b0aba39e3829 (diff)
download: pdfium-11ac93cfdb9f4f25eee2ba60b947f992ab40ec54.tar.xz
Fix CData parsing in CFDE_XMLSyntaxParser.
This CL splits the handling of CData sections out into a separate phase of the parser. This fixes the issue of the CData parser getting confused by '<' characters inside the data section.

BUG=pdfium:90
Review URL: https://codereview.chromium.org/1842633004
-rw-r--r-- BUILD.gn 1
-rw-r--r-- pdfium.gyp 1
-rw-r--r-- xfa/fde/xml/fde_xml_imp.cpp 82
-rw-r--r-- xfa/fde/xml/fde_xml_imp.h 4
-rw-r--r-- xfa/fde/xml/fde_xml_imp_unittest.cpp 522
-rw-r--r-- xfa/fgas/crt/fgas_stream.cpp 6
-rw-r--r-- xfa/fgas/crt/fgas_system.cpp 3
7 files changed, 573 insertions, 46 deletions
diff --git a/BUILD.gn b/BUILD.gn
index a21c0da263..6780f2b53a 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -1613,6 +1613,7 @@ test("pdfium_unittests") {
include_dirs = []
if (pdf_enable_xfa) {
sources += [
+ "xfa/fde/xml/fde_xml_imp_unittest.cpp",
"xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder_unittest.cpp",
"xfa/fxfa/parser/xfa_utils_imp_unittest.cpp",
]
diff --git a/pdfium.gyp b/pdfium.gyp
index 06f960b1d4..63d0c03404 100644
--- a/pdfium.gyp
+++ b/pdfium.gyp
@@ -922,6 +922,7 @@
'conditions': [
['pdf_enable_xfa==1', {
'sources': [
+ 'xfa/fde/xml/fde_xml_imp_unittest.cpp',
'xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder_unittest.cpp',
'xfa/fxfa/parser/xfa_utils_imp_unittest.cpp',
],
diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp
index ef5a7e4ee3..0affe8a621 100644
--- a/xfa/fde/xml/fde_xml_imp.cpp
+++ b/xfa/fde/xml/fde_xml_imp.cpp
@@ -1491,7 +1491,7 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() {
FXSYS_assert(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized());
int32_t iStreamLength = m_pStream->GetLength();
int32_t iPos;
- FX_WCHAR ch;
+
uint32_t dwStatus = FDE_XMLSYNTAXSTATUS_None;
while (TRUE) {
if (m_pStart >= m_pEnd) {
@@ -1516,8 +1516,9 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() {
m_pStart = m_pBuffer;
m_pEnd = m_pBuffer + m_iBufferChars;
}
+
while (m_pStart < m_pEnd) {
- ch = *m_pStart;
+ FX_WCHAR ch = *m_pStart;
switch (m_dwMode) {
case FDE_XMLSYNTAXMODE_Text:
if (ch == L'<') {
@@ -1783,27 +1784,51 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() {
m_pStart++;
break;
case FDE_XMLSYNTAXMODE_SkipCommentOrDecl:
- if (ch == '-') {
+ if (FX_wcsnicmp(m_pStart, L"--", 2) == 0) {
+ m_pStart += 2;
m_dwMode = FDE_XMLSYNTAXMODE_SkipComment;
+ } else if (FX_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) {
+ m_pStart += 7;
+ m_dwMode = FDE_XMLSYNTAXMODE_SkipCData;
} else {
m_dwMode = FDE_XMLSYNTAXMODE_SkipDeclNode;
m_SkipChar = L'>';
m_SkipStack.Push(L'>');
}
break;
+ case FDE_XMLSYNTAXMODE_SkipCData: {
+ if (FX_wcsnicmp(m_pStart, L"]]>", 3) == 0) {
+ m_pStart += 3;
+ dwStatus = FDE_XMLSYNTAXSTATUS_CData;
+ m_iTextDataLength = m_iDataLength;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_dwMode = FDE_XMLSYNTAXMODE_Text;
+ } else {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock)
+ return FDE_XMLSYNTAXSTATUS_Error;
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_iDataLength++;
+ m_pStart++;
+ }
+ break;
+ }
case FDE_XMLSYNTAXMODE_SkipDeclNode:
if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
m_pStart++;
- if (ch != m_SkipChar) {
+ if (ch != m_SkipChar)
break;
- }
+
m_SkipStack.Pop();
uint32_t* pDWord = m_SkipStack.GetTopElement();
- if (pDWord == NULL) {
+ if (!pDWord)
m_dwMode = FDE_XMLSYNTAXMODE_Text;
- } else {
+ else
m_SkipChar = (FX_WCHAR)*pDWord;
- }
} else {
switch (ch) {
case L'<':
@@ -1830,20 +1855,10 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() {
if (ch == m_SkipChar) {
m_SkipStack.Pop();
uint32_t* pDWord = m_SkipStack.GetTopElement();
- if (pDWord == NULL) {
+ if (!pDWord) {
if (m_iDataLength >= 9) {
CFX_WideString wsHeader;
m_BlockBuffer.GetTextData(wsHeader, 0, 7);
- if (wsHeader.Equal(FX_WSTRC(L"[CDATA["))) {
- CFX_WideString wsTailer;
- m_BlockBuffer.GetTextData(wsTailer, m_iDataLength - 2,
- 2);
- if (wsTailer.Equal(FX_WSTRC(L"]]"))) {
- m_BlockBuffer.DeleteTextChars(7, TRUE);
- m_BlockBuffer.DeleteTextChars(2, FALSE);
- dwStatus = FDE_XMLSYNTAXSTATUS_CData;
- }
- }
}
m_iTextDataLength = m_iDataLength;
m_BlockBuffer.Reset();
@@ -1851,7 +1866,7 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() {
m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
m_dwMode = FDE_XMLSYNTAXMODE_Text;
} else {
- m_SkipChar = (FX_WCHAR)*pDWord;
+ m_SkipChar = static_cast<FX_WCHAR>(*pDWord);
}
}
break;
@@ -1871,27 +1886,11 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() {
}
break;
case FDE_XMLSYNTAXMODE_SkipComment:
- if (ch == L'-') {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock) {
- return FDE_XMLSYNTAXSTATUS_Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = L'-';
- m_iDataLength++;
- } else if (ch == L'>') {
- if (m_iDataLength > 1) {
- m_BlockBuffer.Reset();
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_dwMode = FDE_XMLSYNTAXMODE_Text;
- }
- } else {
- m_BlockBuffer.Reset();
- m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (FX_wcsnicmp(m_pStart, L"-->", 3) == 0) {
+ m_pStart += 2;
+ m_dwMode = FDE_XMLSYNTAXMODE_Text;
}
+
m_pStart++;
break;
case FDE_XMLSYNTAXMODE_TargetData:
@@ -1945,9 +1944,8 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() {
default:
break;
}
- if (dwStatus != FDE_XMLSYNTAXSTATUS_None) {
+ if (dwStatus != FDE_XMLSYNTAXSTATUS_None)
return dwStatus;
- }
}
}
return 0;
diff --git a/xfa/fde/xml/fde_xml_imp.h b/xfa/fde/xml/fde_xml_imp.h
index a4361bdfea..0f252c12dd 100644
--- a/xfa/fde/xml/fde_xml_imp.h
+++ b/xfa/fde/xml/fde_xml_imp.h
@@ -309,7 +309,9 @@ class CFDE_BlockBuffer : public CFX_Target {
#define FDE_XMLSYNTAXMODE_DeclCharData 15
#define FDE_XMLSYNTAXMODE_SkipComment 16
#define FDE_XMLSYNTAXMODE_SkipCommentOrDecl 17
-#define FDE_XMLSYNTAXMODE_TargetData 18
+#define FDE_XMLSYNTAXMODE_SkipCData 18
+#define FDE_XMLSYNTAXMODE_TargetData 19
+
class CFDE_XMLSyntaxParser : public CFX_Target {
public:
CFDE_XMLSyntaxParser();
diff --git a/xfa/fde/xml/fde_xml_imp_unittest.cpp b/xfa/fde/xml/fde_xml_imp_unittest.cpp
new file mode 100644
index 0000000000..42119ebe37
--- /dev/null
+++ b/xfa/fde/xml/fde_xml_imp_unittest.cpp
@@ -0,0 +1,522 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "xfa/fde/xml/fde_xml_imp.h"
+
+#include "xfa/fgas/crt/fgas_stream.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(CFDE_XMLSyntaxParser, CData) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <![CDATA[\n"
+ L" if (a[1] < 3)\n"
+ L" app.alert(\"Tclams\");\n"
+ L" ]]>\n"
+ L"</script>";
+
+ const FX_WCHAR* cdata =
+ L"\n"
+ L" if (a[1] < 3)\n"
+ L" app.alert(\"Tclams\");\n"
+ L" ";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(cdata, data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, CDataWithInnerScript) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <![CDATA[\n"
+ L" if (a[1] < 3)\n"
+ L" app.alert(\"Tclams\");\n"
+ L" </script>\n"
+ L" ]]>\n"
+ L"</script>";
+
+ const FX_WCHAR* cdata =
+ L"\n"
+ L" if (a[1] < 3)\n"
+ L" app.alert(\"Tclams\");\n"
+ L" </script>\n"
+ L" ";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(cdata, data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, ArrowBangArrow) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <!>\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, ArrowBangBracketArrow) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <![>\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ // Parser walks to end of input.
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, IncompleteCData) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <![CDATA>\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ // Parser walks to end of input.
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, UnClosedCData) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <![CDATA[\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ // Parser walks to end of input.
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, EmptyCData) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <![CDATA[]]>\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, Comment) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <!-- A Comment -->\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, IncorrectCommentStart) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <!- A Comment -->\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, CommentEmpty) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <!---->\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, CommentThreeDash) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <!--->\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
+
+TEST(CFDE_XMLSyntaxParser, CommentTwoDash) {
+ const FX_WCHAR* input =
+ L"<script contentType=\"application/x-javascript\">\n"
+ L" <!-->\n"
+ L"</script>";
+
+ // The stream takes a uint8_t buffer, so scale the length by sizeof(FX_WCHAR).
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR);
+ std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream(
+ reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0));
+ CFDE_XMLSyntaxParser parser;
+ parser.Init(stream.get(), 256);
+
+ CFX_WideString data;
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse());
+ parser.GetTagName(data);
+ EXPECT_EQ(L"script", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse());
+ parser.GetAttributeName(data);
+ EXPECT_EQ(L"contentType", data);
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse());
+ parser.GetAttributeValue(data);
+ EXPECT_EQ(L"application/x-javascript", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse());
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse());
+ parser.GetTextData(data);
+ EXPECT_EQ(L"\n ", data);
+
+ EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse());
+}
diff --git a/xfa/fgas/crt/fgas_stream.cpp b/xfa/fgas/crt/fgas_stream.cpp
index 8ab2f9fd37..94468e23da 100644
--- a/xfa/fgas/crt/fgas_stream.cpp
+++ b/xfa/fgas/crt/fgas_stream.cpp
@@ -870,8 +870,9 @@ int32_t CFX_BufferStreamImp::ReadString(FX_WCHAR* pStr,
}
const FX_WCHAR* pSrc = (const FX_WCHAR*)(FX_CHAR*)(m_pData + m_iPosition);
int32_t iCount = 0;
- while (*pSrc != L'\0' && iCount < iLen) {
- *pStr++ = *pSrc++, iCount++;
+ while (*pSrc && iCount < iLen) {
+ *pStr++ = *pSrc++;
+ iCount++;
}
m_iPosition += iCount * 2;
bEOS = (*pSrc == L'\0') || (m_iPosition >= m_iLength);
@@ -1345,6 +1346,7 @@ int32_t CFX_Stream::ReadString(FX_WCHAR* pStr,
}
return iLen;
}
+
int32_t CFX_Stream::WriteData(const uint8_t* pBuffer, int32_t iBufferSize) {
FXSYS_assert(pBuffer != NULL && iBufferSize > 0);
if (m_pStreamImp == NULL) {
diff --git a/xfa/fgas/crt/fgas_system.cpp b/xfa/fgas/crt/fgas_system.cpp
index df1a9d044a..7ba2d924ca 100644
--- a/xfa/fgas/crt/fgas_system.cpp
+++ b/xfa/fgas/crt/fgas_system.cpp
@@ -31,7 +31,8 @@ inline int32_t FX_tolower(int32_t ch) {
int32_t FX_wcsnicmp(const FX_WCHAR* s1, const FX_WCHAR* s2, size_t count) {
FXSYS_assert(s1 != NULL && s2 != NULL && count > 0);
- FX_WCHAR wch1 = 0, wch2 = 0;
+ FX_WCHAR wch1 = 0;
+ FX_WCHAR wch2 = 0;
while (count-- > 0) {
wch1 = (FX_WCHAR)FX_tolower(*s1++);
wch2 = (FX_WCHAR)FX_tolower(*s2++);