diff options
author | dsinclair <dsinclair@chromium.org> | 2016-03-31 09:45:20 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-03-31 09:45:20 -0700 |
commit | 11ac93cfdb9f4f25eee2ba60b947f992ab40ec54 (patch) | |
tree | 9738d127550a1aaa47e8828c3890c842da7dcf6f /xfa/fde | |
parent | 5a839e938bad5b766a928fb545f0b0aba39e3829 (diff) | |
download | pdfium-11ac93cfdb9f4f25eee2ba60b947f992ab40ec54.tar.xz |
Fix CData parsing in CFDE_XMLSyntaxParser.
This CL splits the handling of CData sections out into its own phase of
the parser (FDE_XMLSYNTAXMODE_SkipCData), which copies characters verbatim
until it reaches the closing "]]>". This fixes the issue with the CData
parser getting confused by '<' characters inside the data section.
BUG=pdfium:90
Review URL: https://codereview.chromium.org/1842633004
Diffstat (limited to 'xfa/fde')
-rw-r--r-- | xfa/fde/xml/fde_xml_imp.cpp | 82 | ||||
-rw-r--r-- | xfa/fde/xml/fde_xml_imp.h | 4 | ||||
-rw-r--r-- | xfa/fde/xml/fde_xml_imp_unittest.cpp | 522 |
3 files changed, 565 insertions, 43 deletions
diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp index ef5a7e4ee3..0affe8a621 100644 --- a/xfa/fde/xml/fde_xml_imp.cpp +++ b/xfa/fde/xml/fde_xml_imp.cpp @@ -1491,7 +1491,7 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { FXSYS_assert(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized()); int32_t iStreamLength = m_pStream->GetLength(); int32_t iPos; - FX_WCHAR ch; + uint32_t dwStatus = FDE_XMLSYNTAXSTATUS_None; while (TRUE) { if (m_pStart >= m_pEnd) { @@ -1516,8 +1516,9 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { m_pStart = m_pBuffer; m_pEnd = m_pBuffer + m_iBufferChars; } + while (m_pStart < m_pEnd) { - ch = *m_pStart; + FX_WCHAR ch = *m_pStart; switch (m_dwMode) { case FDE_XMLSYNTAXMODE_Text: if (ch == L'<') { @@ -1783,27 +1784,51 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { m_pStart++; break; case FDE_XMLSYNTAXMODE_SkipCommentOrDecl: - if (ch == '-') { + if (FX_wcsnicmp(m_pStart, L"--", 2) == 0) { + m_pStart += 2; m_dwMode = FDE_XMLSYNTAXMODE_SkipComment; + } else if (FX_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) { + m_pStart += 7; + m_dwMode = FDE_XMLSYNTAXMODE_SkipCData; } else { m_dwMode = FDE_XMLSYNTAXMODE_SkipDeclNode; m_SkipChar = L'>'; m_SkipStack.Push(L'>'); } break; + case FDE_XMLSYNTAXMODE_SkipCData: { + if (FX_wcsnicmp(m_pStart, L"]]>", 3) == 0) { + m_pStart += 3; + dwStatus = FDE_XMLSYNTAXSTATUS_CData; + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_dwMode = FDE_XMLSYNTAXMODE_Text; + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) + return FDE_XMLSYNTAXSTATUS_Error; + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + } case FDE_XMLSYNTAXMODE_SkipDeclNode: if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { m_pStart++; - if (ch != m_SkipChar) { + if (ch != m_SkipChar) break; - } + 
m_SkipStack.Pop(); uint32_t* pDWord = m_SkipStack.GetTopElement(); - if (pDWord == NULL) { + if (!pDWord) m_dwMode = FDE_XMLSYNTAXMODE_Text; - } else { + else m_SkipChar = (FX_WCHAR)*pDWord; - } } else { switch (ch) { case L'<': @@ -1830,20 +1855,10 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { if (ch == m_SkipChar) { m_SkipStack.Pop(); uint32_t* pDWord = m_SkipStack.GetTopElement(); - if (pDWord == NULL) { + if (!pDWord) { if (m_iDataLength >= 9) { CFX_WideString wsHeader; m_BlockBuffer.GetTextData(wsHeader, 0, 7); - if (wsHeader.Equal(FX_WSTRC(L"[CDATA["))) { - CFX_WideString wsTailer; - m_BlockBuffer.GetTextData(wsTailer, m_iDataLength - 2, - 2); - if (wsTailer.Equal(FX_WSTRC(L"]]"))) { - m_BlockBuffer.DeleteTextChars(7, TRUE); - m_BlockBuffer.DeleteTextChars(2, FALSE); - dwStatus = FDE_XMLSYNTAXSTATUS_CData; - } - } } m_iTextDataLength = m_iDataLength; m_BlockBuffer.Reset(); @@ -1851,7 +1866,7 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); m_dwMode = FDE_XMLSYNTAXMODE_Text; } else { - m_SkipChar = (FX_WCHAR)*pDWord; + m_SkipChar = static_cast<FX_WCHAR>(*pDWord); } } break; @@ -1871,27 +1886,11 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { } break; case FDE_XMLSYNTAXMODE_SkipComment: - if (ch == L'-') { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XMLSYNTAXSTATUS_Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = L'-'; - m_iDataLength++; - } else if (ch == L'>') { - if (m_iDataLength > 1) { - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_dwMode = FDE_XMLSYNTAXMODE_Text; - } - } else { - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (FX_wcsnicmp(m_pStart, L"-->", 3) == 0) { + m_pStart += 2; + m_dwMode = FDE_XMLSYNTAXMODE_Text; } + m_pStart++; break; case FDE_XMLSYNTAXMODE_TargetData: @@ 
-1945,9 +1944,8 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { default: break; } - if (dwStatus != FDE_XMLSYNTAXSTATUS_None) { + if (dwStatus != FDE_XMLSYNTAXSTATUS_None) return dwStatus; - } } } return 0; diff --git a/xfa/fde/xml/fde_xml_imp.h b/xfa/fde/xml/fde_xml_imp.h index a4361bdfea..0f252c12dd 100644 --- a/xfa/fde/xml/fde_xml_imp.h +++ b/xfa/fde/xml/fde_xml_imp.h @@ -309,7 +309,9 @@ class CFDE_BlockBuffer : public CFX_Target { #define FDE_XMLSYNTAXMODE_DeclCharData 15 #define FDE_XMLSYNTAXMODE_SkipComment 16 #define FDE_XMLSYNTAXMODE_SkipCommentOrDecl 17 -#define FDE_XMLSYNTAXMODE_TargetData 18 +#define FDE_XMLSYNTAXMODE_SkipCData 18 +#define FDE_XMLSYNTAXMODE_TargetData 19 + class CFDE_XMLSyntaxParser : public CFX_Target { public: CFDE_XMLSyntaxParser(); diff --git a/xfa/fde/xml/fde_xml_imp_unittest.cpp b/xfa/fde/xml/fde_xml_imp_unittest.cpp new file mode 100644 index 0000000000..42119ebe37 --- /dev/null +++ b/xfa/fde/xml/fde_xml_imp_unittest.cpp @@ -0,0 +1,522 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "xfa/fde/xml/fde_xml_imp.h" + +#include "xfa/fgas/crt/fgas_stream.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(CFDE_XMLSyntaxParser, CData) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <![CDATA[\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" ]]>\n" + L"</script>"; + + const FX_WCHAR* cdata = + L"\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" "; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(cdata, data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CDataWithInnerScript) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <![CDATA[\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" </script>\n" + L" ]]>\n" + L"</script>"; + + const FX_WCHAR* cdata = + L"\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" </script>\n" + L" "; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(cdata, data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, ArrowBangArrow) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <!>\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, ArrowBangBracketArrow) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <![>\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, IncompleteCData) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <![CDATA>\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, UnClosedCData) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <![CDATA[\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, EmptyCData) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <![CDATA[]]>\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, Comment) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <!-- A Comment -->\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, IncorrectCommentStart) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <!- A Comment -->\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentEmpty) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <!---->\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentThreeDash) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <!--->\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentTwoDash) { + const FX_WCHAR* input = + L"<script contentType=\"application/x-javascript\">\n" + L" <!-->\n" + L"</script>"; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. 
+ size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr<IFX_Stream> stream(IFX_Stream::CreateStream( + reinterpret_cast<uint8_t*>(const_cast<FX_WCHAR*>(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} |