From 11ac93cfdb9f4f25eee2ba60b947f992ab40ec54 Mon Sep 17 00:00:00 2001 From: dsinclair Date: Thu, 31 Mar 2016 09:45:20 -0700 Subject: Fix CData parsing in CFDE_XMLSyntaxParser. This CL splits the handling of CData sections out to an individual phase of the parser. This fixes the issue with the CData parser getting confused by < characters inside the data section. BUG=pdfium:90 Review URL: https://codereview.chromium.org/1842633004 --- BUILD.gn | 1 + pdfium.gyp | 1 + xfa/fde/xml/fde_xml_imp.cpp | 82 +++--- xfa/fde/xml/fde_xml_imp.h | 4 +- xfa/fde/xml/fde_xml_imp_unittest.cpp | 522 +++++++++++++++++++++++++++++++++++ xfa/fgas/crt/fgas_stream.cpp | 6 +- xfa/fgas/crt/fgas_system.cpp | 3 +- 7 files changed, 573 insertions(+), 46 deletions(-) create mode 100644 xfa/fde/xml/fde_xml_imp_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index a21c0da263..6780f2b53a 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -1613,6 +1613,7 @@ test("pdfium_unittests") { include_dirs = [] if (pdf_enable_xfa) { sources += [ + "xfa/fde/xml/fde_xml_imp_unittest.cpp", "xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder_unittest.cpp", "xfa/fxfa/parser/xfa_utils_imp_unittest.cpp", ] diff --git a/pdfium.gyp b/pdfium.gyp index 06f960b1d4..63d0c03404 100644 --- a/pdfium.gyp +++ b/pdfium.gyp @@ -922,6 +922,7 @@ 'conditions': [ ['pdf_enable_xfa==1', { 'sources': [ + 'xfa/fde/xml/fde_xml_imp_unittest.cpp', 'xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder_unittest.cpp', 'xfa/fxfa/parser/xfa_utils_imp_unittest.cpp', ], diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp index ef5a7e4ee3..0affe8a621 100644 --- a/xfa/fde/xml/fde_xml_imp.cpp +++ b/xfa/fde/xml/fde_xml_imp.cpp @@ -1491,7 +1491,7 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { FXSYS_assert(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized()); int32_t iStreamLength = m_pStream->GetLength(); int32_t iPos; - FX_WCHAR ch; + uint32_t dwStatus = FDE_XMLSYNTAXSTATUS_None; while (TRUE) { if (m_pStart >= m_pEnd) { @@ -1516,8 +1516,9 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { m_pStart = m_pBuffer; m_pEnd = m_pBuffer + m_iBufferChars; } + while (m_pStart < m_pEnd) { - ch = *m_pStart; + FX_WCHAR ch = *m_pStart; switch (m_dwMode) { case FDE_XMLSYNTAXMODE_Text: if (ch == L'<') { @@ -1783,27 +1784,51 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { m_pStart++; break; case FDE_XMLSYNTAXMODE_SkipCommentOrDecl: - if (ch == '-') { + if (FX_wcsnicmp(m_pStart, L"--", 2) == 0) { + m_pStart += 2; m_dwMode = FDE_XMLSYNTAXMODE_SkipComment; + } else if (FX_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) { + m_pStart += 7; + m_dwMode = FDE_XMLSYNTAXMODE_SkipCData; } else { m_dwMode = FDE_XMLSYNTAXMODE_SkipDeclNode; m_SkipChar = L'>'; m_SkipStack.Push(L'>'); } break; + case FDE_XMLSYNTAXMODE_SkipCData: { + if (FX_wcsnicmp(m_pStart, L"]]>", 3) == 0) { + m_pStart += 3; + dwStatus = FDE_XMLSYNTAXSTATUS_CData; + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_dwMode = FDE_XMLSYNTAXMODE_Text; + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) + return FDE_XMLSYNTAXSTATUS_Error; + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + } case FDE_XMLSYNTAXMODE_SkipDeclNode: if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { m_pStart++; - if (ch != m_SkipChar) { + if (ch != m_SkipChar) break; - } + m_SkipStack.Pop(); uint32_t* pDWord = m_SkipStack.GetTopElement(); - if (pDWord == NULL) { + if (!pDWord) m_dwMode = FDE_XMLSYNTAXMODE_Text; - } else { + else m_SkipChar = (FX_WCHAR)*pDWord; - } } else { switch (ch) { case L'<': @@ -1830,20 +1855,10 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { if (ch == m_SkipChar) { m_SkipStack.Pop(); uint32_t* pDWord = m_SkipStack.GetTopElement(); - if (pDWord == NULL) { + if (!pDWord) { if (m_iDataLength >= 9) { CFX_WideString wsHeader; m_BlockBuffer.GetTextData(wsHeader, 0, 7); - if (wsHeader.Equal(FX_WSTRC(L"[CDATA["))) { - CFX_WideString wsTailer; - m_BlockBuffer.GetTextData(wsTailer, m_iDataLength - 2, - 2); - if (wsTailer.Equal(FX_WSTRC(L"]]"))) { - m_BlockBuffer.DeleteTextChars(7, TRUE); - m_BlockBuffer.DeleteTextChars(2, FALSE); - dwStatus = FDE_XMLSYNTAXSTATUS_CData; - } - } } m_iTextDataLength = m_iDataLength; m_BlockBuffer.Reset(); @@ -1851,7 +1866,7 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); m_dwMode = FDE_XMLSYNTAXMODE_Text; } else { - m_SkipChar = (FX_WCHAR)*pDWord; + m_SkipChar = static_cast(*pDWord); } } break; @@ -1871,27 +1886,11 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { } break; case FDE_XMLSYNTAXMODE_SkipComment: - if (ch == L'-') { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XMLSYNTAXSTATUS_Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = L'-'; - m_iDataLength++; - } else if (ch == L'>') { - if (m_iDataLength > 1) { - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_dwMode = FDE_XMLSYNTAXMODE_Text; - } - } else { - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (FX_wcsnicmp(m_pStart, L"-->", 3) == 0) { + m_pStart += 2; + m_dwMode = FDE_XMLSYNTAXMODE_Text; } + m_pStart++; break; case FDE_XMLSYNTAXMODE_TargetData: @@ -1945,9 +1944,8 @@ uint32_t CFDE_XMLSyntaxParser::DoSyntaxParse() { default: break; } - if (dwStatus != FDE_XMLSYNTAXSTATUS_None) { + if (dwStatus != FDE_XMLSYNTAXSTATUS_None) return dwStatus; - } } } return 0; diff --git a/xfa/fde/xml/fde_xml_imp.h b/xfa/fde/xml/fde_xml_imp.h index a4361bdfea..0f252c12dd 100644 --- a/xfa/fde/xml/fde_xml_imp.h +++ b/xfa/fde/xml/fde_xml_imp.h @@ -309,7 +309,9 @@ class CFDE_BlockBuffer : public CFX_Target { #define FDE_XMLSYNTAXMODE_DeclCharData 15 #define FDE_XMLSYNTAXMODE_SkipComment 16 #define FDE_XMLSYNTAXMODE_SkipCommentOrDecl 17 -#define FDE_XMLSYNTAXMODE_TargetData 18 +#define FDE_XMLSYNTAXMODE_SkipCData 18 +#define FDE_XMLSYNTAXMODE_TargetData 19 + class CFDE_XMLSyntaxParser : public CFX_Target { public: CFDE_XMLSyntaxParser(); diff --git a/xfa/fde/xml/fde_xml_imp_unittest.cpp b/xfa/fde/xml/fde_xml_imp_unittest.cpp new file mode 100644 index 0000000000..42119ebe37 --- /dev/null +++ b/xfa/fde/xml/fde_xml_imp_unittest.cpp @@ -0,0 +1,522 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "xfa/fde/xml/fde_xml_imp.h" + +#include "xfa/fgas/crt/fgas_stream.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(CFDE_XMLSyntaxParser, CData) { + const FX_WCHAR* input = + L""; + + const FX_WCHAR* cdata = + L"\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" "; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(cdata, data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CDataWithInnerScript) { + const FX_WCHAR* input = + L"\n" + L" ]]>\n" + L""; + + const FX_WCHAR* cdata = + L"\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" \n" + L" "; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(cdata, data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, ArrowBangArrow) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, ArrowBangBracketArrow) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, IncompleteCData) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, UnClosedCData) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, EmptyCData) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, Comment) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, IncorrectCommentStart) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentEmpty) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentThreeDash) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentTwoDash) { + const FX_WCHAR* input = + L""; + + // We * sizeof(FX_WCHAR) because we pass in the uint8_t, not the FX_WCHAR. + size_t len = FXSYS_wcslen(input) * sizeof(FX_WCHAR); + std::unique_ptr stream(IFX_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0)); + CFDE_XMLSyntaxParser parser; + parser.Init(stream.get(), 256); + + CFX_WideString data; + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_TagName, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XMLSYNTAXSTATUS_EOS, parser.DoSyntaxParse()); +} diff --git a/xfa/fgas/crt/fgas_stream.cpp b/xfa/fgas/crt/fgas_stream.cpp index 8ab2f9fd37..94468e23da 100644 --- a/xfa/fgas/crt/fgas_stream.cpp +++ b/xfa/fgas/crt/fgas_stream.cpp @@ -870,8 +870,9 @@ int32_t CFX_BufferStreamImp::ReadString(FX_WCHAR* pStr, } const FX_WCHAR* pSrc = (const FX_WCHAR*)(FX_CHAR*)(m_pData + m_iPosition); int32_t iCount = 0; - while (*pSrc != L'\0' && iCount < iLen) { - *pStr++ = *pSrc++, iCount++; + while (*pSrc && iCount < iLen) { + *pStr++ = *pSrc++; + iCount++; } m_iPosition += iCount * 2; bEOS = (*pSrc == L'\0') || (m_iPosition >= m_iLength); @@ -1345,6 +1346,7 @@ int32_t CFX_Stream::ReadString(FX_WCHAR* pStr, } return iLen; } + int32_t CFX_Stream::WriteData(const uint8_t* pBuffer, int32_t iBufferSize) { FXSYS_assert(pBuffer != NULL && iBufferSize > 0); if (m_pStreamImp == NULL) { diff --git a/xfa/fgas/crt/fgas_system.cpp b/xfa/fgas/crt/fgas_system.cpp index df1a9d044a..7ba2d924ca 100644 --- a/xfa/fgas/crt/fgas_system.cpp +++ b/xfa/fgas/crt/fgas_system.cpp @@ -31,7 +31,8 @@ inline int32_t FX_tolower(int32_t ch) { int32_t FX_wcsnicmp(const FX_WCHAR* s1, const FX_WCHAR* s2, size_t count) { FXSYS_assert(s1 != NULL && s2 != NULL && count > 0); - FX_WCHAR wch1 = 0, wch2 = 0; + FX_WCHAR wch1 = 0; + FX_WCHAR wch2 = 0; while (count-- > 0) { wch1 = (FX_WCHAR)FX_tolower(*s1++); wch2 = (FX_WCHAR)FX_tolower(*s2++); -- cgit v1.2.3