From 19ae9bf554fc1c01e352a846646c8005a4fe6b2b Mon Sep 17 00:00:00 2001 From: dan sinclair Date: Mon, 16 Apr 2018 16:54:27 +0000 Subject: Use CFX_XML instead of CXML in CPDF_Metadata This CL converts CPDF_Metadata to use the CFX_XML classes instead of CXML classes. This also moves the CFX_XML classes from being XFA only to being used everywhere. Change-Id: Idb784f8aaa0bc843d8a3415ba5262ccf4949308a Reviewed-on: https://pdfium-review.googlesource.com/30650 Reviewed-by: Henrique Nakashima Commit-Queue: dsinclair --- BUILD.gn | 38 +++++++------- core/fpdfdoc/cpdf_metadata.cpp | 83 ++++++++++++++++--------------- core/fpdfdoc/cpdf_metadata_unittest.cpp | 16 +++--- core/fxcrt/xml/cfx_xmlparser_unittest.cpp | 2 + testing/libfuzzer/BUILD.gn | 14 +++--- 5 files changed, 80 insertions(+), 73 deletions(-) diff --git a/BUILD.gn b/BUILD.gn index 45857e552d..f28a879643 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -821,6 +821,8 @@ jumbo_static_library("fxcrt") { "core/fxcrt/cfx_binarybuf.h", "core/fxcrt/cfx_bitstream.cpp", "core/fxcrt/cfx_bitstream.h", + "core/fxcrt/cfx_blockbuffer.cpp", + "core/fxcrt/cfx_blockbuffer.h", "core/fxcrt/cfx_datetime.cpp", "core/fxcrt/cfx_datetime.h", "core/fxcrt/cfx_fileaccess_posix.cpp", @@ -832,6 +834,8 @@ jumbo_static_library("fxcrt") { "core/fxcrt/cfx_memorystream.h", "core/fxcrt/cfx_seekablemultistream.cpp", "core/fxcrt/cfx_seekablemultistream.h", + "core/fxcrt/cfx_seekablestreamproxy.cpp", + "core/fxcrt/cfx_seekablestreamproxy.h", "core/fxcrt/cfx_utf8decoder.cpp", "core/fxcrt/cfx_utf8decoder.h", "core/fxcrt/cfx_widetextbuf.cpp", @@ -871,6 +875,20 @@ jumbo_static_library("fxcrt") { "core/fxcrt/weak_ptr.h", "core/fxcrt/widestring.cpp", "core/fxcrt/widestring.h", + "core/fxcrt/xml/cfx_xmlattributenode.cpp", + "core/fxcrt/xml/cfx_xmlattributenode.h", + "core/fxcrt/xml/cfx_xmlchardata.cpp", + "core/fxcrt/xml/cfx_xmlchardata.h", + "core/fxcrt/xml/cfx_xmlelement.cpp", + "core/fxcrt/xml/cfx_xmlelement.h", + "core/fxcrt/xml/cfx_xmlinstruction.cpp", + "core/fxcrt/xml/cfx_xmlinstruction.h", + "core/fxcrt/xml/cfx_xmlnode.cpp", + "core/fxcrt/xml/cfx_xmlnode.h", + "core/fxcrt/xml/cfx_xmlparser.cpp", + "core/fxcrt/xml/cfx_xmlparser.h", + "core/fxcrt/xml/cfx_xmltext.cpp", + "core/fxcrt/xml/cfx_xmltext.h", "core/fxcrt/xml/cxml_attritem.cpp", "core/fxcrt/xml/cxml_attritem.h", "core/fxcrt/xml/cxml_content.cpp", @@ -897,14 +915,10 @@ jumbo_static_library("fxcrt") { if (pdf_enable_xfa) { sources += [ - "core/fxcrt/cfx_blockbuffer.cpp", - "core/fxcrt/cfx_blockbuffer.h", "core/fxcrt/cfx_char.cpp", "core/fxcrt/cfx_char.h", "core/fxcrt/cfx_decimal.cpp", "core/fxcrt/cfx_decimal.h", - "core/fxcrt/cfx_seekablestreamproxy.cpp", - "core/fxcrt/cfx_seekablestreamproxy.h", "core/fxcrt/css/cfx_css.h", "core/fxcrt/css/cfx_csscolorvalue.cpp", "core/fxcrt/css/cfx_csscolorvalue.h", @@ -949,20 +963,6 @@ jumbo_static_library("fxcrt") { "core/fxcrt/fx_arabic.cpp", "core/fxcrt/fx_arabic.h", "core/fxcrt/locale_iface.h", - "core/fxcrt/xml/cfx_xmlattributenode.cpp", - "core/fxcrt/xml/cfx_xmlattributenode.h", - "core/fxcrt/xml/cfx_xmlchardata.cpp", - "core/fxcrt/xml/cfx_xmlchardata.h", - "core/fxcrt/xml/cfx_xmlelement.cpp", - "core/fxcrt/xml/cfx_xmlelement.h", - "core/fxcrt/xml/cfx_xmlinstruction.cpp", - "core/fxcrt/xml/cfx_xmlinstruction.h", - "core/fxcrt/xml/cfx_xmlnode.cpp", - "core/fxcrt/xml/cfx_xmlnode.h", - "core/fxcrt/xml/cfx_xmlparser.cpp", - "core/fxcrt/xml/cfx_xmlparser.h", - "core/fxcrt/xml/cfx_xmltext.cpp", - "core/fxcrt/xml/cfx_xmltext.h", ] } } @@ -2897,6 +2897,7 @@ test("pdfium_unittests") { "core/fxcrt/unowned_ptr_unittest.cpp", "core/fxcrt/weak_ptr_unittest.cpp", "core/fxcrt/widestring_unittest.cpp", + "core/fxcrt/xml/cfx_xmlparser_unittest.cpp", "core/fxge/dib/cfx_dibitmap_unittest.cpp", "core/fxge/dib/cstretchengine_unittest.cpp", "fpdfsdk/fpdf_catalog_unittest.cpp", @@ -2918,7 +2919,6 @@ test("pdfium_unittests") { "core/fxcrt/css/cfx_cssdeclaration_unittest.cpp", "core/fxcrt/css/cfx_cssstylesheet_unittest.cpp", "core/fxcrt/css/cfx_cssvaluelistparser_unittest.cpp", - "core/fxcrt/xml/cfx_xmlparser_unittest.cpp", "fxbarcode/oned/BC_OnedCodaBarWriter_unittest.cpp", "fxbarcode/oned/BC_OnedCode128Writer_unittest.cpp", "fxbarcode/oned/BC_OnedCode39Writer_unittest.cpp", diff --git a/core/fpdfdoc/cpdf_metadata.cpp b/core/fpdfdoc/cpdf_metadata.cpp index 11fde82036..972569a25d 100644 --- a/core/fpdfdoc/cpdf_metadata.cpp +++ b/core/fpdfdoc/cpdf_metadata.cpp @@ -8,53 +8,54 @@ #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" -#include "core/fxcrt/xml/cxml_content.h" -#include "core/fxcrt/xml/cxml_element.h" +#include "core/fxcrt/fx_codepage.h" +#include "core/fxcrt/xml/cfx_xmlelement.h" +#include "core/fxcrt/xml/cfx_xmlparser.h" namespace { -void CheckForSharedFormInternal(CXML_Element* element, +void CheckForSharedFormInternal(CFX_XMLElement* element, std::vector* unsupported) { - size_t count = element->CountAttrs(); - for (size_t i = 0; i < count; ++i) { - ByteString space; - ByteString name; - WideString value; - element->GetAttrByIndex(i, &space, &name, &value); - if (space != "xmlns" || name != "adhocwf" || - value != L"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/") { + for (const auto& pair : element->GetAttributes()) { + if (pair.first != L"xmlns:adhocwf" || + pair.second != L"http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/") { continue; } - CXML_Element* pVersion = element->GetElement("adhocwf", "workflowType", 0); - if (!pVersion) - continue; - - CXML_Content* pContent = ToContent(pVersion->GetChild(0)); - if (!pContent) - continue; - - switch (pContent->m_Content.GetInteger()) { - case 0: - unsupported->push_back(UnsupportedFeature::kDocumentSharedFormEmail); - break; - case 1: - unsupported->push_back(UnsupportedFeature::kDocumentSharedFormAcrobat); - break; - case 2: - unsupported->push_back( - UnsupportedFeature::kDocumentSharedFormFilesystem); - break; + for (const auto* child = element->GetFirstChild(); child; + child = child->GetNextSibling()) { + if (child->GetType() != FX_XMLNODE_Element) + continue; + + const auto* child_elem = static_cast(child); + if (child_elem->GetName() != L"adhocwf:workflowType") + continue; + + switch (child_elem->GetTextData().GetInteger()) { + case 0: + unsupported->push_back(UnsupportedFeature::kDocumentSharedFormEmail); + break; + case 1: + unsupported->push_back( + UnsupportedFeature::kDocumentSharedFormAcrobat); + break; + case 2: + unsupported->push_back( + UnsupportedFeature::kDocumentSharedFormFilesystem); + break; + } + // We only care about the first one we find. + break; } } - count = element->CountChildren(); - for (size_t i = 0; i < count; ++i) { - CXML_Element* child = ToElement(element->GetChild(i)); - if (!child) + for (auto* child = element->GetFirstChild(); child; + child = child->GetNextSibling()) { + if (child->GetType() != FX_XMLNODE_Element) continue; - CheckForSharedFormInternal(child, unsupported); + CheckForSharedFormInternal(static_cast(child), + unsupported); } } @@ -70,12 +71,16 @@ std::vector CPDF_Metadata::CheckForSharedForm() const { auto pAcc = pdfium::MakeRetain(stream_.Get()); pAcc->LoadAllDataFiltered(); - std::unique_ptr xml_root = - CXML_Element::Parse(pAcc->GetData(), pAcc->GetSize()); - if (!xml_root) + auto root = pdfium::MakeUnique(L"root"); + auto proxy = pdfium::MakeRetain(pAcc->GetData(), + pAcc->GetSize()); + proxy->SetCodePage(FX_CODEPAGE_UTF8); + + CFX_XMLParser parser(root.get(), proxy); + if (!parser.Parse()) return {}; std::vector unsupported; - CheckForSharedFormInternal(xml_root.get(), &unsupported); + CheckForSharedFormInternal(root.get(), &unsupported); return unsupported; } diff --git a/core/fpdfdoc/cpdf_metadata_unittest.cpp b/core/fpdfdoc/cpdf_metadata_unittest.cpp index 6e6d2f63dd..1a39948461 100644 --- a/core/fpdfdoc/cpdf_metadata_unittest.cpp +++ b/core/fpdfdoc/cpdf_metadata_unittest.cpp @@ -10,7 +10,7 @@ TEST(CPDF_MetadataTest, CheckSharedFormEmailAtTopLevel) { const char* data = - "\n" + "\n" "\n" "0\n" "1.1\n" @@ -27,7 +27,7 @@ TEST(CPDF_MetadataTest, CheckSharedFormEmailAtTopLevel) { TEST(CPDF_MetadataTest, CheckSharedFormAcrobatAtTopLevel) { const char* data = - "\n" + "\n" "\n" "1\n" "1.1\n" @@ -44,7 +44,7 @@ TEST(CPDF_MetadataTest, CheckSharedFormAcrobatAtTopLevel) { TEST(CPDF_MetadataTest, CheckSharedFormFilesystemAtTopLevel) { const char* data = - "\n" + "\n" "\n" "2\n" "1.1\n" @@ -61,7 +61,7 @@ TEST(CPDF_MetadataTest, CheckSharedFormFilesystemAtTopLevel) { TEST(CPDF_MetadataTest, CheckSharedFormWithoutWorkflow) { const char* data = - "\n" + "\n" "\n" "2\n" "1.1\n" @@ -77,7 +77,7 @@ TEST(CPDF_MetadataTest, CheckSharedFormWithoutWorkflow) { TEST(CPDF_MetadataTest, CheckSharedFormAsChild) { const char* data = - "\n" + "\n" "\n" "\n" "0\n" @@ -96,7 +96,7 @@ TEST(CPDF_MetadataTest, CheckSharedFormAsChild) { TEST(CPDF_MetadataTest, CheckSharedFormAsNoAdhoc) { const char* data = - "\n" + "\n" ""; CPDF_Stream stream; @@ -109,7 +109,7 @@ TEST(CPDF_MetadataTest, CheckSharedFormAsNoAdhoc) { TEST(CPDF_MetadataTest, CheckSharedFormWrongNamespace) { const char* data = - "\n" + "\n" "\n" "1\n" "1.1\n" @@ -125,7 +125,7 @@ TEST(CPDF_MetadataTest, CheckSharedFormWrongNamespace) { TEST(CPDF_MetadataTest, CheckSharedFormMultipleErrors) { const char* data = - "\n" + "\n" "" "\n" "\n" diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp index d22925f797..39ddc32987 100644 --- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp +++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp @@ -8,8 +8,10 @@ #include "core/fxcrt/cfx_seekablestreamproxy.h" #include "core/fxcrt/fx_codepage.h" +#include "core/fxcrt/xml/cfx_xmlnode.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/test_support.h" +#include "third_party/base/ptr_util.h" class CFX_XMLTestParser : public CFX_XMLParser { public: diff --git a/testing/libfuzzer/BUILD.gn b/testing/libfuzzer/BUILD.gn index 16c62bb040..ba368948e2 100644 --- a/testing/libfuzzer/BUILD.gn +++ b/testing/libfuzzer/BUILD.gn @@ -34,6 +34,7 @@ group("libfuzzer") { ":pdf_jpx_fuzzer", ":pdf_psengine_fuzzer", ":pdf_streamparser_fuzzer", + ":pdf_xml_fuzzer", ] if (pdf_enable_xfa) { deps += [ @@ -47,7 +48,6 @@ group("libfuzzer") { ":pdf_fm2js_fuzzer", ":pdf_formcalc_fuzzer", ":pdf_lzw_fuzzer", - ":pdf_xml_fuzzer", ] } } @@ -135,12 +135,6 @@ if (pdf_enable_xfa) { "pdf_lzw_fuzzer.cc", ] } - - pdfium_fuzzer("pdf_xml_fuzzer") { - sources = [ - "pdf_xml_fuzzer.cc", - ] - } } pdfium_fuzzer("pdf_cmap_fuzzer") { @@ -211,3 +205,9 @@ pdfium_fuzzer("pdf_streamparser_fuzzer") { "pdf_streamparser_fuzzer.cc", ] } + +pdfium_fuzzer("pdf_xml_fuzzer") { + sources = [ + "pdf_xml_fuzzer.cc", + ] +} -- cgit v1.2.3