From d9871435eb7cea00a173baf780934f9d3525329a Mon Sep 17 00:00:00 2001
From: tsepez
Date: Thu, 15 Sep 2016 14:01:31 -0700
Subject: Add string pools to save storage.

Adds string hashes so CFX strings will interoperate with STL
unordered containers.

These will be employed per-document in a subsequent cl.

BUG=pdfium:597

Review-Url: https://codereview.chromium.org/2341683005
---
 BUILD.gn                                         |  2 +
 core/fxcrt/cfx_string_pool_template_unittest.cpp | 94 ++++++++++++++++++++++++
 core/fxcrt/fx_basic_bstring.cpp                  |  3 +
 core/fxcrt/fx_basic_wstring.cpp                  |  3 +
 core/fxcrt/include/cfx_string_pool_template.h    | 30 ++++++++
 core/fxcrt/include/fx_ext.h                      | 10 +--
 core/fxcrt/include/fx_string.h                   | 30 ++++++++
 7 files changed, 163 insertions(+), 9 deletions(-)
 create mode 100644 core/fxcrt/cfx_string_pool_template_unittest.cpp
 create mode 100644 core/fxcrt/include/cfx_string_pool_template.h

diff --git a/BUILD.gn b/BUILD.gn
index ccb1e0cca3..df8cdbaaad 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -709,6 +709,7 @@ static_library("fxcrt") {
     "core/fxcrt/include/cfx_count_ref.h",
     "core/fxcrt/include/cfx_observable.h",
     "core/fxcrt/include/cfx_retain_ptr.h",
+    "core/fxcrt/include/cfx_string_pool_template.h",
     "core/fxcrt/include/fx_basic.h",
     "core/fxcrt/include/fx_coordinates.h",
     "core/fxcrt/include/fx_ext.h",
@@ -1654,6 +1655,7 @@ test("pdfium_unittests") {
     "core/fxcrt/cfx_count_ref_unittest.cpp",
     "core/fxcrt/cfx_observable_unittest.cpp",
     "core/fxcrt/cfx_retain_ptr_unittest.cpp",
+    "core/fxcrt/cfx_string_pool_template_unittest.cpp",
     "core/fxcrt/fx_basic_bstring_unittest.cpp",
     "core/fxcrt/fx_basic_gcc_unittest.cpp",
     "core/fxcrt/fx_basic_memmgr_unittest.cpp",
diff --git a/core/fxcrt/cfx_string_pool_template_unittest.cpp b/core/fxcrt/cfx_string_pool_template_unittest.cpp
new file mode 100644
index 0000000000..95a9007b92
--- /dev/null
+++ b/core/fxcrt/cfx_string_pool_template_unittest.cpp
@@ -0,0 +1,94 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fxcrt/include/cfx_string_pool_template.h"
+#include "core/fxcrt/include/fx_string.h"
+#include "testing/fx_string_testhelpers.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(fxcrt, ByteStringPool) {
+  CFX_ByteStringPool pool;
+
+  CFX_ByteString null1;
+  CFX_ByteString null2;
+  CFX_ByteString goats1("goats");
+  CFX_ByteString goats2("goats");
+
+  // Underlying storage, if non-null, is not shared.
+  EXPECT_EQ(nullptr, null1.m_pData.Get());
+  EXPECT_EQ(nullptr, null2.m_pData.Get());
+  EXPECT_NE(goats1.m_pData, goats2.m_pData);
+
+  CFX_ByteString interned_null1 = pool.Intern(null1);
+  CFX_ByteString interned_null2 = pool.Intern(null2);
+  CFX_ByteString interned_goats1 = pool.Intern(goats1);
+  CFX_ByteString interned_goats2 = pool.Intern(goats2);
+
+  // Strings are logically equal after being interned.
+  EXPECT_EQ(null1, interned_null1);
+  EXPECT_EQ(null2, interned_null2);
+  EXPECT_EQ(goats1, interned_goats1);
+  EXPECT_EQ(goats2, interned_goats2);
+
+  // Interned underlying storage, if non-null, belongs to first seen.
+  EXPECT_EQ(nullptr, interned_null1.m_pData.Get());
+  EXPECT_EQ(nullptr, interned_null2.m_pData.Get());
+  EXPECT_EQ(goats1.m_pData, interned_goats1.m_pData);
+  EXPECT_EQ(goats1.m_pData, interned_goats2.m_pData);
+
+  pool.Clear();
+  CFX_ByteString reinterned_null2 = pool.Intern(null2);
+  CFX_ByteString reinterned_null1 = pool.Intern(null1);
+  CFX_ByteString reinterned_goats2 = pool.Intern(goats2);
+  CFX_ByteString reinterned_goats1 = pool.Intern(goats1);
+
+  // After clearing pool, storage was re-interned using second strings.
+  EXPECT_EQ(nullptr, reinterned_null1.m_pData.Get());
+  EXPECT_EQ(nullptr, reinterned_null2.m_pData.Get());
+  EXPECT_EQ(goats2.m_pData, reinterned_goats1.m_pData);
+  EXPECT_EQ(goats2.m_pData, reinterned_goats2.m_pData);
+}
+
+TEST(fxcrt, WideStringPool) {
+  CFX_WideStringPool pool;
+
+  CFX_WideString null1;
+  CFX_WideString null2;
+  CFX_WideString goats1(L"goats");
+  CFX_WideString goats2(L"goats");
+
+  // Underlying storage, if non-null, is not shared.
+  EXPECT_EQ(nullptr, null1.m_pData.Get());
+  EXPECT_EQ(nullptr, null2.m_pData.Get());
+  EXPECT_NE(goats1.m_pData, goats2.m_pData);
+
+  CFX_WideString interned_null1 = pool.Intern(null1);
+  CFX_WideString interned_null2 = pool.Intern(null2);
+  CFX_WideString interned_goats1 = pool.Intern(goats1);
+  CFX_WideString interned_goats2 = pool.Intern(goats2);
+
+  // Strings are logically equal after being interned.
+  EXPECT_EQ(null1, interned_null1);
+  EXPECT_EQ(null2, interned_null2);
+  EXPECT_EQ(goats1, interned_goats1);
+  EXPECT_EQ(goats2, interned_goats2);
+
+  // Interned underlying storage, if non-null, belongs to first seen.
+  EXPECT_EQ(nullptr, interned_null1.m_pData.Get());
+  EXPECT_EQ(nullptr, interned_null2.m_pData.Get());
+  EXPECT_EQ(goats1.m_pData, interned_goats1.m_pData);
+  EXPECT_EQ(goats1.m_pData, interned_goats2.m_pData);
+
+  pool.Clear();
+  CFX_WideString reinterned_null2 = pool.Intern(null2);
+  CFX_WideString reinterned_null1 = pool.Intern(null1);
+  CFX_WideString reinterned_goats2 = pool.Intern(goats2);
+  CFX_WideString reinterned_goats1 = pool.Intern(goats1);
+
+  // After clearing pool, storage was re-interned using second strings.
+  EXPECT_EQ(nullptr, reinterned_null1.m_pData.Get());
+  EXPECT_EQ(nullptr, reinterned_null2.m_pData.Get());
+  EXPECT_EQ(goats2.m_pData, reinterned_goats1.m_pData);
+  EXPECT_EQ(goats2.m_pData, reinterned_goats2.m_pData);
+}
diff --git a/core/fxcrt/fx_basic_bstring.cpp b/core/fxcrt/fx_basic_bstring.cpp
index c5979a7466..63db86e0c8 100644
--- a/core/fxcrt/fx_basic_bstring.cpp
+++ b/core/fxcrt/fx_basic_bstring.cpp
@@ -9,11 +9,14 @@
 #include <algorithm>
 #include <cctype>
 
+#include "core/fxcrt/include/cfx_string_pool_template.h"
 #include "core/fxcrt/include/fx_basic.h"
 #include "third_party/base/numerics/safe_math.h"
 
 template class CFX_StringDataTemplate<FX_CHAR>;
 template class CFX_StringCTemplate<FX_CHAR>;
+template class CFX_StringPoolTemplate<CFX_ByteString>;
+template struct std::hash<CFX_ByteString>;
 
 namespace {
 
diff --git a/core/fxcrt/fx_basic_wstring.cpp b/core/fxcrt/fx_basic_wstring.cpp
index 29e915f30c..377f09c913 100644
--- a/core/fxcrt/fx_basic_wstring.cpp
+++ b/core/fxcrt/fx_basic_wstring.cpp
@@ -9,12 +9,15 @@
 #include <algorithm>
 #include <cctype>
 
+#include "core/fxcrt/include/cfx_string_pool_template.h"
 #include "core/fxcrt/include/fx_basic.h"
 #include "core/fxcrt/include/fx_ext.h"
 #include "third_party/base/numerics/safe_math.h"
 
 template class CFX_StringDataTemplate<FX_WCHAR>;
 template class CFX_StringCTemplate<FX_WCHAR>;
+template class CFX_StringPoolTemplate<CFX_WideString>;
+template struct std::hash<CFX_WideString>;
 
 namespace {
 
diff --git a/core/fxcrt/include/cfx_string_pool_template.h b/core/fxcrt/include/cfx_string_pool_template.h
new file mode 100644
index 0000000000..a59d13a61d
--- /dev/null
+++ b/core/fxcrt/include/cfx_string_pool_template.h
@@ -0,0 +1,30 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FXCRT_INCLUDE_CFX_STRING_POOL_TEMPLATE_H_
+#define CORE_FXCRT_INCLUDE_CFX_STRING_POOL_TEMPLATE_H_
+
+#include <unordered_set>
+
+#include "core/fxcrt/include/fx_string.h"
+
+template <typename StringType>
+class CFX_StringPoolTemplate {
+ public:
+  StringType Intern(const StringType& str) { return *m_Pool.insert(str).first; }
+  void Clear() { m_Pool.clear(); }
+
+ private:
+  std::unordered_set<StringType> m_Pool;
+};
+
+using CFX_ByteStringPool = CFX_StringPoolTemplate<CFX_ByteString>;
+using CFX_WideStringPool = CFX_StringPoolTemplate<CFX_WideString>;
+
+extern template class CFX_StringPoolTemplate<CFX_ByteString>;
+extern template class CFX_StringPoolTemplate<CFX_WideString>;
+
+#endif  // CORE_FXCRT_INCLUDE_CFX_STRING_POOL_TEMPLATE_H_
diff --git a/core/fxcrt/include/fx_ext.h b/core/fxcrt/include/fx_ext.h
index f7aca68d64..e33d57bdb7 100644
--- a/core/fxcrt/include/fx_ext.h
+++ b/core/fxcrt/include/fx_ext.h
@@ -83,19 +83,11 @@ inline int FXSYS_toDecimalDigit(const FX_WCHAR c) {
 FX_FLOAT FXSYS_FractionalScale(size_t scale_factor, int value);
 int FXSYS_FractionalScaleCount();
 
-uint32_t FX_HashCode_GetA(const CFX_ByteStringC& str, bool bIgnoreCase);
-uint32_t FX_HashCode_GetW(const CFX_WideStringC& Str, bool bIgnoreCase);
-
 void* FX_Random_MT_Start(uint32_t dwSeed);
-
-uint32_t FX_Random_MT_Generate(void* pContext);
-
 void FX_Random_MT_Close(void* pContext);
-
+uint32_t FX_Random_MT_Generate(void* pContext);
 void FX_Random_GenerateBase(uint32_t* pBuffer, int32_t iCount);
-
 void FX_Random_GenerateMT(uint32_t* pBuffer, int32_t iCount);
-
 void FX_Random_GenerateCrypto(uint32_t* pBuffer, int32_t iCount);
 
 #ifdef PDF_ENABLE_XFA
diff --git a/core/fxcrt/include/fx_string.h b/core/fxcrt/include/fx_string.h
index 48378586d3..6e9af221ca 100644
--- a/core/fxcrt/include/fx_string.h
+++ b/core/fxcrt/include/fx_string.h
@@ -8,7 +8,9 @@
 #define CORE_FXCRT_INCLUDE_FX_STRING_H_
 
 #include <stdint.h>  // For intptr_t.
+
 #include <algorithm>
+#include <functional>
 
 #include "core/fxcrt/cfx_string_c_template.h"
 #include "core/fxcrt/cfx_string_data_template.h"
@@ -166,7 +168,9 @@ class CFX_ByteString {
   void Concat(const FX_CHAR* lpszSrcData, FX_STRSIZE nSrcLen);
 
   CFX_RetainPtr<StringData> m_pData;
+
   friend class fxcrt_ByteStringConcat_Test;
+  friend class fxcrt_ByteStringPool_Test;
 };
 
 inline bool operator==(const char* lhs, const CFX_ByteString& rhs) {
@@ -357,7 +361,9 @@ class CFX_WideString {
   void Concat(const FX_WCHAR* lpszSrcData, FX_STRSIZE nSrcLen);
 
   CFX_RetainPtr<StringData> m_pData;
+
   friend class fxcrt_WideStringConcatInPlace_Test;
+  friend class fxcrt_WideStringPool_Test;
 };
 
 inline CFX_WideString operator+(const CFX_WideStringC& str1,
@@ -432,4 +438,28 @@ inline FX_FLOAT FX_atof(const CFX_WideStringC& wsStr) {
 bool FX_atonum(const CFX_ByteStringC& str, void* pData);
 FX_STRSIZE FX_ftoa(FX_FLOAT f, FX_CHAR* buf);
 
+uint32_t FX_HashCode_GetA(const CFX_ByteStringC& str, bool bIgnoreCase);
+uint32_t FX_HashCode_GetW(const CFX_WideStringC& str, bool bIgnoreCase);
+
+namespace std {
+
+template <>
+struct hash<CFX_ByteString> {
+  std::size_t operator()(const CFX_ByteString& str) const {
+    return FX_HashCode_GetA(str.AsStringC(), false);
+  }
+};
+
+template <>
+struct hash<CFX_WideString> {
+  std::size_t operator()(const CFX_WideString& str) const {
+    return FX_HashCode_GetW(str.AsStringC(), false);
+  }
+};
+
+}  // namespace std
+
+extern template struct std::hash<CFX_ByteString>;
+extern template struct std::hash<CFX_WideString>;
+
 #endif  // CORE_FXCRT_INCLUDE_FX_STRING_H_
-- 
cgit v1.2.3