diff options
author | Tom Sepez <tsepez@chromium.org> | 2018-08-08 17:49:02 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-08-08 17:49:02 +0000 |
commit | 34dab07ed6e826666fd0589069f2c9b5bd2ba4dc (patch) | |
tree | 0eb30bd1c76f54890a6d365258a7157ae9972748 /core/fxcrt | |
parent | 6d9897b103aef10b369eb999a40c22011a8ae4f5 (diff) | |
download | pdfium-34dab07ed6e826666fd0589069f2c9b5bd2ba4dc.tar.xz |
Move ByteString::FromUnicode() to WideString::ToDefANSI()
Turns out that "FromUnicode" is misleading in that, on Linux, it simply
removes any characters beyond 0xFF and passes the rest unchanged, so
no unicode decoding actually takes place. On Windows, it passes it into
the system function specifying FX_CODEPAGE_DefANSI, converting it into
the so-called "default ANSI code page", passing some characters,
converting others to '?' and still others to 'A'. Either way, nothing
resembling UTF-8 comes out of this, so pick a better name.
Callers of ToDefANSI() now immediately look suspicious, so a follow-up CL
will see which ones should really be WideString::UTF8Encode() instead.
Making this a normal method on a widestring rather than a static
method on a bytestring feels more natural; this is parallel to
the UTF8Encode and UTF16LE_Encode functions.
Add a test that shows these conversions.
Change-Id: Ia7551b47199eba61b5c328a97bfe9176ac8e583c
Reviewed-on: https://pdfium-review.googlesource.com/39690
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Diffstat (limited to 'core/fxcrt')
-rw-r--r-- | core/fxcrt/bytestring.cpp | 20 | ||||
-rw-r--r-- | core/fxcrt/bytestring.h | 2 | ||||
-rw-r--r-- | core/fxcrt/widestring.cpp | 19 | ||||
-rw-r--r-- | core/fxcrt/widestring.h | 1 | ||||
-rw-r--r-- | core/fxcrt/widestring_unittest.cpp | 26 |
5 files changed, 46 insertions, 22 deletions
diff --git a/core/fxcrt/bytestring.cpp b/core/fxcrt/bytestring.cpp
index 4d55c98912..b6c1ce7bbd 100644
--- a/core/fxcrt/bytestring.cpp
+++ b/core/fxcrt/bytestring.cpp
@@ -676,26 +676,6 @@ WideString ByteString::UTF8Decode() const {
   return WideString(decoder.GetResult());
 }
 
-// static
-ByteString ByteString::FromUnicode(const WideString& wstr) {
-  int src_len = wstr.GetLength();
-  int dest_len =
-      FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, wstr.c_str(), src_len,
-                                nullptr, 0, nullptr, nullptr);
-  if (!dest_len)
-    return ByteString();
-
-  ByteString bstr;
-  {
-    // Span's lifetime must end before ReleaseBuffer() below.
-    pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
-    FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, wstr.c_str(), src_len,
-                              dest_buf.data(), dest_len, nullptr, nullptr);
-  }
-  bstr.ReleaseBuffer(dest_len);
-  return bstr;
-}
-
 int ByteString::Compare(const ByteStringView& str) const {
   if (!m_pData)
     return str.IsEmpty() ? 0 : -1;
diff --git a/core/fxcrt/bytestring.h b/core/fxcrt/bytestring.h
index 5722c4925d..5975acbddd 100644
--- a/core/fxcrt/bytestring.h
+++ b/core/fxcrt/bytestring.h
@@ -66,8 +66,6 @@ class ByteString {
 
   void clear() { m_pData.Reset(); }
 
-  static ByteString FromUnicode(const WideString& str) WARN_UNUSED_RESULT;
-
   // Explicit conversion to C-style string.
   // Note: Any subsequent modification of |this| will invalidate the result.
   const char* c_str() const { return m_pData ? m_pData->m_String : ""; }
diff --git a/core/fxcrt/widestring.cpp b/core/fxcrt/widestring.cpp
index cde1973d26..7dd1c30eb0 100644
--- a/core/fxcrt/widestring.cpp
+++ b/core/fxcrt/widestring.cpp
@@ -673,6 +673,25 @@ intptr_t WideString::ReferenceCountForTesting() const {
   return m_pData ? m_pData->m_nRefs : 0;
 }
 
+// static
+ByteString WideString::ToDefANSI() const {
+  int src_len = GetLength();
+  int dest_len = FXSYS_WideCharToMultiByte(
+      FX_CODEPAGE_DefANSI, 0, c_str(), src_len, nullptr, 0, nullptr, nullptr);
+  if (!dest_len)
+    return ByteString();
+
+  ByteString bstr;
+  {
+    // Span's lifetime must end before ReleaseBuffer() below.
+    pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
+    FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, c_str(), src_len,
+                              dest_buf.data(), dest_len, nullptr, nullptr);
+  }
+  bstr.ReleaseBuffer(dest_len);
+  return bstr;
+}
+
 ByteString WideString::UTF8Encode() const {
   return FX_UTF8Encode(AsStringView());
 }
diff --git a/core/fxcrt/widestring.h b/core/fxcrt/widestring.h
index b531292c57..dc5dd23428 100644
--- a/core/fxcrt/widestring.h
+++ b/core/fxcrt/widestring.h
@@ -195,6 +195,7 @@ class WideString {
   size_t Replace(const WideStringView& pOld, const WideStringView& pNew);
   size_t Remove(wchar_t ch);
 
+  ByteString ToDefANSI() const;
   ByteString UTF8Encode() const;
 
   // This method will add \0\0 to the end of the string to represent the
diff --git a/core/fxcrt/widestring_unittest.cpp b/core/fxcrt/widestring_unittest.cpp
index 9017fe0c54..9d38aa45e9 100644
--- a/core/fxcrt/widestring_unittest.cpp
+++ b/core/fxcrt/widestring_unittest.cpp
@@ -999,6 +999,32 @@ TEST(WideString, UTF16LE_Encode) {
   }
 }
 
+TEST(WideString, ToDefANSI) {
+  EXPECT_EQ("", WideString().ToDefANSI());
+#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
+  const char* kResult =
+      "x"
+      "?"
+      "\xff"
+      "A"
+      "?"
+      "y";
+#else
+  const char* kResult =
+      "x"
+      "\x80"
+      "\xff"
+      "y";
+#endif
+  EXPECT_EQ(kResult, WideString(L"x"
+                                L"\u0080"
+                                L"\u00ff"
+                                L"\u0100"
+                                L"\u208c"
+                                L"y")
+                         .ToDefANSI());
+}
+
 TEST(WideStringView, FromVector) {
   std::vector<WideStringView::UnsignedType> null_vec;
   WideStringView null_string(null_vec);