summary | refs | log | tree | commit | diff
path: root/core/fxcrt
diff options
context:
space:
mode:
author Tom Sepez <tsepez@chromium.org> 2018-08-08 17:49:02 +0000
committer Chromium commit bot <commit-bot@chromium.org> 2018-08-08 17:49:02 +0000
commit 34dab07ed6e826666fd0589069f2c9b5bd2ba4dc (patch)
tree 0eb30bd1c76f54890a6d365258a7157ae9972748 /core/fxcrt
parent 6d9897b103aef10b369eb999a40c22011a8ae4f5 (diff)
download pdfium-34dab07ed6e826666fd0589069f2c9b5bd2ba4dc.tar.xz
Move ByteString::FromUnicode() to WideString::ToDefANSI()
Turns out that "FromUnicode" is misleading in that, on Linux, it simply removes any characters beyond 0xFF and passes the rest unchanged, so no Unicode decoding actually takes place. On Windows, it passes the string to the system function specifying FX_CODEPAGE_DefANSI, converting it into the so-called "default ANSI code page", passing some characters, converting others to '?' and still others to 'A'. Either way, nothing resembling UTF-8 comes out of this, so pick a better name.

These now immediately look suspicious, so a follow-up CL will see which ones should really be WideString::UTF8Encode() instead.

Making this a normal method on a WideString rather than a static method on a ByteString feels more natural; this is parallel to the UTF8Encode and UTF16LE_Encode functions.

Add a test that shows these conversions.

Change-Id: Ia7551b47199eba61b5c328a97bfe9176ac8e583c
Reviewed-on: https://pdfium-review.googlesource.com/39690
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Diffstat (limited to 'core/fxcrt')
-rw-r--r-- core/fxcrt/bytestring.cpp 20
-rw-r--r-- core/fxcrt/bytestring.h 2
-rw-r--r-- core/fxcrt/widestring.cpp 19
-rw-r--r-- core/fxcrt/widestring.h 1
-rw-r--r-- core/fxcrt/widestring_unittest.cpp 26
5 files changed, 46 insertions, 22 deletions
diff --git a/core/fxcrt/bytestring.cpp b/core/fxcrt/bytestring.cpp
index 4d55c98912..b6c1ce7bbd 100644
--- a/core/fxcrt/bytestring.cpp
+++ b/core/fxcrt/bytestring.cpp
@@ -676,26 +676,6 @@ WideString ByteString::UTF8Decode() const {
return WideString(decoder.GetResult());
}
-// static
-ByteString ByteString::FromUnicode(const WideString& wstr) {
- int src_len = wstr.GetLength();
- int dest_len =
- FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, wstr.c_str(), src_len,
- nullptr, 0, nullptr, nullptr);
- if (!dest_len)
- return ByteString();
-
- ByteString bstr;
- {
- // Span's lifetime must end before ReleaseBuffer() below.
- pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
- FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, wstr.c_str(), src_len,
- dest_buf.data(), dest_len, nullptr, nullptr);
- }
- bstr.ReleaseBuffer(dest_len);
- return bstr;
-}
-
int ByteString::Compare(const ByteStringView& str) const {
if (!m_pData)
return str.IsEmpty() ? 0 : -1;
diff --git a/core/fxcrt/bytestring.h b/core/fxcrt/bytestring.h
index 5722c4925d..5975acbddd 100644
--- a/core/fxcrt/bytestring.h
+++ b/core/fxcrt/bytestring.h
@@ -66,8 +66,6 @@ class ByteString {
void clear() { m_pData.Reset(); }
- static ByteString FromUnicode(const WideString& str) WARN_UNUSED_RESULT;
-
// Explicit conversion to C-style string.
// Note: Any subsequent modification of |this| will invalidate the result.
const char* c_str() const { return m_pData ? m_pData->m_String : ""; }
diff --git a/core/fxcrt/widestring.cpp b/core/fxcrt/widestring.cpp
index cde1973d26..7dd1c30eb0 100644
--- a/core/fxcrt/widestring.cpp
+++ b/core/fxcrt/widestring.cpp
@@ -673,6 +673,25 @@ intptr_t WideString::ReferenceCountForTesting() const {
return m_pData ? m_pData->m_nRefs : 0;
}
+// Lossy conversion through FX_CODEPAGE_DefANSI; the result is not UTF-8.
+ByteString WideString::ToDefANSI() const {
+ int src_len = GetLength();
+ int dest_len = FXSYS_WideCharToMultiByte(
+ FX_CODEPAGE_DefANSI, 0, c_str(), src_len, nullptr, 0, nullptr, nullptr);
+ if (!dest_len)
+ return ByteString();
+
+ ByteString bstr;
+ {
+ // Span's lifetime must end before ReleaseBuffer() below.
+ pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
+ FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, c_str(), src_len,
+ dest_buf.data(), dest_len, nullptr, nullptr);
+ }
+ bstr.ReleaseBuffer(dest_len);
+ return bstr;
+}
+
ByteString WideString::UTF8Encode() const {
return FX_UTF8Encode(AsStringView());
}
diff --git a/core/fxcrt/widestring.h b/core/fxcrt/widestring.h
index b531292c57..dc5dd23428 100644
--- a/core/fxcrt/widestring.h
+++ b/core/fxcrt/widestring.h
@@ -195,6 +195,7 @@ class WideString {
size_t Replace(const WideStringView& pOld, const WideStringView& pNew);
size_t Remove(wchar_t ch);
+ ByteString ToDefANSI() const;
ByteString UTF8Encode() const;
// This method will add \0\0 to the end of the string to represent the
diff --git a/core/fxcrt/widestring_unittest.cpp b/core/fxcrt/widestring_unittest.cpp
index 9017fe0c54..9d38aa45e9 100644
--- a/core/fxcrt/widestring_unittest.cpp
+++ b/core/fxcrt/widestring_unittest.cpp
@@ -999,6 +999,32 @@ TEST(WideString, UTF16LE_Encode) {
}
}
+TEST(WideString, ToDefANSI) {
+ EXPECT_EQ("", WideString().ToDefANSI());
+#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
+ const char* kResult =
+ "x"
+ "?"
+ "\xff"
+ "A"
+ "?"
+ "y";
+#else
+ const char* kResult =
+ "x"
+ "\x80"
+ "\xff"
+ "y";
+#endif
+ EXPECT_EQ(kResult, WideString(L"x"
+ L"\u0080"
+ L"\u00ff"
+ L"\u0100"
+ L"\u208c"
+ L"y")
+ .ToDefANSI());
+}
+
TEST(WideStringView, FromVector) {
std::vector<WideStringView::UnsignedType> null_vec;
WideStringView null_string(null_vec);