// Additions to core/fxcrt/fx_extension.cpp from the change
// "Encode unicodes in UTF-16BE in ToUnicode map" (Bug: pdfium:667).

// Writes the two uppercase hexadecimal digits of |n| into buf[0] and buf[1],
// most significant digit first.
//
// |buf| must have room for at least two chars. No terminating NUL is written,
// so callers that want a C string must terminate the buffer themselves.
void FXSYS_IntToTwoHexChars(uint8_t n, char* buf) {
  static const char kHex[] = "0123456789ABCDEF";
  buf[0] = kHex[n >> 4];
  buf[1] = kHex[n & 0x0F];
}

// Writes the four uppercase hexadecimal digits of |n| into buf[0..3], high
// byte first (i.e. big-endian order).
//
// |buf| must have room for at least four chars. No terminating NUL is written.
void FXSYS_IntToFourHexChars(uint16_t n, char* buf) {
  // The casts make the intended truncation to a single byte explicit.
  FXSYS_IntToTwoHexChars(static_cast<uint8_t>(n >> 8), buf);
  FXSYS_IntToTwoHexChars(static_cast<uint8_t>(n & 0xFF), buf + 2);
}

// Writes |unicode| into |buf| as hexadecimal text of its UTF-16BE encoding.
//
// Code points up to 0xFFFF produce one code unit (four hex chars); code points
// above that produce a surrogate pair (eight hex chars).
//
// |unicode| is expected to be a Unicode scalar value: at most 0x10FFFF and not
// in the surrogate range 0xD800-0xDFFF. This is checked with ASSERT only.
//
// |buf| must have room for at least eight chars. No terminating NUL is
// written.
//
// Returns the number of chars written: 4 or 8.
size_t FXSYS_ToUTF16BE(uint32_t unicode, char* buf) {
  ASSERT(unicode <= 0xD7FF || (unicode > 0xDFFF && unicode <= 0x10FFFF));
  if (unicode <= 0xFFFF) {
    FXSYS_IntToFourHexChars(static_cast<uint16_t>(unicode), buf);
    return 4;
  }

  // Supplementary planes: split the 20-bit offset from 0x10000 into two
  // ten-bit halves.
  const uint32_t offset = unicode - 0x010000;
  // High ten bits plus 0xD800 form the lead surrogate.
  const uint16_t lead = static_cast<uint16_t>(0xD800 + offset / 0x400);
  // Low ten bits plus 0xDC00 form the trail surrogate.
  const uint16_t trail = static_cast<uint16_t>(0xDC00 + offset % 0x400);
  FXSYS_IntToFourHexChars(lead, buf);
  FXSYS_IntToFourHexChars(trail, buf + 4);
  return 8;
}
b/core/fxcrt/fx_extension.h @@ -76,6 +76,12 @@ inline int FXSYS_DecimalCharToInt(const wchar_t c) { return std::iswdigit(c) ? c - L'0' : 0; } +void FXSYS_IntToTwoHexChars(uint8_t c, char* buf); + +void FXSYS_IntToFourHexChars(uint16_t c, char* buf); + +size_t FXSYS_ToUTF16BE(uint32_t unicode, char* buf); + float FXSYS_FractionalScale(size_t scale_factor, int value); int FXSYS_FractionalScaleCount(); diff --git a/core/fxcrt/fx_extension_unittest.cpp b/core/fxcrt/fx_extension_unittest.cpp index 1bc3ec6298..38b66ba2d2 100644 --- a/core/fxcrt/fx_extension_unittest.cpp +++ b/core/fxcrt/fx_extension_unittest.cpp @@ -39,3 +39,53 @@ TEST(fxcrt, FX_HashCode_Wide) { EXPECT_EQ(97u, FX_HashCode_GetW(L"A", true)); EXPECT_EQ(1313 * 65u + 66u, FX_HashCode_GetW(L"AB", false)); } + +TEST(fxcrt, FXSYS_IntToTwoHexChars) { + char buf[3] = {0}; + FXSYS_IntToTwoHexChars(0x0, buf); + EXPECT_STREQ("00", buf); + FXSYS_IntToTwoHexChars(0x9, buf); + EXPECT_STREQ("09", buf); + FXSYS_IntToTwoHexChars(0xA, buf); + EXPECT_STREQ("0A", buf); + FXSYS_IntToTwoHexChars(0x8C, buf); + EXPECT_STREQ("8C", buf); + FXSYS_IntToTwoHexChars(0xBE, buf); + EXPECT_STREQ("BE", buf); + FXSYS_IntToTwoHexChars(0xD0, buf); + EXPECT_STREQ("D0", buf); + FXSYS_IntToTwoHexChars(0xFF, buf); + EXPECT_STREQ("FF", buf); +} + +TEST(fxcrt, FXSYS_IntToFourHexChars) { + char buf[5] = {0}; + FXSYS_IntToFourHexChars(0x0, buf); + EXPECT_STREQ("0000", buf); + FXSYS_IntToFourHexChars(0xA23, buf); + EXPECT_STREQ("0A23", buf); + FXSYS_IntToFourHexChars(0xB701, buf); + EXPECT_STREQ("B701", buf); + FXSYS_IntToFourHexChars(0xFFFF, buf); + EXPECT_STREQ("FFFF", buf); +} + +TEST(fxcrt, FXSYS_ToUTF16BE) { + char buf[9] = {0}; + // Test U+0000 to U+D7FF and U+E000 to U+FFFF + EXPECT_EQ(4U, FXSYS_ToUTF16BE(0x0, buf)); + EXPECT_STREQ("0000", buf); + EXPECT_EQ(4U, FXSYS_ToUTF16BE(0xD7FF, buf)); + EXPECT_STREQ("D7FF", buf); + EXPECT_EQ(4U, FXSYS_ToUTF16BE(0xE000, buf)); + EXPECT_STREQ("E000", buf); + EXPECT_EQ(4U, FXSYS_ToUTF16BE(0xFFFF, buf)); + 
EXPECT_STREQ("FFFF", buf); + // Test U+10000 to U+10FFFF + EXPECT_EQ(8U, FXSYS_ToUTF16BE(0x10000, buf)); + EXPECT_STREQ("D800DC00", buf); + EXPECT_EQ(8U, FXSYS_ToUTF16BE(0x10FFFF, buf)); + EXPECT_STREQ("DBFFDFFF", buf); + EXPECT_EQ(8U, FXSYS_ToUTF16BE(0x2003E, buf)); + EXPECT_STREQ("D840DC3E", buf); +} -- cgit v1.2.3