summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolas Pena <npm@chromium.org>2017-05-05 16:49:30 -0400
committerChromium commit bot <commit-bot@chromium.org>2017-05-05 21:42:22 +0000
commit54b9166366085b30b7ee3094c2b71cd36e377153 (patch)
treecb7eded9f2304cefbb2ea0ce2542b4864780199f
parent0ec418f043b946134bee1a37c1dde1cc987579df (diff)
downloadpdfium-54b9166366085b30b7ee3094c2b71cd36e377153.tar.xz
Encode unicodes in UTF-16BE in ToUnicode map
Bug: pdfium:667
Change-Id: I811571c334ff28162905a65781ca14f03caf2966
Reviewed-on: https://pdfium-review.googlesource.com/4910
Commit-Queue: Nicolás Peña <npm@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
-rw-r--r--core/fxcrt/fx_extension.cpp25
-rw-r--r--core/fxcrt/fx_extension.h6
-rw-r--r--core/fxcrt/fx_extension_unittest.cpp50
-rw-r--r--fpdfsdk/fpdfedittext.cpp59
4 files changed, 113 insertions, 27 deletions
diff --git a/core/fxcrt/fx_extension.cpp b/core/fxcrt/fx_extension.cpp
index 209584b68d..2b290ed79d 100644
--- a/core/fxcrt/fx_extension.cpp
+++ b/core/fxcrt/fx_extension.cpp
@@ -137,6 +137,31 @@ uint32_t FX_HashCode_GetW(const CFX_WideStringC& str, bool bIgnoreCase) {
return dwHashCode;
}
+void FXSYS_IntToTwoHexChars(uint8_t n, char* buf) {  // Writes |n| as two uppercase hex digits into buf[0..1]; no terminator is written.
+ static const char kHex[] = "0123456789ABCDEF";  // Digit lookup table indexed by nibble value.
+ buf[0] = kHex[n / 16];  // High nibble.
+ buf[1] = kHex[n % 16];  // Low nibble.
+}
+
+void FXSYS_IntToFourHexChars(uint16_t n, char* buf) {  // Writes |n| as four hex digits into buf[0..3], most significant byte first.
+ FXSYS_IntToTwoHexChars(n / 256, buf);  // High byte goes to buf[0..1].
+ FXSYS_IntToTwoHexChars(n % 256, buf + 2);  // Low byte goes to buf[2..3].
+}
+
+size_t FXSYS_ToUTF16BE(uint32_t unicode, char* buf) {  // Writes the hex digits of |unicode|'s UTF-16BE code unit(s) to |buf|; returns the char count (4 or 8).
+ ASSERT(unicode <= 0xD7FF || (unicode > 0xDFFF && unicode <= 0x10FFFF));  // Expects a value outside U+D800..U+DFFF and no greater than U+10FFFF.
+ if (unicode <= 0xFFFF) {  // Fits in one 16-bit unit: emit it directly.
+ FXSYS_IntToFourHexChars(unicode, buf);
+ return 4;
+ }
+ unicode -= 0x010000;  // The remaining offset is split into two units below.
+ // High ten bits plus 0xD800
+ FXSYS_IntToFourHexChars(0xD800 + unicode / 0x400, buf);
+ // Low ten bits plus 0xDC00
+ FXSYS_IntToFourHexChars(0xDC00 + unicode % 0x400, buf + 4);
+ return 8;
+}
+
void* FX_Random_MT_Start(uint32_t dwSeed) {
FX_MTRANDOMCONTEXT* pContext = FX_Alloc(FX_MTRANDOMCONTEXT, 1);
pContext->mt[0] = dwSeed;
diff --git a/core/fxcrt/fx_extension.h b/core/fxcrt/fx_extension.h
index f55153c0ad..255ee2e3df 100644
--- a/core/fxcrt/fx_extension.h
+++ b/core/fxcrt/fx_extension.h
@@ -76,6 +76,12 @@ inline int FXSYS_DecimalCharToInt(const wchar_t c) {
return std::iswdigit(c) ? c - L'0' : 0;
}
+void FXSYS_IntToTwoHexChars(uint8_t c, char* buf);
+
+void FXSYS_IntToFourHexChars(uint16_t c, char* buf);
+
+size_t FXSYS_ToUTF16BE(uint32_t unicode, char* buf);
+
float FXSYS_FractionalScale(size_t scale_factor, int value);
int FXSYS_FractionalScaleCount();
diff --git a/core/fxcrt/fx_extension_unittest.cpp b/core/fxcrt/fx_extension_unittest.cpp
index 1bc3ec6298..38b66ba2d2 100644
--- a/core/fxcrt/fx_extension_unittest.cpp
+++ b/core/fxcrt/fx_extension_unittest.cpp
@@ -39,3 +39,53 @@ TEST(fxcrt, FX_HashCode_Wide) {
EXPECT_EQ(97u, FX_HashCode_GetW(L"A", true));
EXPECT_EQ(1313 * 65u + 66u, FX_HashCode_GetW(L"AB", false));
}
+
+TEST(fxcrt, FXSYS_IntToTwoHexChars) {  // Checks two-digit uppercase hex output, including the leading zero for values below 0x10.
+ char buf[3] = {0};  // Zero-filled so buf[2] terminates the string for EXPECT_STREQ.
+ FXSYS_IntToTwoHexChars(0x0, buf);
+ EXPECT_STREQ("00", buf);
+ FXSYS_IntToTwoHexChars(0x9, buf);
+ EXPECT_STREQ("09", buf);
+ FXSYS_IntToTwoHexChars(0xA, buf);
+ EXPECT_STREQ("0A", buf);
+ FXSYS_IntToTwoHexChars(0x8C, buf);
+ EXPECT_STREQ("8C", buf);
+ FXSYS_IntToTwoHexChars(0xBE, buf);
+ EXPECT_STREQ("BE", buf);
+ FXSYS_IntToTwoHexChars(0xD0, buf);
+ EXPECT_STREQ("D0", buf);
+ FXSYS_IntToTwoHexChars(0xFF, buf);  // Largest uint8_t value.
+ EXPECT_STREQ("FF", buf);
+}
+
+TEST(fxcrt, FXSYS_IntToFourHexChars) {  // Checks four-digit uppercase hex output with zero padding.
+ char buf[5] = {0};  // Zero-filled so buf[4] terminates the string for EXPECT_STREQ.
+ FXSYS_IntToFourHexChars(0x0, buf);
+ EXPECT_STREQ("0000", buf);
+ FXSYS_IntToFourHexChars(0xA23, buf);  // Three significant digits: expects one leading zero.
+ EXPECT_STREQ("0A23", buf);
+ FXSYS_IntToFourHexChars(0xB701, buf);
+ EXPECT_STREQ("B701", buf);
+ FXSYS_IntToFourHexChars(0xFFFF, buf);  // Largest uint16_t value.
+ EXPECT_STREQ("FFFF", buf);
+}
+
+TEST(fxcrt, FXSYS_ToUTF16BE) {  // Checks both the returned char count and the hex digits written.
+ char buf[9] = {0};  // 8 hex chars plus a terminator. The 4-char cases run first because nothing re-zeroes buf[4..7] after an 8-char write.
+ // Test U+0000 to U+D7FF and U+E000 to U+FFFF
+ EXPECT_EQ(4U, FXSYS_ToUTF16BE(0x0, buf));
+ EXPECT_STREQ("0000", buf);
+ EXPECT_EQ(4U, FXSYS_ToUTF16BE(0xD7FF, buf));
+ EXPECT_STREQ("D7FF", buf);
+ EXPECT_EQ(4U, FXSYS_ToUTF16BE(0xE000, buf));
+ EXPECT_STREQ("E000", buf);
+ EXPECT_EQ(4U, FXSYS_ToUTF16BE(0xFFFF, buf));
+ EXPECT_STREQ("FFFF", buf);
+ // Test U+10000 to U+10FFFF
+ EXPECT_EQ(8U, FXSYS_ToUTF16BE(0x10000, buf));  // Lowest value that needs two 16-bit units.
+ EXPECT_STREQ("D800DC00", buf);
+ EXPECT_EQ(8U, FXSYS_ToUTF16BE(0x10FFFF, buf));  // Highest value the function's ASSERT allows.
+ EXPECT_STREQ("DBFFDFFF", buf);
+ EXPECT_EQ(8U, FXSYS_ToUTF16BE(0x2003E, buf));
+ EXPECT_STREQ("D840DC3E", buf);
+}
diff --git a/fpdfsdk/fpdfedittext.cpp b/fpdfsdk/fpdfedittext.cpp
index 9b01775235..54388ef701 100644
--- a/fpdfsdk/fpdfedittext.cpp
+++ b/fpdfsdk/fpdfedittext.cpp
@@ -19,6 +19,7 @@
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fxcrt/fx_extension.h"
#include "core/fxge/cfx_fontmgr.h"
#include "core/fxge/fx_font.h"
#include "fpdfsdk/fsdk_define.h"
@@ -90,20 +91,27 @@ const char ToUnicodeStart[] =
"1 begincodespacerange\n"
"<0000> <FFFFF>\n";
-const char hex[] = "0123456789ABCDEF";
-
-void AddNum(CFX_ByteTextBuf* pBuffer, uint32_t number) {
+void AddCharcode(CFX_ByteTextBuf* pBuffer, uint32_t number) {
+ ASSERT(number <= 0xFFFF);
*pBuffer << "<";
char ans[4];
- for (size_t i = 0; i < 4; ++i) {
- ans[3 - i] = hex[number % 16];
- number /= 16;
- }
+ FXSYS_IntToFourHexChars(number, ans);
for (size_t i = 0; i < 4; ++i)
pBuffer->AppendChar(ans[i]);
*pBuffer << ">";
}
+// PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in
+// UTF-16BE encoding." See https://en.wikipedia.org/wiki/UTF-16#Description
+void AddUnicode(CFX_ByteTextBuf* pBuffer, uint32_t unicode) {  // Appends "<", the chars produced by FXSYS_ToUTF16BE for |unicode|, then ">" to |pBuffer|.
+ char ans[8];  // Sized for the largest count FXSYS_ToUTF16BE returns (8).
+ *pBuffer << "<";
+ size_t numChars = FXSYS_ToUTF16BE(unicode, ans);  // Returns 4 or 8; |ans| is not terminated, so only numChars chars are read below.
+ for (size_t i = 0; i < numChars; ++i)
+ pBuffer->AppendChar(ans[i]);
+ *pBuffer << ">";
+}
+
// Loads the charcode to unicode mapping into a stream
CPDF_Stream* LoadUnicode(CPDF_Document* pDoc,
const std::map<uint32_t, uint32_t>& to_unicode) {
@@ -173,37 +181,37 @@ CPDF_Stream* LoadUnicode(CPDF_Document* pDoc,
}
// Add maps to buffer
buffer << static_cast<uint32_t>(char_to_uni.size()) << " beginbfchar\n";
- for (auto iter : char_to_uni) {
- AddNum(&buffer, iter.first);
+ for (const auto& iter : char_to_uni) {
+ AddCharcode(&buffer, iter.first);
buffer << " ";
- AddNum(&buffer, iter.second);
+ AddUnicode(&buffer, iter.second);
buffer << "\n";
}
buffer << "endbfchar\n"
<< static_cast<uint32_t>(map_range_vector.size() + map_range.size())
<< " beginbfrange\n";
- for (auto iter : map_range_vector) {
+ for (const auto& iter : map_range_vector) {
const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
- AddNum(&buffer, charcodeRange.first);
+ AddCharcode(&buffer, charcodeRange.first);
buffer << " ";
- AddNum(&buffer, charcodeRange.second);
+ AddCharcode(&buffer, charcodeRange.second);
buffer << " [";
const std::vector<uint32_t>& unicodes = iter.second;
for (size_t i = 0; i < unicodes.size(); ++i) {
uint32_t uni = unicodes[i];
- AddNum(&buffer, uni);
+ AddUnicode(&buffer, uni);
if (i != unicodes.size() - 1)
buffer << " ";
}
buffer << "]\n";
}
- for (auto iter : map_range) {
+ for (const auto& iter : map_range) {
const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
- AddNum(&buffer, charcodeRange.first);
+ AddCharcode(&buffer, charcodeRange.first);
buffer << " ";
- AddNum(&buffer, charcodeRange.second);
+ AddCharcode(&buffer, charcodeRange.second);
buffer << " ";
- AddNum(&buffer, iter.second);
+ AddUnicode(&buffer, iter.second);
buffer << "\n";
}
// TODO(npm): Encrypt / Compress?
@@ -389,10 +397,10 @@ DLLEXPORT FPDF_PAGEOBJECT STDCALL FPDFPageObj_NewTextObj(FPDF_DOCUMENT document,
DLLEXPORT FPDF_BOOL STDCALL FPDFText_SetText(FPDF_PAGEOBJECT text_object,
FPDF_WIDESTRING text) {
- if (!text_object)
+ auto* pTextObj = static_cast<CPDF_TextObject*>(text_object);
+ if (!pTextObj)
return false;
- auto* pTextObj = reinterpret_cast<CPDF_TextObject*>(text_object);
FX_STRSIZE len = CFX_WideString::WStringLength(text);
CFX_WideString encodedText = CFX_WideString::FromUTF16LE(text, len);
CFX_ByteString byteText;
@@ -428,10 +436,10 @@ DLLEXPORT FPDF_FONT STDCALL FPDFText_LoadFont(FPDF_DOCUMENT document,
}
DLLEXPORT void STDCALL FPDFFont_Close(FPDF_FONT font) {
- if (!font)
+ CPDF_Font* pFont = static_cast<CPDF_Font*>(font);
+ if (!pFont)
return;
- CPDF_Font* pFont = reinterpret_cast<CPDF_Font*>(font);
CPDF_Document* pDoc = pFont->GetDocument();
if (!pDoc)
return;
@@ -445,14 +453,11 @@ DLLEXPORT FPDF_PAGEOBJECT STDCALL
FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document,
FPDF_FONT font,
float font_size) {
- if (!font)
- return nullptr;
-
CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
- if (!pDoc)
+ CPDF_Font* pFont = static_cast<CPDF_Font*>(font);
+ if (!pDoc || !pFont)
return nullptr;
- CPDF_Font* pFont = reinterpret_cast<CPDF_Font*>(font);
auto pTextObj = pdfium::MakeUnique<CPDF_TextObject>();
pTextObj->m_TextState.SetFont(pDoc->LoadFont(pFont->GetFontDict()));
pTextObj->m_TextState.SetFontSize(font_size);