From 84be3a3cfec5107aac9a58ea00b58b733d393c7d Mon Sep 17 00:00:00 2001 From: npm Date: Thu, 15 Sep 2016 13:27:21 -0700 Subject: Use ToUnicode mapping even when unicode is 0. CPDF_Font::UnicodeFromCharCode returns 0 only if ToUnicode map maps the charcode to 0. CPDF_SimpleFont::UnicodeFromCharCode and CPDF_CID_Font::UnicodeFromCharCode return 0 only if the call to CPDF_Font returns 0. In other cases, these methods return an empty string. So when processing text, a 0 return from the method should not be replaced with the charcode. BUG=pdfium:583 Review-Url: https://codereview.chromium.org/2342073002 --- core/fpdftext/cpdf_textpage.cpp | 8 +- fpdfsdk/fpdftext_embeddertest.cpp | 15 +++ testing/resources/bug_583.pdf | 216 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 233 insertions(+), 6 deletions(-) create mode 100644 testing/resources/bug_583.pdf diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index c691d4b3a0..1056943292 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -1131,12 +1131,8 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { spacing = 0; CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); bool bNoUnicode = false; - FX_WCHAR wChar = wstrItem.GetAt(0); - if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { - if (wstrItem.IsEmpty()) - wstrItem += (FX_WCHAR)item.m_CharCode; - else - wstrItem.SetAt(0, (FX_WCHAR)item.m_CharCode); + if (wstrItem.IsEmpty() && item.m_CharCode) { + wstrItem += static_cast<FX_WCHAR>(item.m_CharCode); bNoUnicode = true; } charinfo.m_Index = -1; diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp index 3070c30771..957e813f64 100644 --- a/fpdfsdk/fpdftext_embeddertest.cpp +++ b/fpdfsdk/fpdftext_embeddertest.cpp @@ -388,3 +388,18 @@ TEST_F(FPDFTextEmbeddertest, GetFontSize) { FPDFText_ClosePage(textpage); UnloadPage(page); } + +TEST_F(FPDFTextEmbeddertest, ToUnicode) { + 
EXPECT_TRUE(OpenDocument("bug_583.pdf")); + FPDF_PAGE page = LoadPage(0); + EXPECT_TRUE(page); + + FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); + EXPECT_TRUE(textpage); + + ASSERT_EQ(1, FPDFText_CountChars(textpage)); + EXPECT_EQ(static_cast<unsigned int>(0), FPDFText_GetUnicode(textpage, 0)); + + FPDFText_ClosePage(textpage); + UnloadPage(page); +} diff --git a/testing/resources/bug_583.pdf b/testing/resources/bug_583.pdf new file mode 100644 index 0000000000..fcb30d4c2a --- /dev/null +++ b/testing/resources/bug_583.pdf @@ -0,0 +1,216 @@ +%PDF-1.4 +%Óëéá +1 0 obj +<> +endobj +2 0 obj +<> stream +1 0 0 -1 0 48 cm +1 1 1 RG 1 1 1 rg +/G0 gs +0 0 48 48 re +f +0 0 0 RG 0 0 0 rg +BT +/F0 18 Tf +1 0 0 -1 16 32 Tm +<01> Tj +ET + +endstream +endobj +3 0 obj +<> +endobj +4 0 obj +<> +endobj +5 0 obj +<> +/Font <>>> +/MediaBox [0 0 48 48] +/Contents 2 0 R +/Parent 4 0 R>> +endobj +6 0 obj +<> +endobj +7 0 obj +<> +/CharProcs <>>> +endobj +8 0 obj +<> stream +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (Adobe) +/Ordering (UCS) +/Supplement 0 +>> def +/CMapName /Adobe-Identity-UCS def +/CMapType 2 def +1 begincodespacerange +<0001> <0001> +endcodespacerange +1 beginbfchar +<0001> <0000> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +endstream +endobj +9 0 obj +<> stream + +endstream +endobj +10 0 obj +<> stream +640 0 0 -740 640 40 d1 +640 -150 m +640 -660 l +520 -660 l +320 -610 l +390 -655 l +390 -710 l +360 -740 l +300 -740 l +260 -700 l +260 -670 l +280 -650 l +300 -650 l +290 -670 l +300 -690 l +320 -700 l +340 -700 l +350 -680 l +340 -660 l +300 -620 l +240 -590 l +40 -540 l +20 -540 l +0 -550 l +0 -40 l +120 -40 l +320 -90 l +250 -45 l +250 10 l +280 40 l +340 40 l +380 0 l +380 -30 l +360 -50 l +340 -50 l +350 -30 l +340 -10 l +320 0 l +300 0 l +290 -20 l +300 -40 l +340 -80 l +400 -110 l +600 -160 l +620 -160 l +640 -150 l +h +600 -620 m +40 -480 l +40 -500 l +560 -630 l +600 -630 l +600 -620 l +h 
+541 -567 m +530 -525 l +530 -240 l +471 -225 l +375 -373 l +367 -394 l +370 -371 l +370 -238 l +380 -203 l +284 -179 l +295 -219 l +295 -468 l +282 -502 l +375 -526 l +451 -408 l +460 -387 l +457 -410 l +457 -506 l +445 -543 l +541 -567 l +h +600 -200 m +80 -70 l +40 -70 l +40 -80 l +600 -220 l +600 -200 l +h +206 -159 m +99 -132 l +110 -172 l +110 -421 l +99 -456 l +206 -483 l +195 -444 l +195 -193 l +206 -159 l +h +f + +endstream +endobj +xref +0 11 +0000000000 65535 f +0000000015 00000 n +0000000138 00000 n +0000000304 00000 n +0000000351 00000 n +0000000406 00000 n +0000000596 00000 n +0000000706 00000 n +0000001006 00000 n +0000001393 00000 n +0000001440 00000 n +trailer +<> +startxref +2428 +%%EOF \ No newline at end of file -- cgit v1.2.3