diff options
author | npm <npm@chromium.org> | 2016-09-15 13:27:21 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-09-15 13:27:21 -0700 |
commit | 84be3a3cfec5107aac9a58ea00b58b733d393c7d (patch) | |
tree | e70d04e86ec278f98abe09d3655f9e63154d6a83 | |
parent | 163a9a6ded2ef483cb81435fc6aefbf6a45a4cb5 (diff) | |
download | pdfium-84be3a3cfec5107aac9a58ea00b58b733d393c7d.tar.xz |
Use ToUnicode mapping even when unicode is 0.
CPDF_Font::UnicodeFromCharCode returns 0 only if the ToUnicode map maps
the charcode to 0. CPDF_SimpleFont::UnicodeFromCharCode and
CPDF_CID_Font::UnicodeFromCharCode return 0 only if the call to
CPDF_Font returns 0. When no mapping is found, these methods return an
empty string instead. So when processing text, a 0 return from the
method should not be replaced with the charcode.
BUG=pdfium:583
Review-Url: https://codereview.chromium.org/2342073002
-rw-r--r-- | core/fpdftext/cpdf_textpage.cpp | 8 | ||||
-rw-r--r-- | fpdfsdk/fpdftext_embeddertest.cpp | 15 | ||||
-rw-r--r-- | testing/resources/bug_583.pdf | 216 |
3 files changed, 233 insertions, 6 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index c691d4b3a0..1056943292 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -1131,12 +1131,8 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { spacing = 0; CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); bool bNoUnicode = false; - FX_WCHAR wChar = wstrItem.GetAt(0); - if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) { - if (wstrItem.IsEmpty()) - wstrItem += (FX_WCHAR)item.m_CharCode; - else - wstrItem.SetAt(0, (FX_WCHAR)item.m_CharCode); + if (wstrItem.IsEmpty() && item.m_CharCode) { + wstrItem += static_cast<FX_WCHAR>(item.m_CharCode); bNoUnicode = true; } charinfo.m_Index = -1; diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp index 3070c30771..957e813f64 100644 --- a/fpdfsdk/fpdftext_embeddertest.cpp +++ b/fpdfsdk/fpdftext_embeddertest.cpp @@ -388,3 +388,18 @@ TEST_F(FPDFTextEmbeddertest, GetFontSize) { FPDFText_ClosePage(textpage); UnloadPage(page); } + +TEST_F(FPDFTextEmbeddertest, ToUnicode) { + EXPECT_TRUE(OpenDocument("bug_583.pdf")); + FPDF_PAGE page = LoadPage(0); + EXPECT_TRUE(page); + + FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); + EXPECT_TRUE(textpage); + + ASSERT_EQ(1, FPDFText_CountChars(textpage)); + EXPECT_EQ(static_cast<unsigned int>(0), FPDFText_GetUnicode(textpage, 0)); + + FPDFText_ClosePage(textpage); + UnloadPage(page); +} diff --git a/testing/resources/bug_583.pdf b/testing/resources/bug_583.pdf new file mode 100644 index 0000000000..fcb30d4c2a --- /dev/null +++ b/testing/resources/bug_583.pdf @@ -0,0 +1,216 @@ +%PDF-1.4 +%Óëéá +1 0 obj +<</Title (skbug_5606_b) +/Subject (rendering correctness test) +/Creator (Skia/DM) +/Producer (Skia/PDF m55)>> +endobj +2 0 obj +<</Length 117>> stream +1 0 0 -1 0 48 cm +1 1 1 RG 1 1 1 rg +/G0 gs +0 0 48 48 re +f +0 0 0 RG 0 0 0 rg +BT +/F0 18 Tf +1 0 0 -1 16 32 Tm +<01> Tj +ET + +endstream +endobj +3 0 obj 
+<</Type /Catalog +/Pages 4 0 R>> +endobj +4 0 obj +<</Type /Pages +/Count 1 +/Kids [5 0 R]>> +endobj +5 0 obj +<</Type /Page +/Resources <</ProcSets [/PDF /Text /ImageB /ImageC /ImageI] +/ExtGState <</G0 6 0 R>> +/Font <</F0 7 0 R>>>> +/MediaBox [0 0 48 48] +/Contents 2 0 R +/Parent 4 0 R>> +endobj +6 0 obj +<</Type /ExtGState +/Type /ExtGState +/CA 1 +/ca 1 +/LC 0 +/LJ 0 +/LW 0 +/ML 4 +/SA true +/BM /Normal>> +endobj +7 0 obj +<</Type /Font +/Subtype /Type3 +/FontMatrix [.00100000005 0 0 -.00100000005 0 0] +/FirstChar 0 +/LastChar 1 +/FontBBox [0 40 640 -740] +/CIDToGIDMap /Identity +/ToUnicode 8 0 R +/Widths [500 640] +/Encoding <</Type /Encoding +/Differences [0 /g0 /g100]>> +/CharProcs <</g0 9 0 R +/g100 10 0 R>>>> +endobj +8 0 obj +<</Length 338>> stream +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (Adobe) +/Ordering (UCS) +/Supplement 0 +>> def +/CMapName /Adobe-Identity-UCS def +/CMapType 2 def +1 begincodespacerange +<0001> <0001> +endcodespacerange +1 beginbfchar +<0001> <0000> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +endstream +endobj +9 0 obj +<</Length 0>> stream + +endstream +endobj +10 0 obj +<</Length 938>> stream +640 0 0 -740 640 40 d1 +640 -150 m +640 -660 l +520 -660 l +320 -610 l +390 -655 l +390 -710 l +360 -740 l +300 -740 l +260 -700 l +260 -670 l +280 -650 l +300 -650 l +290 -670 l +300 -690 l +320 -700 l +340 -700 l +350 -680 l +340 -660 l +300 -620 l +240 -590 l +40 -540 l +20 -540 l +0 -550 l +0 -40 l +120 -40 l +320 -90 l +250 -45 l +250 10 l +280 40 l +340 40 l +380 0 l +380 -30 l +360 -50 l +340 -50 l +350 -30 l +340 -10 l +320 0 l +300 0 l +290 -20 l +300 -40 l +340 -80 l +400 -110 l +600 -160 l +620 -160 l +640 -150 l +h +600 -620 m +40 -480 l +40 -500 l +560 -630 l +600 -630 l +600 -620 l +h +541 -567 m +530 -525 l +530 -240 l +471 -225 l +375 -373 l +367 -394 l +370 -371 l +370 -238 l +380 -203 l +284 -179 l +295 -219 l +295 -468 l +282 -502 l 
+375 -526 l +451 -408 l +460 -387 l +457 -410 l +457 -506 l +445 -543 l +541 -567 l +h +600 -200 m +80 -70 l +40 -70 l +40 -80 l +600 -220 l +600 -200 l +h +206 -159 m +99 -132 l +110 -172 l +110 -421 l +99 -456 l +206 -483 l +195 -444 l +195 -193 l +206 -159 l +h +f + +endstream +endobj +xref +0 11 +0000000000 65535 f +0000000015 00000 n +0000000138 00000 n +0000000304 00000 n +0000000351 00000 n +0000000406 00000 n +0000000596 00000 n +0000000706 00000 n +0000001006 00000 n +0000001393 00000 n +0000001440 00000 n +trailer +<</Size 11 +/Root 3 0 R +/Info 1 0 R>> +startxref +2428 +%%EOF
\ No newline at end of file |