summary refs log tree commit diff
diff options
context:
space:
mode:
author npm <npm@chromium.org> 2016-09-15 13:27:21 -0700
committer Commit bot <commit-bot@chromium.org> 2016-09-15 13:27:21 -0700
commit 84be3a3cfec5107aac9a58ea00b58b733d393c7d (patch)
tree e70d04e86ec278f98abe09d3655f9e63154d6a83
parent 163a9a6ded2ef483cb81435fc6aefbf6a45a4cb5 (diff)
download pdfium-84be3a3cfec5107aac9a58ea00b58b733d393c7d.tar.xz
Use ToUnicode mapping even when unicode is 0.
CPDF_Font::UnicodeFromCharCode returns 0 only if the ToUnicode map maps the charcode to 0. CPDF_SimpleFont::UnicodeFromCharCode and CPDF_CID_Font::UnicodeFromCharCode return 0 only if the call to CPDF_Font returns 0. In other cases, these methods return an empty string. So when processing text, a 0 returned from the method should not be replaced with the charcode.
BUG=pdfium:583
Review-Url: https://codereview.chromium.org/2342073002
-rw-r--r-- core/fpdftext/cpdf_textpage.cpp 8
-rw-r--r-- fpdfsdk/fpdftext_embeddertest.cpp 15
-rw-r--r-- testing/resources/bug_583.pdf 216
3 files changed, 233 insertions, 6 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index c691d4b3a0..1056943292 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -1131,12 +1131,8 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
spacing = 0;
CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
bool bNoUnicode = false;
- FX_WCHAR wChar = wstrItem.GetAt(0);
- if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode) {
- if (wstrItem.IsEmpty())
- wstrItem += (FX_WCHAR)item.m_CharCode;
- else
- wstrItem.SetAt(0, (FX_WCHAR)item.m_CharCode);
+ if (wstrItem.IsEmpty() && item.m_CharCode) {
+ wstrItem += static_cast<FX_WCHAR>(item.m_CharCode);
bNoUnicode = true;
}
charinfo.m_Index = -1;
diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp
index 3070c30771..957e813f64 100644
--- a/fpdfsdk/fpdftext_embeddertest.cpp
+++ b/fpdfsdk/fpdftext_embeddertest.cpp
@@ -388,3 +388,18 @@ TEST_F(FPDFTextEmbeddertest, GetFontSize) {
FPDFText_ClosePage(textpage);
UnloadPage(page);
}
+
+TEST_F(FPDFTextEmbeddertest, ToUnicode) {
+ // bug_583.pdf draws charcode 1, which its ToUnicode CMap maps to U+0000.
+ ASSERT_TRUE(OpenDocument("bug_583.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);  // Fatal: |page| is passed to FPDFText_LoadPage() next.
+ FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+ ASSERT_TRUE(textpage);  // Fatal: every call below takes |textpage|.
+ // The mapped value 0 must be reported, not the raw charcode 1.
+ ASSERT_EQ(1, FPDFText_CountChars(textpage));
+ EXPECT_EQ(static_cast<unsigned int>(0), FPDFText_GetUnicode(textpage, 0));
+
+ FPDFText_ClosePage(textpage);
+ UnloadPage(page);
+}
diff --git a/testing/resources/bug_583.pdf b/testing/resources/bug_583.pdf
new file mode 100644
index 0000000000..fcb30d4c2a
--- /dev/null
+++ b/testing/resources/bug_583.pdf
@@ -0,0 +1,216 @@
+%PDF-1.4
+%Óëéá
+1 0 obj
+<</Title (skbug_5606_b)
+/Subject (rendering correctness test)
+/Creator (Skia/DM)
+/Producer (Skia/PDF m55)>>
+endobj
+2 0 obj
+<</Length 117>> stream
+1 0 0 -1 0 48 cm
+1 1 1 RG 1 1 1 rg
+/G0 gs
+0 0 48 48 re
+f
+0 0 0 RG 0 0 0 rg
+BT
+/F0 18 Tf
+1 0 0 -1 16 32 Tm
+<01> Tj
+ET
+
+endstream
+endobj
+3 0 obj
+<</Type /Catalog
+/Pages 4 0 R>>
+endobj
+4 0 obj
+<</Type /Pages
+/Count 1
+/Kids [5 0 R]>>
+endobj
+5 0 obj
+<</Type /Page
+/Resources <</ProcSets [/PDF /Text /ImageB /ImageC /ImageI]
+/ExtGState <</G0 6 0 R>>
+/Font <</F0 7 0 R>>>>
+/MediaBox [0 0 48 48]
+/Contents 2 0 R
+/Parent 4 0 R>>
+endobj
+6 0 obj
+<</Type /ExtGState
+/Type /ExtGState
+/CA 1
+/ca 1
+/LC 0
+/LJ 0
+/LW 0
+/ML 4
+/SA true
+/BM /Normal>>
+endobj
+7 0 obj
+<</Type /Font
+/Subtype /Type3
+/FontMatrix [.00100000005 0 0 -.00100000005 0 0]
+/FirstChar 0
+/LastChar 1
+/FontBBox [0 40 640 -740]
+/CIDToGIDMap /Identity
+/ToUnicode 8 0 R
+/Widths [500 640]
+/Encoding <</Type /Encoding
+/Differences [0 /g0 /g100]>>
+/CharProcs <</g0 9 0 R
+/g100 10 0 R>>>>
+endobj
+8 0 obj
+<</Length 338>> stream
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo
+<< /Registry (Adobe)
+/Ordering (UCS)
+/Supplement 0
+>> def
+/CMapName /Adobe-Identity-UCS def
+/CMapType 2 def
+1 begincodespacerange
+<0001> <0001>
+endcodespacerange
+1 beginbfchar
+<0001> <0000>
+endbfchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+endstream
+endobj
+9 0 obj
+<</Length 0>> stream
+
+endstream
+endobj
+10 0 obj
+<</Length 938>> stream
+640 0 0 -740 640 40 d1
+640 -150 m
+640 -660 l
+520 -660 l
+320 -610 l
+390 -655 l
+390 -710 l
+360 -740 l
+300 -740 l
+260 -700 l
+260 -670 l
+280 -650 l
+300 -650 l
+290 -670 l
+300 -690 l
+320 -700 l
+340 -700 l
+350 -680 l
+340 -660 l
+300 -620 l
+240 -590 l
+40 -540 l
+20 -540 l
+0 -550 l
+0 -40 l
+120 -40 l
+320 -90 l
+250 -45 l
+250 10 l
+280 40 l
+340 40 l
+380 0 l
+380 -30 l
+360 -50 l
+340 -50 l
+350 -30 l
+340 -10 l
+320 0 l
+300 0 l
+290 -20 l
+300 -40 l
+340 -80 l
+400 -110 l
+600 -160 l
+620 -160 l
+640 -150 l
+h
+600 -620 m
+40 -480 l
+40 -500 l
+560 -630 l
+600 -630 l
+600 -620 l
+h
+541 -567 m
+530 -525 l
+530 -240 l
+471 -225 l
+375 -373 l
+367 -394 l
+370 -371 l
+370 -238 l
+380 -203 l
+284 -179 l
+295 -219 l
+295 -468 l
+282 -502 l
+375 -526 l
+451 -408 l
+460 -387 l
+457 -410 l
+457 -506 l
+445 -543 l
+541 -567 l
+h
+600 -200 m
+80 -70 l
+40 -70 l
+40 -80 l
+600 -220 l
+600 -200 l
+h
+206 -159 m
+99 -132 l
+110 -172 l
+110 -421 l
+99 -456 l
+206 -483 l
+195 -444 l
+195 -193 l
+206 -159 l
+h
+f
+
+endstream
+endobj
+xref
+0 11
+0000000000 65535 f
+0000000015 00000 n
+0000000138 00000 n
+0000000304 00000 n
+0000000351 00000 n
+0000000406 00000 n
+0000000596 00000 n
+0000000706 00000 n
+0000001006 00000 n
+0000001393 00000 n
+0000001440 00000 n
+trailer
+<</Size 11
+/Root 3 0 R
+/Info 1 0 R>>
+startxref
+2428
+%%EOF
\ No newline at end of file