From dd40b8b802da1c068dab450b68c934be0358b6de Mon Sep 17 00:00:00 2001 From: Nicolas Pena Date: Tue, 14 Feb 2017 10:59:53 -0500 Subject: Add ProcessText supporting standard fonts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BUG=pdfium:667 Change-Id: I701719144127881ebdb5be01a51e833d1e576477 Reviewed-on: https://pdfium-review.googlesource.com/2691 Commit-Queue: Nicolás Peña Reviewed-by: dsinclair --- core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp | 47 ++++++++++++++++++++++ core/fpdfapi/edit/cpdf_pagecontentgenerator.h | 8 ++++ .../edit/cpdf_pagecontentgenerator_unittest.cpp | 46 ++++++++++++++++++--- core/fpdfapi/page/cpdf_textobject.h | 1 + 4 files changed, 96 insertions(+), 6 deletions(-) diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp index 9de97ee718..4de89a47de 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp @@ -9,12 +9,14 @@ #include #include +#include "core/fpdfapi/font/cpdf_font.h" #include "core/fpdfapi/page/cpdf_docpagedata.h" #include "core/fpdfapi/page/cpdf_image.h" #include "core/fpdfapi/page/cpdf_imageobject.h" #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/page/cpdf_path.h" #include "core/fpdfapi/page/cpdf_pathobject.h" +#include "core/fpdfapi/page/cpdf_textobject.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_name.h" @@ -63,6 +65,8 @@ void CPDF_PageContentGenerator::GenerateContent() { ProcessImage(&buf, pImageObject); else if (CPDF_PathObject* pPathObj = pPageObj->AsPath()) ProcessPath(&buf, pPathObj); + else if (CPDF_TextObject* pTextObj = pPageObj->AsText()) + ProcessText(&buf, pTextObj); } CPDF_Dictionary* pPageDict = m_pPage->m_pFormDict; CPDF_Object* pContent = @@ -240,3 +244,46 @@ bool CPDF_PageContentGenerator::GraphicsData::operator<( return fillAlpha < other.fillAlpha; return strokeAlpha < other.strokeAlpha; } + +bool CPDF_PageContentGenerator::FontData::operator<( + const FontData& other) const { + return baseFont < other.baseFont; +} + +// This method adds text to the buffer, BT begins the text object, ET ends it. +// Tm sets the text matrix (allows positioning and transforming text). +// Tf sets the font name (from Font in Resources) and font size. +// Tj sets the actual text, <####...> is used when specifying charcodes. +void CPDF_PageContentGenerator::ProcessText(CFX_ByteTextBuf* buf, + CPDF_TextObject* pTextObj) { + // TODO(npm): Add support for something other than standard type1 fonts. + *buf << "BT " << pTextObj->GetTextMatrix() << " Tm "; + CPDF_Font* pFont = pTextObj->GetFont(); + if (!pFont) + pFont = CPDF_Font::GetStockFont(m_pDocument, "Helvetica"); + FontData fontD; + fontD.baseFont = pFont->GetBaseFont(); + auto it = m_FontsMap.find(fontD); + CFX_ByteString dictName; + if (it != m_FontsMap.end()) { + dictName = it->second; + } else { + auto fontDict = pdfium::MakeUnique(); + fontDict->SetNewFor("Type", "Font"); + fontDict->SetNewFor("Subtype", "Type1"); + fontDict->SetNewFor("BaseFont", fontD.baseFont); + CPDF_Object* pDict = m_pDocument->AddIndirectObject(std::move(fontDict)); + uint32_t dwObjNum = pDict->GetObjNum(); + dictName = RealizeResource(dwObjNum, "Font"); + m_FontsMap[fontD] = dictName; + } + *buf << "/" << PDF_NameEncode(dictName) << " " << pTextObj->GetFontSize() + << " Tf "; + CFX_ByteString text; + for (uint32_t charcode : pTextObj->m_CharCodes) { + if (charcode == CPDF_Font::kInvalidCharCode) + continue; + pFont->AppendChar(text, charcode); + } + *buf << PDF_EncodeString(text, true) << " Tj ET\n"; +} diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h index e48ea4a7c9..fd80bd8f44 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h @@ -18,6 +18,7 @@ class CPDF_ImageObject; class CPDF_Page; class CPDF_PageObject; class CPDF_PathObject; +class CPDF_TextObject; class CPDF_PageContentGenerator { public: @@ -32,6 +33,7 @@ class CPDF_PageContentGenerator { void ProcessPath(CFX_ByteTextBuf* buf, CPDF_PathObject* pPathObj); void ProcessImage(CFX_ByteTextBuf* buf, CPDF_ImageObject* pImageObj); void ProcessGraphics(CFX_ByteTextBuf* buf, CPDF_PageObject* pPageObj); + void ProcessText(CFX_ByteTextBuf* buf, CPDF_TextObject* pTextObj); CFX_ByteString RealizeResource(uint32_t dwResourceObjNum, const CFX_ByteString& bsType); @@ -41,7 +43,13 @@ class CPDF_PageContentGenerator { bool operator<(const GraphicsData& other) const; }; + struct FontData { + CFX_ByteString baseFont; + bool operator<(const FontData& other) const; + }; + std::map m_GraphicsMap; + std::map m_FontsMap; CPDF_Page* const m_pPage; CPDF_Document* const m_pDocument; std::vector m_pageObjects; diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator_unittest.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator_unittest.cpp index 4846b1bd3c..0a636a1888 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator_unittest.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator_unittest.cpp @@ -5,8 +5,10 @@ #include "core/fpdfapi/edit/cpdf_pagecontentgenerator.h" #include "core/fpdfapi/cpdf_modulemgr.h" +#include "core/fpdfapi/font/cpdf_font.h" #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/page/cpdf_pathobject.h" +#include "core/fpdfapi/page/cpdf_textobject.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_parser.h" #include "testing/gtest/include/gtest/gtest.h" @@ -24,10 +26,16 @@ class CPDF_PageContentGeneratorTest : public testing::Test { pGen->ProcessPath(buf, pPathObj); } - CPDF_Dictionary* TestGetGS(CPDF_PageContentGenerator* pGen, - const CFX_ByteString& name) { - return pGen->m_pPage->m_pResources->GetDictFor("ExtGState") - ->GetDictFor(name); + CPDF_Dictionary* TestGetResource(CPDF_PageContentGenerator* pGen, + const CFX_ByteString& type, + const CFX_ByteString& name) { + return pGen->m_pPage->m_pResources->GetDictFor(type)->GetDictFor(name); + } + + void TestProcessText(CPDF_PageContentGenerator* pGen, + CFX_ByteTextBuf* buf, + CPDF_TextObject* pTextObj) { + pGen->ProcessText(buf, pTextObj); } }; @@ -162,8 +170,8 @@ TEST_F(CPDF_PageContentGeneratorTest, ProcessGraphics) { pathString.Left(48)); EXPECT_EQ(" gs 1 2 m 3 4 l 5 6 l h B Q\n", pathString.Right(28)); ASSERT_TRUE(pathString.GetLength() > 76); - CPDF_Dictionary* externalGS = - TestGetGS(&generator, pathString.Mid(48, pathString.GetLength() - 76)); + CPDF_Dictionary* externalGS = TestGetResource( + &generator, "ExtGState", pathString.Mid(48, pathString.GetLength() - 76)); ASSERT_TRUE(externalGS); EXPECT_EQ(0.5f, externalGS->GetNumberFor("ca")); EXPECT_EQ(0.8f, externalGS->GetNumberFor("CA")); @@ -181,3 +189,29 @@ TEST_F(CPDF_PageContentGeneratorTest, ProcessGraphics) { EXPECT_EQ(pathString.Mid(48, pathString.GetLength() - 76), pathString2.Mid(55, pathString2.GetLength() - 83)); } + +TEST_F(CPDF_PageContentGeneratorTest, ProcessText) { + auto pDoc = pdfium::MakeUnique(nullptr); + pDoc->CreateNewDoc(); + CPDF_Dictionary* pPageDict = pDoc->CreateNewPage(0); + auto pTestPage = pdfium::MakeUnique(pDoc.get(), pPageDict, false); + CPDF_PageContentGenerator generator(pTestPage.get()); + auto pTextObj = pdfium::MakeUnique(); + CPDF_Font* pFont = CPDF_Font::GetStockFont(pDoc.get(), "Times-Roman"); + pTextObj->m_TextState.SetFont(pFont); + pTextObj->m_TextState.SetFontSize(10.0f); + pTextObj->Transform(CFX_Matrix(1, 0, 0, 1, 100, 100)); + pTextObj->SetText("Hello World"); + CFX_ByteTextBuf buf; + TestProcessText(&generator, &buf, pTextObj.get()); + CFX_ByteString textString = buf.MakeString(); + EXPECT_LT(61, textString.GetLength()); + EXPECT_EQ("BT 1 0 0 1 100 100 Tm /", textString.Left(23)); + EXPECT_EQ(" 10 Tf <48656C6C6F20576F726C64> Tj ET\n", textString.Right(38)); + CPDF_Dictionary* fontDict = TestGetResource( + &generator, "Font", textString.Mid(23, textString.GetLength() - 61)); + ASSERT_TRUE(fontDict); + EXPECT_EQ("Font", fontDict->GetStringFor("Type")); + EXPECT_EQ("Type1", fontDict->GetStringFor("Subtype")); + EXPECT_EQ("Times-Roman", fontDict->GetStringFor("BaseFont")); +} diff --git a/core/fpdfapi/page/cpdf_textobject.h b/core/fpdfapi/page/cpdf_textobject.h index bef1b1ebc4..07a2aabbe2 100644 --- a/core/fpdfapi/page/cpdf_textobject.h +++ b/core/fpdfapi/page/cpdf_textobject.h @@ -54,6 +54,7 @@ class CPDF_TextObject : public CPDF_PageObject { friend class CPDF_RenderStatus; friend class CPDF_StreamContentParser; friend class CPDF_TextRenderer; + friend class CPDF_PageContentGenerator; void SetSegments(const CFX_ByteString* pStrs, const FX_FLOAT* pKerning, -- cgit v1.2.3