// Copyright 2017 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include #include #include #include #include "core/fpdfapi/cpdf_modulemgr.h" #include "core/fpdfapi/font/cpdf_font.h" #include "core/fpdfapi/font/cpdf_type1font.h" #include "core/fpdfapi/page/cpdf_docpagedata.h" #include "core/fpdfapi/page/cpdf_textobject.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fxcrt/fx_extension.h" #include "core/fxge/cfx_fontmgr.h" #include "core/fxge/fx_font.h" #include "fpdfsdk/fsdk_define.h" #include "public/fpdf_edit.h" namespace { CPDF_Dictionary* LoadFontDesc(CPDF_Document* pDoc, const CFX_ByteString& font_name, CFX_Font* pFont, const uint8_t* data, uint32_t size, int font_type) { CPDF_Dictionary* fontDesc = pDoc->NewIndirect(); fontDesc->SetNewFor("Type", "FontDescriptor"); fontDesc->SetNewFor("FontName", font_name); int flags = 0; if (FXFT_Is_Face_fixedwidth(pFont->GetFace())) flags |= FXFONT_FIXED_PITCH; if (font_name.Find("Serif") > -1) flags |= FXFONT_SERIF; if (FXFT_Is_Face_Italic(pFont->GetFace())) flags |= FXFONT_ITALIC; if (FXFT_Is_Face_Bold(pFont->GetFace())) flags |= FXFONT_BOLD; // TODO(npm): How do I know if a font is symbolic, script, allcap, smallcap flags |= FXFONT_NONSYMBOLIC; fontDesc->SetNewFor("Flags", flags); FX_RECT bbox; pFont->GetBBox(bbox); auto pBBox = pdfium::MakeUnique(); pBBox->AddNew(bbox.left); pBBox->AddNew(bbox.bottom); pBBox->AddNew(bbox.right); pBBox->AddNew(bbox.top); fontDesc->SetFor("FontBBox", std::move(pBBox)); // TODO(npm): calculate italic angle correctly fontDesc->SetNewFor("ItalicAngle", pFont->IsItalic() ? -12 : 0); fontDesc->SetNewFor("Ascent", pFont->GetAscent()); fontDesc->SetNewFor("Descent", pFont->GetDescent()); // TODO(npm): calculate the capheight, stemV correctly fontDesc->SetNewFor("CapHeight", pFont->GetAscent()); fontDesc->SetNewFor("StemV", pFont->IsBold() ? 120 : 70); CPDF_Stream* pStream = pDoc->NewIndirect(); pStream->SetData(data, size); CFX_ByteString fontFile = font_type == FPDF_FONT_TYPE1 ? "FontFile" : "FontFile2"; fontDesc->SetNewFor(fontFile, pDoc, pStream->GetObjNum()); return fontDesc; } const char ToUnicodeStart[] = "/CIDInit /ProcSet findresource begin\n" "12 dict begin\n" "begincmap\n" "/CIDSystemInfo\n" "<> def\n" "/CMapName /Adobe-Identity-H def\n" "CMapType 2 def\n" "1 begincodespacerange\n" "<0000> \n" "endcodespacerange\n"; const char ToUnicodeEnd[] = "endcmap\n" "CMapName currentdict /CMap defineresource pop\n" "end\n" "end\n"; void AddCharcode(CFX_ByteTextBuf* pBuffer, uint32_t number) { ASSERT(number <= 0xFFFF); *pBuffer << "<"; char ans[4]; FXSYS_IntToFourHexChars(number, ans); for (size_t i = 0; i < 4; ++i) pBuffer->AppendChar(ans[i]); *pBuffer << ">"; } // PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in // UTF-16BE encoding." See https://en.wikipedia.org/wiki/UTF-16#Description void AddUnicode(CFX_ByteTextBuf* pBuffer, uint32_t unicode) { char ans[8]; *pBuffer << "<"; size_t numChars = FXSYS_ToUTF16BE(unicode, ans); for (size_t i = 0; i < numChars; ++i) pBuffer->AppendChar(ans[i]); *pBuffer << ">"; } // Loads the charcode to unicode mapping into a stream CPDF_Stream* LoadUnicode(CPDF_Document* pDoc, const std::map& to_unicode) { // A map charcode->unicode std::map char_to_uni; // A map to vector v of unicode characters of size (end // - start + 1). This abbreviates: start->v[0], start+1->v[1], etc. PDF spec // 1.7 Section 5.9.2 says that only the last byte of the unicode may change. std::map, std::vector> map_range_vector; // A map -> unicode // This abbreviates: start->unicode, start+1->unicode+1, etc. // PDF spec 1.7 Section 5.9.2 says that only the last byte of the unicode may // change. std::map, uint32_t> map_range; // Calculate the maps for (auto iter = to_unicode.begin(); iter != to_unicode.end(); ++iter) { uint32_t firstCharcode = iter->first; uint32_t firstUnicode = iter->second; if (std::next(iter) == to_unicode.end() || firstCharcode + 1 != std::next(iter)->first) { char_to_uni[firstCharcode] = firstUnicode; continue; } ++iter; uint32_t curCharcode = iter->first; uint32_t curUnicode = iter->second; if (curCharcode % 256 == 0) { char_to_uni[firstCharcode] = firstUnicode; char_to_uni[curCharcode] = curUnicode; continue; } const size_t maxExtra = 255 - (curCharcode % 256); auto next_it = std::next(iter); if (firstUnicode + 1 != curUnicode) { // Consecutive charcodes mapping to non-consecutive unicodes std::vector unicodes; unicodes.push_back(firstUnicode); unicodes.push_back(curUnicode); for (size_t i = 0; i < maxExtra; ++i) { if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first) break; ++iter; ++curCharcode; unicodes.push_back(iter->second); next_it = std::next(iter); } ASSERT(iter->first - firstCharcode + 1 == unicodes.size()); map_range_vector[std::make_pair(firstCharcode, iter->first)] = unicodes; continue; } // Consecutive charcodes mapping to consecutive unicodes for (size_t i = 0; i < maxExtra; ++i) { if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first || curUnicode + 1 != next_it->second) { break; } ++iter; ++curCharcode; ++curUnicode; next_it = std::next(iter); } map_range[std::make_pair(firstCharcode, curCharcode)] = firstUnicode; } CFX_ByteTextBuf buffer; buffer << ToUnicodeStart; // Add maps to buffer buffer << static_cast(char_to_uni.size()) << " beginbfchar\n"; for (const auto& iter : char_to_uni) { AddCharcode(&buffer, iter.first); buffer << " "; AddUnicode(&buffer, iter.second); buffer << "\n"; } buffer << "endbfchar\n" << static_cast(map_range_vector.size() + map_range.size()) << " beginbfrange\n"; for (const auto& iter : map_range_vector) { const std::pair& charcodeRange = iter.first; AddCharcode(&buffer, charcodeRange.first); buffer << " "; AddCharcode(&buffer, charcodeRange.second); buffer << " ["; const std::vector& unicodes = iter.second; for (size_t i = 0; i < unicodes.size(); ++i) { uint32_t uni = unicodes[i]; AddUnicode(&buffer, uni); if (i != unicodes.size() - 1) buffer << " "; } buffer << "]\n"; } for (const auto& iter : map_range) { const std::pair& charcodeRange = iter.first; AddCharcode(&buffer, charcodeRange.first); buffer << " "; AddCharcode(&buffer, charcodeRange.second); buffer << " "; AddUnicode(&buffer, iter.second); buffer << "\n"; } buffer << "endbfrange\n"; buffer << ToUnicodeEnd; // TODO(npm): Encrypt / Compress? uint32_t bufferSize = buffer.GetSize(); auto pDict = pdfium::MakeUnique(); pDict->SetNewFor("Length", static_cast(bufferSize)); return pDoc->NewIndirect(buffer.DetachBuffer(), bufferSize, std::move(pDict)); } void* LoadSimpleFont(CPDF_Document* pDoc, std::unique_ptr pFont, const uint8_t* data, uint32_t size, int font_type) { CPDF_Dictionary* fontDict = pDoc->NewIndirect(); fontDict->SetNewFor("Type", "Font"); fontDict->SetNewFor( "Subtype", font_type == FPDF_FONT_TYPE1 ? "Type1" : "TrueType"); CFX_ByteString name = pFont->GetFaceName(); if (name.IsEmpty()) name = "Unnamed"; fontDict->SetNewFor("BaseFont", name); uint32_t glyphIndex; int currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex); fontDict->SetNewFor("FirstChar", currentChar); CPDF_Array* widthsArray = pDoc->NewIndirect(); while (true) { widthsArray->AddNew(pFont->GetGlyphWidth(glyphIndex)); int nextChar = FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex); // Simple fonts have 1-byte charcodes only. if (nextChar > 0xff || glyphIndex == 0) break; for (int i = currentChar + 1; i < nextChar; i++) widthsArray->AddNew(0); currentChar = nextChar; } fontDict->SetNewFor("LastChar", currentChar); fontDict->SetNewFor("Widths", pDoc, widthsArray->GetObjNum()); CPDF_Dictionary* fontDesc = LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type); fontDict->SetNewFor("FontDescriptor", pDoc, fontDesc->GetObjNum()); return pDoc->LoadFont(fontDict); } void* LoadCompositeFont(CPDF_Document* pDoc, std::unique_ptr pFont, const uint8_t* data, uint32_t size, int font_type) { CPDF_Dictionary* fontDict = pDoc->NewIndirect(); fontDict->SetNewFor("Type", "Font"); fontDict->SetNewFor("Subtype", "Type0"); // TODO(npm): Get the correct encoding, if it's not identity. CFX_ByteString encoding = "Identity-H"; fontDict->SetNewFor("Encoding", encoding); CFX_ByteString name = pFont->GetFaceName(); if (name.IsEmpty()) name = "Unnamed"; fontDict->SetNewFor( "BaseFont", font_type == FPDF_FONT_TYPE1 ? name + "-" + encoding : name); CPDF_Dictionary* pCIDFont = pDoc->NewIndirect(); pCIDFont->SetNewFor("Type", "Font"); pCIDFont->SetNewFor("Subtype", font_type == FPDF_FONT_TYPE1 ? "CIDFontType0" : "CIDFontType2"); pCIDFont->SetNewFor("BaseFont", name); // TODO(npm): Maybe use FT_Get_CID_Registry_Ordering_Supplement to get the // CIDSystemInfo CPDF_Dictionary* pCIDSystemInfo = pDoc->NewIndirect(); pCIDSystemInfo->SetNewFor("Registry", "Adobe"); pCIDSystemInfo->SetNewFor("Ordering", "Identity"); pCIDSystemInfo->SetNewFor("Supplement", 0); pCIDFont->SetNewFor("CIDSystemInfo", pDoc, pCIDSystemInfo->GetObjNum()); CPDF_Dictionary* fontDesc = LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type); pCIDFont->SetNewFor("FontDescriptor", pDoc, fontDesc->GetObjNum()); uint32_t glyphIndex; int currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex); // If it doesn't have a single char, just fail if (glyphIndex == 0) return nullptr; std::map to_unicode; std::map widths; while (true) { if (currentChar > 0x10FFFF) break; widths[glyphIndex] = pFont->GetGlyphWidth(glyphIndex); to_unicode[glyphIndex] = currentChar; currentChar = FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex); if (glyphIndex == 0) break; } CPDF_Array* widthsArray = pDoc->NewIndirect(); for (auto it = widths.begin(); it != widths.end(); ++it) { int ch = it->first; int w = it->second; if (std::next(it) == widths.end()) { // Only one char left, use format c [w] auto oneW = pdfium::MakeUnique(); oneW->AddNew(w); widthsArray->AddNew(ch); widthsArray->Add(std::move(oneW)); break; } ++it; int next_ch = it->first; int next_w = it->second; if (next_ch == ch + 1 && next_w == w) { // The array can have a group c_first c_last w: all CIDs in the range from // c_first to c_last will have width w widthsArray->AddNew(ch); ch = next_ch; while (true) { auto next_it = std::next(it); if (next_it == widths.end() || next_it->first != it->first + 1 || next_it->second != it->second) { break; } ++it; ch = it->first; } widthsArray->AddNew(ch); widthsArray->AddNew(w); continue; } // Otherwise we can have a group of the form c [w1 w2 ...]: c has width // w1, c+1 has width w2, etc. widthsArray->AddNew(ch); auto curWidthArray = pdfium::MakeUnique(); curWidthArray->AddNew(w); curWidthArray->AddNew(next_w); while (true) { auto next_it = std::next(it); if (next_it == widths.end() || next_it->first != it->first + 1) break; ++it; curWidthArray->AddNew(static_cast(it->second)); } widthsArray->Add(std::move(curWidthArray)); } pCIDFont->SetNewFor("W", pDoc, widthsArray->GetObjNum()); // TODO(npm): Support vertical writing auto pDescendant = pdfium::MakeUnique(); pDescendant->AddNew(pDoc, pCIDFont->GetObjNum()); fontDict->SetFor("DescendantFonts", std::move(pDescendant)); CPDF_Stream* toUnicodeStream = LoadUnicode(pDoc, to_unicode); fontDict->SetNewFor("ToUnicode", pDoc, toUnicodeStream->GetObjNum()); return pDoc->LoadFont(fontDict); } } // namespace DLLEXPORT FPDF_PAGEOBJECT STDCALL FPDFPageObj_NewTextObj(FPDF_DOCUMENT document, FPDF_BYTESTRING font, float font_size) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc) return nullptr; CPDF_Font* pFont = CPDF_Font::GetStockFont(pDoc, CFX_ByteStringC(font)); if (!pFont) return nullptr; auto pTextObj = pdfium::MakeUnique(); pTextObj->m_TextState.SetFont(pFont); pTextObj->m_TextState.SetFontSize(font_size); pTextObj->DefaultStates(); return pTextObj.release(); // Caller takes ownership. } DLLEXPORT FPDF_BOOL STDCALL FPDFText_SetText(FPDF_PAGEOBJECT text_object, FPDF_WIDESTRING text) { auto* pTextObj = static_cast(text_object); if (!pTextObj) return false; FX_STRSIZE len = CFX_WideString::WStringLength(text); CFX_WideString encodedText = CFX_WideString::FromUTF16LE(text, len); CFX_ByteString byteText; for (wchar_t wc : encodedText) { pTextObj->GetFont()->AppendChar( &byteText, pTextObj->GetFont()->CharCodeFromUnicode(wc)); } pTextObj->SetText(byteText); return true; } DLLEXPORT FPDF_FONT STDCALL FPDFText_LoadFont(FPDF_DOCUMENT document, const uint8_t* data, uint32_t size, int font_type, FPDF_BOOL cid) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc || !data || size == 0 || (font_type != FPDF_FONT_TYPE1 && font_type != FPDF_FONT_TRUETYPE)) { return nullptr; } auto pFont = pdfium::MakeUnique(); // TODO(npm): Maybe use FT_Get_X11_Font_Format to check format? Otherwise, we // are allowing giving any font that can be loaded on freetype and setting it // as any font type. if (!pFont->LoadEmbedded(data, size)) return nullptr; return cid ? LoadCompositeFont(pDoc, std::move(pFont), data, size, font_type) : LoadSimpleFont(pDoc, std::move(pFont), data, size, font_type); } DLLEXPORT FPDF_BOOL STDCALL FPDFText_SetFillColor(FPDF_PAGEOBJECT text_object, unsigned int R, unsigned int G, unsigned int B, unsigned int A) { return FPDFPageObj_SetFillColor(text_object, R, G, B, A); } DLLEXPORT void STDCALL FPDFFont_Close(FPDF_FONT font) { CPDF_Font* pFont = static_cast(font); if (!pFont) return; CPDF_Document* pDoc = pFont->GetDocument(); if (!pDoc) return; CPDF_DocPageData* pPageData = pDoc->GetPageData(); if (!pPageData->IsForceClear()) pPageData->ReleaseFont(pFont->GetFontDict()); } DLLEXPORT FPDF_PAGEOBJECT STDCALL FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document, FPDF_FONT font, float font_size) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); CPDF_Font* pFont = static_cast(font); if (!pDoc || !pFont) return nullptr; auto pTextObj = pdfium::MakeUnique(); pTextObj->m_TextState.SetFont(pDoc->LoadFont(pFont->GetFontDict())); pTextObj->m_TextState.SetFontSize(font_size); pTextObj->DefaultStates(); return pTextObj.release(); }