summaryrefslogtreecommitdiff
path: root/fpdfsdk/fpdf_edittext.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fpdfsdk/fpdf_edittext.cpp')
-rw-r--r--fpdfsdk/fpdf_edittext.cpp499
1 files changed, 499 insertions, 0 deletions
diff --git a/fpdfsdk/fpdf_edittext.cpp b/fpdfsdk/fpdf_edittext.cpp
new file mode 100644
index 0000000000..8155003a1f
--- /dev/null
+++ b/fpdfsdk/fpdf_edittext.cpp
@@ -0,0 +1,499 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/fpdfapi/cpdf_modulemgr.h"
+#include "core/fpdfapi/font/cpdf_font.h"
+#include "core/fpdfapi/font/cpdf_type1font.h"
+#include "core/fpdfapi/page/cpdf_docpagedata.h"
+#include "core/fpdfapi/page/cpdf_textobject.h"
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_name.h"
+#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fxcrt/fx_extension.h"
+#include "core/fxge/cfx_fontmgr.h"
+#include "core/fxge/fx_font.h"
+#include "fpdfsdk/cpdfsdk_helpers.h"
+#include "public/fpdf_edit.h"
+
+namespace {
+
+// Creates the /FontDescriptor dictionary for |pFont| as a new indirect object
+// of |pDoc| and embeds the |size| bytes at |data| as its font file stream
+// ("FontFile" for FPDF_FONT_TYPE1, "FontFile2" otherwise). Returns the new
+// descriptor dictionary.
+CPDF_Dictionary* LoadFontDesc(CPDF_Document* pDoc,
+                              const ByteString& font_name,
+                              CFX_Font* pFont,
+                              const uint8_t* data,
+                              uint32_t size,
+                              int font_type) {
+  CPDF_Dictionary* pDescriptor = pDoc->NewIndirect<CPDF_Dictionary>();
+  pDescriptor->SetNewFor<CPDF_Name>("Type", "FontDescriptor");
+  pDescriptor->SetNewFor<CPDF_Name>("FontName", font_name);
+
+  // TODO(npm): How do I know if a font is symbolic, script, allcap, smallcap
+  // Until then every font is flagged as non-symbolic.
+  int descriptor_flags = FXFONT_NONSYMBOLIC;
+  if (FXFT_Is_Face_fixedwidth(pFont->GetFace()))
+    descriptor_flags |= FXFONT_FIXED_PITCH;
+  if (font_name.Contains("Serif"))
+    descriptor_flags |= FXFONT_SERIF;
+  if (FXFT_Is_Face_Italic(pFont->GetFace()))
+    descriptor_flags |= FXFONT_ITALIC;
+  if (FXFT_Is_Face_Bold(pFont->GetFace()))
+    descriptor_flags |= FXFONT_BOLD;
+  pDescriptor->SetNewFor<CPDF_Number>("Flags", descriptor_flags);
+
+  FX_RECT font_bbox;
+  pFont->GetBBox(font_bbox);
+  pDescriptor->SetRectFor("FontBBox", CFX_FloatRect(font_bbox));
+
+  // TODO(npm): calculate italic angle correctly
+  const int italic_angle = pFont->IsItalic() ? -12 : 0;
+  pDescriptor->SetNewFor<CPDF_Number>("ItalicAngle", italic_angle);
+
+  const auto ascent = pFont->GetAscent();
+  pDescriptor->SetNewFor<CPDF_Number>("Ascent", ascent);
+  pDescriptor->SetNewFor<CPDF_Number>("Descent", pFont->GetDescent());
+
+  // TODO(npm): calculate the capheight, stemV correctly
+  // For now the ascent doubles as the cap height.
+  pDescriptor->SetNewFor<CPDF_Number>("CapHeight", ascent);
+  const int stem_v = pFont->IsBold() ? 120 : 70;
+  pDescriptor->SetNewFor<CPDF_Number>("StemV", stem_v);
+
+  // The raw font program goes into its own indirect stream.
+  CPDF_Stream* pFontFile = pDoc->NewIndirect<CPDF_Stream>();
+  pFontFile->SetData(data, size);
+  // TODO(npm): Lengths for Type1 fonts.
+  if (font_type == FPDF_FONT_TRUETYPE) {
+    pFontFile->GetDict()->SetNewFor<CPDF_Number>("Length1",
+                                                 static_cast<int>(size));
+  }
+
+  const ByteString font_file_key(font_type == FPDF_FONT_TYPE1 ? "FontFile"
+                                                              : "FontFile2");
+  pDescriptor->SetNewFor<CPDF_Reference>(font_file_key, pDoc,
+                                         pFontFile->GetObjNum());
+  return pDescriptor;
+}
+
+// Fixed preamble of the ToUnicode CMap written by LoadUnicode(): it opens the
+// CMap resource, declares the Adobe-Identity CIDSystemInfo and a single
+// two-byte codespace range. The bfchar / bfrange sections follow it.
+//
+// Both bounds of the codespace range must be written with the same number of
+// hex digits (two bytes here), and CMapType has to be a literal name
+// ("/CMapType") so that `def` defines the key instead of executing it.
+const char ToUnicodeStart[] =
+    "/CIDInit /ProcSet findresource begin\n"
+    "12 dict begin\n"
+    "begincmap\n"
+    "/CIDSystemInfo\n"
+    "<</Registry (Adobe)\n"
+    "/Ordering (Identity)\n"
+    "/Supplement 0\n"
+    ">> def\n"
+    "/CMapName /Adobe-Identity-H def\n"
+    "/CMapType 2 def\n"
+    "1 begincodespacerange\n"
+    "<0000> <FFFF>\n"
+    "endcodespacerange\n";
+
+// Fixed trailer of the generated ToUnicode CMap: ends the cmap, registers it
+// as a /CMap resource and closes the two dictionaries opened by
+// ToUnicodeStart.
+const char ToUnicodeEnd[] =
+    "endcmap\nCMapName currentdict /CMap defineresource pop\n"
+    "end\nend\n";
+
+// Appends |number| to |pBuffer| as a CMap source code: the four characters
+// produced by FXSYS_IntToFourHexChars() wrapped in angle brackets. |number|
+// must fit in 16 bits.
+void AddCharcode(std::ostringstream* pBuffer, uint32_t number) {
+  ASSERT(number <= 0xFFFF);
+  char hex_digits[4];
+  FXSYS_IntToFourHexChars(number, hex_digits);
+  *pBuffer << "<";
+  for (char digit : hex_digits)
+    *pBuffer << digit;
+  *pBuffer << ">";
+}
+
+// Appends |unicode| to |pBuffer| as a bracketed destination string, using the
+// characters FXSYS_ToUTF16BE() produces for it. Values inside the surrogate
+// block (0xD800-0xDFFF) are written as 0 instead.
+// PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in
+// UTF-16BE encoding." See https://en.wikipedia.org/wiki/UTF-16#Description
+void AddUnicode(std::ostringstream* pBuffer, uint32_t unicode) {
+  const bool is_surrogate = unicode >= 0xD800 && unicode <= 0xDFFF;
+  char encoded[8];
+  const size_t encoded_len =
+      FXSYS_ToUTF16BE(is_surrogate ? 0 : unicode, encoded);
+
+  *pBuffer << "<";
+  for (const char* p = encoded; p != encoded + encoded_len; ++p)
+    *pBuffer << *p;
+  *pBuffer << ">";
+}
+
+// Builds the ToUnicode CMap for |to_unicode| (charcode -> unicode) and returns
+// it as a new indirect stream of |pDoc|.
+//
+// Entries are grouped into three kinds of CMap records:
+//  * bfchar:                <code> <unicode>
+//  * bfrange, array form:   <first> <last> [<u0> <u1> ...]
+//  * bfrange, single form:  <first> <last> <u0>   (u0, u0+1, u0+2, ...)
+// A range never spans charcodes with different high bytes. The single form is
+// additionally only used while the low byte of the unicode value does not wrap
+// past 0xFF: PDF spec 1.7 Section 5.9.2 leaves the mapping undefined when the
+// last byte of the destination string would be incremented past 255.
+CPDF_Stream* LoadUnicode(CPDF_Document* pDoc,
+                         const std::map<uint32_t, uint32_t>& to_unicode) {
+  // A map charcode->unicode
+  std::map<uint32_t, uint32_t> char_to_uni;
+  // A map <char_start, char_end> to vector v of unicode characters of size (end
+  // - start + 1). This abbreviates: start->v[0], start+1->v[1], etc. PDF spec
+  // 1.7 Section 5.9.2 says that only the last byte of the unicode may change.
+  std::map<std::pair<uint32_t, uint32_t>, std::vector<uint32_t>>
+      map_range_vector;
+  // A map <start, end> -> unicode
+  // This abbreviates: start->unicode, start+1->unicode+1, etc.
+  // PDF spec 1.7 Section 5.9.2 says that only the last byte of the unicode may
+  // change.
+  std::map<std::pair<uint32_t, uint32_t>, uint32_t> map_range;
+
+  // Calculate the maps
+  for (auto iter = to_unicode.begin(); iter != to_unicode.end(); ++iter) {
+    uint32_t firstCharcode = iter->first;
+    uint32_t firstUnicode = iter->second;
+    // No directly following charcode: a lone bfchar entry.
+    if (std::next(iter) == to_unicode.end() ||
+        firstCharcode + 1 != std::next(iter)->first) {
+      char_to_uni[firstCharcode] = firstUnicode;
+      continue;
+    }
+    ++iter;
+    uint32_t curCharcode = iter->first;
+    uint32_t curUnicode = iter->second;
+    // The second charcode starts a new high byte, so the two entries cannot
+    // share a range; emit both as bfchar entries.
+    if (curCharcode % 256 == 0) {
+      char_to_uni[firstCharcode] = firstUnicode;
+      char_to_uni[curCharcode] = curUnicode;
+      continue;
+    }
+    // How many more charcodes fit before the low byte of the charcode wraps.
+    const size_t maxExtra = 255 - (curCharcode % 256);
+    auto next_it = std::next(iter);
+    // Use the array form when the unicodes are not consecutive, and also when
+    // they are consecutive but stepping from the first to the second would
+    // already wrap the low byte of the destination (0x..FF -> 0x..00).
+    if (firstUnicode + 1 != curUnicode || firstUnicode % 256 == 255) {
+      std::vector<uint32_t> unicodes;
+      unicodes.push_back(firstUnicode);
+      unicodes.push_back(curUnicode);
+      for (size_t i = 0; i < maxExtra; ++i) {
+        if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first)
+          break;
+        ++iter;
+        ++curCharcode;
+        unicodes.push_back(iter->second);
+        next_it = std::next(iter);
+      }
+      ASSERT(iter->first - firstCharcode + 1 == unicodes.size());
+      map_range_vector[std::make_pair(firstCharcode, iter->first)] = unicodes;
+      continue;
+    }
+    // Consecutive charcodes mapping to consecutive unicodes. Stop extending
+    // the range before the low byte of the unicode value would wrap; the
+    // remaining entries start a new group on the next outer iteration.
+    for (size_t i = 0; i < maxExtra; ++i) {
+      if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first ||
+          curUnicode + 1 != next_it->second || curUnicode % 256 == 255) {
+        break;
+      }
+      ++iter;
+      ++curCharcode;
+      ++curUnicode;
+      next_it = std::next(iter);
+    }
+    map_range[std::make_pair(firstCharcode, curCharcode)] = firstUnicode;
+  }
+  std::ostringstream buffer;
+  buffer << ToUnicodeStart;
+  // Add maps to buffer
+  buffer << static_cast<uint32_t>(char_to_uni.size()) << " beginbfchar\n";
+  for (const auto& iter : char_to_uni) {
+    AddCharcode(&buffer, iter.first);
+    buffer << " ";
+    AddUnicode(&buffer, iter.second);
+    buffer << "\n";
+  }
+  // Both range forms share one bfrange section.
+  buffer << "endbfchar\n"
+         << static_cast<uint32_t>(map_range_vector.size() + map_range.size())
+         << " beginbfrange\n";
+  for (const auto& iter : map_range_vector) {
+    const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
+    AddCharcode(&buffer, charcodeRange.first);
+    buffer << " ";
+    AddCharcode(&buffer, charcodeRange.second);
+    buffer << " [";
+    const std::vector<uint32_t>& unicodes = iter.second;
+    for (size_t i = 0; i < unicodes.size(); ++i) {
+      uint32_t uni = unicodes[i];
+      AddUnicode(&buffer, uni);
+      if (i != unicodes.size() - 1)
+        buffer << " ";
+    }
+    buffer << "]\n";
+  }
+  for (const auto& iter : map_range) {
+    const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
+    AddCharcode(&buffer, charcodeRange.first);
+    buffer << " ";
+    AddCharcode(&buffer, charcodeRange.second);
+    buffer << " ";
+    AddUnicode(&buffer, iter.second);
+    buffer << "\n";
+  }
+  buffer << "endbfrange\n";
+  buffer << ToUnicodeEnd;
+  // TODO(npm): Encrypt / Compress?
+  CPDF_Stream* stream = pDoc->NewIndirect<CPDF_Stream>();
+  stream->SetData(&buffer);
+  return stream;
+}
+
+// Largest character code a simple font can address (one-byte charcodes).
+const uint32_t kMaxSimpleFontChar = 0xFF;
+
+// Builds a simple (Type1 or TrueType, chosen by |font_type|) font dictionary
+// for |pFont| in |pDoc|, including its /Widths array and a font descriptor
+// that embeds the |size| bytes at |data|, and returns the result of
+// pDoc->LoadFont() on that dictionary.
+//
+// Returns nullptr when the first character of the font's charmap has no glyph
+// or lies above kMaxSimpleFontChar. That check runs before anything is added
+// to |pDoc|, so a rejected font leaves no orphaned indirect objects behind.
+void* LoadSimpleFont(CPDF_Document* pDoc,
+                     std::unique_ptr<CFX_Font> pFont,
+                     const uint8_t* data,
+                     uint32_t size,
+                     int font_type) {
+  uint32_t glyphIndex;
+  uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex);
+  if (currentChar > kMaxSimpleFontChar || glyphIndex == 0)
+    return nullptr;
+
+  CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>();
+  fontDict->SetNewFor<CPDF_Name>("Type", "Font");
+  fontDict->SetNewFor<CPDF_Name>(
+      "Subtype", font_type == FPDF_FONT_TYPE1 ? "Type1" : "TrueType");
+  ByteString name = pFont->GetFaceName();
+  if (name.IsEmpty())
+    name = "Unnamed";
+  fontDict->SetNewFor<CPDF_Name>("BaseFont", name);
+
+  fontDict->SetNewFor<CPDF_Number>("FirstChar", static_cast<int>(currentChar));
+  CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>();
+  while (true) {
+    // Clamp so the width survives the conversion to int below.
+    uint32_t width =
+        std::min(pFont->GetGlyphWidth(glyphIndex),
+                 static_cast<uint32_t>(std::numeric_limits<int>::max()));
+    widthsArray->AddNew<CPDF_Number>(static_cast<int>(width));
+    uint32_t nextChar =
+        FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex);
+    // Simple fonts have 1-byte charcodes only.
+    if (nextChar > kMaxSimpleFontChar || glyphIndex == 0)
+      break;
+    // /Widths is indexed from FirstChar without gaps, so charcodes the font
+    // does not map get a zero width.
+    for (uint32_t i = currentChar + 1; i < nextChar; i++)
+      widthsArray->AddNew<CPDF_Number>(0);
+    currentChar = nextChar;
+  }
+  fontDict->SetNewFor<CPDF_Number>("LastChar", static_cast<int>(currentChar));
+  fontDict->SetNewFor<CPDF_Reference>("Widths", pDoc, widthsArray->GetObjNum());
+  CPDF_Dictionary* pFontDesc =
+      LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type);
+
+  fontDict->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
+                                      pFontDesc->GetObjNum());
+  return pDoc->LoadFont(fontDict);
+}
+
+// Largest valid Unicode code point.
+const uint32_t kMaxUnicode = 0x10FFFF;
+
+// Builds a composite (Type0, Identity-H) font for |pFont| in |pDoc|: the Type0
+// dictionary, its descendant CIDFont (CIDFontType0 for FPDF_FONT_TYPE1,
+// CIDFontType2 otherwise) with /W widths keyed by glyph index, a font
+// descriptor embedding the |size| bytes at |data|, and a ToUnicode CMap.
+// Returns the result of pDoc->LoadFont() on the Type0 dictionary.
+//
+// Returns nullptr when the font's charmap has no usable first character. That
+// check runs before anything is added to |pDoc|, so a rejected font does not
+// leave orphaned objects (including a copy of the font file) in the document.
+void* LoadCompositeFont(CPDF_Document* pDoc,
+                        std::unique_ptr<CFX_Font> pFont,
+                        const uint8_t* data,
+                        uint32_t size,
+                        int font_type) {
+  uint32_t glyphIndex;
+  uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex);
+  // If it doesn't have a single char, just fail
+  if (glyphIndex == 0 || currentChar > kMaxUnicode)
+    return nullptr;
+
+  CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>();
+  fontDict->SetNewFor<CPDF_Name>("Type", "Font");
+  fontDict->SetNewFor<CPDF_Name>("Subtype", "Type0");
+  // TODO(npm): Get the correct encoding, if it's not identity.
+  ByteString encoding = "Identity-H";
+  fontDict->SetNewFor<CPDF_Name>("Encoding", encoding);
+  ByteString name = pFont->GetFaceName();
+  if (name.IsEmpty())
+    name = "Unnamed";
+  fontDict->SetNewFor<CPDF_Name>(
+      "BaseFont", font_type == FPDF_FONT_TYPE1 ? name + "-" + encoding : name);
+
+  CPDF_Dictionary* pCIDFont = pDoc->NewIndirect<CPDF_Dictionary>();
+  pCIDFont->SetNewFor<CPDF_Name>("Type", "Font");
+  pCIDFont->SetNewFor<CPDF_Name>("Subtype", font_type == FPDF_FONT_TYPE1
+                                                ? "CIDFontType0"
+                                                : "CIDFontType2");
+  pCIDFont->SetNewFor<CPDF_Name>("BaseFont", name);
+
+  // TODO(npm): Maybe use FT_Get_CID_Registry_Ordering_Supplement to get the
+  // CIDSystemInfo
+  // NOTE(review): the PDF spec describes Registry and Ordering as strings, but
+  // they are written as names here - confirm and switch to string objects.
+  CPDF_Dictionary* pCIDSystemInfo = pDoc->NewIndirect<CPDF_Dictionary>();
+  pCIDSystemInfo->SetNewFor<CPDF_Name>("Registry", "Adobe");
+  pCIDSystemInfo->SetNewFor<CPDF_Name>("Ordering", "Identity");
+  pCIDSystemInfo->SetNewFor<CPDF_Number>("Supplement", 0);
+  pCIDFont->SetNewFor<CPDF_Reference>("CIDSystemInfo", pDoc,
+                                      pCIDSystemInfo->GetObjNum());
+
+  CPDF_Dictionary* pFontDesc =
+      LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type);
+  pCIDFont->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
+                                      pFontDesc->GetObjNum());
+
+  // Walk the charmap, recording for every glyph index its width and the
+  // character that maps to it.
+  std::map<uint32_t, uint32_t> to_unicode;
+  std::map<uint32_t, uint32_t> widths;
+  while (true) {
+    if (currentChar > kMaxUnicode)
+      break;
+
+    widths[glyphIndex] = pFont->GetGlyphWidth(glyphIndex);
+    to_unicode[glyphIndex] = currentChar;
+    currentChar =
+        FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex);
+    if (glyphIndex == 0)
+      break;
+  }
+
+  // Encode |widths| as a /W array. Every group describes a run of consecutive
+  // CIDs only; a gap in the CID sequence always starts a new group so that no
+  // width is attributed to a CID it does not belong to.
+  CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>();
+  for (auto it = widths.begin(); it != widths.end(); ++it) {
+    int ch = it->first;
+    int w = it->second;
+    auto next_it = std::next(it);
+    if (next_it == widths.end() || next_it->first != it->first + 1) {
+      // No directly following CID, use format c [w]
+      auto oneW = pdfium::MakeUnique<CPDF_Array>();
+      oneW->AddNew<CPDF_Number>(w);
+      widthsArray->AddNew<CPDF_Number>(ch);
+      widthsArray->Add(std::move(oneW));
+      continue;
+    }
+    // From here on the next entry is known to be CID ch + 1.
+    ++it;
+    int next_w = it->second;
+    if (next_w == w) {
+      // The array can have a group c_first c_last w: all CIDs in the range from
+      // c_first to c_last will have width w
+      widthsArray->AddNew<CPDF_Number>(ch);
+      while (true) {
+        next_it = std::next(it);
+        if (next_it == widths.end() || next_it->first != it->first + 1 ||
+            next_it->second != it->second) {
+          break;
+        }
+        ++it;
+      }
+      widthsArray->AddNew<CPDF_Number>(static_cast<int>(it->first));
+      widthsArray->AddNew<CPDF_Number>(w);
+      continue;
+    }
+    // Otherwise we can have a group of the form c [w1 w2 ...]: c has width
+    // w1, c+1 has width w2, etc.
+    widthsArray->AddNew<CPDF_Number>(ch);
+    auto curWidthArray = pdfium::MakeUnique<CPDF_Array>();
+    curWidthArray->AddNew<CPDF_Number>(w);
+    curWidthArray->AddNew<CPDF_Number>(next_w);
+    while (true) {
+      next_it = std::next(it);
+      if (next_it == widths.end() || next_it->first != it->first + 1)
+        break;
+      ++it;
+      curWidthArray->AddNew<CPDF_Number>(static_cast<int>(it->second));
+    }
+    widthsArray->Add(std::move(curWidthArray));
+  }
+  pCIDFont->SetNewFor<CPDF_Reference>("W", pDoc, widthsArray->GetObjNum());
+  // TODO(npm): Support vertical writing
+
+  auto pDescendant = pdfium::MakeUnique<CPDF_Array>();
+  pDescendant->AddNew<CPDF_Reference>(pDoc, pCIDFont->GetObjNum());
+  fontDict->SetFor("DescendantFonts", std::move(pDescendant));
+  CPDF_Stream* toUnicodeStream = LoadUnicode(pDoc, to_unicode);
+  fontDict->SetNewFor<CPDF_Reference>("ToUnicode", pDoc,
+                                      toUnicodeStream->GetObjNum());
+  return pDoc->LoadFont(fontDict);
+}
+
+} // namespace
+
+// Creates a text object that uses the stock font named |font| at |font_size|.
+// Returns nullptr if |document| does not resolve to a document or
+// CPDF_Font::GetStockFont() yields no font for that name.
+FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV
+FPDFPageObj_NewTextObj(FPDF_DOCUMENT document,
+                       FPDF_BYTESTRING font,
+                       float font_size) {
+  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
+  CPDF_Font* pStockFont =
+      pDoc ? CPDF_Font::GetStockFont(pDoc, ByteStringView(font)) : nullptr;
+  if (!pStockFont)
+    return nullptr;
+
+  std::unique_ptr<CPDF_TextObject> pNewText =
+      pdfium::MakeUnique<CPDF_TextObject>();
+  pNewText->m_TextState.SetFont(pStockFont);
+  pNewText->m_TextState.SetFontSize(font_size);
+  pNewText->DefaultStates();
+  // Ownership passes to the caller.
+  return pNewText.release();
+}
+
+// Replaces the contents of |text_object| with |text| (UTF-16LE). Each
+// character is converted to a charcode of the object's font and appended to
+// the byte string handed to SetText(). Returns false only when |text_object|
+// is null.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDFText_SetText(FPDF_PAGEOBJECT text_object, FPDF_WIDESTRING text) {
+  auto* pTextObj = static_cast<CPDF_TextObject*>(text_object);
+  if (!pTextObj)
+    return false;
+
+  const size_t text_len = WideString::WStringLength(text);
+  WideString decoded = WideString::FromUTF16LE(text, text_len);
+
+  CPDF_Font* pFont = pTextObj->GetFont();
+  ByteString charcodes;
+  for (wchar_t ch : decoded)
+    pFont->AppendChar(&charcodes, pFont->CharCodeFromUnicode(ch));
+
+  pTextObj->SetText(charcodes);
+  return true;
+}
+
+// Loads the font program in the |size| bytes at |data| into |document|, as a
+// composite font when |cid| is set and as a simple font otherwise. Returns
+// nullptr if |document| does not resolve, |data| is null or empty, |font_type|
+// is neither FPDF_FONT_TYPE1 nor FPDF_FONT_TRUETYPE, or the data cannot be
+// loaded as an embedded font.
+FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document,
+                                                      const uint8_t* data,
+                                                      uint32_t size,
+                                                      int font_type,
+                                                      FPDF_BOOL cid) {
+  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
+  if (!pDoc || !data || size == 0)
+    return nullptr;
+
+  const bool known_type =
+      font_type == FPDF_FONT_TYPE1 || font_type == FPDF_FONT_TRUETYPE;
+  if (!known_type)
+    return nullptr;
+
+  // TODO(npm): Maybe use FT_Get_X11_Font_Format to check format? Otherwise, we
+  // are allowing giving any font that can be loaded on freetype and setting it
+  // as any font type.
+  auto pLoaded = pdfium::MakeUnique<CFX_Font>();
+  if (!pLoaded->LoadEmbedded(data, size))
+    return nullptr;
+
+  if (cid)
+    return LoadCompositeFont(pDoc, std::move(pLoaded), data, size, font_type);
+  return LoadSimpleFont(pDoc, std::move(pLoaded), data, size, font_type);
+}
+
+// Sets the fill colour of |text_object| from the components |R|, |G|, |B| and
+// |A|. All arguments are forwarded unchanged to FPDFPageObj_SetFillColor(),
+// whose result is returned.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDFText_SetFillColor(FPDF_PAGEOBJECT text_object,
+ unsigned int R,
+ unsigned int G,
+ unsigned int B,
+ unsigned int A) {
+ return FPDFPageObj_SetFillColor(text_object, R, G, B, A);
+}
+
+// Releases |font| from the page data of the document it belongs to. Does
+// nothing when |font| is null, has no document, or the page data reports
+// IsForceClear().
+FPDF_EXPORT void FPDF_CALLCONV FPDFFont_Close(FPDF_FONT font) {
+  if (auto* pFont = static_cast<CPDF_Font*>(font)) {
+    if (CPDF_Document* pDoc = pFont->GetDocument()) {
+      CPDF_DocPageData* pPageData = pDoc->GetPageData();
+      if (!pPageData->IsForceClear())
+        pPageData->ReleaseFont(pFont->GetFontDict());
+    }
+  }
+}
+
+// Creates a text object at |font_size| whose font is obtained by loading
+// |font|'s dictionary through |document|. Returns nullptr if |document| does
+// not resolve to a document or |font| is null; otherwise the caller receives
+// the released object.
+FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV
+FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document,
+                          FPDF_FONT font,
+                          float font_size) {
+  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
+  auto* pSourceFont = static_cast<CPDF_Font*>(font);
+  if (pDoc == nullptr || pSourceFont == nullptr)
+    return nullptr;
+
+  std::unique_ptr<CPDF_TextObject> pNewText =
+      pdfium::MakeUnique<CPDF_TextObject>();
+  CPDF_Font* pDocFont = pDoc->LoadFont(pSourceFont->GetFontDict());
+  pNewText->m_TextState.SetFont(pDocFont);
+  pNewText->m_TextState.SetFontSize(font_size);
+  pNewText->DefaultStates();
+  return pNewText.release();
+}