1 files changed, 499 insertions, 0 deletions
diff --git a/fpdfsdk/fpdf_edittext.cpp b/fpdfsdk/fpdf_edittext.cpp
new file mode 100644
index 0000000000..8155003a1f
--- /dev/null
+++ b/fpdfsdk/fpdf_edittext.cpp
@@ -0,0 +1,499 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/fpdfapi/cpdf_modulemgr.h"
+#include "core/fpdfapi/font/cpdf_font.h"
+#include "core/fpdfapi/font/cpdf_type1font.h"
+#include "core/fpdfapi/page/cpdf_docpagedata.h"
+#include "core/fpdfapi/page/cpdf_textobject.h"
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_name.h"
+#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fxcrt/fx_extension.h"
+#include "core/fxge/cfx_fontmgr.h"
+#include "core/fxge/fx_font.h"
+#include "fpdfsdk/cpdfsdk_helpers.h"
+#include "public/fpdf_edit.h"
+
+namespace {
+
+CPDF_Dictionary* LoadFontDesc(CPDF_Document* pDoc,
+                              const ByteString& font_name,
+                              CFX_Font* pFont,
+                              const uint8_t* data,
+                              uint32_t size,
+                              int font_type) {
+  CPDF_Dictionary* pFontDesc = pDoc->NewIndirect<CPDF_Dictionary>();
+  pFontDesc->SetNewFor<CPDF_Name>("Type", "FontDescriptor");
+  pFontDesc->SetNewFor<CPDF_Name>("FontName", font_name);
+  int flags = 0;
+  if (FXFT_Is_Face_fixedwidth(pFont->GetFace()))
+    flags |= FXFONT_FIXED_PITCH;
+  if (font_name.Contains("Serif"))
+    flags |= FXFONT_SERIF;
+  if (FXFT_Is_Face_Italic(pFont->GetFace()))
+    flags |= FXFONT_ITALIC;
+  if (FXFT_Is_Face_Bold(pFont->GetFace()))
+    flags |= FXFONT_BOLD;
+
+  // TODO(npm): How do I know if a  font is symbolic, script, allcap, smallcap
+  flags |= FXFONT_NONSYMBOLIC;
+
+  pFontDesc->SetNewFor<CPDF_Number>("Flags", flags);
+  FX_RECT bbox;
+  pFont->GetBBox(bbox);
+  pFontDesc->SetRectFor("FontBBox", CFX_FloatRect(bbox));
+
+  // TODO(npm): calculate italic angle correctly
+  pFontDesc->SetNewFor<CPDF_Number>("ItalicAngle", pFont->IsItalic() ? -12 : 0);
+
+  pFontDesc->SetNewFor<CPDF_Number>("Ascent", pFont->GetAscent());
+  pFontDesc->SetNewFor<CPDF_Number>("Descent", pFont->GetDescent());
+
+  // TODO(npm): calculate the capheight, stemV correctly
+  pFontDesc->SetNewFor<CPDF_Number>("CapHeight", pFont->GetAscent());
+  pFontDesc->SetNewFor<CPDF_Number>("StemV", pFont->IsBold() ? 120 : 70);
+
+  CPDF_Stream* pStream = pDoc->NewIndirect<CPDF_Stream>();
+  pStream->SetData(data, size);
+  // TODO(npm): Lengths for Type1 fonts.
+  if (font_type == FPDF_FONT_TRUETYPE) {
+    pStream->GetDict()->SetNewFor<CPDF_Number>("Length1",
+                                               static_cast<int>(size));
+  }
+  ByteString fontFile = font_type == FPDF_FONT_TYPE1 ? "FontFile" : "FontFile2";
+  pFontDesc->SetNewFor<CPDF_Reference>(fontFile, pDoc, pStream->GetObjNum());
+  return pFontDesc;
+}
+
+const char ToUnicodeStart[] =
+    "/CIDInit /ProcSet findresource begin\n"
+    "12 dict begin\n"
+    "begincmap\n"
+    "/CIDSystemInfo\n"
+    "<</Registry (Adobe)\n"
+    "/Ordering (Identity)\n"
+    "/Supplement 0\n"
+    ">> def\n"
+    "/CMapName /Adobe-Identity-H def\n"
+    "CMapType 2 def\n"
+    "1 begincodespacerange\n"
+    "<0000> <FFFFF>\n"
+    "endcodespacerange\n";
+
+const char ToUnicodeEnd[] =
+    "endcmap\n"
+    "CMapName currentdict /CMap defineresource pop\n"
+    "end\n"
+    "end\n";
+
+void AddCharcode(std::ostringstream* pBuffer, uint32_t number) {
+  ASSERT(number <= 0xFFFF);
+  *pBuffer << "<";
+  char ans[4];
+  FXSYS_IntToFourHexChars(number, ans);
+  for (size_t i = 0; i < 4; ++i)
+    *pBuffer << ans[i];
+  *pBuffer << ">";
+}
+
+// PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in
+// UTF-16BE encoding." See https://en.wikipedia.org/wiki/UTF-16#Description
+void AddUnicode(std::ostringstream* pBuffer, uint32_t unicode) {
+  if (unicode >= 0xD800 && unicode <= 0xDFFF)
+    unicode = 0;
+
+  char ans[8];
+  *pBuffer << "<";
+  size_t numChars = FXSYS_ToUTF16BE(unicode, ans);
+  for (size_t i = 0; i < numChars; ++i)
+    *pBuffer << ans[i];
+  *pBuffer << ">";
+}
+
+// Loads the charcode to unicode mapping into a stream
+CPDF_Stream* LoadUnicode(CPDF_Document* pDoc,
+                         const std::map<uint32_t, uint32_t>& to_unicode) {
+  // A map charcode->unicode
+  std::map<uint32_t, uint32_t> char_to_uni;
+  // A map <char_start, char_end> to vector v of unicode characters of size (end
+  // - start + 1). This abbreviates: start->v[0], start+1->v[1], etc. PDF spec
+  // 1.7 Section 5.9.2 says that only the last byte of the unicode may change.
+  std::map<std::pair<uint32_t, uint32_t>, std::vector<uint32_t>>
+      map_range_vector;
+  // A map <start, end> -> unicode
+  // This abbreviates: start->unicode, start+1->unicode+1, etc.
+  // PDF spec 1.7 Section 5.9.2 says that only the last byte of the unicode may
+  // change.
+  std::map<std::pair<uint32_t, uint32_t>, uint32_t> map_range;
+
+  // Calculate the maps
+  for (auto iter = to_unicode.begin(); iter != to_unicode.end(); ++iter) {
+    uint32_t firstCharcode = iter->first;
+    uint32_t firstUnicode = iter->second;
+    if (std::next(iter) == to_unicode.end() ||
+        firstCharcode + 1 != std::next(iter)->first) {
+      char_to_uni[firstCharcode] = firstUnicode;
+      continue;
+    }
+    ++iter;
+    uint32_t curCharcode = iter->first;
+    uint32_t curUnicode = iter->second;
+    if (curCharcode % 256 == 0) {
+      char_to_uni[firstCharcode] = firstUnicode;
+      char_to_uni[curCharcode] = curUnicode;
+      continue;
+    }
+    const size_t maxExtra = 255 - (curCharcode % 256);
+    auto next_it = std::next(iter);
+    if (firstUnicode + 1 != curUnicode) {
+      // Consecutive charcodes mapping to non-consecutive unicodes
+      std::vector<uint32_t> unicodes;
+      unicodes.push_back(firstUnicode);
+      unicodes.push_back(curUnicode);
+      for (size_t i = 0; i < maxExtra; ++i) {
+        if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first)
+          break;
+        ++iter;
+        ++curCharcode;
+        unicodes.push_back(iter->second);
+        next_it = std::next(iter);
+      }
+      ASSERT(iter->first - firstCharcode + 1 == unicodes.size());
+      map_range_vector[std::make_pair(firstCharcode, iter->first)] = unicodes;
+      continue;
+    }
+    // Consecutive charcodes mapping to consecutive unicodes
+    for (size_t i = 0; i < maxExtra; ++i) {
+      if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first ||
+          curUnicode + 1 != next_it->second) {
+        break;
+      }
+      ++iter;
+      ++curCharcode;
+      ++curUnicode;
+      next_it = std::next(iter);
+    }
+    map_range[std::make_pair(firstCharcode, curCharcode)] = firstUnicode;
+  }
+  std::ostringstream buffer;
+  buffer << ToUnicodeStart;
+  // Add maps to buffer
+  buffer << static_cast<uint32_t>(char_to_uni.size()) << " beginbfchar\n";
+  for (const auto& iter : char_to_uni) {
+    AddCharcode(&buffer, iter.first);
+    buffer << " ";
+    AddUnicode(&buffer, iter.second);
+    buffer << "\n";
+  }
+  buffer << "endbfchar\n"
+         << static_cast<uint32_t>(map_range_vector.size() + map_range.size())
+         << " beginbfrange\n";
+  for (const auto& iter : map_range_vector) {
+    const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
+    AddCharcode(&buffer, charcodeRange.first);
+    buffer << " ";
+    AddCharcode(&buffer, charcodeRange.second);
+    buffer << " [";
+    const std::vector<uint32_t>& unicodes = iter.second;
+    for (size_t i = 0; i < unicodes.size(); ++i) {
+      uint32_t uni = unicodes[i];
+      AddUnicode(&buffer, uni);
+      if (i != unicodes.size() - 1)
+        buffer << " ";
+    }
+    buffer << "]\n";
+  }
+  for (const auto& iter : map_range) {
+    const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
+    AddCharcode(&buffer, charcodeRange.first);
+    buffer << " ";
+    AddCharcode(&buffer, charcodeRange.second);
+    buffer << " ";
+    AddUnicode(&buffer, iter.second);
+    buffer << "\n";
+  }
+  buffer << "endbfrange\n";
+  buffer << ToUnicodeEnd;
+  // TODO(npm): Encrypt / Compress?
+  CPDF_Stream* stream = pDoc->NewIndirect<CPDF_Stream>();
+  stream->SetData(&buffer);
+  return stream;
+}
+
+const uint32_t kMaxSimpleFontChar = 0xFF;
+
+void* LoadSimpleFont(CPDF_Document* pDoc,
+                     std::unique_ptr<CFX_Font> pFont,
+                     const uint8_t* data,
+                     uint32_t size,
+                     int font_type) {
+  CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>();
+  fontDict->SetNewFor<CPDF_Name>("Type", "Font");
+  fontDict->SetNewFor<CPDF_Name>(
+      "Subtype", font_type == FPDF_FONT_TYPE1 ? "Type1" : "TrueType");
+  ByteString name = pFont->GetFaceName();
+  if (name.IsEmpty())
+    name = "Unnamed";
+  fontDict->SetNewFor<CPDF_Name>("BaseFont", name);
+
+  uint32_t glyphIndex;
+  uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex);
+  if (currentChar > kMaxSimpleFontChar || glyphIndex == 0)
+    return nullptr;
+  fontDict->SetNewFor<CPDF_Number>("FirstChar", static_cast<int>(currentChar));
+  CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>();
+  while (true) {
+    uint32_t width =
+        std::min(pFont->GetGlyphWidth(glyphIndex),
+                 static_cast<uint32_t>(std::numeric_limits<int>::max()));
+    widthsArray->AddNew<CPDF_Number>(static_cast<int>(width));
+    uint32_t nextChar =
+        FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex);
+    // Simple fonts have 1-byte charcodes only.
+    if (nextChar > kMaxSimpleFontChar || glyphIndex == 0)
+      break;
+    for (uint32_t i = currentChar + 1; i < nextChar; i++)
+      widthsArray->AddNew<CPDF_Number>(0);
+    currentChar = nextChar;
+  }
+  fontDict->SetNewFor<CPDF_Number>("LastChar", static_cast<int>(currentChar));
+  fontDict->SetNewFor<CPDF_Reference>("Widths", pDoc, widthsArray->GetObjNum());
+  CPDF_Dictionary* pFontDesc =
+      LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type);
+
+  fontDict->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
+                                      pFontDesc->GetObjNum());
+  return pDoc->LoadFont(fontDict);
+}
+
+const uint32_t kMaxUnicode = 0x10FFFF;
+
+void* LoadCompositeFont(CPDF_Document* pDoc,
+                        std::unique_ptr<CFX_Font> pFont,
+                        const uint8_t* data,
+                        uint32_t size,
+                        int font_type) {
+  CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>();
+  fontDict->SetNewFor<CPDF_Name>("Type", "Font");
+  fontDict->SetNewFor<CPDF_Name>("Subtype", "Type0");
+  // TODO(npm): Get the correct encoding, if it's not identity.
+  ByteString encoding = "Identity-H";
+  fontDict->SetNewFor<CPDF_Name>("Encoding", encoding);
+  ByteString name = pFont->GetFaceName();
+  if (name.IsEmpty())
+    name = "Unnamed";
+  fontDict->SetNewFor<CPDF_Name>(
+      "BaseFont", font_type == FPDF_FONT_TYPE1 ? name + "-" + encoding : name);
+
+  CPDF_Dictionary* pCIDFont = pDoc->NewIndirect<CPDF_Dictionary>();
+  pCIDFont->SetNewFor<CPDF_Name>("Type", "Font");
+  pCIDFont->SetNewFor<CPDF_Name>("Subtype", font_type == FPDF_FONT_TYPE1
+                                                ? "CIDFontType0"
+                                                : "CIDFontType2");
+  pCIDFont->SetNewFor<CPDF_Name>("BaseFont", name);
+
+  // TODO(npm): Maybe use FT_Get_CID_Registry_Ordering_Supplement to get the
+  // CIDSystemInfo
+  CPDF_Dictionary* pCIDSystemInfo = pDoc->NewIndirect<CPDF_Dictionary>();
+  pCIDSystemInfo->SetNewFor<CPDF_Name>("Registry", "Adobe");
+  pCIDSystemInfo->SetNewFor<CPDF_Name>("Ordering", "Identity");
+  pCIDSystemInfo->SetNewFor<CPDF_Number>("Supplement", 0);
+  pCIDFont->SetNewFor<CPDF_Reference>("CIDSystemInfo", pDoc,
+                                      pCIDSystemInfo->GetObjNum());
+
+  CPDF_Dictionary* pFontDesc =
+      LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type);
+  pCIDFont->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
+                                      pFontDesc->GetObjNum());
+
+  uint32_t glyphIndex;
+  uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex);
+  // If it doesn't have a single char, just fail
+  if (glyphIndex == 0 || currentChar > kMaxUnicode)
+    return nullptr;
+
+  std::map<uint32_t, uint32_t> to_unicode;
+  std::map<uint32_t, uint32_t> widths;
+  while (true) {
+    if (currentChar > kMaxUnicode)
+      break;
+
+    widths[glyphIndex] = pFont->GetGlyphWidth(glyphIndex);
+    to_unicode[glyphIndex] = currentChar;
+    currentChar =
+        FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex);
+    if (glyphIndex == 0)
+      break;
+  }
+  CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>();
+  for (auto it = widths.begin(); it != widths.end(); ++it) {
+    int ch = it->first;
+    int w = it->second;
+    if (std::next(it) == widths.end()) {
+      // Only one char left, use format c [w]
+      auto oneW = pdfium::MakeUnique<CPDF_Array>();
+      oneW->AddNew<CPDF_Number>(w);
+      widthsArray->AddNew<CPDF_Number>(ch);
+      widthsArray->Add(std::move(oneW));
+      break;
+    }
+    ++it;
+    int next_ch = it->first;
+    int next_w = it->second;
+    if (next_ch == ch + 1 && next_w == w) {
+      // The array can have a group c_first c_last w: all CIDs in the range from
+      // c_first to c_last will have width w
+      widthsArray->AddNew<CPDF_Number>(ch);
+      ch = next_ch;
+      while (true) {
+        auto next_it = std::next(it);
+        if (next_it == widths.end() || next_it->first != it->first + 1 ||
+            next_it->second != it->second) {
+          break;
+        }
+        ++it;
+        ch = it->first;
+      }
+      widthsArray->AddNew<CPDF_Number>(ch);
+      widthsArray->AddNew<CPDF_Number>(w);
+      continue;
+    }
+    // Otherwise we can have a group of the form c [w1 w2 ...]: c has width
+    // w1, c+1 has width w2, etc.
+    widthsArray->AddNew<CPDF_Number>(ch);
+    auto curWidthArray = pdfium::MakeUnique<CPDF_Array>();
+    curWidthArray->AddNew<CPDF_Number>(w);
+    curWidthArray->AddNew<CPDF_Number>(next_w);
+    while (true) {
+      auto next_it = std::next(it);
+      if (next_it == widths.end() || next_it->first != it->first + 1)
+        break;
+      ++it;
+      curWidthArray->AddNew<CPDF_Number>(static_cast<int>(it->second));
+    }
+    widthsArray->Add(std::move(curWidthArray));
+  }
+  pCIDFont->SetNewFor<CPDF_Reference>("W", pDoc, widthsArray->GetObjNum());
+  // TODO(npm): Support vertical writing
+
+  auto pDescendant = pdfium::MakeUnique<CPDF_Array>();
+  pDescendant->AddNew<CPDF_Reference>(pDoc, pCIDFont->GetObjNum());
+  fontDict->SetFor("DescendantFonts", std::move(pDescendant));
+  CPDF_Stream* toUnicodeStream = LoadUnicode(pDoc, to_unicode);
+  fontDict->SetNewFor<CPDF_Reference>("ToUnicode", pDoc,
+                                      toUnicodeStream->GetObjNum());
+  return pDoc->LoadFont(fontDict);
+}
+
+}  // namespace
+
+FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV
+FPDFPageObj_NewTextObj(FPDF_DOCUMENT document,
+                       FPDF_BYTESTRING font,
+                       float font_size) {
+  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
+  if (!pDoc)
+    return nullptr;
+
+  CPDF_Font* pFont = CPDF_Font::GetStockFont(pDoc, ByteStringView(font));
+  if (!pFont)
+    return nullptr;
+
+  auto pTextObj = pdfium::MakeUnique<CPDF_TextObject>();
+  pTextObj->m_TextState.SetFont(pFont);
+  pTextObj->m_TextState.SetFontSize(font_size);
+  pTextObj->DefaultStates();
+  return pTextObj.release();  // Caller takes ownership.
+}
+
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDFText_SetText(FPDF_PAGEOBJECT text_object, FPDF_WIDESTRING text) {
+  auto* pTextObj = static_cast<CPDF_TextObject*>(text_object);
+  if (!pTextObj)
+    return false;
+
+  size_t len = WideString::WStringLength(text);
+  WideString encodedText = WideString::FromUTF16LE(text, len);
+  ByteString byteText;
+  for (wchar_t wc : encodedText) {
+    pTextObj->GetFont()->AppendChar(
+        &byteText, pTextObj->GetFont()->CharCodeFromUnicode(wc));
+  }
+  pTextObj->SetText(byteText);
+  return true;
+}
+
+FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document,
+                                                      const uint8_t* data,
+                                                      uint32_t size,
+                                                      int font_type,
+                                                      FPDF_BOOL cid) {
+  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
+  if (!pDoc || !data || size == 0 ||
+      (font_type != FPDF_FONT_TYPE1 && font_type != FPDF_FONT_TRUETYPE)) {
+    return nullptr;
+  }
+
+  auto pFont = pdfium::MakeUnique<CFX_Font>();
+
+  // TODO(npm): Maybe use FT_Get_X11_Font_Format to check format? Otherwise, we
+  // are allowing giving any font that can be loaded on freetype and setting it
+  // as any font type.
+  if (!pFont->LoadEmbedded(data, size))
+    return nullptr;
+
+  return cid ? LoadCompositeFont(pDoc, std::move(pFont), data, size, font_type)
+             : LoadSimpleFont(pDoc, std::move(pFont), data, size, font_type);
+}
+
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDFText_SetFillColor(FPDF_PAGEOBJECT text_object,
+                      unsigned int R,
+                      unsigned int G,
+                      unsigned int B,
+                      unsigned int A) {
+  return FPDFPageObj_SetFillColor(text_object, R, G, B, A);
+}
+
+FPDF_EXPORT void FPDF_CALLCONV FPDFFont_Close(FPDF_FONT font) {
+  CPDF_Font* pFont = static_cast<CPDF_Font*>(font);
+  if (!pFont)
+    return;
+
+  CPDF_Document* pDoc = pFont->GetDocument();
+  if (!pDoc)
+    return;
+
+  CPDF_DocPageData* pPageData = pDoc->GetPageData();
+  if (!pPageData->IsForceClear())
+    pPageData->ReleaseFont(pFont->GetFontDict());
+}
+
+FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV
+FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document,
+                          FPDF_FONT font,
+                          float font_size) {
+  CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
+  CPDF_Font* pFont = static_cast<CPDF_Font*>(font);
+  if (!pDoc || !pFont)
+    return nullptr;
+
+  auto pTextObj = pdfium::MakeUnique<CPDF_TextObject>();
+  pTextObj->m_TextState.SetFont(pDoc->LoadFont(pFont->GetFontDict()));
+  pTextObj->m_TextState.SetFontSize(font_size);
+  pTextObj->DefaultStates();
+  return pTextObj.release();
+}