Move codepage/charset methods into related places.

Change-Id: I71417cc5b1bd00f77d42740198cc17487ebd686e
Reviewed-on: https://pdfium-review.googlesource.com/33330
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
author: Artem Strygin <art-snake@yandex-team.ru> 2018-05-31 14:08:11 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-05-31 14:08:11 +0000
commit: 656eb84f83fc1701737d9c65658371a99428d727 (patch)
tree: 6fb28e4283c1ef8696b42d8b7d200a13c32742fc
parent: 8f7ee98e2c622c21f452cd9fd5956fe85bcb2b7c (diff)
download: pdfium-656eb84f83fc1701737d9c65658371a99428d727.tar.xz
15 files changed, 209 insertions, 316 deletions
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
index 9dd66f673b..7e7c337d96 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
@@ -1142,7 +1142,8 @@ CPDF_Font* CPDF_StreamContentParser::FindFont(const ByteString& name) {
   CPDF_Dictionary* pFontDict = ToDictionary(FindResourceObj("Font", name));
   if (!pFontDict) {
     m_bResourceMissing = true;
-    return CPDF_Font::GetStockFont(m_pDocument.Get(), "Helvetica");
+    return CPDF_Font::GetStockFont(m_pDocument.Get(),
+                                   CFX_Font::kDefaultAnsiFontName);
   }
 
   CPDF_Font* pFont = m_pDocument->LoadFont(pFontDict);
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 2e4baabe91..412f726eca 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -736,9 +736,7 @@ CPDF_Font* CPDF_Document::AddFont(CFX_Font* pFont, int charset, bool bVert) {
   if (!pFont)
     return nullptr;
 
-  bool bCJK = charset == FX_CHARSET_ChineseTraditional ||
-              charset == FX_CHARSET_ChineseSimplified ||
-              charset == FX_CHARSET_Hangul || charset == FX_CHARSET_ShiftJIS;
+  const bool bCJK = FX_CharSetIsCJK(charset);
   ByteString basefont = pFont->GetFamilyName();
   basefont.Replace(" ", "");
   int flags =
@@ -856,10 +854,7 @@ CPDF_Font* CPDF_Document::AddWindowsFont(LOGFONTA* pLogFont,
                              (pLogFont->lfPitchAndFamily & 0xf8) == FF_SCRIPT,
                              pLogFont->lfCharSet == FX_CHARSET_Symbol);
 
-  bool bCJK = pLogFont->lfCharSet == FX_CHARSET_ChineseTraditional ||
-              pLogFont->lfCharSet == FX_CHARSET_ChineseSimplified ||
-              pLogFont->lfCharSet == FX_CHARSET_Hangul ||
-              pLogFont->lfCharSet == FX_CHARSET_ShiftJIS;
+  const bool bCJK = FX_CharSetIsCJK(pLogFont->lfCharSet);
   ByteString basefont;
   if (bTranslateName && bCJK)
     basefont = FPDF_GetPSNameFromTT(hDC);
diff --git a/core/fpdfdoc/cpdf_interform.cpp b/core/fpdfdoc/cpdf_interform.cpp
index ad7753b499..ea408ec0df 100644
--- a/core/fpdfdoc/cpdf_interform.cpp
+++ b/core/fpdfdoc/cpdf_interform.cpp
@@ -75,13 +75,14 @@ void InitDict(CPDF_Dictionary*& pFormDict, CPDF_Document* pDocument) {
   if (!pFormDict->KeyExist("DR")) {
     ByteString csBaseName;
     uint8_t charSet = CPDF_InterForm::GetNativeCharSet();
-    CPDF_Font* pFont = CPDF_InterForm::AddStandardFont(pDocument, "Helvetica");
+    CPDF_Font* pFont = CPDF_InterForm::AddStandardFont(
+        pDocument, CFX_Font::kDefaultAnsiFontName);
     if (pFont)
       AddFont(pFormDict, pDocument, pFont, &csBaseName);
 
     if (charSet != FX_CHARSET_ANSI) {
       ByteString csFontName = CPDF_InterForm::GetNativeFont(charSet, nullptr);
-      if (!pFont || csFontName != "Helvetica") {
+      if (!pFont || csFontName != CFX_Font::kDefaultAnsiFontName) {
         pFont = CPDF_InterForm::AddNativeFont(pDocument);
         if (pFont) {
           csBaseName.clear();
@@ -564,63 +565,7 @@ CPDF_Font* AddNativeInterFormFont(CPDF_Dictionary*& pFormDict,
 
 // static
 uint8_t CPDF_InterForm::GetNativeCharSet() {
-#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
-  uint8_t charSet = FX_CHARSET_ANSI;
-  UINT iCodePage = ::GetACP();
-  switch (iCodePage) {
-    case FX_CODEPAGE_ShiftJIS:
-      charSet = FX_CHARSET_ShiftJIS;
-      break;
-    case FX_CODEPAGE_ChineseSimplified:
-      charSet = FX_CHARSET_ChineseSimplified;
-      break;
-    case FX_CODEPAGE_ChineseTraditional:
-      charSet = FX_CHARSET_ChineseTraditional;
-      break;
-    case FX_CODEPAGE_MSWin_WesternEuropean:
-      charSet = FX_CHARSET_ANSI;
-      break;
-    case FX_CODEPAGE_MSDOS_Thai:
-      charSet = FX_CHARSET_Thai;
-      break;
-    case FX_CODEPAGE_Hangul:
-      charSet = FX_CHARSET_Hangul;
-      break;
-    case FX_CODEPAGE_UTF16LE:
-      charSet = FX_CHARSET_ANSI;
-      break;
-    case FX_CODEPAGE_MSWin_EasternEuropean:
-      charSet = FX_CHARSET_MSWin_EasternEuropean;
-      break;
-    case FX_CODEPAGE_MSWin_Cyrillic:
-      charSet = FX_CHARSET_MSWin_Cyrillic;
-      break;
-    case FX_CODEPAGE_MSWin_Greek:
-      charSet = FX_CHARSET_MSWin_Greek;
-      break;
-    case FX_CODEPAGE_MSWin_Turkish:
-      charSet = FX_CHARSET_MSWin_Turkish;
-      break;
-    case FX_CODEPAGE_MSWin_Hebrew:
-      charSet = FX_CHARSET_MSWin_Hebrew;
-      break;
-    case FX_CODEPAGE_MSWin_Arabic:
-      charSet = FX_CHARSET_MSWin_Arabic;
-      break;
-    case FX_CODEPAGE_MSWin_Baltic:
-      charSet = FX_CHARSET_MSWin_Baltic;
-      break;
-    case FX_CODEPAGE_MSWin_Vietnamese:
-      charSet = FX_CHARSET_MSWin_Vietnamese;
-      break;
-    case FX_CODEPAGE_Johab:
-      charSet = FX_CHARSET_Johab;
-      break;
-  }
-  return charSet;
-#else
-  return 0;
-#endif
+  return FX_GetCharsetFromCodePage(FXSYS_GetACP());
 }
 
 CPDF_InterForm::CPDF_InterForm(CPDF_Document* pDocument)
@@ -727,23 +672,19 @@ ByteString CPDF_InterForm::GetNativeFont(uint8_t charSet, void* pLogFont) {
 #if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
   LOGFONTA lf = {};
   if (charSet == FX_CHARSET_ANSI) {
-    csFontName = "Helvetica";
+    csFontName = CFX_Font::kDefaultAnsiFontName;
     return csFontName;
   }
   bool bRet = false;
-  if (charSet == FX_CHARSET_ShiftJIS) {
+  const ByteString default_font_name =
+      CFX_Font::GetDefaultFontNameByCharset(charSet);
+  if (!default_font_name.IsEmpty()) {
     bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE,
-                                "MS Mincho", lf);
-  } else if (charSet == FX_CHARSET_ChineseSimplified) {
-    bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, "SimSun",
-                                lf);
-  } else if (charSet == FX_CHARSET_ChineseTraditional) {
-    bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, "MingLiU",
-                                lf);
+                                default_font_name.c_str(), lf);
   }
   if (!bRet) {
     bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE,
-                                "Arial Unicode MS", lf);
+                                CFX_Font::kUniversalDefaultFontName, lf);
   }
   if (!bRet) {
     bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE,
@@ -773,7 +714,7 @@ CPDF_Font* CPDF_InterForm::AddNativeFont(uint8_t charSet,
   LOGFONTA lf;
   ByteString csFontName = GetNativeFont(charSet, &lf);
   if (!csFontName.IsEmpty()) {
-    if (csFontName == "Helvetica")
+    if (csFontName == CFX_Font::kDefaultAnsiFontName)
       return AddStandardFont(pDocument, csFontName);
     return pDocument->AddWindowsFont(&lf, false, true);
   }
diff --git a/core/fpdfdoc/cpvt_generateap.cpp b/core/fpdfdoc/cpvt_generateap.cpp
index e395fbfa4a..cf55e956ea 100644
--- a/core/fpdfdoc/cpvt_generateap.cpp
+++ b/core/fpdfdoc/cpvt_generateap.cpp
@@ -399,7 +399,7 @@ std::unique_ptr<CPDF_Dictionary> GenerateResourceFontDict(
   CPDF_Dictionary* pFontDict = pDoc->NewIndirect<CPDF_Dictionary>();
   pFontDict->SetNewFor<CPDF_Name>("Type", "Font");
   pFontDict->SetNewFor<CPDF_Name>("Subtype", "Type1");
-  pFontDict->SetNewFor<CPDF_Name>("BaseFont", "Helvetica");
+  pFontDict->SetNewFor<CPDF_Name>("BaseFont", CFX_Font::kDefaultAnsiFontName);
   pFontDict->SetNewFor<CPDF_Name>("Encoding", "WinAnsiEncoding");
 
   auto pResourceFontDict =
@@ -949,7 +949,7 @@ void CPVT_GenerateAP::GenerateFormAP(Type type,
     pFontDict = pDoc->NewIndirect<CPDF_Dictionary>();
     pFontDict->SetNewFor<CPDF_Name>("Type", "Font");
     pFontDict->SetNewFor<CPDF_Name>("Subtype", "Type1");
-    pFontDict->SetNewFor<CPDF_Name>("BaseFont", "Helvetica");
+    pFontDict->SetNewFor<CPDF_Name>("BaseFont", CFX_Font::kDefaultAnsiFontName);
     pFontDict->SetNewFor<CPDF_Name>("Encoding", "WinAnsiEncoding");
     pDRFontDict->SetNewFor<CPDF_Reference>(
         sFontName.Right(sFontName.GetLength() - 1), pDoc,
diff --git a/core/fxcrt/fx_codepage.cpp b/core/fxcrt/fx_codepage.cpp
index 56fad30c76..d59c93ed85 100644
--- a/core/fxcrt/fx_codepage.cpp
+++ b/core/fxcrt/fx_codepage.cpp
@@ -6,6 +6,9 @@
 
 #include "core/fxcrt/fx_codepage.h"
 
+#include <algorithm>
+#include <utility>
+
 namespace {
 
 const uint16_t g_FX_MSDOSThaiUnicodes[128] = {
@@ -152,6 +155,45 @@ const uint16_t g_FX_MSWinBalticUnicodes[128] = {
     0x017E, 0x02D9,
 };
 
+struct FX_CHARSET_MAP {
+  uint16_t charset;
+  uint16_t codepage;
+};
+
+const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
+    {FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean},
+    {FX_CHARSET_Default, FX_CODEPAGE_DefANSI},
+    {FX_CHARSET_Symbol, FX_CODEPAGE_Symbol},
+    {FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman},
+    {FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS},
+    {FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean},
+    {FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified},
+    {FX_CHARSET_MAC_ChineseTraditional, FX_CODEPAGE_MAC_ChineseTraditional},
+    {FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew},
+    {FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic},
+    {FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek},
+    {FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish},
+    {FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai},
+    {FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean},
+    {FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic},
+    {FX_CHARSET_ShiftJIS, FX_CODEPAGE_ShiftJIS},
+    {FX_CHARSET_Hangul, FX_CODEPAGE_Hangul},
+    {FX_CHARSET_Johab, FX_CODEPAGE_Johab},
+    {FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified},
+    {FX_CHARSET_ChineseTraditional, FX_CODEPAGE_ChineseTraditional},
+    {FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek},
+    {FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish},
+    {FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese},
+    {FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew},
+    {FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic},
+    {FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic},
+    {FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic},
+    {FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai},
+    {FX_CHARSET_MSWin_EasternEuropean, FX_CODEPAGE_MSWin_EasternEuropean},
+    {FX_CHARSET_US, FX_CODEPAGE_MSDOS_US},
+    {FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean},
+};
+
 }  // namespace
 
 const FX_CharsetUnicodes g_FX_CharsetUnicodes[8] = {
@@ -164,3 +206,31 @@ const FX_CharsetUnicodes g_FX_CharsetUnicodes[8] = {
     {FX_CHARSET_MSWin_Arabic, g_FX_MSWinArabicUnicodes},
     {FX_CHARSET_MSWin_Baltic, g_FX_MSWinBalticUnicodes},
 };
+
+uint16_t FX_GetCodePageFromCharset(uint8_t charset) {
+  auto* result =
+      std::lower_bound(std::begin(g_FXCharset2CodePageTable),
+                       std::end(g_FXCharset2CodePageTable), charset,
+                       [](const FX_CHARSET_MAP& iter, const uint16_t& charset) {
+                         return iter.charset < charset;
+                       });
+  if (result != std::end(g_FXCharset2CodePageTable) &&
+      result->charset == charset) {
+    return result->codepage;
+  }
+  return 0xFFFF;
+}
+
+uint8_t FX_GetCharsetFromCodePage(uint16_t codepage) {
+  for (const auto& it : g_FXCharset2CodePageTable) {
+    if (it.codepage == codepage)
+      return it.charset;
+  }
+  return FX_CHARSET_ANSI;
+}
+
+bool FX_CharSetIsCJK(uint8_t uCharset) {
+  return (uCharset == FX_CHARSET_ChineseSimplified) ||
+         (uCharset == FX_CHARSET_ChineseTraditional) ||
+         (uCharset == FX_CHARSET_Hangul) || (uCharset == FX_CHARSET_ShiftJIS);
+}
diff --git a/core/fxcrt/fx_codepage.h b/core/fxcrt/fx_codepage.h
index 43692286a5..4a6c6d86b3 100644
--- a/core/fxcrt/fx_codepage.h
+++ b/core/fxcrt/fx_codepage.h
@@ -98,4 +98,8 @@ struct FX_CharsetUnicodes {
 
 extern const FX_CharsetUnicodes g_FX_CharsetUnicodes[8];
 
+uint16_t FX_GetCodePageFromCharset(uint8_t charset);
+uint8_t FX_GetCharsetFromCodePage(uint16_t codepage);
+bool FX_CharSetIsCJK(uint8_t uCharset);
+
 #endif  // CORE_FXCRT_FX_CODEPAGE_H_
diff --git a/core/fxge/android/cfpf_skiafontmgr.cpp b/core/fxge/android/cfpf_skiafontmgr.cpp
index d44d7d56d2..0b49f6283c 100644
--- a/core/fxge/android/cfpf_skiafontmgr.cpp
+++ b/core/fxge/android/cfpf_skiafontmgr.cpp
@@ -187,9 +187,7 @@ uint32_t FPF_SKIAGetFamilyHash(const ByteStringView& bsFamily,
 }
 
 bool FPF_SkiaIsCJK(uint8_t uCharset) {
-  return (uCharset == FX_CHARSET_ChineseSimplified) ||
-         (uCharset == FX_CHARSET_ChineseTraditional) ||
-         (uCharset == FX_CHARSET_Hangul) || (uCharset == FX_CHARSET_ShiftJIS);
+  return FX_CharSetIsCJK(uCharset);
 }
 
 bool FPF_SkiaMaybeSymbol(const ByteStringView& bsFacename) {
diff --git a/core/fxge/cfx_font.cpp b/core/fxge/cfx_font.cpp
index d04fc0dac0..bee1d789ef 100644
--- a/core/fxge/cfx_font.cpp
+++ b/core/fxge/cfx_font.cpp
@@ -209,6 +209,88 @@ const uint8_t CFX_Font::s_WeightPow_SHIFTJIS[] = {
     59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60,
 };
 
+const CFX_Font::CharsetFontMap CFX_Font::defaultTTFMap[] = {
+    {FX_CHARSET_ANSI, kDefaultAnsiFontName},
+    {FX_CHARSET_ChineseSimplified, "SimSun"},
+    {FX_CHARSET_ChineseTraditional, "MingLiU"},
+    {FX_CHARSET_ShiftJIS, "MS Gothic"},
+    {FX_CHARSET_Hangul, "Batang"},
+    {FX_CHARSET_MSWin_Cyrillic, "Arial"},
+#if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
+    {FX_CHARSET_MSWin_EasternEuropean, "Arial"},
+#else
+    {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"},
+#endif
+    {FX_CHARSET_MSWin_Arabic, "Arial"},
+    {-1, nullptr}};
+
+// static
+const char CFX_Font::kDefaultAnsiFontName[] = "Helvetica";
+// static
+const char CFX_Font::kUniversalDefaultFontName[] = "Arial Unicode MS";
+
+// static
+ByteString CFX_Font::GetDefaultFontNameByCharset(uint8_t nCharset) {
+  int i = 0;
+  while (defaultTTFMap[i].charset != -1) {
+    if (nCharset == static_cast<uint8_t>(defaultTTFMap[i].charset))
+      return defaultTTFMap[i].fontname;
+    ++i;
+  }
+  return kUniversalDefaultFontName;
+}
+
+// static
+uint8_t CFX_Font::GetCharSetFromUnicode(uint16_t word) {
+  // to avoid CJK Font to show ASCII
+  if (word < 0x7F)
+    return FX_CHARSET_ANSI;
+
+  // find new charset
+  if ((word >= 0x4E00 && word <= 0x9FA5) ||
+      (word >= 0xE7C7 && word <= 0xE7F3) ||
+      (word >= 0x3000 && word <= 0x303F) ||
+      (word >= 0x2000 && word <= 0x206F)) {
+    return FX_CHARSET_ChineseSimplified;
+  }
+
+  if (((word >= 0x3040) && (word <= 0x309F)) ||
+      ((word >= 0x30A0) && (word <= 0x30FF)) ||
+      ((word >= 0x31F0) && (word <= 0x31FF)) ||
+      ((word >= 0xFF00) && (word <= 0xFFEF))) {
+    return FX_CHARSET_ShiftJIS;
+  }
+
+  if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
+      ((word >= 0x1100) && (word <= 0x11FF)) ||
+      ((word >= 0x3130) && (word <= 0x318F))) {
+    return FX_CHARSET_Hangul;
+  }
+
+  if (word >= 0x0E00 && word <= 0x0E7F)
+    return FX_CHARSET_Thai;
+
+  if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
+    return FX_CHARSET_MSWin_Greek;
+
+  if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
+    return FX_CHARSET_MSWin_Arabic;
+
+  if (word >= 0x0590 && word <= 0x05FF)
+    return FX_CHARSET_MSWin_Hebrew;
+
+  if (word >= 0x0400 && word <= 0x04FF)
+    return FX_CHARSET_MSWin_Cyrillic;
+
+  if (word >= 0x0100 && word <= 0x024F)
+    return FX_CHARSET_MSWin_EasternEuropean;
+
+  if (word >= 0x1E00 && word <= 0x1EFF)
+    return FX_CHARSET_MSWin_Vietnamese;
+
+  return FX_CHARSET_ANSI;
+}
+
 CFX_Font::CFX_Font()
     :
       m_Face(nullptr),
diff --git a/core/fxge/cfx_font.h b/core/fxge/cfx_font.h
index fbf35eaca1..a5a0057c16 100644
--- a/core/fxge/cfx_font.h
+++ b/core/fxge/cfx_font.h
@@ -30,6 +30,11 @@ class CFX_Font {
   CFX_Font();
   ~CFX_Font();
 
+  static const char kDefaultAnsiFontName[];
+  static const char kUniversalDefaultFontName[];
+  static ByteString GetDefaultFontNameByCharset(uint8_t nCharset);
+  static uint8_t GetCharSetFromUnicode(uint16_t word);
+
   void LoadSubst(const ByteString& face_name,
                  bool bTrueType,
                  uint32_t flags,
@@ -96,6 +101,19 @@ class CFX_Font {
   static const uint8_t s_WeightPow_11[kWeightPowArraySize];
   static const uint8_t s_WeightPow_SHIFTJIS[kWeightPowArraySize];
 
+  // This struct should be the same as FPDF_CharsetFontMap.
+  typedef struct {
+    int charset;           // Character Set Enum value, see FX_CHARSET_XXX.
+    const char* fontname;  // Name of default font to use with that charset.
+  } CharsetFontMap;
+
+  /**
+   *    Pointer to the default character set to TT Font name map. The
+   *    map is an array of CharsetFontMap structs, with its end indicated
+   *    by a { -1, NULL } entry.
+   **/
+  static const CharsetFontMap defaultTTFMap[];
+
 #ifdef PDF_ENABLE_XFA
  protected:
   std::unique_ptr<FXFT_StreamRec> m_pOwnedStream;
diff --git a/core/fxge/cfx_fontmapper.cpp b/core/fxge/cfx_fontmapper.cpp
index b9a8eac1a1..e6fc984edc 100644
--- a/core/fxge/cfx_fontmapper.cpp
+++ b/core/fxge/cfx_fontmapper.cpp
@@ -145,19 +145,6 @@ const struct AltFontFamily {
     {"ForteMT", "Forte"},
 };
 
-const struct CODEPAGE_MAP {
-  uint16_t codepage;
-  uint8_t charset;
-} g_Codepage2CharsetTable[] = {
-    {0, 1},      {42, 2},     {437, 254},  {850, 255},  {874, 222},
-    {932, 128},  {936, 134},  {949, 129},  {950, 136},  {1250, 238},
-    {1251, 204}, {1252, 0},   {1253, 161}, {1254, 162}, {1255, 177},
-    {1256, 178}, {1257, 186}, {1258, 163}, {1361, 130}, {10000, 77},
-    {10001, 78}, {10002, 81}, {10003, 79}, {10004, 84}, {10005, 83},
-    {10006, 85}, {10007, 89}, {10008, 80}, {10021, 87}, {10029, 88},
-    {10081, 86},
-};
-
 ByteString TT_NormalizeName(const char* family) {
   ByteString norm(family);
   norm.Remove(' ');
@@ -170,19 +157,6 @@ ByteString TT_NormalizeName(const char* family) {
   return norm;
 }
 
-uint8_t GetCharsetFromCodePage(uint16_t codepage) {
-  const CODEPAGE_MAP* pEnd =
-      g_Codepage2CharsetTable + FX_ArraySize(g_Codepage2CharsetTable);
-  const CODEPAGE_MAP* pCharmap =
-      std::lower_bound(g_Codepage2CharsetTable, pEnd, codepage,
-                       [](const CODEPAGE_MAP& charset, uint16_t page) {
-                         return charset.codepage < page;
-                       });
-  if (pCharmap < pEnd && codepage == pCharmap->codepage)
-    return pCharmap->charset;
-  return FX_CHARSET_Default;
-}
-
 void GetFontFamily(uint32_t nStyle, ByteString* fontName) {
   if (fontName->Contains("Script")) {
     if (FontStyleIsBold(nStyle))
@@ -544,13 +518,10 @@ FXFT_Face CFX_FontMapper::FindSubstFont(const ByteString& name,
 
   int Charset = FX_CHARSET_ANSI;
   if (WindowCP)
-    Charset = GetCharsetFromCodePage(WindowCP);
+    Charset = FX_GetCharsetFromCodePage(WindowCP);
   else if (iBaseFont == kNumStandardFonts && FontStyleIsSymbolic(flags))
     Charset = FX_CHARSET_Symbol;
-  const bool bCJK = (Charset == FX_CHARSET_ShiftJIS ||
-                     Charset == FX_CHARSET_ChineseSimplified ||
-                     Charset == FX_CHARSET_Hangul ||
-                     Charset == FX_CHARSET_ChineseTraditional);
+  const bool bCJK = FX_CharSetIsCJK(Charset);
   bool bItalic = FontStyleIsItalic(nStyle);
 
   GetFontFamily(nStyle, &family);
diff --git a/fpdfsdk/cfx_systemhandler.cpp b/fpdfsdk/cfx_systemhandler.cpp
index 28170d2878..62d7c735b0 100644
--- a/fpdfsdk/cfx_systemhandler.cpp
+++ b/fpdfsdk/cfx_systemhandler.cpp
@@ -20,22 +20,6 @@
 #include "fpdfsdk/cpdfsdk_widget.h"
 #include "fpdfsdk/formfiller/cffl_formfiller.h"
 
-namespace {
-
-int CharSet2CP(int charset) {
-  if (charset == FX_CHARSET_ShiftJIS)
-    return FX_CODEPAGE_ShiftJIS;
-  if (charset == FX_CHARSET_ChineseSimplified)
-    return FX_CODEPAGE_ChineseSimplified;
-  if (charset == FX_CHARSET_Hangul)
-    return FX_CODEPAGE_Hangul;
-  if (charset == FX_CHARSET_ChineseTraditional)
-    return FX_CODEPAGE_ChineseTraditional;
-  return FX_CODEPAGE_DefANSI;
-}
-
-}  // namespace
-
 CFX_SystemHandler::CFX_SystemHandler(CPDFSDK_FormFillEnvironment* pFormFillEnv)
     : m_pFormFillEnv(pFormFillEnv) {}
 
@@ -121,7 +105,8 @@ CPDF_Font* CFX_SystemHandler::AddNativeTrueTypeFontToPDF(
     return nullptr;
 
   auto pFXFont = pdfium::MakeUnique<CFX_Font>();
-  pFXFont->LoadSubst(sFontFaceName, true, 0, 0, 0, CharSet2CP(nCharset), false);
+  pFXFont->LoadSubst(sFontFaceName, true, 0, 0, 0,
+                     FX_GetCodePageFromCharset(nCharset), false);
   return pDoc->AddFont(pFXFont.get(), nCharset, false);
 }
 
diff --git a/fpdfsdk/fpdf_sysfontinfo.cpp b/fpdfsdk/fpdf_sysfontinfo.cpp
index 3a3f9e1257..564912b85c 100644
--- a/fpdfsdk/fpdf_sysfontinfo.cpp
+++ b/fpdfsdk/fpdf_sysfontinfo.cpp
@@ -9,6 +9,7 @@
 #include <memory>
 
 #include "core/fxcrt/fx_codepage.h"
+#include "core/fxge/cfx_font.h"
 #include "core/fxge/cfx_fontmapper.h"
 #include "core/fxge/cfx_fontmgr.h"
 #include "core/fxge/cfx_gemodule.h"
@@ -30,6 +31,8 @@ static_assert(FXFONT_GB2312_CHARSET == FX_CHARSET_ChineseSimplified,
               "Charset must match");
 static_assert(FXFONT_CHINESEBIG5_CHARSET == FX_CHARSET_ChineseTraditional,
               "Charset must match");
+static_assert(sizeof(CFX_Font::CharsetFontMap) == sizeof(FPDF_CharsetFontMap),
+              "CFX_Font::CharsetFontMap should be same as FPDF_CharsetFontMap");
 
 class CFX_ExternalFontInfo final : public SystemFontInfoIface {
  public:
@@ -122,7 +125,7 @@ FPDF_SetSystemFontInfo(FPDF_SYSFONTINFO* pFontInfoExt) {
 }
 
 FPDF_EXPORT const FPDF_CharsetFontMap* FPDF_CALLCONV FPDF_GetDefaultTTFMap() {
-  return CPWL_FontMap::defaultTTFMap;
+  return reinterpret_cast<const FPDF_CharsetFontMap*>(CFX_Font::defaultTTFMap);
 }
 
 struct FPDF_SYSFONTINFO_DEFAULT : public FPDF_SYSFONTINFO {
diff --git a/fpdfsdk/pwl/cpwl_font_map.cpp b/fpdfsdk/pwl/cpwl_font_map.cpp
index 5e5556ebb0..f60e7d2fb1 100644
--- a/fpdfsdk/pwl/cpwl_font_map.cpp
+++ b/fpdfsdk/pwl/cpwl_font_map.cpp
@@ -21,8 +21,6 @@
 
 namespace {
 
-const char kDefaultFontName[] = "Helvetica";
-
 const char* const g_sDEStandardFontName[] = {"Courier",
                                              "Courier-Bold",
                                              "Courier-BoldOblique",
@@ -100,7 +98,8 @@ int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word,
     if (KnowWord(nNewFontIndex, word))
       return nNewFontIndex;
   }
-  nNewFontIndex = GetFontIndex("Arial Unicode MS", FX_CHARSET_Default, false);
+  nNewFontIndex = GetFontIndex(CFX_Font::kUniversalDefaultFontName,
+                               FX_CHARSET_Default, false);
   if (nNewFontIndex >= 0) {
     if (KnowWord(nNewFontIndex, word))
       return nNewFontIndex;
@@ -145,7 +144,7 @@ void CPWL_FontMap::Empty() {
 }
 
 void CPWL_FontMap::Initialize() {
-  GetFontIndex(kDefaultFontName, FX_CHARSET_ANSI, false);
+  GetFontIndex(CFX_Font::kDefaultAnsiFontName, FX_CHARSET_ANSI, false);
 }
 
 bool CPWL_FontMap::IsStandardFont(const ByteString& sFontName) {
@@ -210,7 +209,7 @@ ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
   if (nCharset == FX_CHARSET_Default)
     nCharset = GetNativeCharset();
 
-  ByteString sFontName = GetDefaultFontByCharset(nCharset);
+  ByteString sFontName = CFX_Font::GetDefaultFontNameByCharset(nCharset);
   if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName))
     return ByteString();
 
@@ -274,135 +273,17 @@ const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
 }
 
 int32_t CPWL_FontMap::GetNativeCharset() {
-  uint8_t nCharset = FX_CHARSET_ANSI;
-  int32_t iCodePage = FXSYS_GetACP();
-  switch (iCodePage) {
-    case FX_CODEPAGE_ShiftJIS:
-      nCharset = FX_CHARSET_ShiftJIS;
-      break;
-    case FX_CODEPAGE_ChineseSimplified:
-      nCharset = FX_CHARSET_ChineseSimplified;
-      break;
-    case FX_CODEPAGE_ChineseTraditional:
-      nCharset = FX_CHARSET_ChineseTraditional;
-      break;
-    case FX_CODEPAGE_MSWin_WesternEuropean:
-      nCharset = FX_CHARSET_ANSI;
-      break;
-    case FX_CODEPAGE_MSDOS_Thai:
-      nCharset = FX_CHARSET_Thai;
-      break;
-    case FX_CODEPAGE_Hangul:
-      nCharset = FX_CHARSET_Hangul;
-      break;
-    case FX_CODEPAGE_UTF16LE:
-      nCharset = FX_CHARSET_ANSI;
-      break;
-    case FX_CODEPAGE_MSWin_EasternEuropean:
-      nCharset = FX_CHARSET_MSWin_EasternEuropean;
-      break;
-    case FX_CODEPAGE_MSWin_Cyrillic:
-      nCharset = FX_CHARSET_MSWin_Cyrillic;
-      break;
-    case FX_CODEPAGE_MSWin_Greek:
-      nCharset = FX_CHARSET_MSWin_Greek;
-      break;
-    case FX_CODEPAGE_MSWin_Turkish:
-      nCharset = FX_CHARSET_MSWin_Turkish;
-      break;
-    case FX_CODEPAGE_MSWin_Hebrew:
-      nCharset = FX_CHARSET_MSWin_Hebrew;
-      break;
-    case FX_CODEPAGE_MSWin_Arabic:
-      nCharset = FX_CHARSET_MSWin_Arabic;
-      break;
-    case FX_CODEPAGE_MSWin_Baltic:
-      nCharset = FX_CHARSET_MSWin_Baltic;
-      break;
-    case FX_CODEPAGE_MSWin_Vietnamese:
-      nCharset = FX_CHARSET_MSWin_Vietnamese;
-      break;
-    case FX_CODEPAGE_Johab:
-      nCharset = FX_CHARSET_Johab;
-      break;
-  }
-  return nCharset;
-}
-
-const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
-    {FX_CHARSET_ANSI, "Helvetica"},
-    {FX_CHARSET_ChineseSimplified, "SimSun"},
-    {FX_CHARSET_ChineseTraditional, "MingLiU"},
-    {FX_CHARSET_ShiftJIS, "MS Gothic"},
-    {FX_CHARSET_Hangul, "Batang"},
-    {FX_CHARSET_MSWin_Cyrillic, "Arial"},
-#if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
-    {FX_CHARSET_MSWin_EasternEuropean, "Arial"},
-#else
-    {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"},
-#endif
-    {FX_CHARSET_MSWin_Arabic, "Arial"},
-    {-1, nullptr}};
-
-ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
-  int i = 0;
-  while (defaultTTFMap[i].charset != -1) {
-    if (nCharset == defaultTTFMap[i].charset)
-      return defaultTTFMap[i].fontname;
-    ++i;
-  }
-  return "";
+  return FX_GetCharsetFromCodePage(FXSYS_GetACP());
 }
 
 int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) {
   // to avoid CJK Font to show ASCII
   if (word < 0x7F)
     return FX_CHARSET_ANSI;
+
   // follow the old charset
   if (nOldCharset != FX_CHARSET_Default)
     return nOldCharset;
 
-  // find new charset
-  if ((word >= 0x4E00 && word <= 0x9FA5) ||
-      (word >= 0xE7C7 && word <= 0xE7F3) ||
-      (word >= 0x3000 && word <= 0x303F) ||
-      (word >= 0x2000 && word <= 0x206F)) {
-    return FX_CHARSET_ChineseSimplified;
-  }
-
-  if (((word >= 0x3040) && (word <= 0x309F)) ||
-      ((word >= 0x30A0) && (word <= 0x30FF)) ||
-      ((word >= 0x31F0) && (word <= 0x31FF)) ||
-      ((word >= 0xFF00) && (word <= 0xFFEF))) {
-    return FX_CHARSET_ShiftJIS;
-  }
-
-  if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
-      ((word >= 0x1100) && (word <= 0x11FF)) ||
-      ((word >= 0x3130) && (word <= 0x318F))) {
-    return FX_CHARSET_Hangul;
-  }
-
-  if (word >= 0x0E00 && word <= 0x0E7F)
-    return FX_CHARSET_Thai;
-
-  if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
-    return FX_CHARSET_MSWin_Greek;
-
-  if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
-    return FX_CHARSET_MSWin_Arabic;
-
-  if (word >= 0x0590 && word <= 0x05FF)
-    return FX_CHARSET_MSWin_Hebrew;
-
-  if (word >= 0x0400 && word <= 0x04FF)
-    return FX_CHARSET_MSWin_Cyrillic;
-
-  if (word >= 0x0100 && word <= 0x024F)
-    return FX_CHARSET_MSWin_EasternEuropean;
-
-  if (word >= 0x1E00 && word <= 0x1EFF)
-    return FX_CHARSET_MSWin_Vietnamese;
-
-  return FX_CHARSET_ANSI;
+  return CFX_Font::GetCharSetFromUnicode(word);
 }
diff --git a/fpdfsdk/pwl/cpwl_font_map.h b/fpdfsdk/pwl/cpwl_font_map.h
index 592c3fafed..c465a0fc0c 100644
--- a/fpdfsdk/pwl/cpwl_font_map.h
+++ b/fpdfsdk/pwl/cpwl_font_map.h
@@ -46,9 +46,6 @@ class CPWL_FontMap : public IPVT_FontMap {
   static int32_t GetNativeCharset();
   ByteString GetNativeFontName(int32_t nCharset);
 
-  static ByteString GetDefaultFontByCharset(int32_t nCharset);
-  static const FPDF_CharsetFontMap defaultTTFMap[];
-
  protected:
   virtual void Initialize();
   virtual CPDF_Document* GetDocument();
diff --git a/xfa/fgas/font/cfgas_fontmgr.cpp b/xfa/fgas/font/cfgas_fontmgr.cpp
index d7acc62eef..354db0be43 100644
--- a/xfa/fgas/font/cfgas_fontmgr.cpp
+++ b/xfa/fgas/font/cfgas_fontmgr.cpp
@@ -27,59 +27,6 @@
 
 namespace {
 
-struct FX_CHARSET_MAP {
-  uint16_t charset;
-  uint16_t codepage;
-};
-
-const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
-    {FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean},
-    {FX_CHARSET_Default, FX_CODEPAGE_DefANSI},
-    {FX_CHARSET_Symbol, FX_CODEPAGE_Symbol},
-    {FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman},
-    {FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS},
-    {FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean},
-    {FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified},
-    {FX_CHARSET_MAC_ChineseTraditional, FX_CODEPAGE_MAC_ChineseTraditional},
-    {FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew},
-    {FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic},
-    {FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek},
-    {FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish},
-    {FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai},
-    {FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean},
-    {FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic},
-    {FX_CHARSET_ShiftJIS, FX_CODEPAGE_ShiftJIS},
-    {FX_CHARSET_Hangul, FX_CODEPAGE_Hangul},
-    {FX_CHARSET_Johab, FX_CODEPAGE_Johab},
-    {FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified},
-    {FX_CHARSET_ChineseTraditional, FX_CODEPAGE_ChineseTraditional},
-    {FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek},
-    {FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish},
-    {FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese},
-    {FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew},
-    {FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic},
-    {FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic},
-    {FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic},
-    {FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai},
-    {FX_CHARSET_MSWin_EasternEuropean, FX_CODEPAGE_MSWin_EasternEuropean},
-    {FX_CHARSET_US, FX_CODEPAGE_MSDOS_US},
-    {FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean},
-};
-
-uint16_t GetCodePageFromCharset(uint8_t charset) {
-  auto* result =
-      std::lower_bound(std::begin(g_FXCharset2CodePageTable),
-                       std::end(g_FXCharset2CodePageTable), charset,
-                       [](const FX_CHARSET_MAP& iter, const uint16_t& charset) {
-                         return iter.charset < charset;
-                       });
-  if (result != std::end(g_FXCharset2CodePageTable) &&
-      result->charset == charset) {
-    return result->codepage;
-  }
-  return 0xFFFF;
-}
-
 int32_t GetSimilarityScore(FX_FONTDESCRIPTOR const* pFont,
                            uint32_t dwFontStyles) {
   int32_t iValue = 0;
@@ -118,7 +65,7 @@ const FX_FONTDESCRIPTOR* MatchDefaultFont(
     if (font.uCharSet == FX_CHARSET_Symbol)
       continue;
     if (pParams->wCodePage != 0xFFFF) {
-      if (GetCodePageFromCharset(font.uCharSet) != pParams->wCodePage)
+      if (FX_GetCodePageFromCharset(font.uCharSet) != pParams->wCodePage)
         continue;
     } else {
       if (pParams->dwUSB < 128) {
@@ -1033,7 +980,7 @@ RetainPtr<CFGAS_GEFont> CFGAS_FontMgr::GetFontByUnicode(
   if (!pFD)
     return nullptr;
 
-  uint16_t newCodePage = GetCodePageFromCharset(pFD->uCharSet);
+  uint16_t newCodePage = FX_GetCodePageFromCharset(pFD->uCharSet);
   const wchar_t* pFontFace = pFD->wsFontFace;
   RetainPtr<CFGAS_GEFont> pFont =
       CFGAS_GEFont::LoadFont(pFontFace, dwFontStyles, newCodePage, this);
author	Artem Strygin <art-snake@yandex-team.ru>	2018-05-31 14:08:11 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	2018-05-31 14:08:11 +0000
commit	656eb84f83fc1701737d9c65658371a99428d727 (patch)
tree	6fb28e4283c1ef8696b42d8b7d200a13c32742fc
parent	8f7ee98e2c622c21f452cd9fd5956fe85bcb2b7c (diff)
download	pdfium-656eb84f83fc1701737d9c65658371a99428d727.tar.xz