summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Artem Strygin <art-snake@yandex-team.ru> 2018-05-31 14:08:11 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-05-31 14:08:11 +0000
commit: 656eb84f83fc1701737d9c65658371a99428d727 (patch)
tree: 6fb28e4283c1ef8696b42d8b7d200a13c32742fc
parent: 8f7ee98e2c622c21f452cd9fd5956fe85bcb2b7c (diff)
download: pdfium-656eb84f83fc1701737d9c65658371a99428d727.tar.xz
Move codepage/charset methods into related places.
Change-Id: I71417cc5b1bd00f77d42740198cc17487ebd686e
Reviewed-on: https://pdfium-review.googlesource.com/33330
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
-rw-r--r-- core/fpdfapi/page/cpdf_streamcontentparser.cpp | 3
-rw-r--r-- core/fpdfapi/parser/cpdf_document.cpp | 9
-rw-r--r-- core/fpdfdoc/cpdf_interform.cpp | 81
-rw-r--r-- core/fpdfdoc/cpvt_generateap.cpp | 4
-rw-r--r-- core/fxcrt/fx_codepage.cpp | 70
-rw-r--r-- core/fxcrt/fx_codepage.h | 4
-rw-r--r-- core/fxge/android/cfpf_skiafontmgr.cpp | 4
-rw-r--r-- core/fxge/cfx_font.cpp | 82
-rw-r--r-- core/fxge/cfx_font.h | 18
-rw-r--r-- core/fxge/cfx_fontmapper.cpp | 33
-rw-r--r-- fpdfsdk/cfx_systemhandler.cpp | 19
-rw-r--r-- fpdfsdk/fpdf_sysfontinfo.cpp | 5
-rw-r--r-- fpdfsdk/pwl/cpwl_font_map.cpp | 133
-rw-r--r-- fpdfsdk/pwl/cpwl_font_map.h | 3
-rw-r--r-- xfa/fgas/font/cfgas_fontmgr.cpp | 57
15 files changed, 209 insertions, 316 deletions
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
index 9dd66f673b..7e7c337d96 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
@@ -1142,7 +1142,8 @@ CPDF_Font* CPDF_StreamContentParser::FindFont(const ByteString& name) {
CPDF_Dictionary* pFontDict = ToDictionary(FindResourceObj("Font", name));
if (!pFontDict) {
m_bResourceMissing = true;
- return CPDF_Font::GetStockFont(m_pDocument.Get(), "Helvetica");
+ return CPDF_Font::GetStockFont(m_pDocument.Get(),
+ CFX_Font::kDefaultAnsiFontName);
}
CPDF_Font* pFont = m_pDocument->LoadFont(pFontDict);
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 2e4baabe91..412f726eca 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -736,9 +736,7 @@ CPDF_Font* CPDF_Document::AddFont(CFX_Font* pFont, int charset, bool bVert) {
if (!pFont)
return nullptr;
- bool bCJK = charset == FX_CHARSET_ChineseTraditional ||
- charset == FX_CHARSET_ChineseSimplified ||
- charset == FX_CHARSET_Hangul || charset == FX_CHARSET_ShiftJIS;
+ const bool bCJK = FX_CharSetIsCJK(charset);
ByteString basefont = pFont->GetFamilyName();
basefont.Replace(" ", "");
int flags =
@@ -856,10 +854,7 @@ CPDF_Font* CPDF_Document::AddWindowsFont(LOGFONTA* pLogFont,
(pLogFont->lfPitchAndFamily & 0xf8) == FF_SCRIPT,
pLogFont->lfCharSet == FX_CHARSET_Symbol);
- bool bCJK = pLogFont->lfCharSet == FX_CHARSET_ChineseTraditional ||
- pLogFont->lfCharSet == FX_CHARSET_ChineseSimplified ||
- pLogFont->lfCharSet == FX_CHARSET_Hangul ||
- pLogFont->lfCharSet == FX_CHARSET_ShiftJIS;
+ const bool bCJK = FX_CharSetIsCJK(pLogFont->lfCharSet);
ByteString basefont;
if (bTranslateName && bCJK)
basefont = FPDF_GetPSNameFromTT(hDC);
diff --git a/core/fpdfdoc/cpdf_interform.cpp b/core/fpdfdoc/cpdf_interform.cpp
index ad7753b499..ea408ec0df 100644
--- a/core/fpdfdoc/cpdf_interform.cpp
+++ b/core/fpdfdoc/cpdf_interform.cpp
@@ -75,13 +75,14 @@ void InitDict(CPDF_Dictionary*& pFormDict, CPDF_Document* pDocument) {
if (!pFormDict->KeyExist("DR")) {
ByteString csBaseName;
uint8_t charSet = CPDF_InterForm::GetNativeCharSet();
- CPDF_Font* pFont = CPDF_InterForm::AddStandardFont(pDocument, "Helvetica");
+ CPDF_Font* pFont = CPDF_InterForm::AddStandardFont(
+ pDocument, CFX_Font::kDefaultAnsiFontName);
if (pFont)
AddFont(pFormDict, pDocument, pFont, &csBaseName);
if (charSet != FX_CHARSET_ANSI) {
ByteString csFontName = CPDF_InterForm::GetNativeFont(charSet, nullptr);
- if (!pFont || csFontName != "Helvetica") {
+ if (!pFont || csFontName != CFX_Font::kDefaultAnsiFontName) {
pFont = CPDF_InterForm::AddNativeFont(pDocument);
if (pFont) {
csBaseName.clear();
@@ -564,63 +565,7 @@ CPDF_Font* AddNativeInterFormFont(CPDF_Dictionary*& pFormDict,
// static
uint8_t CPDF_InterForm::GetNativeCharSet() {
-#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
- uint8_t charSet = FX_CHARSET_ANSI;
- UINT iCodePage = ::GetACP();
- switch (iCodePage) {
- case FX_CODEPAGE_ShiftJIS:
- charSet = FX_CHARSET_ShiftJIS;
- break;
- case FX_CODEPAGE_ChineseSimplified:
- charSet = FX_CHARSET_ChineseSimplified;
- break;
- case FX_CODEPAGE_ChineseTraditional:
- charSet = FX_CHARSET_ChineseTraditional;
- break;
- case FX_CODEPAGE_MSWin_WesternEuropean:
- charSet = FX_CHARSET_ANSI;
- break;
- case FX_CODEPAGE_MSDOS_Thai:
- charSet = FX_CHARSET_Thai;
- break;
- case FX_CODEPAGE_Hangul:
- charSet = FX_CHARSET_Hangul;
- break;
- case FX_CODEPAGE_UTF16LE:
- charSet = FX_CHARSET_ANSI;
- break;
- case FX_CODEPAGE_MSWin_EasternEuropean:
- charSet = FX_CHARSET_MSWin_EasternEuropean;
- break;
- case FX_CODEPAGE_MSWin_Cyrillic:
- charSet = FX_CHARSET_MSWin_Cyrillic;
- break;
- case FX_CODEPAGE_MSWin_Greek:
- charSet = FX_CHARSET_MSWin_Greek;
- break;
- case FX_CODEPAGE_MSWin_Turkish:
- charSet = FX_CHARSET_MSWin_Turkish;
- break;
- case FX_CODEPAGE_MSWin_Hebrew:
- charSet = FX_CHARSET_MSWin_Hebrew;
- break;
- case FX_CODEPAGE_MSWin_Arabic:
- charSet = FX_CHARSET_MSWin_Arabic;
- break;
- case FX_CODEPAGE_MSWin_Baltic:
- charSet = FX_CHARSET_MSWin_Baltic;
- break;
- case FX_CODEPAGE_MSWin_Vietnamese:
- charSet = FX_CHARSET_MSWin_Vietnamese;
- break;
- case FX_CODEPAGE_Johab:
- charSet = FX_CHARSET_Johab;
- break;
- }
- return charSet;
-#else
- return 0;
-#endif
+ return FX_GetCharsetFromCodePage(FXSYS_GetACP());
}
CPDF_InterForm::CPDF_InterForm(CPDF_Document* pDocument)
@@ -727,23 +672,19 @@ ByteString CPDF_InterForm::GetNativeFont(uint8_t charSet, void* pLogFont) {
#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
LOGFONTA lf = {};
if (charSet == FX_CHARSET_ANSI) {
- csFontName = "Helvetica";
+ csFontName = CFX_Font::kDefaultAnsiFontName;
return csFontName;
}
bool bRet = false;
- if (charSet == FX_CHARSET_ShiftJIS) {
+ const ByteString default_font_name =
+ CFX_Font::GetDefaultFontNameByCharset(charSet);
+ if (!default_font_name.IsEmpty()) {
bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE,
- "MS Mincho", lf);
- } else if (charSet == FX_CHARSET_ChineseSimplified) {
- bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, "SimSun",
- lf);
- } else if (charSet == FX_CHARSET_ChineseTraditional) {
- bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, "MingLiU",
- lf);
+ default_font_name.c_str(), lf);
}
if (!bRet) {
bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE,
- "Arial Unicode MS", lf);
+ CFX_Font::kUniversalDefaultFontName, lf);
}
if (!bRet) {
bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE,
@@ -773,7 +714,7 @@ CPDF_Font* CPDF_InterForm::AddNativeFont(uint8_t charSet,
LOGFONTA lf;
ByteString csFontName = GetNativeFont(charSet, &lf);
if (!csFontName.IsEmpty()) {
- if (csFontName == "Helvetica")
+ if (csFontName == CFX_Font::kDefaultAnsiFontName)
return AddStandardFont(pDocument, csFontName);
return pDocument->AddWindowsFont(&lf, false, true);
}
diff --git a/core/fpdfdoc/cpvt_generateap.cpp b/core/fpdfdoc/cpvt_generateap.cpp
index e395fbfa4a..cf55e956ea 100644
--- a/core/fpdfdoc/cpvt_generateap.cpp
+++ b/core/fpdfdoc/cpvt_generateap.cpp
@@ -399,7 +399,7 @@ std::unique_ptr<CPDF_Dictionary> GenerateResourceFontDict(
CPDF_Dictionary* pFontDict = pDoc->NewIndirect<CPDF_Dictionary>();
pFontDict->SetNewFor<CPDF_Name>("Type", "Font");
pFontDict->SetNewFor<CPDF_Name>("Subtype", "Type1");
- pFontDict->SetNewFor<CPDF_Name>("BaseFont", "Helvetica");
+ pFontDict->SetNewFor<CPDF_Name>("BaseFont", CFX_Font::kDefaultAnsiFontName);
pFontDict->SetNewFor<CPDF_Name>("Encoding", "WinAnsiEncoding");
auto pResourceFontDict =
@@ -949,7 +949,7 @@ void CPVT_GenerateAP::GenerateFormAP(Type type,
pFontDict = pDoc->NewIndirect<CPDF_Dictionary>();
pFontDict->SetNewFor<CPDF_Name>("Type", "Font");
pFontDict->SetNewFor<CPDF_Name>("Subtype", "Type1");
- pFontDict->SetNewFor<CPDF_Name>("BaseFont", "Helvetica");
+ pFontDict->SetNewFor<CPDF_Name>("BaseFont", CFX_Font::kDefaultAnsiFontName);
pFontDict->SetNewFor<CPDF_Name>("Encoding", "WinAnsiEncoding");
pDRFontDict->SetNewFor<CPDF_Reference>(
sFontName.Right(sFontName.GetLength() - 1), pDoc,
diff --git a/core/fxcrt/fx_codepage.cpp b/core/fxcrt/fx_codepage.cpp
index 56fad30c76..d59c93ed85 100644
--- a/core/fxcrt/fx_codepage.cpp
+++ b/core/fxcrt/fx_codepage.cpp
@@ -6,6 +6,9 @@
#include "core/fxcrt/fx_codepage.h"
+#include <algorithm>
+#include <utility>
+
namespace {
const uint16_t g_FX_MSDOSThaiUnicodes[128] = {
@@ -152,6 +155,45 @@ const uint16_t g_FX_MSWinBalticUnicodes[128] = {
0x017E, 0x02D9,
};
+struct FX_CHARSET_MAP {
+ uint16_t charset;
+ uint16_t codepage;
+};
+
+const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
+ {FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean},
+ {FX_CHARSET_Default, FX_CODEPAGE_DefANSI},
+ {FX_CHARSET_Symbol, FX_CODEPAGE_Symbol},
+ {FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman},
+ {FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS},
+ {FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean},
+ {FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified},
+ {FX_CHARSET_MAC_ChineseTraditional, FX_CODEPAGE_MAC_ChineseTraditional},
+ {FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew},
+ {FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic},
+ {FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek},
+ {FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish},
+ {FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai},
+ {FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean},
+ {FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic},
+ {FX_CHARSET_ShiftJIS, FX_CODEPAGE_ShiftJIS},
+ {FX_CHARSET_Hangul, FX_CODEPAGE_Hangul},
+ {FX_CHARSET_Johab, FX_CODEPAGE_Johab},
+ {FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified},
+ {FX_CHARSET_ChineseTraditional, FX_CODEPAGE_ChineseTraditional},
+ {FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek},
+ {FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish},
+ {FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese},
+ {FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew},
+ {FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic},
+ {FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic},
+ {FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic},
+ {FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai},
+ {FX_CHARSET_MSWin_EasternEuropean, FX_CODEPAGE_MSWin_EasternEuropean},
+ {FX_CHARSET_US, FX_CODEPAGE_MSDOS_US},
+ {FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean},
+};
+
} // namespace
const FX_CharsetUnicodes g_FX_CharsetUnicodes[8] = {
@@ -164,3 +206,31 @@ const FX_CharsetUnicodes g_FX_CharsetUnicodes[8] = {
{FX_CHARSET_MSWin_Arabic, g_FX_MSWinArabicUnicodes},
{FX_CHARSET_MSWin_Baltic, g_FX_MSWinBalticUnicodes},
};
+
+uint16_t FX_GetCodePageFromCharset(uint8_t charset) {
+ auto* result =
+ std::lower_bound(std::begin(g_FXCharset2CodePageTable),
+ std::end(g_FXCharset2CodePageTable), charset,
+ [](const FX_CHARSET_MAP& iter, const uint16_t& charset) {
+ return iter.charset < charset;
+ });
+ if (result != std::end(g_FXCharset2CodePageTable) &&
+ result->charset == charset) {
+ return result->codepage;
+ }
+ return 0xFFFF;
+}
+
+uint8_t FX_GetCharsetFromCodePage(uint16_t codepage) {
+ for (const auto& it : g_FXCharset2CodePageTable) {
+ if (it.codepage == codepage)
+ return it.charset;
+ }
+ return FX_CHARSET_ANSI;
+}
+
+bool FX_CharSetIsCJK(uint8_t uCharset) {
+ return (uCharset == FX_CHARSET_ChineseSimplified) ||
+ (uCharset == FX_CHARSET_ChineseTraditional) ||
+ (uCharset == FX_CHARSET_Hangul) || (uCharset == FX_CHARSET_ShiftJIS);
+}
diff --git a/core/fxcrt/fx_codepage.h b/core/fxcrt/fx_codepage.h
index 43692286a5..4a6c6d86b3 100644
--- a/core/fxcrt/fx_codepage.h
+++ b/core/fxcrt/fx_codepage.h
@@ -98,4 +98,8 @@ struct FX_CharsetUnicodes {
extern const FX_CharsetUnicodes g_FX_CharsetUnicodes[8];
+uint16_t FX_GetCodePageFromCharset(uint8_t charset);
+uint8_t FX_GetCharsetFromCodePage(uint16_t codepage);
+bool FX_CharSetIsCJK(uint8_t uCharset);
+
#endif // CORE_FXCRT_FX_CODEPAGE_H_
diff --git a/core/fxge/android/cfpf_skiafontmgr.cpp b/core/fxge/android/cfpf_skiafontmgr.cpp
index d44d7d56d2..0b49f6283c 100644
--- a/core/fxge/android/cfpf_skiafontmgr.cpp
+++ b/core/fxge/android/cfpf_skiafontmgr.cpp
@@ -187,9 +187,7 @@ uint32_t FPF_SKIAGetFamilyHash(const ByteStringView& bsFamily,
}
bool FPF_SkiaIsCJK(uint8_t uCharset) {
- return (uCharset == FX_CHARSET_ChineseSimplified) ||
- (uCharset == FX_CHARSET_ChineseTraditional) ||
- (uCharset == FX_CHARSET_Hangul) || (uCharset == FX_CHARSET_ShiftJIS);
+ return FX_CharSetIsCJK(uCharset);
}
bool FPF_SkiaMaybeSymbol(const ByteStringView& bsFacename) {
diff --git a/core/fxge/cfx_font.cpp b/core/fxge/cfx_font.cpp
index d04fc0dac0..bee1d789ef 100644
--- a/core/fxge/cfx_font.cpp
+++ b/core/fxge/cfx_font.cpp
@@ -209,6 +209,88 @@ const uint8_t CFX_Font::s_WeightPow_SHIFTJIS[] = {
59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60,
};
+const CFX_Font::CharsetFontMap CFX_Font::defaultTTFMap[] = {
+ {FX_CHARSET_ANSI, kDefaultAnsiFontName},
+ {FX_CHARSET_ChineseSimplified, "SimSun"},
+ {FX_CHARSET_ChineseTraditional, "MingLiU"},
+ {FX_CHARSET_ShiftJIS, "MS Gothic"},
+ {FX_CHARSET_Hangul, "Batang"},
+ {FX_CHARSET_MSWin_Cyrillic, "Arial"},
+#if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
+ {FX_CHARSET_MSWin_EasternEuropean, "Arial"},
+#else
+ {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"},
+#endif
+ {FX_CHARSET_MSWin_Arabic, "Arial"},
+ {-1, nullptr}};
+
+// static
+const char CFX_Font::kDefaultAnsiFontName[] = "Helvetica";
+// static
+const char CFX_Font::kUniversalDefaultFontName[] = "Arial Unicode MS";
+
+// static
+ByteString CFX_Font::GetDefaultFontNameByCharset(uint8_t nCharset) {
+ int i = 0;
+ while (defaultTTFMap[i].charset != -1) {
+ if (nCharset == static_cast<uint8_t>(defaultTTFMap[i].charset))
+ return defaultTTFMap[i].fontname;
+ ++i;
+ }
+ return kUniversalDefaultFontName;
+}
+
+// static
+uint8_t CFX_Font::GetCharSetFromUnicode(uint16_t word) {
+ // to avoid CJK Font to show ASCII
+ if (word < 0x7F)
+ return FX_CHARSET_ANSI;
+
+ // find new charset
+ if ((word >= 0x4E00 && word <= 0x9FA5) ||
+ (word >= 0xE7C7 && word <= 0xE7F3) ||
+ (word >= 0x3000 && word <= 0x303F) ||
+ (word >= 0x2000 && word <= 0x206F)) {
+ return FX_CHARSET_ChineseSimplified;
+ }
+
+ if (((word >= 0x3040) && (word <= 0x309F)) ||
+ ((word >= 0x30A0) && (word <= 0x30FF)) ||
+ ((word >= 0x31F0) && (word <= 0x31FF)) ||
+ ((word >= 0xFF00) && (word <= 0xFFEF))) {
+ return FX_CHARSET_ShiftJIS;
+ }
+
+ if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
+ ((word >= 0x1100) && (word <= 0x11FF)) ||
+ ((word >= 0x3130) && (word <= 0x318F))) {
+ return FX_CHARSET_Hangul;
+ }
+
+ if (word >= 0x0E00 && word <= 0x0E7F)
+ return FX_CHARSET_Thai;
+
+ if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
+ return FX_CHARSET_MSWin_Greek;
+
+ if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
+ return FX_CHARSET_MSWin_Arabic;
+
+ if (word >= 0x0590 && word <= 0x05FF)
+ return FX_CHARSET_MSWin_Hebrew;
+
+ if (word >= 0x0400 && word <= 0x04FF)
+ return FX_CHARSET_MSWin_Cyrillic;
+
+ if (word >= 0x0100 && word <= 0x024F)
+ return FX_CHARSET_MSWin_EasternEuropean;
+
+ if (word >= 0x1E00 && word <= 0x1EFF)
+ return FX_CHARSET_MSWin_Vietnamese;
+
+ return FX_CHARSET_ANSI;
+}
+
CFX_Font::CFX_Font()
:
m_Face(nullptr),
diff --git a/core/fxge/cfx_font.h b/core/fxge/cfx_font.h
index fbf35eaca1..a5a0057c16 100644
--- a/core/fxge/cfx_font.h
+++ b/core/fxge/cfx_font.h
@@ -30,6 +30,11 @@ class CFX_Font {
CFX_Font();
~CFX_Font();
+ static const char kDefaultAnsiFontName[];
+ static const char kUniversalDefaultFontName[];
+ static ByteString GetDefaultFontNameByCharset(uint8_t nCharset);
+ static uint8_t GetCharSetFromUnicode(uint16_t word);
+
void LoadSubst(const ByteString& face_name,
bool bTrueType,
uint32_t flags,
@@ -96,6 +101,19 @@ class CFX_Font {
static const uint8_t s_WeightPow_11[kWeightPowArraySize];
static const uint8_t s_WeightPow_SHIFTJIS[kWeightPowArraySize];
+ // This struct should be the same as FPDF_CharsetFontMap.
+ typedef struct {
+ int charset; // Character Set Enum value, see FX_CHARSET_XXX.
+ const char* fontname; // Name of default font to use with that charset.
+ } CharsetFontMap;
+
+ /**
+ * Pointer to the default character set to TT Font name map. The
+ * map is an array of CharsetFontMap structs, with its end indicated
+ * by a { -1, NULL } entry.
+ **/
+ static const CharsetFontMap defaultTTFMap[];
+
#ifdef PDF_ENABLE_XFA
protected:
std::unique_ptr<FXFT_StreamRec> m_pOwnedStream;
diff --git a/core/fxge/cfx_fontmapper.cpp b/core/fxge/cfx_fontmapper.cpp
index b9a8eac1a1..e6fc984edc 100644
--- a/core/fxge/cfx_fontmapper.cpp
+++ b/core/fxge/cfx_fontmapper.cpp
@@ -145,19 +145,6 @@ const struct AltFontFamily {
{"ForteMT", "Forte"},
};
-const struct CODEPAGE_MAP {
- uint16_t codepage;
- uint8_t charset;
-} g_Codepage2CharsetTable[] = {
- {0, 1}, {42, 2}, {437, 254}, {850, 255}, {874, 222},
- {932, 128}, {936, 134}, {949, 129}, {950, 136}, {1250, 238},
- {1251, 204}, {1252, 0}, {1253, 161}, {1254, 162}, {1255, 177},
- {1256, 178}, {1257, 186}, {1258, 163}, {1361, 130}, {10000, 77},
- {10001, 78}, {10002, 81}, {10003, 79}, {10004, 84}, {10005, 83},
- {10006, 85}, {10007, 89}, {10008, 80}, {10021, 87}, {10029, 88},
- {10081, 86},
-};
-
ByteString TT_NormalizeName(const char* family) {
ByteString norm(family);
norm.Remove(' ');
@@ -170,19 +157,6 @@ ByteString TT_NormalizeName(const char* family) {
return norm;
}
-uint8_t GetCharsetFromCodePage(uint16_t codepage) {
- const CODEPAGE_MAP* pEnd =
- g_Codepage2CharsetTable + FX_ArraySize(g_Codepage2CharsetTable);
- const CODEPAGE_MAP* pCharmap =
- std::lower_bound(g_Codepage2CharsetTable, pEnd, codepage,
- [](const CODEPAGE_MAP& charset, uint16_t page) {
- return charset.codepage < page;
- });
- if (pCharmap < pEnd && codepage == pCharmap->codepage)
- return pCharmap->charset;
- return FX_CHARSET_Default;
-}
-
void GetFontFamily(uint32_t nStyle, ByteString* fontName) {
if (fontName->Contains("Script")) {
if (FontStyleIsBold(nStyle))
@@ -544,13 +518,10 @@ FXFT_Face CFX_FontMapper::FindSubstFont(const ByteString& name,
int Charset = FX_CHARSET_ANSI;
if (WindowCP)
- Charset = GetCharsetFromCodePage(WindowCP);
+ Charset = FX_GetCharsetFromCodePage(WindowCP);
else if (iBaseFont == kNumStandardFonts && FontStyleIsSymbolic(flags))
Charset = FX_CHARSET_Symbol;
- const bool bCJK = (Charset == FX_CHARSET_ShiftJIS ||
- Charset == FX_CHARSET_ChineseSimplified ||
- Charset == FX_CHARSET_Hangul ||
- Charset == FX_CHARSET_ChineseTraditional);
+ const bool bCJK = FX_CharSetIsCJK(Charset);
bool bItalic = FontStyleIsItalic(nStyle);
GetFontFamily(nStyle, &family);
diff --git a/fpdfsdk/cfx_systemhandler.cpp b/fpdfsdk/cfx_systemhandler.cpp
index 28170d2878..62d7c735b0 100644
--- a/fpdfsdk/cfx_systemhandler.cpp
+++ b/fpdfsdk/cfx_systemhandler.cpp
@@ -20,22 +20,6 @@
#include "fpdfsdk/cpdfsdk_widget.h"
#include "fpdfsdk/formfiller/cffl_formfiller.h"
-namespace {
-
-int CharSet2CP(int charset) {
- if (charset == FX_CHARSET_ShiftJIS)
- return FX_CODEPAGE_ShiftJIS;
- if (charset == FX_CHARSET_ChineseSimplified)
- return FX_CODEPAGE_ChineseSimplified;
- if (charset == FX_CHARSET_Hangul)
- return FX_CODEPAGE_Hangul;
- if (charset == FX_CHARSET_ChineseTraditional)
- return FX_CODEPAGE_ChineseTraditional;
- return FX_CODEPAGE_DefANSI;
-}
-
-} // namespace
-
CFX_SystemHandler::CFX_SystemHandler(CPDFSDK_FormFillEnvironment* pFormFillEnv)
: m_pFormFillEnv(pFormFillEnv) {}
@@ -121,7 +105,8 @@ CPDF_Font* CFX_SystemHandler::AddNativeTrueTypeFontToPDF(
return nullptr;
auto pFXFont = pdfium::MakeUnique<CFX_Font>();
- pFXFont->LoadSubst(sFontFaceName, true, 0, 0, 0, CharSet2CP(nCharset), false);
+ pFXFont->LoadSubst(sFontFaceName, true, 0, 0, 0,
+ FX_GetCodePageFromCharset(nCharset), false);
return pDoc->AddFont(pFXFont.get(), nCharset, false);
}
diff --git a/fpdfsdk/fpdf_sysfontinfo.cpp b/fpdfsdk/fpdf_sysfontinfo.cpp
index 3a3f9e1257..564912b85c 100644
--- a/fpdfsdk/fpdf_sysfontinfo.cpp
+++ b/fpdfsdk/fpdf_sysfontinfo.cpp
@@ -9,6 +9,7 @@
#include <memory>
#include "core/fxcrt/fx_codepage.h"
+#include "core/fxge/cfx_font.h"
#include "core/fxge/cfx_fontmapper.h"
#include "core/fxge/cfx_fontmgr.h"
#include "core/fxge/cfx_gemodule.h"
@@ -30,6 +31,8 @@ static_assert(FXFONT_GB2312_CHARSET == FX_CHARSET_ChineseSimplified,
"Charset must match");
static_assert(FXFONT_CHINESEBIG5_CHARSET == FX_CHARSET_ChineseTraditional,
"Charset must match");
+static_assert(sizeof(CFX_Font::CharsetFontMap) == sizeof(FPDF_CharsetFontMap),
+ "CFX_Font::CharsetFontMap should be same as FPDF_CharsetFontMap");
class CFX_ExternalFontInfo final : public SystemFontInfoIface {
public:
@@ -122,7 +125,7 @@ FPDF_SetSystemFontInfo(FPDF_SYSFONTINFO* pFontInfoExt) {
}
FPDF_EXPORT const FPDF_CharsetFontMap* FPDF_CALLCONV FPDF_GetDefaultTTFMap() {
- return CPWL_FontMap::defaultTTFMap;
+ return reinterpret_cast<const FPDF_CharsetFontMap*>(CFX_Font::defaultTTFMap);
}
struct FPDF_SYSFONTINFO_DEFAULT : public FPDF_SYSFONTINFO {
diff --git a/fpdfsdk/pwl/cpwl_font_map.cpp b/fpdfsdk/pwl/cpwl_font_map.cpp
index 5e5556ebb0..f60e7d2fb1 100644
--- a/fpdfsdk/pwl/cpwl_font_map.cpp
+++ b/fpdfsdk/pwl/cpwl_font_map.cpp
@@ -21,8 +21,6 @@
namespace {
-const char kDefaultFontName[] = "Helvetica";
-
const char* const g_sDEStandardFontName[] = {"Courier",
"Courier-Bold",
"Courier-BoldOblique",
@@ -100,7 +98,8 @@ int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word,
if (KnowWord(nNewFontIndex, word))
return nNewFontIndex;
}
- nNewFontIndex = GetFontIndex("Arial Unicode MS", FX_CHARSET_Default, false);
+ nNewFontIndex = GetFontIndex(CFX_Font::kUniversalDefaultFontName,
+ FX_CHARSET_Default, false);
if (nNewFontIndex >= 0) {
if (KnowWord(nNewFontIndex, word))
return nNewFontIndex;
@@ -145,7 +144,7 @@ void CPWL_FontMap::Empty() {
}
void CPWL_FontMap::Initialize() {
- GetFontIndex(kDefaultFontName, FX_CHARSET_ANSI, false);
+ GetFontIndex(CFX_Font::kDefaultAnsiFontName, FX_CHARSET_ANSI, false);
}
bool CPWL_FontMap::IsStandardFont(const ByteString& sFontName) {
@@ -210,7 +209,7 @@ ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
if (nCharset == FX_CHARSET_Default)
nCharset = GetNativeCharset();
- ByteString sFontName = GetDefaultFontByCharset(nCharset);
+ ByteString sFontName = CFX_Font::GetDefaultFontNameByCharset(nCharset);
if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName))
return ByteString();
@@ -274,135 +273,17 @@ const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
}
int32_t CPWL_FontMap::GetNativeCharset() {
- uint8_t nCharset = FX_CHARSET_ANSI;
- int32_t iCodePage = FXSYS_GetACP();
- switch (iCodePage) {
- case FX_CODEPAGE_ShiftJIS:
- nCharset = FX_CHARSET_ShiftJIS;
- break;
- case FX_CODEPAGE_ChineseSimplified:
- nCharset = FX_CHARSET_ChineseSimplified;
- break;
- case FX_CODEPAGE_ChineseTraditional:
- nCharset = FX_CHARSET_ChineseTraditional;
- break;
- case FX_CODEPAGE_MSWin_WesternEuropean:
- nCharset = FX_CHARSET_ANSI;
- break;
- case FX_CODEPAGE_MSDOS_Thai:
- nCharset = FX_CHARSET_Thai;
- break;
- case FX_CODEPAGE_Hangul:
- nCharset = FX_CHARSET_Hangul;
- break;
- case FX_CODEPAGE_UTF16LE:
- nCharset = FX_CHARSET_ANSI;
- break;
- case FX_CODEPAGE_MSWin_EasternEuropean:
- nCharset = FX_CHARSET_MSWin_EasternEuropean;
- break;
- case FX_CODEPAGE_MSWin_Cyrillic:
- nCharset = FX_CHARSET_MSWin_Cyrillic;
- break;
- case FX_CODEPAGE_MSWin_Greek:
- nCharset = FX_CHARSET_MSWin_Greek;
- break;
- case FX_CODEPAGE_MSWin_Turkish:
- nCharset = FX_CHARSET_MSWin_Turkish;
- break;
- case FX_CODEPAGE_MSWin_Hebrew:
- nCharset = FX_CHARSET_MSWin_Hebrew;
- break;
- case FX_CODEPAGE_MSWin_Arabic:
- nCharset = FX_CHARSET_MSWin_Arabic;
- break;
- case FX_CODEPAGE_MSWin_Baltic:
- nCharset = FX_CHARSET_MSWin_Baltic;
- break;
- case FX_CODEPAGE_MSWin_Vietnamese:
- nCharset = FX_CHARSET_MSWin_Vietnamese;
- break;
- case FX_CODEPAGE_Johab:
- nCharset = FX_CHARSET_Johab;
- break;
- }
- return nCharset;
-}
-
-const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
- {FX_CHARSET_ANSI, "Helvetica"},
- {FX_CHARSET_ChineseSimplified, "SimSun"},
- {FX_CHARSET_ChineseTraditional, "MingLiU"},
- {FX_CHARSET_ShiftJIS, "MS Gothic"},
- {FX_CHARSET_Hangul, "Batang"},
- {FX_CHARSET_MSWin_Cyrillic, "Arial"},
-#if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
- {FX_CHARSET_MSWin_EasternEuropean, "Arial"},
-#else
- {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"},
-#endif
- {FX_CHARSET_MSWin_Arabic, "Arial"},
- {-1, nullptr}};
-
-ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
- int i = 0;
- while (defaultTTFMap[i].charset != -1) {
- if (nCharset == defaultTTFMap[i].charset)
- return defaultTTFMap[i].fontname;
- ++i;
- }
- return "";
+ return FX_GetCharsetFromCodePage(FXSYS_GetACP());
}
int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) {
// to avoid CJK Font to show ASCII
if (word < 0x7F)
return FX_CHARSET_ANSI;
+
// follow the old charset
if (nOldCharset != FX_CHARSET_Default)
return nOldCharset;
- // find new charset
- if ((word >= 0x4E00 && word <= 0x9FA5) ||
- (word >= 0xE7C7 && word <= 0xE7F3) ||
- (word >= 0x3000 && word <= 0x303F) ||
- (word >= 0x2000 && word <= 0x206F)) {
- return FX_CHARSET_ChineseSimplified;
- }
-
- if (((word >= 0x3040) && (word <= 0x309F)) ||
- ((word >= 0x30A0) && (word <= 0x30FF)) ||
- ((word >= 0x31F0) && (word <= 0x31FF)) ||
- ((word >= 0xFF00) && (word <= 0xFFEF))) {
- return FX_CHARSET_ShiftJIS;
- }
-
- if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
- ((word >= 0x1100) && (word <= 0x11FF)) ||
- ((word >= 0x3130) && (word <= 0x318F))) {
- return FX_CHARSET_Hangul;
- }
-
- if (word >= 0x0E00 && word <= 0x0E7F)
- return FX_CHARSET_Thai;
-
- if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
- return FX_CHARSET_MSWin_Greek;
-
- if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
- return FX_CHARSET_MSWin_Arabic;
-
- if (word >= 0x0590 && word <= 0x05FF)
- return FX_CHARSET_MSWin_Hebrew;
-
- if (word >= 0x0400 && word <= 0x04FF)
- return FX_CHARSET_MSWin_Cyrillic;
-
- if (word >= 0x0100 && word <= 0x024F)
- return FX_CHARSET_MSWin_EasternEuropean;
-
- if (word >= 0x1E00 && word <= 0x1EFF)
- return FX_CHARSET_MSWin_Vietnamese;
-
- return FX_CHARSET_ANSI;
+ return CFX_Font::GetCharSetFromUnicode(word);
}
diff --git a/fpdfsdk/pwl/cpwl_font_map.h b/fpdfsdk/pwl/cpwl_font_map.h
index 592c3fafed..c465a0fc0c 100644
--- a/fpdfsdk/pwl/cpwl_font_map.h
+++ b/fpdfsdk/pwl/cpwl_font_map.h
@@ -46,9 +46,6 @@ class CPWL_FontMap : public IPVT_FontMap {
static int32_t GetNativeCharset();
ByteString GetNativeFontName(int32_t nCharset);
- static ByteString GetDefaultFontByCharset(int32_t nCharset);
- static const FPDF_CharsetFontMap defaultTTFMap[];
-
protected:
virtual void Initialize();
virtual CPDF_Document* GetDocument();
diff --git a/xfa/fgas/font/cfgas_fontmgr.cpp b/xfa/fgas/font/cfgas_fontmgr.cpp
index d7acc62eef..354db0be43 100644
--- a/xfa/fgas/font/cfgas_fontmgr.cpp
+++ b/xfa/fgas/font/cfgas_fontmgr.cpp
@@ -27,59 +27,6 @@
namespace {
-struct FX_CHARSET_MAP {
- uint16_t charset;
- uint16_t codepage;
-};
-
-const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
- {FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean},
- {FX_CHARSET_Default, FX_CODEPAGE_DefANSI},
- {FX_CHARSET_Symbol, FX_CODEPAGE_Symbol},
- {FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman},
- {FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS},
- {FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean},
- {FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified},
- {FX_CHARSET_MAC_ChineseTraditional, FX_CODEPAGE_MAC_ChineseTraditional},
- {FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew},
- {FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic},
- {FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek},
- {FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish},
- {FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai},
- {FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean},
- {FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic},
- {FX_CHARSET_ShiftJIS, FX_CODEPAGE_ShiftJIS},
- {FX_CHARSET_Hangul, FX_CODEPAGE_Hangul},
- {FX_CHARSET_Johab, FX_CODEPAGE_Johab},
- {FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified},
- {FX_CHARSET_ChineseTraditional, FX_CODEPAGE_ChineseTraditional},
- {FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek},
- {FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish},
- {FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese},
- {FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew},
- {FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic},
- {FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic},
- {FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic},
- {FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai},
- {FX_CHARSET_MSWin_EasternEuropean, FX_CODEPAGE_MSWin_EasternEuropean},
- {FX_CHARSET_US, FX_CODEPAGE_MSDOS_US},
- {FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean},
-};
-
-uint16_t GetCodePageFromCharset(uint8_t charset) {
- auto* result =
- std::lower_bound(std::begin(g_FXCharset2CodePageTable),
- std::end(g_FXCharset2CodePageTable), charset,
- [](const FX_CHARSET_MAP& iter, const uint16_t& charset) {
- return iter.charset < charset;
- });
- if (result != std::end(g_FXCharset2CodePageTable) &&
- result->charset == charset) {
- return result->codepage;
- }
- return 0xFFFF;
-}
-
int32_t GetSimilarityScore(FX_FONTDESCRIPTOR const* pFont,
uint32_t dwFontStyles) {
int32_t iValue = 0;
@@ -118,7 +65,7 @@ const FX_FONTDESCRIPTOR* MatchDefaultFont(
if (font.uCharSet == FX_CHARSET_Symbol)
continue;
if (pParams->wCodePage != 0xFFFF) {
- if (GetCodePageFromCharset(font.uCharSet) != pParams->wCodePage)
+ if (FX_GetCodePageFromCharset(font.uCharSet) != pParams->wCodePage)
continue;
} else {
if (pParams->dwUSB < 128) {
@@ -1033,7 +980,7 @@ RetainPtr<CFGAS_GEFont> CFGAS_FontMgr::GetFontByUnicode(
if (!pFD)
return nullptr;
- uint16_t newCodePage = GetCodePageFromCharset(pFD->uCharSet);
+ uint16_t newCodePage = FX_GetCodePageFromCharset(pFD->uCharSet);
const wchar_t* pFontFace = pFD->wsFontFace;
RetainPtr<CFGAS_GEFont> pFont =
CFGAS_GEFont::LoadFont(pFontFace, dwFontStyles, newCodePage, this);