From 656eb84f83fc1701737d9c65658371a99428d727 Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Thu, 31 May 2018 14:08:11 +0000 Subject: Move codepage/charset methods into related places. Change-Id: I71417cc5b1bd00f77d42740198cc17487ebd686e Reviewed-on: https://pdfium-review.googlesource.com/33330 Reviewed-by: dsinclair Commit-Queue: Art Snake --- core/fpdfapi/page/cpdf_streamcontentparser.cpp | 3 +- core/fpdfapi/parser/cpdf_document.cpp | 9 +- core/fpdfdoc/cpdf_interform.cpp | 81 ++------------- core/fpdfdoc/cpvt_generateap.cpp | 4 +- core/fxcrt/fx_codepage.cpp | 70 +++++++++++++ core/fxcrt/fx_codepage.h | 4 + core/fxge/android/cfpf_skiafontmgr.cpp | 4 +- core/fxge/cfx_font.cpp | 82 +++++++++++++++ core/fxge/cfx_font.h | 18 ++++ core/fxge/cfx_fontmapper.cpp | 33 +----- fpdfsdk/cfx_systemhandler.cpp | 19 +--- fpdfsdk/fpdf_sysfontinfo.cpp | 5 +- fpdfsdk/pwl/cpwl_font_map.cpp | 133 ++----------------------- fpdfsdk/pwl/cpwl_font_map.h | 3 - xfa/fgas/font/cfgas_fontmgr.cpp | 57 +---------- 15 files changed, 209 insertions(+), 316 deletions(-) diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp index 9dd66f673b..7e7c337d96 100644 --- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp +++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp @@ -1142,7 +1142,8 @@ CPDF_Font* CPDF_StreamContentParser::FindFont(const ByteString& name) { CPDF_Dictionary* pFontDict = ToDictionary(FindResourceObj("Font", name)); if (!pFontDict) { m_bResourceMissing = true; - return CPDF_Font::GetStockFont(m_pDocument.Get(), "Helvetica"); + return CPDF_Font::GetStockFont(m_pDocument.Get(), + CFX_Font::kDefaultAnsiFontName); } CPDF_Font* pFont = m_pDocument->LoadFont(pFontDict); diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index 2e4baabe91..412f726eca 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -736,9 +736,7 @@ CPDF_Font* 
CPDF_Document::AddFont(CFX_Font* pFont, int charset, bool bVert) { if (!pFont) return nullptr; - bool bCJK = charset == FX_CHARSET_ChineseTraditional || - charset == FX_CHARSET_ChineseSimplified || - charset == FX_CHARSET_Hangul || charset == FX_CHARSET_ShiftJIS; + const bool bCJK = FX_CharSetIsCJK(charset); ByteString basefont = pFont->GetFamilyName(); basefont.Replace(" ", ""); int flags = @@ -856,10 +854,7 @@ CPDF_Font* CPDF_Document::AddWindowsFont(LOGFONTA* pLogFont, (pLogFont->lfPitchAndFamily & 0xf8) == FF_SCRIPT, pLogFont->lfCharSet == FX_CHARSET_Symbol); - bool bCJK = pLogFont->lfCharSet == FX_CHARSET_ChineseTraditional || - pLogFont->lfCharSet == FX_CHARSET_ChineseSimplified || - pLogFont->lfCharSet == FX_CHARSET_Hangul || - pLogFont->lfCharSet == FX_CHARSET_ShiftJIS; + const bool bCJK = FX_CharSetIsCJK(pLogFont->lfCharSet); ByteString basefont; if (bTranslateName && bCJK) basefont = FPDF_GetPSNameFromTT(hDC); diff --git a/core/fpdfdoc/cpdf_interform.cpp b/core/fpdfdoc/cpdf_interform.cpp index ad7753b499..ea408ec0df 100644 --- a/core/fpdfdoc/cpdf_interform.cpp +++ b/core/fpdfdoc/cpdf_interform.cpp @@ -75,13 +75,14 @@ void InitDict(CPDF_Dictionary*& pFormDict, CPDF_Document* pDocument) { if (!pFormDict->KeyExist("DR")) { ByteString csBaseName; uint8_t charSet = CPDF_InterForm::GetNativeCharSet(); - CPDF_Font* pFont = CPDF_InterForm::AddStandardFont(pDocument, "Helvetica"); + CPDF_Font* pFont = CPDF_InterForm::AddStandardFont( + pDocument, CFX_Font::kDefaultAnsiFontName); if (pFont) AddFont(pFormDict, pDocument, pFont, &csBaseName); if (charSet != FX_CHARSET_ANSI) { ByteString csFontName = CPDF_InterForm::GetNativeFont(charSet, nullptr); - if (!pFont || csFontName != "Helvetica") { + if (!pFont || csFontName != CFX_Font::kDefaultAnsiFontName) { pFont = CPDF_InterForm::AddNativeFont(pDocument); if (pFont) { csBaseName.clear(); @@ -564,63 +565,7 @@ CPDF_Font* AddNativeInterFormFont(CPDF_Dictionary*& pFormDict, // static uint8_t 
CPDF_InterForm::GetNativeCharSet() { -#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_ - uint8_t charSet = FX_CHARSET_ANSI; - UINT iCodePage = ::GetACP(); - switch (iCodePage) { - case FX_CODEPAGE_ShiftJIS: - charSet = FX_CHARSET_ShiftJIS; - break; - case FX_CODEPAGE_ChineseSimplified: - charSet = FX_CHARSET_ChineseSimplified; - break; - case FX_CODEPAGE_ChineseTraditional: - charSet = FX_CHARSET_ChineseTraditional; - break; - case FX_CODEPAGE_MSWin_WesternEuropean: - charSet = FX_CHARSET_ANSI; - break; - case FX_CODEPAGE_MSDOS_Thai: - charSet = FX_CHARSET_Thai; - break; - case FX_CODEPAGE_Hangul: - charSet = FX_CHARSET_Hangul; - break; - case FX_CODEPAGE_UTF16LE: - charSet = FX_CHARSET_ANSI; - break; - case FX_CODEPAGE_MSWin_EasternEuropean: - charSet = FX_CHARSET_MSWin_EasternEuropean; - break; - case FX_CODEPAGE_MSWin_Cyrillic: - charSet = FX_CHARSET_MSWin_Cyrillic; - break; - case FX_CODEPAGE_MSWin_Greek: - charSet = FX_CHARSET_MSWin_Greek; - break; - case FX_CODEPAGE_MSWin_Turkish: - charSet = FX_CHARSET_MSWin_Turkish; - break; - case FX_CODEPAGE_MSWin_Hebrew: - charSet = FX_CHARSET_MSWin_Hebrew; - break; - case FX_CODEPAGE_MSWin_Arabic: - charSet = FX_CHARSET_MSWin_Arabic; - break; - case FX_CODEPAGE_MSWin_Baltic: - charSet = FX_CHARSET_MSWin_Baltic; - break; - case FX_CODEPAGE_MSWin_Vietnamese: - charSet = FX_CHARSET_MSWin_Vietnamese; - break; - case FX_CODEPAGE_Johab: - charSet = FX_CHARSET_Johab; - break; - } - return charSet; -#else - return 0; -#endif + return FX_GetCharsetFromCodePage(FXSYS_GetACP()); } CPDF_InterForm::CPDF_InterForm(CPDF_Document* pDocument) @@ -727,23 +672,19 @@ ByteString CPDF_InterForm::GetNativeFont(uint8_t charSet, void* pLogFont) { #if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_ LOGFONTA lf = {}; if (charSet == FX_CHARSET_ANSI) { - csFontName = "Helvetica"; + csFontName = CFX_Font::kDefaultAnsiFontName; return csFontName; } bool bRet = false; - if (charSet == FX_CHARSET_ShiftJIS) { + const ByteString default_font_name = + 
CFX_Font::GetDefaultFontNameByCharset(charSet); + if (!default_font_name.IsEmpty()) { bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, - "MS Mincho", lf); - } else if (charSet == FX_CHARSET_ChineseSimplified) { - bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, "SimSun", - lf); - } else if (charSet == FX_CHARSET_ChineseTraditional) { - bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, "MingLiU", - lf); + default_font_name.c_str(), lf); } if (!bRet) { bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, - "Arial Unicode MS", lf); + CFX_Font::kUniversalDefaultFontName, lf); } if (!bRet) { bRet = RetrieveSpecificFont(charSet, DEFAULT_PITCH | FF_DONTCARE, @@ -773,7 +714,7 @@ CPDF_Font* CPDF_InterForm::AddNativeFont(uint8_t charSet, LOGFONTA lf; ByteString csFontName = GetNativeFont(charSet, &lf); if (!csFontName.IsEmpty()) { - if (csFontName == "Helvetica") + if (csFontName == CFX_Font::kDefaultAnsiFontName) return AddStandardFont(pDocument, csFontName); return pDocument->AddWindowsFont(&lf, false, true); } diff --git a/core/fpdfdoc/cpvt_generateap.cpp b/core/fpdfdoc/cpvt_generateap.cpp index e395fbfa4a..cf55e956ea 100644 --- a/core/fpdfdoc/cpvt_generateap.cpp +++ b/core/fpdfdoc/cpvt_generateap.cpp @@ -399,7 +399,7 @@ std::unique_ptr GenerateResourceFontDict( CPDF_Dictionary* pFontDict = pDoc->NewIndirect(); pFontDict->SetNewFor("Type", "Font"); pFontDict->SetNewFor("Subtype", "Type1"); - pFontDict->SetNewFor("BaseFont", "Helvetica"); + pFontDict->SetNewFor("BaseFont", CFX_Font::kDefaultAnsiFontName); pFontDict->SetNewFor("Encoding", "WinAnsiEncoding"); auto pResourceFontDict = @@ -949,7 +949,7 @@ void CPVT_GenerateAP::GenerateFormAP(Type type, pFontDict = pDoc->NewIndirect(); pFontDict->SetNewFor("Type", "Font"); pFontDict->SetNewFor("Subtype", "Type1"); - pFontDict->SetNewFor("BaseFont", "Helvetica"); + pFontDict->SetNewFor("BaseFont", CFX_Font::kDefaultAnsiFontName); pFontDict->SetNewFor("Encoding", 
"WinAnsiEncoding"); pDRFontDict->SetNewFor( sFontName.Right(sFontName.GetLength() - 1), pDoc, diff --git a/core/fxcrt/fx_codepage.cpp b/core/fxcrt/fx_codepage.cpp index 56fad30c76..d59c93ed85 100644 --- a/core/fxcrt/fx_codepage.cpp +++ b/core/fxcrt/fx_codepage.cpp @@ -6,6 +6,9 @@ #include "core/fxcrt/fx_codepage.h" +#include +#include + namespace { const uint16_t g_FX_MSDOSThaiUnicodes[128] = { @@ -152,6 +155,45 @@ const uint16_t g_FX_MSWinBalticUnicodes[128] = { 0x017E, 0x02D9, }; +struct FX_CHARSET_MAP { + uint16_t charset; + uint16_t codepage; +}; + +const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = { + {FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean}, + {FX_CHARSET_Default, FX_CODEPAGE_DefANSI}, + {FX_CHARSET_Symbol, FX_CODEPAGE_Symbol}, + {FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman}, + {FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS}, + {FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean}, + {FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified}, + {FX_CHARSET_MAC_ChineseTraditional, FX_CODEPAGE_MAC_ChineseTraditional}, + {FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew}, + {FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic}, + {FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek}, + {FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish}, + {FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai}, + {FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean}, + {FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic}, + {FX_CHARSET_ShiftJIS, FX_CODEPAGE_ShiftJIS}, + {FX_CHARSET_Hangul, FX_CODEPAGE_Hangul}, + {FX_CHARSET_Johab, FX_CODEPAGE_Johab}, + {FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified}, + {FX_CHARSET_ChineseTraditional, FX_CODEPAGE_ChineseTraditional}, + {FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek}, + {FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish}, + {FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese}, + {FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew}, + {FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic}, + 
{FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic}, + {FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic}, + {FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai}, + {FX_CHARSET_MSWin_EasternEuropean, FX_CODEPAGE_MSWin_EasternEuropean}, + {FX_CHARSET_US, FX_CODEPAGE_MSDOS_US}, + {FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean}, +}; + } // namespace const FX_CharsetUnicodes g_FX_CharsetUnicodes[8] = { @@ -164,3 +206,31 @@ const FX_CharsetUnicodes g_FX_CharsetUnicodes[8] = { {FX_CHARSET_MSWin_Arabic, g_FX_MSWinArabicUnicodes}, {FX_CHARSET_MSWin_Baltic, g_FX_MSWinBalticUnicodes}, }; + +uint16_t FX_GetCodePageFromCharset(uint8_t charset) { + auto* result = + std::lower_bound(std::begin(g_FXCharset2CodePageTable), + std::end(g_FXCharset2CodePageTable), charset, + [](const FX_CHARSET_MAP& iter, const uint16_t& charset) { + return iter.charset < charset; + }); + if (result != std::end(g_FXCharset2CodePageTable) && + result->charset == charset) { + return result->codepage; + } + return 0xFFFF; +} + +uint8_t FX_GetCharsetFromCodePage(uint16_t codepage) { + for (const auto& it : g_FXCharset2CodePageTable) { + if (it.codepage == codepage) + return it.charset; + } + return FX_CHARSET_ANSI; +} + +bool FX_CharSetIsCJK(uint8_t uCharset) { + return (uCharset == FX_CHARSET_ChineseSimplified) || + (uCharset == FX_CHARSET_ChineseTraditional) || + (uCharset == FX_CHARSET_Hangul) || (uCharset == FX_CHARSET_ShiftJIS); +} diff --git a/core/fxcrt/fx_codepage.h b/core/fxcrt/fx_codepage.h index 43692286a5..4a6c6d86b3 100644 --- a/core/fxcrt/fx_codepage.h +++ b/core/fxcrt/fx_codepage.h @@ -98,4 +98,8 @@ struct FX_CharsetUnicodes { extern const FX_CharsetUnicodes g_FX_CharsetUnicodes[8]; +uint16_t FX_GetCodePageFromCharset(uint8_t charset); +uint8_t FX_GetCharsetFromCodePage(uint16_t codepage); +bool FX_CharSetIsCJK(uint8_t uCharset); + #endif // CORE_FXCRT_FX_CODEPAGE_H_ diff --git a/core/fxge/android/cfpf_skiafontmgr.cpp b/core/fxge/android/cfpf_skiafontmgr.cpp index d44d7d56d2..0b49f6283c 
100644 --- a/core/fxge/android/cfpf_skiafontmgr.cpp +++ b/core/fxge/android/cfpf_skiafontmgr.cpp @@ -187,9 +187,7 @@ uint32_t FPF_SKIAGetFamilyHash(const ByteStringView& bsFamily, } bool FPF_SkiaIsCJK(uint8_t uCharset) { - return (uCharset == FX_CHARSET_ChineseSimplified) || - (uCharset == FX_CHARSET_ChineseTraditional) || - (uCharset == FX_CHARSET_Hangul) || (uCharset == FX_CHARSET_ShiftJIS); + return FX_CharSetIsCJK(uCharset); } bool FPF_SkiaMaybeSymbol(const ByteStringView& bsFacename) { diff --git a/core/fxge/cfx_font.cpp b/core/fxge/cfx_font.cpp index d04fc0dac0..bee1d789ef 100644 --- a/core/fxge/cfx_font.cpp +++ b/core/fxge/cfx_font.cpp @@ -209,6 +209,88 @@ const uint8_t CFX_Font::s_WeightPow_SHIFTJIS[] = { 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, }; +const CFX_Font::CharsetFontMap CFX_Font::defaultTTFMap[] = { + {FX_CHARSET_ANSI, kDefaultAnsiFontName}, + {FX_CHARSET_ChineseSimplified, "SimSun"}, + {FX_CHARSET_ChineseTraditional, "MingLiU"}, + {FX_CHARSET_ShiftJIS, "MS Gothic"}, + {FX_CHARSET_Hangul, "Batang"}, + {FX_CHARSET_MSWin_Cyrillic, "Arial"}, +#if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_ + {FX_CHARSET_MSWin_EasternEuropean, "Arial"}, +#else + {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"}, +#endif + {FX_CHARSET_MSWin_Arabic, "Arial"}, + {-1, nullptr}}; + +// static +const char CFX_Font::kDefaultAnsiFontName[] = "Helvetica"; +// static +const char CFX_Font::kUniversalDefaultFontName[] = "Arial Unicode MS"; + +// static +ByteString CFX_Font::GetDefaultFontNameByCharset(uint8_t nCharset) { + int i = 0; + while (defaultTTFMap[i].charset != -1) { + if (nCharset == static_cast<uint8_t>(defaultTTFMap[i].charset)) + return defaultTTFMap[i].fontname; + ++i; + } + return kUniversalDefaultFontName; +} + +// static +uint8_t CFX_Font::GetCharSetFromUnicode(uint16_t word) { + // to avoid CJK Font to show ASCII + if (word < 0x7F) + return FX_CHARSET_ANSI; + + // find new charset + if ((word >= 0x4E00 && word <= 0x9FA5) || 
+ (word >= 0xE7C7 && word <= 0xE7F3) || + (word >= 0x3000 && word <= 0x303F) || + (word >= 0x2000 && word <= 0x206F)) { + return FX_CHARSET_ChineseSimplified; + } + + if (((word >= 0x3040) && (word <= 0x309F)) || + ((word >= 0x30A0) && (word <= 0x30FF)) || + ((word >= 0x31F0) && (word <= 0x31FF)) || + ((word >= 0xFF00) && (word <= 0xFFEF))) { + return FX_CHARSET_ShiftJIS; + } + + if (((word >= 0xAC00) && (word <= 0xD7AF)) || + ((word >= 0x1100) && (word <= 0x11FF)) || + ((word >= 0x3130) && (word <= 0x318F))) { + return FX_CHARSET_Hangul; + } + + if (word >= 0x0E00 && word <= 0x0E7F) + return FX_CHARSET_Thai; + + if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF)) + return FX_CHARSET_MSWin_Greek; + + if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC)) + return FX_CHARSET_MSWin_Arabic; + + if (word >= 0x0590 && word <= 0x05FF) + return FX_CHARSET_MSWin_Hebrew; + + if (word >= 0x0400 && word <= 0x04FF) + return FX_CHARSET_MSWin_Cyrillic; + + if (word >= 0x0100 && word <= 0x024F) + return FX_CHARSET_MSWin_EasternEuropean; + + if (word >= 0x1E00 && word <= 0x1EFF) + return FX_CHARSET_MSWin_Vietnamese; + + return FX_CHARSET_ANSI; +} + CFX_Font::CFX_Font() : m_Face(nullptr), diff --git a/core/fxge/cfx_font.h b/core/fxge/cfx_font.h index fbf35eaca1..a5a0057c16 100644 --- a/core/fxge/cfx_font.h +++ b/core/fxge/cfx_font.h @@ -30,6 +30,11 @@ class CFX_Font { CFX_Font(); ~CFX_Font(); + static const char kDefaultAnsiFontName[]; + static const char kUniversalDefaultFontName[]; + static ByteString GetDefaultFontNameByCharset(uint8_t nCharset); + static uint8_t GetCharSetFromUnicode(uint16_t word); + void LoadSubst(const ByteString& face_name, bool bTrueType, uint32_t flags, @@ -96,6 +101,19 @@ class CFX_Font { static const uint8_t s_WeightPow_11[kWeightPowArraySize]; static const uint8_t s_WeightPow_SHIFTJIS[kWeightPowArraySize]; + // This struct should be the same as FPDF_CharsetFontMap + typedef struct { + int charset; // Character 
Set Enum value, see FX_CHARSET_XXX. + const char* fontname; // Name of default font to use with that charset. + } CharsetFontMap; + + /** + * Pointer to the default character set to TT Font name map. The + * map is an array of CharsetFontMap structs, with its end indicated + * by a { -1, NULL } entry. + **/ + static const CharsetFontMap defaultTTFMap[]; + #ifdef PDF_ENABLE_XFA protected: std::unique_ptr m_pOwnedStream; diff --git a/core/fxge/cfx_fontmapper.cpp b/core/fxge/cfx_fontmapper.cpp index b9a8eac1a1..e6fc984edc 100644 --- a/core/fxge/cfx_fontmapper.cpp +++ b/core/fxge/cfx_fontmapper.cpp @@ -145,19 +145,6 @@ const struct AltFontFamily { {"ForteMT", "Forte"}, }; -const struct CODEPAGE_MAP { - uint16_t codepage; - uint8_t charset; -} g_Codepage2CharsetTable[] = { - {0, 1}, {42, 2}, {437, 254}, {850, 255}, {874, 222}, - {932, 128}, {936, 134}, {949, 129}, {950, 136}, {1250, 238}, - {1251, 204}, {1252, 0}, {1253, 161}, {1254, 162}, {1255, 177}, - {1256, 178}, {1257, 186}, {1258, 163}, {1361, 130}, {10000, 77}, - {10001, 78}, {10002, 81}, {10003, 79}, {10004, 84}, {10005, 83}, - {10006, 85}, {10007, 89}, {10008, 80}, {10021, 87}, {10029, 88}, - {10081, 86}, -}; - ByteString TT_NormalizeName(const char* family) { ByteString norm(family); norm.Remove(' '); @@ -170,19 +157,6 @@ ByteString TT_NormalizeName(const char* family) { return norm; } -uint8_t GetCharsetFromCodePage(uint16_t codepage) { - const CODEPAGE_MAP* pEnd = - g_Codepage2CharsetTable + FX_ArraySize(g_Codepage2CharsetTable); - const CODEPAGE_MAP* pCharmap = - std::lower_bound(g_Codepage2CharsetTable, pEnd, codepage, - [](const CODEPAGE_MAP& charset, uint16_t page) { - return charset.codepage < page; - }); - if (pCharmap < pEnd && codepage == pCharmap->codepage) - return pCharmap->charset; - return FX_CHARSET_Default; -} - void GetFontFamily(uint32_t nStyle, ByteString* fontName) { if (fontName->Contains("Script")) { if (FontStyleIsBold(nStyle)) @@ -544,13 +518,10 @@ FXFT_Face 
CFX_FontMapper::FindSubstFont(const ByteString& name, int Charset = FX_CHARSET_ANSI; if (WindowCP) - Charset = GetCharsetFromCodePage(WindowCP); + Charset = FX_GetCharsetFromCodePage(WindowCP); else if (iBaseFont == kNumStandardFonts && FontStyleIsSymbolic(flags)) Charset = FX_CHARSET_Symbol; - const bool bCJK = (Charset == FX_CHARSET_ShiftJIS || - Charset == FX_CHARSET_ChineseSimplified || - Charset == FX_CHARSET_Hangul || - Charset == FX_CHARSET_ChineseTraditional); + const bool bCJK = FX_CharSetIsCJK(Charset); bool bItalic = FontStyleIsItalic(nStyle); GetFontFamily(nStyle, &family); diff --git a/fpdfsdk/cfx_systemhandler.cpp b/fpdfsdk/cfx_systemhandler.cpp index 28170d2878..62d7c735b0 100644 --- a/fpdfsdk/cfx_systemhandler.cpp +++ b/fpdfsdk/cfx_systemhandler.cpp @@ -20,22 +20,6 @@ #include "fpdfsdk/cpdfsdk_widget.h" #include "fpdfsdk/formfiller/cffl_formfiller.h" -namespace { - -int CharSet2CP(int charset) { - if (charset == FX_CHARSET_ShiftJIS) - return FX_CODEPAGE_ShiftJIS; - if (charset == FX_CHARSET_ChineseSimplified) - return FX_CODEPAGE_ChineseSimplified; - if (charset == FX_CHARSET_Hangul) - return FX_CODEPAGE_Hangul; - if (charset == FX_CHARSET_ChineseTraditional) - return FX_CODEPAGE_ChineseTraditional; - return FX_CODEPAGE_DefANSI; -} - -} // namespace - CFX_SystemHandler::CFX_SystemHandler(CPDFSDK_FormFillEnvironment* pFormFillEnv) : m_pFormFillEnv(pFormFillEnv) {} @@ -121,7 +105,8 @@ CPDF_Font* CFX_SystemHandler::AddNativeTrueTypeFontToPDF( return nullptr; auto pFXFont = pdfium::MakeUnique(); - pFXFont->LoadSubst(sFontFaceName, true, 0, 0, 0, CharSet2CP(nCharset), false); + pFXFont->LoadSubst(sFontFaceName, true, 0, 0, 0, + FX_GetCodePageFromCharset(nCharset), false); return pDoc->AddFont(pFXFont.get(), nCharset, false); } diff --git a/fpdfsdk/fpdf_sysfontinfo.cpp b/fpdfsdk/fpdf_sysfontinfo.cpp index 3a3f9e1257..564912b85c 100644 --- a/fpdfsdk/fpdf_sysfontinfo.cpp +++ b/fpdfsdk/fpdf_sysfontinfo.cpp @@ -9,6 +9,7 @@ #include #include 
"core/fxcrt/fx_codepage.h" +#include "core/fxge/cfx_font.h" #include "core/fxge/cfx_fontmapper.h" #include "core/fxge/cfx_fontmgr.h" #include "core/fxge/cfx_gemodule.h" @@ -30,6 +31,8 @@ static_assert(FXFONT_GB2312_CHARSET == FX_CHARSET_ChineseSimplified, "Charset must match"); static_assert(FXFONT_CHINESEBIG5_CHARSET == FX_CHARSET_ChineseTraditional, "Charset must match"); +static_assert(sizeof(CFX_Font::CharsetFontMap) == sizeof(FPDF_CharsetFontMap), + "CFX_Font::CharsetFontMap should be same as FPDF_CharsetFontMap"); class CFX_ExternalFontInfo final : public SystemFontInfoIface { public: @@ -122,7 +125,7 @@ FPDF_SetSystemFontInfo(FPDF_SYSFONTINFO* pFontInfoExt) { } FPDF_EXPORT const FPDF_CharsetFontMap* FPDF_CALLCONV FPDF_GetDefaultTTFMap() { - return CPWL_FontMap::defaultTTFMap; + return reinterpret_cast(CFX_Font::defaultTTFMap); } struct FPDF_SYSFONTINFO_DEFAULT : public FPDF_SYSFONTINFO { diff --git a/fpdfsdk/pwl/cpwl_font_map.cpp b/fpdfsdk/pwl/cpwl_font_map.cpp index 5e5556ebb0..f60e7d2fb1 100644 --- a/fpdfsdk/pwl/cpwl_font_map.cpp +++ b/fpdfsdk/pwl/cpwl_font_map.cpp @@ -21,8 +21,6 @@ namespace { -const char kDefaultFontName[] = "Helvetica"; - const char* const g_sDEStandardFontName[] = {"Courier", "Courier-Bold", "Courier-BoldOblique", @@ -100,7 +98,8 @@ int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word, if (KnowWord(nNewFontIndex, word)) return nNewFontIndex; } - nNewFontIndex = GetFontIndex("Arial Unicode MS", FX_CHARSET_Default, false); + nNewFontIndex = GetFontIndex(CFX_Font::kUniversalDefaultFontName, + FX_CHARSET_Default, false); if (nNewFontIndex >= 0) { if (KnowWord(nNewFontIndex, word)) return nNewFontIndex; @@ -145,7 +144,7 @@ void CPWL_FontMap::Empty() { } void CPWL_FontMap::Initialize() { - GetFontIndex(kDefaultFontName, FX_CHARSET_ANSI, false); + GetFontIndex(CFX_Font::kDefaultAnsiFontName, FX_CHARSET_ANSI, false); } bool CPWL_FontMap::IsStandardFont(const ByteString& sFontName) { @@ -210,7 +209,7 @@ ByteString 
CPWL_FontMap::GetNativeFont(int32_t nCharset) { if (nCharset == FX_CHARSET_Default) nCharset = GetNativeCharset(); - ByteString sFontName = GetDefaultFontByCharset(nCharset); + ByteString sFontName = CFX_Font::GetDefaultFontNameByCharset(nCharset); if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName)) return ByteString(); @@ -274,135 +273,17 @@ const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const { } int32_t CPWL_FontMap::GetNativeCharset() { - uint8_t nCharset = FX_CHARSET_ANSI; - int32_t iCodePage = FXSYS_GetACP(); - switch (iCodePage) { - case FX_CODEPAGE_ShiftJIS: - nCharset = FX_CHARSET_ShiftJIS; - break; - case FX_CODEPAGE_ChineseSimplified: - nCharset = FX_CHARSET_ChineseSimplified; - break; - case FX_CODEPAGE_ChineseTraditional: - nCharset = FX_CHARSET_ChineseTraditional; - break; - case FX_CODEPAGE_MSWin_WesternEuropean: - nCharset = FX_CHARSET_ANSI; - break; - case FX_CODEPAGE_MSDOS_Thai: - nCharset = FX_CHARSET_Thai; - break; - case FX_CODEPAGE_Hangul: - nCharset = FX_CHARSET_Hangul; - break; - case FX_CODEPAGE_UTF16LE: - nCharset = FX_CHARSET_ANSI; - break; - case FX_CODEPAGE_MSWin_EasternEuropean: - nCharset = FX_CHARSET_MSWin_EasternEuropean; - break; - case FX_CODEPAGE_MSWin_Cyrillic: - nCharset = FX_CHARSET_MSWin_Cyrillic; - break; - case FX_CODEPAGE_MSWin_Greek: - nCharset = FX_CHARSET_MSWin_Greek; - break; - case FX_CODEPAGE_MSWin_Turkish: - nCharset = FX_CHARSET_MSWin_Turkish; - break; - case FX_CODEPAGE_MSWin_Hebrew: - nCharset = FX_CHARSET_MSWin_Hebrew; - break; - case FX_CODEPAGE_MSWin_Arabic: - nCharset = FX_CHARSET_MSWin_Arabic; - break; - case FX_CODEPAGE_MSWin_Baltic: - nCharset = FX_CHARSET_MSWin_Baltic; - break; - case FX_CODEPAGE_MSWin_Vietnamese: - nCharset = FX_CHARSET_MSWin_Vietnamese; - break; - case FX_CODEPAGE_Johab: - nCharset = FX_CHARSET_Johab; - break; - } - return nCharset; -} - -const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = { - {FX_CHARSET_ANSI, "Helvetica"}, - 
{FX_CHARSET_ChineseSimplified, "SimSun"}, - {FX_CHARSET_ChineseTraditional, "MingLiU"}, - {FX_CHARSET_ShiftJIS, "MS Gothic"}, - {FX_CHARSET_Hangul, "Batang"}, - {FX_CHARSET_MSWin_Cyrillic, "Arial"}, -#if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_ - {FX_CHARSET_MSWin_EasternEuropean, "Arial"}, -#else - {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"}, -#endif - {FX_CHARSET_MSWin_Arabic, "Arial"}, - {-1, nullptr}}; - -ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) { - int i = 0; - while (defaultTTFMap[i].charset != -1) { - if (nCharset == defaultTTFMap[i].charset) - return defaultTTFMap[i].fontname; - ++i; - } - return ""; + return FX_GetCharsetFromCodePage(FXSYS_GetACP()); } int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) { // to avoid CJK Font to show ASCII if (word < 0x7F) return FX_CHARSET_ANSI; + // follow the old charset if (nOldCharset != FX_CHARSET_Default) return nOldCharset; - // find new charset - if ((word >= 0x4E00 && word <= 0x9FA5) || - (word >= 0xE7C7 && word <= 0xE7F3) || - (word >= 0x3000 && word <= 0x303F) || - (word >= 0x2000 && word <= 0x206F)) { - return FX_CHARSET_ChineseSimplified; - } - - if (((word >= 0x3040) && (word <= 0x309F)) || - ((word >= 0x30A0) && (word <= 0x30FF)) || - ((word >= 0x31F0) && (word <= 0x31FF)) || - ((word >= 0xFF00) && (word <= 0xFFEF))) { - return FX_CHARSET_ShiftJIS; - } - - if (((word >= 0xAC00) && (word <= 0xD7AF)) || - ((word >= 0x1100) && (word <= 0x11FF)) || - ((word >= 0x3130) && (word <= 0x318F))) { - return FX_CHARSET_Hangul; - } - - if (word >= 0x0E00 && word <= 0x0E7F) - return FX_CHARSET_Thai; - - if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF)) - return FX_CHARSET_MSWin_Greek; - - if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC)) - return FX_CHARSET_MSWin_Arabic; - - if (word >= 0x0590 && word <= 0x05FF) - return FX_CHARSET_MSWin_Hebrew; - - if (word >= 0x0400 && word <= 
0x04FF) - return FX_CHARSET_MSWin_Cyrillic; - - if (word >= 0x0100 && word <= 0x024F) - return FX_CHARSET_MSWin_EasternEuropean; - - if (word >= 0x1E00 && word <= 0x1EFF) - return FX_CHARSET_MSWin_Vietnamese; - - return FX_CHARSET_ANSI; + return CFX_Font::GetCharSetFromUnicode(word); } diff --git a/fpdfsdk/pwl/cpwl_font_map.h b/fpdfsdk/pwl/cpwl_font_map.h index 592c3fafed..c465a0fc0c 100644 --- a/fpdfsdk/pwl/cpwl_font_map.h +++ b/fpdfsdk/pwl/cpwl_font_map.h @@ -46,9 +46,6 @@ class CPWL_FontMap : public IPVT_FontMap { static int32_t GetNativeCharset(); ByteString GetNativeFontName(int32_t nCharset); - static ByteString GetDefaultFontByCharset(int32_t nCharset); - static const FPDF_CharsetFontMap defaultTTFMap[]; - protected: virtual void Initialize(); virtual CPDF_Document* GetDocument(); diff --git a/xfa/fgas/font/cfgas_fontmgr.cpp b/xfa/fgas/font/cfgas_fontmgr.cpp index d7acc62eef..354db0be43 100644 --- a/xfa/fgas/font/cfgas_fontmgr.cpp +++ b/xfa/fgas/font/cfgas_fontmgr.cpp @@ -27,59 +27,6 @@ namespace { -struct FX_CHARSET_MAP { - uint16_t charset; - uint16_t codepage; -}; - -const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = { - {FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean}, - {FX_CHARSET_Default, FX_CODEPAGE_DefANSI}, - {FX_CHARSET_Symbol, FX_CODEPAGE_Symbol}, - {FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman}, - {FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS}, - {FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean}, - {FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified}, - {FX_CHARSET_MAC_ChineseTraditional, FX_CODEPAGE_MAC_ChineseTraditional}, - {FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew}, - {FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic}, - {FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek}, - {FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish}, - {FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai}, - {FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean}, - {FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic}, - {FX_CHARSET_ShiftJIS, 
FX_CODEPAGE_ShiftJIS}, - {FX_CHARSET_Hangul, FX_CODEPAGE_Hangul}, - {FX_CHARSET_Johab, FX_CODEPAGE_Johab}, - {FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified}, - {FX_CHARSET_ChineseTraditional, FX_CODEPAGE_ChineseTraditional}, - {FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek}, - {FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish}, - {FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese}, - {FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew}, - {FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic}, - {FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic}, - {FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic}, - {FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai}, - {FX_CHARSET_MSWin_EasternEuropean, FX_CODEPAGE_MSWin_EasternEuropean}, - {FX_CHARSET_US, FX_CODEPAGE_MSDOS_US}, - {FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean}, -}; - -uint16_t GetCodePageFromCharset(uint8_t charset) { - auto* result = - std::lower_bound(std::begin(g_FXCharset2CodePageTable), - std::end(g_FXCharset2CodePageTable), charset, - [](const FX_CHARSET_MAP& iter, const uint16_t& charset) { - return iter.charset < charset; - }); - if (result != std::end(g_FXCharset2CodePageTable) && - result->charset == charset) { - return result->codepage; - } - return 0xFFFF; -} - int32_t GetSimilarityScore(FX_FONTDESCRIPTOR const* pFont, uint32_t dwFontStyles) { int32_t iValue = 0; @@ -118,7 +65,7 @@ const FX_FONTDESCRIPTOR* MatchDefaultFont( if (font.uCharSet == FX_CHARSET_Symbol) continue; if (pParams->wCodePage != 0xFFFF) { - if (GetCodePageFromCharset(font.uCharSet) != pParams->wCodePage) + if (FX_GetCodePageFromCharset(font.uCharSet) != pParams->wCodePage) continue; } else { if (pParams->dwUSB < 128) { @@ -1033,7 +980,7 @@ RetainPtr CFGAS_FontMgr::GetFontByUnicode( if (!pFD) return nullptr; - uint16_t newCodePage = GetCodePageFromCharset(pFD->uCharSet); + uint16_t newCodePage = FX_GetCodePageFromCharset(pFD->uCharSet); const wchar_t* pFontFace = pFD->wsFontFace; RetainPtr pFont = 
CFGAS_GEFont::LoadFont(pFontFace, dwFontStyles, newCodePage, this); -- cgit v1.2.3