summary | refs | log | tree | commit | diff
path: root/core/fxcrt
diff options
context:
space:
mode:
author: Artem Strygin <art-snake@yandex-team.ru> 2018-05-31 14:08:11 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-05-31 14:08:11 +0000
commit: 656eb84f83fc1701737d9c65658371a99428d727 (patch)
tree: 6fb28e4283c1ef8696b42d8b7d200a13c32742fc /core/fxcrt
parent: 8f7ee98e2c622c21f452cd9fd5956fe85bcb2b7c (diff)
download: pdfium-656eb84f83fc1701737d9c65658371a99428d727.tar.xz
Move codepage/charset methods into related places.
Change-Id: I71417cc5b1bd00f77d42740198cc17487ebd686e
Reviewed-on: https://pdfium-review.googlesource.com/33330
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Diffstat (limited to 'core/fxcrt')
-rw-r--r-- core/fxcrt/fx_codepage.cpp 70
-rw-r--r-- core/fxcrt/fx_codepage.h 4
2 files changed, 74 insertions, 0 deletions
diff --git a/core/fxcrt/fx_codepage.cpp b/core/fxcrt/fx_codepage.cpp
index 56fad30c76..d59c93ed85 100644
--- a/core/fxcrt/fx_codepage.cpp
+++ b/core/fxcrt/fx_codepage.cpp
@@ -6,6 +6,9 @@
#include "core/fxcrt/fx_codepage.h"
+#include <algorithm>
+#include <utility>
+
namespace {
const uint16_t g_FX_MSDOSThaiUnicodes[128] = {
@@ -152,6 +155,45 @@ const uint16_t g_FX_MSWinBalticUnicodes[128] = {
0x017E, 0x02D9,
};
+struct FX_CHARSET_MAP {  // One row of g_FXCharset2CodePageTable: a charset paired with a code page.
+ uint16_t charset;  // FX_CHARSET_* value; the key compared by FX_GetCodePageFromCharset().
+ uint16_t codepage;  // FX_CODEPAGE_* value; the key compared by FX_GetCharsetFromCodePage().
+};
+
+const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {  // {charset, code page} pairs; must stay sorted by ascending charset value.
+ {FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean},
+ {FX_CHARSET_Default, FX_CODEPAGE_DefANSI},
+ {FX_CHARSET_Symbol, FX_CODEPAGE_Symbol},
+ {FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman},
+ {FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS},
+ {FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean},
+ {FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified},
+ {FX_CHARSET_MAC_ChineseTraditional, FX_CODEPAGE_MAC_ChineseTraditional},
+ {FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew},
+ {FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic},
+ {FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek},
+ {FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish},
+ {FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai},
+ {FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean},
+ {FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic},
+ {FX_CHARSET_ShiftJIS, FX_CODEPAGE_ShiftJIS},
+ {FX_CHARSET_Hangul, FX_CODEPAGE_Hangul},
+ {FX_CHARSET_Johab, FX_CODEPAGE_Johab},
+ {FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified},
+ {FX_CHARSET_ChineseTraditional, FX_CODEPAGE_ChineseTraditional},
+ {FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek},
+ {FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish},
+ {FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese},
+ {FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew},
+ {FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic},
+ {FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic},
+ {FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic},
+ {FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai},
+ {FX_CHARSET_MSWin_EasternEuropean, FX_CODEPAGE_MSWin_EasternEuropean},
+ {FX_CHARSET_US, FX_CODEPAGE_MSDOS_US},
+ {FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean},
+};  // Order matters: FX_GetCodePageFromCharset() searches the charset column with std::lower_bound.
+
} // namespace
const FX_CharsetUnicodes g_FX_CharsetUnicodes[8] = {
@@ -164,3 +206,31 @@ const FX_CharsetUnicodes g_FX_CharsetUnicodes[8] = {
{FX_CHARSET_MSWin_Arabic, g_FX_MSWinArabicUnicodes},
{FX_CHARSET_MSWin_Baltic, g_FX_MSWinBalticUnicodes},
};
+
+// Looks up the code page paired with |charset| in g_FXCharset2CodePageTable.
+// The table is binary-searched on its charset field, so it has to be ordered
+// by ascending charset. Returns 0xFFFF when no entry carries exactly
+// |charset|.
+uint16_t FX_GetCodePageFromCharset(uint8_t charset) {
+  const auto* const table_begin = std::begin(g_FXCharset2CodePageTable);
+  const auto* const table_end = std::end(g_FXCharset2CodePageTable);
+  const auto* const entry = std::lower_bound(
+      table_begin, table_end, charset,
+      [](const FX_CHARSET_MAP& candidate, uint16_t wanted) {
+        return candidate.charset < wanted;
+      });
+  // lower_bound only yields the first entry not less than |charset|; confirm
+  // it is an exact hit before trusting it.
+  if (entry == table_end || entry->charset != charset)
+    return 0xFFFF;
+  return entry->codepage;
+}
+
+// Reverse lookup: returns the charset of the first entry in
+// g_FXCharset2CodePageTable whose code page equals |codepage|, or
+// FX_CHARSET_ANSI when no entry matches.
+uint8_t FX_GetCharsetFromCodePage(uint16_t codepage) {
+  const auto* const table_end = std::end(g_FXCharset2CodePageTable);
+  const auto* const match =
+      std::find_if(std::begin(g_FXCharset2CodePageTable), table_end,
+                   [codepage](const FX_CHARSET_MAP& entry) {
+                     return entry.codepage == codepage;
+                   });
+  if (match == table_end)
+    return FX_CHARSET_ANSI;
+  return match->charset;
+}
+
+// Reports whether |uCharset| is one of the four charsets tested below:
+// Simplified Chinese, Traditional Chinese, Hangul or ShiftJIS.
+bool FX_CharSetIsCJK(uint8_t uCharset) {
+  if (uCharset == FX_CHARSET_ChineseSimplified)
+    return true;
+  if (uCharset == FX_CHARSET_ChineseTraditional)
+    return true;
+  if (uCharset == FX_CHARSET_Hangul)
+    return true;
+  return uCharset == FX_CHARSET_ShiftJIS;
+}
diff --git a/core/fxcrt/fx_codepage.h b/core/fxcrt/fx_codepage.h
index 43692286a5..4a6c6d86b3 100644
--- a/core/fxcrt/fx_codepage.h
+++ b/core/fxcrt/fx_codepage.h
@@ -98,4 +98,8 @@ struct FX_CharsetUnicodes {
extern const FX_CharsetUnicodes g_FX_CharsetUnicodes[8];
+uint16_t FX_GetCodePageFromCharset(uint8_t charset);  // Code page mapped to |charset|; 0xFFFF if the table has no such charset.
+uint8_t FX_GetCharsetFromCodePage(uint16_t codepage);  // Charset of the first table entry with |codepage|; FX_CHARSET_ANSI if none.
+bool FX_CharSetIsCJK(uint8_t uCharset);  // True for the Chinese (Simplified/Traditional), Hangul and ShiftJIS charsets.
+
#endif // CORE_FXCRT_FX_CODEPAGE_H_