summaryrefslogtreecommitdiff
path: root/xfa/src/fgas/include/fx_cpg.h
blob: 42953b042bbc660d6ec8c06e32f5132638c47995 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

// Include guard for this header.
// NOTE(review): `_FX_CODEPAGE` starts with an underscore followed by an
// uppercase letter, a spelling reserved for the implementation. Because the
// macro is defined with an empty body, any later occurrence of the token
// `_FX_CODEPAGE` in code that includes this header is replaced with nothing.
#ifndef _FX_CODEPAGE
#define _FX_CODEPAGE
// Forward declaration; the class itself is defined at the end of this header.
class IFX_CodePage;
// Code page identifiers (FX_CODEPAGE_*), listed in ascending numeric order.
// NOTE(review): many of these values (e.g. 1252, 65001) coincide with Windows
// code page identifiers -- presumably intentional; confirm before relying on
// that for every entry.
#define FX_CODEPAGE_DefANSI 0
#define FX_CODEPAGE_DefOEM 1
#define FX_CODEPAGE_DefMAC 2
#define FX_CODEPAGE_Thread 3
#define FX_CODEPAGE_Symbol 42
#define FX_CODEPAGE_MSDOS_US 437
#define FX_CODEPAGE_Arabic_ASMO708 708
#define FX_CODEPAGE_Arabic_ASMO449Plus 709
#define FX_CODEPAGE_Arabic_Transparent 710
#define FX_CODEPAGE_Arabic_NafithaEnhanced 711
#define FX_CODEPAGE_Arabic_TransparentASMO 720
#define FX_CODEPAGE_MSDOS_Greek1 737
#define FX_CODEPAGE_MSDOS_Baltic 775
#define FX_CODEPAGE_MSWin31_WesternEuropean 819
#define FX_CODEPAGE_MSDOS_WesternEuropean 850
#define FX_CODEPAGE_MSDOS_EasternEuropean 852
#define FX_CODEPAGE_MSDOS_Latin3 853
#define FX_CODEPAGE_MSDOS_Cyrillic 855
#define FX_CODEPAGE_MSDOS_Turkish 857
#define FX_CODEPAGE_MSDOS_Latin1Euro 858
#define FX_CODEPAGE_MSDOS_Portuguese 860
#define FX_CODEPAGE_MSDOS_Icelandic 861
#define FX_CODEPAGE_MSDOS_Hebrew 862
#define FX_CODEPAGE_MSDOS_FrenchCanadian 863
#define FX_CODEPAGE_MSDOS_Arabic 864
#define FX_CODEPAGE_MSDOS_Norwegian 865
#define FX_CODEPAGE_MSDOS_Russian 866
#define FX_CODEPAGE_MSDOS_Greek2 869
#define FX_CODEPAGE_MSDOS_Thai 874
#define FX_CODEPAGE_MSDOS_KamenickyCS 895
#define FX_CODEPAGE_ShiftJIS 932
#define FX_CODEPAGE_ChineseSimplified 936
#define FX_CODEPAGE_Korean 949
#define FX_CODEPAGE_ChineseTraditional 950
#define FX_CODEPAGE_UTF16LE 1200
#define FX_CODEPAGE_UTF16BE 1201
#define FX_CODEPAGE_MSWin_EasternEuropean 1250
#define FX_CODEPAGE_MSWin_Cyrillic 1251
#define FX_CODEPAGE_MSWin_WesternEuropean 1252
#define FX_CODEPAGE_MSWin_Greek 1253
#define FX_CODEPAGE_MSWin_Turkish 1254
#define FX_CODEPAGE_MSWin_Hebrew 1255
#define FX_CODEPAGE_MSWin_Arabic 1256
#define FX_CODEPAGE_MSWin_Baltic 1257
#define FX_CODEPAGE_MSWin_Vietnamese 1258
#define FX_CODEPAGE_Johab 1361
#define FX_CODEPAGE_MAC_Roman 10000
#define FX_CODEPAGE_MAC_ShiftJIS 10001
#define FX_CODEPAGE_MAC_ChineseTraditional 10002
#define FX_CODEPAGE_MAC_Korean 10003
#define FX_CODEPAGE_MAC_Arabic 10004
#define FX_CODEPAGE_MAC_Hebrew 10005
#define FX_CODEPAGE_MAC_Greek 10006
#define FX_CODEPAGE_MAC_Cyrillic 10007
#define FX_CODEPAGE_MAC_ChineseSimplified 10008
#define FX_CODEPAGE_MAC_Thai 10021
#define FX_CODEPAGE_MAC_EasternEuropean 10029
#define FX_CODEPAGE_MAC_Turkish 10081
#define FX_CODEPAGE_UTF32LE 12000
#define FX_CODEPAGE_UTF32BE 12001
#define FX_CODEPAGE_ISO8859_1 28591
#define FX_CODEPAGE_ISO8859_2 28592
#define FX_CODEPAGE_ISO8859_3 28593
#define FX_CODEPAGE_ISO8859_4 28594
#define FX_CODEPAGE_ISO8859_5 28595
#define FX_CODEPAGE_ISO8859_6 28596
#define FX_CODEPAGE_ISO8859_7 28597
#define FX_CODEPAGE_ISO8859_8 28598
#define FX_CODEPAGE_ISO8859_9 28599
#define FX_CODEPAGE_ISO8859_10 28600
#define FX_CODEPAGE_ISO8859_11 28601
#define FX_CODEPAGE_ISO8859_12 28602
#define FX_CODEPAGE_ISO8859_13 28603
#define FX_CODEPAGE_ISO8859_14 28604
#define FX_CODEPAGE_ISO8859_15 28605
#define FX_CODEPAGE_ISO8859_16 28606
#define FX_CODEPAGE_ISCII_Devanagari 57002
#define FX_CODEPAGE_ISCII_Bengali 57003
#define FX_CODEPAGE_ISCII_Tamil 57004
#define FX_CODEPAGE_ISCII_Telugu 57005
#define FX_CODEPAGE_ISCII_Assamese 57006
#define FX_CODEPAGE_ISCII_Oriya 57007
#define FX_CODEPAGE_ISCII_Kannada 57008
#define FX_CODEPAGE_ISCII_Malayalam 57009
#define FX_CODEPAGE_ISCII_Gujarati 57010
#define FX_CODEPAGE_ISCII_Punjabi 57011
#define FX_CODEPAGE_UTF7 65000
#define FX_CODEPAGE_UTF8 65001
// Charset identifiers (FX_CHARSET_*), listed in ascending numeric order.
// NOTE(review): values such as 128, 129, 134, 136 and 238 coincide with the
// Windows LOGFONT charset constants -- presumably intentional; confirm.
//
// Three historical names are misspelled ("Triditional", "EasterEuropean")
// while the matching FX_CODEPAGE_* names use the correct spelling. The
// correctly spelled names are the canonical definitions here; the misspelled
// ones are kept as aliases so existing callers keep compiling.
#define FX_CHARSET_ANSI 0
#define FX_CHARSET_Default 1
#define FX_CHARSET_Symbol 2
#define FX_CHARSET_MAC_Roman 77
#define FX_CHARSET_MAC_ShiftJIS 78
#define FX_CHARSET_MAC_Korean 79
#define FX_CHARSET_MAC_ChineseSimplified 80
#define FX_CHARSET_MAC_ChineseTraditional 81
// Deprecated misspelling; prefer FX_CHARSET_MAC_ChineseTraditional.
#define FX_CHARSET_MAC_ChineseTriditional FX_CHARSET_MAC_ChineseTraditional
#define FX_CHARSET_MAC_Johab 82
#define FX_CHARSET_MAC_Hebrew 83
#define FX_CHARSET_MAC_Arabic 84
#define FX_CHARSET_MAC_Greek 85
#define FX_CHARSET_MAC_Turkish 86
#define FX_CHARSET_MAC_Thai 87
#define FX_CHARSET_MAC_EasternEuropean 88
#define FX_CHARSET_MAC_Cyrillic 89
#define FX_CHARSET_ShiftJIS 128
#define FX_CHARSET_Korean 129
#define FX_CHARSET_Johab 130
#define FX_CHARSET_ChineseSimplified 134
#define FX_CHARSET_ChineseTraditional 136
// Deprecated misspelling; prefer FX_CHARSET_ChineseTraditional.
#define FX_CHARSET_ChineseTriditional FX_CHARSET_ChineseTraditional
#define FX_CHARSET_MSWin_Greek 161
#define FX_CHARSET_MSWin_Turkish 162
#define FX_CHARSET_MSWin_Vietnamese 163
#define FX_CHARSET_MSWin_Hebrew 177
#define FX_CHARSET_MSWin_Arabic 178
#define FX_CHARSET_ArabicTraditional 179
#define FX_CHARSET_ArabicUser 180
#define FX_CHARSET_HebrewUser 181
#define FX_CHARSET_MSWin_Baltic 186
#define FX_CHARSET_MSWin_Cyrillic 204
#define FX_CHARSET_Thai 222
#define FX_CHARSET_MSWin_EasternEuropean 238
// Deprecated misspelling; prefer FX_CHARSET_MSWin_EasternEuropean.
#define FX_CHARSET_MSWin_EasterEuropean FX_CHARSET_MSWin_EasternEuropean
#define FX_CHARSET_US 254
#define FX_CHARSET_OEM 255
// Free functions declared by this header; their definitions are not part of
// it. The descriptions below are inferred from names and signatures only --
// confirm against the implementations before relying on details.

// Lookups between charset ids, code page ids, encoding-name strings and
// language ids (presumably the FX_CHARSET_* / FX_CODEPAGE_* values above).
FX_WORD FX_GetCodePageFromCharset(uint8_t charset);
FX_WORD FX_GetCharsetFromCodePage(FX_WORD codepage);
FX_WORD FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength);
// NOTE(review): this name is spelled "Form" rather than "From", unlike the
// narrow-string variant above. The spelling is part of the public interface.
FX_WORD FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength);
FX_WORD FX_GetDefCodePageByLanguage(FX_WORD wLanguage);

// Byte-order helpers: one takes a single buffer, the *Copy form takes a
// const source and a separate destination.
void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength);
void FX_SwapByteOrderCopy(const FX_WCHAR* pSrc,
                          FX_WCHAR* pDst,
                          int32_t iLength);

// UTF-16 <-> FX_WCHAR helpers: the void* forms take a single buffer, the
// *Copy forms take a const source and a separate destination.
void FX_UTF16ToWChar(void* pBuffer, int32_t iLength);
void FX_UTF16ToWCharCopy(const FX_WORD* pUTF16,
                         FX_WCHAR* pWChar,
                         int32_t iLength);
void FX_WCharToUTF16(void* pBuffer, int32_t iLength);
void FX_WCharToUTF16Copy(const FX_WCHAR* pWChar,
                         FX_WORD* pUTF16,
                         int32_t iLength);

// Decoders from byte strings to FX_WCHAR. pSrcLen / pDstLen are passed by
// pointer -- presumably in/out lengths; TODO confirm the exact contract and
// the meaning of the int32_t result.
int32_t FX_DecodeString(FX_WORD wCodePage,
                        const FX_CHAR* pSrc,
                        int32_t* pSrcLen,
                        FX_WCHAR* pDst,
                        int32_t* pDstLen,
                        FX_BOOL bErrBreak = FALSE);
int32_t FX_UTF8Decode(const FX_CHAR* pSrc,
                      int32_t* pSrcLen,
                      FX_WCHAR* pDst,
                      int32_t* pDstLen);
// Code system kind, used as the type of FX_CODEPAGE_HEADER::eCPType and as the
// return type of IFX_CodePage::GetCodeSystemType().
// NOTE(review): the names presumably stand for multi-byte, single-byte and
// double-byte character sets -- confirm.
enum FX_CODESYSTEM {
    FX_MBCS = 0,
    FX_SBCS = 1,
    FX_DBCS = 2
};
// Descriptor record for one code page. Field meanings are inferred from their
// names; the data that populates this struct is not visible in this header.
typedef struct _FX_CODEPAGE_HEADER {
    uint16_t uCPID;          // code page id (presumably an FX_CODEPAGE_* value)
    uint8_t uMinCharBytes;   // presumably the minimum bytes per encoded character
    uint8_t uMaxCharBytes;   // presumably the maximum bytes per encoded character
    FX_CODESYSTEM eCPType;   // FX_MBCS / FX_SBCS / FX_DBCS
    FX_BOOL bHasLeadByte;
    // Min / max / default values on the code-page (charcode) side.
    FX_WCHAR wMinChar;
    FX_WCHAR wMaxChar;
    FX_WCHAR wDefChar;
    // Min / max / default values on the Unicode side.
    FX_WCHAR wMinUnicode;
    FX_WCHAR wMaxUnicode;
    FX_WCHAR wDefUnicode;
} FX_CODEPAGE_HEADER;
// Mapping-type codes. NOTE(review): presumably the values stored in the
// `uMapType` fields of the map-table structs in this header -- confirm.
#define FX_CPMAPTYPE_Consecution 1
#define FX_CPMAPTYPE_Strict 2
#define FX_CPMAPTYPE_NoMapping 3
#define FX_CPMAPTYPE_Delta 4
// First-level map table entry: a map type paired with a 16-bit value.
// NOTE(review): "CPCU" presumably means code page -> Unicode -- confirm.
typedef struct _FX_CPCU_MAPTABLE1 {
    uint16_t uMapType;  // presumably one of FX_CPMAPTYPE_*
    uint16_t uUniocde;  // sic: misspelled field name is part of the interface
} FX_CPCU_MAPTABLE1;
// Second-level map table entry: a trail byte, a map type and a 16-bit offset.
typedef struct _FX_CPCU_MAPTABLE2 {
    uint8_t uTrailByte;
    uint8_t uMapType;  // presumably one of FX_CPMAPTYPE_*
    uint16_t uOffset;  // NOTE(review): what the offset indexes is not visible here
} FX_CPCU_MAPTABLE2;
// Groups pointers to the two CPCU map tables and a read-only byte buffer.
// NOTE(review): table lengths and ownership are not expressed in this struct.
typedef struct _FX_CPCU_MAPINFO {
    FX_CPCU_MAPTABLE1* pMapTable1;
    FX_CPCU_MAPTABLE2* pMapTable2;
    const uint8_t* pMapData;
} FX_CPCU_MAPINFO;
// Map table entry covering the range [uStartUnicode, uEndUnicode] (whether the
// end is inclusive is not visible here).
// NOTE(review): "CPUC" presumably means Unicode -> code page -- confirm.
typedef struct _FX_CPUC_MAPTABLE {
    uint16_t uStartUnicode;
    uint16_t uEndUnicode;
    uint16_t uMapType;  // presumably one of FX_CPMAPTYPE_*
    uint16_t uOffset;
} FX_CPUC_MAPTABLE;
// Groups an entry count, a pointer to FX_CPUC_MAPTABLE entries and a read-only
// byte buffer. NOTE(review): uMapCount is presumably the number of entries
// reachable through pMapTable -- confirm.
typedef struct _FX_CPUC_MAPINFO {
    uint32_t uMapCount;
    FX_CPUC_MAPTABLE* pMapTable;
    const uint8_t* pMapData;
} FX_CPUC_MAPINFO;
// Bundles read-only pointers to the three parts describing one code page: its
// header and the two map-info blocks (CPCU and CPUC).
//
// The struct is deliberately declared without a tag. It used to be written as
// `struct _FX_CODEPAGE`, but `_FX_CODEPAGE` is also this header's include
// guard macro and is defined empty, so the preprocessor already removed the
// tag. Omitting it keeps the compiled result the same and removes a name that
// never existed after preprocessing.
typedef struct {
    const FX_CODEPAGE_HEADER* pCPHeader;
    const FX_CPCU_MAPINFO* pCPCUMapInfo;
    const FX_CPUC_MAPINFO* pCPUCMapInfo;
} FX_CODEPAGE, *FX_LPCODEPAGE;
// Pointer-to-const counterpart of FX_LPCODEPAGE.
typedef const FX_CODEPAGE* FX_LPCCODEPAGE;
// Pairs a 32-bit hash with a code page number.
// NOTE(review): presumably the hash of an encoding-name string, used for
// string -> code page lookup; the hash function is not visible here.
typedef struct _FX_STR2CPHASH {
    uint32_t uHash;
    uint32_t uCodePage;
} FX_STR2CPHASH;
// Pairs a charset id with a code page id (presumably FX_CHARSET_* and
// FX_CODEPAGE_* values respectively).
typedef struct _FX_CHARSET_MAP {
    uint16_t charset;
    uint16_t codepage;
} FX_CHARSET_MAP;
// Pairs a language id with a code page id.
// NOTE(review): the numbering scheme of wLanguage is not visible in this
// header -- confirm against the table that uses this struct.
typedef struct _FX_LANG2CPMAP {
    FX_WORD wLanguage;
    FX_WORD wCodepage;
} FX_LANG2CPMAP;

// Abstract interface for querying a single code page. Every query method is
// pure virtual; concrete objects come from the static Create() factory, whose
// definition is not in this header.
// NOTE(review): both a public virtual destructor and Release() are exposed;
// which one callers are expected to use is not visible here.
class IFX_CodePage
{
public:
    // Factory keyed by code page number (presumably an FX_CODEPAGE_* value).
    static IFX_CodePage* Create(FX_WORD wCodePage);
    virtual ~IFX_CodePage() {}
    virtual void Release() = 0;

    // General properties of the code page.
    virtual FX_WORD GetCodePageNumber() const = 0;
    virtual FX_CODESYSTEM GetCodeSystemType() const = 0;
    virtual FX_BOOL HasLeadByte() const = 0;
    virtual FX_BOOL IsLeadByte(uint8_t byte) const = 0;
    virtual int32_t GetMinBytesPerChar() const = 0;
    virtual int32_t GetMaxBytesPerChar() const = 0;

    // Min / max / default values on the charcode and Unicode sides.
    virtual FX_WCHAR GetMinCharcode() const = 0;
    virtual FX_WCHAR GetMaxCharcode() const = 0;
    virtual FX_WCHAR GetDefCharcode() const = 0;
    virtual FX_WCHAR GetMinUnicode() const = 0;
    virtual FX_WCHAR GetMaxUnicode() const = 0;
    virtual FX_WCHAR GetDefUnicode() const = 0;

    // Validity checks and conversions between charcodes and Unicode values.
    virtual FX_BOOL IsValidCharcode(FX_WORD wCharcode) const = 0;
    virtual FX_WCHAR GetUnicode(FX_WORD wCharcode) const = 0;
    virtual FX_BOOL IsValidUnicode(FX_WCHAR wUnicode) const = 0;
    virtual FX_WORD GetCharcode(FX_WCHAR wUnicode) const = 0;
};
#endif