diff options
Diffstat (limited to 'xfa/fgas/crt/fgas_codepage.cpp')
-rw-r--r-- | xfa/fgas/crt/fgas_codepage.cpp | 277 |
1 files changed, 220 insertions, 57 deletions
diff --git a/xfa/fgas/crt/fgas_codepage.cpp b/xfa/fgas/crt/fgas_codepage.cpp index 7362a89d30..2087cac138 100644 --- a/xfa/fgas/crt/fgas_codepage.cpp +++ b/xfa/fgas/crt/fgas_codepage.cpp @@ -8,7 +8,24 @@ #include "xfa/fgas/crt/fgas_codepage.h" #include "xfa/fgas/crt/fgas_language.h" -static const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = { +namespace { + +struct FX_STR2CPHASH { + uint32_t uHash; + uint16_t uCodePage; +}; + +struct FX_CHARSET_MAP { + uint16_t charset; + uint16_t codepage; +}; + +struct FX_LANG2CPMAP { + uint16_t wLanguage; + uint16_t wCodepage; +}; + +const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = { {0, 1252}, {1, 0}, {2, 42}, {77, 10000}, {78, 10001}, {79, 10003}, {80, 10008}, {81, 10002}, {83, 10005}, {84, 10004}, {85, 10006}, {86, 10081}, {87, 10021}, {88, 10029}, {89, 10007}, @@ -17,24 +34,8 @@ static const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = { {186, 1257}, {204, 1251}, {222, 874}, {238, 1250}, {254, 437}, {255, 850}, }; -uint16_t FX_GetCodePageFromCharset(uint8_t charset) { - int32_t iEnd = sizeof(g_FXCharset2CodePageTable) / sizeof(FX_CHARSET_MAP) - 1; - ASSERT(iEnd >= 0); - int32_t iStart = 0, iMid; - do { - iMid = (iStart + iEnd) / 2; - const FX_CHARSET_MAP& cp = g_FXCharset2CodePageTable[iMid]; - if (charset == cp.charset) { - return cp.codepage; - } else if (charset < cp.charset) { - iEnd = iMid - 1; - } else { - iStart = iMid + 1; - } - } while (iStart <= iEnd); - return 0xFFFF; -} -static const FX_CHARSET_MAP g_FXCodepage2CharsetTable[] = { + +const FX_CHARSET_MAP g_FXCodepage2CharsetTable[] = { {1, 0}, {2, 42}, {254, 437}, {255, 850}, {222, 874}, {128, 932}, {134, 936}, {129, 949}, {136, 950}, {238, 1250}, {204, 1251}, {0, 1252}, {161, 1253}, {162, 1254}, {177, 1255}, @@ -43,23 +44,7 @@ static const FX_CHARSET_MAP g_FXCodepage2CharsetTable[] = { {84, 10004}, {85, 10006}, {86, 10081}, {87, 10021}, {88, 10029}, {89, 10007}, }; -uint16_t FX_GetCharsetFromCodePage(uint16_t codepage) { - int32_t iEnd = sizeof(g_FXCodepage2CharsetTable) / sizeof(FX_CHARSET_MAP) - 1; - ASSERT(iEnd >= 0); - int32_t iStart = 0, iMid; - do { - iMid = (iStart + iEnd) / 2; - const FX_CHARSET_MAP& cp = g_FXCodepage2CharsetTable[iMid]; - if (codepage == cp.codepage) { - return cp.charset; - } else if (codepage < cp.codepage) { - iEnd = iMid - 1; - } else { - iStart = iMid + 1; - } - } while (iStart <= iEnd); - return 0xFFFF; -} + const FX_LANG2CPMAP g_FXLang2CodepageTable[] = { {FX_LANG_Arabic_SaudiArabia, FX_CODEPAGE_MSWin_Arabic}, {FX_LANG_Bulgarian_Bulgaria, FX_CODEPAGE_MSWin_Cyrillic}, @@ -197,24 +182,8 @@ const FX_LANG2CPMAP g_FXLang2CodepageTable[] = { {FX_LANG_Spanish_Nicaragua, FX_CODEPAGE_MSWin_WesternEuropean}, {FX_LANG_Spanish_PuertoRico, FX_CODEPAGE_MSWin_WesternEuropean}, }; -uint16_t FX_GetDefCodePageByLanguage(uint16_t wLanguage) { - int32_t iEnd = sizeof(g_FXLang2CodepageTable) / sizeof(FX_LANG2CPMAP) - 1; - ASSERT(iEnd >= 0); - int32_t iStart = 0, iMid; - do { - iMid = (iStart + iEnd) / 2; - const FX_LANG2CPMAP& cp = g_FXLang2CodepageTable[iMid]; - if (wLanguage == cp.wLanguage) { - return cp.wCodepage; - } else if (wLanguage < cp.wLanguage) { - iEnd = iMid - 1; - } else { - iStart = iMid + 1; - } - } while (iStart <= iEnd); - return 0xFFFF; -} -static const FX_STR2CPHASH g_FXCPHashTable[] = { + +const FX_STR2CPHASH g_FXCPHashTable[] = { {0xd45, 0x6faf}, {0xd46, 0x6fb0}, {0xd47, 0x6fb1}, {0xd48, 0x6fb2}, {0xd49, 0x4e6}, {0xd4d, 0x6fbd}, {0xe9e, 0x4e4}, {0xc998, 0x1b5}, {0x18ef0, 0x3a8}, @@ -301,7 +270,8 @@ static const FX_STR2CPHASH g_FXCPHashTable[] = { {0xf3d463c2, 0x3a4}, {0xf52a70a3, 0xc42e}, {0xf5693147, 0x6fb3}, {0xf637e157, 0x478}, {0xfc213f3a, 0x2717}, {0xff654d14, 0x3b5}, }; -uint16_t FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength) { + +uint16_t GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength) { ASSERT(pStr != NULL); if (iLength < 0) { iLength = FXSYS_strlen(pStr); @@ -326,7 +296,64 @@ uint16_t FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength) { } while (iStart <= iEnd); return 0xFFFF; } -uint16_t FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength) { + +} // namespace + +uint16_t FX_GetCodePageFromCharset(uint8_t charset) { + int32_t iEnd = sizeof(g_FXCharset2CodePageTable) / sizeof(FX_CHARSET_MAP) - 1; + ASSERT(iEnd >= 0); + int32_t iStart = 0, iMid; + do { + iMid = (iStart + iEnd) / 2; + const FX_CHARSET_MAP& cp = g_FXCharset2CodePageTable[iMid]; + if (charset == cp.charset) { + return cp.codepage; + } else if (charset < cp.charset) { + iEnd = iMid - 1; + } else { + iStart = iMid + 1; + } + } while (iStart <= iEnd); + return 0xFFFF; +} + +uint16_t FX_GetCharsetFromCodePage(uint16_t codepage) { + int32_t iEnd = sizeof(g_FXCodepage2CharsetTable) / sizeof(FX_CHARSET_MAP) - 1; + ASSERT(iEnd >= 0); + int32_t iStart = 0, iMid; + do { + iMid = (iStart + iEnd) / 2; + const FX_CHARSET_MAP& cp = g_FXCodepage2CharsetTable[iMid]; + if (codepage == cp.codepage) { + return cp.charset; + } else if (codepage < cp.codepage) { + iEnd = iMid - 1; + } else { + iStart = iMid + 1; + } + } while (iStart <= iEnd); + return 0xFFFF; +} + +uint16_t FX_GetDefCodePageByLanguage(uint16_t wLanguage) { + int32_t iEnd = sizeof(g_FXLang2CodepageTable) / sizeof(FX_LANG2CPMAP) - 1; + ASSERT(iEnd >= 0); + int32_t iStart = 0, iMid; + do { + iMid = (iStart + iEnd) / 2; + const FX_LANG2CPMAP& cp = g_FXLang2CodepageTable[iMid]; + if (wLanguage == cp.wLanguage) { + return cp.wCodepage; + } else if (wLanguage < cp.wLanguage) { + iEnd = iMid - 1; + } else { + iStart = iMid + 1; + } + } while (iStart <= iEnd); + return 0xFFFF; +} + +uint16_t FX_GetCodePageFromStringW(const FX_WCHAR* pStr, int32_t iLength) { if (iLength < 0) { iLength = FXSYS_wcslen(pStr); } @@ -339,5 +366,141 @@ uint16_t FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength) { *pBuf++ = (FX_CHAR)*pStr++; } csStr.ReleaseBuffer(iLength); - return FX_GetCodePageFromStringA(csStr.c_str(), iLength); + return GetCodePageFromStringA(csStr.c_str(), iLength); +} + +void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength) { + ASSERT(pStr != NULL); + if (iLength < 0) { + iLength = FXSYS_wcslen(pStr); + } + uint16_t wch; + if (sizeof(FX_WCHAR) > 2) { + while (iLength-- > 0) { + wch = (uint16_t)*pStr; + wch = (wch >> 8) | (wch << 8); + wch &= 0x00FF; + *pStr++ = wch; + } + } else { + while (iLength-- > 0) { + wch = (uint16_t)*pStr; + wch = (wch >> 8) | (wch << 8); + *pStr++ = wch; + } + } +} + +void FX_UTF16ToWChar(void* pBuffer, int32_t iLength) { + ASSERT(pBuffer != NULL && iLength > 0); + if (sizeof(FX_WCHAR) == 2) { + return; + } + uint16_t* pSrc = (uint16_t*)pBuffer; + FX_WCHAR* pDst = (FX_WCHAR*)pBuffer; + while (--iLength >= 0) { + pDst[iLength] = (FX_WCHAR)pSrc[iLength]; + } +} + +void FX_WCharToUTF16(void* pBuffer, int32_t iLength) { + ASSERT(pBuffer != NULL && iLength > 0); + if (sizeof(FX_WCHAR) == 2) { + return; + } + const FX_WCHAR* pSrc = (const FX_WCHAR*)pBuffer; + uint16_t* pDst = (uint16_t*)pBuffer; + while (--iLength >= 0) { + *pDst++ = (uint16_t)*pSrc++; + } +} + +int32_t FX_DecodeString(uint16_t wCodePage, + const FX_CHAR* pSrc, + int32_t* pSrcLen, + FX_WCHAR* pDst, + int32_t* pDstLen, + FX_BOOL bErrBreak) { + if (wCodePage == FX_CODEPAGE_UTF8) { + return FX_UTF8Decode(pSrc, pSrcLen, pDst, pDstLen); + } + return -1; +} +int32_t FX_UTF8Decode(const FX_CHAR* pSrc, + int32_t* pSrcLen, + FX_WCHAR* pDst, + int32_t* pDstLen) { + if (pSrcLen == NULL || pDstLen == NULL) { + return -1; + } + int32_t iSrcLen = *pSrcLen; + if (iSrcLen < 1) { + *pSrcLen = *pDstLen = 0; + return 1; + } + int32_t iDstLen = *pDstLen; + FX_BOOL bValidDst = (pDst != NULL && iDstLen > 0); + uint32_t dwCode = 0; + int32_t iPending = 0; + int32_t iSrcNum = 0, iDstNum = 0; + int32_t k = 0; + int32_t iIndex = 0; + k = 1; + while (iIndex < iSrcLen) { + uint8_t byte = (uint8_t) * (pSrc + iIndex); + if (byte < 0x80) { + iPending = 0; + k = 1; + iDstNum++; + iSrcNum += k; + if (bValidDst) { + *pDst++ = byte; + if (iDstNum >= iDstLen) { + break; + } + } + } else if (byte < 0xc0) { + if (iPending < 1) { + break; + } + iPending--; + dwCode |= (byte & 0x3f) << (iPending * 6); + if (iPending == 0) { + iDstNum++; + iSrcNum += k; + if (bValidDst) { + *pDst++ = dwCode; + if (iDstNum >= iDstLen) { + break; + } + } + } + } else if (byte < 0xe0) { + iPending = 1; + k = 2; + dwCode = (byte & 0x1f) << 6; + } else if (byte < 0xf0) { + iPending = 2; + k = 3; + dwCode = (byte & 0x0f) << 12; + } else if (byte < 0xf8) { + iPending = 3; + k = 4; + dwCode = (byte & 0x07) << 18; + } else if (byte < 0xfc) { + iPending = 4; + k = 5; + dwCode = (byte & 0x03) << 24; + } else if (byte < 0xfe) { + iPending = 5; + k = 6; + dwCode = (byte & 0x01) << 30; + } else { + break; + } + iIndex++; + } + *pSrcLen = iSrcNum; + *pDstLen = iDstNum; + return 1; } |