// Copyright 2014 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "core/fxcrt/fx_ext.h" #include "xfa/fgas/crt/fgas_codepage.h" #include "xfa/fgas/crt/fgas_language.h" namespace { struct FX_CHARSET_MAP { uint16_t charset; uint16_t codepage; }; const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = { {FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean}, {FX_CHARSET_Default, FX_CODEPAGE_DefANSI}, {FX_CHARSET_Symbol, FX_CODEPAGE_Symbol}, {FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman}, {FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS}, {FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean}, {FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified}, {FX_CHARSET_MAC_ChineseTriditional, FX_CODEPAGE_MAC_ChineseTraditional}, {FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew}, {FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic}, {FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek}, {FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish}, {FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai}, {FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean}, {FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic}, {FX_CHARSET_ShiftJIS, FX_CODEPAGE_ShiftJIS}, {FX_CHARSET_Korean, FX_CODEPAGE_Korean}, {FX_CHARSET_Johab, FX_CODEPAGE_Johab}, {FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified}, {FX_CHARSET_ChineseTriditional, FX_CODEPAGE_ChineseTraditional}, {FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek}, {FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish}, {FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese}, {FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew}, {FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic}, {FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic}, {FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic}, {FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai}, {FX_CHARSET_MSWin_EasterEuropean, FX_CODEPAGE_MSWin_EasternEuropean}, {FX_CHARSET_US, FX_CODEPAGE_MSDOS_US}, {FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean}, }; } // namespace uint16_t FX_GetCodePageFromCharset(uint8_t charset) { int32_t iEnd = sizeof(g_FXCharset2CodePageTable) / sizeof(FX_CHARSET_MAP) - 1; ASSERT(iEnd >= 0); int32_t iStart = 0, iMid; do { iMid = (iStart + iEnd) / 2; const FX_CHARSET_MAP& cp = g_FXCharset2CodePageTable[iMid]; if (charset == cp.charset) return cp.codepage; if (charset < cp.charset) iEnd = iMid - 1; else iStart = iMid + 1; } while (iStart <= iEnd); return 0xFFFF; } void FX_SwapByteOrder(wchar_t* pStr, int32_t iLength) { ASSERT(pStr); if (iLength < 0) iLength = FXSYS_wcslen(pStr); uint16_t wch; if (sizeof(wchar_t) > 2) { while (iLength-- > 0) { wch = (uint16_t)*pStr; wch = (wch >> 8) | (wch << 8); wch &= 0x00FF; *pStr++ = wch; } return; } while (iLength-- > 0) { wch = (uint16_t)*pStr; wch = (wch >> 8) | (wch << 8); *pStr++ = wch; } } void FX_UTF16ToWChar(void* pBuffer, int32_t iLength) { ASSERT(pBuffer && iLength > 0); if (sizeof(wchar_t) == 2) return; uint16_t* pSrc = static_cast(pBuffer); wchar_t* pDst = static_cast(pBuffer); while (--iLength >= 0) pDst[iLength] = static_cast(pSrc[iLength]); } int32_t FX_DecodeString(uint16_t wCodePage, const char* pSrc, int32_t* pSrcLen, wchar_t* pDst, int32_t* pDstLen, bool bErrBreak) { if (wCodePage == FX_CODEPAGE_UTF8) return FX_UTF8Decode(pSrc, pSrcLen, pDst, pDstLen); return -1; } int32_t FX_UTF8Decode(const char* pSrc, int32_t* pSrcLen, wchar_t* pDst, int32_t* pDstLen) { if (!pSrcLen || !pDstLen) return -1; int32_t iSrcLen = *pSrcLen; if (iSrcLen < 1) { *pSrcLen = *pDstLen = 0; return 1; } int32_t iDstLen = *pDstLen; bool bValidDst = (pDst && iDstLen > 0); uint32_t dwCode = 0; int32_t iPending = 0; int32_t iSrcNum = 0; int32_t iDstNum = 0; int32_t iIndex = 0; int32_t k = 1; while (iIndex < iSrcLen) { uint8_t byte = static_cast(*(pSrc + iIndex)); if (byte < 0x80) { iPending = 0; k = 1; iDstNum++; iSrcNum += k; if (bValidDst) { *pDst++ = byte; if (iDstNum >= iDstLen) break; } } else if (byte < 0xc0) { if (iPending < 1) break; iPending--; dwCode |= (byte & 0x3f) << (iPending * 6); if (iPending == 0) { iDstNum++; iSrcNum += k; if (bValidDst) { *pDst++ = dwCode; if (iDstNum >= iDstLen) break; } } } else if (byte < 0xe0) { iPending = 1; k = 2; dwCode = (byte & 0x1f) << 6; } else if (byte < 0xf0) { iPending = 2; k = 3; dwCode = (byte & 0x0f) << 12; } else if (byte < 0xf8) { iPending = 3; k = 4; dwCode = (byte & 0x07) << 18; } else if (byte < 0xfc) { iPending = 4; k = 5; dwCode = (byte & 0x03) << 24; } else if (byte < 0xfe) { iPending = 5; k = 6; dwCode = (byte & 0x01) << 30; } else { break; } iIndex++; } *pSrcLen = iSrcNum; *pDstLen = iDstNum; return 1; }