summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
Diffstat (limited to 'core')
-rw-r--r-- core/fxcrt/cfx_seekablestreamproxy.cpp 307
-rw-r--r-- core/fxcrt/cfx_seekablestreamproxy.h 54
-rw-r--r-- core/fxcrt/fx_codepage.h 137
3 files changed, 498 insertions, 0 deletions
diff --git a/core/fxcrt/cfx_seekablestreamproxy.cpp b/core/fxcrt/cfx_seekablestreamproxy.cpp
new file mode 100644
index 0000000000..fe6b8dd375
--- /dev/null
+++ b/core/fxcrt/cfx_seekablestreamproxy.cpp
@@ -0,0 +1,307 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fxcrt/cfx_seekablestreamproxy.h"
+
+#if _FX_OS_ == _FX_WIN32_DESKTOP_ || _FX_OS_ == _FX_WIN32_MOBILE_ || \
+ _FX_OS_ == _FX_WIN64_
+#include <io.h>
+#endif
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/fxcrt/fx_codepage.h"
+#include "core/fxcrt/fx_ext.h"
+#include "third_party/base/ptr_util.h"
+#include "third_party/base/stl_util.h"
+
+namespace {
+
+// Decodes UTF-8 bytes from |pSrc| into wide characters written to |pDst|.
+//
+// Decoding stops when |srcLen| bytes have been examined, when |dstLen|
+// characters have been produced, or at the first byte that can neither start
+// nor continue a sequence here (a continuation byte with nothing pending, or
+// 0xFE / 0xFF).
+//
+// Returns {src bytes consumed, dst characters produced}. The consumed count
+// covers every byte up to and including the last character produced, so the
+// bytes of an abandoned partial sequence that precede that character are
+// counted as well. A partial sequence at the very end of the input is not
+// counted, which lets the caller rewind and read it again later.
+std::pair<FX_STRSIZE, FX_STRSIZE> UTF8Decode(const char* pSrc,
+                                             FX_STRSIZE srcLen,
+                                             wchar_t* pDst,
+                                             FX_STRSIZE dstLen) {
+  ASSERT(pDst && dstLen > 0);
+
+  if (srcLen < 1)
+    return {0, 0};
+
+  uint32_t dwCode = 0;
+  int32_t iPending = 0;  // Continuation bytes still expected.
+  FX_STRSIZE iSrcNum = 0;
+  FX_STRSIZE iDstNum = 0;
+  for (FX_STRSIZE iIndex = 0; iIndex < srcLen; ++iIndex) {
+    const uint8_t byte = static_cast<uint8_t>(pSrc[iIndex]);
+    if (byte < 0x80) {
+      // ASCII. Any unfinished multi-byte sequence before it is dropped.
+      iPending = 0;
+      *pDst++ = byte;
+      ++iDstNum;
+      // Derive the consumed count from the scan position rather than from
+      // the length of this one sequence, so dropped bytes are not re-read.
+      iSrcNum = iIndex + 1;
+      if (iDstNum >= dstLen)
+        break;
+    } else if (byte < 0xc0) {
+      // Continuation byte; only meaningful while a sequence is open.
+      if (iPending < 1)
+        break;
+
+      --iPending;
+      dwCode |= (byte & 0x3f) << (iPending * 6);
+      if (iPending == 0) {
+        *pDst++ = dwCode;
+        ++iDstNum;
+        iSrcNum = iIndex + 1;
+        if (iDstNum >= dstLen)
+          break;
+      }
+    } else if (byte < 0xe0) {
+      iPending = 1;
+      dwCode = (byte & 0x1f) << 6;
+    } else if (byte < 0xf0) {
+      iPending = 2;
+      dwCode = (byte & 0x0f) << 12;
+    } else if (byte < 0xf8) {
+      iPending = 3;
+      dwCode = (byte & 0x07) << 18;
+    } else if (byte < 0xfc) {
+      iPending = 4;
+      dwCode = (byte & 0x03) << 24;
+    } else if (byte < 0xfe) {
+      iPending = 5;
+      dwCode = (byte & 0x01) << 30;
+    } else {
+      // 0xFE and 0xFF never appear in UTF-8.
+      break;
+    }
+  }
+  return {iSrcNum, iDstNum};
+}
+
+// Widens the |iLength| 16-bit code units stored at the start of |pBuffer|
+// into wchar_t values, in place. Nothing is done when wchar_t is already
+// two bytes wide.
+void UTF16ToWChar(void* pBuffer, FX_STRSIZE iLength) {
+  ASSERT(pBuffer && iLength > 0);
+
+  if (sizeof(wchar_t) == 2)
+    return;
+
+  const uint16_t* pUnits = static_cast<const uint16_t*>(pBuffer);
+  wchar_t* pWide = static_cast<wchar_t*>(pBuffer);
+  // Convert from the last element down to the first: source and destination
+  // share the buffer, and going backwards writes each wider slot only after
+  // the narrower units it overlaps have been read.
+  for (FX_STRSIZE i = iLength; i > 0; --i)
+    pWide[i - 1] = static_cast<wchar_t>(pUnits[i - 1]);
+}
+
+// Swaps the two bytes of the low 16 bits of each of the first |iLength|
+// characters of |pStr|, in place. A negative |iLength| means the length is
+// measured with FXSYS_wcslen().
+void SwapByteOrder(wchar_t* pStr, FX_STRSIZE iLength) {
+  ASSERT(pStr);
+
+  if (iLength < 0)
+    iLength = FXSYS_wcslen(pStr);
+
+  // The swap is the same whatever the width of wchar_t. Both bytes of the
+  // swapped value must be kept: masking the result down to 8 bits would
+  // throw away the high byte of every character above U+00FF.
+  while (iLength-- > 0) {
+    const uint16_t wch = static_cast<uint16_t>(*pStr);
+    *pStr++ = static_cast<uint16_t>((wch >> 8) | (wch << 8));
+  }
+}
+
+} // namespace
+
+#if _FX_ENDIAN_ == _FX_LITTLE_ENDIAN_  // Values as they appear after the leading stream bytes are copied into a uint32_t.
+#define BOM_MASK 0x00FFFFFF  // Keeps the three bytes read from the stream start.
+#define BOM_UTF8 0x00BFBBEF  // Bytes EF BB BF.
+#define BOM_UTF16_MASK 0x0000FFFF  // Keeps only the first two bytes.
+#define BOM_UTF16_BE 0x0000FFFE  // Bytes FE FF.
+#define BOM_UTF16_LE 0x0000FEFF  // Bytes FF FE.
+#else  // Same byte patterns, laid out for a big-endian host.
+#define BOM_MASK 0xFFFFFF00
+#define BOM_UTF8 0xEFBBBF00
+#define BOM_UTF16_MASK 0xFFFF0000
+#define BOM_UTF16_BE 0xFEFF0000
+#define BOM_UTF16_LE 0xFFFE0000
+#endif // _FX_ENDIAN_ == _FX_LITTLE_ENDIAN_
+
+// Wraps |stream| for text access.
+//
+// For a write stream the position starts at the current end of |stream|.
+// For a read stream the first bytes are inspected for a byte order mark: a
+// UTF-8 or UTF-16 BOM fixes the code page and is skipped; otherwise the code
+// page comes from FXSYS_GetACP() and reading starts at offset 0.
+CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
+    const CFX_RetainPtr<IFX_SeekableStream>& stream,
+    bool isWriteStream)
+    : m_IsWriteStream(isWriteStream),
+      m_wCodePage(FX_CODEPAGE_DefANSI),
+      m_wBOMLength(0),
+      m_iPosition(0),
+      m_pStream(stream) {
+  ASSERT(m_pStream);
+
+  if (isWriteStream) {
+    m_iPosition = m_pStream->GetSize();
+    return;
+  }
+
+  FX_FILESIZE iPosition = GetPosition();
+  Seek(CFX_SeekableStreamProxy::Pos::Begin, 0);
+
+  // Must start zeroed: ReadData() delivers fewer than three bytes (possibly
+  // none) for short streams, and the bytes it does not write are still
+  // compared against the BOM constants below.
+  uint32_t bom = 0;
+  ReadData(reinterpret_cast<uint8_t*>(&bom), 3);
+
+  bom &= BOM_MASK;
+  if (bom == BOM_UTF8) {
+    m_wBOMLength = 3;
+    m_wCodePage = FX_CODEPAGE_UTF8;
+  } else {
+    bom &= BOM_UTF16_MASK;
+    if (bom == BOM_UTF16_BE) {
+      m_wBOMLength = 2;
+      m_wCodePage = FX_CODEPAGE_UTF16BE;
+    } else if (bom == BOM_UTF16_LE) {
+      m_wBOMLength = 2;
+      m_wCodePage = FX_CODEPAGE_UTF16LE;
+    } else {
+      m_wBOMLength = 0;
+      m_wCodePage = FXSYS_GetACP();
+    }
+  }
+
+  // Resume at the earlier position, but never inside the BOM.
+  Seek(CFX_SeekableStreamProxy::Pos::Begin,
+       std::max(static_cast<FX_FILESIZE>(m_wBOMLength), iPosition));
+}
+
+// Read-only proxy over the |size| bytes at |data|; the bytes are exposed
+// through a stream obtained from IFX_MemoryStream::Create().
+CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, FX_STRSIZE size)
+    : CFX_SeekableStreamProxy(IFX_MemoryStream::Create(data, size),
+                              /*isWriteStream=*/false) {}
+
+// Nothing to release by hand; the members clean up after themselves.
+CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() = default;
+
+// Moves the stream position. Pos::Begin treats |iOffset| as an absolute
+// offset, Pos::Current as a delta from the present position. The result is
+// clamped to the range [0, GetLength()].
+void CFX_SeekableStreamProxy::Seek(CFX_SeekableStreamProxy::Pos eSeek,
+                                   FX_FILESIZE iOffset) {
+  FX_FILESIZE iTarget = m_iPosition;
+  if (eSeek == CFX_SeekableStreamProxy::Pos::Begin)
+    iTarget = iOffset;
+  else if (eSeek == CFX_SeekableStreamProxy::Pos::Current)
+    iTarget += iOffset;
+
+  m_iPosition =
+      pdfium::clamp(iTarget, static_cast<FX_FILESIZE>(0), GetLength());
+}
+
+// Overrides the code page used for decoding. The request is ignored when a
+// byte order mark was found, since the BOM already determined the encoding.
+void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) {
+  const bool bHasBOM = m_wBOMLength > 0;
+  if (!bHasBOM)
+    m_wCodePage = wCodePage;
+}
+
+// Reads up to |iBufferSize| raw bytes at the current position into |pBuffer|
+// and advances the position by the number of bytes read.
+//
+// Returns the number of bytes read, 0 when nothing is left, the underlying
+// read fails or the new position would overflow, and -1 on a write stream.
+FX_STRSIZE CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer,
+                                              FX_STRSIZE iBufferSize) {
+  ASSERT(pBuffer && iBufferSize > 0);
+
+  if (m_IsWriteStream)
+    return -1;
+
+  // Compare in FX_FILESIZE. Narrowing the remaining byte count to
+  // FX_STRSIZE before the comparison could truncate a large remainder to a
+  // small or negative value and cut the read short.
+  const FX_FILESIZE iRemaining = m_pStream->GetSize() - m_iPosition;
+  if (iRemaining <= 0)
+    return 0;
+  if (iRemaining < static_cast<FX_FILESIZE>(iBufferSize))
+    iBufferSize = static_cast<FX_STRSIZE>(iRemaining);
+  if (iBufferSize <= 0)
+    return 0;
+
+  if (!m_pStream->ReadBlock(pBuffer, m_iPosition, iBufferSize))
+    return 0;
+
+  pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
+  new_pos += iBufferSize;
+  if (!new_pos.IsValid())
+    return 0;
+
+  m_iPosition = new_pos.ValueOrDie();
+  return iBufferSize;
+}
+
+// Decodes up to |iMaxLength| characters from the current position into
+// |pStr| and stores in |*bEOS| whether the end of the stream was reached.
+//
+// UTF-16LE, UTF-16BE and UTF-8 are handled. Returns the number of characters
+// stored, or -1 on a write stream or when data is available but the code
+// page is not one of those three.
+// NOTE(review): on that last -1 path the bytes have already been consumed
+// and |*bEOS| is left unwritten, as before.
+FX_STRSIZE CFX_SeekableStreamProxy::ReadString(wchar_t* pStr,
+                                                FX_STRSIZE iMaxLength,
+                                                bool* bEOS) {
+  ASSERT(pStr && iMaxLength > 0);
+
+  if (m_IsWriteStream)
+    return -1;
+
+  if (m_wCodePage == FX_CODEPAGE_UTF16LE ||
+      m_wCodePage == FX_CODEPAGE_UTF16BE) {
+    // Cap the request so that doubling it cannot overflow FX_STRSIZE.
+    const FX_STRSIZE iUnits =
+        std::min(iMaxLength, std::numeric_limits<FX_STRSIZE>::max() / 2);
+    const FX_STRSIZE iLen =
+        ReadData(reinterpret_cast<uint8_t*>(pStr), iUnits * 2);
+    iMaxLength = iLen / 2;
+
+    // Nothing to convert when no complete code unit was read; the helpers
+    // expect a positive length.
+    if (iMaxLength > 0) {
+      if (sizeof(wchar_t) > 2)
+        UTF16ToWChar(pStr, iMaxLength);
+
+#if _FX_ENDIAN_ == _FX_BIG_ENDIAN_
+      if (m_wCodePage == FX_CODEPAGE_UTF16LE)
+        SwapByteOrder(pStr, iMaxLength);
+#else
+      if (m_wCodePage == FX_CODEPAGE_UTF16BE)
+        SwapByteOrder(pStr, iMaxLength);
+#endif
+    }
+  } else {
+    FX_FILESIZE pos = GetPosition();
+    FX_STRSIZE iBytes =
+        std::min(iMaxLength, static_cast<FX_STRSIZE>(GetLength() - pos));
+
+    if (iBytes > 0) {
+      std::vector<uint8_t> buf(iBytes);
+
+      FX_STRSIZE iLen = ReadData(buf.data(), iBytes);
+      if (m_wCodePage != FX_CODEPAGE_UTF8)
+        return -1;
+
+      FX_STRSIZE iSrc = 0;
+      std::tie(iSrc, iMaxLength) = UTF8Decode(
+          reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength);
+      // Give back the bytes that were read but not decoded.
+      Seek(CFX_SeekableStreamProxy::Pos::Current, iSrc - iLen);
+    } else {
+      iMaxLength = 0;
+    }
+  }
+
+  *bEOS = IsEOF();
+  return iMaxLength;
+}
+
+// Appends |str| at the current position of a write stream and advances the
+// position. Does nothing on a read stream, for an empty string, or unless
+// the code page is FX_CODEPAGE_UTF8.
+// NOTE(review): the bytes written are the raw wchar_t storage of |str|, not
+// a UTF-8 encoding of it - confirm this is what readers of the output
+// expect.
+void CFX_SeekableStreamProxy::WriteString(const CFX_WideStringC& str) {
+  if (!m_IsWriteStream || str.GetLength() == 0 ||
+      m_wCodePage != FX_CODEPAGE_UTF8) {
+    return;
+  }
+
+  const size_t iByteCount = str.GetLength() * sizeof(wchar_t);
+  if (!m_pStream->WriteBlock(str.c_str(), m_iPosition, iByteCount))
+    return;
+
+  // Track the position in FX_FILESIZE, the type of |m_iPosition| and the one
+  // ReadData() uses; checking in FX_STRSIZE would flag an overflow as soon
+  // as the offset no longer fits that narrower type.
+  pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
+  new_pos += iByteCount;
+  if (!new_pos.IsValid()) {
+    m_iPosition = std::numeric_limits<FX_FILESIZE>::max();
+    return;
+  }
+
+  m_iPosition = new_pos.ValueOrDie();
+}
diff --git a/core/fxcrt/cfx_seekablestreamproxy.h b/core/fxcrt/cfx_seekablestreamproxy.h
new file mode 100644
index 0000000000..d059fb8956
--- /dev/null
+++ b/core/fxcrt/cfx_seekablestreamproxy.h
@@ -0,0 +1,54 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FXCRT_CFX_SEEKABLESTREAMPROXY_H_
+#define CORE_FXCRT_CFX_SEEKABLESTREAMPROXY_H_
+
+#include <algorithm>
+
+#include "core/fxcrt/cfx_retain_ptr.h"
+#include "core/fxcrt/fx_stream.h"
+#include "core/fxcrt/fx_system.h"
+
+// Text-oriented wrapper around an IFX_SeekableStream. A proxy is created
+// either for reading or for writing. A read proxy detects a UTF-8 / UTF-16
+// byte order mark on construction and decodes characters with ReadString().
+// Instances are created through pdfium::MakeRetain().
+class CFX_SeekableStreamProxy : public CFX_Retainable {
+ public:
+  // Origin used by Seek().
+  enum class Pos {
+    Begin = 0,
+    Current,
+  };
+
+  template <typename T, typename... Args>
+  friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
+
+  // Size of the underlying stream, in bytes.
+  FX_FILESIZE GetLength() const { return m_pStream->GetSize(); }
+  // Current byte offset within the underlying stream.
+  FX_FILESIZE GetPosition() const { return m_iPosition; }
+  // Length in bytes of the detected byte order mark; 0 when none was found.
+  FX_STRSIZE GetBOMLength() const { return std::max(0, m_wBOMLength); }
+  // True once the position has reached the end of the stream.
+  bool IsEOF() const { return m_iPosition >= GetLength(); }
+
+  // Moves the position; the result is clamped to [0, GetLength()].
+  void Seek(CFX_SeekableStreamProxy::Pos eSeek, FX_FILESIZE iOffset);
+  // Decodes up to |iMaxLength| characters into |pStr| and reports in |*bEOS|
+  // whether the end of the stream was reached. Returns the number of
+  // characters stored, or -1 on failure.
+  FX_STRSIZE ReadString(wchar_t* pStr, FX_STRSIZE iMaxLength, bool* bEOS);
+
+  // Appends |str| to a write stream; a no-op on a read stream.
+  void WriteString(const CFX_WideStringC& str);
+
+  uint16_t GetCodePage() const { return m_wCodePage; }
+  // Ignored when a byte order mark already determined the code page.
+  void SetCodePage(uint16_t wCodePage);
+
+ private:
+  CFX_SeekableStreamProxy(const CFX_RetainPtr<IFX_SeekableStream>& stream,
+                          bool isWriteStream);
+  CFX_SeekableStreamProxy(uint8_t* data, FX_STRSIZE size);
+  ~CFX_SeekableStreamProxy() override;
+
+  // Reads raw bytes at the current position and advances it.
+  FX_STRSIZE ReadData(uint8_t* pBuffer, FX_STRSIZE iBufferSize);
+
+  bool m_IsWriteStream;
+  uint16_t m_wCodePage;
+  FX_STRSIZE m_wBOMLength;
+  FX_FILESIZE m_iPosition;
+  CFX_RetainPtr<IFX_SeekableStream> m_pStream;
+};
+
+#endif // CORE_FXCRT_CFX_SEEKABLESTREAMPROXY_H_
diff --git a/core/fxcrt/fx_codepage.h b/core/fxcrt/fx_codepage.h
new file mode 100644
index 0000000000..db8655dbf6
--- /dev/null
+++ b/core/fxcrt/fx_codepage.h
@@ -0,0 +1,137 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FXCRT_FX_CODEPAGE_H_
+#define CORE_FXCRT_FX_CODEPAGE_H_
+
+#include "core/fxcrt/fx_basic.h"
+
+#define FX_CODEPAGE_DefANSI 0  // Code page identifiers. NOTE(review): values appear to follow Windows code page numbers - confirm.
+#define FX_CODEPAGE_DefOEM 1
+#define FX_CODEPAGE_DefMAC 2
+#define FX_CODEPAGE_Thread 3
+#define FX_CODEPAGE_Symbol 42
+#define FX_CODEPAGE_MSDOS_US 437
+#define FX_CODEPAGE_Arabic_ASMO708 708
+#define FX_CODEPAGE_Arabic_ASMO449Plus 709
+#define FX_CODEPAGE_Arabic_Transparent 710
+#define FX_CODEPAGE_Arabic_NafithaEnhanced 711
+#define FX_CODEPAGE_Arabic_TransparentASMO 720
+#define FX_CODEPAGE_MSDOS_Greek1 737
+#define FX_CODEPAGE_MSDOS_Baltic 775
+#define FX_CODEPAGE_MSWin31_WesternEuropean 819
+#define FX_CODEPAGE_MSDOS_WesternEuropean 850
+#define FX_CODEPAGE_MSDOS_EasternEuropean 852
+#define FX_CODEPAGE_MSDOS_Latin3 853
+#define FX_CODEPAGE_MSDOS_Cyrillic 855
+#define FX_CODEPAGE_MSDOS_Turkish 857
+#define FX_CODEPAGE_MSDOS_Latin1Euro 858
+#define FX_CODEPAGE_MSDOS_Portuguese 860
+#define FX_CODEPAGE_MSDOS_Icelandic 861
+#define FX_CODEPAGE_MSDOS_Hebrew 862
+#define FX_CODEPAGE_MSDOS_FrenchCanadian 863
+#define FX_CODEPAGE_MSDOS_Arabic 864
+#define FX_CODEPAGE_MSDOS_Norwegian 865
+#define FX_CODEPAGE_MSDOS_Russian 866
+#define FX_CODEPAGE_MSDOS_Greek2 869
+#define FX_CODEPAGE_MSDOS_Thai 874
+#define FX_CODEPAGE_MSDOS_KamenickyCS 895
+#define FX_CODEPAGE_ShiftJIS 932
+#define FX_CODEPAGE_ChineseSimplified 936
+#define FX_CODEPAGE_Korean 949
+#define FX_CODEPAGE_ChineseTraditional 950
+#define FX_CODEPAGE_UTF16LE 1200  // Selected by CFX_SeekableStreamProxy when the stream starts with a UTF-16LE BOM.
+#define FX_CODEPAGE_UTF16BE 1201  // Selected by CFX_SeekableStreamProxy when the stream starts with a UTF-16BE BOM.
+#define FX_CODEPAGE_MSWin_EasternEuropean 1250
+#define FX_CODEPAGE_MSWin_Cyrillic 1251
+#define FX_CODEPAGE_MSWin_WesternEuropean 1252
+#define FX_CODEPAGE_MSWin_Greek 1253
+#define FX_CODEPAGE_MSWin_Turkish 1254
+#define FX_CODEPAGE_MSWin_Hebrew 1255
+#define FX_CODEPAGE_MSWin_Arabic 1256
+#define FX_CODEPAGE_MSWin_Baltic 1257
+#define FX_CODEPAGE_MSWin_Vietnamese 1258
+#define FX_CODEPAGE_Johab 1361
+#define FX_CODEPAGE_MAC_Roman 10000
+#define FX_CODEPAGE_MAC_ShiftJIS 10001
+#define FX_CODEPAGE_MAC_ChineseTraditional 10002
+#define FX_CODEPAGE_MAC_Korean 10003
+#define FX_CODEPAGE_MAC_Arabic 10004
+#define FX_CODEPAGE_MAC_Hebrew 10005
+#define FX_CODEPAGE_MAC_Greek 10006
+#define FX_CODEPAGE_MAC_Cyrillic 10007
+#define FX_CODEPAGE_MAC_ChineseSimplified 10008
+#define FX_CODEPAGE_MAC_Thai 10021
+#define FX_CODEPAGE_MAC_EasternEuropean 10029
+#define FX_CODEPAGE_MAC_Turkish 10081
+#define FX_CODEPAGE_UTF32LE 12000
+#define FX_CODEPAGE_UTF32BE 12001
+#define FX_CODEPAGE_ISO8859_1 28591
+#define FX_CODEPAGE_ISO8859_2 28592
+#define FX_CODEPAGE_ISO8859_3 28593
+#define FX_CODEPAGE_ISO8859_4 28594
+#define FX_CODEPAGE_ISO8859_5 28595
+#define FX_CODEPAGE_ISO8859_6 28596
+#define FX_CODEPAGE_ISO8859_7 28597
+#define FX_CODEPAGE_ISO8859_8 28598
+#define FX_CODEPAGE_ISO8859_9 28599
+#define FX_CODEPAGE_ISO8859_10 28600
+#define FX_CODEPAGE_ISO8859_11 28601
+#define FX_CODEPAGE_ISO8859_12 28602
+#define FX_CODEPAGE_ISO8859_13 28603
+#define FX_CODEPAGE_ISO8859_14 28604
+#define FX_CODEPAGE_ISO8859_15 28605
+#define FX_CODEPAGE_ISO8859_16 28606
+#define FX_CODEPAGE_ISCII_Devanagari 57002
+#define FX_CODEPAGE_ISCII_Bengali 57003
+#define FX_CODEPAGE_ISCII_Tamil 57004
+#define FX_CODEPAGE_ISCII_Telugu 57005
+#define FX_CODEPAGE_ISCII_Assamese 57006
+#define FX_CODEPAGE_ISCII_Oriya 57007
+#define FX_CODEPAGE_ISCII_Kannada 57008
+#define FX_CODEPAGE_ISCII_Malayalam 57009
+#define FX_CODEPAGE_ISCII_Gujarati 57010
+#define FX_CODEPAGE_ISCII_Punjabi 57011
+#define FX_CODEPAGE_UTF7 65000
+#define FX_CODEPAGE_UTF8 65001  // Selected by CFX_SeekableStreamProxy when the stream starts with a UTF-8 BOM.
+
+#define FX_CHARSET_ANSI 0  // Character set identifiers. NOTE(review): the non-MAC values look like Windows font charset values - confirm.
+#define FX_CHARSET_Default 1
+#define FX_CHARSET_Symbol 2
+#define FX_CHARSET_MAC_Roman 77
+#define FX_CHARSET_MAC_ShiftJIS 78
+#define FX_CHARSET_MAC_Korean 79
+#define FX_CHARSET_MAC_ChineseSimplified 80
+#define FX_CHARSET_MAC_ChineseTriditional 81  // NOTE(review): "Triditional" is presumably a misspelling of "Traditional"; renaming means updating every user.
+#define FX_CHARSET_MAC_Johab 82
+#define FX_CHARSET_MAC_Hebrew 83
+#define FX_CHARSET_MAC_Arabic 84
+#define FX_CHARSET_MAC_Greek 85
+#define FX_CHARSET_MAC_Turkish 86
+#define FX_CHARSET_MAC_Thai 87
+#define FX_CHARSET_MAC_EasternEuropean 88
+#define FX_CHARSET_MAC_Cyrillic 89
+#define FX_CHARSET_ShiftJIS 128
+#define FX_CHARSET_Korean 129
+#define FX_CHARSET_Johab 130
+#define FX_CHARSET_ChineseSimplified 134
+#define FX_CHARSET_ChineseTriditional 136  // NOTE(review): same presumed misspelling of "Traditional" as above.
+#define FX_CHARSET_MSWin_Greek 161
+#define FX_CHARSET_MSWin_Turkish 162
+#define FX_CHARSET_MSWin_Vietnamese 163
+#define FX_CHARSET_MSWin_Hebrew 177
+#define FX_CHARSET_MSWin_Arabic 178
+#define FX_CHARSET_ArabicTraditional 179
+#define FX_CHARSET_ArabicUser 180
+#define FX_CHARSET_HebrewUser 181
+#define FX_CHARSET_MSWin_Baltic 186
+#define FX_CHARSET_MSWin_Cyrillic 204
+#define FX_CHARSET_Thai 222
+#define FX_CHARSET_MSWin_EasterEuropean 238  // NOTE(review): "Easter" is presumably meant to be "Eastern" (compare FX_CHARSET_MAC_EasternEuropean).
+#define FX_CHARSET_US 254
+#define FX_CHARSET_OEM 255
+
+#endif // CORE_FXCRT_FX_CODEPAGE_H_