// Copyright 2017 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "core/fxcrt/cfx_seekablestreamproxy.h" #if _FX_OS_ == _FX_WIN32_DESKTOP_ || _FX_OS_ == _FX_WIN32_MOBILE_ || \ _FX_OS_ == _FX_WIN64_ #include <io.h> #endif #include <algorithm> #include <limits> #include <memory> #include <utility> #include <vector> #include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/fx_extension.h" #include "third_party/base/ptr_util.h" #include "third_party/base/stl_util.h" namespace { // Returns {src bytes consumed, dst bytes produced}. std::pair<FX_STRSIZE, FX_STRSIZE> UTF8Decode(const char* pSrc, FX_STRSIZE srcLen, wchar_t* pDst, FX_STRSIZE dstLen) { ASSERT(pDst && dstLen > 0); if (srcLen < 1) return {0, 0}; uint32_t dwCode = 0; int32_t iPending = 0; FX_STRSIZE iSrcNum = 0; FX_STRSIZE iDstNum = 0; FX_STRSIZE iIndex = 0; int32_t k = 1; while (iIndex < srcLen) { uint8_t byte = static_cast<uint8_t>(*(pSrc + iIndex)); if (byte < 0x80) { iPending = 0; k = 1; iDstNum++; iSrcNum += k; *pDst++ = byte; if (iDstNum >= dstLen) break; } else if (byte < 0xc0) { if (iPending < 1) break; iPending--; dwCode |= (byte & 0x3f) << (iPending * 6); if (iPending == 0) { iDstNum++; iSrcNum += k; *pDst++ = dwCode; if (iDstNum >= dstLen) break; } } else if (byte < 0xe0) { iPending = 1; k = 2; dwCode = (byte & 0x1f) << 6; } else if (byte < 0xf0) { iPending = 2; k = 3; dwCode = (byte & 0x0f) << 12; } else if (byte < 0xf8) { iPending = 3; k = 4; dwCode = (byte & 0x07) << 18; } else if (byte < 0xfc) { iPending = 4; k = 5; dwCode = (byte & 0x03) << 24; } else if (byte < 0xfe) { iPending = 5; k = 6; dwCode = (byte & 0x01) << 30; } else { break; } iIndex++; } return {iSrcNum, iDstNum}; } void UTF16ToWChar(void* pBuffer, FX_STRSIZE iLength) { ASSERT(pBuffer && iLength > 0); if (sizeof(wchar_t) == 2) return; uint16_t* pSrc = static_cast<uint16_t*>(pBuffer); wchar_t* pDst = static_cast<wchar_t*>(pBuffer); while (--iLength >= 0) pDst[iLength] = static_cast<wchar_t>(pSrc[iLength]); } void SwapByteOrder(wchar_t* pStr, FX_STRSIZE iLength) { ASSERT(pStr); if (iLength < 0) iLength = FXSYS_wcslen(pStr); uint16_t wch; if (sizeof(wchar_t) > 2) { while (iLength-- > 0) { wch = static_cast<uint16_t>(*pStr); wch = (wch >> 8) | (wch << 8); wch &= 0x00FF; *pStr = wch; ++pStr; } return; } while (iLength-- > 0) { wch = static_cast<uint16_t>(*pStr); wch = (wch >> 8) | (wch << 8); *pStr = wch; ++pStr; } } } // namespace #if _FX_ENDIAN_ == _FX_LITTLE_ENDIAN_ #define BOM_MASK 0x00FFFFFF #define BOM_UTF8 0x00BFBBEF #define BOM_UTF16_MASK 0x0000FFFF #define BOM_UTF16_BE 0x0000FFFE #define BOM_UTF16_LE 0x0000FEFF #else #define BOM_MASK 0xFFFFFF00 #define BOM_UTF8 0xEFBBBF00 #define BOM_UTF16_MASK 0xFFFF0000 #define BOM_UTF16_BE 0xFEFF0000 #define BOM_UTF16_LE 0xFFFE0000 #endif // _FX_ENDIAN_ == _FX_LITTLE_ENDIAN_ CFX_SeekableStreamProxy::CFX_SeekableStreamProxy( const CFX_RetainPtr<IFX_SeekableStream>& stream, bool isWriteStream) : m_IsWriteStream(isWriteStream), m_wCodePage(FX_CODEPAGE_DefANSI), m_wBOMLength(0), m_iPosition(0), m_pStream(stream) { ASSERT(m_pStream); if (isWriteStream) { m_iPosition = m_pStream->GetSize(); return; } FX_FILESIZE iPosition = GetPosition(); Seek(CFX_SeekableStreamProxy::Pos::Begin, 0); uint32_t bom = 0; ReadData(reinterpret_cast<uint8_t*>(&bom), 3); bom &= BOM_MASK; if (bom == BOM_UTF8) { m_wBOMLength = 3; m_wCodePage = FX_CODEPAGE_UTF8; } else { bom &= BOM_UTF16_MASK; if (bom == BOM_UTF16_BE) { m_wBOMLength = 2; m_wCodePage = FX_CODEPAGE_UTF16BE; } else if (bom == BOM_UTF16_LE) { m_wBOMLength = 2; m_wCodePage = FX_CODEPAGE_UTF16LE; } else { m_wBOMLength = 0; m_wCodePage = FXSYS_GetACP(); } } Seek(CFX_SeekableStreamProxy::Pos::Begin, std::max(static_cast<FX_FILESIZE>(m_wBOMLength), iPosition)); } CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, FX_STRSIZE size) : CFX_SeekableStreamProxy( pdfium::MakeRetain<CFX_MemoryStream>(data, size, false), false) {} CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() {} void CFX_SeekableStreamProxy::Seek(CFX_SeekableStreamProxy::Pos eSeek, FX_FILESIZE iOffset) { switch (eSeek) { case CFX_SeekableStreamProxy::Pos::Begin: m_iPosition = iOffset; break; case CFX_SeekableStreamProxy::Pos::Current: m_iPosition += iOffset; break; } m_iPosition = pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength()); } void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) { if (m_wBOMLength > 0) return; m_wCodePage = wCodePage; } FX_STRSIZE CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer, FX_STRSIZE iBufferSize) { ASSERT(pBuffer && iBufferSize > 0); if (m_IsWriteStream) return -1; iBufferSize = std::min( iBufferSize, static_cast<FX_STRSIZE>(m_pStream->GetSize() - m_iPosition)); if (iBufferSize <= 0) return 0; if (m_pStream->ReadBlock(pBuffer, m_iPosition, iBufferSize)) { pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition; new_pos += iBufferSize; if (!new_pos.IsValid()) return 0; m_iPosition = new_pos.ValueOrDie(); return iBufferSize; } return 0; } FX_STRSIZE CFX_SeekableStreamProxy::ReadString(wchar_t* pStr, FX_STRSIZE iMaxLength, bool* bEOS) { ASSERT(pStr && iMaxLength > 0); if (m_IsWriteStream) return -1; if (m_wCodePage == FX_CODEPAGE_UTF16LE || m_wCodePage == FX_CODEPAGE_UTF16BE) { FX_FILESIZE iBytes = iMaxLength * 2; FX_STRSIZE iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes); iMaxLength = iLen / 2; if (sizeof(wchar_t) > 2) UTF16ToWChar(pStr, iMaxLength); #if _FX_ENDIAN_ == _FX_BIG_ENDIAN_ if (m_wCodePage == FX_CODEPAGE_UTF16LE) SwapByteOrder(pStr, iMaxLength); #else if (m_wCodePage == FX_CODEPAGE_UTF16BE) SwapByteOrder(pStr, iMaxLength); #endif } else { FX_FILESIZE pos = GetPosition(); FX_STRSIZE iBytes = std::min(iMaxLength, static_cast<FX_STRSIZE>(GetLength() - pos)); if (iBytes > 0) { std::vector<uint8_t> buf(iBytes); FX_STRSIZE iLen = ReadData(buf.data(), iBytes); if (m_wCodePage != FX_CODEPAGE_UTF8) return -1; FX_STRSIZE iSrc = 0; std::tie(iSrc, iMaxLength) = UTF8Decode( reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength); Seek(CFX_SeekableStreamProxy::Pos::Current, iSrc - iLen); } else { iMaxLength = 0; } } *bEOS = IsEOF(); return iMaxLength; } void CFX_SeekableStreamProxy::WriteString(const CFX_WideStringC& str) { if (!m_IsWriteStream || str.GetLength() == 0 || m_wCodePage != FX_CODEPAGE_UTF8) { return; } if (!m_pStream->WriteBlock(str.unterminated_c_str(), m_iPosition, str.GetLength() * sizeof(wchar_t))) { return; } pdfium::base::CheckedNumeric<FX_STRSIZE> new_pos = m_iPosition; new_pos += str.GetLength() * sizeof(wchar_t); if (!new_pos.IsValid()) { m_iPosition = std::numeric_limits<FX_STRSIZE>::max(); return; } m_iPosition = new_pos.ValueOrDie(); }