// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "core/fxcrt/cfx_seekablestreamproxy.h"

#if _FX_OS_ == _FX_WIN32_DESKTOP_ || _FX_OS_ == _FX_WIN32_MOBILE_ || \
    _FX_OS_ == _FX_WIN64_
#include <io.h>
#endif

#include <algorithm>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "core/fxcrt/cfx_memorystream.h"
#include "core/fxcrt/fx_codepage.h"
#include "core/fxcrt/fx_extension.h"
#include "third_party/base/ptr_util.h"
#include "third_party/base/stl_util.h"

namespace {

// Returns {src bytes consumed, dst bytes produced}.
std::pair<FX_STRSIZE, FX_STRSIZE> UTF8Decode(const char* pSrc,
                                             FX_STRSIZE srcLen,
                                             wchar_t* pDst,
                                             FX_STRSIZE dstLen) {
  ASSERT(pDst && dstLen > 0);

  if (srcLen < 1)
    return {0, 0};

  uint32_t dwCode = 0;
  int32_t iPending = 0;
  FX_STRSIZE iSrcNum = 0;
  FX_STRSIZE iDstNum = 0;
  FX_STRSIZE iIndex = 0;
  int32_t k = 1;
  while (iIndex < srcLen) {
    uint8_t byte = static_cast<uint8_t>(*(pSrc + iIndex));
    if (byte < 0x80) {
      iPending = 0;
      k = 1;
      iDstNum++;
      iSrcNum += k;
      *pDst++ = byte;
      if (iDstNum >= dstLen)
        break;
    } else if (byte < 0xc0) {
      if (iPending < 1)
        break;

      iPending--;
      dwCode |= (byte & 0x3f) << (iPending * 6);
      if (iPending == 0) {
        iDstNum++;
        iSrcNum += k;
        *pDst++ = dwCode;
        if (iDstNum >= dstLen)
          break;
      }
    } else if (byte < 0xe0) {
      iPending = 1;
      k = 2;
      dwCode = (byte & 0x1f) << 6;
    } else if (byte < 0xf0) {
      iPending = 2;
      k = 3;
      dwCode = (byte & 0x0f) << 12;
    } else if (byte < 0xf8) {
      iPending = 3;
      k = 4;
      dwCode = (byte & 0x07) << 18;
    } else if (byte < 0xfc) {
      iPending = 4;
      k = 5;
      dwCode = (byte & 0x03) << 24;
    } else if (byte < 0xfe) {
      iPending = 5;
      k = 6;
      dwCode = (byte & 0x01) << 30;
    } else {
      break;
    }
    iIndex++;
  }
  return {iSrcNum, iDstNum};
}

void UTF16ToWChar(void* pBuffer, FX_STRSIZE iLength) {
  ASSERT(pBuffer && iLength > 0);

  if (sizeof(wchar_t) == 2)
    return;

  uint16_t* pSrc = static_cast<uint16_t*>(pBuffer);
  wchar_t* pDst = static_cast<wchar_t*>(pBuffer);
  while (--iLength >= 0)
    pDst[iLength] = static_cast<wchar_t>(pSrc[iLength]);
}

// Swaps the two bytes of the 16-bit code unit held in each of the first
// |iLength| elements of |pStr|, in place. A negative |iLength| means "use the
// length reported by FXSYS_wcslen(pStr)".
//
// Only the low 16 bits of each element are used; the swapped 16-bit value is
// stored back, so any higher bits of a wider wchar_t end up zero.
void SwapByteOrder(wchar_t* pStr, FX_STRSIZE iLength) {
  ASSERT(pStr);

  if (iLength < 0)
    iLength = FXSYS_wcslen(pStr);

  // The same swap is correct for every wchar_t width. Masking the swapped
  // value with 0x00FF (as was previously done for wide wchar_t) would throw
  // away one of the two bytes and corrupt every code unit above U+00FF.
  while (iLength-- > 0) {
    const uint16_t wch = static_cast<uint16_t>(*pStr);
    *pStr = static_cast<uint16_t>((wch >> 8) | (wch << 8));
    ++pStr;
  }
}

}  // namespace

// Byte-order-mark signatures. The stream constructor copies the leading bytes
// of the stream into the memory of a uint32_t and compares the resulting
// integer against these constants, so their values depend on host endianness.
#if _FX_ENDIAN_ == _FX_LITTLE_ENDIAN_
// Selects the first three stream bytes (candidate UTF-8 BOM).
#define BOM_MASK 0x00FFFFFF
// Stream bytes EF BB BF.
#define BOM_UTF8 0x00BFBBEF
// Selects the first two stream bytes (candidate UTF-16 BOM).
#define BOM_UTF16_MASK 0x0000FFFF
// Stream bytes FE FF.
#define BOM_UTF16_BE 0x0000FFFE
// Stream bytes FF FE.
#define BOM_UTF16_LE 0x0000FEFF
#else
// Big-endian hosts: the first stream byte is the most significant byte.
#define BOM_MASK 0xFFFFFF00
#define BOM_UTF8 0xEFBBBF00
#define BOM_UTF16_MASK 0xFFFF0000
#define BOM_UTF16_BE 0xFEFF0000
#define BOM_UTF16_LE 0xFFFE0000
#endif  // _FX_ENDIAN_ == _FX_LITTLE_ENDIAN_

// Wraps |stream| for text access.
//
// Write mode: the position starts at the current size of the stream and no
// further setup is done.
// Read mode: the first (up to) three bytes are inspected for a UTF-8 or UTF-16
// byte order mark. A recognised mark fixes the code page and its length;
// otherwise the code page is taken from FXSYS_GetACP(). The read position is
// then placed just past the mark, or at the position held before detection if
// that is further along.
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
    const CFX_RetainPtr<IFX_SeekableStream>& stream,
    bool isWriteStream)
    : m_IsWriteStream(isWriteStream),
      m_wCodePage(FX_CODEPAGE_DefANSI),
      m_wBOMLength(0),
      m_iPosition(0),
      m_pStream(stream) {
  ASSERT(m_pStream);

  if (isWriteStream) {
    m_iPosition = m_pStream->GetSize();
    return;
  }

  const FX_FILESIZE saved_position = GetPosition();
  Seek(CFX_SeekableStreamProxy::Pos::Begin, 0);

  // Bytes that cannot be read (short stream) stay zero.
  uint32_t signature = 0;
  ReadData(reinterpret_cast<uint8_t*>(&signature), 3);

  const uint32_t first_three = signature & BOM_MASK;
  const uint32_t first_two = first_three & BOM_UTF16_MASK;
  if (first_three == BOM_UTF8) {
    m_wBOMLength = 3;
    m_wCodePage = FX_CODEPAGE_UTF8;
  } else if (first_two == BOM_UTF16_BE) {
    m_wBOMLength = 2;
    m_wCodePage = FX_CODEPAGE_UTF16BE;
  } else if (first_two == BOM_UTF16_LE) {
    m_wBOMLength = 2;
    m_wCodePage = FX_CODEPAGE_UTF16LE;
  } else {
    m_wBOMLength = 0;
    m_wCodePage = FXSYS_GetACP();
  }

  // Reading the signature moved the position; put it back, but never in
  // front of the end of the byte order mark.
  const FX_FILESIZE after_bom = static_cast<FX_FILESIZE>(m_wBOMLength);
  Seek(CFX_SeekableStreamProxy::Pos::Begin,
       std::max(after_bom, saved_position));
}

// Convenience constructor for an in-memory buffer: wraps |size| bytes at
// |data| in a CFX_MemoryStream and delegates to the stream constructor with
// isWriteStream == false, so the proxy is read-only and byte-order-mark
// detection runs on the buffer.
// NOTE(review): the third CFX_MemoryStream argument (false) is presumably an
// ownership/take-over flag - confirm against CFX_MemoryStream.
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, FX_STRSIZE size)
    : CFX_SeekableStreamProxy(
          pdfium::MakeRetain<CFX_MemoryStream>(data, size, false),
          false) {}

// Nothing to release by hand; members clean up after themselves.
CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() = default;

// Moves the current position. With Pos::Begin, |iOffset| is an absolute
// position; with Pos::Current it is added to the current position. The result
// is always clamped to the range [0, GetLength()].
void CFX_SeekableStreamProxy::Seek(CFX_SeekableStreamProxy::Pos eSeek,
                                   FX_FILESIZE iOffset) {
  if (eSeek == CFX_SeekableStreamProxy::Pos::Begin)
    m_iPosition = iOffset;
  else if (eSeek == CFX_SeekableStreamProxy::Pos::Current)
    m_iPosition += iOffset;

  const FX_FILESIZE lowest = static_cast<FX_FILESIZE>(0);
  m_iPosition = pdfium::clamp(m_iPosition, lowest, GetLength());
}

// Overrides the code page used for decoding. Ignored when a byte order mark
// was recorded (m_wBOMLength > 0): the mark takes precedence.
void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) {
  const bool has_bom = m_wBOMLength > 0;
  if (!has_bom)
    m_wCodePage = wCodePage;
}

// Reads up to |iBufferSize| raw bytes from the current position into
// |pBuffer| and advances the position by the number of bytes read.
//
// Returns the number of bytes read; 0 when nothing is left to read, when the
// underlying ReadBlock() fails, or when the new position would overflow; and
// -1 when this proxy was opened for writing.
FX_STRSIZE CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer,
                                             FX_STRSIZE iBufferSize) {
  ASSERT(pBuffer && iBufferSize > 0);

  if (m_IsWriteStream)
    return -1;

  // Never ask for more than what remains in the stream.
  const FX_STRSIZE remaining =
      static_cast<FX_STRSIZE>(m_pStream->GetSize() - m_iPosition);
  const FX_STRSIZE to_read = std::min(iBufferSize, remaining);
  if (to_read <= 0)
    return 0;

  if (!m_pStream->ReadBlock(pBuffer, m_iPosition, to_read))
    return 0;

  pdfium::base::CheckedNumeric<FX_FILESIZE> next_position = m_iPosition;
  next_position += to_read;
  if (!next_position.IsValid())
    return 0;

  m_iPosition = next_position.ValueOrDie();
  return to_read;
}

// Reads and decodes up to |iMaxLength| wide characters from the current
// position into |pStr|, using the proxy's code page.
//
// Returns the number of wide characters stored and sets |*bEOS| to IsEOF().
// Returns -1 without touching |*bEOS| when the proxy was opened for writing,
// or when the code page is neither UTF-16 nor UTF-8 (in the latter case the
// bytes have already been read from the stream by then).
//
// For UTF-8, the position is moved back over any bytes that were read but not
// decoded, so an incomplete trailing sequence is re-read by the next call.
FX_STRSIZE CFX_SeekableStreamProxy::ReadString(wchar_t* pStr,
                                               FX_STRSIZE iMaxLength,
                                               bool* bEOS) {
  ASSERT(pStr && iMaxLength > 0);

  if (m_IsWriteStream)
    return -1;

  if (m_wCodePage == FX_CODEPAGE_UTF16LE ||
      m_wCodePage == FX_CODEPAGE_UTF16BE) {
    // Two stream bytes per UTF-16 code unit, read straight into |pStr|.
    FX_FILESIZE iBytes = iMaxLength * 2;
    FX_STRSIZE iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
    iMaxLength = iLen / 2;
    // UTF16ToWChar() asserts a positive length; skip it when fewer than two
    // bytes were read (e.g. at end of stream) instead of tripping the assert.
    if (sizeof(wchar_t) > 2 && iMaxLength > 0)
      UTF16ToWChar(pStr, iMaxLength);

    // Swap bytes only when the stream's byte order differs from the host's.
#if _FX_ENDIAN_ == _FX_BIG_ENDIAN_
    if (m_wCodePage == FX_CODEPAGE_UTF16LE)
      SwapByteOrder(pStr, iMaxLength);
#else
    if (m_wCodePage == FX_CODEPAGE_UTF16BE)
      SwapByteOrder(pStr, iMaxLength);
#endif

  } else {
    FX_FILESIZE pos = GetPosition();
    // Each decoded character needs at least one byte, so |iMaxLength| bytes
    // are enough to fill the output.
    FX_STRSIZE iBytes =
        std::min(iMaxLength, static_cast<FX_STRSIZE>(GetLength() - pos));

    if (iBytes > 0) {
      std::vector<uint8_t> buf(iBytes);

      FX_STRSIZE iLen = ReadData(buf.data(), iBytes);
      if (m_wCodePage != FX_CODEPAGE_UTF8)
        return -1;

      FX_STRSIZE iSrc = 0;
      std::tie(iSrc, iMaxLength) = UTF8Decode(
          reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength);
      // Give back the bytes that were read but not consumed by the decoder.
      Seek(CFX_SeekableStreamProxy::Pos::Current, iSrc - iLen);
    } else {
      iMaxLength = 0;
    }
  }

  *bEOS = IsEOF();
  return iMaxLength;
}

void CFX_SeekableStreamProxy::WriteString(const CFX_WideStringC& str) {
  if (!m_IsWriteStream || str.GetLength() == 0 ||
      m_wCodePage != FX_CODEPAGE_UTF8) {
    return;
  }
  if (!m_pStream->WriteBlock(str.unterminated_c_str(), m_iPosition,
                             str.GetLength() * sizeof(wchar_t))) {
    return;
  }

  pdfium::base::CheckedNumeric<FX_STRSIZE> new_pos = m_iPosition;
  new_pos += str.GetLength() * sizeof(wchar_t);
  if (!new_pos.IsValid()) {
    m_iPosition = std::numeric_limits<FX_STRSIZE>::max();
    return;
  }

  m_iPosition = new_pos.ValueOrDie();
}