1 files changed, 228 insertions, 0 deletions
diff --git a/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
new file mode 100644
index 0000000000..1f0ab5f876
--- /dev/null
+++ b/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
@@ -0,0 +1,228 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
+
+#include "core/include/fpdfapi/cpdf_array.h"
+#include "core/include/fpdfapi/cpdf_dictionary.h"
+#include "core/include/fpdfapi/cpdf_number.h"
+#include "core/include/fpdfapi/cpdf_reference.h"
+#include "core/include/fpdfapi/cpdf_stream.h"
+#include "core/include/fpdfapi/cpdf_string.h"
+#include "core/include/fpdfapi/fpdf_parser_decode.h"
+#include "core/include/fxcrt/fx_ext.h"
+
+// Character-class table indexed by 8-bit character code. Each entry is one of:
+//   'W' - whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff
+//   'N' - numeric: 0123456789+-.
+//   'D' - delimiter: %()/<>[]{}
+//   'R' - any other character code.
+const char PDF_CharType[256] = {
+    // NUL  SOH  STX  ETX  EOT  ENQ  ACK  BEL  BS   HT   LF   VT   FF   CR   SO
+    // SI
+    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W', 'W', 'R', 'W', 'W', 'R',
+    'R',
+
+    // DLE  DC1  DC2  DC3  DC4  NAK  SYN  ETB  CAN  EM   SUB  ESC  FS   GS   RS
+    // US
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R',
+
+    // SP    !    "    #    $    %    &    '    (    )    *    +    ,    -    .
+    // /
+    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R', 'D', 'D', 'R', 'N', 'R', 'N', 'N',
+    'D',
+
+    // 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    > ?
+    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'R', 'R', 'D', 'R', 'D',
+    'R',
+
+    // @    A    B    C    D    E    F    G    H    I    J    K    L    M    N O
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R',
+
+    // P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^ _
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R',
+    'R',
+
+    // `    a    b    c    d    e    f    g    h    i    j    k    l    m    n o
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R',
+
+    // p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~
+    // DEL
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R',
+    'R',
+    // 0x80 - 0xff: only the first (0x80) and last (0xff) codes are whitespace.
+    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
+    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
+
+// Searches |pFile| for the 4-byte "%PDF" signature, trying each start offset
+// from 0 through 1024 in turn.
+//
+// Returns the first offset at which the signature is found, or -1 if a
+// ReadBlock() call fails or no tried offset matches. |pFile| must not be
+// null; it is dereferenced unconditionally.
+int32_t GetHeaderOffset(IFX_FileRead* pFile) {
+  const size_t kBufSize = 4;
+  uint8_t buf[kBufSize];
+  for (int32_t offset = 0; offset <= 1024; ++offset) {
+    if (!pFile->ReadBlock(buf, offset, kBufSize))
+      return -1;
+
+    // Compare byte by byte rather than loading |buf| through an FX_DWORD
+    // pointer: the byte array carries no alignment guarantee for a 32-bit
+    // load, and the byte form needs no endianness conversion of the tag.
+    if (buf[0] == '%' && buf[1] == 'P' && buf[2] == 'D' && buf[3] == 'F')
+      return offset;
+  }
+  return -1;
+}
+
+// Looks up |key| in |pDict| and returns the element's integer value. Returns
+// 0 when ToNumber() yields null for the looked-up element. |pDict| must not
+// be null; it is dereferenced unconditionally.
+int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {
+  if (CPDF_Number* pNumber = ToNumber(pDict->GetElement(key)))
+    return pNumber->GetInteger();
+  return 0;
+}
+
+// Decodes the "#xx" escapes of a PDF name. A '#' that is followed by at least
+// two more characters is replaced, together with those two characters, by the
+// single byte FXSYS_toHexDigit(first) * 16 + FXSYS_toHexDigit(second). Every
+// other character, including a '#' within the last two positions, is copied
+// unchanged. Input containing no '#' at all is returned without decoding.
+CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {
+  const int src_len = bstr.GetLength();
+  const FX_CHAR* src = bstr.GetCStr();
+  if (!FXSYS_memchr(src, '#', src_len))
+    return bstr;
+
+  // Each escape turns three input characters into one, so the output never
+  // needs more than |src_len| characters.
+  CFX_ByteString decoded;
+  FX_CHAR* const out = decoded.GetBuffer(src_len);
+  FX_STRSIZE out_len = 0;
+  for (int i = 0; i < src_len; ++i) {
+    FX_CHAR ch = src[i];
+    if (ch == '#' && i < src_len - 2) {
+      ch = FXSYS_toHexDigit(src[i + 1]) * 16 + FXSYS_toHexDigit(src[i + 2]);
+      i += 2;  // Skip the two digit characters just consumed.
+    }
+    out[out_len++] = ch;
+  }
+  decoded.ReleaseBuffer(out_len);
+  return decoded;
+}
+
+// Overload taking a CFX_ByteString. If |orig| contains a '#', decoding is
+// delegated to the CFX_ByteStringC overload; otherwise a copy of |orig| is
+// returned directly.
+CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) {
+  if (FXSYS_memchr(orig.c_str(), '#', orig.GetLength()))
+    return PDF_NameDecode(CFX_ByteStringC(orig));
+  return orig;
+}
+
+// Encodes |orig| for output as a PDF name. Bytes >= 0x80, bytes for which
+// PDFCharIsWhitespace() or PDFCharIsDelimiter() is true, and '#' itself are
+// written as '#' followed by two uppercase hex digits; every other byte is
+// copied unchanged. If no byte needs escaping, a copy of |orig| is returned.
+CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) {
+  const auto needs_escape = [](uint8_t ch) {
+    return ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
+           PDFCharIsDelimiter(ch);
+  };
+  const char* const kHexDigits = "0123456789ABCDEF";
+
+  const uint8_t* src = (const uint8_t*)orig.c_str();
+  const int src_len = orig.GetLength();
+
+  // First pass: compute the encoded length (3 characters per escaped byte).
+  int encoded_len = 0;
+  for (int i = 0; i < src_len; ++i)
+    encoded_len += needs_escape(src[i]) ? 3 : 1;
+
+  // Equal lengths mean that nothing had to be escaped.
+  if (encoded_len == src_len)
+    return orig;
+
+  // Second pass: write the encoded characters.
+  CFX_ByteString encoded;
+  FX_CHAR* out = encoded.GetBuffer(encoded_len);
+  for (int i = 0; i < src_len; ++i) {
+    const uint8_t ch = src[i];
+    if (!needs_escape(ch)) {
+      *out++ = ch;
+      continue;
+    }
+    *out++ = '#';
+    *out++ = kHexDigits[ch / 16];
+    *out++ = kHexDigits[ch % 16];
+  }
+  // Terminate the text before ReleaseBuffer() is called without a length.
+  *out = 0;
+  encoded.ReleaseBuffer();
+  return encoded;
+}
+
+// Appends a textual serialization of |pObj| to |buf| and returns |buf|.
+//
+// A null |pObj| is written as " null". Array elements and dictionary values
+// that report a non-zero object number are written as "<objnum> 0 R" rather
+// than being serialized inline; everything else recurses through this
+// operator. An unrecognized object type triggers ASSERT(FALSE).
+CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) {
+  if (!pObj) {
+    buf << " null";
+    return buf;
+  }
+  switch (pObj->GetType()) {
+    case CPDF_Object::NULLOBJ:
+      buf << " null";
+      break;
+    case CPDF_Object::BOOLEAN:
+    case CPDF_Object::NUMBER:
+      buf << " " << pObj->GetString();
+      break;
+    case CPDF_Object::STRING:
+      buf << PDF_EncodeString(pObj->GetString(), pObj->AsString()->IsHex());
+      break;
+    case CPDF_Object::NAME: {
+      CFX_ByteString str = pObj->GetString();
+      buf << "/" << PDF_NameEncode(str);
+      break;
+    }
+    case CPDF_Object::REFERENCE: {
+      buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
+      break;
+    }
+    case CPDF_Object::ARRAY: {
+      const CPDF_Array* p = pObj->AsArray();
+      buf << "[";
+      for (FX_DWORD i = 0; i < p->GetCount(); i++) {
+        CPDF_Object* pElement = p->GetElement(i);
+        // Guard against a null element, as the dictionary branch below does
+        // for its values: a null pointer is handed to this operator, which
+        // writes " null", instead of being dereferenced here.
+        if (pElement && pElement->GetObjNum()) {
+          buf << " " << pElement->GetObjNum() << " 0 R";
+        } else {
+          buf << pElement;
+        }
+      }
+      buf << "]";
+      break;
+    }
+    case CPDF_Object::DICTIONARY: {
+      const CPDF_Dictionary* p = pObj->AsDictionary();
+      buf << "<<";
+      for (const auto& it : *p) {
+        const CFX_ByteString& key = it.first;
+        CPDF_Object* pValue = it.second;
+        buf << "/" << PDF_NameEncode(key);
+        if (pValue && pValue->GetObjNum()) {
+          buf << " " << pValue->GetObjNum() << " 0 R ";
+        } else {
+          buf << pValue;
+        }
+      }
+      buf << ">>";
+      break;
+    }
+    case CPDF_Object::STREAM: {
+      // Written as the stream's dictionary followed by its data between the
+      // "stream" and "endstream" keywords.
+      const CPDF_Stream* p = pObj->AsStream();
+      buf << p->GetDict() << "stream\r\n";
+      CPDF_StreamAcc acc;
+      // NOTE(review): TRUE presumably requests the raw (still encoded) stream
+      // data - confirm against CPDF_StreamAcc::LoadAllData().
+      acc.LoadAllData(p, TRUE);
+      buf.AppendBlock(acc.GetData(), acc.GetSize());
+      buf << "\r\nendstream";
+      break;
+    }
+    default:
+      ASSERT(FALSE);
+      break;
+  }
+  return buf;
+}