diff options
Diffstat (limited to 'core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp')
-rw-r--r-- | core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp new file mode 100644 index 0000000000..1f0ab5f876 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp @@ -0,0 +1,228 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" + +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_number.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fpdfapi/cpdf_string.h" +#include "core/include/fpdfapi/fpdf_parser_decode.h" +#include "core/include/fxcrt/fx_ext.h" + +// Indexed by 8-bit character code, contains either: +// 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff +// 'N' - for numeric: 0123456789+-. +// 'D' - for delimiter: %()/<>[]{} +// 'R' - otherwise. +const char PDF_CharType[256] = { + // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO + // SI + 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W', 'W', 'R', 'W', 'W', 'R', + 'R', + + // DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS + // US + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', + + // SP ! " # $ % & ยด ( ) * + , - . + // / + 'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R', 'D', 'D', 'R', 'N', 'R', 'N', 'N', + 'D', + + // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'R', 'R', 'D', 'R', 'D', + 'R', + + // @ A B C D E F G H I J K L M N O + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', + + // P Q R S T U V W X Y Z [ \ ] ^ _ + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', + 'R', + + // ` a b c d e f g h i j k l m n o + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', + + // p q r s t u v w x y z { | } ~ + // DEL + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', + 'R', + + 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'}; + +int32_t GetHeaderOffset(IFX_FileRead* pFile) { + // TODO(dsinclair): This is a complicated way of saying %PDF, simplify? + const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); + + const size_t kBufSize = 4; + uint8_t buf[kBufSize]; + int32_t offset = 0; + while (offset <= 1024) { + if (!pFile->ReadBlock(buf, offset, kBufSize)) + return -1; + + if (*(FX_DWORD*)buf == tag) + return offset; + + ++offset; + } + return -1; +} + +int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) { + CPDF_Number* pObj = ToNumber(pDict->GetElement(key)); + return pObj ? pObj->GetInteger() : 0; +} + +CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { + int size = bstr.GetLength(); + const FX_CHAR* pSrc = bstr.GetCStr(); + if (!FXSYS_memchr(pSrc, '#', size)) { + return bstr; + } + CFX_ByteString result; + FX_CHAR* pDestStart = result.GetBuffer(size); + FX_CHAR* pDest = pDestStart; + for (int i = 0; i < size; i++) { + if (pSrc[i] == '#' && i < size - 2) { + *pDest++ = + FXSYS_toHexDigit(pSrc[i + 1]) * 16 + FXSYS_toHexDigit(pSrc[i + 2]); + i += 2; + } else { + *pDest++ = pSrc[i]; + } + } + result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart)); + return result; +} + +CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) { + if (!FXSYS_memchr(orig.c_str(), '#', orig.GetLength())) { + return orig; + } + return PDF_NameDecode(CFX_ByteStringC(orig)); +} + +CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { + uint8_t* src_buf = (uint8_t*)orig.c_str(); + int src_len = orig.GetLength(); + int dest_len = 0; + int i; + for (i = 0; i < src_len; i++) { + uint8_t ch = src_buf[i]; + if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || + PDFCharIsDelimiter(ch)) { + dest_len += 3; + } else { + dest_len++; + } + } + if (dest_len == src_len) + return orig; + + CFX_ByteString res; + FX_CHAR* dest_buf = res.GetBuffer(dest_len); + dest_len = 0; + for (i = 0; i < src_len; i++) { + uint8_t ch = src_buf[i]; + if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || + PDFCharIsDelimiter(ch)) { + dest_buf[dest_len++] = '#'; + dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; + dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; + } else { + dest_buf[dest_len++] = ch; + } + } + dest_buf[dest_len] = 0; + res.ReleaseBuffer(); + return res; +} + +CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) { + if (!pObj) { + buf << " null"; + return buf; + } + switch (pObj->GetType()) { + case CPDF_Object::NULLOBJ: + buf << " null"; + break; + case CPDF_Object::BOOLEAN: + case CPDF_Object::NUMBER: + buf << " " << pObj->GetString(); + break; + case CPDF_Object::STRING: + buf << PDF_EncodeString(pObj->GetString(), pObj->AsString()->IsHex()); + break; + case CPDF_Object::NAME: { + CFX_ByteString str = pObj->GetString(); + buf << "/" << PDF_NameEncode(str); + break; + } + case CPDF_Object::REFERENCE: { + buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R "; + break; + } + case CPDF_Object::ARRAY: { + const CPDF_Array* p = pObj->AsArray(); + buf << "["; + for (FX_DWORD i = 0; i < p->GetCount(); i++) { + CPDF_Object* pElement = p->GetElement(i); + if (pElement->GetObjNum()) { + buf << " " << pElement->GetObjNum() << " 0 R"; + } else { + buf << pElement; + } + } + buf << "]"; + break; + } + case CPDF_Object::DICTIONARY: { + const CPDF_Dictionary* p = pObj->AsDictionary(); + buf << "<<"; + for (const auto& it : *p) { + const CFX_ByteString& key = it.first; + CPDF_Object* pValue = it.second; + buf << "/" << PDF_NameEncode(key); + if (pValue && pValue->GetObjNum()) { + buf << " " << pValue->GetObjNum() << " 0 R "; + } else { + buf << pValue; + } + } + buf << ">>"; + break; + } + case CPDF_Object::STREAM: { + const CPDF_Stream* p = pObj->AsStream(); + buf << p->GetDict() << "stream\r\n"; + CPDF_StreamAcc acc; + acc.LoadAllData(p, TRUE); + buf.AppendBlock(acc.GetData(), acc.GetSize()); + buf << "\r\nendstream"; + break; + } + default: + ASSERT(FALSE); + break; + } + return buf; +} |