From 014b012278b7438ef8d4b66730b8598c7eb4623a Mon Sep 17 00:00:00 2001 From: npm Date: Mon, 7 Nov 2016 08:42:11 -0800 Subject: Clean up fpdf_page_parsers - The code in fpdf_page_parser is only called by CPDF_StreamContentParser, so moved there. - Split fpdf_page_parser_old into its two classes - Renamed the corresponding unittests accordingly. - Moved PDF_ReplaceAbbr to namespace - Fixed few nits - Added TODO because CPDF_StreamParser has a lot of code similar to CPDF_SyntaxParser Review-Url: https://codereview.chromium.org/2474303003 --- BUILD.gn | 8 +- core/fpdfapi/page/cpdf_contentparser.cpp | 215 ++++++ core/fpdfapi/page/cpdf_streamcontentparser.cpp | 117 ++- .../page/cpdf_streamcontentparser_unittest.cpp | 34 + core/fpdfapi/page/cpdf_streamparser.cpp | 627 +++++++++++++++ core/fpdfapi/page/cpdf_streamparser_unittest.cpp | 47 ++ core/fpdfapi/page/fpdf_page_parser.cpp | 158 ---- core/fpdfapi/page/fpdf_page_parser_old.cpp | 842 --------------------- .../fpdfapi/page/fpdf_page_parser_old_unittest.cpp | 47 -- core/fpdfapi/page/fpdf_page_parser_unittest.cpp | 34 - core/fpdfapi/page/pageint.h | 4 +- 11 files changed, 1044 insertions(+), 1089 deletions(-) create mode 100644 core/fpdfapi/page/cpdf_contentparser.cpp create mode 100644 core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp create mode 100644 core/fpdfapi/page/cpdf_streamparser.cpp create mode 100644 core/fpdfapi/page/cpdf_streamparser_unittest.cpp delete mode 100644 core/fpdfapi/page/fpdf_page_parser.cpp delete mode 100644 core/fpdfapi/page/fpdf_page_parser_old.cpp delete mode 100644 core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp delete mode 100644 core/fpdfapi/page/fpdf_page_parser_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index 0cf5f54c77..bf6918d7bc 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -432,6 +432,7 @@ static_library("fpdfapi") { "core/fpdfapi/page/cpdf_contentmark.h", "core/fpdfapi/page/cpdf_contentmarkitem.cpp", "core/fpdfapi/page/cpdf_contentmarkitem.h", + "core/fpdfapi/page/cpdf_contentparser.cpp", "core/fpdfapi/page/cpdf_countedobject.h", "core/fpdfapi/page/cpdf_docpagedata.cpp", "core/fpdfapi/page/cpdf_docpagedata.h", @@ -471,6 +472,7 @@ static_library("fpdfapi") { "core/fpdfapi/page/cpdf_shadingpattern.h", "core/fpdfapi/page/cpdf_streamcontentparser.cpp", "core/fpdfapi/page/cpdf_streamcontentparser.h", + "core/fpdfapi/page/cpdf_streamparser.cpp", "core/fpdfapi/page/cpdf_textobject.cpp", "core/fpdfapi/page/cpdf_textobject.h", "core/fpdfapi/page/cpdf_textstate.cpp", @@ -479,8 +481,6 @@ static_library("fpdfapi") { "core/fpdfapi/page/cpdf_tilingpattern.h", "core/fpdfapi/page/fpdf_page_colors.cpp", "core/fpdfapi/page/fpdf_page_func.cpp", - "core/fpdfapi/page/fpdf_page_parser.cpp", - "core/fpdfapi/page/fpdf_page_parser_old.cpp", "core/fpdfapi/page/pageint.h", "core/fpdfapi/parser/cfdf_document.cpp", "core/fpdfapi/parser/cfdf_document.h", @@ -1636,8 +1636,8 @@ test("pdfium_unittests") { sources = [ "core/fpdfapi/font/fpdf_font_cid_unittest.cpp", "core/fpdfapi/font/fpdf_font_unittest.cpp", - "core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp", - "core/fpdfapi/page/fpdf_page_parser_unittest.cpp", + "core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp", + "core/fpdfapi/page/cpdf_streamparser_unittest.cpp", "core/fpdfapi/parser/cpdf_array_unittest.cpp", "core/fpdfapi/parser/cpdf_document_unittest.cpp", "core/fpdfapi/parser/cpdf_object_unittest.cpp", diff --git a/core/fpdfapi/page/cpdf_contentparser.cpp b/core/fpdfapi/page/cpdf_contentparser.cpp new file mode 100644 index 0000000000..f581047835 --- /dev/null +++ b/core/fpdfapi/page/cpdf_contentparser.cpp @@ -0,0 +1,215 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/page/pageint.h" + +#include "core/fpdfapi/font/cpdf_type3char.h" +#include "core/fpdfapi/page/cpdf_allstates.h" +#include "core/fpdfapi/page/cpdf_form.h" +#include "core/fpdfapi/page/cpdf_page.h" +#include "core/fpdfapi/page/cpdf_pageobject.h" +#include "core/fpdfapi/page/cpdf_path.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_stream.h" +#include "core/fpdfapi/parser/cpdf_stream_acc.h" +#include "core/fxcrt/fx_safe_types.h" + +CPDF_ContentParser::CPDF_ContentParser() + : m_Status(Ready), + m_InternalStage(STAGE_GETCONTENT), + m_pObjectHolder(nullptr), + m_bForm(false), + m_pType3Char(nullptr), + m_pData(nullptr), + m_Size(0), + m_CurrentOffset(0) {} + +CPDF_ContentParser::~CPDF_ContentParser() { + if (!m_pSingleStream) + FX_Free(m_pData); +} + +void CPDF_ContentParser::Start(CPDF_Page* pPage) { + if (m_Status != Ready || !pPage || !pPage->m_pDocument || + !pPage->m_pFormDict) { + m_Status = Done; + return; + } + m_pObjectHolder = pPage; + m_bForm = false; + m_Status = ToBeContinued; + m_InternalStage = STAGE_GETCONTENT; + m_CurrentOffset = 0; + + CPDF_Object* pContent = pPage->m_pFormDict->GetDirectObjectFor("Contents"); + if (!pContent) { + m_Status = Done; + return; + } + if (CPDF_Stream* pStream = pContent->AsStream()) { + m_nStreams = 0; + m_pSingleStream.reset(new CPDF_StreamAcc); + m_pSingleStream->LoadAllData(pStream, false); + } else if (CPDF_Array* pArray = pContent->AsArray()) { + m_nStreams = pArray->GetCount(); + if (m_nStreams) + m_StreamArray.resize(m_nStreams); + else + m_Status = Done; + } else { + m_Status = Done; + } +} + +void CPDF_ContentParser::Start(CPDF_Form* pForm, + CPDF_AllStates* pGraphicStates, + const CFX_Matrix* pParentMatrix, + CPDF_Type3Char* pType3Char, + int level) { + m_pType3Char = pType3Char; + m_pObjectHolder = pForm; + m_bForm = true; + CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrixFor("Matrix"); + if (pGraphicStates) + form_matrix.Concat(pGraphicStates->m_CTM); + CPDF_Array* pBBox = pForm->m_pFormDict->GetArrayFor("BBox"); + CFX_FloatRect form_bbox; + CPDF_Path ClipPath; + if (pBBox) { + form_bbox = pBBox->GetRect(); + ClipPath.Emplace(); + ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, + form_bbox.top); + ClipPath.Transform(&form_matrix); + if (pParentMatrix) + ClipPath.Transform(pParentMatrix); + form_bbox.Transform(&form_matrix); + if (pParentMatrix) + form_bbox.Transform(pParentMatrix); + } + CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDictFor("Resources"); + m_pParser.reset(new CPDF_StreamContentParser( + pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, + pParentMatrix, pForm, pResources, &form_bbox, pGraphicStates, level)); + m_pParser->GetCurStates()->m_CTM = form_matrix; + m_pParser->GetCurStates()->m_ParentMatrix = form_matrix; + if (ClipPath) { + m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, + true); + } + if (pForm->m_Transparency & PDFTRANS_GROUP) { + CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState; + pState->SetBlendType(FXDIB_BLEND_NORMAL); + pState->SetStrokeAlpha(1.0f); + pState->SetFillAlpha(1.0f); + pState->SetSoftMask(nullptr); + } + m_nStreams = 0; + m_pSingleStream.reset(new CPDF_StreamAcc); + m_pSingleStream->LoadAllData(pForm->m_pFormStream, false); + m_pData = (uint8_t*)m_pSingleStream->GetData(); + m_Size = m_pSingleStream->GetSize(); + m_Status = ToBeContinued; + m_InternalStage = STAGE_PARSE; + m_CurrentOffset = 0; +} + +void CPDF_ContentParser::Continue(IFX_Pause* pPause) { + int steps = 0; + while (m_Status == ToBeContinued) { + if (m_InternalStage == STAGE_GETCONTENT) { + if (m_CurrentOffset == m_nStreams) { + if (!m_StreamArray.empty()) { + FX_SAFE_UINT32 safeSize = 0; + for (const auto& stream : m_StreamArray) { + safeSize += stream->GetSize(); + safeSize += 1; + } + if (!safeSize.IsValid()) { + m_Status = Done; + return; + } + m_Size = safeSize.ValueOrDie(); + m_pData = FX_Alloc(uint8_t, m_Size); + uint32_t pos = 0; + for (const auto& stream : m_StreamArray) { + FXSYS_memcpy(m_pData + pos, stream->GetData(), stream->GetSize()); + pos += stream->GetSize(); + m_pData[pos++] = ' '; + } + m_StreamArray.clear(); + } else { + m_pData = (uint8_t*)m_pSingleStream->GetData(); + m_Size = m_pSingleStream->GetSize(); + } + m_InternalStage = STAGE_PARSE; + m_CurrentOffset = 0; + } else { + CPDF_Array* pContent = + m_pObjectHolder->m_pFormDict->GetArrayFor("Contents"); + m_StreamArray[m_CurrentOffset].reset(new CPDF_StreamAcc); + CPDF_Stream* pStreamObj = ToStream( + pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr); + m_StreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, false); + m_CurrentOffset++; + } + } + if (m_InternalStage == STAGE_PARSE) { + if (!m_pParser) { + m_pParser.reset(new CPDF_StreamContentParser( + m_pObjectHolder->m_pDocument, m_pObjectHolder->m_pPageResources, + nullptr, nullptr, m_pObjectHolder, m_pObjectHolder->m_pResources, + &m_pObjectHolder->m_BBox, nullptr, 0)); + m_pParser->GetCurStates()->m_ColorState.SetDefault(); + } + if (m_CurrentOffset >= m_Size) { + m_InternalStage = STAGE_CHECKCLIP; + } else { + m_CurrentOffset += + m_pParser->Parse(m_pData + m_CurrentOffset, + m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); + } + } + if (m_InternalStage == STAGE_CHECKCLIP) { + if (m_pType3Char) { + m_pType3Char->m_bColored = m_pParser->IsColored(); + m_pType3Char->m_Width = + FXSYS_round(m_pParser->GetType3Data()[0] * 1000); + m_pType3Char->m_BBox.left = + FXSYS_round(m_pParser->GetType3Data()[2] * 1000); + m_pType3Char->m_BBox.bottom = + FXSYS_round(m_pParser->GetType3Data()[3] * 1000); + m_pType3Char->m_BBox.right = + FXSYS_round(m_pParser->GetType3Data()[4] * 1000); + m_pType3Char->m_BBox.top = + FXSYS_round(m_pParser->GetType3Data()[5] * 1000); + } + for (auto& pObj : *m_pObjectHolder->GetPageObjectList()) { + if (!pObj->m_ClipPath) + continue; + if (pObj->m_ClipPath.GetPathCount() != 1) + continue; + if (pObj->m_ClipPath.GetTextCount()) + continue; + CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0); + if (!ClipPath.IsRect() || pObj->IsShading()) + continue; + CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0), + ClipPath.GetPointX(2), ClipPath.GetPointY(2)); + CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, + pObj->m_Top); + if (old_rect.Contains(obj_rect)) + pObj->m_ClipPath.SetNull(); + } + m_Status = Done; + return; + } + steps++; + if (pPause && pPause->NeedToPauseNow()) + break; + } +} diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp index 7618f8271f..cd77c0b633 100644 --- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp +++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp @@ -6,6 +6,10 @@ #include "core/fpdfapi/page/cpdf_streamcontentparser.h" +#include +#include +#include + #include "core/fpdfapi/font/cpdf_font.h" #include "core/fpdfapi/font/cpdf_type3font.h" #include "core/fpdfapi/page/cpdf_allstates.h" @@ -22,11 +26,14 @@ #include "core/fpdfapi/page/cpdf_textobject.h" #include "core/fpdfapi/page/pageint.h" #include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fxcrt/fx_safe_types.h" +#include "core/fxge/cfx_graphstatedata.h" #include "third_party/base/ptr_util.h" namespace { @@ -122,8 +129,116 @@ CFX_FloatRect GetShadingBBox(CPDF_ShadingPattern* pShading, return rect; } +struct AbbrPair { + const FX_CHAR* abbr; + const FX_CHAR* full_name; +}; + +const AbbrPair InlineKeyAbbr[] = { + {"BPC", "BitsPerComponent"}, {"CS", "ColorSpace"}, {"D", "Decode"}, + {"DP", "DecodeParms"}, {"F", "Filter"}, {"H", "Height"}, + {"IM", "ImageMask"}, {"I", "Interpolate"}, {"W", "Width"}, +}; + +const AbbrPair InlineValueAbbr[] = { + {"G", "DeviceGray"}, {"RGB", "DeviceRGB"}, + {"CMYK", "DeviceCMYK"}, {"I", "Indexed"}, + {"AHx", "ASCIIHexDecode"}, {"A85", "ASCII85Decode"}, + {"LZW", "LZWDecode"}, {"Fl", "FlateDecode"}, + {"RL", "RunLengthDecode"}, {"CCF", "CCITTFaxDecode"}, + {"DCT", "DCTDecode"}, +}; + +struct AbbrReplacementOp { + bool is_replace_key; + CFX_ByteString key; + CFX_ByteStringC replacement; +}; + +CFX_ByteStringC FindFullName(const AbbrPair* table, + size_t count, + const CFX_ByteStringC& abbr) { + auto it = std::find_if(table, table + count, [abbr](const AbbrPair& pair) { + return pair.abbr == abbr; + }); + return it != table + count ? CFX_ByteStringC(it->full_name) + : CFX_ByteStringC(); +} + +void ReplaceAbbr(CPDF_Object* pObj) { + switch (pObj->GetType()) { + case CPDF_Object::DICTIONARY: { + CPDF_Dictionary* pDict = pObj->AsDictionary(); + std::vector replacements; + for (const auto& it : *pDict) { + CFX_ByteString key = it.first; + CPDF_Object* value = it.second; + CFX_ByteStringC fullname = FindFullName( + InlineKeyAbbr, FX_ArraySize(InlineKeyAbbr), key.AsStringC()); + if (!fullname.IsEmpty()) { + AbbrReplacementOp op; + op.is_replace_key = true; + op.key = key; + op.replacement = fullname; + replacements.push_back(op); + key = fullname; + } + + if (value->IsName()) { + CFX_ByteString name = value->GetString(); + fullname = FindFullName( + InlineValueAbbr, FX_ArraySize(InlineValueAbbr), name.AsStringC()); + if (!fullname.IsEmpty()) { + AbbrReplacementOp op; + op.is_replace_key = false; + op.key = key; + op.replacement = fullname; + replacements.push_back(op); + } + } else { + ReplaceAbbr(value); + } + } + for (const auto& op : replacements) { + if (op.is_replace_key) + pDict->ReplaceKey(op.key, CFX_ByteString(op.replacement)); + else + pDict->SetNameFor(op.key, CFX_ByteString(op.replacement)); + } + break; + } + case CPDF_Object::ARRAY: { + CPDF_Array* pArray = pObj->AsArray(); + for (size_t i = 0; i < pArray->GetCount(); i++) { + CPDF_Object* pElement = pArray->GetObjectAt(i); + if (pElement->IsName()) { + CFX_ByteString name = pElement->GetString(); + CFX_ByteStringC fullname = FindFullName( + InlineValueAbbr, FX_ArraySize(InlineValueAbbr), name.AsStringC()); + if (!fullname.IsEmpty()) + pArray->SetAt(i, new CPDF_Name(CFX_ByteString(fullname))); + } else { + ReplaceAbbr(pElement); + } + } + break; + } + default: + break; + } +} + } // namespace +CFX_ByteStringC PDF_FindKeyAbbreviationForTesting(const CFX_ByteStringC& abbr) { + return FindFullName(InlineKeyAbbr, FX_ArraySize(InlineKeyAbbr), abbr); +} + +CFX_ByteStringC PDF_FindValueAbbreviationForTesting( + const CFX_ByteStringC& abbr) { + return FindFullName(InlineValueAbbr, FX_ArraySize(InlineValueAbbr), abbr); +} + CPDF_StreamContentParser::CPDF_StreamContentParser( CPDF_Document* pDocument, CPDF_Dictionary* pPageResources, @@ -543,7 +658,7 @@ void CPDF_StreamContentParser::Handle_BeginImage() { pDict->SetFor(key, pObj.release()); } } - PDF_ReplaceAbbr(pDict); + ReplaceAbbr(pDict); CPDF_Object* pCSObj = nullptr; if (pDict->KeyExist("ColorSpace")) { pCSObj = pDict->GetDirectObjectFor("ColorSpace"); diff --git a/core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp b/core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp new file mode 100644 index 0000000000..be2fcb09e4 --- /dev/null +++ b/core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp @@ -0,0 +1,34 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/page/pageint.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(cpdf_streamcontentparser, PDF_FindKeyAbbreviation) { + EXPECT_EQ(CFX_ByteStringC("BitsPerComponent"), + PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("BPC"))); + EXPECT_EQ(CFX_ByteStringC("Width"), + PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("W"))); + EXPECT_EQ(CFX_ByteStringC(""), + PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC(""))); + EXPECT_EQ(CFX_ByteStringC(""), + PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("NoInList"))); + // Prefix should not match. + EXPECT_EQ(CFX_ByteStringC(""), + PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("WW"))); +} + +TEST(cpdf_streamcontentparser, PDF_FindValueAbbreviation) { + EXPECT_EQ(CFX_ByteStringC("DeviceGray"), + PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("G"))); + EXPECT_EQ(CFX_ByteStringC("DCTDecode"), + PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("DCT"))); + EXPECT_EQ(CFX_ByteStringC(""), + PDF_FindValueAbbreviationForTesting(CFX_ByteStringC(""))); + EXPECT_EQ(CFX_ByteStringC(""), + PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("NoInList"))); + // Prefix should not match. + EXPECT_EQ(CFX_ByteStringC(""), + PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("II"))); +} diff --git a/core/fpdfapi/page/cpdf_streamparser.cpp b/core/fpdfapi/page/cpdf_streamparser.cpp new file mode 100644 index 0000000000..9d36d0a38b --- /dev/null +++ b/core/fpdfapi/page/cpdf_streamparser.cpp @@ -0,0 +1,627 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/page/pageint.h" + +#include + +#include "core/fpdfapi/cpdf_modulemgr.h" +#include "core/fpdfapi/page/cpdf_docpagedata.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_boolean.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_name.h" +#include "core/fpdfapi/parser/cpdf_null.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_stream.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "core/fpdfapi/parser/fpdf_parser_decode.h" +#include "core/fpdfapi/parser/fpdf_parser_utility.h" +#include "core/fxcodec/fx_codec.h" +#include "core/fxcrt/fx_ext.h" + +CCodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder( + const uint8_t* src_buf, + uint32_t src_size, + int width, + int height, + const CPDF_Dictionary* pParams); + +namespace { + +const uint32_t kMaxNestedArrayLevel = 512; +const uint32_t kMaxWordBuffer = 256; +const FX_STRSIZE kMaxStringLength = 32767; + +uint32_t DecodeAllScanlines(CCodec_ScanlineDecoder* pDecoder, + uint8_t*& dest_buf, + uint32_t& dest_size) { + if (!pDecoder) + return FX_INVALID_OFFSET; + int ncomps = pDecoder->CountComps(); + int bpc = pDecoder->GetBPC(); + int width = pDecoder->GetWidth(); + int height = pDecoder->GetHeight(); + int pitch = (width * ncomps * bpc + 7) / 8; + if (height == 0 || pitch > (1 << 30) / height) { + delete pDecoder; + return FX_INVALID_OFFSET; + } + dest_buf = FX_Alloc2D(uint8_t, pitch, height); + dest_size = pitch * height; // Safe since checked alloc returned. + for (int row = 0; row < height; row++) { + const uint8_t* pLine = pDecoder->GetScanline(row); + if (!pLine) + break; + + FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch); + } + uint32_t srcoff = pDecoder->GetSrcOffset(); + delete pDecoder; + return srcoff; +} + +uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf, + uint32_t limit, + int width, + int height, + CFX_ByteString& decoder, + CPDF_Dictionary* pParam, + uint8_t*& dest_buf, + uint32_t& dest_size) { + if (decoder == "CCITTFaxDecode" || decoder == "CCF") { + CCodec_ScanlineDecoder* pDecoder = + FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); + return DecodeAllScanlines(pDecoder, dest_buf, dest_size); + } + if (decoder == "ASCII85Decode" || decoder == "A85") + return A85Decode(src_buf, limit, dest_buf, dest_size); + if (decoder == "ASCIIHexDecode" || decoder == "AHx") + return HexDecode(src_buf, limit, dest_buf, dest_size); + if (decoder == "FlateDecode" || decoder == "Fl") { + return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size, + dest_buf, dest_size); + } + if (decoder == "LZWDecode" || decoder == "LZW") { + return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf, + dest_size); + } + if (decoder == "DCTDecode" || decoder == "DCT") { + CCodec_ScanlineDecoder* pDecoder = + CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder( + src_buf, limit, width, height, 0, + !pParam || pParam->GetIntegerFor("ColorTransform", 1)); + return DecodeAllScanlines(pDecoder, dest_buf, dest_size); + } + if (decoder == "RunLengthDecode" || decoder == "RL") + return RunLengthDecode(src_buf, limit, dest_buf, dest_size); + dest_size = 0; + dest_buf = 0; + return (uint32_t)-1; +} + +} // namespace + +CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize) + : m_pBuf(pData), + m_Size(dwSize), + m_Pos(0), + m_pLastObj(nullptr), + m_pPool(nullptr) {} + +CPDF_StreamParser::CPDF_StreamParser( + const uint8_t* pData, + uint32_t dwSize, + const CFX_WeakPtr& pPool) + : m_pBuf(pData), + m_Size(dwSize), + m_Pos(0), + m_pLastObj(nullptr), + m_pPool(pPool) {} + +CPDF_StreamParser::~CPDF_StreamParser() { + delete m_pLastObj; +} + +CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, + CPDF_Dictionary* pDict, + CPDF_Object* pCSObj) { + if (m_Pos == m_Size) + return nullptr; + + if (PDFCharIsWhitespace(m_pBuf[m_Pos])) + m_Pos++; + + CFX_ByteString Decoder; + CPDF_Dictionary* pParam = nullptr; + CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter"); + if (pFilter) { + if (CPDF_Array* pArray = pFilter->AsArray()) { + Decoder = pArray->GetStringAt(0); + CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms"); + if (pParams) + pParam = pParams->GetDictAt(0); + } else { + Decoder = pFilter->GetString(); + pParam = pDict->GetDictFor("DecodeParms"); + } + } + uint32_t width = pDict->GetIntegerFor("Width"); + uint32_t height = pDict->GetIntegerFor("Height"); + uint32_t OrigSize = 0; + if (pCSObj) { + uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent"); + uint32_t nComponents = 1; + CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj); + if (pCS) { + nComponents = pCS->CountComponents(); + pDoc->GetPageData()->ReleaseColorSpace(pCSObj); + } else { + nComponents = 3; + } + uint32_t pitch = width; + if (bpc && pitch > INT_MAX / bpc) + return nullptr; + + pitch *= bpc; + if (nComponents && pitch > INT_MAX / nComponents) + return nullptr; + + pitch *= nComponents; + if (pitch > INT_MAX - 7) + return nullptr; + + pitch += 7; + pitch /= 8; + OrigSize = pitch; + } else { + if (width > INT_MAX - 7) + return nullptr; + + OrigSize = ((width + 7) / 8); + } + if (height && OrigSize > INT_MAX / height) + return nullptr; + + OrigSize *= height; + uint8_t* pData = nullptr; + uint32_t dwStreamSize; + if (Decoder.IsEmpty()) { + if (OrigSize > m_Size - m_Pos) + OrigSize = m_Size - m_Pos; + pData = FX_Alloc(uint8_t, OrigSize); + FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize); + dwStreamSize = OrigSize; + m_Pos += OrigSize; + } else { + uint32_t dwDestSize = OrigSize; + dwStreamSize = + PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, + Decoder, pParam, pData, dwDestSize); + FX_Free(pData); + if (static_cast(dwStreamSize) < 0) + return nullptr; + + uint32_t dwSavePos = m_Pos; + m_Pos += dwStreamSize; + while (1) { + uint32_t dwPrevPos = m_Pos; + CPDF_StreamParser::SyntaxType type = ParseNextElement(); + if (type == CPDF_StreamParser::EndOfData) + break; + + if (type != CPDF_StreamParser::Keyword) { + dwStreamSize += m_Pos - dwPrevPos; + continue; + } + if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' && + GetWordBuf()[1] == 'I') { + m_Pos = dwPrevPos; + break; + } + dwStreamSize += m_Pos - dwPrevPos; + } + m_Pos = dwSavePos; + pData = FX_Alloc(uint8_t, dwStreamSize); + FXSYS_memcpy(pData, m_pBuf + m_Pos, dwStreamSize); + m_Pos += dwStreamSize; + } + pDict->SetIntegerFor("Length", (int)dwStreamSize); + return new CPDF_Stream(pData, dwStreamSize, pDict); +} + +CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { + delete m_pLastObj; + m_pLastObj = nullptr; + + m_WordSize = 0; + bool bIsNumber = true; + if (!PositionIsInBounds()) + return EndOfData; + + int ch = m_pBuf[m_Pos++]; + while (1) { + while (PDFCharIsWhitespace(ch)) { + if (!PositionIsInBounds()) + return EndOfData; + + ch = m_pBuf[m_Pos++]; + } + + if (ch != '%') + break; + + while (1) { + if (!PositionIsInBounds()) + return EndOfData; + + ch = m_pBuf[m_Pos++]; + if (PDFCharIsLineEnding(ch)) + break; + } + } + + if (PDFCharIsDelimiter(ch) && ch != '/') { + m_Pos--; + m_pLastObj = ReadNextObject(false, 0); + return Others; + } + + while (1) { + if (m_WordSize < kMaxWordBuffer) + m_WordBuffer[m_WordSize++] = ch; + + if (!PDFCharIsNumeric(ch)) + bIsNumber = false; + + if (!PositionIsInBounds()) + break; + + ch = m_pBuf[m_Pos++]; + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { + m_Pos--; + break; + } + } + + m_WordBuffer[m_WordSize] = 0; + if (bIsNumber) + return Number; + + if (m_WordBuffer[0] == '/') + return Name; + + if (m_WordSize == 4) { + if (memcmp(m_WordBuffer, "true", 4) == 0) { + m_pLastObj = new CPDF_Boolean(true); + return Others; + } + if (memcmp(m_WordBuffer, "null", 4) == 0) { + m_pLastObj = new CPDF_Null; + return Others; + } + } else if (m_WordSize == 5) { + if (memcmp(m_WordBuffer, "false", 5) == 0) { + m_pLastObj = new CPDF_Boolean(false); + return Others; + } + } + return Keyword; +} + +CPDF_Object* CPDF_StreamParser::GetObject() { + CPDF_Object* pObj = m_pLastObj; + m_pLastObj = nullptr; + return pObj; +} + +CPDF_Object* CPDF_StreamParser::ReadNextObject(bool bAllowNestedArray, + uint32_t dwInArrayLevel) { + bool bIsNumber; + GetNextWord(bIsNumber); + if (!m_WordSize) + return nullptr; + + if (bIsNumber) { + m_WordBuffer[m_WordSize] = 0; + return new CPDF_Number(CFX_ByteStringC(m_WordBuffer, m_WordSize)); + } + + int first_char = m_WordBuffer[0]; + if (first_char == '/') { + CFX_ByteString name = + PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); + return new CPDF_Name(m_pPool ? m_pPool->Intern(name) : name); + } + + if (first_char == '(') { + CFX_ByteString str = ReadString(); + return new CPDF_String(m_pPool ? m_pPool->Intern(str) : str, false); + } + + if (first_char == '<') { + if (m_WordSize == 1) + return new CPDF_String(ReadHexString(), true); + + CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pPool); + while (1) { + GetNextWord(bIsNumber); + if (m_WordSize == 2 && m_WordBuffer[0] == '>') + break; + + if (!m_WordSize || m_WordBuffer[0] != '/') { + delete pDict; + return nullptr; + } + + CFX_ByteString key = + PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); + CPDF_Object* pObj = ReadNextObject(true, 0); + if (!pObj) { + delete pDict; + return nullptr; + } + + if (key.IsEmpty()) + delete pObj; + else + pDict->SetFor(key, pObj); + } + return pDict; + } + + if (first_char == '[') { + if ((!bAllowNestedArray && dwInArrayLevel) || + dwInArrayLevel > kMaxNestedArrayLevel) { + return nullptr; + } + + CPDF_Array* pArray = new CPDF_Array; + while (1) { + CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, dwInArrayLevel + 1); + if (pObj) { + pArray->Add(pObj); + continue; + } + + if (!m_WordSize || m_WordBuffer[0] == ']') + break; + } + return pArray; + } + + if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5)) + return new CPDF_Boolean(false); + + if (m_WordSize == 4) { + if (memcmp(m_WordBuffer, "true", 4) == 0) + return new CPDF_Boolean(true); + + if (memcmp(m_WordBuffer, "null", 4) == 0) + return new CPDF_Null; + } + + return nullptr; +} + +// TODO(npm): the following methods are almost identical in cpdf_syntaxparser +void CPDF_StreamParser::GetNextWord(bool& bIsNumber) { + m_WordSize = 0; + bIsNumber = true; + if (!PositionIsInBounds()) + return; + + int ch = m_pBuf[m_Pos++]; + while (1) { + while (PDFCharIsWhitespace(ch)) { + if (!PositionIsInBounds()) { + return; + } + ch = m_pBuf[m_Pos++]; + } + + if (ch != '%') + break; + + while (1) { + if (!PositionIsInBounds()) + return; + ch = m_pBuf[m_Pos++]; + if (PDFCharIsLineEnding(ch)) + break; + } + } + + if (PDFCharIsDelimiter(ch)) { + bIsNumber = false; + m_WordBuffer[m_WordSize++] = ch; + if (ch == '/') { + while (1) { + if (!PositionIsInBounds()) + return; + ch = m_pBuf[m_Pos++]; + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { + m_Pos--; + return; + } + + if (m_WordSize < kMaxWordBuffer) + m_WordBuffer[m_WordSize++] = ch; + } + } else if (ch == '<') { + if (!PositionIsInBounds()) + return; + ch = m_pBuf[m_Pos++]; + if (ch == '<') + m_WordBuffer[m_WordSize++] = ch; + else + m_Pos--; + } else if (ch == '>') { + if (!PositionIsInBounds()) + return; + ch = m_pBuf[m_Pos++]; + if (ch == '>') + m_WordBuffer[m_WordSize++] = ch; + else + m_Pos--; + } + return; + } + + while (1) { + if (m_WordSize < kMaxWordBuffer) + m_WordBuffer[m_WordSize++] = ch; + if (!PDFCharIsNumeric(ch)) + bIsNumber = false; + + if (!PositionIsInBounds()) + return; + ch = m_pBuf[m_Pos++]; + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { + m_Pos--; + break; + } + } +} + +CFX_ByteString CPDF_StreamParser::ReadString() { + if (!PositionIsInBounds()) + return CFX_ByteString(); + + uint8_t ch = m_pBuf[m_Pos++]; + CFX_ByteTextBuf buf; + int parlevel = 0; + int status = 0; + int iEscCode = 0; + while (1) { + switch (status) { + case 0: + if (ch == ')') { + if (parlevel == 0) { + if (buf.GetLength() > kMaxStringLength) { + return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); + } + return buf.MakeString(); + } + parlevel--; + buf.AppendChar(')'); + } else if (ch == '(') { + parlevel++; + buf.AppendChar('('); + } else if (ch == '\\') { + status = 1; + } else { + buf.AppendChar((char)ch); + } + break; + case 1: + if (ch >= '0' && ch <= '7') { + iEscCode = FXSYS_toDecimalDigit(static_cast(ch)); + status = 2; + break; + } + if (ch == 'n') { + buf.AppendChar('\n'); + } else if (ch == 'r') { + buf.AppendChar('\r'); + } else if (ch == 't') { + buf.AppendChar('\t'); + } else if (ch == 'b') { + buf.AppendChar('\b'); + } else if (ch == 'f') { + buf.AppendChar('\f'); + } else if (ch == '\r') { + status = 4; + break; + } else if (ch == '\n') { + } else { + buf.AppendChar(ch); + } + status = 0; + break; + case 2: + if (ch >= '0' && ch <= '7') { + iEscCode = + iEscCode * 8 + FXSYS_toDecimalDigit(static_cast(ch)); + status = 3; + } else { + buf.AppendChar(iEscCode); + status = 0; + continue; + } + break; + case 3: + if (ch >= '0' && ch <= '7') { + iEscCode = + iEscCode * 8 + FXSYS_toDecimalDigit(static_cast(ch)); + buf.AppendChar(iEscCode); + status = 0; + } else { + buf.AppendChar(iEscCode); + status = 0; + continue; + } + break; + case 4: + status = 0; + if (ch != '\n') { + continue; + } + break; + } + if (!PositionIsInBounds()) + break; + + ch = m_pBuf[m_Pos++]; + } + if (PositionIsInBounds()) + ++m_Pos; + + if (buf.GetLength() > kMaxStringLength) { + return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); + } + return buf.MakeString(); +} + +CFX_ByteString CPDF_StreamParser::ReadHexString() { + if (!PositionIsInBounds()) + return CFX_ByteString(); + + CFX_ByteTextBuf buf; + bool bFirst = true; + int code = 0; + while (PositionIsInBounds()) { + int ch = m_pBuf[m_Pos++]; + + if (ch == '>') + break; + + if (!std::isxdigit(ch)) + continue; + + int val = FXSYS_toHexDigit(ch); + if (bFirst) { + code = val * 16; + } else { + code += val; + buf.AppendByte((uint8_t)code); + } + bFirst = !bFirst; + } + if (!bFirst) + buf.AppendChar((char)code); + + if (buf.GetLength() > kMaxStringLength) + return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); + + return buf.MakeString(); +} + +bool CPDF_StreamParser::PositionIsInBounds() const { + return m_Pos < m_Size; +} diff --git a/core/fpdfapi/page/cpdf_streamparser_unittest.cpp b/core/fpdfapi/page/cpdf_streamparser_unittest.cpp new file mode 100644 index 0000000000..f2a5a542f8 --- /dev/null +++ b/core/fpdfapi/page/cpdf_streamparser_unittest.cpp @@ -0,0 +1,47 @@ +// Copyright 2015 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/page/pageint.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(cpdf_streamparser, ReadHexString) { + { + // Position out of bounds. + uint8_t data[] = "12ab>"; + CPDF_StreamParser parser(data, 5); + parser.SetPos(6); + EXPECT_EQ("", parser.ReadHexString()); + } + + { + // Regular conversion. + uint8_t data[] = "1A2b>abcd"; + CPDF_StreamParser parser(data, 5); + EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); + EXPECT_EQ(5u, parser.GetPos()); + } + + { + // Missing ending > + uint8_t data[] = "1A2b"; + CPDF_StreamParser parser(data, 5); + EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); + EXPECT_EQ(5u, parser.GetPos()); + } + + { + // Uneven number of bytes. + uint8_t data[] = "1A2>asdf"; + CPDF_StreamParser parser(data, 5); + EXPECT_EQ("\x1a\x20", parser.ReadHexString()); + EXPECT_EQ(4u, parser.GetPos()); + } + + { + uint8_t data[] = ">"; + CPDF_StreamParser parser(data, 5); + EXPECT_EQ("", parser.ReadHexString()); + EXPECT_EQ(1u, parser.GetPos()); + } +} diff --git a/core/fpdfapi/page/fpdf_page_parser.cpp b/core/fpdfapi/page/fpdf_page_parser.cpp deleted file mode 100644 index b6f0bc605c..0000000000 --- a/core/fpdfapi/page/fpdf_page_parser.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fpdfapi/page/pageint.h" - -#include -#include -#include - -#include "core/fpdfapi/edit/cpdf_creator.h" -#include "core/fpdfapi/font/cpdf_font.h" -#include "core/fpdfapi/font/cpdf_type3font.h" -#include "core/fpdfapi/page/cpdf_allstates.h" -#include "core/fpdfapi/page/cpdf_docpagedata.h" -#include "core/fpdfapi/page/cpdf_form.h" -#include "core/fpdfapi/page/cpdf_formobject.h" -#include "core/fpdfapi/page/cpdf_image.h" -#include "core/fpdfapi/page/cpdf_imageobject.h" -#include "core/fpdfapi/page/cpdf_meshstream.h" -#include "core/fpdfapi/page/cpdf_pageobject.h" -#include "core/fpdfapi/page/cpdf_pathobject.h" -#include "core/fpdfapi/page/cpdf_shadingobject.h" -#include "core/fpdfapi/page/cpdf_shadingpattern.h" -#include "core/fpdfapi/page/cpdf_textobject.h" -#include "core/fpdfapi/parser/cpdf_array.h" -#include "core/fpdfapi/parser/cpdf_dictionary.h" -#include "core/fpdfapi/parser/cpdf_document.h" -#include "core/fpdfapi/parser/cpdf_name.h" -#include "core/fpdfapi/parser/cpdf_number.h" -#include "core/fpdfapi/parser/cpdf_reference.h" -#include "core/fpdfapi/parser/cpdf_stream.h" -#include "core/fpdfapi/parser/cpdf_stream_acc.h" -#include "core/fpdfapi/parser/fpdf_parser_decode.h" -#include "core/fxcrt/fx_safe_types.h" -#include "core/fxge/cfx_graphstatedata.h" -#include "core/fxge/cfx_pathdata.h" -#include "third_party/base/ptr_util.h" - -namespace { - -struct PDF_AbbrPair { - const FX_CHAR* abbr; - const FX_CHAR* full_name; -}; - -const PDF_AbbrPair PDF_InlineKeyAbbr[] = { - {"BPC", "BitsPerComponent"}, {"CS", "ColorSpace"}, {"D", "Decode"}, - {"DP", "DecodeParms"}, {"F", "Filter"}, {"H", "Height"}, - {"IM", "ImageMask"}, {"I", "Interpolate"}, {"W", "Width"}, -}; - -const PDF_AbbrPair PDF_InlineValueAbbr[] = { - {"G", "DeviceGray"}, {"RGB", "DeviceRGB"}, - {"CMYK", "DeviceCMYK"}, {"I", "Indexed"}, - {"AHx", "ASCIIHexDecode"}, {"A85", "ASCII85Decode"}, - {"LZW", "LZWDecode"}, {"Fl", "FlateDecode"}, - {"RL", "RunLengthDecode"}, {"CCF", "CCITTFaxDecode"}, - {"DCT", "DCTDecode"}, -}; - -struct AbbrReplacementOp { - bool is_replace_key; - CFX_ByteString key; - CFX_ByteStringC replacement; -}; - -CFX_ByteStringC PDF_FindFullName(const PDF_AbbrPair* table, - size_t count, - const CFX_ByteStringC& abbr) { - auto it = std::find_if( - table, table + count, - [abbr](const PDF_AbbrPair& pair) { return pair.abbr == abbr; }); - return it != table + count ? CFX_ByteStringC(it->full_name) - : CFX_ByteStringC(); -} - -} // namespace - -CFX_ByteStringC PDF_FindKeyAbbreviationForTesting(const CFX_ByteStringC& abbr) { - return PDF_FindFullName(PDF_InlineKeyAbbr, FX_ArraySize(PDF_InlineKeyAbbr), - abbr); -} - -CFX_ByteStringC PDF_FindValueAbbreviationForTesting( - const CFX_ByteStringC& abbr) { - return PDF_FindFullName(PDF_InlineValueAbbr, - FX_ArraySize(PDF_InlineValueAbbr), abbr); -} - -void PDF_ReplaceAbbr(CPDF_Object* pObj) { - switch (pObj->GetType()) { - case CPDF_Object::DICTIONARY: { - CPDF_Dictionary* pDict = pObj->AsDictionary(); - std::vector replacements; - for (const auto& it : *pDict) { - CFX_ByteString key = it.first; - CPDF_Object* value = it.second; - CFX_ByteStringC fullname = - PDF_FindFullName(PDF_InlineKeyAbbr, FX_ArraySize(PDF_InlineKeyAbbr), - key.AsStringC()); - if (!fullname.IsEmpty()) { - AbbrReplacementOp op; - op.is_replace_key = true; - op.key = key; - op.replacement = fullname; - replacements.push_back(op); - key = fullname; - } - - if (value->IsName()) { - CFX_ByteString name = value->GetString(); - fullname = PDF_FindFullName(PDF_InlineValueAbbr, - FX_ArraySize(PDF_InlineValueAbbr), - name.AsStringC()); - if (!fullname.IsEmpty()) { - AbbrReplacementOp op; - op.is_replace_key = false; - op.key = key; - op.replacement = fullname; - replacements.push_back(op); - } - } else { - PDF_ReplaceAbbr(value); - } - } - for (const auto& op : replacements) { - if (op.is_replace_key) - pDict->ReplaceKey(op.key, CFX_ByteString(op.replacement)); - else - pDict->SetNameFor(op.key, CFX_ByteString(op.replacement)); - } - break; - } - case CPDF_Object::ARRAY: { - CPDF_Array* pArray = pObj->AsArray(); - for (size_t i = 0; i < pArray->GetCount(); i++) { - CPDF_Object* pElement = pArray->GetObjectAt(i); - if (pElement->IsName()) { - CFX_ByteString name = pElement->GetString(); - CFX_ByteStringC fullname = PDF_FindFullName( - PDF_InlineValueAbbr, FX_ArraySize(PDF_InlineValueAbbr), - name.AsStringC()); - if (!fullname.IsEmpty()) { - pArray->SetAt(i, new CPDF_Name(CFX_ByteString(fullname))); - } - } else { - PDF_ReplaceAbbr(pElement); - } - } - break; - } - default: - break; - } -} diff --git a/core/fpdfapi/page/fpdf_page_parser_old.cpp b/core/fpdfapi/page/fpdf_page_parser_old.cpp deleted file mode 100644 index 51ffc11b03..0000000000 --- a/core/fpdfapi/page/fpdf_page_parser_old.cpp +++ /dev/null @@ -1,842 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fpdfapi/page/pageint.h" - -#include - -#include "core/fpdfapi/cpdf_modulemgr.h" -#include "core/fpdfapi/font/cpdf_type3char.h" -#include "core/fpdfapi/page/cpdf_allstates.h" -#include "core/fpdfapi/page/cpdf_docpagedata.h" -#include "core/fpdfapi/page/cpdf_form.h" -#include "core/fpdfapi/page/cpdf_page.h" -#include "core/fpdfapi/page/cpdf_pageobject.h" -#include "core/fpdfapi/page/cpdf_path.h" -#include "core/fpdfapi/parser/cpdf_array.h" -#include "core/fpdfapi/parser/cpdf_boolean.h" -#include "core/fpdfapi/parser/cpdf_dictionary.h" -#include "core/fpdfapi/parser/cpdf_document.h" -#include "core/fpdfapi/parser/cpdf_name.h" -#include "core/fpdfapi/parser/cpdf_null.h" -#include "core/fpdfapi/parser/cpdf_number.h" -#include "core/fpdfapi/parser/cpdf_stream.h" -#include "core/fpdfapi/parser/cpdf_stream_acc.h" -#include "core/fpdfapi/parser/cpdf_string.h" -#include "core/fpdfapi/parser/fpdf_parser_decode.h" -#include "core/fpdfapi/parser/fpdf_parser_utility.h" -#include "core/fxcodec/fx_codec.h" -#include "core/fxcrt/fx_ext.h" -#include "core/fxcrt/fx_safe_types.h" -#include "core/fxge/cfx_fxgedevice.h" -#include "core/fxge/cfx_renderdevice.h" - -CCodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder( - const uint8_t* src_buf, - uint32_t src_size, - int width, - int height, - const CPDF_Dictionary* pParams); - -namespace { - -const uint32_t kMaxNestedArrayLevel = 512; -const uint32_t kMaxWordBuffer = 256; -const FX_STRSIZE kMaxStringLength = 32767; - -uint32_t DecodeAllScanlines(CCodec_ScanlineDecoder* pDecoder, - uint8_t*& dest_buf, - uint32_t& dest_size) { - if (!pDecoder) { - return FX_INVALID_OFFSET; - } - int ncomps = pDecoder->CountComps(); - int bpc = pDecoder->GetBPC(); - int width = pDecoder->GetWidth(); - int height = pDecoder->GetHeight(); - int pitch = (width * ncomps * bpc + 7) / 8; - if (height == 0 || pitch > (1 << 30) / height) { - delete pDecoder; - return FX_INVALID_OFFSET; - } - dest_buf = FX_Alloc2D(uint8_t, pitch, height); - dest_size = pitch * height; // Safe since checked alloc returned. - for (int row = 0; row < height; row++) { - const uint8_t* pLine = pDecoder->GetScanline(row); - if (!pLine) - break; - - FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch); - } - uint32_t srcoff = pDecoder->GetSrcOffset(); - delete pDecoder; - return srcoff; -} - -uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf, - uint32_t limit, - int width, - int height, - CFX_ByteString& decoder, - CPDF_Dictionary* pParam, - uint8_t*& dest_buf, - uint32_t& dest_size) { - if (decoder == "CCITTFaxDecode" || decoder == "CCF") { - CCodec_ScanlineDecoder* pDecoder = - FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); - return DecodeAllScanlines(pDecoder, dest_buf, dest_size); - } - if (decoder == "ASCII85Decode" || decoder == "A85") { - return A85Decode(src_buf, limit, dest_buf, dest_size); - } - if (decoder == "ASCIIHexDecode" || decoder == "AHx") { - return HexDecode(src_buf, limit, dest_buf, dest_size); - } - if (decoder == "FlateDecode" || decoder == "Fl") { - return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size, - dest_buf, dest_size); - } - if (decoder == "LZWDecode" || decoder == "LZW") { - return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf, - dest_size); - } - if (decoder == "DCTDecode" || decoder == "DCT") { - CCodec_ScanlineDecoder* pDecoder = - CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder( - src_buf, limit, width, height, 0, - !pParam || pParam->GetIntegerFor("ColorTransform", 1)); - return DecodeAllScanlines(pDecoder, dest_buf, dest_size); - } - if (decoder == "RunLengthDecode" || decoder == "RL") { - return RunLengthDecode(src_buf, limit, dest_buf, dest_size); - } - dest_size = 0; - dest_buf = 0; - return (uint32_t)-1; -} - -} // namespace - -CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize) - : m_pBuf(pData), - m_Size(dwSize), - m_Pos(0), - m_pLastObj(nullptr), - m_pPool(nullptr) {} - -CPDF_StreamParser::CPDF_StreamParser( - const uint8_t* pData, - uint32_t dwSize, - const CFX_WeakPtr& pPool) - : m_pBuf(pData), - m_Size(dwSize), - m_Pos(0), - m_pLastObj(nullptr), - m_pPool(pPool) {} - -CPDF_StreamParser::~CPDF_StreamParser() { - delete m_pLastObj; -} - -CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, - CPDF_Dictionary* pDict, - CPDF_Object* pCSObj) { - if (m_Pos == m_Size) - return nullptr; - - if (PDFCharIsWhitespace(m_pBuf[m_Pos])) - m_Pos++; - - CFX_ByteString Decoder; - CPDF_Dictionary* pParam = nullptr; - CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter"); - if (pFilter) { - if (CPDF_Array* pArray = pFilter->AsArray()) { - Decoder = pArray->GetStringAt(0); - CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms"); - if (pParams) - pParam = pParams->GetDictAt(0); - } else { - Decoder = pFilter->GetString(); - pParam = pDict->GetDictFor("DecodeParms"); - } - } - uint32_t width = pDict->GetIntegerFor("Width"); - uint32_t height = pDict->GetIntegerFor("Height"); - uint32_t OrigSize = 0; - if (pCSObj) { - uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent"); - uint32_t nComponents = 1; - CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj); - if (pCS) { - nComponents = pCS->CountComponents(); - pDoc->GetPageData()->ReleaseColorSpace(pCSObj); - } else { - nComponents = 3; - } - uint32_t pitch = width; - if (bpc && pitch > INT_MAX / bpc) - return nullptr; - - pitch *= bpc; - if (nComponents && pitch > INT_MAX / nComponents) - return nullptr; - - pitch *= nComponents; - if (pitch > INT_MAX - 7) - return nullptr; - - pitch += 7; - pitch /= 8; - OrigSize = pitch; - } else { - if (width > INT_MAX - 7) - return nullptr; - - OrigSize = ((width + 7) / 8); - } - if (height && OrigSize > INT_MAX / height) - return nullptr; - - OrigSize *= height; - uint8_t* pData = nullptr; - uint32_t dwStreamSize; - if (Decoder.IsEmpty()) { - if (OrigSize > m_Size - m_Pos) { - OrigSize = m_Size - m_Pos; - } - pData = FX_Alloc(uint8_t, OrigSize); - FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize); - dwStreamSize = OrigSize; - m_Pos += OrigSize; - } else { - uint32_t dwDestSize = OrigSize; - dwStreamSize = - PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, - Decoder, pParam, pData, dwDestSize); - FX_Free(pData); - if ((int)dwStreamSize < 0) - return nullptr; - - uint32_t dwSavePos = m_Pos; - m_Pos += dwStreamSize; - while (1) { - uint32_t dwPrevPos = m_Pos; - CPDF_StreamParser::SyntaxType type = ParseNextElement(); - if (type == CPDF_StreamParser::EndOfData) - break; - - if (type != CPDF_StreamParser::Keyword) { - dwStreamSize += m_Pos - dwPrevPos; - continue; - } - if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' && - GetWordBuf()[1] == 'I') { - m_Pos = dwPrevPos; - break; - } - dwStreamSize += m_Pos - dwPrevPos; - } - m_Pos = dwSavePos; - pData = FX_Alloc(uint8_t, dwStreamSize); - FXSYS_memcpy(pData, m_pBuf + m_Pos, dwStreamSize); - m_Pos += dwStreamSize; - } - pDict->SetIntegerFor("Length", (int)dwStreamSize); - return new CPDF_Stream(pData, dwStreamSize, pDict); -} - -CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { - delete m_pLastObj; - m_pLastObj = nullptr; - - m_WordSize = 0; - bool bIsNumber = true; - if (!PositionIsInBounds()) - return EndOfData; - - int ch = m_pBuf[m_Pos++]; - while (1) { - while (PDFCharIsWhitespace(ch)) { - if (!PositionIsInBounds()) - return EndOfData; - - ch = m_pBuf[m_Pos++]; - } - - if (ch != '%') - break; - - while (1) { - if (!PositionIsInBounds()) - return EndOfData; - - ch = m_pBuf[m_Pos++]; - if (PDFCharIsLineEnding(ch)) - break; - } - } - - if (PDFCharIsDelimiter(ch) && ch != '/') { - m_Pos--; - m_pLastObj = ReadNextObject(false, 0); - return Others; - } - - while (1) { - if (m_WordSize < kMaxWordBuffer) - m_WordBuffer[m_WordSize++] = ch; - - if (!PDFCharIsNumeric(ch)) - bIsNumber = false; - - if (!PositionIsInBounds()) - break; - - ch = m_pBuf[m_Pos++]; - - if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { - m_Pos--; - break; - } - } - - m_WordBuffer[m_WordSize] = 0; - if (bIsNumber) - return Number; - - if (m_WordBuffer[0] == '/') - return Name; - - if (m_WordSize == 4) { - if (memcmp(m_WordBuffer, "true", 4) == 0) { - m_pLastObj = new CPDF_Boolean(true); - return Others; - } - if (memcmp(m_WordBuffer, "null", 4) == 0) { - m_pLastObj = new CPDF_Null; - return Others; - } - } else if (m_WordSize == 5) { - if (memcmp(m_WordBuffer, "false", 5) == 0) { - m_pLastObj = new CPDF_Boolean(false); - return Others; - } - } - return Keyword; -} - -CPDF_Object* CPDF_StreamParser::GetObject() { - CPDF_Object* pObj = m_pLastObj; - m_pLastObj = nullptr; - return pObj; -} - -CPDF_Object* CPDF_StreamParser::ReadNextObject(bool bAllowNestedArray, - uint32_t dwInArrayLevel) { - bool bIsNumber; - GetNextWord(bIsNumber); - if (!m_WordSize) - return nullptr; - - if (bIsNumber) { - m_WordBuffer[m_WordSize] = 0; - return new CPDF_Number(CFX_ByteStringC(m_WordBuffer, m_WordSize)); - } - - int first_char = m_WordBuffer[0]; - if (first_char == '/') { - CFX_ByteString name = - PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); - return new CPDF_Name(m_pPool ? m_pPool->Intern(name) : name); - } - - if (first_char == '(') { - CFX_ByteString str = ReadString(); - return new CPDF_String(m_pPool ? m_pPool->Intern(str) : str, false); - } - - if (first_char == '<') { - if (m_WordSize == 1) - return new CPDF_String(ReadHexString(), true); - - CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pPool); - while (1) { - GetNextWord(bIsNumber); - if (m_WordSize == 2 && m_WordBuffer[0] == '>') - break; - - if (!m_WordSize || m_WordBuffer[0] != '/') { - delete pDict; - return nullptr; - } - - CFX_ByteString key = - PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); - CPDF_Object* pObj = ReadNextObject(true, 0); - if (!pObj) { - delete pDict; - return nullptr; - } - - if (key.IsEmpty()) - delete pObj; - else - pDict->SetFor(key, pObj); - } - return pDict; - } - - if (first_char == '[') { - if ((!bAllowNestedArray && dwInArrayLevel) || - dwInArrayLevel > kMaxNestedArrayLevel) { - return nullptr; - } - - CPDF_Array* pArray = new CPDF_Array; - while (1) { - CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, dwInArrayLevel + 1); - if (pObj) { - pArray->Add(pObj); - continue; - } - - if (!m_WordSize || m_WordBuffer[0] == ']') - break; - } - return pArray; - } - - if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5)) - return new CPDF_Boolean(false); - - if (m_WordSize == 4) { - if (memcmp(m_WordBuffer, "true", 4) == 0) - return new CPDF_Boolean(true); - - if (memcmp(m_WordBuffer, "null", 4) == 0) - return new CPDF_Null; - } - - return nullptr; -} - -void CPDF_StreamParser::GetNextWord(bool& bIsNumber) { - m_WordSize = 0; - bIsNumber = true; - if (!PositionIsInBounds()) - return; - - int ch = m_pBuf[m_Pos++]; - while (1) { - while (PDFCharIsWhitespace(ch)) { - if (!PositionIsInBounds()) { - return; - } - ch = m_pBuf[m_Pos++]; - } - - if (ch != '%') - break; - - while (1) { - if (!PositionIsInBounds()) - return; - ch = m_pBuf[m_Pos++]; - if (PDFCharIsLineEnding(ch)) - break; - } - } - - if (PDFCharIsDelimiter(ch)) { - bIsNumber = false; - m_WordBuffer[m_WordSize++] = ch; - if (ch == '/') { - while (1) { - if (!PositionIsInBounds()) - return; - ch = m_pBuf[m_Pos++]; - if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { - m_Pos--; - return; - } - - if (m_WordSize < kMaxWordBuffer) - m_WordBuffer[m_WordSize++] = ch; - } - } else if (ch == '<') { - if (!PositionIsInBounds()) - return; - ch = m_pBuf[m_Pos++]; - if (ch == '<') - m_WordBuffer[m_WordSize++] = ch; - else - m_Pos--; - } else if (ch == '>') { - if (!PositionIsInBounds()) - return; - ch = m_pBuf[m_Pos++]; - if (ch == '>') - m_WordBuffer[m_WordSize++] = ch; - else - m_Pos--; - } - return; - } - - while (1) { - if (m_WordSize < kMaxWordBuffer) - m_WordBuffer[m_WordSize++] = ch; - if (!PDFCharIsNumeric(ch)) - bIsNumber = false; - - if (!PositionIsInBounds()) - return; - ch = m_pBuf[m_Pos++]; - if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { - m_Pos--; - break; - } - } -} - -CFX_ByteString CPDF_StreamParser::ReadString() { - if (!PositionIsInBounds()) - return CFX_ByteString(); - - uint8_t ch = m_pBuf[m_Pos++]; - CFX_ByteTextBuf buf; - int parlevel = 0; - int status = 0; - int iEscCode = 0; - while (1) { - switch (status) { - case 0: - if (ch == ')') { - if (parlevel == 0) { - if (buf.GetLength() > kMaxStringLength) { - return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); - } - return buf.MakeString(); - } - parlevel--; - buf.AppendChar(')'); - } else if (ch == '(') { - parlevel++; - buf.AppendChar('('); - } else if (ch == '\\') { - status = 1; - } else { - buf.AppendChar((char)ch); - } - break; - case 1: - if (ch >= '0' && ch <= '7') { - iEscCode = FXSYS_toDecimalDigit(static_cast(ch)); - status = 2; - break; - } - if (ch == 'n') { - buf.AppendChar('\n'); - } else if (ch == 'r') { - buf.AppendChar('\r'); - } else if (ch == 't') { - buf.AppendChar('\t'); - } else if (ch == 'b') { - buf.AppendChar('\b'); - } else if (ch == 'f') { - buf.AppendChar('\f'); - } else if (ch == '\r') { - status = 4; - break; - } else if (ch == '\n') { - } else { - buf.AppendChar(ch); - } - status = 0; - break; - case 2: - if (ch >= '0' && ch <= '7') { - iEscCode = - iEscCode * 8 + FXSYS_toDecimalDigit(static_cast(ch)); - status = 3; - } else { - buf.AppendChar(iEscCode); - status = 0; - continue; - } - break; - case 3: - if (ch >= '0' && ch <= '7') { - iEscCode = - iEscCode * 8 + FXSYS_toDecimalDigit(static_cast(ch)); - buf.AppendChar(iEscCode); - status = 0; - } else { - buf.AppendChar(iEscCode); - status = 0; - continue; - } - break; - case 4: - status = 0; - if (ch != '\n') { - continue; - } - break; - } - if (!PositionIsInBounds()) - break; - - ch = m_pBuf[m_Pos++]; - } - if (PositionIsInBounds()) - ++m_Pos; - - if (buf.GetLength() > kMaxStringLength) { - return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); - } - return buf.MakeString(); -} - -CFX_ByteString CPDF_StreamParser::ReadHexString() { - if (!PositionIsInBounds()) - return CFX_ByteString(); - - CFX_ByteTextBuf buf; - bool bFirst = true; - int code = 0; - while (PositionIsInBounds()) { - int ch = m_pBuf[m_Pos++]; - - if (ch == '>') - break; - - if (!std::isxdigit(ch)) - continue; - - int val = FXSYS_toHexDigit(ch); - if (bFirst) { - code = val * 16; - } else { - code += val; - buf.AppendByte((uint8_t)code); - } - bFirst = !bFirst; - } - if (!bFirst) - buf.AppendChar((char)code); - - if (buf.GetLength() > kMaxStringLength) - return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); - - return buf.MakeString(); -} - -bool CPDF_StreamParser::PositionIsInBounds() const { - return m_Pos < m_Size; -} - -CPDF_ContentParser::CPDF_ContentParser() - : m_Status(Ready), - m_InternalStage(STAGE_GETCONTENT), - m_pObjectHolder(nullptr), - m_bForm(false), - m_pType3Char(nullptr), - m_pData(nullptr), - m_Size(0), - m_CurrentOffset(0) {} - -CPDF_ContentParser::~CPDF_ContentParser() { - if (!m_pSingleStream) - FX_Free(m_pData); -} - -void CPDF_ContentParser::Start(CPDF_Page* pPage) { - if (m_Status != Ready || !pPage || !pPage->m_pDocument || - !pPage->m_pFormDict) { - m_Status = Done; - return; - } - m_pObjectHolder = pPage; - m_bForm = false; - m_Status = ToBeContinued; - m_InternalStage = STAGE_GETCONTENT; - m_CurrentOffset = 0; - - CPDF_Object* pContent = pPage->m_pFormDict->GetDirectObjectFor("Contents"); - if (!pContent) { - m_Status = Done; - return; - } - if (CPDF_Stream* pStream = pContent->AsStream()) { - m_nStreams = 0; - m_pSingleStream.reset(new CPDF_StreamAcc); - m_pSingleStream->LoadAllData(pStream, false); - } else if (CPDF_Array* pArray = pContent->AsArray()) { - m_nStreams = pArray->GetCount(); - if (m_nStreams) - m_StreamArray.resize(m_nStreams); - else - m_Status = Done; - } else { - m_Status = Done; - } -} - -void CPDF_ContentParser::Start(CPDF_Form* pForm, - CPDF_AllStates* pGraphicStates, - const CFX_Matrix* pParentMatrix, - CPDF_Type3Char* pType3Char, - int level) { - m_pType3Char = pType3Char; - m_pObjectHolder = pForm; - m_bForm = true; - CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrixFor("Matrix"); - if (pGraphicStates) { - form_matrix.Concat(pGraphicStates->m_CTM); - } - CPDF_Array* pBBox = pForm->m_pFormDict->GetArrayFor("BBox"); - CFX_FloatRect form_bbox; - CPDF_Path ClipPath; - if (pBBox) { - form_bbox = pBBox->GetRect(); - ClipPath.Emplace(); - ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, - form_bbox.top); - ClipPath.Transform(&form_matrix); - if (pParentMatrix) { - ClipPath.Transform(pParentMatrix); - } - form_bbox.Transform(&form_matrix); - if (pParentMatrix) { - form_bbox.Transform(pParentMatrix); - } - } - CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDictFor("Resources"); - m_pParser.reset(new CPDF_StreamContentParser( - pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, - pParentMatrix, pForm, pResources, &form_bbox, pGraphicStates, level)); - m_pParser->GetCurStates()->m_CTM = form_matrix; - m_pParser->GetCurStates()->m_ParentMatrix = form_matrix; - if (ClipPath) { - m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, - true); - } - if (pForm->m_Transparency & PDFTRANS_GROUP) { - CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState; - pState->SetBlendType(FXDIB_BLEND_NORMAL); - pState->SetStrokeAlpha(1.0f); - pState->SetFillAlpha(1.0f); - pState->SetSoftMask(nullptr); - } - m_nStreams = 0; - m_pSingleStream.reset(new CPDF_StreamAcc); - m_pSingleStream->LoadAllData(pForm->m_pFormStream, false); - m_pData = (uint8_t*)m_pSingleStream->GetData(); - m_Size = m_pSingleStream->GetSize(); - m_Status = ToBeContinued; - m_InternalStage = STAGE_PARSE; - m_CurrentOffset = 0; -} - -void CPDF_ContentParser::Continue(IFX_Pause* pPause) { - int steps = 0; - while (m_Status == ToBeContinued) { - if (m_InternalStage == STAGE_GETCONTENT) { - if (m_CurrentOffset == m_nStreams) { - if (!m_StreamArray.empty()) { - FX_SAFE_UINT32 safeSize = 0; - for (const auto& stream : m_StreamArray) { - safeSize += stream->GetSize(); - safeSize += 1; - } - if (!safeSize.IsValid()) { - m_Status = Done; - return; - } - m_Size = safeSize.ValueOrDie(); - m_pData = FX_Alloc(uint8_t, m_Size); - uint32_t pos = 0; - for (const auto& stream : m_StreamArray) { - FXSYS_memcpy(m_pData + pos, stream->GetData(), stream->GetSize()); - pos += stream->GetSize(); - m_pData[pos++] = ' '; - } - m_StreamArray.clear(); - } else { - m_pData = (uint8_t*)m_pSingleStream->GetData(); - m_Size = m_pSingleStream->GetSize(); - } - m_InternalStage = STAGE_PARSE; - m_CurrentOffset = 0; - } else { - CPDF_Array* pContent = - m_pObjectHolder->m_pFormDict->GetArrayFor("Contents"); - m_StreamArray[m_CurrentOffset].reset(new CPDF_StreamAcc); - CPDF_Stream* pStreamObj = ToStream( - pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr); - m_StreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, false); - m_CurrentOffset++; - } - } - if (m_InternalStage == STAGE_PARSE) { - if (!m_pParser) { - m_pParser.reset(new CPDF_StreamContentParser( - m_pObjectHolder->m_pDocument, m_pObjectHolder->m_pPageResources, - nullptr, nullptr, m_pObjectHolder, m_pObjectHolder->m_pResources, - &m_pObjectHolder->m_BBox, nullptr, 0)); - m_pParser->GetCurStates()->m_ColorState.SetDefault(); - } - if (m_CurrentOffset >= m_Size) { - m_InternalStage = STAGE_CHECKCLIP; - } else { - m_CurrentOffset += - m_pParser->Parse(m_pData + m_CurrentOffset, - m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); - } - } - if (m_InternalStage == STAGE_CHECKCLIP) { - if (m_pType3Char) { - m_pType3Char->m_bColored = m_pParser->IsColored(); - m_pType3Char->m_Width = - FXSYS_round(m_pParser->GetType3Data()[0] * 1000); - m_pType3Char->m_BBox.left = - FXSYS_round(m_pParser->GetType3Data()[2] * 1000); - m_pType3Char->m_BBox.bottom = - FXSYS_round(m_pParser->GetType3Data()[3] * 1000); - m_pType3Char->m_BBox.right = - FXSYS_round(m_pParser->GetType3Data()[4] * 1000); - m_pType3Char->m_BBox.top = - FXSYS_round(m_pParser->GetType3Data()[5] * 1000); - } - for (auto& pObj : *m_pObjectHolder->GetPageObjectList()) { - if (!pObj->m_ClipPath) - continue; - if (pObj->m_ClipPath.GetPathCount() != 1) - continue; - if (pObj->m_ClipPath.GetTextCount()) - continue; - CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0); - if (!ClipPath.IsRect() || pObj->IsShading()) - continue; - CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0), - ClipPath.GetPointX(2), ClipPath.GetPointY(2)); - CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, - pObj->m_Top); - if (old_rect.Contains(obj_rect)) { - pObj->m_ClipPath.SetNull(); - } - } - m_Status = Done; - return; - } - steps++; - if (pPause && pPause->NeedToPauseNow()) { - break; - } - } -} diff --git a/core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp b/core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp deleted file mode 100644 index 52ebf1ef09..0000000000 --- a/core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2015 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "core/fpdfapi/page/pageint.h" -#include "testing/gtest/include/gtest/gtest.h" - -TEST(fpdf_page_parser_old, ReadHexString) { - { - // Position out of bounds. - uint8_t data[] = "12ab>"; - CPDF_StreamParser parser(data, 5); - parser.SetPos(6); - EXPECT_EQ("", parser.ReadHexString()); - } - - { - // Regular conversion. - uint8_t data[] = "1A2b>abcd"; - CPDF_StreamParser parser(data, 5); - EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); - EXPECT_EQ(5u, parser.GetPos()); - } - - { - // Missing ending > - uint8_t data[] = "1A2b"; - CPDF_StreamParser parser(data, 5); - EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); - EXPECT_EQ(5u, parser.GetPos()); - } - - { - // Uneven number of bytes. - uint8_t data[] = "1A2>asdf"; - CPDF_StreamParser parser(data, 5); - EXPECT_EQ("\x1a\x20", parser.ReadHexString()); - EXPECT_EQ(4u, parser.GetPos()); - } - - { - uint8_t data[] = ">"; - CPDF_StreamParser parser(data, 5); - EXPECT_EQ("", parser.ReadHexString()); - EXPECT_EQ(1u, parser.GetPos()); - } -} diff --git a/core/fpdfapi/page/fpdf_page_parser_unittest.cpp b/core/fpdfapi/page/fpdf_page_parser_unittest.cpp deleted file mode 100644 index b6eec7bcf7..0000000000 --- a/core/fpdfapi/page/fpdf_page_parser_unittest.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "core/fpdfapi/page/pageint.h" -#include "testing/gtest/include/gtest/gtest.h" - -TEST(fpdf_page_parser, PDF_FindKeyAbbreviation) { - EXPECT_EQ(CFX_ByteStringC("BitsPerComponent"), - PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("BPC"))); - EXPECT_EQ(CFX_ByteStringC("Width"), - PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("W"))); - EXPECT_EQ(CFX_ByteStringC(""), - PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC(""))); - EXPECT_EQ(CFX_ByteStringC(""), - PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("NoInList"))); - // Prefix should not match. - EXPECT_EQ(CFX_ByteStringC(""), - PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("WW"))); -} - -TEST(fpdf_page_parser, PDF_FindValueAbbreviation) { - EXPECT_EQ(CFX_ByteStringC("DeviceGray"), - PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("G"))); - EXPECT_EQ(CFX_ByteStringC("DCTDecode"), - PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("DCT"))); - EXPECT_EQ(CFX_ByteStringC(""), - PDF_FindValueAbbreviationForTesting(CFX_ByteStringC(""))); - EXPECT_EQ(CFX_ByteStringC(""), - PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("NoInList"))); - // Prefix should not match. - EXPECT_EQ(CFX_ByteStringC(""), - PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("II"))); -} diff --git a/core/fpdfapi/page/pageint.h b/core/fpdfapi/page/pageint.h index a96216bbb6..dee9639d7d 100644 --- a/core/fpdfapi/page/pageint.h +++ b/core/fpdfapi/page/pageint.h @@ -64,7 +64,7 @@ class CPDF_StreamParser { CPDF_Object* ReadNextObject(bool bAllowNestedArray, uint32_t dwInArrayLevel); private: - friend class fpdf_page_parser_old_ReadHexString_Test; + friend class cpdf_streamparser_ReadHexString_Test; void GetNextWord(bool& bIsNumber); CFX_ByteString ReadString(); @@ -303,6 +303,4 @@ CFX_ByteStringC PDF_FindKeyAbbreviationForTesting(const CFX_ByteStringC& abbr); CFX_ByteStringC PDF_FindValueAbbreviationForTesting( const CFX_ByteStringC& abbr); -void PDF_ReplaceAbbr(CPDF_Object* pObj); - #endif // CORE_FPDFAPI_PAGE_PAGEINT_H_ -- cgit v1.2.3