diff options
author | npm <npm@chromium.org> | 2016-11-07 08:42:11 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-11-07 08:42:11 -0800 |
commit | 014b012278b7438ef8d4b66730b8598c7eb4623a (patch) | |
tree | 75ea0ea37d0b239412133290d7b24cc975bfcc66 | |
parent | 240dec52b2e6502e7deb27a3535af3b1a3e23428 (diff) | |
download | pdfium-014b012278b7438ef8d4b66730b8598c7eb4623a.tar.xz |
Clean up fpdf_page_parsers
- The code in fpdf_page_parser is only called by CPDF_StreamContentParser, so it was moved there.
- Split fpdf_page_parser_old.cpp into one file per class: cpdf_contentparser.cpp (CPDF_ContentParser) and cpdf_streamparser.cpp (CPDF_StreamParser)
- Renamed the corresponding unittests accordingly.
- Moved PDF_ReplaceAbbr into the anonymous namespace of cpdf_streamcontentparser.cpp, renamed to ReplaceAbbr
- Fixed a few nits
- Added a TODO because CPDF_StreamParser has a lot of code similar to CPDF_SyntaxParser
Review-Url: https://codereview.chromium.org/2474303003
-rw-r--r-- | BUILD.gn | 8 | ||||
-rw-r--r-- | core/fpdfapi/page/cpdf_contentparser.cpp | 215 | ||||
-rw-r--r-- | core/fpdfapi/page/cpdf_streamcontentparser.cpp | 117 | ||||
-rw-r--r-- | core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp (renamed from core/fpdfapi/page/fpdf_page_parser_unittest.cpp) | 4 | ||||
-rw-r--r-- | core/fpdfapi/page/cpdf_streamparser.cpp (renamed from core/fpdfapi/page/fpdf_page_parser_old.cpp) | 231 | ||||
-rw-r--r-- | core/fpdfapi/page/cpdf_streamparser_unittest.cpp (renamed from core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp) | 2 | ||||
-rw-r--r-- | core/fpdfapi/page/fpdf_page_parser.cpp | 158 | ||||
-rw-r--r-- | core/fpdfapi/page/pageint.h | 4 |
8 files changed, 347 insertions, 392 deletions
@@ -432,6 +432,7 @@ static_library("fpdfapi") { "core/fpdfapi/page/cpdf_contentmark.h", "core/fpdfapi/page/cpdf_contentmarkitem.cpp", "core/fpdfapi/page/cpdf_contentmarkitem.h", + "core/fpdfapi/page/cpdf_contentparser.cpp", "core/fpdfapi/page/cpdf_countedobject.h", "core/fpdfapi/page/cpdf_docpagedata.cpp", "core/fpdfapi/page/cpdf_docpagedata.h", @@ -471,6 +472,7 @@ static_library("fpdfapi") { "core/fpdfapi/page/cpdf_shadingpattern.h", "core/fpdfapi/page/cpdf_streamcontentparser.cpp", "core/fpdfapi/page/cpdf_streamcontentparser.h", + "core/fpdfapi/page/cpdf_streamparser.cpp", "core/fpdfapi/page/cpdf_textobject.cpp", "core/fpdfapi/page/cpdf_textobject.h", "core/fpdfapi/page/cpdf_textstate.cpp", @@ -479,8 +481,6 @@ static_library("fpdfapi") { "core/fpdfapi/page/cpdf_tilingpattern.h", "core/fpdfapi/page/fpdf_page_colors.cpp", "core/fpdfapi/page/fpdf_page_func.cpp", - "core/fpdfapi/page/fpdf_page_parser.cpp", - "core/fpdfapi/page/fpdf_page_parser_old.cpp", "core/fpdfapi/page/pageint.h", "core/fpdfapi/parser/cfdf_document.cpp", "core/fpdfapi/parser/cfdf_document.h", @@ -1636,8 +1636,8 @@ test("pdfium_unittests") { sources = [ "core/fpdfapi/font/fpdf_font_cid_unittest.cpp", "core/fpdfapi/font/fpdf_font_unittest.cpp", - "core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp", - "core/fpdfapi/page/fpdf_page_parser_unittest.cpp", + "core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp", + "core/fpdfapi/page/cpdf_streamparser_unittest.cpp", "core/fpdfapi/parser/cpdf_array_unittest.cpp", "core/fpdfapi/parser/cpdf_document_unittest.cpp", "core/fpdfapi/parser/cpdf_object_unittest.cpp", diff --git a/core/fpdfapi/page/cpdf_contentparser.cpp b/core/fpdfapi/page/cpdf_contentparser.cpp new file mode 100644 index 0000000000..f581047835 --- /dev/null +++ b/core/fpdfapi/page/cpdf_contentparser.cpp @@ -0,0 +1,215 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/page/pageint.h" + +#include "core/fpdfapi/font/cpdf_type3char.h" +#include "core/fpdfapi/page/cpdf_allstates.h" +#include "core/fpdfapi/page/cpdf_form.h" +#include "core/fpdfapi/page/cpdf_page.h" +#include "core/fpdfapi/page/cpdf_pageobject.h" +#include "core/fpdfapi/page/cpdf_path.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_stream.h" +#include "core/fpdfapi/parser/cpdf_stream_acc.h" +#include "core/fxcrt/fx_safe_types.h" + +CPDF_ContentParser::CPDF_ContentParser() + : m_Status(Ready), + m_InternalStage(STAGE_GETCONTENT), + m_pObjectHolder(nullptr), + m_bForm(false), + m_pType3Char(nullptr), + m_pData(nullptr), + m_Size(0), + m_CurrentOffset(0) {} + +CPDF_ContentParser::~CPDF_ContentParser() { + if (!m_pSingleStream) + FX_Free(m_pData); +} + +void CPDF_ContentParser::Start(CPDF_Page* pPage) { + if (m_Status != Ready || !pPage || !pPage->m_pDocument || + !pPage->m_pFormDict) { + m_Status = Done; + return; + } + m_pObjectHolder = pPage; + m_bForm = false; + m_Status = ToBeContinued; + m_InternalStage = STAGE_GETCONTENT; + m_CurrentOffset = 0; + + CPDF_Object* pContent = pPage->m_pFormDict->GetDirectObjectFor("Contents"); + if (!pContent) { + m_Status = Done; + return; + } + if (CPDF_Stream* pStream = pContent->AsStream()) { + m_nStreams = 0; + m_pSingleStream.reset(new CPDF_StreamAcc); + m_pSingleStream->LoadAllData(pStream, false); + } else if (CPDF_Array* pArray = pContent->AsArray()) { + m_nStreams = pArray->GetCount(); + if (m_nStreams) + m_StreamArray.resize(m_nStreams); + else + m_Status = Done; + } else { + m_Status = Done; + } +} + +void CPDF_ContentParser::Start(CPDF_Form* pForm, + CPDF_AllStates* pGraphicStates, + const CFX_Matrix* pParentMatrix, + CPDF_Type3Char* pType3Char, + int level) { + m_pType3Char = pType3Char; + m_pObjectHolder = pForm; + m_bForm = 
true; + CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrixFor("Matrix"); + if (pGraphicStates) + form_matrix.Concat(pGraphicStates->m_CTM); + CPDF_Array* pBBox = pForm->m_pFormDict->GetArrayFor("BBox"); + CFX_FloatRect form_bbox; + CPDF_Path ClipPath; + if (pBBox) { + form_bbox = pBBox->GetRect(); + ClipPath.Emplace(); + ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, + form_bbox.top); + ClipPath.Transform(&form_matrix); + if (pParentMatrix) + ClipPath.Transform(pParentMatrix); + form_bbox.Transform(&form_matrix); + if (pParentMatrix) + form_bbox.Transform(pParentMatrix); + } + CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDictFor("Resources"); + m_pParser.reset(new CPDF_StreamContentParser( + pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, + pParentMatrix, pForm, pResources, &form_bbox, pGraphicStates, level)); + m_pParser->GetCurStates()->m_CTM = form_matrix; + m_pParser->GetCurStates()->m_ParentMatrix = form_matrix; + if (ClipPath) { + m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, + true); + } + if (pForm->m_Transparency & PDFTRANS_GROUP) { + CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState; + pState->SetBlendType(FXDIB_BLEND_NORMAL); + pState->SetStrokeAlpha(1.0f); + pState->SetFillAlpha(1.0f); + pState->SetSoftMask(nullptr); + } + m_nStreams = 0; + m_pSingleStream.reset(new CPDF_StreamAcc); + m_pSingleStream->LoadAllData(pForm->m_pFormStream, false); + m_pData = (uint8_t*)m_pSingleStream->GetData(); + m_Size = m_pSingleStream->GetSize(); + m_Status = ToBeContinued; + m_InternalStage = STAGE_PARSE; + m_CurrentOffset = 0; +} + +void CPDF_ContentParser::Continue(IFX_Pause* pPause) { + int steps = 0; + while (m_Status == ToBeContinued) { + if (m_InternalStage == STAGE_GETCONTENT) { + if (m_CurrentOffset == m_nStreams) { + if (!m_StreamArray.empty()) { + FX_SAFE_UINT32 safeSize = 0; + for (const auto& stream : m_StreamArray) { + safeSize += stream->GetSize(); + 
safeSize += 1; + } + if (!safeSize.IsValid()) { + m_Status = Done; + return; + } + m_Size = safeSize.ValueOrDie(); + m_pData = FX_Alloc(uint8_t, m_Size); + uint32_t pos = 0; + for (const auto& stream : m_StreamArray) { + FXSYS_memcpy(m_pData + pos, stream->GetData(), stream->GetSize()); + pos += stream->GetSize(); + m_pData[pos++] = ' '; + } + m_StreamArray.clear(); + } else { + m_pData = (uint8_t*)m_pSingleStream->GetData(); + m_Size = m_pSingleStream->GetSize(); + } + m_InternalStage = STAGE_PARSE; + m_CurrentOffset = 0; + } else { + CPDF_Array* pContent = + m_pObjectHolder->m_pFormDict->GetArrayFor("Contents"); + m_StreamArray[m_CurrentOffset].reset(new CPDF_StreamAcc); + CPDF_Stream* pStreamObj = ToStream( + pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr); + m_StreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, false); + m_CurrentOffset++; + } + } + if (m_InternalStage == STAGE_PARSE) { + if (!m_pParser) { + m_pParser.reset(new CPDF_StreamContentParser( + m_pObjectHolder->m_pDocument, m_pObjectHolder->m_pPageResources, + nullptr, nullptr, m_pObjectHolder, m_pObjectHolder->m_pResources, + &m_pObjectHolder->m_BBox, nullptr, 0)); + m_pParser->GetCurStates()->m_ColorState.SetDefault(); + } + if (m_CurrentOffset >= m_Size) { + m_InternalStage = STAGE_CHECKCLIP; + } else { + m_CurrentOffset += + m_pParser->Parse(m_pData + m_CurrentOffset, + m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); + } + } + if (m_InternalStage == STAGE_CHECKCLIP) { + if (m_pType3Char) { + m_pType3Char->m_bColored = m_pParser->IsColored(); + m_pType3Char->m_Width = + FXSYS_round(m_pParser->GetType3Data()[0] * 1000); + m_pType3Char->m_BBox.left = + FXSYS_round(m_pParser->GetType3Data()[2] * 1000); + m_pType3Char->m_BBox.bottom = + FXSYS_round(m_pParser->GetType3Data()[3] * 1000); + m_pType3Char->m_BBox.right = + FXSYS_round(m_pParser->GetType3Data()[4] * 1000); + m_pType3Char->m_BBox.top = + FXSYS_round(m_pParser->GetType3Data()[5] * 1000); + } + for (auto& pObj : 
*m_pObjectHolder->GetPageObjectList()) { + if (!pObj->m_ClipPath) + continue; + if (pObj->m_ClipPath.GetPathCount() != 1) + continue; + if (pObj->m_ClipPath.GetTextCount()) + continue; + CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0); + if (!ClipPath.IsRect() || pObj->IsShading()) + continue; + CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0), + ClipPath.GetPointX(2), ClipPath.GetPointY(2)); + CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, + pObj->m_Top); + if (old_rect.Contains(obj_rect)) + pObj->m_ClipPath.SetNull(); + } + m_Status = Done; + return; + } + steps++; + if (pPause && pPause->NeedToPauseNow()) + break; + } +} diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp index 7618f8271f..cd77c0b633 100644 --- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp +++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp @@ -6,6 +6,10 @@ #include "core/fpdfapi/page/cpdf_streamcontentparser.h" +#include <memory> +#include <utility> +#include <vector> + #include "core/fpdfapi/font/cpdf_font.h" #include "core/fpdfapi/font/cpdf_type3font.h" #include "core/fpdfapi/page/cpdf_allstates.h" @@ -22,11 +26,14 @@ #include "core/fpdfapi/page/cpdf_textobject.h" #include "core/fpdfapi/page/pageint.h" #include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fxcrt/fx_safe_types.h" +#include "core/fxge/cfx_graphstatedata.h" #include "third_party/base/ptr_util.h" namespace { @@ -122,8 +129,116 @@ CFX_FloatRect GetShadingBBox(CPDF_ShadingPattern* pShading, return rect; } +struct AbbrPair { + const FX_CHAR* abbr; + const FX_CHAR* full_name; +}; + +const AbbrPair InlineKeyAbbr[] = { + {"BPC", "BitsPerComponent"}, 
{"CS", "ColorSpace"}, {"D", "Decode"}, + {"DP", "DecodeParms"}, {"F", "Filter"}, {"H", "Height"}, + {"IM", "ImageMask"}, {"I", "Interpolate"}, {"W", "Width"}, +}; + +const AbbrPair InlineValueAbbr[] = { + {"G", "DeviceGray"}, {"RGB", "DeviceRGB"}, + {"CMYK", "DeviceCMYK"}, {"I", "Indexed"}, + {"AHx", "ASCIIHexDecode"}, {"A85", "ASCII85Decode"}, + {"LZW", "LZWDecode"}, {"Fl", "FlateDecode"}, + {"RL", "RunLengthDecode"}, {"CCF", "CCITTFaxDecode"}, + {"DCT", "DCTDecode"}, +}; + +struct AbbrReplacementOp { + bool is_replace_key; + CFX_ByteString key; + CFX_ByteStringC replacement; +}; + +CFX_ByteStringC FindFullName(const AbbrPair* table, + size_t count, + const CFX_ByteStringC& abbr) { + auto it = std::find_if(table, table + count, [abbr](const AbbrPair& pair) { + return pair.abbr == abbr; + }); + return it != table + count ? CFX_ByteStringC(it->full_name) + : CFX_ByteStringC(); +} + +void ReplaceAbbr(CPDF_Object* pObj) { + switch (pObj->GetType()) { + case CPDF_Object::DICTIONARY: { + CPDF_Dictionary* pDict = pObj->AsDictionary(); + std::vector<AbbrReplacementOp> replacements; + for (const auto& it : *pDict) { + CFX_ByteString key = it.first; + CPDF_Object* value = it.second; + CFX_ByteStringC fullname = FindFullName( + InlineKeyAbbr, FX_ArraySize(InlineKeyAbbr), key.AsStringC()); + if (!fullname.IsEmpty()) { + AbbrReplacementOp op; + op.is_replace_key = true; + op.key = key; + op.replacement = fullname; + replacements.push_back(op); + key = fullname; + } + + if (value->IsName()) { + CFX_ByteString name = value->GetString(); + fullname = FindFullName( + InlineValueAbbr, FX_ArraySize(InlineValueAbbr), name.AsStringC()); + if (!fullname.IsEmpty()) { + AbbrReplacementOp op; + op.is_replace_key = false; + op.key = key; + op.replacement = fullname; + replacements.push_back(op); + } + } else { + ReplaceAbbr(value); + } + } + for (const auto& op : replacements) { + if (op.is_replace_key) + pDict->ReplaceKey(op.key, CFX_ByteString(op.replacement)); + else + 
pDict->SetNameFor(op.key, CFX_ByteString(op.replacement)); + } + break; + } + case CPDF_Object::ARRAY: { + CPDF_Array* pArray = pObj->AsArray(); + for (size_t i = 0; i < pArray->GetCount(); i++) { + CPDF_Object* pElement = pArray->GetObjectAt(i); + if (pElement->IsName()) { + CFX_ByteString name = pElement->GetString(); + CFX_ByteStringC fullname = FindFullName( + InlineValueAbbr, FX_ArraySize(InlineValueAbbr), name.AsStringC()); + if (!fullname.IsEmpty()) + pArray->SetAt(i, new CPDF_Name(CFX_ByteString(fullname))); + } else { + ReplaceAbbr(pElement); + } + } + break; + } + default: + break; + } +} + } // namespace +CFX_ByteStringC PDF_FindKeyAbbreviationForTesting(const CFX_ByteStringC& abbr) { + return FindFullName(InlineKeyAbbr, FX_ArraySize(InlineKeyAbbr), abbr); +} + +CFX_ByteStringC PDF_FindValueAbbreviationForTesting( + const CFX_ByteStringC& abbr) { + return FindFullName(InlineValueAbbr, FX_ArraySize(InlineValueAbbr), abbr); +} + CPDF_StreamContentParser::CPDF_StreamContentParser( CPDF_Document* pDocument, CPDF_Dictionary* pPageResources, @@ -543,7 +658,7 @@ void CPDF_StreamContentParser::Handle_BeginImage() { pDict->SetFor(key, pObj.release()); } } - PDF_ReplaceAbbr(pDict); + ReplaceAbbr(pDict); CPDF_Object* pCSObj = nullptr; if (pDict->KeyExist("ColorSpace")) { pCSObj = pDict->GetDirectObjectFor("ColorSpace"); diff --git a/core/fpdfapi/page/fpdf_page_parser_unittest.cpp b/core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp index b6eec7bcf7..be2fcb09e4 100644 --- a/core/fpdfapi/page/fpdf_page_parser_unittest.cpp +++ b/core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp @@ -5,7 +5,7 @@ #include "core/fpdfapi/page/pageint.h" #include "testing/gtest/include/gtest/gtest.h" -TEST(fpdf_page_parser, PDF_FindKeyAbbreviation) { +TEST(cpdf_streamcontentparser, PDF_FindKeyAbbreviation) { EXPECT_EQ(CFX_ByteStringC("BitsPerComponent"), PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("BPC"))); EXPECT_EQ(CFX_ByteStringC("Width"), @@ -19,7 +19,7 @@ 
TEST(fpdf_page_parser, PDF_FindKeyAbbreviation) { PDF_FindKeyAbbreviationForTesting(CFX_ByteStringC("WW"))); } -TEST(fpdf_page_parser, PDF_FindValueAbbreviation) { +TEST(cpdf_streamcontentparser, PDF_FindValueAbbreviation) { EXPECT_EQ(CFX_ByteStringC("DeviceGray"), PDF_FindValueAbbreviationForTesting(CFX_ByteStringC("G"))); EXPECT_EQ(CFX_ByteStringC("DCTDecode"), diff --git a/core/fpdfapi/page/fpdf_page_parser_old.cpp b/core/fpdfapi/page/cpdf_streamparser.cpp index 51ffc11b03..9d36d0a38b 100644 --- a/core/fpdfapi/page/fpdf_page_parser_old.cpp +++ b/core/fpdfapi/page/cpdf_streamparser.cpp @@ -1,4 +1,4 @@ -// Copyright 2014 PDFium Authors. All rights reserved. +// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -9,13 +9,7 @@ #include <limits.h> #include "core/fpdfapi/cpdf_modulemgr.h" -#include "core/fpdfapi/font/cpdf_type3char.h" -#include "core/fpdfapi/page/cpdf_allstates.h" #include "core/fpdfapi/page/cpdf_docpagedata.h" -#include "core/fpdfapi/page/cpdf_form.h" -#include "core/fpdfapi/page/cpdf_page.h" -#include "core/fpdfapi/page/cpdf_pageobject.h" -#include "core/fpdfapi/page/cpdf_path.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_boolean.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" @@ -24,15 +18,11 @@ #include "core/fpdfapi/parser/cpdf_null.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_stream.h" -#include "core/fpdfapi/parser/cpdf_stream_acc.h" #include "core/fpdfapi/parser/cpdf_string.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fpdfapi/parser/fpdf_parser_utility.h" #include "core/fxcodec/fx_codec.h" #include "core/fxcrt/fx_ext.h" -#include "core/fxcrt/fx_safe_types.h" -#include "core/fxge/cfx_fxgedevice.h" -#include "core/fxge/cfx_renderdevice.h" CCodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder( const uint8_t* src_buf, @@ -50,9 +40,8 @@ const 
FX_STRSIZE kMaxStringLength = 32767; uint32_t DecodeAllScanlines(CCodec_ScanlineDecoder* pDecoder, uint8_t*& dest_buf, uint32_t& dest_size) { - if (!pDecoder) { + if (!pDecoder) return FX_INVALID_OFFSET; - } int ncomps = pDecoder->CountComps(); int bpc = pDecoder->GetBPC(); int width = pDecoder->GetWidth(); @@ -89,12 +78,10 @@ uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf, FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); return DecodeAllScanlines(pDecoder, dest_buf, dest_size); } - if (decoder == "ASCII85Decode" || decoder == "A85") { + if (decoder == "ASCII85Decode" || decoder == "A85") return A85Decode(src_buf, limit, dest_buf, dest_size); - } - if (decoder == "ASCIIHexDecode" || decoder == "AHx") { + if (decoder == "ASCIIHexDecode" || decoder == "AHx") return HexDecode(src_buf, limit, dest_buf, dest_size); - } if (decoder == "FlateDecode" || decoder == "Fl") { return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size, dest_buf, dest_size); @@ -110,9 +97,8 @@ uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf, !pParam || pParam->GetIntegerFor("ColorTransform", 1)); return DecodeAllScanlines(pDecoder, dest_buf, dest_size); } - if (decoder == "RunLengthDecode" || decoder == "RL") { + if (decoder == "RunLengthDecode" || decoder == "RL") return RunLengthDecode(src_buf, limit, dest_buf, dest_size); - } dest_size = 0; dest_buf = 0; return (uint32_t)-1; @@ -205,9 +191,8 @@ CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, uint8_t* pData = nullptr; uint32_t dwStreamSize; if (Decoder.IsEmpty()) { - if (OrigSize > m_Size - m_Pos) { + if (OrigSize > m_Size - m_Pos) OrigSize = m_Size - m_Pos; - } pData = FX_Alloc(uint8_t, OrigSize); FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize); dwStreamSize = OrigSize; @@ -218,7 +203,7 @@ CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, Decoder, pParam, pData, dwDestSize); FX_Free(pData); 
- if ((int)dwStreamSize < 0) + if (static_cast<int>(dwStreamSize) < 0) return nullptr; uint32_t dwSavePos = m_Pos; @@ -424,6 +409,7 @@ CPDF_Object* CPDF_StreamParser::ReadNextObject(bool bAllowNestedArray, return nullptr; } +// TODO(npm): the following methods are almost identical in cpdf_syntaxparser void CPDF_StreamParser::GetNextWord(bool& bIsNumber) { m_WordSize = 0; bIsNumber = true; @@ -639,204 +625,3 @@ CFX_ByteString CPDF_StreamParser::ReadHexString() { bool CPDF_StreamParser::PositionIsInBounds() const { return m_Pos < m_Size; } - -CPDF_ContentParser::CPDF_ContentParser() - : m_Status(Ready), - m_InternalStage(STAGE_GETCONTENT), - m_pObjectHolder(nullptr), - m_bForm(false), - m_pType3Char(nullptr), - m_pData(nullptr), - m_Size(0), - m_CurrentOffset(0) {} - -CPDF_ContentParser::~CPDF_ContentParser() { - if (!m_pSingleStream) - FX_Free(m_pData); -} - -void CPDF_ContentParser::Start(CPDF_Page* pPage) { - if (m_Status != Ready || !pPage || !pPage->m_pDocument || - !pPage->m_pFormDict) { - m_Status = Done; - return; - } - m_pObjectHolder = pPage; - m_bForm = false; - m_Status = ToBeContinued; - m_InternalStage = STAGE_GETCONTENT; - m_CurrentOffset = 0; - - CPDF_Object* pContent = pPage->m_pFormDict->GetDirectObjectFor("Contents"); - if (!pContent) { - m_Status = Done; - return; - } - if (CPDF_Stream* pStream = pContent->AsStream()) { - m_nStreams = 0; - m_pSingleStream.reset(new CPDF_StreamAcc); - m_pSingleStream->LoadAllData(pStream, false); - } else if (CPDF_Array* pArray = pContent->AsArray()) { - m_nStreams = pArray->GetCount(); - if (m_nStreams) - m_StreamArray.resize(m_nStreams); - else - m_Status = Done; - } else { - m_Status = Done; - } -} - -void CPDF_ContentParser::Start(CPDF_Form* pForm, - CPDF_AllStates* pGraphicStates, - const CFX_Matrix* pParentMatrix, - CPDF_Type3Char* pType3Char, - int level) { - m_pType3Char = pType3Char; - m_pObjectHolder = pForm; - m_bForm = true; - CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrixFor("Matrix"); - if 
(pGraphicStates) { - form_matrix.Concat(pGraphicStates->m_CTM); - } - CPDF_Array* pBBox = pForm->m_pFormDict->GetArrayFor("BBox"); - CFX_FloatRect form_bbox; - CPDF_Path ClipPath; - if (pBBox) { - form_bbox = pBBox->GetRect(); - ClipPath.Emplace(); - ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, - form_bbox.top); - ClipPath.Transform(&form_matrix); - if (pParentMatrix) { - ClipPath.Transform(pParentMatrix); - } - form_bbox.Transform(&form_matrix); - if (pParentMatrix) { - form_bbox.Transform(pParentMatrix); - } - } - CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDictFor("Resources"); - m_pParser.reset(new CPDF_StreamContentParser( - pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, - pParentMatrix, pForm, pResources, &form_bbox, pGraphicStates, level)); - m_pParser->GetCurStates()->m_CTM = form_matrix; - m_pParser->GetCurStates()->m_ParentMatrix = form_matrix; - if (ClipPath) { - m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, - true); - } - if (pForm->m_Transparency & PDFTRANS_GROUP) { - CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState; - pState->SetBlendType(FXDIB_BLEND_NORMAL); - pState->SetStrokeAlpha(1.0f); - pState->SetFillAlpha(1.0f); - pState->SetSoftMask(nullptr); - } - m_nStreams = 0; - m_pSingleStream.reset(new CPDF_StreamAcc); - m_pSingleStream->LoadAllData(pForm->m_pFormStream, false); - m_pData = (uint8_t*)m_pSingleStream->GetData(); - m_Size = m_pSingleStream->GetSize(); - m_Status = ToBeContinued; - m_InternalStage = STAGE_PARSE; - m_CurrentOffset = 0; -} - -void CPDF_ContentParser::Continue(IFX_Pause* pPause) { - int steps = 0; - while (m_Status == ToBeContinued) { - if (m_InternalStage == STAGE_GETCONTENT) { - if (m_CurrentOffset == m_nStreams) { - if (!m_StreamArray.empty()) { - FX_SAFE_UINT32 safeSize = 0; - for (const auto& stream : m_StreamArray) { - safeSize += stream->GetSize(); - safeSize += 1; - } - if (!safeSize.IsValid()) { - m_Status = Done; 
- return; - } - m_Size = safeSize.ValueOrDie(); - m_pData = FX_Alloc(uint8_t, m_Size); - uint32_t pos = 0; - for (const auto& stream : m_StreamArray) { - FXSYS_memcpy(m_pData + pos, stream->GetData(), stream->GetSize()); - pos += stream->GetSize(); - m_pData[pos++] = ' '; - } - m_StreamArray.clear(); - } else { - m_pData = (uint8_t*)m_pSingleStream->GetData(); - m_Size = m_pSingleStream->GetSize(); - } - m_InternalStage = STAGE_PARSE; - m_CurrentOffset = 0; - } else { - CPDF_Array* pContent = - m_pObjectHolder->m_pFormDict->GetArrayFor("Contents"); - m_StreamArray[m_CurrentOffset].reset(new CPDF_StreamAcc); - CPDF_Stream* pStreamObj = ToStream( - pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr); - m_StreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, false); - m_CurrentOffset++; - } - } - if (m_InternalStage == STAGE_PARSE) { - if (!m_pParser) { - m_pParser.reset(new CPDF_StreamContentParser( - m_pObjectHolder->m_pDocument, m_pObjectHolder->m_pPageResources, - nullptr, nullptr, m_pObjectHolder, m_pObjectHolder->m_pResources, - &m_pObjectHolder->m_BBox, nullptr, 0)); - m_pParser->GetCurStates()->m_ColorState.SetDefault(); - } - if (m_CurrentOffset >= m_Size) { - m_InternalStage = STAGE_CHECKCLIP; - } else { - m_CurrentOffset += - m_pParser->Parse(m_pData + m_CurrentOffset, - m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); - } - } - if (m_InternalStage == STAGE_CHECKCLIP) { - if (m_pType3Char) { - m_pType3Char->m_bColored = m_pParser->IsColored(); - m_pType3Char->m_Width = - FXSYS_round(m_pParser->GetType3Data()[0] * 1000); - m_pType3Char->m_BBox.left = - FXSYS_round(m_pParser->GetType3Data()[2] * 1000); - m_pType3Char->m_BBox.bottom = - FXSYS_round(m_pParser->GetType3Data()[3] * 1000); - m_pType3Char->m_BBox.right = - FXSYS_round(m_pParser->GetType3Data()[4] * 1000); - m_pType3Char->m_BBox.top = - FXSYS_round(m_pParser->GetType3Data()[5] * 1000); - } - for (auto& pObj : *m_pObjectHolder->GetPageObjectList()) { - if (!pObj->m_ClipPath) - continue; - 
if (pObj->m_ClipPath.GetPathCount() != 1) - continue; - if (pObj->m_ClipPath.GetTextCount()) - continue; - CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0); - if (!ClipPath.IsRect() || pObj->IsShading()) - continue; - CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0), - ClipPath.GetPointX(2), ClipPath.GetPointY(2)); - CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, - pObj->m_Top); - if (old_rect.Contains(obj_rect)) { - pObj->m_ClipPath.SetNull(); - } - } - m_Status = Done; - return; - } - steps++; - if (pPause && pPause->NeedToPauseNow()) { - break; - } - } -} diff --git a/core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp b/core/fpdfapi/page/cpdf_streamparser_unittest.cpp index 52ebf1ef09..f2a5a542f8 100644 --- a/core/fpdfapi/page/fpdf_page_parser_old_unittest.cpp +++ b/core/fpdfapi/page/cpdf_streamparser_unittest.cpp @@ -5,7 +5,7 @@ #include "core/fpdfapi/page/pageint.h" #include "testing/gtest/include/gtest/gtest.h" -TEST(fpdf_page_parser_old, ReadHexString) { +TEST(cpdf_streamparser, ReadHexString) { { // Position out of bounds. uint8_t data[] = "12ab>"; diff --git a/core/fpdfapi/page/fpdf_page_parser.cpp b/core/fpdfapi/page/fpdf_page_parser.cpp deleted file mode 100644 index b6f0bc605c..0000000000 --- a/core/fpdfapi/page/fpdf_page_parser.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "core/fpdfapi/page/pageint.h" - -#include <memory> -#include <utility> -#include <vector> - -#include "core/fpdfapi/edit/cpdf_creator.h" -#include "core/fpdfapi/font/cpdf_font.h" -#include "core/fpdfapi/font/cpdf_type3font.h" -#include "core/fpdfapi/page/cpdf_allstates.h" -#include "core/fpdfapi/page/cpdf_docpagedata.h" -#include "core/fpdfapi/page/cpdf_form.h" -#include "core/fpdfapi/page/cpdf_formobject.h" -#include "core/fpdfapi/page/cpdf_image.h" -#include "core/fpdfapi/page/cpdf_imageobject.h" -#include "core/fpdfapi/page/cpdf_meshstream.h" -#include "core/fpdfapi/page/cpdf_pageobject.h" -#include "core/fpdfapi/page/cpdf_pathobject.h" -#include "core/fpdfapi/page/cpdf_shadingobject.h" -#include "core/fpdfapi/page/cpdf_shadingpattern.h" -#include "core/fpdfapi/page/cpdf_textobject.h" -#include "core/fpdfapi/parser/cpdf_array.h" -#include "core/fpdfapi/parser/cpdf_dictionary.h" -#include "core/fpdfapi/parser/cpdf_document.h" -#include "core/fpdfapi/parser/cpdf_name.h" -#include "core/fpdfapi/parser/cpdf_number.h" -#include "core/fpdfapi/parser/cpdf_reference.h" -#include "core/fpdfapi/parser/cpdf_stream.h" -#include "core/fpdfapi/parser/cpdf_stream_acc.h" -#include "core/fpdfapi/parser/fpdf_parser_decode.h" -#include "core/fxcrt/fx_safe_types.h" -#include "core/fxge/cfx_graphstatedata.h" -#include "core/fxge/cfx_pathdata.h" -#include "third_party/base/ptr_util.h" - -namespace { - -struct PDF_AbbrPair { - const FX_CHAR* abbr; - const FX_CHAR* full_name; -}; - -const PDF_AbbrPair PDF_InlineKeyAbbr[] = { - {"BPC", "BitsPerComponent"}, {"CS", "ColorSpace"}, {"D", "Decode"}, - {"DP", "DecodeParms"}, {"F", "Filter"}, {"H", "Height"}, - {"IM", "ImageMask"}, {"I", "Interpolate"}, {"W", "Width"}, -}; - -const PDF_AbbrPair PDF_InlineValueAbbr[] = { - {"G", "DeviceGray"}, {"RGB", "DeviceRGB"}, - {"CMYK", "DeviceCMYK"}, {"I", "Indexed"}, - {"AHx", "ASCIIHexDecode"}, {"A85", "ASCII85Decode"}, - {"LZW", "LZWDecode"}, {"Fl", 
"FlateDecode"}, - {"RL", "RunLengthDecode"}, {"CCF", "CCITTFaxDecode"}, - {"DCT", "DCTDecode"}, -}; - -struct AbbrReplacementOp { - bool is_replace_key; - CFX_ByteString key; - CFX_ByteStringC replacement; -}; - -CFX_ByteStringC PDF_FindFullName(const PDF_AbbrPair* table, - size_t count, - const CFX_ByteStringC& abbr) { - auto it = std::find_if( - table, table + count, - [abbr](const PDF_AbbrPair& pair) { return pair.abbr == abbr; }); - return it != table + count ? CFX_ByteStringC(it->full_name) - : CFX_ByteStringC(); -} - -} // namespace - -CFX_ByteStringC PDF_FindKeyAbbreviationForTesting(const CFX_ByteStringC& abbr) { - return PDF_FindFullName(PDF_InlineKeyAbbr, FX_ArraySize(PDF_InlineKeyAbbr), - abbr); -} - -CFX_ByteStringC PDF_FindValueAbbreviationForTesting( - const CFX_ByteStringC& abbr) { - return PDF_FindFullName(PDF_InlineValueAbbr, - FX_ArraySize(PDF_InlineValueAbbr), abbr); -} - -void PDF_ReplaceAbbr(CPDF_Object* pObj) { - switch (pObj->GetType()) { - case CPDF_Object::DICTIONARY: { - CPDF_Dictionary* pDict = pObj->AsDictionary(); - std::vector<AbbrReplacementOp> replacements; - for (const auto& it : *pDict) { - CFX_ByteString key = it.first; - CPDF_Object* value = it.second; - CFX_ByteStringC fullname = - PDF_FindFullName(PDF_InlineKeyAbbr, FX_ArraySize(PDF_InlineKeyAbbr), - key.AsStringC()); - if (!fullname.IsEmpty()) { - AbbrReplacementOp op; - op.is_replace_key = true; - op.key = key; - op.replacement = fullname; - replacements.push_back(op); - key = fullname; - } - - if (value->IsName()) { - CFX_ByteString name = value->GetString(); - fullname = PDF_FindFullName(PDF_InlineValueAbbr, - FX_ArraySize(PDF_InlineValueAbbr), - name.AsStringC()); - if (!fullname.IsEmpty()) { - AbbrReplacementOp op; - op.is_replace_key = false; - op.key = key; - op.replacement = fullname; - replacements.push_back(op); - } - } else { - PDF_ReplaceAbbr(value); - } - } - for (const auto& op : replacements) { - if (op.is_replace_key) - pDict->ReplaceKey(op.key, 
CFX_ByteString(op.replacement)); - else - pDict->SetNameFor(op.key, CFX_ByteString(op.replacement)); - } - break; - } - case CPDF_Object::ARRAY: { - CPDF_Array* pArray = pObj->AsArray(); - for (size_t i = 0; i < pArray->GetCount(); i++) { - CPDF_Object* pElement = pArray->GetObjectAt(i); - if (pElement->IsName()) { - CFX_ByteString name = pElement->GetString(); - CFX_ByteStringC fullname = PDF_FindFullName( - PDF_InlineValueAbbr, FX_ArraySize(PDF_InlineValueAbbr), - name.AsStringC()); - if (!fullname.IsEmpty()) { - pArray->SetAt(i, new CPDF_Name(CFX_ByteString(fullname))); - } - } else { - PDF_ReplaceAbbr(pElement); - } - } - break; - } - default: - break; - } -} diff --git a/core/fpdfapi/page/pageint.h b/core/fpdfapi/page/pageint.h index a96216bbb6..dee9639d7d 100644 --- a/core/fpdfapi/page/pageint.h +++ b/core/fpdfapi/page/pageint.h @@ -64,7 +64,7 @@ class CPDF_StreamParser { CPDF_Object* ReadNextObject(bool bAllowNestedArray, uint32_t dwInArrayLevel); private: - friend class fpdf_page_parser_old_ReadHexString_Test; + friend class cpdf_streamparser_ReadHexString_Test; void GetNextWord(bool& bIsNumber); CFX_ByteString ReadString(); @@ -303,6 +303,4 @@ CFX_ByteStringC PDF_FindKeyAbbreviationForTesting(const CFX_ByteStringC& abbr); CFX_ByteStringC PDF_FindValueAbbreviationForTesting( const CFX_ByteStringC& abbr); -void PDF_ReplaceAbbr(CPDF_Object* pObj); - #endif // CORE_FPDFAPI_PAGE_PAGEINT_H_ |