From d24b97ee1d065eff482355ea3ff82be59bb528b1 Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Wed, 9 Aug 2017 18:50:59 +0300 Subject: Unify of saving documents. In the original code, the method used to write objects depends on several hard-to-predict factors, such as: 1) Whether there is an updated version of at least one object in the document. 2) Whether the password has changed. 3) Whether the object was loaded earlier. 4) Whether the object is compressed and the document has a password. With these factors it is difficult to predict what the final file will be. To reduce volatility, use only one method that works in all of the cases mentioned. This method is: parse, then serialize. Change-Id: I3d7dcadd10abffbad68d1f993f2dd60b039ed989 Reviewed-on: https://pdfium-review.googlesource.com/9572 Commit-Queue: Art Snake Reviewed-by: dsinclair --- core/fpdfapi/edit/cpdf_creator.cpp | 35 ++----- core/fpdfapi/edit/cpdf_creator_embeddertest.cpp | 43 +++++++++ core/fpdfapi/parser/cpdf_parser.cpp | 116 ------------------------ core/fpdfapi/parser/cpdf_parser.h | 1 - 4 files changed, 51 insertions(+), 144 deletions(-) create mode 100644 core/fpdfapi/edit/cpdf_creator_embeddertest.cpp (limited to 'core/fpdfapi') diff --git a/core/fpdfapi/edit/cpdf_creator.cpp b/core/fpdfapi/edit/cpdf_creator.cpp index b347d45dce..9735460f41 100644 --- a/core/fpdfapi/edit/cpdf_creator.cpp +++ b/core/fpdfapi/edit/cpdf_creator.cpp @@ -323,34 +323,15 @@ bool CPDF_Creator::WriteOldIndirectObject(uint32_t objnum) { m_ObjectOffsets[objnum] = m_Archive->CurrentOffset(); bool bExistInMap = !!m_pDocument->GetIndirectObject(objnum); - const CPDF_Parser::ObjectType object_type = m_pParser->GetObjectType(objnum); - if (m_pParser->IsVersionUpdated() || m_bSecurityChanged || bExistInMap || - (object_type == CPDF_Parser::ObjectType::kCompressed && m_pEncryptDict)) { - CPDF_Object* pObj = m_pDocument->GetOrParseIndirectObject(objnum); - if (!pObj) { - m_ObjectOffsets.erase(objnum); - return true; - } - if (!WriteIndirectObj(pObj->GetObjNum(), pObj)) 
return false; - if (!bExistInMap) - m_pDocument->DeleteIndirectObject(objnum); - } else { - std::vector buffer = m_pParser->GetIndirectBinary(objnum); - if (buffer.empty()) - return true; - if (object_type == CPDF_Parser::ObjectType::kCompressed) { - if (!m_Archive->WriteDWord(objnum) || - !m_Archive->WriteString(" 0 obj ") || - !m_Archive->WriteBlock(buffer.data(), buffer.size()) || - !m_Archive->WriteString("\r\nendobj\r\n")) { - return false; - } - } else { - if (!m_Archive->WriteBlock(buffer.data(), buffer.size())) - return false; - } + CPDF_Object* pObj = m_pDocument->GetOrParseIndirectObject(objnum); + if (!pObj) { + m_ObjectOffsets.erase(objnum); + return true; } + if (!WriteIndirectObj(pObj->GetObjNum(), pObj)) + return false; + if (!bExistInMap) + m_pDocument->DeleteIndirectObject(objnum); return true; } diff --git a/core/fpdfapi/edit/cpdf_creator_embeddertest.cpp b/core/fpdfapi/edit/cpdf_creator_embeddertest.cpp new file mode 100644 index 0000000000..def7d50a97 --- /dev/null +++ b/core/fpdfapi/edit/cpdf_creator_embeddertest.cpp @@ -0,0 +1,43 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include + +#include "core/fxcrt/fx_system.h" +#include "public/fpdf_annot.h" +#include "public/fpdf_edit.h" +#include "public/fpdfview.h" +#include "testing/embedder_test.h" +#include "testing/gtest/include/gtest/gtest.h" + +class CPDF_CreatorEmbedderTest : public EmbedderTest {}; + +TEST_F(CPDF_CreatorEmbedderTest, SavedDocsAreEqualAfterParse) { + ASSERT_TRUE(OpenDocument("annotation_stamp_with_ap.pdf")); + // Save without additional data reading. + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + const std::string saved_doc_1 = GetString(); + ClearString(); + + { + // Do some read only operations. 
+ ASSERT_GE(FPDF_GetPageCount(document()), 1); // Page 0 is loaded below. + FPDF_PAGE page = FPDF_LoadPage(document(), 0); + ASSERT_TRUE(page); + FPDF_BITMAP new_bitmap = + RenderPageWithFlags(page, form_handle(), FPDF_ANNOT); + FPDFBitmap_Destroy(new_bitmap); + UnloadPage(page); + } + + // Save when we have additional loaded data. + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + const std::string saved_doc_2 = GetString(); + ClearString(); + + // The sizes of saved docs should be equal. + EXPECT_EQ(saved_doc_1.size(), saved_doc_2.size()); +} diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index 32c616c1c6..da51eb1c83 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -1258,122 +1258,6 @@ FX_FILESIZE CPDF_Parser::GetObjectSize(uint32_t objnum) const { return *it - offset; } -std::vector CPDF_Parser::GetIndirectBinary(uint32_t objnum) { - std::vector buffer; - if (!IsValidObjectNumber(objnum)) - return buffer; - - if (GetObjectType(objnum) == ObjectType::kCompressed) { - CFX_RetainPtr pObjStream = - GetObjectStream(m_ObjectInfo[objnum].pos); - if (!pObjStream) - return buffer; - - int32_t offset = GetStreamFirst(pObjStream); - const uint8_t* pData = pObjStream->GetData(); - uint32_t totalsize = pObjStream->GetSize(); - auto file = pdfium::MakeRetain( - const_cast(pData), static_cast(totalsize), false); - CPDF_SyntaxParser syntax; - syntax.InitParser(file, 0); - - for (int i = GetStreamNCount(pObjStream); i > 0; --i) { - uint32_t thisnum = syntax.GetDirectNum(); - uint32_t thisoff = syntax.GetDirectNum(); - if (thisnum != objnum) - continue; - - size_t size = 0; - if (i == 1) { - size = totalsize - (thisoff + offset); - } else { - syntax.GetDirectNum(); // Skip nextnum. 
- uint32_t nextoff = syntax.GetDirectNum(); - size = nextoff - thisoff; - } - - buffer.resize(size); - memcpy(buffer.data(), pData + thisoff + offset, size); - break; - } - return buffer; - } - - if (GetObjectType(objnum) != ObjectType::kNotCompressed) - return buffer; - - FX_FILESIZE pos = m_ObjectInfo[objnum].pos; - if (pos == 0) - return buffer; - - FX_FILESIZE SavedPos = m_pSyntax->GetPos(); - m_pSyntax->SetPos(pos); - - bool bIsNumber; - CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) { - m_pSyntax->SetPos(SavedPos); - return buffer; - } - - uint32_t parser_objnum = FXSYS_atoui(word.c_str()); - if (parser_objnum && parser_objnum != objnum) { - m_pSyntax->SetPos(SavedPos); - return buffer; - } - - word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) { - m_pSyntax->SetPos(SavedPos); - return buffer; - } - - if (m_pSyntax->GetKeyword() != "obj") { - m_pSyntax->SetPos(SavedPos); - return buffer; - } - - auto it = m_SortedOffset.find(pos); - if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { - m_pSyntax->SetPos(SavedPos); - return buffer; - } - - FX_FILESIZE nextoff = *it; - bool bNextOffValid = false; - if (nextoff != pos) { - m_pSyntax->SetPos(nextoff); - word = m_pSyntax->GetNextWord(&bIsNumber); - if (word == "xref") { - bNextOffValid = true; - } else if (bIsNumber) { - word = m_pSyntax->GetNextWord(&bIsNumber); - if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { - bNextOffValid = true; - } - } - } - - if (!bNextOffValid) { - m_pSyntax->SetPos(pos); - while (1) { - if (m_pSyntax->GetKeyword() == "endobj") - break; - - if (m_pSyntax->GetPos() == m_pSyntax->m_FileLen) - break; - } - nextoff = m_pSyntax->GetPos(); - } - - size_t size = (uint32_t)(nextoff - pos); - buffer.resize(size); - m_pSyntax->SetPos(pos); - m_pSyntax->ReadBlock(buffer.data(), size); - m_pSyntax->SetPos(SavedPos); - return buffer; -} - std::unique_ptr CPDF_Parser::ParseIndirectObjectAt( CPDF_IndirectObjectHolder* pObjList, FX_FILESIZE pos, 
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h index c379905aed..fd74c5471e 100644 --- a/core/fpdfapi/parser/cpdf_parser.h +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -91,7 +91,6 @@ class CPDF_Parser { FX_FILESIZE GetObjectOffset(uint32_t objnum) const; FX_FILESIZE GetObjectSize(uint32_t objnum) const; - std::vector GetIndirectBinary(uint32_t objnum); int GetFileVersion() const { return m_FileVersion; } bool IsXRefStream() const { return m_bXRefStream; } -- cgit v1.2.3