summaryrefslogtreecommitdiff
path: root/core/fpdfapi/parser
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdfapi/parser')
-rw-r--r--core/fpdfapi/parser/cpdf_linearized_header.cpp17
-rw-r--r--core/fpdfapi/parser/cpdf_parser.cpp11
-rw-r--r--core/fpdfapi/parser/cpdf_parser.h6
-rw-r--r--core/fpdfapi/parser/cpdf_parser_unittest.cpp71
-rw-r--r--core/fpdfapi/parser/cpdf_syntax_parser.cpp9
-rw-r--r--core/fpdfapi/parser/cpdf_syntax_parser.h19
6 files changed, 103 insertions, 30 deletions
diff --git a/core/fpdfapi/parser/cpdf_linearized_header.cpp b/core/fpdfapi/parser/cpdf_linearized_header.cpp
index 5032bc3807..8286e0fbe3 100644
--- a/core/fpdfapi/parser/cpdf_linearized_header.cpp
+++ b/core/fpdfapi/parser/cpdf_linearized_header.cpp
@@ -38,17 +38,17 @@ bool IsValidNumericDictionaryValue(const CPDF_Dictionary* pDict,
}
bool IsLinearizedHeaderValid(const CPDF_LinearizedHeader* header,
- FX_FILESIZE file_size) {
+ FX_FILESIZE document_size) {
ASSERT(header);
- return header->GetFileSize() == file_size &&
+ return header->GetFileSize() == document_size &&
static_cast<int>(header->GetFirstPageNo()) >= 0 &&
header->GetFirstPageNo() < kMaxInt &&
header->GetFirstPageNo() < header->GetPageCount() &&
- header->GetMainXRefTableFirstEntryOffset() < file_size &&
+ header->GetMainXRefTableFirstEntryOffset() < document_size &&
header->GetPageCount() > 0 &&
- header->GetFirstPageEndOffset() < file_size &&
- header->GetLastXRefOffset() < file_size &&
- header->GetHintStart() < file_size;
+ header->GetFirstPageEndOffset() < document_size &&
+ header->GetLastXRefOffset() < document_size &&
+ header->GetHintStart() < document_size;
}
} // namespace
@@ -78,10 +78,9 @@ std::unique_ptr<CPDF_LinearizedHeader> CPDF_LinearizedHeader::Parse(
auto result = pdfium::WrapUnique(
new CPDF_LinearizedHeader(pDict.get(), parser->GetPos()));
- if (!IsLinearizedHeaderValid(result.get(),
- parser->GetFileAccess()->GetSize())) {
+ if (!IsLinearizedHeaderValid(result.get(), parser->GetDocumentSize()))
return nullptr;
- }
+
return result;
}
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 16077fd39e..be18560ffc 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -119,10 +119,6 @@ bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
return GetObjectType(objnum) == ObjectType::kFree;
}
-RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const {
- return m_pSyntax->GetFileAccess();
-}
-
void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) {
m_CrossRefTable->ShrinkObjectMap(objnum);
}
@@ -230,8 +226,7 @@ CPDF_Parser::Error CPDF_Parser::StartParseInternal() {
FX_FILESIZE CPDF_Parser::ParseStartXRef() {
static constexpr char kStartXRefKeyword[] = "startxref";
- m_pSyntax->SetPos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset -
- strlen(kStartXRefKeyword));
+ m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
return 0;
@@ -245,7 +240,7 @@ FX_FILESIZE CPDF_Parser::ParseStartXRef() {
return 0;
const FX_SAFE_FILESIZE result = FXSYS_atoi64(xrefpos_str.c_str());
- if (!result.IsValid() || result.ValueOrDie() >= GetFileAccess()->GetSize())
+ if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
return 0;
return result.ValueOrDie();
@@ -453,7 +448,7 @@ bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
return false;
const size_t max_entries_in_file =
- m_pSyntax->GetFileAccess()->GetSize() / kEntryConstSize;
+ m_pSyntax->GetDocumentSize() / kEntryConstSize;
if (new_size.ValueOrDie() > max_entries_in_file)
return false;
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 0af39d7529..7cd98c2c26 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -90,7 +90,6 @@ class CPDF_Parser {
CPDF_SecurityHandler* GetSecurityHandler() const {
return m_pSecurityHandler.get();
}
- RetainPtr<IFX_SeekableReadStream> GetFileAccess() const;
bool IsObjectFree(uint32_t objnum) const;
int GetFileVersion() const { return m_FileVersion; }
@@ -109,6 +108,8 @@ class CPDF_Parser {
return m_CrossRefTable.get();
}
+ CPDF_SyntaxParser* GetSyntax() const { return m_pSyntax.get(); }
+
void SetLinearizedHeader(std::unique_ptr<CPDF_LinearizedHeader> pLinearized);
protected:
@@ -121,6 +122,9 @@ class CPDF_Parser {
bool RebuildCrossRef();
private:
+ friend class cpdf_parser_ParseStartXRefWithHeaderOffset_Test;
+ friend class cpdf_parser_ParseStartXRef_Test;
+ friend class cpdf_parser_ParseLinearizedWithHeaderOffset_Test;
friend class CPDF_DataAvail;
struct CrossRefObjData {
diff --git a/core/fpdfapi/parser/cpdf_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
index db48493c2f..4ce1238ee3 100644
--- a/core/fpdfapi/parser/cpdf_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
@@ -3,8 +3,12 @@
// found in the LICENSE file.
#include <limits>
+#include <memory>
#include <string>
+#include <vector>
+#include "core/fpdfapi/parser/cpdf_linearized_header.h"
+#include "core/fpdfapi/parser/cpdf_object.h"
#include "core/fpdfapi/parser/cpdf_parser.h"
#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include "core/fxcrt/fx_extension.h"
@@ -43,13 +47,19 @@ class CPDF_TestParser : public CPDF_Parser {
}
// Setup reading from a buffer and initial states.
- bool InitTestFromBuffer(const unsigned char* buffer, size_t len) {
- // For the test file, the header is set at the beginning.
+ bool InitTestFromBufferWithOffset(const unsigned char* buffer,
+ size_t len,
+ int header_offset) {
m_pSyntax->InitParser(
- pdfium::MakeRetain<CFX_BufferSeekableReadStream>(buffer, len), 0);
+ pdfium::MakeRetain<CFX_BufferSeekableReadStream>(buffer, len),
+ header_offset);
return true;
}
+ bool InitTestFromBuffer(const unsigned char* buffer, size_t len) {
+ return InitTestFromBufferWithOffset(buffer, len, 0 /*header_offset*/);
+ }
+
private:
// Add test cases here as private friend so that protected members in
// CPDF_Parser can be accessed by test cases.
@@ -226,3 +236,58 @@ TEST(cpdf_parser, LoadCrossRefV4) {
}
}
}
+
+TEST(cpdf_parser, ParseStartXRef) {
+ CPDF_TestParser parser;
+ std::string test_file;
+ ASSERT_TRUE(
+ PathService::GetTestFilePath("annotation_stamp_with_ap.pdf", &test_file));
+ ASSERT_TRUE(parser.InitTestFromFile(test_file.c_str())) << test_file;
+
+ EXPECT_EQ(100940, parser.ParseStartXRef());
+ std::unique_ptr<CPDF_Object> cross_ref_v5_obj =
+ parser.ParseIndirectObjectAt(100940, 0);
+ ASSERT_TRUE(cross_ref_v5_obj);
+ EXPECT_EQ(75u, cross_ref_v5_obj->GetObjNum());
+}
+
+TEST(cpdf_parser, ParseStartXRefWithHeaderOffset) {
+ static constexpr FX_FILESIZE kTestHeaderOffset = 765;
+ std::string test_file;
+ ASSERT_TRUE(
+ PathService::GetTestFilePath("annotation_stamp_with_ap.pdf", &test_file));
+ RetainPtr<IFX_SeekableReadStream> pFileAccess =
+ IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
+ ASSERT_TRUE(pFileAccess);
+
+ std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
+ ASSERT_TRUE(pFileAccess->ReadBlock(&data.front() + kTestHeaderOffset, 0,
+ pFileAccess->GetSize()));
+ CPDF_TestParser parser;
+ parser.InitTestFromBufferWithOffset(&data.front(), data.size(),
+ kTestHeaderOffset);
+
+ EXPECT_EQ(100940, parser.ParseStartXRef());
+ std::unique_ptr<CPDF_Object> cross_ref_v5_obj =
+ parser.ParseIndirectObjectAt(100940, 0);
+ ASSERT_TRUE(cross_ref_v5_obj);
+ EXPECT_EQ(75u, cross_ref_v5_obj->GetObjNum());
+}
+
+TEST(cpdf_parser, ParseLinearizedWithHeaderOffset) {
+ static constexpr FX_FILESIZE kTestHeaderOffset = 765;
+ std::string test_file;
+ ASSERT_TRUE(PathService::GetTestFilePath("linearized.pdf", &test_file));
+ RetainPtr<IFX_SeekableReadStream> pFileAccess =
+ IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
+ ASSERT_TRUE(pFileAccess);
+
+ std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
+ ASSERT_TRUE(pFileAccess->ReadBlock(&data.front() + kTestHeaderOffset, 0,
+ pFileAccess->GetSize()));
+ CPDF_TestParser parser;
+ parser.InitTestFromBufferWithOffset(&data.front(), data.size(),
+ kTestHeaderOffset);
+
+ EXPECT_TRUE(parser.ParseLinearizedHeader());
+}
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
index f8f36aee11..af5ff488db 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
@@ -112,6 +112,10 @@ bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
return true;
}
+FX_FILESIZE CPDF_SyntaxParser::GetDocumentSize() const {
+ return m_FileLen - m_HeaderOffset;
+}
+
bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) {
pos += m_HeaderOffset;
if (pos >= m_FileLen)
@@ -714,6 +718,7 @@ void CPDF_SyntaxParser::InitParserWithValidator(
m_pFileBuf.clear();
m_HeaderOffset = HeaderOffset;
m_FileLen = validator->GetSize();
+ ASSERT(m_HeaderOffset <= m_FileLen);
m_Pos = 0;
m_pFileAccess = validator;
m_BufOffset = 0;
@@ -813,10 +818,6 @@ FX_FILESIZE CPDF_SyntaxParser::FindTag(const ByteStringView& tag) {
return -1;
}
-RetainPtr<IFX_SeekableReadStream> CPDF_SyntaxParser::GetFileAccess() const {
- return m_pFileAccess;
-}
-
bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const {
return m_BufOffset <= pos &&
pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.h b/core/fpdfapi/parser/cpdf_syntax_parser.h
index 962c32753d..ebd844c3e0 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.h
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.h
@@ -60,16 +60,24 @@ class CPDF_SyntaxParser {
ByteString GetNextWord(bool* bIsNumber);
ByteString PeekNextWord(bool* bIsNumber);
- RetainPtr<IFX_SeekableReadStream> GetFileAccess() const;
-
const RetainPtr<CPDF_ReadValidator>& GetValidator() const {
return m_pFileAccess;
}
uint32_t GetDirectNum();
bool GetNextChar(uint8_t& ch);
+ // The document size may be smaller than the file size.
+ // The syntax parser use position relative to document
+ // offset (|m_HeaderOffset|).
+ // The document size will be FileSize - "Header offset".
+ // All offsets was readed from document, should not be great than document
+ // size. Use it for checks instead of real file size.
+ FX_FILESIZE GetDocumentSize() const;
+
+ ByteString ReadString();
+ ByteString ReadHexString();
+
private:
- friend class CPDF_Parser;
friend class CPDF_DataAvail;
friend class cpdf_syntax_parser_ReadHexString_Test;
@@ -84,8 +92,6 @@ class CPDF_SyntaxParser {
const ByteStringView& tag,
bool checkKeyword);
- ByteString ReadString();
- ByteString ReadHexString();
unsigned int ReadEOLMarkers(FX_FILESIZE pos);
FX_FILESIZE FindWordPos(const ByteStringView& word);
FX_FILESIZE FindStreamEndPos();
@@ -99,6 +105,9 @@ class CPDF_SyntaxParser {
ParseType parse_type);
FX_FILESIZE m_Pos;
+ // The syntax parser use position relative to header offset.
+ // The header contains at file start, and can follow after some stuff. We
+ // ignore this stuff.
FX_FILESIZE m_HeaderOffset;
FX_FILESIZE m_FileLen;
WeakPtr<ByteStringPool> m_pPool;