summaryrefslogtreecommitdiff
path: root/core/fpdfapi/parser
diff options
context:
space:
mode:
author Artem Strygin <art-snake@yandex-team.ru> 2017-08-02 14:27:22 +0300
committer Chromium commit bot <commit-bot@chromium.org> 2017-08-02 21:20:33 +0000
commit 17b1c191da26e477c6898a8b06f2ff624f9e4c6b (patch)
tree 6c383224fc0c8c1d9ef2c8eb9df6fe0352fe4b9a /core/fpdfapi/parser
parent 17e54528fe0a2203074f4d086677d14c33cf7253 (diff)
download pdfium-17b1c191da26e477c6898a8b06f2ff624f9e4c6b.tar.xz
Unify parsing of cross refs v4
Change-Id: I7e3d45263a0bae61fd86fd4c3710de7fc0b9347d
Reviewed-on: https://pdfium-review.googlesource.com/9290
Reviewed-by: Wei Li <weili@chromium.org>
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Diffstat (limited to 'core/fpdfapi/parser')
-rw-r--r--core/fpdfapi/parser/cpdf_parser.cpp173
-rw-r--r--core/fpdfapi/parser/cpdf_parser.h15
2 files changed, 111 insertions, 77 deletions
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 2ca820eb98..e33cec0165 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -440,76 +440,93 @@ bool CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
m_pSyntax->SetPos(dwStartPos);
m_SortedOffset.insert(pos);
+ std::vector<CrossRefObjData> objects;
+ if (!ParseAndAppendCrossRefSubsectionData(0, dwObjCount, &objects))
+ return false;
+ MergeCrossRefObjectsData(objects);
+ return true;
+}
- uint32_t start_objnum = 0;
- uint32_t count = dwObjCount;
- FX_FILESIZE SavedPos = m_pSyntax->GetPos();
+bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
+ uint32_t start_objnum,
+ uint32_t count,
+ std::vector<CrossRefObjData>* out_objects) {
+ // Each entry shall be exactly 20 bytes.
+ // A sample entry looks like:
+ // "0000000000 00007 f\r\n"
+ static constexpr int32_t kEntryConstSize = 20;
+
+ if (!out_objects) {
+ m_pSyntax->SetPos(m_pSyntax->GetPos() + count * kEntryConstSize);
+ return true;
+ }
+ const size_t start_obj_index = out_objects->size();
+ out_objects->resize(start_obj_index + count);
- const int32_t recordsize = 20;
- std::vector<char> buf(1024 * recordsize + 1);
- buf[1024 * recordsize] = '\0';
+ std::vector<char> buf(1024 * kEntryConstSize + 1);
+ buf[1024 * kEntryConstSize] = '\0';
int32_t nBlocks = count / 1024 + 1;
for (int32_t block = 0; block < nBlocks; block++) {
int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
- uint32_t dwReadSize = block_size * recordsize;
- if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
- return false;
-
if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
- dwReadSize)) {
+ block_size * kEntryConstSize))
return false;
- }
for (int32_t i = 0; i < block_size; i++) {
- uint32_t objnum = start_objnum + block * 1024 + i;
- char* pEntry = &buf[i * recordsize];
+ CrossRefObjData& obj_data =
+ (*out_objects)[start_obj_index + block * 1024 + i];
+
+ const uint32_t objnum = start_objnum + block * 1024 + i;
+
+ obj_data.obj_num = objnum;
+
+ ObjectInfo& info = obj_data.info;
+
+ char* pEntry = &buf[i * kEntryConstSize];
if (pEntry[17] == 'f') {
- m_ObjectInfo[objnum].pos = 0;
- m_ObjectInfo[objnum].type = ObjectType::kFree;
+ info.pos = 0;
+ info.type = ObjectType::kFree;
} else {
- int32_t offset = FXSYS_atoi(pEntry);
- if (offset == 0) {
+ const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
+ if (!offset.IsValid())
+ return false;
+
+ if (offset.ValueOrDie() == 0) {
for (int32_t c = 0; c < 10; c++) {
if (!std::isdigit(pEntry[c]))
return false;
}
}
- m_ObjectInfo[objnum].pos = offset;
- int32_t version = FXSYS_atoi(pEntry + 11);
- if (version >= 1)
- m_bVersionUpdated = true;
+ info.pos = offset.ValueOrDie();
- m_ObjectInfo[objnum].gennum = version;
- if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
- m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
-
- m_ObjectInfo[objnum].type = ObjectType::kNotCompressed;
+ // TODO(art-snake): The info.gennum is uint16_t, but version may be
+ // greater than max<uint16_t>. This issue needs to be solved.
+ const int32_t version = FXSYS_atoi(pEntry + 11);
+ info.gennum = version;
+ info.type = ObjectType::kNotCompressed;
}
}
}
- m_pSyntax->SetPos(SavedPos + count * recordsize);
return true;
}
-bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
- FX_FILESIZE streampos,
- bool bSkip) {
- m_pSyntax->SetPos(pos);
+bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects,
+ uint32_t* start_obj_num_at_last_block) {
+ if (out_objects)
+ out_objects->clear();
+
if (m_pSyntax->GetKeyword() != "xref")
return false;
-
- m_SortedOffset.insert(pos);
- if (streampos)
- m_SortedOffset.insert(streampos);
-
+ std::vector<CrossRefObjData> result_objects;
while (1) {
FX_FILESIZE SavedPos = m_pSyntax->GetPos();
bool bIsNumber;
CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
- if (word.IsEmpty())
+ if (word.IsEmpty()) {
return false;
+ }
if (!bIsNumber) {
m_pSyntax->SetPos(SavedPos);
@@ -519,55 +536,57 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
uint32_t start_objnum = FXSYS_atoui(word.c_str());
if (start_objnum >= kMaxObjectNumber)
return false;
+ if (start_obj_num_at_last_block)
+ *start_obj_num_at_last_block = start_objnum;
uint32_t count = m_pSyntax->GetDirectNum();
m_pSyntax->ToNextWord();
SavedPos = m_pSyntax->GetPos();
- const int32_t recordsize = 20;
-
- m_dwXrefStartObjNum = start_objnum;
- if (!bSkip) {
- std::vector<char> buf(1024 * recordsize + 1);
- buf[1024 * recordsize] = '\0';
-
- int32_t nBlocks = count / 1024 + 1;
- for (int32_t block = 0; block < nBlocks; block++) {
- int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
- m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
- block_size * recordsize);
-
- for (int32_t i = 0; i < block_size; i++) {
- uint32_t objnum = start_objnum + block * 1024 + i;
- char* pEntry = &buf[i * recordsize];
- if (pEntry[17] == 'f') {
- m_ObjectInfo[objnum].pos = 0;
- m_ObjectInfo[objnum].type = ObjectType::kFree;
- } else {
- FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
- if (offset == 0) {
- for (int32_t c = 0; c < 10; c++) {
- if (!std::isdigit(pEntry[c]))
- return false;
- }
- }
- m_ObjectInfo[objnum].pos = offset;
- int32_t version = FXSYS_atoi(pEntry + 11);
- if (version >= 1)
- m_bVersionUpdated = true;
+ if (!ParseAndAppendCrossRefSubsectionData(
+ start_objnum, count, out_objects ? &result_objects : nullptr)) {
+ return false;
+ }
+ }
+ if (out_objects)
+ *out_objects = std::move(result_objects);
+ return true;
+}
- m_ObjectInfo[objnum].gennum = version;
- if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
- m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
+bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
+ FX_FILESIZE streampos,
+ bool bSkip) {
+ m_pSyntax->SetPos(pos);
+ if (m_pSyntax->GetKeyword() != "xref")
+ return false;
- m_ObjectInfo[objnum].type = ObjectType::kNotCompressed;
- }
- }
+ m_SortedOffset.insert(pos);
+ if (streampos)
+ m_SortedOffset.insert(streampos);
+
+ m_pSyntax->SetPos(pos);
+ std::vector<CrossRefObjData> objects;
+ if (!ParseCrossRefV4(bSkip ? nullptr : &objects, &m_dwXrefStartObjNum))
+ return false;
+
+ MergeCrossRefObjectsData(objects);
+
+ return !streampos || LoadCrossRefV5(&streampos, false);
+}
+
+void CPDF_Parser::MergeCrossRefObjectsData(
+ const std::vector<CrossRefObjData>& objects) {
+ for (const auto& obj : objects) {
+ m_ObjectInfo[obj.obj_num] = obj.info;
+ if (obj.info.type != ObjectType::kFree) {
+ if (obj.info.gennum > 0)
+ m_bVersionUpdated = true;
+ if (obj.info.type == ObjectType::kNotCompressed &&
+ obj.info.pos < m_pSyntax->m_FileLen) {
+ m_SortedOffset.insert(obj.info.pos);
}
}
- m_pSyntax->SetPos(SavedPos + count * recordsize);
}
- return !streampos || LoadCrossRefV5(&streampos, false);
}
bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index ece1e6a2d4..759d042360 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -148,6 +148,11 @@ class CPDF_Parser {
kEndObj
};
+ struct CrossRefObjData {
+ uint32_t obj_num = 0;
+ ObjectInfo info;
+ };
+
CPDF_Object* ParseDirect(CPDF_Object* pObj);
bool LoadAllCrossRefV4(FX_FILESIZE pos);
bool LoadAllCrossRefV5(FX_FILESIZE pos);
@@ -169,6 +174,16 @@ class CPDF_Parser {
// the objects.
bool VerifyCrossRefV4();
+ // If out_objects is null, the parser position is moved to the end of the
+ // subsection without additional validation.
+ bool ParseAndAppendCrossRefSubsectionData(
+ uint32_t start_objnum,
+ uint32_t count,
+ std::vector<CrossRefObjData>* out_objects);
+ bool ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects,
+ uint32_t* start_obj_num_at_last_block);
+ void MergeCrossRefObjectsData(const std::vector<CrossRefObjData>& objects);
+
CFX_UnownedPtr<CPDF_Document> m_pDocument;
bool m_bHasParsed;
bool m_bXRefStream;