summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Artem Strygin <art-snake@yandex-team.ru> 2018-07-20 12:03:16 +0000
committer Chromium commit bot <commit-bot@chromium.org> 2018-07-20 12:03:16 +0000
commit c68109a2dac3be544b7753d1fd677255d859745b (patch)
tree de09e60fc1a975482a8e4607de569492c4557826
parent abf9829bc4112e5d25e4d0e2f57341fac22daccd (diff)
download pdfium-chromium/3498.tar.xz
Rework of CPDF_Parser::RebuildCrossRef. (chromium/3498)
Use CPDF_SyntaxParser logic to rebuild crossref.

Change-Id: I394f64e76294b97c6a7c2b8984a880712fd193a7
Reviewed-on: https://pdfium-review.googlesource.com/37314
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
-rw-r--r-- core/fpdfapi/parser/cpdf_parser.cpp 303
-rw-r--r-- core/fpdfapi/parser/cpdf_parser.h 17
2 files changed, 42 insertions, 278 deletions
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index ecc0546de0..d4ad0656e6 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -617,276 +617,57 @@ bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
bool CPDF_Parser::RebuildCrossRef() {
auto cross_ref_table = pdfium::MakeUnique<CPDF_CrossRefTable>();
- ParserState state = ParserState::kDefault;
- int32_t inside_index = 0;
- uint32_t objnum = 0;
- uint32_t gennum = 0;
- int32_t depth = 0;
const uint32_t kBufferSize = 4096;
m_pSyntax->SetReadBufferSize(kBufferSize);
- FX_FILESIZE start_pos = 0;
- FX_FILESIZE start_pos1 = 0;
- FX_FILESIZE last_obj = -1;
- FX_FILESIZE last_xref = -1;
- FX_FILESIZE last_trailer = -1;
-
- uint8_t byte = 0;
m_pSyntax->SetPos(0);
- {
- while (m_pSyntax->GetNextChar(byte)) {
- const FX_FILESIZE current_char_pos = m_pSyntax->GetPos() - 1;
- switch (state) {
- case ParserState::kDefault:
- if (PDFCharIsWhitespace(byte)) {
- state = ParserState::kWhitespace;
- } else if (std::isdigit(byte)) {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kWhitespace;
- } else if (byte == '%') {
- inside_index = 0;
- state = ParserState::kComment;
- } else if (byte == '(') {
- state = ParserState::kString;
- depth = 1;
- } else if (byte == '<') {
- inside_index = 1;
- state = ParserState::kHexString;
- } else if (byte == '\\') {
- state = ParserState::kEscapedString;
- } else if (byte == 't') {
- state = ParserState::kTrailer;
- inside_index = 1;
- }
- break;
-
- case ParserState::kWhitespace:
- if (std::isdigit(byte)) {
- start_pos = current_char_pos;
- state = ParserState::kObjNum;
- objnum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
- } else if (byte == 't') {
- state = ParserState::kTrailer;
- inside_index = 1;
- } else if (byte == 'x') {
- state = ParserState::kXref;
- inside_index = 1;
- } else if (!PDFCharIsWhitespace(byte)) {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kObjNum:
- if (std::isdigit(byte)) {
- objnum = objnum * 10 +
- FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
- } else if (PDFCharIsWhitespace(byte)) {
- state = ParserState::kPostObjNum;
- } else {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kEndObj;
- inside_index = 0;
- }
- break;
-
- case ParserState::kPostObjNum:
- if (std::isdigit(byte)) {
- start_pos1 = current_char_pos;
- state = ParserState::kGenNum;
- gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
- } else if (byte == 't') {
- state = ParserState::kTrailer;
- inside_index = 1;
- } else if (!PDFCharIsWhitespace(byte)) {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kGenNum:
- if (std::isdigit(byte)) {
- gennum = gennum * 10 +
- FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
- } else if (PDFCharIsWhitespace(byte)) {
- state = ParserState::kPostGenNum;
- } else {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kPostGenNum:
- if (byte == 'o') {
- state = ParserState::kBeginObj;
- inside_index = 1;
- } else if (std::isdigit(byte)) {
- objnum = gennum;
- gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
- start_pos = start_pos1;
- start_pos1 = current_char_pos;
- state = ParserState::kGenNum;
- } else if (byte == 't') {
- state = ParserState::kTrailer;
- inside_index = 1;
- } else if (!PDFCharIsWhitespace(byte)) {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kBeginObj:
- switch (inside_index) {
- case 1:
- if (byte != 'b') {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- } else {
- inside_index++;
- }
- break;
- case 2:
- if (byte != 'j') {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- } else {
- inside_index++;
- }
- break;
- case 3:
- if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
- const FX_FILESIZE obj_pos = start_pos;
- last_obj = start_pos;
- m_pSyntax->SetPos(obj_pos);
- auto pObject = m_pSyntax->GetIndirectObject(
- nullptr, CPDF_SyntaxParser::ParseType::kStrict);
- m_pSyntax->SetPos(
- std::max(current_char_pos, m_pSyntax->GetPos()));
-
- if (CPDF_Stream* pStream = ToStream(pObject.get())) {
- if (CPDF_Dictionary* pDict = pStream->GetDict()) {
- if ((pDict->KeyExist("Type")) &&
- (pDict->GetStringFor("Type") == "XRef" &&
- pDict->KeyExist("Size"))) {
- CPDF_Object* pRoot = pDict->GetObjectFor("Root");
- if (pRoot && pRoot->GetDict() &&
- pRoot->GetDict()->GetObjectFor("Pages")) {
- cross_ref_table->Update(
- pdfium::MakeUnique<CPDF_CrossRefTable>(
- ToDictionary(pDict->Clone())));
- }
- }
- }
- }
- if (objnum < kMaxObjectNumber)
- cross_ref_table->AddNormal(objnum, gennum, obj_pos);
- }
- state = ParserState::kDefault;
- break;
- }
- break;
-
- case ParserState::kTrailer:
- if (inside_index == 7) {
- if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
- last_trailer = current_char_pos - 7;
- m_pSyntax->SetPos(current_char_pos);
- std::unique_ptr<CPDF_Object> pObj =
- m_pSyntax->GetObjectBody(nullptr);
- if (!pObj)
- m_pSyntax->SetPos(current_char_pos);
-
- if (pObj) {
- cross_ref_table->Update(pdfium::MakeUnique<CPDF_CrossRefTable>(
- ToDictionary(pObj->IsStream()
- ? pObj->AsStream()->GetDict()->Clone()
- : std::move(pObj))));
-
- FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
- ByteString strWord = m_pSyntax->GetKeyword();
- if (!strWord.Compare("startxref")) {
- bool bNumber;
- ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber);
- if (bNumber)
- last_xref = FXSYS_atoi(bsOffset.c_str());
- } else {
- m_pSyntax->SetPos(dwSavePos);
- }
- }
- }
- state = ParserState::kDefault;
- } else if (byte == "trailer"[inside_index]) {
- inside_index++;
- } else {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kXref:
- if (inside_index == 4) {
- last_xref = current_char_pos - 4;
- state = ParserState::kWhitespace;
- } else if (byte == "xref"[inside_index]) {
- inside_index++;
- } else {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kComment:
- if (PDFCharIsLineEnding(byte))
- state = ParserState::kDefault;
- break;
-
- case ParserState::kString:
- if (byte == ')') {
- if (depth > 0)
- depth--;
- } else if (byte == '(') {
- depth++;
- }
-
- if (!depth)
- state = ParserState::kDefault;
- break;
- case ParserState::kHexString:
- if (byte == '>' || (byte == '<' && inside_index == 1))
- state = ParserState::kDefault;
- inside_index = 0;
- break;
+ bool bIsNumber;
+ std::vector<std::pair<uint32_t, FX_FILESIZE>> numbers;
+ for (ByteString word = m_pSyntax->GetNextWord(&bIsNumber); !word.IsEmpty();
+ word = m_pSyntax->GetNextWord(&bIsNumber)) {
+ if (bIsNumber) {
+ numbers.emplace_back(FXSYS_atoui(word.c_str()),
+ m_pSyntax->GetPos() - word.GetLength());
+ if (numbers.size() > 2u)
+ numbers.erase(numbers.begin());
+ continue;
+ }
- case ParserState::kEscapedString:
- if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
- m_pSyntax->SetPos(current_char_pos);
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kEndObj:
- if (PDFCharIsWhitespace(byte)) {
- state = ParserState::kDefault;
- } else if (byte == '%' || byte == '(' || byte == '<' ||
- byte == '\\') {
- state = ParserState::kDefault;
- m_pSyntax->SetPos(current_char_pos);
- } else if (inside_index == 6) {
- state = ParserState::kDefault;
- m_pSyntax->SetPos(current_char_pos);
- } else if (byte == "endobj"[inside_index]) {
- inside_index++;
- }
- break;
+ if (word == "(") {
+ m_pSyntax->ReadString();
+ } else if (word == "<") {
+ m_pSyntax->ReadHexString();
+ } else if (word == "trailer") {
+ std::unique_ptr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
+ if (pTrailer) {
+ cross_ref_table = CPDF_CrossRefTable::MergeUp(
+ std::move(cross_ref_table),
+ pdfium::MakeUnique<CPDF_CrossRefTable>(ToDictionary(
+ pTrailer->IsStream() ? pTrailer->AsStream()->GetDict()->Clone()
+ : std::move(pTrailer))));
+ }
+ } else if (word == "obj" && numbers.size() == 2u) {
+ const FX_FILESIZE obj_pos = numbers[0].second;
+ const uint32_t obj_num = numbers[0].first;
+ const uint32_t gen_num = numbers[1].first;
+ if (obj_num < kMaxObjectNumber)
+ cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
+
+ m_pSyntax->SetPos(obj_pos);
+ const std::unique_ptr<CPDF_Stream> pStream =
+ ToStream(m_pSyntax->GetIndirectObject(
+ nullptr, CPDF_SyntaxParser::ParseType::kStrict));
+
+ if (pStream && pStream->GetDict()->GetStringFor("Type") == "XRef") {
+ cross_ref_table = CPDF_CrossRefTable::MergeUp(
+ std::move(cross_ref_table),
+ pdfium::MakeUnique<CPDF_CrossRefTable>(
+ ToDictionary(pStream->GetDict()->Clone())));
}
}
+ numbers.clear();
}
- if (last_xref != -1 && last_xref > last_obj)
- last_trailer = last_xref;
- else if (last_trailer == -1 || last_xref < last_obj)
- last_trailer = m_pSyntax->m_FileLen;
-
m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
std::move(cross_ref_table));
// Resore default buffer size.
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index fc4e49910d..92aaf7df6a 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -123,23 +123,6 @@ class CPDF_Parser {
private:
friend class CPDF_DataAvail;
- enum class ParserState {
- kDefault,
- kComment,
- kWhitespace,
- kString,
- kHexString,
- kEscapedString,
- kXref,
- kObjNum,
- kPostObjNum,
- kGenNum,
- kPostGenNum,
- kTrailer,
- kBeginObj,
- kEndObj
- };
-
struct CrossRefObjData {
uint32_t obj_num = 0;
ObjectInfo info;