summaryrefslogtreecommitdiff
path: root/core/fpdfapi/parser/cpdf_parser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdfapi/parser/cpdf_parser.cpp')
-rw-r--r--core/fpdfapi/parser/cpdf_parser.cpp153
1 files changed, 58 insertions, 95 deletions
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 599b04199d..854d6e7f60 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -679,31 +679,24 @@ bool CPDF_Parser::RebuildCrossRef() {
uint32_t gennum = 0;
int32_t depth = 0;
const uint32_t kBufferSize = 4096;
- std::vector<uint8_t> buffer(kBufferSize);
-
- FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
+ m_pSyntax->SetReadBufferSize(kBufferSize);
FX_FILESIZE start_pos = 0;
FX_FILESIZE start_pos1 = 0;
FX_FILESIZE last_obj = -1;
FX_FILESIZE last_xref = -1;
FX_FILESIZE last_trailer = -1;
- while (pos < m_pSyntax->m_FileLen) {
- const FX_FILESIZE saved_pos = pos;
- bool bOverFlow = false;
- uint32_t size =
- std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize);
- if (!m_pSyntax->GetFileAccess()->ReadBlock(buffer.data(), pos, size))
- break;
-
- for (uint32_t i = 0; i < size; i++) {
- uint8_t byte = buffer[i];
+ uint8_t byte = 0;
+ m_pSyntax->SetPos(0);
+ {
+ while (m_pSyntax->GetNextChar(byte)) {
+ const FX_FILESIZE current_char_pos = m_pSyntax->GetPos() - 1;
switch (state) {
case ParserState::kDefault:
if (PDFCharIsWhitespace(byte)) {
state = ParserState::kWhitespace;
} else if (std::isdigit(byte)) {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kWhitespace;
} else if (byte == '%') {
inside_index = 0;
@@ -724,7 +717,7 @@ bool CPDF_Parser::RebuildCrossRef() {
case ParserState::kWhitespace:
if (std::isdigit(byte)) {
- start_pos = pos + i;
+ start_pos = current_char_pos;
state = ParserState::kObjNum;
objnum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
} else if (byte == 't') {
@@ -734,7 +727,7 @@ bool CPDF_Parser::RebuildCrossRef() {
state = ParserState::kXref;
inside_index = 1;
} else if (!PDFCharIsWhitespace(byte)) {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
}
break;
@@ -746,7 +739,7 @@ bool CPDF_Parser::RebuildCrossRef() {
} else if (PDFCharIsWhitespace(byte)) {
state = ParserState::kPostObjNum;
} else {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kEndObj;
inside_index = 0;
}
@@ -754,14 +747,14 @@ bool CPDF_Parser::RebuildCrossRef() {
case ParserState::kPostObjNum:
if (std::isdigit(byte)) {
- start_pos1 = pos + i;
+ start_pos1 = current_char_pos;
state = ParserState::kGenNum;
gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
} else if (byte == 't') {
state = ParserState::kTrailer;
inside_index = 1;
} else if (!PDFCharIsWhitespace(byte)) {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
}
break;
@@ -773,7 +766,7 @@ bool CPDF_Parser::RebuildCrossRef() {
} else if (PDFCharIsWhitespace(byte)) {
state = ParserState::kPostGenNum;
} else {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
}
break;
@@ -786,13 +779,13 @@ bool CPDF_Parser::RebuildCrossRef() {
objnum = gennum;
gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
start_pos = start_pos1;
- start_pos1 = pos + i;
+ start_pos1 = current_char_pos;
state = ParserState::kGenNum;
} else if (byte == 't') {
state = ParserState::kTrailer;
inside_index = 1;
} else if (!PDFCharIsWhitespace(byte)) {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
}
break;
@@ -801,7 +794,7 @@ bool CPDF_Parser::RebuildCrossRef() {
switch (inside_index) {
case 1:
if (byte != 'b') {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
} else {
inside_index++;
@@ -809,7 +802,7 @@ bool CPDF_Parser::RebuildCrossRef() {
break;
case 2:
if (byte != 'j') {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
} else {
inside_index++;
@@ -817,12 +810,14 @@ bool CPDF_Parser::RebuildCrossRef() {
break;
case 3:
if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
- FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
+ const FX_FILESIZE obj_pos = start_pos;
last_obj = start_pos;
- FX_FILESIZE obj_end = 0;
- std::unique_ptr<CPDF_Object> pObject =
- ParseIndirectObjectAtByStrict(m_pDocument.Get(), obj_pos,
- objnum, &obj_end);
+ m_pSyntax->SetPos(obj_pos);
+ auto pObject = m_pSyntax->GetIndirectObject(
+ nullptr, CPDF_SyntaxParser::ParseType::kStrict);
+ m_pSyntax->SetPos(
+ std::max(current_char_pos, m_pSyntax->GetPos()));
+
if (CPDF_Stream* pStream = ToStream(pObject.get())) {
if (CPDF_Dictionary* pDict = pStream->GetDict()) {
if ((pDict->KeyExist("Type")) &&
@@ -837,36 +832,18 @@ bool CPDF_Parser::RebuildCrossRef() {
}
}
}
-
- FX_FILESIZE offset = 0;
- m_pSyntax->SetPos(obj_pos);
- offset = m_pSyntax->FindTag("obj", 0);
- if (offset == -1)
- offset = 0;
- else
- offset += 3;
-
- FX_FILESIZE nLen = obj_end - obj_pos - offset;
- if ((uint32_t)nLen > size - i) {
- pos = obj_end + m_pSyntax->m_HeaderOffset;
- bOverFlow = true;
- } else {
- i += (uint32_t)nLen;
- }
-
- if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
- m_ObjectInfo[objnum].pos) {
- if (pObject) {
- m_ObjectInfo[objnum].pos = obj_pos;
- m_ObjectInfo[objnum].gennum = gennum;
- }
- } else {
- m_ObjectInfo[objnum].pos = obj_pos;
- m_ObjectInfo[objnum].type = ObjectType::kNotCompressed;
- m_ObjectInfo[objnum].gennum = gennum;
+ ObjectInfo& info = m_ObjectInfo[objnum];
+ if (pObject || !info.pos) {
+ info.pos = obj_pos;
+ info.type = ObjectType::kNotCompressed;
+ // A newer version of an object should be located after the older
+ // version.
+ // Do not worry about gennum in this case, because we read the
+ // file
+ // from front to back.
+ info.gennum = gennum;
}
}
- --i;
state = ParserState::kDefault;
break;
}
@@ -875,35 +852,31 @@ bool CPDF_Parser::RebuildCrossRef() {
case ParserState::kTrailer:
if (inside_index == 7) {
if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
- last_trailer = pos + i - 7;
- m_pSyntax->SetPos(pos + i - m_pSyntax->m_HeaderOffset);
-
+ last_trailer = current_char_pos - 7;
+ m_pSyntax->SetPos(current_char_pos);
std::unique_ptr<CPDF_Object> pObj =
- m_pSyntax->GetObjectBody(m_pDocument.Get());
- if (pObj) {
- if (pObj->IsDictionary() || pObj->AsStream()) {
- CPDF_Stream* pStream = pObj->AsStream();
+ m_pSyntax->GetObjectBody(nullptr);
+ if (!pObj)
+ m_pSyntax->SetPos(current_char_pos);
+
+ {
+ if (pObj && (pObj->IsDictionary() || pObj->IsStream())) {
if (CPDF_Dictionary* pTrailer =
- pStream ? pStream->GetDict() : pObj->AsDictionary()) {
+ pObj->IsStream() ? pObj->AsStream()->GetDict()
+ : pObj->AsDictionary()) {
if (GetTrailer()) {
CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
CPDF_Reference* pRef = ToReference(pRoot);
if (!pRoot ||
(pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
- auto it = pTrailer->begin();
- while (it != pTrailer->end()) {
- const ByteString& key = it->first;
- CPDF_Object* pElement = it->second.get();
+ // This is a newer version of the trailer. Merge it with the old one.
+ for (auto it = pTrailer->begin();
+ it != pTrailer->end();) {
+ DCHECK(it->second->IsInline());
+ const ByteString key = it->first;
++it;
- uint32_t dwObjNum =
- pElement ? pElement->GetObjNum() : 0;
- if (dwObjNum) {
- GetTrailer()->SetNewFor<CPDF_Reference>(
- key, m_pDocument.Get(), dwObjNum);
- } else {
- GetTrailer()->SetFor(key, pElement->Clone());
- }
+ GetTrailer()->SetFor(key, pTrailer->RemoveFor(key));
}
}
} else {
@@ -925,24 +898,23 @@ bool CPDF_Parser::RebuildCrossRef() {
}
}
}
- --i;
state = ParserState::kDefault;
} else if (byte == "trailer"[inside_index]) {
inside_index++;
} else {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
}
break;
case ParserState::kXref:
if (inside_index == 4) {
- last_xref = pos + i - 4;
+ last_xref = current_char_pos - 4;
state = ParserState::kWhitespace;
} else if (byte == "xref"[inside_index]) {
inside_index++;
} else {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
}
break;
@@ -972,7 +944,7 @@ bool CPDF_Parser::RebuildCrossRef() {
case ParserState::kEscapedString:
if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
- --i;
+ m_pSyntax->SetPos(current_char_pos);
state = ParserState::kDefault;
}
break;
@@ -983,27 +955,16 @@ bool CPDF_Parser::RebuildCrossRef() {
} else if (byte == '%' || byte == '(' || byte == '<' ||
byte == '\\') {
state = ParserState::kDefault;
- --i;
+ m_pSyntax->SetPos(current_char_pos);
} else if (inside_index == 6) {
state = ParserState::kDefault;
- --i;
+ m_pSyntax->SetPos(current_char_pos);
} else if (byte == "endobj"[inside_index]) {
inside_index++;
}
break;
}
-
- if (bOverFlow) {
- size = 0;
- break;
- }
}
- pos += size;
-
- // If the position has not changed at all or went backwards in a loop
- // iteration, then break out to prevent infinite looping.
- if (pos <= saved_pos)
- break;
}
if (last_xref != -1 && last_xref > last_obj)
@@ -1011,6 +972,8 @@ bool CPDF_Parser::RebuildCrossRef() {
else if (last_trailer == -1 || last_xref < last_obj)
last_trailer = m_pSyntax->m_FileLen;
+ // Restore default buffer size.
+ m_pSyntax->SetReadBufferSize(CPDF_ModuleMgr::kFileBufSize);
return GetTrailer() && !m_ObjectInfo.empty();
}