diff options
author | Dan Sinclair <dsinclair@chromium.org> | 2016-03-03 08:59:22 -0500 |
---|---|---|
committer | Dan Sinclair <dsinclair@chromium.org> | 2016-03-03 08:59:22 -0500 |
commit | 42fb301abcf6b9f6a580f3d30defeadedf5d7ebd (patch) | |
tree | 99810ae95593d9d382634b2b7c523b3f66b10136 /core | |
parent | 41c7a97a1b303e43652f40f1b96ab7751783d8ed (diff) | |
download | pdfium-42fb301abcf6b9f6a580f3d30defeadedf5d7ebd.tar.xz |
Fix parsing of object numbers > 16,777,216.
Currently, there is a check that an object number is <= 0x1000000. If that
check fails, we end up putting the parser into a bad state and fail to load
documents. The object does not need to be in the XRef table, or referenced
from the document, just be in the document.
This Cl removes the size check and updates the various atoi calls to use a
uint32_t instead of an int32_t so we don't end up getting strange values when
converting from a string.
BUG=455199
R=tsepez@chromium.org
Review URL: https://codereview.chromium.org/1755273002 .
Diffstat (limited to 'core')
-rw-r--r-- | core/include/fpdfapi/fpdf_parser.h | 15 | ||||
-rw-r--r-- | core/include/fxcrt/fx_system.h | 1 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp | 9 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp | 2 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 36 | ||||
-rw-r--r-- | core/src/fxcrt/fx_basic_gcc.cpp | 25 | ||||
-rw-r--r-- | core/src/fxcrt/fx_basic_gcc_unittest.cpp | 88 |
7 files changed, 134 insertions, 42 deletions
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index c57d9f20b9..5903e82a16 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -239,43 +239,33 @@ class CPDF_SyntaxParser { void InitParser(IFX_FileRead* pFileAccess, FX_DWORD HeaderOffset); FX_FILESIZE SavePos() const { return m_Pos; } - void RestorePos(FX_FILESIZE pos) { m_Pos = pos; } CPDF_Object* GetObject(CPDF_IndirectObjectHolder* pObjList, FX_DWORD objnum, FX_DWORD gennum, FX_BOOL bDecrypt); - CPDF_Object* GetObjectByStrict(CPDF_IndirectObjectHolder* pObjList, FX_DWORD objnum, FX_DWORD gennum); - - int GetDirectNum(); - CFX_ByteString GetKeyword(); void ToNextLine(); - void ToNextWord(); FX_BOOL SearchWord(const CFX_ByteStringC& word, FX_BOOL bWholeWord, FX_BOOL bForward, FX_FILESIZE limit); - int SearchMultiWord(const CFX_ByteStringC& words, FX_BOOL bWholeWord, FX_FILESIZE limit); - FX_FILESIZE FindTag(const CFX_ByteStringC& tag, FX_FILESIZE limit); void SetEncrypt(std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler); - FX_BOOL GetCharAt(FX_FILESIZE pos, uint8_t& ch); - FX_BOOL ReadBlock(uint8_t* pBuf, FX_DWORD size); - + FX_BOOL GetCharAt(FX_FILESIZE pos, uint8_t& ch); CFX_ByteString GetNextWord(bool* bIsNumber); protected: @@ -318,6 +308,9 @@ class CPDF_SyntaxParser { std::unique_ptr<CPDF_CryptoHandler> m_pCryptoHandler; uint8_t m_WordBuffer[257]; FX_DWORD m_WordSize; + + private: + uint32_t GetDirectNum(); }; class CPDF_Parser { diff --git a/core/include/fxcrt/fx_system.h b/core/include/fxcrt/fx_system.h index e28099ed4a..462fb3babf 100644 --- a/core/include/fxcrt/fx_system.h +++ b/core/include/fxcrt/fx_system.h @@ -264,6 +264,7 @@ wchar_t* FXSYS_wcsupr(wchar_t* str); #define FXSYS_HIWORD(dword) ((FX_WORD)((dword) >> 16)) #define FXSYS_LOWORD(dword) ((FX_WORD)(dword)) int32_t FXSYS_atoi(const FX_CHAR* str); +uint32_t FXSYS_atoui(const FX_CHAR* str); int32_t FXSYS_wtoi(const FX_WCHAR* str); int64_t FXSYS_atoi64(const FX_CHAR* str); int64_t FXSYS_wtoi64(const FX_WCHAR* str); diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp index 9bb9d41db1..7ed82fdfb6 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp @@ -105,3 +105,12 @@ TEST_F(FPDFParserDecodeEmbeddertest, Bug_555784) { UnloadPage(page); } +TEST_F(FPDFParserDecodeEmbeddertest, Bug_455199) { + // Tests object numbers with a value > 01000000. + // Should open successfully. + EXPECT_TRUE(OpenDocument("bug_455199.pdf")); + FPDF_PAGE page = LoadPage(0); + FPDF_BITMAP bitmap = RenderPage(page); + FPDFBitmap_Destroy(bitmap); + UnloadPage(page); +} diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp index b80568b63c..fbe6a8fb3a 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp @@ -51,7 +51,7 @@ void CFDF_Document::ParseStream(IFX_FileRead* pFile, FX_BOOL bOwnFile) { bool bNumber; CFX_ByteString word = parser.GetNextWord(&bNumber); if (bNumber) { - FX_DWORD objnum = FXSYS_atoi(word); + FX_DWORD objnum = FXSYS_atoui(word); word = parser.GetNextWord(&bNumber); if (!bNumber) { break; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index 825a8a26d4..16d1134cb1 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -534,7 +534,7 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, break; } - FX_DWORD start_objnum = FXSYS_atoi(word); + FX_DWORD start_objnum = FXSYS_atoui(word); if (start_objnum >= kMaxObjectNumber) return false; @@ -758,10 +758,6 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { break; case 3: if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { - if (objnum > 0x1000000) { - state = ParserState::kDefault; - break; - } FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; m_SortedOffset.insert(obj_pos); last_obj = start_pos; @@ -1323,7 +1319,7 @@ void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, return; } - FX_DWORD parser_objnum = FXSYS_atoi(word); + FX_DWORD parser_objnum = FXSYS_atoui(word); if (parser_objnum && parser_objnum != objnum) { m_Syntax.RestorePos(SavedPos); return; @@ -1395,7 +1391,7 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( FX_FILESIZE objOffset = m_Syntax.SavePos(); objOffset -= word.GetLength(); - FX_DWORD parser_objnum = FXSYS_atoi(word); + FX_DWORD parser_objnum = FXSYS_atoui(word); if (objnum && parser_objnum != objnum) { m_Syntax.RestorePos(SavedPos); return nullptr; @@ -1407,7 +1403,7 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( return nullptr; } - FX_DWORD parser_gennum = FXSYS_atoi(word); + FX_DWORD parser_gennum = FXSYS_atoui(word); if (m_Syntax.GetKeyword() != "obj") { m_Syntax.RestorePos(SavedPos); return nullptr; @@ -1444,7 +1440,7 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( return nullptr; } - FX_DWORD parser_objnum = FXSYS_atoi(word); + FX_DWORD parser_objnum = FXSYS_atoui(word); if (objnum && parser_objnum != objnum) { m_Syntax.RestorePos(SavedPos); return nullptr; @@ -1456,7 +1452,7 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( return nullptr; } - FX_DWORD gennum = FXSYS_atoi(word); + FX_DWORD gennum = FXSYS_atoui(word); if (m_Syntax.GetKeyword() != "obj") { m_Syntax.RestorePos(SavedPos); return nullptr; @@ -1506,12 +1502,12 @@ FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, if (!bIsNumber) return FALSE; - FX_DWORD objnum = FXSYS_atoi(word); + FX_DWORD objnum = FXSYS_atoui(word); word = m_Syntax.GetNextWord(&bIsNumber); if (!bIsNumber) return FALSE; - FX_DWORD gennum = FXSYS_atoi(word); + FX_DWORD gennum = FXSYS_atoui(word); if (m_Syntax.GetKeyword() != "obj") { m_Syntax.RestorePos(SavedPos); return FALSE; @@ -2039,7 +2035,7 @@ CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, if (bIsNumber) { CFX_ByteString nextword2 = GetNextWord(nullptr); if (nextword2 == "R") { - FX_DWORD objnum = FXSYS_atoi(word); + FX_DWORD objnum = FXSYS_atoui(word); return new CPDF_Reference(pObjList, objnum); } } @@ -2163,7 +2159,7 @@ CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict( if (bIsNumber) { CFX_ByteString nextword2 = GetNextWord(nullptr); if (nextword2 == "R") - return new CPDF_Reference(pObjList, FXSYS_atoi(word)); + return new CPDF_Reference(pObjList, FXSYS_atoui(word)); } m_Pos = SavedPos; return new CPDF_Number(word); @@ -2440,14 +2436,14 @@ void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); } -int32_t CPDF_SyntaxParser::GetDirectNum() { +uint32_t CPDF_SyntaxParser::GetDirectNum() { bool bIsNumber; GetNextWordInternal(&bIsNumber); if (!bIsNumber) return 0; m_WordBuffer[m_WordSize] = 0; - return FXSYS_atoi(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); + return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); } bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, @@ -3554,7 +3550,7 @@ CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( if (!bIsNumber) return nullptr; - FX_DWORD parser_objnum = FXSYS_atoi(word); + FX_DWORD parser_objnum = FXSYS_atoui(word); if (objnum && parser_objnum != objnum) return nullptr; @@ -3562,7 +3558,7 @@ CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( if (!bIsNumber) return nullptr; - FX_DWORD gennum = FXSYS_atoi(word); + FX_DWORD gennum = FXSYS_atoui(word); if (m_syntaxParser.GetKeyword() != "obj") { m_syntaxParser.RestorePos(SavedPos); return nullptr; @@ -3611,7 +3607,7 @@ FX_BOOL CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen) { if (!bNumber) return FALSE; - FX_DWORD objnum = FXSYS_atoi(wordObjNum); + FX_DWORD objnum = FXSYS_atoui(wordObjNum); if (m_pLinearized) { m_pLinearized->Release(); m_pLinearized = nullptr; @@ -3704,7 +3700,7 @@ int32_t CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints, if (!bNumber) return -1; - FX_DWORD objNum = FXSYS_atoi(objnum); + FX_DWORD objNum = FXSYS_atoui(objnum); CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum); if (!pObj) { m_Pos += m_parser.m_Syntax.SavePos(); diff --git a/core/src/fxcrt/fx_basic_gcc.cpp b/core/src/fxcrt/fx_basic_gcc.cpp index d905d6b498..607f095570 100644 --- a/core/src/fxcrt/fx_basic_gcc.cpp +++ b/core/src/fxcrt/fx_basic_gcc.cpp @@ -13,20 +13,21 @@ template <class T> T FXSYS_StrToInt(const FX_CHAR* str) { - FX_BOOL neg = FALSE; + bool neg = false; if (!str) return 0; - if (*str == '-') { - neg = TRUE; + if (std::numeric_limits<T>::is_signed && *str == '-') { + neg = true; str++; } T num = 0; while (*str && std::isdigit(*str)) { - if (num > (std::numeric_limits<T>::max() - 9) / 10) + T val = FXSYS_toDecimalDigit(*str); + if (num > (std::numeric_limits<T>::max() - val) / 10) break; - num = num * 10 + FXSYS_toDecimalDigit(*str); + num = num * 10 + val; str++; } return neg ? -num : num; @@ -34,20 +35,21 @@ T FXSYS_StrToInt(const FX_CHAR* str) { template <class T> T FXSYS_StrToInt(const FX_WCHAR* str) { - FX_BOOL neg = FALSE; + bool neg = false; if (!str) return 0; - if (*str == '-') { - neg = TRUE; + if (std::numeric_limits<T>::is_signed && *str == '-') { + neg = true; str++; } T num = 0; while (*str && std::iswdigit(*str)) { - if (num > (std::numeric_limits<T>::max() - 9) / 10) + T val = FXSYS_toDecimalDigitWide(*str); + if (num > (std::numeric_limits<T>::max() - val) / 10) break; - num = num * 10 + FXSYS_toDecimalDigitWide(*str); + num = num * 10 + val; str++; } return neg ? -num : num; @@ -93,6 +95,9 @@ extern "C" { int32_t FXSYS_atoi(const FX_CHAR* str) { return FXSYS_StrToInt<int32_t>(str); } +uint32_t FXSYS_atoui(const FX_CHAR* str) { + return FXSYS_StrToInt<uint32_t>(str); +} int32_t FXSYS_wtoi(const FX_WCHAR* str) { return FXSYS_StrToInt<int32_t>(str); } diff --git a/core/src/fxcrt/fx_basic_gcc_unittest.cpp b/core/src/fxcrt/fx_basic_gcc_unittest.cpp new file mode 100644 index 0000000000..eb1e0669ae --- /dev/null +++ b/core/src/fxcrt/fx_basic_gcc_unittest.cpp @@ -0,0 +1,88 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/include/fxcrt/fx_system.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(fxcrt, FXSYS_atoi) { + EXPECT_EQ(0, FXSYS_atoi("")); + EXPECT_EQ(0, FXSYS_atoi("0")); + EXPECT_EQ(-1, FXSYS_atoi("-1")); + EXPECT_EQ(2345, FXSYS_atoi("2345")); + EXPECT_EQ(2147483647, FXSYS_atoi("2147483647")); + EXPECT_EQ(-2147483647, FXSYS_atoi("-2147483647")); + EXPECT_EQ(9, FXSYS_atoi("9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(2147483623, FXSYS_atoi("2147483623423412348")); + EXPECT_EQ(214748364, FXSYS_atoi("2147483648")); + // The digit is parsed as a positive value, so we end up not being able to + // handle the largest possible negative value. + EXPECT_EQ(-214748364, FXSYS_atoi("-2147483648")); +} + +TEST(fxcrt, FXSYS_atoi64) { + EXPECT_EQ(0, FXSYS_atoi64("")); + EXPECT_EQ(0, FXSYS_atoi64("0")); + EXPECT_EQ(-1, FXSYS_atoi64("-1")); + EXPECT_EQ(2345, FXSYS_atoi64("2345")); + EXPECT_EQ(9223372036854775807LL, FXSYS_atoi64("9223372036854775807")); + EXPECT_EQ(-9223372036854775807LL, FXSYS_atoi64("-9223372036854775807")); + EXPECT_EQ(9, FXSYS_atoi64("9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(9223372036854712341LL, FXSYS_atoi64("922337203685471234123475807")); + EXPECT_EQ(922337203685477580LL, FXSYS_atoi64("9223372036854775808")); + // The digit is parsed as a positive value, so we end up not being able to + // handle the largest possible negative value. + EXPECT_EQ(-922337203685477580LL, FXSYS_atoi64("-9223372036854775808")); +} + +TEST(fxcrt, FXSYS_wtoi) { + EXPECT_EQ(0, FXSYS_wtoi(L"")); + EXPECT_EQ(0, FXSYS_wtoi(L"0")); + EXPECT_EQ(-1, FXSYS_wtoi(L"-1")); + EXPECT_EQ(2345, FXSYS_wtoi(L"2345")); + EXPECT_EQ(2147483647, FXSYS_wtoi(L"2147483647")); + EXPECT_EQ(-2147483647, FXSYS_wtoi(L"-2147483647")); + EXPECT_EQ(9, FXSYS_wtoi64(L"9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(2147483623, FXSYS_wtoi(L"2147483623423412348")); + EXPECT_EQ(214748364, FXSYS_wtoi(L"2147483648")); + // The digit is parsed as a positive value, so we end up not being able to + // handle the largest possible negative value. + EXPECT_EQ(-214748364, FXSYS_wtoi(L"-2147483648")); +} + +TEST(fxcrt, FXSYS_wtoi64) { + EXPECT_EQ(0, FXSYS_wtoi64(L"")); + EXPECT_EQ(0, FXSYS_wtoi64(L"0")); + EXPECT_EQ(-1, FXSYS_wtoi64(L"-1")); + EXPECT_EQ(2345, FXSYS_wtoi64(L"2345")); + EXPECT_EQ(9223372036854775807LL, FXSYS_wtoi64(L"9223372036854775807")); + EXPECT_EQ(-9223372036854775807LL, FXSYS_wtoi64(L"-9223372036854775807")); + EXPECT_EQ(9, FXSYS_wtoi64(L"9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(9223372036854712341LL, + FXSYS_wtoi64(L"922337203685471234123475807")); + EXPECT_EQ(922337203685477580LL, FXSYS_wtoi64(L"9223372036854775808")); + // The digit is parsed as a positive value, so we end up not being able to + // handle the largest possible negative value. + EXPECT_EQ(-922337203685477580LL, FXSYS_wtoi64(L"-9223372036854775808")); +} + +TEST(fxcrt, FXSYS_atoui) { + EXPECT_EQ(0, FXSYS_atoui("")); + EXPECT_EQ(0, FXSYS_atoui("0")); + EXPECT_EQ(0, FXSYS_atoui("-1")); + EXPECT_EQ(2345, FXSYS_atoui("2345")); + EXPECT_EQ(4294967295, FXSYS_atoui("4294967295")); + EXPECT_EQ(9, FXSYS_atoui("9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(2147483623, FXSYS_atoi("2147483623423412348")); + EXPECT_EQ(429496729, FXSYS_atoi("4294967296")); +} |