From 42fb301abcf6b9f6a580f3d30defeadedf5d7ebd Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Thu, 3 Mar 2016 08:59:22 -0500 Subject: Fix parsing of object numbers > 16,777,216. Currently, there is a check that an object number is <= 0x1000000. If that check fails, we end up putting the parser into a bad state and fail to load documents. The object does not need to be in the XRef table, or referenced from the document, just be in the document. This Cl removes the size check and updates the various atoi calls to use a uint32_t instead of an int32_t so we don't end up getting strange values when converting from a string. BUG=455199 R=tsepez@chromium.org Review URL: https://codereview.chromium.org/1755273002 . --- .../fpdf_parser_decode_embeddertest.cpp | 9 +++ core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp | 2 +- .../src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 36 ++++----- core/src/fxcrt/fx_basic_gcc.cpp | 25 +++--- core/src/fxcrt/fx_basic_gcc_unittest.cpp | 88 ++++++++++++++++++++++ 5 files changed, 129 insertions(+), 31 deletions(-) create mode 100644 core/src/fxcrt/fx_basic_gcc_unittest.cpp (limited to 'core/src') diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp index 9bb9d41db1..7ed82fdfb6 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp @@ -105,3 +105,12 @@ TEST_F(FPDFParserDecodeEmbeddertest, Bug_555784) { UnloadPage(page); } +TEST_F(FPDFParserDecodeEmbeddertest, Bug_455199) { + // Tests object numbers with a value > 01000000. + // Should open successfully. + EXPECT_TRUE(OpenDocument("bug_455199.pdf")); + FPDF_PAGE page = LoadPage(0); + FPDF_BITMAP bitmap = RenderPage(page); + FPDFBitmap_Destroy(bitmap); + UnloadPage(page); +} diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp index b80568b63c..fbe6a8fb3a 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp @@ -51,7 +51,7 @@ void CFDF_Document::ParseStream(IFX_FileRead* pFile, FX_BOOL bOwnFile) { bool bNumber; CFX_ByteString word = parser.GetNextWord(&bNumber); if (bNumber) { - FX_DWORD objnum = FXSYS_atoi(word); + FX_DWORD objnum = FXSYS_atoui(word); word = parser.GetNextWord(&bNumber); if (!bNumber) { break; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index 825a8a26d4..16d1134cb1 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -534,7 +534,7 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, break; } - FX_DWORD start_objnum = FXSYS_atoi(word); + FX_DWORD start_objnum = FXSYS_atoui(word); if (start_objnum >= kMaxObjectNumber) return false; @@ -758,10 +758,6 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { break; case 3: if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { - if (objnum > 0x1000000) { - state = ParserState::kDefault; - break; - } FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; m_SortedOffset.insert(obj_pos); last_obj = start_pos; @@ -1323,7 +1319,7 @@ void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, return; } - FX_DWORD parser_objnum = FXSYS_atoi(word); + FX_DWORD parser_objnum = FXSYS_atoui(word); if (parser_objnum && parser_objnum != objnum) { m_Syntax.RestorePos(SavedPos); return; @@ -1395,7 +1391,7 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( FX_FILESIZE objOffset = m_Syntax.SavePos(); objOffset -= word.GetLength(); - FX_DWORD parser_objnum = FXSYS_atoi(word); + FX_DWORD parser_objnum = FXSYS_atoui(word); if (objnum && parser_objnum != objnum) { m_Syntax.RestorePos(SavedPos); return nullptr; @@ -1407,7 +1403,7 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( return nullptr; } - FX_DWORD parser_gennum = FXSYS_atoi(word); + FX_DWORD parser_gennum = FXSYS_atoui(word); if (m_Syntax.GetKeyword() != "obj") { m_Syntax.RestorePos(SavedPos); return nullptr; @@ -1444,7 +1440,7 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( return nullptr; } - FX_DWORD parser_objnum = FXSYS_atoi(word); + FX_DWORD parser_objnum = FXSYS_atoui(word); if (objnum && parser_objnum != objnum) { m_Syntax.RestorePos(SavedPos); return nullptr; @@ -1456,7 +1452,7 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( return nullptr; } - FX_DWORD gennum = FXSYS_atoi(word); + FX_DWORD gennum = FXSYS_atoui(word); if (m_Syntax.GetKeyword() != "obj") { m_Syntax.RestorePos(SavedPos); return nullptr; @@ -1506,12 +1502,12 @@ FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, if (!bIsNumber) return FALSE; - FX_DWORD objnum = FXSYS_atoi(word); + FX_DWORD objnum = FXSYS_atoui(word); word = m_Syntax.GetNextWord(&bIsNumber); if (!bIsNumber) return FALSE; - FX_DWORD gennum = FXSYS_atoi(word); + FX_DWORD gennum = FXSYS_atoui(word); if (m_Syntax.GetKeyword() != "obj") { m_Syntax.RestorePos(SavedPos); return FALSE; @@ -2039,7 +2035,7 @@ CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, if (bIsNumber) { CFX_ByteString nextword2 = GetNextWord(nullptr); if (nextword2 == "R") { - FX_DWORD objnum = FXSYS_atoi(word); + FX_DWORD objnum = FXSYS_atoui(word); return new CPDF_Reference(pObjList, objnum); } } @@ -2163,7 +2159,7 @@ CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict( if (bIsNumber) { CFX_ByteString nextword2 = GetNextWord(nullptr); if (nextword2 == "R") - return new CPDF_Reference(pObjList, FXSYS_atoi(word)); + return new CPDF_Reference(pObjList, FXSYS_atoui(word)); } m_Pos = SavedPos; return new CPDF_Number(word); @@ -2440,14 +2436,14 @@ void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); } -int32_t CPDF_SyntaxParser::GetDirectNum() { +uint32_t CPDF_SyntaxParser::GetDirectNum() { bool bIsNumber; GetNextWordInternal(&bIsNumber); if (!bIsNumber) return 0; m_WordBuffer[m_WordSize] = 0; - return FXSYS_atoi(reinterpret_cast(m_WordBuffer)); + return FXSYS_atoui(reinterpret_cast(m_WordBuffer)); } bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, @@ -3554,7 +3550,7 @@ CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( if (!bIsNumber) return nullptr; - FX_DWORD parser_objnum = FXSYS_atoi(word); + FX_DWORD parser_objnum = FXSYS_atoui(word); if (objnum && parser_objnum != objnum) return nullptr; @@ -3562,7 +3558,7 @@ CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( if (!bIsNumber) return nullptr; - FX_DWORD gennum = FXSYS_atoi(word); + FX_DWORD gennum = FXSYS_atoui(word); if (m_syntaxParser.GetKeyword() != "obj") { m_syntaxParser.RestorePos(SavedPos); return nullptr; @@ -3611,7 +3607,7 @@ FX_BOOL CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen) { if (!bNumber) return FALSE; - FX_DWORD objnum = FXSYS_atoi(wordObjNum); + FX_DWORD objnum = FXSYS_atoui(wordObjNum); if (m_pLinearized) { m_pLinearized->Release(); m_pLinearized = nullptr; @@ -3704,7 +3700,7 @@ int32_t CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints, if (!bNumber) return -1; - FX_DWORD objNum = FXSYS_atoi(objnum); + FX_DWORD objNum = FXSYS_atoui(objnum); CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum); if (!pObj) { m_Pos += m_parser.m_Syntax.SavePos(); diff --git a/core/src/fxcrt/fx_basic_gcc.cpp b/core/src/fxcrt/fx_basic_gcc.cpp index d905d6b498..607f095570 100644 --- a/core/src/fxcrt/fx_basic_gcc.cpp +++ b/core/src/fxcrt/fx_basic_gcc.cpp @@ -13,20 +13,21 @@ template T FXSYS_StrToInt(const FX_CHAR* str) { - FX_BOOL neg = FALSE; + bool neg = false; if (!str) return 0; - if (*str == '-') { - neg = TRUE; + if (std::numeric_limits::is_signed && *str == '-') { + neg = true; str++; } T num = 0; while (*str && std::isdigit(*str)) { - if (num > (std::numeric_limits::max() - 9) / 10) + T val = FXSYS_toDecimalDigit(*str); + if (num > (std::numeric_limits::max() - val) / 10) break; - num = num * 10 + FXSYS_toDecimalDigit(*str); + num = num * 10 + val; str++; } return neg ? -num : num; @@ -34,20 +35,21 @@ T FXSYS_StrToInt(const FX_CHAR* str) { template T FXSYS_StrToInt(const FX_WCHAR* str) { - FX_BOOL neg = FALSE; + bool neg = false; if (!str) return 0; - if (*str == '-') { - neg = TRUE; + if (std::numeric_limits::is_signed && *str == '-') { + neg = true; str++; } T num = 0; while (*str && std::iswdigit(*str)) { - if (num > (std::numeric_limits::max() - 9) / 10) + T val = FXSYS_toDecimalDigitWide(*str); + if (num > (std::numeric_limits::max() - val) / 10) break; - num = num * 10 + FXSYS_toDecimalDigitWide(*str); + num = num * 10 + val; str++; } return neg ? -num : num; @@ -93,6 +95,9 @@ extern "C" { int32_t FXSYS_atoi(const FX_CHAR* str) { return FXSYS_StrToInt(str); } +uint32_t FXSYS_atoui(const FX_CHAR* str) { + return FXSYS_StrToInt(str); +} int32_t FXSYS_wtoi(const FX_WCHAR* str) { return FXSYS_StrToInt(str); } diff --git a/core/src/fxcrt/fx_basic_gcc_unittest.cpp b/core/src/fxcrt/fx_basic_gcc_unittest.cpp new file mode 100644 index 0000000000..eb1e0669ae --- /dev/null +++ b/core/src/fxcrt/fx_basic_gcc_unittest.cpp @@ -0,0 +1,88 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/include/fxcrt/fx_system.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(fxcrt, FXSYS_atoi) { + EXPECT_EQ(0, FXSYS_atoi("")); + EXPECT_EQ(0, FXSYS_atoi("0")); + EXPECT_EQ(-1, FXSYS_atoi("-1")); + EXPECT_EQ(2345, FXSYS_atoi("2345")); + EXPECT_EQ(2147483647, FXSYS_atoi("2147483647")); + EXPECT_EQ(-2147483647, FXSYS_atoi("-2147483647")); + EXPECT_EQ(9, FXSYS_atoi("9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(2147483623, FXSYS_atoi("2147483623423412348")); + EXPECT_EQ(214748364, FXSYS_atoi("2147483648")); + // The digit is parsed as a positive value, so we end up not being able to + // handle the largest possible negative value. + EXPECT_EQ(-214748364, FXSYS_atoi("-2147483648")); +} + +TEST(fxcrt, FXSYS_atoi64) { + EXPECT_EQ(0, FXSYS_atoi64("")); + EXPECT_EQ(0, FXSYS_atoi64("0")); + EXPECT_EQ(-1, FXSYS_atoi64("-1")); + EXPECT_EQ(2345, FXSYS_atoi64("2345")); + EXPECT_EQ(9223372036854775807LL, FXSYS_atoi64("9223372036854775807")); + EXPECT_EQ(-9223372036854775807LL, FXSYS_atoi64("-9223372036854775807")); + EXPECT_EQ(9, FXSYS_atoi64("9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(9223372036854712341LL, FXSYS_atoi64("922337203685471234123475807")); + EXPECT_EQ(922337203685477580LL, FXSYS_atoi64("9223372036854775808")); + // The digit is parsed as a positive value, so we end up not being able to + // handle the largest possible negative value. + EXPECT_EQ(-922337203685477580LL, FXSYS_atoi64("-9223372036854775808")); +} + +TEST(fxcrt, FXSYS_wtoi) { + EXPECT_EQ(0, FXSYS_wtoi(L"")); + EXPECT_EQ(0, FXSYS_wtoi(L"0")); + EXPECT_EQ(-1, FXSYS_wtoi(L"-1")); + EXPECT_EQ(2345, FXSYS_wtoi(L"2345")); + EXPECT_EQ(2147483647, FXSYS_wtoi(L"2147483647")); + EXPECT_EQ(-2147483647, FXSYS_wtoi(L"-2147483647")); + EXPECT_EQ(9, FXSYS_wtoi64(L"9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(2147483623, FXSYS_wtoi(L"2147483623423412348")); + EXPECT_EQ(214748364, FXSYS_wtoi(L"2147483648")); + // The digit is parsed as a positive value, so we end up not being able to + // handle the largest possible negative value. + EXPECT_EQ(-214748364, FXSYS_wtoi(L"-2147483648")); +} + +TEST(fxcrt, FXSYS_wtoi64) { + EXPECT_EQ(0, FXSYS_wtoi64(L"")); + EXPECT_EQ(0, FXSYS_wtoi64(L"0")); + EXPECT_EQ(-1, FXSYS_wtoi64(L"-1")); + EXPECT_EQ(2345, FXSYS_wtoi64(L"2345")); + EXPECT_EQ(9223372036854775807LL, FXSYS_wtoi64(L"9223372036854775807")); + EXPECT_EQ(-9223372036854775807LL, FXSYS_wtoi64(L"-9223372036854775807")); + EXPECT_EQ(9, FXSYS_wtoi64(L"9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(9223372036854712341LL, + FXSYS_wtoi64(L"922337203685471234123475807")); + EXPECT_EQ(922337203685477580LL, FXSYS_wtoi64(L"9223372036854775808")); + // The digit is parsed as a positive value, so we end up not being able to + // handle the largest possible negative value. + EXPECT_EQ(-922337203685477580LL, FXSYS_wtoi64(L"-9223372036854775808")); +} + +TEST(fxcrt, FXSYS_atoui) { + EXPECT_EQ(0, FXSYS_atoui("")); + EXPECT_EQ(0, FXSYS_atoui("0")); + EXPECT_EQ(0, FXSYS_atoui("-1")); + EXPECT_EQ(2345, FXSYS_atoui("2345")); + EXPECT_EQ(4294967295, FXSYS_atoui("4294967295")); + EXPECT_EQ(9, FXSYS_atoui("9x9")); + + // TODO(dsinclair): These are all wacky ..... + EXPECT_EQ(2147483623, FXSYS_atoi("2147483623423412348")); + EXPECT_EQ(429496729, FXSYS_atoi("4294967296")); +} -- cgit v1.2.3