From 42a1bc02c0810c039afbcb62170c326f0e717320 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Fri, 12 Feb 2016 18:21:21 -0800 Subject: Fix the way to access marked content. When there is no dictionary for marked content, it may potentially cause a crash. This does not happen currently because 1) we now check the returned dict parameter and 2) the alloc function in pdfium does zero initialization. BUG=pdfium:67 R=thestig@chromium.org Review URL: https://codereview.chromium.org/1695633004 . --- core/include/fpdfapi/fpdf_pageobj.h | 8 +- .../fpdfapi/fpdf_page/fpdf_page_graph_state.cpp | 16 ++- core/src/fpdfapi/fpdf_render/fpdf_render.cpp | 9 +- core/src/fpdftext/fpdf_text_int.cpp | 119 +++++++++++++-------- 4 files changed, 91 insertions(+), 61 deletions(-) diff --git a/core/include/fpdfapi/fpdf_pageobj.h b/core/include/fpdfapi/fpdf_pageobj.h index 7a3d390fed..42248c7dd1 100644 --- a/core/include/fpdfapi/fpdf_pageobj.h +++ b/core/include/fpdfapi/fpdf_pageobj.h @@ -265,7 +265,7 @@ class CPDF_GeneralState : public CFX_CountRef { }; class CPDF_ContentMarkItem { public: - typedef enum { None, PropertiesDict, DirectDict, MCID } ParamType; + enum ParamType { None, PropertiesDict, DirectDict }; CPDF_ContentMarkItem(); @@ -277,13 +277,13 @@ class CPDF_ContentMarkItem { inline ParamType GetParamType() const { return m_ParamType; } - inline void* GetParam() const { return m_pParam; } + inline CPDF_Dictionary* GetParam() const { return m_pParam; } inline FX_BOOL HasMCID() const; inline void SetName(const CFX_ByteString& name) { m_MarkName = name; } - inline void SetParam(ParamType type, void* param) { + inline void SetParam(ParamType type, CPDF_Dictionary* param) { m_ParamType = type; m_pParam = param; } @@ -293,7 +293,7 @@ class CPDF_ContentMarkItem { ParamType m_ParamType; - void* m_pParam; + CPDF_Dictionary* m_pParam; }; class CPDF_ContentMarkData { public: diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_graph_state.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_graph_state.cpp index 
37e3c04e53..20345138b4 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_graph_state.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_graph_state.cpp @@ -594,20 +594,19 @@ CPDF_ContentMarkItem::CPDF_ContentMarkItem(const CPDF_ContentMarkItem& src) { m_MarkName = src.m_MarkName; m_ParamType = src.m_ParamType; if (m_ParamType == DirectDict) { - m_pParam = ToDictionary(static_cast(src.m_pParam))->Clone(); + m_pParam = ToDictionary(src.m_pParam->Clone()); } else { m_pParam = src.m_pParam; } } CPDF_ContentMarkItem::~CPDF_ContentMarkItem() { - if (m_ParamType == DirectDict && m_pParam) { - ToDictionary(static_cast(m_pParam))->Release(); - } + if (m_ParamType == DirectDict && m_pParam) + m_pParam->Release(); } FX_BOOL CPDF_ContentMarkItem::HasMCID() const { if (m_pParam && (m_ParamType == DirectDict || m_ParamType == PropertiesDict)) { - return ToDictionary(static_cast(m_pParam))->KeyExist("MCID"); + return m_pParam->KeyExist("MCID"); } return FALSE; } @@ -622,8 +621,7 @@ int CPDF_ContentMarkData::GetMCID() const { type = m_Marks[i].GetParamType(); if (type == CPDF_ContentMarkItem::PropertiesDict || type == CPDF_ContentMarkItem::DirectDict) { - CPDF_Dictionary* pDict = - ToDictionary(static_cast(m_Marks[i].GetParam())); + CPDF_Dictionary* pDict = m_Marks[i].GetParam(); if (pDict->KeyExist("MCID")) { return pDict->GetIntegerBy("MCID"); } @@ -641,7 +639,7 @@ void CPDF_ContentMarkData::AddMark(const CFX_ByteString& name, } item.SetParam(bDirect ? CPDF_ContentMarkItem::DirectDict : CPDF_ContentMarkItem::PropertiesDict, - bDirect ? pDict->Clone() : pDict); + bDirect ? 
ToDictionary(pDict->Clone()) : pDict); } void CPDF_ContentMarkData::DeleteLastMark() { int size = m_Marks.GetSize(); @@ -673,7 +671,7 @@ FX_BOOL CPDF_ContentMark::LookupMark(const CFX_ByteStringC& mark, pDict = NULL; if (item.GetParamType() == CPDF_ContentMarkItem::PropertiesDict || item.GetParamType() == CPDF_ContentMarkItem::DirectDict) { - pDict = ToDictionary(static_cast(item.GetParam())); + pDict = item.GetParam(); } return TRUE; } diff --git a/core/src/fpdfapi/fpdf_render/fpdf_render.cpp b/core/src/fpdfapi/fpdf_render/fpdf_render.cpp index 35d62ce0b3..eeee03485a 100644 --- a/core/src/fpdfapi/fpdf_render/fpdf_render.cpp +++ b/core/src/fpdfapi/fpdf_render/fpdf_render.cpp @@ -1395,12 +1395,9 @@ FX_BOOL IPDF_OCContext::CheckObjectVisible(const CPDF_PageObject* pObj) { for (int i = 0; i < nItems; i++) { CPDF_ContentMarkItem& item = pData->GetItem(i); if (item.GetName() == "OC" && - item.GetParamType() == CPDF_ContentMarkItem::PropertiesDict) { - CPDF_Dictionary* pOCG = - ToDictionary(static_cast(item.GetParam())); - if (!CheckOCGVisible(pOCG)) { - return FALSE; - } + item.GetParamType() == CPDF_ContentMarkItem::PropertiesDict && + !CheckOCGVisible(item.GetParam())) { + return FALSE; } } return TRUE; diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp index 22591651ab..a0f0c64822 100644 --- a/core/src/fpdftext/fpdf_text_int.cpp +++ b/core/src/fpdftext/fpdf_text_int.cpp @@ -134,6 +134,7 @@ bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { return false; } } + FX_BOOL CPDF_TextPage::ParseTextPage() { m_bIsParsed = false; if (!m_pPage) @@ -186,9 +187,11 @@ FX_BOOL CPDF_TextPage::ParseTextPage() { } return TRUE; } + int CPDF_TextPage::CountChars() const { return pdfium::CollectionSize(m_CharList); } + int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { int indexSize = pdfium::CollectionSize(m_CharIndex); int count = 0; @@ -199,6 +202,7 @@ int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { } 
return -1; } + int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { int indexSize = pdfium::CollectionSize(m_CharIndex); int count = 0; @@ -213,6 +217,7 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { } return -1; } + void CPDF_TextPage::GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const { @@ -298,6 +303,7 @@ void CPDF_TextPage::GetRectArray(int start, } rectArray.Add(rect); } + int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, FX_FLOAT xTolerance, FX_FLOAT yTolerance) const { @@ -378,6 +384,7 @@ CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { } return strText; } + void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const { if (!m_bIsParsed) @@ -415,6 +422,7 @@ void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, } resRectArray.Add(curRect); } + int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTolerance, @@ -483,6 +491,7 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, nCount = endIndex - start + 1; } } + CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { if (!m_bIsParsed || nCount == 0) return L""; @@ -532,6 +541,7 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { } return m_TextBuf.GetWideString().Mid(startindex, nCount); } + int CPDF_TextPage::CountRects(int start, int nCount) { if (!m_bIsParsed || start < 0) return -1; @@ -544,6 +554,7 @@ int CPDF_TextPage::CountRects(int start, int nCount) { GetRectArray(start, nCount, m_SelRects); return m_SelRects.GetSize(); } + void CPDF_TextPage::GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, @@ -625,6 +636,7 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); return GetBaselineRotate(rect, Rotate); } + int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, @@ -702,6 +714,7 @@ int 
CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, } return m_Segments.GetSize(); } + void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { if (index < 0 || index >= m_Segments.GetSize()) { return; @@ -709,6 +722,7 @@ void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { start = m_Segments.GetAt(index).m_Start; count = m_Segments.GetAt(index).m_nCount; } + int CPDF_TextPage::GetWordBreak(int index, int direction) const { if (!m_bIsParsed) return -1; @@ -740,6 +754,7 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const { } return breakPos; } + int32_t CPDF_TextPage::FindTextlineFlowDirection() { if (!m_pPage) { return -1; @@ -837,6 +852,7 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() { } return -1; } + void CPDF_TextPage::ProcessObject() { CPDF_PageObject* pPageObj = NULL; if (!m_pPage) { @@ -870,6 +886,7 @@ void CPDF_TextPage::ProcessObject() { m_LineObj.RemoveAll(); CloseTempLine(); } + void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, const CFX_Matrix& formMatrix) { CPDF_PageObject* pPageObj = NULL; @@ -896,6 +913,7 @@ void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj, pPageObj = NULL; } } + int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { if (charCode == -1) { return 0; @@ -913,6 +931,7 @@ int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { } return w; } + void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo(); if (seg.direction == CFX_BidiChar::RIGHT) { @@ -927,6 +946,7 @@ void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { } } } + void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info) { if (!IsControlChar(info)) { @@ -954,6 +974,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, } m_CharList.push_back(info); } + void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, 
PAGECHAR_INFO info) { if (!IsControlChar(info)) { @@ -1106,21 +1127,21 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { CPDF_TextObject* pTextObj = Obj.m_pTextObj; CPDF_ContentMarkData* pMarkData = (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); - if (!pMarkData) { + if (!pMarkData) return FPDFTEXT_MC_PASS; - } + int nContentMark = pMarkData->CountItems(); - if (nContentMark < 1) { + if (nContentMark < 1) return FPDFTEXT_MC_PASS; - } CFX_WideString actText; FX_BOOL bExist = FALSE; CPDF_Dictionary* pDict = NULL; int n = 0; for (n = 0; n < nContentMark; n++) { CPDF_ContentMarkItem& item = pMarkData->GetItem(n); - CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); - pDict = ToDictionary(static_cast(item.GetParam())); + if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None) + continue; + pDict = item.GetParam(); CPDF_String* temp = ToString(pDict ? pDict->GetElement("ActualText") : nullptr); if (temp) { @@ -1128,38 +1149,32 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { actText = temp->GetUnicodeText(); } } - if (!bExist) { + if (!bExist) return FPDFTEXT_MC_PASS; - } + if (m_pPreTextObj) { - if (CPDF_ContentMarkData* pPreMarkData = - (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) { - if (pPreMarkData->CountItems() == n) { - CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1); - if (pDict == item.GetParam()) { - return FPDFTEXT_MC_DONE; - } - } + CPDF_ContentMarkData* pPreMarkData = + (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject(); + if (pPreMarkData && pPreMarkData->CountItems() == n && + pDict == pPreMarkData->GetItem(n - 1).GetParam()) { + return FPDFTEXT_MC_DONE; } } - CPDF_Font* pFont = pTextObj->GetFont(); FX_STRSIZE nItems = actText.GetLength(); - if (nItems < 1) { + if (nItems < 1) return FPDFTEXT_MC_PASS; - } + + CPDF_Font* pFont = pTextObj->GetFont(); bExist = FALSE; for (FX_STRSIZE i = 0; i < nItems; i++) { - FX_WCHAR wChar = actText.GetAt(i); - if (-1 == 
pFont->CharCodeFromUnicode(wChar)) { - continue; - } else { + if (pFont->CharCodeFromUnicode(actText.GetAt(i)) != -1) { bExist = TRUE; break; } } - if (!bExist) { + if (!bExist) return FPDFTEXT_MC_PASS; - } + bExist = FALSE; for (FX_STRSIZE i = 0; i < nItems; i++) { FX_WCHAR wChar = actText.GetAt(i); @@ -1168,39 +1183,36 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { break; } } - if (!bExist) { + if (!bExist) return FPDFTEXT_MC_DONE; - } + return FPDFTEXT_MC_DELAY; } + void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { CPDF_TextObject* pTextObj = Obj.m_pTextObj; CPDF_ContentMarkData* pMarkData = (CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject(); - if (!pMarkData) { + if (!pMarkData) return; - } + int nContentMark = pMarkData->CountItems(); - if (nContentMark < 1) { + if (nContentMark < 1) return; - } CFX_WideString actText; CPDF_Dictionary* pDict = NULL; - int n = 0; - for (n = 0; n < nContentMark; n++) { + for (int n = 0; n < nContentMark; n++) { CPDF_ContentMarkItem& item = pMarkData->GetItem(n); - CFX_ByteString tagStr = (CFX_ByteString)item.GetName(); - pDict = ToDictionary(static_cast(item.GetParam())); - CPDF_String* temp = - ToString(pDict ? 
pDict->GetElement("ActualText") : nullptr); - if (temp) { - actText = temp->GetUnicodeText(); - } + if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None) + continue; + pDict = item.GetParam(); + if (pDict) + actText = pDict->GetUnicodeTextBy("ActualText"); } FX_STRSIZE nItems = actText.GetLength(); - if (nItems < 1) { + if (nItems < 1) return; - } + CPDF_Font* pFont = pTextObj->GetFont(); CFX_Matrix formMatrix = Obj.m_formMatrix; CFX_Matrix matrix; @@ -1239,6 +1251,7 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { m_TempCharList.push_back(charinfo); } } + void CPDF_TextPage::FindPreviousTextObject() { if (m_TempCharList.empty() && m_CharList.empty()) return; @@ -1549,6 +1562,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { SwapTempTextBuf(iCharListStartAppend, iBufStartAppend); } } + int32_t CPDF_TextPage::GetTextObjectWritingMode( const CPDF_TextObject* pTextObj) { int32_t nChars = pTextObj->CountChars(); @@ -1614,6 +1628,7 @@ FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) { } return FALSE; } + int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, const CFX_Matrix& formMatrix) { FindPreviousTextObject(); @@ -1770,6 +1785,7 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, } return 0; } + FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2) { if (!pTextObj1 || !pTextObj2) { @@ -1827,6 +1843,7 @@ FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, } return TRUE; } + FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos) { if (!pTextObj) { @@ -1890,6 +1907,7 @@ FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, rect.Intersect(rect2); return !rect.IsEmpty(); } + FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { if (unicode < L'A') { return FALSE; @@ -1902,6 +1920,7 @@ FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) { } return TRUE; } + CPDF_TextPageFind::CPDF_TextPageFind(const 
IPDF_TextPage* pTextPage) : m_pTextPage(pTextPage), m_flags(0), @@ -1946,9 +1965,11 @@ CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); } } + int CPDF_TextPageFind::GetCharIndex(int index) const { return m_pTextPage->CharIndexFromTextIndex(index); } + FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, int flags, int startPos) { @@ -1999,6 +2020,7 @@ FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, m_resEnd = -1; return TRUE; } + FX_BOOL CPDF_TextPageFind::FindNext() { if (!m_pTextPage) { return FALSE; @@ -2106,6 +2128,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() { } return m_IsFind; } + FX_BOOL CPDF_TextPageFind::FindPrev() { if (!m_pTextPage) { return FALSE; @@ -2151,6 +2174,7 @@ FX_BOOL CPDF_TextPageFind::FindPrev() { } return m_IsFind; } + void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { if (findwhat.IsEmpty()) { return; @@ -2199,6 +2223,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { index++; } } + FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, int startPos, int endPos) { @@ -2245,6 +2270,7 @@ FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, } return TRUE; } + FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, const FX_WCHAR* lpszFullString, int iSubString, @@ -2272,6 +2298,7 @@ FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, rString.ReleaseBuffer(); return TRUE; } + CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) { CFX_WideString str2; str2.Empty(); @@ -2281,12 +2308,15 @@ CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) { } return str2; } + void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const { rects.Copy(m_resArray); } + int CPDF_TextPageFind::GetCurOrder() const { return GetCharIndex(m_resStart); } + int 
CPDF_TextPageFind::GetMatchedCount() const { int resStart = GetCharIndex(m_resStart); int resEnd = GetCharIndex(m_resEnd); @@ -2325,12 +2355,14 @@ void CPDF_LinkExtract::DeleteLinkList() { } m_LinkList.RemoveAll(); } + int CPDF_LinkExtract::CountLinks() const { if (!m_bIsParsed) { return -1; } return m_LinkList.GetSize(); } + void CPDF_LinkExtract::ParseLink() { int start = 0, pos = 0; int TotalChar = m_pTextPage->CountChars(); @@ -2366,6 +2398,7 @@ void CPDF_LinkExtract::ParseLink() { } } } + FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { CFX_WideString str = strBeCheck; str.MakeLower(); @@ -2392,6 +2425,7 @@ FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { } return FALSE; } + bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { int aPos = str.Find(L'@'); // Invalid when no '@'. @@ -2496,6 +2530,7 @@ void CPDF_LinkExtract::GetBoundedSegment(int index, start = link->m_Start; count = link->m_Count; } + void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { return; -- cgit v1.2.3