summaryrefslogtreecommitdiff
path: root/core/src/fpdftext
diff options
context:
space:
mode:
authorWei Li <weili@chromium.org>2016-02-12 18:21:21 -0800
committerWei Li <weili@chromium.org>2016-02-12 18:21:21 -0800
commit42a1bc02c0810c039afbcb62170c326f0e717320 (patch)
treea686d50142aaa5fbd19b0a4fde32f33377a74102 /core/src/fpdftext
parentf5f1399f1af3c5869bf6857a125552d4834c19da (diff)
downloadpdfium-42a1bc02c0810c039afbcb62170c326f0e717320.tar.xz
Fix the way to access marked content.
When there is no dictionary for marked content, accessing it could potentially cause a crash. This does not happen today because 1) we now check the returned dict parameter and 2) the alloc function in pdfium zero-initializes memory. BUG=pdfium:67 R=thestig@chromium.org Review URL: https://codereview.chromium.org/1695633004 .
Diffstat (limited to 'core/src/fpdftext')
-rw-r--r--core/src/fpdftext/fpdf_text_int.cpp119
1 files changed, 77 insertions, 42 deletions
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp
index 22591651ab..a0f0c64822 100644
--- a/core/src/fpdftext/fpdf_text_int.cpp
+++ b/core/src/fpdftext/fpdf_text_int.cpp
@@ -134,6 +134,7 @@ bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
return false;
}
}
+
FX_BOOL CPDF_TextPage::ParseTextPage() {
m_bIsParsed = false;
if (!m_pPage)
@@ -186,9 +187,11 @@ FX_BOOL CPDF_TextPage::ParseTextPage() {
}
return TRUE;
}
+
int CPDF_TextPage::CountChars() const {
return pdfium::CollectionSize<int>(m_CharList);
}
+
int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
int count = 0;
@@ -199,6 +202,7 @@ int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
}
return -1;
}
+
int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
int count = 0;
@@ -213,6 +217,7 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
}
return -1;
}
+
void CPDF_TextPage::GetRectArray(int start,
int nCount,
CFX_RectArray& rectArray) const {
@@ -298,6 +303,7 @@ void CPDF_TextPage::GetRectArray(int start,
}
rectArray.Add(rect);
}
+
int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
FX_FLOAT xTolerance,
FX_FLOAT yTolerance) const {
@@ -378,6 +384,7 @@ CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
}
return strText;
}
+
void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
CFX_RectArray& resRectArray) const {
if (!m_bIsParsed)
@@ -415,6 +422,7 @@ void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
}
resRectArray.Add(curRect);
}
+
int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x,
FX_FLOAT y,
FX_FLOAT xTolerance,
@@ -483,6 +491,7 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
nCount = endIndex - start + 1;
}
}
+
CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
if (!m_bIsParsed || nCount == 0)
return L"";
@@ -532,6 +541,7 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
}
return m_TextBuf.GetWideString().Mid(startindex, nCount);
}
+
int CPDF_TextPage::CountRects(int start, int nCount) {
if (!m_bIsParsed || start < 0)
return -1;
@@ -544,6 +554,7 @@ int CPDF_TextPage::CountRects(int start, int nCount) {
GetRectArray(start, nCount, m_SelRects);
return m_SelRects.GetSize();
}
+
void CPDF_TextPage::GetRect(int rectIndex,
FX_FLOAT& left,
FX_FLOAT& top,
@@ -625,6 +636,7 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
return GetBaselineRotate(rect, Rotate);
}
+
int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
FX_FLOAT top,
FX_FLOAT right,
@@ -702,6 +714,7 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
}
return m_Segments.GetSize();
}
+
void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
if (index < 0 || index >= m_Segments.GetSize()) {
return;
@@ -709,6 +722,7 @@ void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
start = m_Segments.GetAt(index).m_Start;
count = m_Segments.GetAt(index).m_nCount;
}
+
int CPDF_TextPage::GetWordBreak(int index, int direction) const {
if (!m_bIsParsed)
return -1;
@@ -740,6 +754,7 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const {
}
return breakPos;
}
+
int32_t CPDF_TextPage::FindTextlineFlowDirection() {
if (!m_pPage) {
return -1;
@@ -837,6 +852,7 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() {
}
return -1;
}
+
void CPDF_TextPage::ProcessObject() {
CPDF_PageObject* pPageObj = NULL;
if (!m_pPage) {
@@ -870,6 +886,7 @@ void CPDF_TextPage::ProcessObject() {
m_LineObj.RemoveAll();
CloseTempLine();
}
+
void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj,
const CFX_Matrix& formMatrix) {
CPDF_PageObject* pPageObj = NULL;
@@ -896,6 +913,7 @@ void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj,
pPageObj = NULL;
}
}
+
int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const {
if (charCode == -1) {
return 0;
@@ -913,6 +931,7 @@ int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const {
}
return w;
}
+
void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) {
CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo();
if (seg.direction == CFX_BidiChar::RIGHT) {
@@ -927,6 +946,7 @@ void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) {
}
}
}
+
void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
PAGECHAR_INFO info) {
if (!IsControlChar(info)) {
@@ -954,6 +974,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
}
m_CharList.push_back(info);
}
+
void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
PAGECHAR_INFO info) {
if (!IsControlChar(info)) {
@@ -1106,21 +1127,21 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
CPDF_TextObject* pTextObj = Obj.m_pTextObj;
CPDF_ContentMarkData* pMarkData =
(CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
- if (!pMarkData) {
+ if (!pMarkData)
return FPDFTEXT_MC_PASS;
- }
+
int nContentMark = pMarkData->CountItems();
- if (nContentMark < 1) {
+ if (nContentMark < 1)
return FPDFTEXT_MC_PASS;
- }
CFX_WideString actText;
FX_BOOL bExist = FALSE;
CPDF_Dictionary* pDict = NULL;
int n = 0;
for (n = 0; n < nContentMark; n++) {
CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
- CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
- pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam()));
+ if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None)
+ continue;
+ pDict = item.GetParam();
CPDF_String* temp =
ToString(pDict ? pDict->GetElement("ActualText") : nullptr);
if (temp) {
@@ -1128,38 +1149,32 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
actText = temp->GetUnicodeText();
}
}
- if (!bExist) {
+ if (!bExist)
return FPDFTEXT_MC_PASS;
- }
+
if (m_pPreTextObj) {
- if (CPDF_ContentMarkData* pPreMarkData =
- (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) {
- if (pPreMarkData->CountItems() == n) {
- CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1);
- if (pDict == item.GetParam()) {
- return FPDFTEXT_MC_DONE;
- }
- }
+ CPDF_ContentMarkData* pPreMarkData =
+ (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject();
+ if (pPreMarkData && pPreMarkData->CountItems() == n &&
+ pDict == pPreMarkData->GetItem(n - 1).GetParam()) {
+ return FPDFTEXT_MC_DONE;
}
}
- CPDF_Font* pFont = pTextObj->GetFont();
FX_STRSIZE nItems = actText.GetLength();
- if (nItems < 1) {
+ if (nItems < 1)
return FPDFTEXT_MC_PASS;
- }
+
+ CPDF_Font* pFont = pTextObj->GetFont();
bExist = FALSE;
for (FX_STRSIZE i = 0; i < nItems; i++) {
- FX_WCHAR wChar = actText.GetAt(i);
- if (-1 == pFont->CharCodeFromUnicode(wChar)) {
- continue;
- } else {
+ if (pFont->CharCodeFromUnicode(actText.GetAt(i)) != -1) {
bExist = TRUE;
break;
}
}
- if (!bExist) {
+ if (!bExist)
return FPDFTEXT_MC_PASS;
- }
+
bExist = FALSE;
for (FX_STRSIZE i = 0; i < nItems; i++) {
FX_WCHAR wChar = actText.GetAt(i);
@@ -1168,39 +1183,36 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
break;
}
}
- if (!bExist) {
+ if (!bExist)
return FPDFTEXT_MC_DONE;
- }
+
return FPDFTEXT_MC_DELAY;
}
+
void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
CPDF_TextObject* pTextObj = Obj.m_pTextObj;
CPDF_ContentMarkData* pMarkData =
(CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
- if (!pMarkData) {
+ if (!pMarkData)
return;
- }
+
int nContentMark = pMarkData->CountItems();
- if (nContentMark < 1) {
+ if (nContentMark < 1)
return;
- }
CFX_WideString actText;
CPDF_Dictionary* pDict = NULL;
- int n = 0;
- for (n = 0; n < nContentMark; n++) {
+ for (int n = 0; n < nContentMark; n++) {
CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
- CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
- pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam()));
- CPDF_String* temp =
- ToString(pDict ? pDict->GetElement("ActualText") : nullptr);
- if (temp) {
- actText = temp->GetUnicodeText();
- }
+ if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None)
+ continue;
+ pDict = item.GetParam();
+ if (pDict)
+ actText = pDict->GetUnicodeTextBy("ActualText");
}
FX_STRSIZE nItems = actText.GetLength();
- if (nItems < 1) {
+ if (nItems < 1)
return;
- }
+
CPDF_Font* pFont = pTextObj->GetFont();
CFX_Matrix formMatrix = Obj.m_formMatrix;
CFX_Matrix matrix;
@@ -1239,6 +1251,7 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
m_TempCharList.push_back(charinfo);
}
}
+
void CPDF_TextPage::FindPreviousTextObject() {
if (m_TempCharList.empty() && m_CharList.empty())
return;
@@ -1549,6 +1562,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
SwapTempTextBuf(iCharListStartAppend, iBufStartAppend);
}
}
+
int32_t CPDF_TextPage::GetTextObjectWritingMode(
const CPDF_TextObject* pTextObj) {
int32_t nChars = pTextObj->CountChars();
@@ -1614,6 +1628,7 @@ FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) {
}
return FALSE;
}
+
int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
const CFX_Matrix& formMatrix) {
FindPreviousTextObject();
@@ -1770,6 +1785,7 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
}
return 0;
}
+
FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
CPDF_TextObject* pTextObj2) {
if (!pTextObj1 || !pTextObj2) {
@@ -1827,6 +1843,7 @@ FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
}
return TRUE;
}
+
FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
FX_POSITION ObjPos) {
if (!pTextObj) {
@@ -1890,6 +1907,7 @@ FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1,
rect.Intersect(rect2);
return !rect.IsEmpty();
}
+
FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) {
if (unicode < L'A') {
return FALSE;
@@ -1902,6 +1920,7 @@ FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) {
}
return TRUE;
}
+
CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
: m_pTextPage(pTextPage),
m_flags(0),
@@ -1946,9 +1965,11 @@ CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
}
}
+
int CPDF_TextPageFind::GetCharIndex(int index) const {
return m_pTextPage->CharIndexFromTextIndex(index);
}
+
FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
int flags,
int startPos) {
@@ -1999,6 +2020,7 @@ FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
m_resEnd = -1;
return TRUE;
}
+
FX_BOOL CPDF_TextPageFind::FindNext() {
if (!m_pTextPage) {
return FALSE;
@@ -2106,6 +2128,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() {
}
return m_IsFind;
}
+
FX_BOOL CPDF_TextPageFind::FindPrev() {
if (!m_pTextPage) {
return FALSE;
@@ -2151,6 +2174,7 @@ FX_BOOL CPDF_TextPageFind::FindPrev() {
}
return m_IsFind;
}
+
void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
if (findwhat.IsEmpty()) {
return;
@@ -2199,6 +2223,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
index++;
}
}
+
FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
int startPos,
int endPos) {
@@ -2245,6 +2270,7 @@ FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
}
return TRUE;
}
+
FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString,
const FX_WCHAR* lpszFullString,
int iSubString,
@@ -2272,6 +2298,7 @@ FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString,
rString.ReleaseBuffer();
return TRUE;
}
+
CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) {
CFX_WideString str2;
str2.Empty();
@@ -2281,12 +2308,15 @@ CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) {
}
return str2;
}
+
void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const {
rects.Copy(m_resArray);
}
+
int CPDF_TextPageFind::GetCurOrder() const {
return GetCharIndex(m_resStart);
}
+
int CPDF_TextPageFind::GetMatchedCount() const {
int resStart = GetCharIndex(m_resStart);
int resEnd = GetCharIndex(m_resEnd);
@@ -2325,12 +2355,14 @@ void CPDF_LinkExtract::DeleteLinkList() {
}
m_LinkList.RemoveAll();
}
+
int CPDF_LinkExtract::CountLinks() const {
if (!m_bIsParsed) {
return -1;
}
return m_LinkList.GetSize();
}
+
void CPDF_LinkExtract::ParseLink() {
int start = 0, pos = 0;
int TotalChar = m_pTextPage->CountChars();
@@ -2366,6 +2398,7 @@ void CPDF_LinkExtract::ParseLink() {
}
}
}
+
FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
CFX_WideString str = strBeCheck;
str.MakeLower();
@@ -2392,6 +2425,7 @@ FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
}
return FALSE;
}
+
bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
int aPos = str.Find(L'@');
// Invalid when no '@'.
@@ -2496,6 +2530,7 @@ void CPDF_LinkExtract::GetBoundedSegment(int index,
start = link->m_Start;
count = link->m_Count;
}
+
void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
return;