summaryrefslogtreecommitdiff
path: root/core/src/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/fpdftext')
-rw-r--r--core/src/fpdftext/fpdf_text_int.cpp405
-rw-r--r--core/src/fpdftext/text_int.h28
2 files changed, 199 insertions, 234 deletions
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp
index 67411e31b8..ae26cbd252 100644
--- a/core/src/fpdftext/fpdf_text_int.cpp
+++ b/core/src/fpdftext/fpdf_text_int.cpp
@@ -4,6 +4,8 @@
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+#include "core/src/fpdftext/text_int.h"
+
#include <algorithm>
#include <cctype>
#include <cwctype>
@@ -17,7 +19,7 @@
#include "core/include/fxcrt/fx_bidi.h"
#include "core/include/fxcrt/fx_ext.h"
#include "core/include/fxcrt/fx_ucd.h"
-#include "text_int.h"
+#include "third_party/base/stl_util.h"
namespace {
@@ -106,8 +108,6 @@ IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
: m_pPage(pPage),
- m_charList(512),
- m_TempCharList(50),
m_parserflag(flags),
m_pPreTextObj(nullptr),
m_bIsParsed(false),
@@ -139,19 +139,19 @@ FX_BOOL CPDF_TextPage::ParseTextPage() {
return FALSE;
m_TextBuf.Clear();
- m_charList.RemoveAll();
+ m_CharList.clear();
m_pPreTextObj = NULL;
ProcessObject();
m_bIsParsed = true;
m_CharIndex.RemoveAll();
- int nCount = m_charList.GetSize();
+ int nCount = pdfium::CollectionSize<int>(m_CharList);
if (nCount) {
m_CharIndex.Add(0);
}
for (int i = 0; i < nCount; i++) {
int indexSize = m_CharIndex.GetSize();
FX_BOOL bNormal = FALSE;
- PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i);
+ const PAGECHAR_INFO& charinfo = m_CharList[i];
if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
bNormal = TRUE;
} else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
@@ -186,7 +186,7 @@ FX_BOOL CPDF_TextPage::ParseTextPage() {
return TRUE;
}
int CPDF_TextPage::CountChars() const {
- return m_charList.GetSize();
+ return pdfium::CollectionSize<int>(m_CharList);
}
int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
int indexSize = m_CharIndex.GetSize();
@@ -224,16 +224,16 @@ void CPDF_TextPage::GetRectArray(int start,
if (!m_bIsParsed) {
return;
}
- PAGECHAR_INFO info_curchar;
CPDF_TextObject* pCurObj = NULL;
CFX_FloatRect rect;
int curPos = start;
FX_BOOL flagNewRect = TRUE;
- if (nCount + start > m_charList.GetSize() || nCount == -1) {
- nCount = m_charList.GetSize() - start;
+ if (nCount + start > pdfium::CollectionSize<int>(m_CharList) ||
+ nCount == -1) {
+ nCount = pdfium::CollectionSize<int>(m_CharList) - start;
}
while (nCount--) {
- info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(curPos++);
+ PAGECHAR_INFO info_curchar = m_CharList[curPos++];
if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
continue;
}
@@ -299,7 +299,6 @@ void CPDF_TextPage::GetRectArray(int start,
}
}
rectArray.Add(rect);
- return;
}
int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
FX_FLOAT xTolerance,
@@ -309,9 +308,10 @@ int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
int pos = 0;
int NearPos = -1;
- double xdif = 5000, ydif = 5000;
- while (pos < m_charList.GetSize()) {
- PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)(m_charList.GetAt(pos));
+ double xdif = 5000;
+ double ydif = 5000;
+ while (pos < pdfium::CollectionSize<int>(m_CharList)) {
+ PAGECHAR_INFO charinfo = m_CharList[pos];
CFX_FloatRect charrect = charinfo.m_CharBox;
if (charrect.Contains(point.x, point.y)) {
break;
@@ -342,45 +342,40 @@ int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
}
++pos;
}
- if (pos >= m_charList.GetSize()) {
- pos = NearPos;
- }
- return pos;
+ return pos < pdfium::CollectionSize<int>(m_CharList) ? pos : NearPos;
}
+
CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
- CFX_WideString strText;
if (!m_bIsParsed)
- return strText;
+ return CFX_WideString();
- int nCount = m_charList.GetSize();
- int pos = 0;
FX_FLOAT posy = 0;
- FX_BOOL IsContainPreChar = FALSE;
- FX_BOOL ISAddLineFeed = FALSE;
- while (pos < nCount) {
- PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
+ bool IsContainPreChar = false;
+ bool IsAddLineFeed = false;
+ CFX_WideString strText;
+ for (const auto& charinfo : m_CharList) {
if (IsRectIntersect(rect, charinfo.m_CharBox)) {
if (FXSYS_fabs(posy - charinfo.m_OriginY) > 0 && !IsContainPreChar &&
- ISAddLineFeed) {
+ IsAddLineFeed) {
posy = charinfo.m_OriginY;
if (strText.GetLength() > 0) {
strText += L"\r\n";
}
}
- IsContainPreChar = TRUE;
- ISAddLineFeed = FALSE;
+ IsContainPreChar = true;
+ IsAddLineFeed = false;
if (charinfo.m_Unicode) {
strText += charinfo.m_Unicode;
}
} else if (charinfo.m_Unicode == 32) {
if (IsContainPreChar && charinfo.m_Unicode) {
strText += charinfo.m_Unicode;
- IsContainPreChar = FALSE;
- ISAddLineFeed = FALSE;
+ IsContainPreChar = false;
+ IsAddLineFeed = false;
}
} else {
- IsContainPreChar = FALSE;
- ISAddLineFeed = TRUE;
+ IsContainPreChar = false;
+ IsAddLineFeed = true;
}
}
return strText;
@@ -391,47 +386,36 @@ void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
return;
CFX_FloatRect curRect;
- FX_BOOL flagNewRect = TRUE;
- CPDF_TextObject* pCurObj = NULL;
- int nCount = m_charList.GetSize();
- int pos = 0;
- while (pos < nCount) {
- PAGECHAR_INFO info_curchar = *(PAGECHAR_INFO*)m_charList.GetAt(pos++);
+ bool flagNewRect = true;
+ CPDF_TextObject* pCurObj = nullptr;
+ for (auto info_curchar : m_CharList) {
if (info_curchar.m_Flag == FPDFTEXT_CHAR_GENERATED) {
continue;
}
- if (IsRectIntersect(rect, info_curchar.m_CharBox)) {
- if (!pCurObj) {
- pCurObj = info_curchar.m_pTextObj;
- }
- if (pCurObj != info_curchar.m_pTextObj) {
- resRectArray.Add(curRect);
- pCurObj = info_curchar.m_pTextObj;
- flagNewRect = TRUE;
- }
- if (flagNewRect) {
- curRect = info_curchar.m_CharBox;
- flagNewRect = FALSE;
- curRect.Normalize();
- } else {
- info_curchar.m_CharBox.Normalize();
- if (curRect.left > info_curchar.m_CharBox.left) {
- curRect.left = info_curchar.m_CharBox.left;
- }
- if (curRect.right < info_curchar.m_CharBox.right) {
- curRect.right = info_curchar.m_CharBox.right;
- }
- if (curRect.top < info_curchar.m_CharBox.top) {
- curRect.top = info_curchar.m_CharBox.top;
- }
- if (curRect.bottom > info_curchar.m_CharBox.bottom) {
- curRect.bottom = info_curchar.m_CharBox.bottom;
- }
- }
+ if (!IsRectIntersect(rect, info_curchar.m_CharBox)) {
+ continue;
+ }
+ if (!pCurObj) {
+ pCurObj = info_curchar.m_pTextObj;
+ }
+ if (pCurObj != info_curchar.m_pTextObj) {
+ resRectArray.Add(curRect);
+ pCurObj = info_curchar.m_pTextObj;
+ flagNewRect = true;
+ }
+ if (flagNewRect) {
+ curRect = info_curchar.m_CharBox;
+ curRect.Normalize();
+ flagNewRect = false;
+ } else {
+ info_curchar.m_CharBox.Normalize();
+ curRect.left = std::min(curRect.left, info_curchar.m_CharBox.left);
+ curRect.bottom = std::min(curRect.bottom, info_curchar.m_CharBox.bottom);
+ curRect.right = std::max(curRect.right, info_curchar.m_CharBox.right);
+ curRect.top = std::max(curRect.top, info_curchar.m_CharBox.top);
}
}
resRectArray.Add(curRect);
- return;
}
int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x,
FX_FLOAT y,
@@ -445,31 +429,29 @@ void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const {
if (!m_bIsParsed)
return;
- if (index < 0 || index >= m_charList.GetSize())
+ if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList))
return;
- const PAGECHAR_INFO* charinfo =
- static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index));
- info->m_Charcode = charinfo->m_CharCode;
- info->m_OriginX = charinfo->m_OriginX;
- info->m_OriginY = charinfo->m_OriginY;
- info->m_Unicode = charinfo->m_Unicode;
- info->m_Flag = charinfo->m_Flag;
- info->m_CharBox = charinfo->m_CharBox;
- info->m_pTextObj = charinfo->m_pTextObj;
- if (charinfo->m_pTextObj && charinfo->m_pTextObj->GetFont()) {
- info->m_FontSize = charinfo->m_pTextObj->GetFontSize();
+ const PAGECHAR_INFO& charinfo = m_CharList[index];
+ info->m_Charcode = charinfo.m_CharCode;
+ info->m_OriginX = charinfo.m_OriginX;
+ info->m_OriginY = charinfo.m_OriginY;
+ info->m_Unicode = charinfo.m_Unicode;
+ info->m_Flag = charinfo.m_Flag;
+ info->m_CharBox = charinfo.m_CharBox;
+ info->m_pTextObj = charinfo.m_pTextObj;
+ if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) {
+ info->m_FontSize = charinfo.m_pTextObj->GetFontSize();
} else {
info->m_FontSize = kDefaultFontSize;
}
- info->m_Matrix.Copy(charinfo->m_Matrix);
+ info->m_Matrix.Copy(charinfo.m_Matrix);
}
void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
int32_t& nCount) const {
- PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start);
- PAGECHAR_INFO charinfo2 =
- *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1);
+ PAGECHAR_INFO charinfo = m_CharList[start];
+ PAGECHAR_INFO charinfo2 = m_CharList[start + nCount - 1];
if (FPDFTEXT_CHAR_PIECE != charinfo.m_Flag &&
FPDFTEXT_CHAR_PIECE != charinfo2.m_Flag) {
return;
@@ -483,7 +465,7 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
if (startIndex < 0) {
break;
}
- charinfo1 = *(PAGECHAR_INFO*)m_charList.GetAt(startIndex);
+ charinfo1 = m_CharList[startIndex];
}
startIndex++;
start = startIndex;
@@ -494,10 +476,10 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
while (FPDFTEXT_CHAR_PIECE == charinfo3.m_Flag &&
charinfo3.m_Index == charinfo2.m_Index) {
endIndex++;
- if (endIndex >= m_charList.GetSize()) {
+ if (endIndex >= pdfium::CollectionSize<int>(m_CharList)) {
break;
}
- charinfo3 = *(PAGECHAR_INFO*)m_charList.GetAt(endIndex);
+ charinfo3 = m_CharList[endIndex];
}
endIndex--;
nCount = endIndex - start + 1;
@@ -511,40 +493,40 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
start = 0;
if (nCount == -1) {
- nCount = m_charList.GetSize() - start;
+ nCount = pdfium::CollectionSize<int>(m_CharList) - start;
return m_TextBuf.GetWideString().Mid(start,
m_TextBuf.GetWideString().GetLength());
}
- if (nCount <= 0 || m_charList.GetSize() <= 0) {
+ if (nCount <= 0 || m_CharList.empty()) {
return L"";
}
- if (nCount + start > m_charList.GetSize() - 1) {
- nCount = m_charList.GetSize() - start;
+ if (nCount + start > pdfium::CollectionSize<int>(m_CharList) - 1) {
+ nCount = pdfium::CollectionSize<int>(m_CharList) - start;
}
if (nCount <= 0) {
return L"";
}
CheckMarkedContentObject(start, nCount);
int startindex = 0;
- PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start);
+ PAGECHAR_INFO charinfo = m_CharList[start];
int startOffset = 0;
while (charinfo.m_Index == -1) {
startOffset++;
- if (startOffset > nCount || start + startOffset >= m_charList.GetSize()) {
+ if (startOffset > nCount ||
+ start + startOffset >= pdfium::CollectionSize<int>(m_CharList)) {
return L"";
}
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + startOffset);
+ charinfo = m_CharList[start + startOffset];
}
startindex = charinfo.m_Index;
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - 1);
+ charinfo = m_CharList[start + nCount - 1];
int nCountOffset = 0;
while (charinfo.m_Index == -1) {
nCountOffset++;
if (nCountOffset >= nCount) {
return L"";
}
- charinfo =
- *(PAGECHAR_INFO*)m_charList.GetAt(start + nCount - nCountOffset - 1);
+ charinfo = m_CharList[start + nCount - nCountOffset - 1];
}
nCount = start + nCount - nCountOffset - startindex;
if (nCount <= 0) {
@@ -556,8 +538,9 @@ int CPDF_TextPage::CountRects(int start, int nCount) {
if (!m_bIsParsed || start < 0)
return -1;
- if (nCount == -1 || nCount + start > m_charList.GetSize()) {
- nCount = m_charList.GetSize() - start;
+ if (nCount == -1 ||
+ nCount + start > pdfium::CollectionSize<int>(m_CharList)) {
+ nCount = pdfium::CollectionSize<int>(m_CharList) - start;
}
m_SelRects.RemoveAll();
GetRectArray(start, nCount, m_SelRects);
@@ -649,21 +632,21 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
FX_FLOAT right,
FX_FLOAT bottom,
FX_BOOL bContains) {
- m_Segment.RemoveAll();
+ m_Segments.RemoveAll();
if (!m_bIsParsed)
return -1;
CFX_FloatRect rect(left, bottom, right, top);
rect.Normalize();
- int nCount = m_charList.GetSize();
- int pos = 0;
+
FPDF_SEGMENT segment;
segment.m_Start = 0;
segment.m_nCount = 0;
+
+ int pos = 0;
int segmentStatus = 0;
FX_BOOL IsContainPreChar = FALSE;
- while (pos < nCount) {
- PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(pos);
+ for (const auto& charinfo : m_CharList) {
if (bContains && rect.Contains(charinfo.m_CharBox)) {
if (segmentStatus == 0 || segmentStatus == 2) {
segment.m_Start = pos;
@@ -697,7 +680,7 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
} else {
if (segmentStatus == 1) {
segmentStatus = 2;
- m_Segment.Add(segment);
+ m_Segments.Add(segment);
segment.m_Start = 0;
segment.m_nCount = 0;
}
@@ -705,7 +688,7 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
} else {
if (segmentStatus == 1) {
segmentStatus = 2;
- m_Segment.Add(segment);
+ m_Segments.Add(segment);
segment.m_Start = 0;
segment.m_nCount = 0;
}
@@ -715,18 +698,18 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
}
if (segmentStatus == 1) {
segmentStatus = 2;
- m_Segment.Add(segment);
+ m_Segments.Add(segment);
segment.m_Start = 0;
segment.m_nCount = 0;
}
- return m_Segment.GetSize();
+ return m_Segments.GetSize();
}
void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
- if (index < 0 || index >= m_Segment.GetSize()) {
+ if (index < 0 || index >= m_Segments.GetSize()) {
return;
}
- start = m_Segment.GetAt(index).m_Start;
- count = m_Segment.GetAt(index).m_nCount;
+ start = m_Segments.GetAt(index).m_Start;
+ count = m_Segments.GetAt(index).m_nCount;
}
int CPDF_TextPage::GetWordBreak(int index, int direction) const {
if (!m_bIsParsed)
@@ -735,11 +718,10 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const {
if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)
return -1;
- if (index < 0 || index >= m_charList.GetSize())
+ if (index < 0 || index >= pdfium::CollectionSize<int>(m_CharList))
return -1;
- PAGECHAR_INFO charinfo;
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
+ const PAGECHAR_INFO& charinfo = m_CharList[index];
if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
return index;
}
@@ -749,17 +731,13 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const {
int breakPos = index;
if (direction == FPDFTEXT_LEFT) {
while (--breakPos > 0) {
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos);
- if (!IsLetter(charinfo.m_Unicode)) {
- return breakPos;
- }
+ if (!IsLetter(m_CharList[breakPos].m_Unicode))
+ break;
}
} else if (direction == FPDFTEXT_RIGHT) {
- while (++breakPos < m_charList.GetSize()) {
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos);
- if (!IsLetter(charinfo.m_Unicode)) {
- return breakPos;
- }
+ while (++breakPos < pdfium::CollectionSize<int>(m_CharList)) {
+ if (!IsLetter(m_CharList[breakPos].m_Unicode))
+ break;
}
}
return breakPos;
@@ -949,21 +927,21 @@ void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) {
if (ret == CFX_BidiChar::RIGHT) {
for (int i = start + count - 1; i >= start; i--) {
m_TextBuf.AppendChar(str.GetAt(i));
- m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i));
+ m_CharList.push_back(m_TempCharList[i]);
}
} else {
int end = start + count;
for (int i = start; i < end; i++) {
m_TextBuf.AppendChar(str.GetAt(i));
- m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i));
+ m_CharList.push_back(m_TempCharList[i]);
}
}
}
void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) {
- PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
+ PAGECHAR_INFO info = m_TempCharList[i];
FX_WCHAR wChar = str.GetAt(i);
- if (!IsControlChar(Info)) {
- Info.m_Index = m_TextBuf.GetLength();
+ if (!IsControlChar(info)) {
+ info.m_Index = m_TextBuf.GetLength();
if (wChar >= 0xFB00 && wChar <= 0xFB06) {
FX_WCHAR* pDst = NULL;
FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
@@ -971,11 +949,11 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) {
pDst = FX_Alloc(FX_WCHAR, nCount);
FX_Unicode_GetNormalization(wChar, pDst);
for (int nIndex = 0; nIndex < nCount; nIndex++) {
- PAGECHAR_INFO Info2 = Info;
- Info2.m_Unicode = pDst[nIndex];
- Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
- m_TextBuf.AppendChar(Info2.m_Unicode);
- m_charList.Add(Info2);
+ PAGECHAR_INFO info2 = info;
+ info2.m_Unicode = pDst[nIndex];
+ info2.m_Flag = FPDFTEXT_CHAR_PIECE;
+ m_TextBuf.AppendChar(info2.m_Unicode);
+ m_CharList.push_back(info2);
}
FX_Free(pDst);
return;
@@ -983,14 +961,14 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) {
}
m_TextBuf.AppendChar(wChar);
} else {
- Info.m_Index = -1;
+ info.m_Index = -1;
}
- m_charList.Add(Info);
+ m_CharList.push_back(info);
}
void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) {
- PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
- if (!IsControlChar(Info)) {
- Info.m_Index = m_TextBuf.GetLength();
+ PAGECHAR_INFO info = m_TempCharList[i];
+ if (!IsControlChar(info)) {
+ info.m_Index = m_TextBuf.GetLength();
FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE);
FX_WCHAR* pDst = NULL;
FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
@@ -998,25 +976,24 @@ void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) {
pDst = FX_Alloc(FX_WCHAR, nCount);
FX_Unicode_GetNormalization(wChar, pDst);
for (int nIndex = 0; nIndex < nCount; nIndex++) {
- PAGECHAR_INFO Info2 = Info;
- Info2.m_Unicode = pDst[nIndex];
- Info2.m_Flag = FPDFTEXT_CHAR_PIECE;
- m_TextBuf.AppendChar(Info2.m_Unicode);
- m_charList.Add(Info2);
+ PAGECHAR_INFO info2 = info;
+ info2.m_Unicode = pDst[nIndex];
+ info2.m_Flag = FPDFTEXT_CHAR_PIECE;
+ m_TextBuf.AppendChar(info2.m_Unicode);
+ m_CharList.push_back(info2);
}
FX_Free(pDst);
return;
}
- Info.m_Unicode = wChar;
- m_TextBuf.AppendChar(Info.m_Unicode);
+ info.m_Unicode = wChar;
+ m_TextBuf.AppendChar(info.m_Unicode);
} else {
- Info.m_Index = -1;
+ info.m_Index = -1;
}
- m_charList.Add(Info);
+ m_CharList.push_back(info);
}
void CPDF_TextPage::CloseTempLine() {
- int count1 = m_TempCharList.GetSize();
- if (count1 <= 0) {
+ if (m_TempCharList.empty()) {
return;
}
std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar);
@@ -1030,9 +1007,8 @@ void CPDF_TextPage::CloseTempLine() {
if (str.GetAt(i) == 32) {
if (bPrevSpace) {
m_TempTextBuf.Delete(i, 1);
- m_TempCharList.Delete(i);
+ m_TempCharList.erase(m_TempCharList.begin() + i);
str.Delete(i);
- count1--;
i--;
continue;
}
@@ -1133,7 +1109,7 @@ void CPDF_TextPage::CloseTempLine() {
bL2R = TRUE;
continue;
}
- int end = m_TempCharList.GetSize() - 1;
+ int end = pdfium::CollectionSize<int>(m_TempCharList) - 1;
if (j < count) {
end = order.GetAt(j) - 1;
}
@@ -1150,7 +1126,7 @@ void CPDF_TextPage::CloseTempLine() {
}
}
order.RemoveAll();
- m_TempCharList.RemoveAll();
+ m_TempCharList.clear();
m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength());
}
void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj,
@@ -1367,29 +1343,23 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
charinfo.m_CharBox.bottom = charBox.bottom;
charinfo.m_Matrix.Copy(matrix);
m_TempTextBuf.AppendChar(wChar);
- m_TempCharList.Add(charinfo);
+ m_TempCharList.push_back(charinfo);
}
}
-void CPDF_TextPage::FindPreviousTextObject(void) {
- if (m_TempCharList.GetSize() < 1 && m_charList.GetSize() < 1) {
+void CPDF_TextPage::FindPreviousTextObject() {
+ if (m_TempCharList.empty() && m_CharList.empty())
return;
- }
- PAGECHAR_INFO preChar;
- if (m_TempCharList.GetSize() >= 1) {
- preChar =
- *(PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1);
- } else {
- preChar = *(PAGECHAR_INFO*)m_charList.GetAt(m_charList.GetSize() - 1);
- }
- if (preChar.m_pTextObj) {
+
+ PAGECHAR_INFO preChar =
+ m_TempCharList.empty() ? m_CharList.back() : m_TempCharList.back();
+
+ if (preChar.m_pTextObj)
m_pPreTextObj = preChar.m_pTextObj;
- }
}
void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend,
int32_t iBufStartAppend) {
- int32_t i, j;
- i = iCharListStartAppend;
- j = m_TempCharList.GetSize() - 1;
+ int32_t i = iCharListStartAppend;
+ int32_t j = pdfium::CollectionSize<int32_t>(m_TempCharList) - 1;
for (; i < j; i++, j--) {
std::swap(m_TempCharList[i], m_TempCharList[j]);
std::swap(m_TempCharList[i].m_Index, m_TempCharList[j].m_Index);
@@ -1476,7 +1446,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
generateChar.m_Matrix.Copy(formMatrix);
}
m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
- m_TempCharList.Add(generateChar);
+ m_TempCharList.push_back(generateChar);
}
} else if (result == 2) {
CloseTempLine();
@@ -1486,14 +1456,14 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
if (!formMatrix.IsIdentity()) {
generateChar.m_Matrix.Copy(formMatrix);
}
- m_charList.Add(generateChar);
+ m_CharList.push_back(generateChar);
}
if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) {
m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
if (!formMatrix.IsIdentity()) {
generateChar.m_Matrix.Copy(formMatrix);
}
- m_charList.Add(generateChar);
+ m_CharList.push_back(generateChar);
}
}
} else if (result == 3) {
@@ -1515,13 +1485,12 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
m_TempTextBuf.GetWideString().GetAt(m_TempTextBuf.GetLength() -
1) == 0x20) {
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
- m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
+ m_TempCharList.pop_back();
}
- PAGECHAR_INFO* cha =
- (PAGECHAR_INFO*)m_TempCharList.GetAt(m_TempCharList.GetSize() - 1);
+ PAGECHAR_INFO* charinfo = &m_TempCharList.back();
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
- cha->m_Unicode = 0x2;
- cha->m_Flag = FPDFTEXT_CHAR_HYPHEN;
+ charinfo->m_Unicode = 0x2;
+ charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
m_TempTextBuf.AppendChar(0xfffe);
}
} else {
@@ -1544,7 +1513,8 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
const FX_BOOL bIsBidiAndMirrorInverse =
bR2L && (matrix.a * matrix.d - matrix.b * matrix.c) < 0;
int32_t iBufStartAppend = m_TempTextBuf.GetLength();
- int32_t iCharListStartAppend = m_TempCharList.GetSize();
+ int32_t iCharListStartAppend =
+ pdfium::CollectionSize<int32_t>(m_TempCharList);
FX_FLOAT spacing = 0;
for (int i = 0; i < nItems; i++) {
@@ -1606,7 +1576,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
charinfo.m_CharBox =
CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY,
charinfo.m_OriginX, charinfo.m_OriginY);
- m_TempCharList.Add(charinfo);
+ m_TempCharList.push_back(charinfo);
}
if (item.m_CharCode == (FX_DWORD)-1) {
continue;
@@ -1658,23 +1628,23 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
charinfo.m_Matrix.Copy(matrix);
if (wstrItem.IsEmpty()) {
charinfo.m_Unicode = 0;
- m_TempCharList.Add(charinfo);
+ m_TempCharList.push_back(charinfo);
m_TempTextBuf.AppendChar(0xfffe);
continue;
} else {
int nTotal = wstrItem.GetLength();
FX_BOOL bDel = FALSE;
- const int count = std::min(m_TempCharList.GetSize(), 7);
+ const int count =
+ std::min(pdfium::CollectionSize<int>(m_TempCharList), 7);
FX_FLOAT threshold = charinfo.m_Matrix.TransformXDistance(
(FX_FLOAT)TEXT_CHARRATIO_GAPDELTA * pTextObj->GetFontSize());
- for (int n = m_TempCharList.GetSize();
- n > m_TempCharList.GetSize() - count; n--) {
- PAGECHAR_INFO* charinfo1 = (PAGECHAR_INFO*)m_TempCharList.GetAt(n - 1);
- if (charinfo1->m_CharCode == charinfo.m_CharCode &&
- charinfo1->m_pTextObj->GetFont() ==
- charinfo.m_pTextObj->GetFont() &&
- FXSYS_fabs(charinfo1->m_OriginX - charinfo.m_OriginX) < threshold &&
- FXSYS_fabs(charinfo1->m_OriginY - charinfo.m_OriginY) < threshold) {
+ for (int n = pdfium::CollectionSize<int>(m_TempCharList);
+ n > pdfium::CollectionSize<int>(m_TempCharList) - count; n--) {
+ const PAGECHAR_INFO& charinfo1 = m_TempCharList[n - 1];
+ if (charinfo1.m_CharCode == charinfo.m_CharCode &&
+ charinfo1.m_pTextObj->GetFont() == charinfo.m_pTextObj->GetFont() &&
+ FXSYS_fabs(charinfo1.m_OriginX - charinfo.m_OriginX) < threshold &&
+ FXSYS_fabs(charinfo1.m_OriginY - charinfo.m_OriginY) < threshold) {
bDel = TRUE;
break;
}
@@ -1688,14 +1658,14 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
} else {
m_TempTextBuf.AppendChar(0xfffe);
}
- m_TempCharList.Add(charinfo);
+ m_TempCharList.push_back(charinfo);
}
} else if (i == 0) {
CFX_WideString str = m_TempTextBuf.GetWideString();
if (!str.IsEmpty() &&
str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
- m_TempCharList.Delete(m_TempCharList.GetSize() - 1);
+ m_TempCharList.pop_back();
}
}
}
@@ -1754,19 +1724,16 @@ FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) {
return TRUE;
}
}
- int size = m_TempCharList.GetSize();
- PAGECHAR_INFO preChar;
- if (size) {
- preChar = (PAGECHAR_INFO)m_TempCharList[size - 1];
+ const PAGECHAR_INFO* preInfo;
+ if (!m_TempCharList.empty()) {
+ preInfo = &m_TempCharList.back();
+ } else if (!m_CharList.empty()) {
+ preInfo = &m_CharList.back();
} else {
- size = m_charList.GetSize();
- if (size == 0) {
- return FALSE;
- }
- preChar = (PAGECHAR_INFO)m_charList[size - 1];
+ return FALSE;
}
- if (FPDFTEXT_CHAR_PIECE == preChar.m_Flag &&
- (0xAD == preChar.m_Unicode || 0x2D == preChar.m_Unicode)) {
+ if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
+ (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) {
return TRUE;
}
}
@@ -1939,9 +1906,9 @@ FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
pTextObj1->m_Right, pTextObj1->m_Top);
if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) {
FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left);
- int nCount = m_charList.GetSize();
+ size_t nCount = m_CharList.size();
if (nCount >= 2) {
- PAGECHAR_INFO perCharTemp = (PAGECHAR_INFO)m_charList[nCount - 2];
+ PAGECHAR_INFO perCharTemp = m_CharList[nCount - 2];
FX_FLOAT dbSpace = perCharTemp.m_CharBox.Width();
if (dbXdif > dbSpace) {
return FALSE;
@@ -2012,16 +1979,13 @@ FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
}
FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) {
- int size = m_TempCharList.GetSize();
- PAGECHAR_INFO preChar;
- if (size) {
- preChar = (PAGECHAR_INFO)m_TempCharList[size - 1];
+ const PAGECHAR_INFO* preChar;
+ if (!m_TempCharList.empty()) {
+ preChar = &m_TempCharList.back();
+ } else if (!m_CharList.empty()) {
+ preChar = &m_CharList.back();
} else {
- size = m_charList.GetSize();
- if (size == 0) {
- return FALSE;
- }
- preChar = (PAGECHAR_INFO)m_charList[size - 1];
+ return FALSE;
}
info.m_Index = m_TextBuf.GetLength();
info.m_Unicode = unicode;
@@ -2029,16 +1993,17 @@ FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) {
info.m_CharCode = -1;
info.m_Flag = FPDFTEXT_CHAR_GENERATED;
int preWidth = 0;
- if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1)
- preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont());
+ if (preChar->m_pTextObj && preChar->m_CharCode != (FX_DWORD)-1)
+ preWidth =
+ GetCharWidth(preChar->m_CharCode, preChar->m_pTextObj->GetFont());
- FX_FLOAT fFontSize = preChar.m_pTextObj ? preChar.m_pTextObj->GetFontSize()
- : preChar.m_CharBox.Height();
+ FX_FLOAT fFontSize = preChar->m_pTextObj ? preChar->m_pTextObj->GetFontSize()
+ : preChar->m_CharBox.Height();
if (!fFontSize)
fFontSize = kDefaultFontSize;
- info.m_OriginX = preChar.m_OriginX + preWidth * (fFontSize) / 1000;
- info.m_OriginY = preChar.m_OriginY;
+ info.m_OriginX = preChar->m_OriginX + preWidth * (fFontSize) / 1000;
+ info.m_OriginY = preChar->m_OriginY;
info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX,
info.m_OriginY);
return TRUE;
diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h
index f5d1f98a1a..bc4d181863 100644
--- a/core/src/fpdftext/text_int.h
+++ b/core/src/fpdftext/text_int.h
@@ -7,6 +7,8 @@
#ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_
#define CORE_SRC_FPDFTEXT_TEXT_INT_H_
+#include <deque>
+
#include "core/include/fpdftext/fpdf_text.h"
#include "core/include/fxcrt/fx_basic.h"
@@ -26,7 +28,7 @@ class CPDF_TextPageFind;
#define FPDFTEXT_MC_DONE 1
#define FPDFTEXT_MC_DELAY 2
-typedef struct _PAGECHAR_INFO {
+struct PAGECHAR_INFO {
int m_CharCode;
FX_WCHAR m_Unicode;
FX_FLOAT m_OriginX;
@@ -36,18 +38,17 @@ typedef struct _PAGECHAR_INFO {
CPDF_TextObject* m_pTextObj;
CFX_Matrix m_Matrix;
int m_Index;
-} PAGECHAR_INFO;
-typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray;
-typedef struct {
+};
+
+struct FPDF_SEGMENT {
int m_Start;
int m_nCount;
-} FPDF_SEGMENT;
-typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array;
-typedef struct {
+};
+
+struct PDFTEXT_Obj {
CPDF_TextObject* m_pTextObj;
CFX_Matrix m_formMatrix;
-} PDFTEXT_Obj;
-typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ;
+};
class CPDF_TextPage : public IPDF_TextPage {
public:
@@ -91,7 +92,6 @@ class CPDF_TextPage : public IPDF_TextPage {
void GetBoundedSegment(int index, int& start, int& count) const override;
int GetWordBreak(int index, int direction) const override;
- const PAGECHAR_InfoArray* GetCharList() const { return &m_charList; }
static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1,
const CFX_FloatRect& rect2);
static FX_BOOL IsLetter(FX_WCHAR unicode);
@@ -132,18 +132,18 @@ class CPDF_TextPage : public IPDF_TextPage {
CFX_WordArray m_CharIndex;
const CPDF_PageObjectList* const m_pPage;
- PAGECHAR_InfoArray m_charList;
+ std::deque<PAGECHAR_INFO> m_CharList;
+ std::deque<PAGECHAR_INFO> m_TempCharList;
CFX_WideTextBuf m_TextBuf;
- PAGECHAR_InfoArray m_TempCharList;
CFX_WideTextBuf m_TempTextBuf;
const int m_parserflag;
CPDF_TextObject* m_pPreTextObj;
CFX_Matrix m_perMatrix;
bool m_bIsParsed;
CFX_Matrix m_DisplayMatrix;
- SEGMENT_Array m_Segment;
+ CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments;
CFX_RectArray m_SelRects;
- LINEOBJ m_LineObj;
+ CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj;
int32_t m_TextlineDir;
CFX_FloatRect m_CurlineRect;
};