summaryrefslogtreecommitdiff
path: root/core/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext')
-rw-r--r--core/fpdftext/fpdf_text_int.cpp347
-rw-r--r--core/fpdftext/include/cpdf_textpage.h27
2 files changed, 193 insertions, 181 deletions
diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/fpdf_text_int.cpp
index 8ae2c1ef35..5be47143fd 100644
--- a/core/fpdftext/fpdf_text_int.cpp
+++ b/core/fpdftext/fpdf_text_int.cpp
@@ -39,11 +39,11 @@
#define FPDFTEXT_CHAR_HYPHEN 3
#define FPDFTEXT_CHAR_PIECE 4
-#define TEXT_BLANK_CHAR L' '
+#define TEXT_SPACE_CHAR L' '
#define TEXT_LINEFEED_CHAR L'\n'
#define TEXT_RETURN_CHAR L'\r'
#define TEXT_EMPTY L""
-#define TEXT_BLANK L" "
+#define TEXT_SPACE L" "
#define TEXT_RETURN_LINEFEED L"\r\n"
#define TEXT_LINEFEED L"\n"
#define TEXT_CHARRATIO_GAPDELTA 0.070
@@ -150,7 +150,7 @@ CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags)
m_parserflag(flags),
m_pPreTextObj(nullptr),
m_bIsParsed(false),
- m_TextlineDir(-1),
+ m_TextlineDir(TextOrientation::Unknown),
m_CurlineRect(0, 0, 0, 0) {
m_TextBuf.EstimateSize(0, 10240);
pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
@@ -183,34 +183,25 @@ void CPDF_TextPage::ParseTextPage() {
m_bIsParsed = true;
m_CharIndex.clear();
int nCount = pdfium::CollectionSize<int>(m_CharList);
- if (nCount) {
+ if (nCount)
m_CharIndex.push_back(0);
- }
+
for (int i = 0; i < nCount; i++) {
int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
- FX_BOOL bNormal = FALSE;
const PAGECHAR_INFO& charinfo = m_CharList[i];
- if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
- bNormal = TRUE;
- } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
- bNormal = FALSE;
- } else {
- bNormal = TRUE;
- }
- if (bNormal) {
+ if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED ||
+ (charinfo.m_Unicode != 0 && !IsControlChar(charinfo))) {
if (indexSize % 2) {
m_CharIndex.push_back(1);
} else {
- if (indexSize <= 0) {
+ if (indexSize <= 0)
continue;
- }
m_CharIndex[indexSize - 1] += 1;
}
} else {
if (indexSize % 2) {
- if (indexSize <= 0) {
+ if (indexSize <= 0)
continue;
- }
m_CharIndex[indexSize - 1] = i + 1;
} else {
m_CharIndex.push_back(i + 1);
@@ -218,9 +209,8 @@ void CPDF_TextPage::ParseTextPage() {
}
}
int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
- if (indexSize % 2) {
+ if (indexSize % 2)
m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
- }
}
int CPDF_TextPage::CountChars() const {
@@ -685,7 +675,8 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
return m_Segments.GetSize();
}
-int32_t CPDF_TextPage::FindTextlineFlowDirection() {
+CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation()
+ const {
const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth());
const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight());
std::vector<uint8_t> nHorizontalMask(nPageWidth);
@@ -695,7 +686,7 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() {
int32_t index = 0;
FX_FLOAT fLineHeight = 0.0f;
if (m_pPage->GetPageObjectList()->empty())
- return -1;
+ return TextOrientation::Unknown;
for (auto& pPageObj : *m_pPage->GetPageObjectList()) {
if (!pPageObj || !pPageObj->IsText())
@@ -749,33 +740,42 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() {
break;
}
nEndV = index;
- for (index = nStartV; index < nEndV; index++) {
+ for (index = nStartV; index < nEndV; index++)
nSumV += nVerticalMask[index];
- }
nSumV /= nEndV - nStartV;
- if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) {
- return 0;
- }
- if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) {
- return 1;
- }
- if (nSumH > 0.8f) {
- return 0;
- }
- if (nSumH - nSumV > 0.0f) {
- return 0;
- }
- if (nSumV - nSumH > 0.0f) {
- return 1;
- }
- return -1;
+
+ if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight))
+ return TextOrientation::Horizontal;
+ if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight))
+ return TextOrientation::Vertical;
+
+ if (nSumH > 0.8f)
+ return TextOrientation::Horizontal;
+
+ if (nSumH > nSumV)
+ return TextOrientation::Horizontal;
+ if (nSumH < nSumV)
+ return TextOrientation::Vertical;
+ return TextOrientation::Unknown;
+}
+
+void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode,
+ const CFX_Matrix& formMatrix) {
+ PAGECHAR_INFO generateChar;
+ if (!GenerateCharInfo(unicode, generateChar))
+ return;
+
+ m_TextBuf.AppendChar(unicode);
+ if (!formMatrix.IsIdentity())
+ generateChar.m_Matrix.Copy(formMatrix);
+ m_CharList.push_back(generateChar);
}
void CPDF_TextPage::ProcessObject() {
if (m_pPage->GetPageObjectList()->empty())
return;
- m_TextlineDir = FindTextlineFlowDirection();
+ m_TextlineDir = FindTextlineFlowOrientation();
const CPDF_PageObjectList* pObjList = m_pPage->GetPageObjectList();
for (auto it = pObjList->begin(); it != pObjList->end(); ++it) {
if (CPDF_PageObject* pObj = it->get()) {
@@ -833,37 +833,14 @@ int CPDF_TextPage::GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const {
void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
PAGECHAR_INFO info) {
- if (!IsControlChar(info)) {
- info.m_Index = m_TextBuf.GetLength();
- if (wChar >= 0xFB00 && wChar <= 0xFB06) {
- FX_WCHAR* pDst = nullptr;
- FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
- if (nCount >= 1) {
- pDst = FX_Alloc(FX_WCHAR, nCount);
- Unicode_GetNormalization(wChar, pDst);
- for (int nIndex = 0; nIndex < nCount; nIndex++) {
- PAGECHAR_INFO info2 = info;
- info2.m_Unicode = pDst[nIndex];
- info2.m_Flag = FPDFTEXT_CHAR_PIECE;
- m_TextBuf.AppendChar(info2.m_Unicode);
- m_CharList.push_back(info2);
- }
- FX_Free(pDst);
- return;
- }
- }
- m_TextBuf.AppendChar(wChar);
- } else {
+ if (IsControlChar(info)) {
info.m_Index = -1;
+ m_CharList.push_back(info);
+ return;
}
- m_CharList.push_back(info);
-}
-void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
- PAGECHAR_INFO info) {
- if (!IsControlChar(info)) {
- info.m_Index = m_TextBuf.GetLength();
- wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
+ info.m_Index = m_TextBuf.GetLength();
+ if (wChar >= 0xFB00 && wChar <= 0xFB06) {
FX_WCHAR* pDst = nullptr;
FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
if (nCount >= 1) {
@@ -879,11 +856,38 @@ void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
FX_Free(pDst);
return;
}
- info.m_Unicode = wChar;
- m_TextBuf.AppendChar(info.m_Unicode);
- } else {
+ }
+ m_TextBuf.AppendChar(wChar);
+ m_CharList.push_back(info);
+}
+
+void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
+ PAGECHAR_INFO info) {
+ if (IsControlChar(info)) {
info.m_Index = -1;
+ m_CharList.push_back(info);
+ return;
}
+
+ info.m_Index = m_TextBuf.GetLength();
+ wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
+ FX_WCHAR* pDst = nullptr;
+ FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
+ if (nCount >= 1) {
+ pDst = FX_Alloc(FX_WCHAR, nCount);
+ Unicode_GetNormalization(wChar, pDst);
+ for (int nIndex = 0; nIndex < nCount; nIndex++) {
+ PAGECHAR_INFO info2 = info;
+ info2.m_Unicode = pDst[nIndex];
+ info2.m_Flag = FPDFTEXT_CHAR_PIECE;
+ m_TextBuf.AppendChar(info2.m_Unicode);
+ m_CharList.push_back(info2);
+ }
+ FX_Free(pDst);
+ return;
+ }
+ info.m_Unicode = wChar;
+ m_TextBuf.AppendChar(info.m_Unicode);
m_CharList.push_back(info);
}
@@ -1145,6 +1149,7 @@ void CPDF_TextPage::FindPreviousTextObject() {
if (preChar.m_pTextObj)
m_pPreTextObj = preChar.m_pTextObj;
}
+
void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend,
int32_t iBufStartAppend) {
int32_t i = iCharListStartAppend;
@@ -1198,10 +1203,10 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
m_perMatrix.Copy(formMatrix);
return;
}
- int result = 0;
+ GenerateCharacter result = GenerateCharacter::None;
if (m_pPreTextObj) {
result = ProcessInsertObject(pTextObj, formMatrix);
- if (2 == result) {
+ if (result == GenerateCharacter::LineBreak) {
m_CurlineRect =
CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
@@ -1210,59 +1215,51 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top));
}
- PAGECHAR_INFO generateChar;
- if (result == 1) {
- if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) {
- if (!formMatrix.IsIdentity()) {
- generateChar.m_Matrix.Copy(formMatrix);
+ switch (result) {
+ case GenerateCharacter::None:
+ break;
+ case GenerateCharacter::Space: {
+ PAGECHAR_INFO generateChar;
+ if (GenerateCharInfo(TEXT_SPACE_CHAR, generateChar)) {
+ if (!formMatrix.IsIdentity())
+ generateChar.m_Matrix.Copy(formMatrix);
+ m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR);
+ m_TempCharList.push_back(generateChar);
}
- m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
- m_TempCharList.push_back(generateChar);
+ break;
}
- } else if (result == 2) {
- CloseTempLine();
- if (m_TextBuf.GetSize()) {
- if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) {
- m_TextBuf.AppendChar(TEXT_RETURN_CHAR);
- if (!formMatrix.IsIdentity()) {
- generateChar.m_Matrix.Copy(formMatrix);
- }
- m_CharList.push_back(generateChar);
+ case GenerateCharacter::LineBreak:
+ CloseTempLine();
+ if (m_TextBuf.GetSize()) {
+ AppendGeneratedCharacter(TEXT_RETURN_CHAR, formMatrix);
+ AppendGeneratedCharacter(TEXT_LINEFEED_CHAR, formMatrix);
}
- if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) {
- m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR);
- if (!formMatrix.IsIdentity()) {
- generateChar.m_Matrix.Copy(formMatrix);
+ break;
+ case GenerateCharacter::Hyphen:
+ if (pTextObj->CountChars() == 1) {
+ CPDF_TextObjectItem item;
+ pTextObj->GetCharInfo(0, &item);
+ CFX_WideString wstrItem =
+ pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
+ if (wstrItem.IsEmpty()) {
+ wstrItem += (FX_WCHAR)item.m_CharCode;
}
- m_CharList.push_back(generateChar);
+ FX_WCHAR curChar = wstrItem.GetAt(0);
+ if (curChar == 0x2D || curChar == 0xAD)
+ return;
}
- }
- } else if (result == 3) {
- int32_t nChars = pTextObj->CountChars();
- if (nChars == 1) {
- CPDF_TextObjectItem item;
- pTextObj->GetCharInfo(0, &item);
- CFX_WideString wstrItem =
- pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
- if (wstrItem.IsEmpty()) {
- wstrItem += (FX_WCHAR)item.m_CharCode;
- }
- FX_WCHAR curChar = wstrItem.GetAt(0);
- if (0x2D == curChar || 0xAD == curChar) {
- return;
+ while (m_TempTextBuf.GetSize() > 0 &&
+ m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) ==
+ 0x20) {
+ m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
+ m_TempCharList.pop_back();
}
- }
- while (m_TempTextBuf.GetSize() > 0 &&
- m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) ==
- 0x20) {
+ PAGECHAR_INFO* charinfo = &m_TempCharList.back();
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
- m_TempCharList.pop_back();
- }
- PAGECHAR_INFO* charinfo = &m_TempCharList.back();
- m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
- charinfo->m_Unicode = 0x2;
- charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
- m_TempTextBuf.AppendChar(0xfffe);
+ charinfo->m_Unicode = 0x2;
+ charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN;
+ m_TempTextBuf.AppendChar(0xfffe);
+ break;
}
} else {
m_CurlineRect =
@@ -1299,9 +1296,9 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
if (str.IsEmpty()) {
str = m_TextBuf.AsStringC();
}
- if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
+ if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_SPACE_CHAR)
continue;
- }
+
FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
spacing = -fontsize_h * item.m_OriginX / 1000;
continue;
@@ -1335,11 +1332,11 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
threshold = fontsize_h * threshold / 1000;
}
if (threshold && (spacing && spacing >= threshold)) {
- charinfo.m_Unicode = TEXT_BLANK_CHAR;
+ charinfo.m_Unicode = TEXT_SPACE_CHAR;
charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED;
charinfo.m_pTextObj = pTextObj;
charinfo.m_Index = m_TextBuf.GetLength();
- m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR);
+ m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR);
charinfo.m_CharCode = CPDF_Font::kInvalidCharCode;
charinfo.m_Matrix.Copy(formMatrix);
matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX,
@@ -1433,7 +1430,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
} else if (i == 0) {
CFX_WideString str = m_TempTextBuf.MakeString();
if (!str.IsEmpty() &&
- str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) {
+ str.GetAt(str.GetLength() - 1) == TEXT_SPACE_CHAR) {
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
m_TempCharList.pop_back();
}
@@ -1445,12 +1442,12 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
}
}
-int32_t CPDF_TextPage::GetTextObjectWritingMode(
- const CPDF_TextObject* pTextObj) {
+CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
+ const CPDF_TextObject* pTextObj) const {
int32_t nChars = pTextObj->CountChars();
- if (nChars == 1) {
+ if (nChars == 1)
return m_TextlineDir;
- }
+
CPDF_TextObjectItem first, last;
pTextObj->GetCharInfo(0, &first);
pTextObj->GetCharInfo(nChars - 1, &last);
@@ -1460,19 +1457,20 @@ int32_t CPDF_TextPage::GetTextObjectWritingMode(
textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY);
FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX);
FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY);
- if (dX <= 0.0001f && dY <= 0.0001f) {
- return -1;
- }
+ if (dX <= 0.0001f && dY <= 0.0001f)
+ return TextOrientation::Unknown;
+
CFX_VectorF v(dX, dY);
v.Normalize();
- if (v.y <= 0.0872f) {
- return v.x <= 0.0872f ? m_TextlineDir : 0;
- }
- if (v.x <= 0.0872f) {
- return 1;
- }
+ if (v.y <= 0.0872f)
+ return v.x <= 0.0872f ? m_TextlineDir : TextOrientation::Horizontal;
+
+ if (v.x <= 0.0872f)
+ return TextOrientation::Vertical;
+
return m_TextlineDir;
}
+
FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) {
CFX_WideString strCurText = m_TempTextBuf.MakeString();
if (strCurText.GetLength() == 0) {
@@ -1510,14 +1508,14 @@ FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) {
return FALSE;
}
-int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
- const CFX_Matrix& formMatrix) {
+CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
+ const CPDF_TextObject* pObj,
+ const CFX_Matrix& formMatrix) {
FindPreviousTextObject();
- FX_BOOL bNewline = FALSE;
- int WritingMode = GetTextObjectWritingMode(pObj);
- if (WritingMode == -1) {
+ TextOrientation WritingMode = GetTextObjectWritingMode(pObj);
+ if (WritingMode == TextOrientation::Unknown)
WritingMode = GetTextObjectWritingMode(m_pPreTextObj);
- }
+
CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
pObj->m_Top);
CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom,
@@ -1532,20 +1530,18 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
wstrItem += (FX_WCHAR)item.m_CharCode;
}
FX_WCHAR curChar = wstrItem.GetAt(0);
- if (WritingMode == 0) {
+ if (WritingMode == TextOrientation::Horizontal) {
if (this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) {
FX_FLOAT top =
this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top;
FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom
: prev_rect.bottom;
if (bottom >= top) {
- if (IsHyphen(curChar)) {
- return 3;
- }
- return 2;
+ return IsHyphen(curChar) ? GenerateCharacter::Hyphen
+ : GenerateCharacter::LineBreak;
}
}
- } else if (WritingMode == 1) {
+ } else if (WritingMode == TextOrientation::Vertical) {
if (this_rect.Width() > pObj->GetFontSize() * 0.1f &&
prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) {
FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left
@@ -1554,10 +1550,8 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
? this_rect.right
: m_CurlineRect.right;
if (right <= left) {
- if (IsHyphen(curChar)) {
- return 3;
- }
- return 2;
+ return IsHyphen(curChar) ? GenerateCharacter::Hyphen
+ : GenerateCharacter::LineBreak;
}
}
}
@@ -1581,17 +1575,18 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
if (last_width < this_width) {
threshold = prev_reverse.TransformDistance(threshold);
}
- CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom,
- m_pPreTextObj->m_Right, pObj->m_Top);
- CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom,
- m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
- CFX_FloatRect rect3 = rect1;
- rect1.Intersect(rect2);
- if (WritingMode == 0) {
+ bool bNewline = false;
+ if (WritingMode == TextOrientation::Horizontal) {
+ CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom,
+ m_pPreTextObj->m_Right, pObj->m_Top);
+ CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom,
+ m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
+ CFX_FloatRect rect3 = rect1;
+ rect1.Intersect(rect2);
if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) ||
((y > threshold * 2 || y < threshold * -3) &&
(FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) {
- bNewline = TRUE;
+ bNewline = true;
if (nItem > 1) {
CPDF_TextObjectItem tempItem;
m_pPreTextObj->GetItemInfo(0, &tempItem);
@@ -1604,25 +1599,27 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000,
m_pPreTextObj->m_Top);
if (re.Contains(pObj->GetPosX(), pObj->GetPosY())) {
- bNewline = FALSE;
+ bNewline = false;
} else {
CFX_FloatRect rect(0, pObj->m_Bottom, 1000, pObj->m_Top);
if (rect.Contains(m_pPreTextObj->GetPosX(),
m_pPreTextObj->GetPosY())) {
- bNewline = FALSE;
+ bNewline = false;
}
}
}
}
}
}
- if (bNewline)
- return IsHyphen(curChar) ? 3 : 2;
+ if (bNewline) {
+ return IsHyphen(curChar) ? GenerateCharacter::Hyphen
+ : GenerateCharacter::LineBreak;
+ }
int32_t nChars = pObj->CountChars();
if (nChars == 1 && (0x2D == curChar || 0xAD == curChar) &&
IsHyphen(curChar)) {
- return 3;
+ return GenerateCharacter::Hyphen;
}
CFX_WideString PrevStr =
m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode);
@@ -1653,18 +1650,18 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
if (curChar != L' ' && preChar != L' ') {
if ((x - last_pos - last_width) > threshold ||
(last_pos - x - last_width) > threshold) {
- return 1;
+ return GenerateCharacter::Space;
}
if (x < 0 && (last_pos - x - last_width) > threshold) {
- return 1;
+ return GenerateCharacter::Space;
}
if ((x - last_pos - last_width) > this_width ||
(x - last_pos - this_width) > last_width) {
- return 1;
+ return GenerateCharacter::Space;
}
}
}
- return 0;
+ return GenerateCharacter::None;
}
FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
@@ -1914,7 +1911,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() {
if (csWord.IsEmpty()) {
if (iWord == nCount - 1) {
FX_WCHAR strInsert = m_strText.GetAt(nStartPos);
- if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_BLANK_CHAR ||
+ if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_SPACE_CHAR ||
strInsert == TEXT_RETURN_CHAR || strInsert == 160) {
nResultPos = nStartPos + 1;
break;
@@ -1948,7 +1945,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() {
}
for (int d = PreResEndPos; d < nResultPos; d++) {
FX_WCHAR strInsert = m_strText.GetAt(d);
- if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR &&
+ if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
bMatch = FALSE;
break;
@@ -1957,7 +1954,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() {
} else if (bSpaceStart) {
if (nResultPos > 0) {
FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1);
- if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR &&
+ if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
bMatch = FALSE;
m_resStart = nResultPos;
@@ -2048,7 +2045,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
while (1) {
CFX_WideString csWord = TEXT_EMPTY;
int ret =
- ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR);
+ ExtractSubString(csWord, findwhat.c_str(), index, TEXT_SPACE_CHAR);
if (csWord.IsEmpty()) {
if (ret) {
m_csFindWhatArray.push_back(L"");
diff --git a/core/fpdftext/include/cpdf_textpage.h b/core/fpdftext/include/cpdf_textpage.h
index c7c673c23e..0c1efdf34a 100644
--- a/core/fpdftext/include/cpdf_textpage.h
+++ b/core/fpdftext/include/cpdf_textpage.h
@@ -99,6 +99,19 @@ class CPDF_TextPage {
FX_BOOL bContains = FALSE);
private:
+ enum class TextOrientation {
+ Unknown,
+ Horizontal,
+ Vertical,
+ };
+
+ enum class GenerateCharacter {
+ None,
+ Space,
+ LineBreak,
+ Hyphen,
+ };
+
FX_BOOL IsHyphen(FX_WCHAR curChar);
bool IsControlChar(const PAGECHAR_INFO& charInfo);
void ProcessObject();
@@ -109,8 +122,8 @@ class CPDF_TextPage {
const CFX_Matrix& formMatrix,
const CPDF_PageObjectList* pObjList,
CPDF_PageObjectList::const_iterator ObjPos);
- int ProcessInsertObject(const CPDF_TextObject* pObj,
- const CFX_Matrix& formMatrix);
+ GenerateCharacter ProcessInsertObject(const CPDF_TextObject* pObj,
+ const CFX_Matrix& formMatrix);
FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
const CPDF_PageObjectList* pObjList,
@@ -122,11 +135,13 @@ class CPDF_TextPage {
FPDFText_MarkedContent PreMarkedContent(PDFTEXT_Obj pObj);
void ProcessMarkedContent(PDFTEXT_Obj pObj);
void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const;
- void FindPreviousTextObject(void);
+ void FindPreviousTextObject();
void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
- int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
- int32_t FindTextlineFlowDirection();
+ TextOrientation GetTextObjectWritingMode(
+ const CPDF_TextObject* pTextObj) const;
+ TextOrientation FindTextlineFlowOrientation() const;
+ void AppendGeneratedCharacter(FX_WCHAR unicode, const CFX_Matrix& formMatrix);
void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend);
FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
@@ -147,7 +162,7 @@ class CPDF_TextPage {
CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments;
std::vector<CFX_FloatRect> m_SelRects;
CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj;
- int32_t m_TextlineDir;
+ TextOrientation m_TextlineDir;
CFX_FloatRect m_CurlineRect;
};