diff options
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- | core/fpdftext/fpdf_text_int.cpp | 347 | ||||
-rw-r--r-- | core/fpdftext/include/cpdf_textpage.h | 27 |
2 files changed, 193 insertions, 181 deletions
diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/fpdf_text_int.cpp index 8ae2c1ef35..5be47143fd 100644 --- a/core/fpdftext/fpdf_text_int.cpp +++ b/core/fpdftext/fpdf_text_int.cpp @@ -39,11 +39,11 @@ #define FPDFTEXT_CHAR_HYPHEN 3 #define FPDFTEXT_CHAR_PIECE 4 -#define TEXT_BLANK_CHAR L' ' +#define TEXT_SPACE_CHAR L' ' #define TEXT_LINEFEED_CHAR L'\n' #define TEXT_RETURN_CHAR L'\r' #define TEXT_EMPTY L"" -#define TEXT_BLANK L" " +#define TEXT_SPACE L" " #define TEXT_RETURN_LINEFEED L"\r\n" #define TEXT_LINEFEED L"\n" #define TEXT_CHARRATIO_GAPDELTA 0.070 @@ -150,7 +150,7 @@ CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, FPDFText_Direction flags) m_parserflag(flags), m_pPreTextObj(nullptr), m_bIsParsed(false), - m_TextlineDir(-1), + m_TextlineDir(TextOrientation::Unknown), m_CurlineRect(0, 0, 0, 0) { m_TextBuf.EstimateSize(0, 10240); pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), @@ -183,34 +183,25 @@ void CPDF_TextPage::ParseTextPage() { m_bIsParsed = true; m_CharIndex.clear(); int nCount = pdfium::CollectionSize<int>(m_CharList); - if (nCount) { + if (nCount) m_CharIndex.push_back(0); - } + for (int i = 0; i < nCount; i++) { int indexSize = pdfium::CollectionSize<int>(m_CharIndex); - FX_BOOL bNormal = FALSE; const PAGECHAR_INFO& charinfo = m_CharList[i]; - if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { - bNormal = TRUE; - } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { - bNormal = FALSE; - } else { - bNormal = TRUE; - } - if (bNormal) { + if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED || + (charinfo.m_Unicode != 0 && !IsControlChar(charinfo))) { if (indexSize % 2) { m_CharIndex.push_back(1); } else { - if (indexSize <= 0) { + if (indexSize <= 0) continue; - } m_CharIndex[indexSize - 1] += 1; } } else { if (indexSize % 2) { - if (indexSize <= 0) { + if (indexSize <= 0) continue; - } m_CharIndex[indexSize - 1] = i + 1; } else { m_CharIndex.push_back(i + 1); @@ -218,9 +209,8 @@ void CPDF_TextPage::ParseTextPage() { } } int indexSize = pdfium::CollectionSize<int>(m_CharIndex); - if (indexSize % 2) { + if (indexSize % 2) m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1); - } } int CPDF_TextPage::CountChars() const { @@ -685,7 +675,8 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, return m_Segments.GetSize(); } -int32_t CPDF_TextPage::FindTextlineFlowDirection() { +CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation() + const { const int32_t nPageWidth = static_cast<int32_t>(m_pPage->GetPageWidth()); const int32_t nPageHeight = static_cast<int32_t>(m_pPage->GetPageHeight()); std::vector<uint8_t> nHorizontalMask(nPageWidth); @@ -695,7 +686,7 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() { int32_t index = 0; FX_FLOAT fLineHeight = 0.0f; if (m_pPage->GetPageObjectList()->empty()) - return -1; + return TextOrientation::Unknown; for (auto& pPageObj : *m_pPage->GetPageObjectList()) { if (!pPageObj || !pPageObj->IsText()) @@ -749,33 +740,42 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() { break; } nEndV = index; - for (index = nStartV; index < nEndV; index++) { + for (index = nStartV; index < nEndV; index++) nSumV += nVerticalMask[index]; - } nSumV /= nEndV - nStartV; - if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) { - return 0; - } - if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) { - return 1; - } - if (nSumH > 0.8f) { - return 0; - } - if (nSumH - nSumV > 0.0f) { - return 0; - } - if (nSumV - nSumH > 0.0f) { - return 1; - } - return -1; + + if ((nEndV - nStartV) < (int32_t)(2 * fLineHeight)) + return TextOrientation::Horizontal; + if ((nEndH - nStartH) < (int32_t)(2 * fLineHeight)) + return TextOrientation::Vertical; + + if (nSumH > 0.8f) + return TextOrientation::Horizontal; + + if (nSumH > nSumV) + return TextOrientation::Horizontal; + if (nSumH < nSumV) + return TextOrientation::Vertical; + return TextOrientation::Unknown; +} + +void CPDF_TextPage::AppendGeneratedCharacter(FX_WCHAR unicode, + const CFX_Matrix& formMatrix) { + PAGECHAR_INFO generateChar; + if (!GenerateCharInfo(unicode, generateChar)) + return; + + m_TextBuf.AppendChar(unicode); + if (!formMatrix.IsIdentity()) + generateChar.m_Matrix.Copy(formMatrix); + m_CharList.push_back(generateChar); } void CPDF_TextPage::ProcessObject() { if (m_pPage->GetPageObjectList()->empty()) return; - m_TextlineDir = FindTextlineFlowDirection(); + m_TextlineDir = FindTextlineFlowOrientation(); const CPDF_PageObjectList* pObjList = m_pPage->GetPageObjectList(); for (auto it = pObjList->begin(); it != pObjList->end(); ++it) { if (CPDF_PageObject* pObj = it->get()) { @@ -833,37 +833,14 @@ int CPDF_TextPage::GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const { void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info) { - if (!IsControlChar(info)) { - info.m_Index = m_TextBuf.GetLength(); - if (wChar >= 0xFB00 && wChar <= 0xFB06) { - FX_WCHAR* pDst = nullptr; - FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); - if (nCount >= 1) { - pDst = FX_Alloc(FX_WCHAR, nCount); - Unicode_GetNormalization(wChar, pDst); - for (int nIndex = 0; nIndex < nCount; nIndex++) { - PAGECHAR_INFO info2 = info; - info2.m_Unicode = pDst[nIndex]; - info2.m_Flag = FPDFTEXT_CHAR_PIECE; - m_TextBuf.AppendChar(info2.m_Unicode); - m_CharList.push_back(info2); - } - FX_Free(pDst); - return; - } - } - m_TextBuf.AppendChar(wChar); - } else { + if (IsControlChar(info)) { info.m_Index = -1; + m_CharList.push_back(info); + return; } - m_CharList.push_back(info); -} -void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, - PAGECHAR_INFO info) { - if (!IsControlChar(info)) { - info.m_Index = m_TextBuf.GetLength(); - wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); + info.m_Index = m_TextBuf.GetLength(); + if (wChar >= 0xFB00 && wChar <= 0xFB06) { FX_WCHAR* pDst = nullptr; FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); if (nCount >= 1) { @@ -879,11 +856,38 @@ void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, FX_Free(pDst); return; } - info.m_Unicode = wChar; - m_TextBuf.AppendChar(info.m_Unicode); - } else { + } + m_TextBuf.AppendChar(wChar); + m_CharList.push_back(info); +} + +void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar, + PAGECHAR_INFO info) { + if (IsControlChar(info)) { info.m_Index = -1; + m_CharList.push_back(info); + return; } + + info.m_Index = m_TextBuf.GetLength(); + wChar = FX_GetMirrorChar(wChar, TRUE, FALSE); + FX_WCHAR* pDst = nullptr; + FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); + if (nCount >= 1) { + pDst = FX_Alloc(FX_WCHAR, nCount); + Unicode_GetNormalization(wChar, pDst); + for (int nIndex = 0; nIndex < nCount; nIndex++) { + PAGECHAR_INFO info2 = info; + info2.m_Unicode = pDst[nIndex]; + info2.m_Flag = FPDFTEXT_CHAR_PIECE; + m_TextBuf.AppendChar(info2.m_Unicode); + m_CharList.push_back(info2); + } + FX_Free(pDst); + return; + } + info.m_Unicode = wChar; + m_TextBuf.AppendChar(info.m_Unicode); m_CharList.push_back(info); } @@ -1145,6 +1149,7 @@ void CPDF_TextPage::FindPreviousTextObject() { if (preChar.m_pTextObj) m_pPreTextObj = preChar.m_pTextObj; } + void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend) { int32_t i = iCharListStartAppend; @@ -1198,10 +1203,10 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { m_perMatrix.Copy(formMatrix); return; } - int result = 0; + GenerateCharacter result = GenerateCharacter::None; if (m_pPreTextObj) { result = ProcessInsertObject(pTextObj, formMatrix); - if (2 == result) { + if (result == GenerateCharacter::LineBreak) { m_CurlineRect = CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top); @@ -1210,59 +1215,51 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom, Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top)); } - PAGECHAR_INFO generateChar; - if (result == 1) { - if (GenerateCharInfo(TEXT_BLANK_CHAR, generateChar)) { - if (!formMatrix.IsIdentity()) { - generateChar.m_Matrix.Copy(formMatrix); + switch (result) { + case GenerateCharacter::None: + break; + case GenerateCharacter::Space: { + PAGECHAR_INFO generateChar; + if (GenerateCharInfo(TEXT_SPACE_CHAR, generateChar)) { + if (!formMatrix.IsIdentity()) + generateChar.m_Matrix.Copy(formMatrix); + m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR); + m_TempCharList.push_back(generateChar); } - m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); - m_TempCharList.push_back(generateChar); + break; } - } else if (result == 2) { - CloseTempLine(); - if (m_TextBuf.GetSize()) { - if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { - m_TextBuf.AppendChar(TEXT_RETURN_CHAR); - if (!formMatrix.IsIdentity()) { - generateChar.m_Matrix.Copy(formMatrix); - } - m_CharList.push_back(generateChar); + case GenerateCharacter::LineBreak: + CloseTempLine(); + if (m_TextBuf.GetSize()) { + AppendGeneratedCharacter(TEXT_RETURN_CHAR, formMatrix); + AppendGeneratedCharacter(TEXT_LINEFEED_CHAR, formMatrix); } - if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { - m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); - if (!formMatrix.IsIdentity()) { - generateChar.m_Matrix.Copy(formMatrix); + break; + case GenerateCharacter::Hyphen: + if (pTextObj->CountChars() == 1) { + CPDF_TextObjectItem item; + pTextObj->GetCharInfo(0, &item); + CFX_WideString wstrItem = + pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); + if (wstrItem.IsEmpty()) { + wstrItem += (FX_WCHAR)item.m_CharCode; } - m_CharList.push_back(generateChar); + FX_WCHAR curChar = wstrItem.GetAt(0); + if (curChar == 0x2D || curChar == 0xAD) + return; } - } - } else if (result == 3) { - int32_t nChars = pTextObj->CountChars(); - if (nChars == 1) { - CPDF_TextObjectItem item; - pTextObj->GetCharInfo(0, &item); - CFX_WideString wstrItem = - pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); - if (wstrItem.IsEmpty()) { - wstrItem += (FX_WCHAR)item.m_CharCode; - } - FX_WCHAR curChar = wstrItem.GetAt(0); - if (0x2D == curChar || 0xAD == curChar) { - return; + while (m_TempTextBuf.GetSize() > 0 && + m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) == + 0x20) { + m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); + m_TempCharList.pop_back(); } - } - while (m_TempTextBuf.GetSize() > 0 && - m_TempTextBuf.AsStringC().GetAt(m_TempTextBuf.GetLength() - 1) == - 0x20) { + PAGECHAR_INFO* charinfo = &m_TempCharList.back(); m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); - m_TempCharList.pop_back(); - } - PAGECHAR_INFO* charinfo = &m_TempCharList.back(); - m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); - charinfo->m_Unicode = 0x2; - charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN; - m_TempTextBuf.AppendChar(0xfffe); + charinfo->m_Unicode = 0x2; + charinfo->m_Flag = FPDFTEXT_CHAR_HYPHEN; + m_TempTextBuf.AppendChar(0xfffe); + break; } } else { m_CurlineRect = @@ -1299,9 +1296,9 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { if (str.IsEmpty()) { str = m_TextBuf.AsStringC(); } - if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { + if (str.IsEmpty() || str.GetAt(str.GetLength() - 1) == TEXT_SPACE_CHAR) continue; - } + FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH(); spacing = -fontsize_h * item.m_OriginX / 1000; continue; @@ -1335,11 +1332,11 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { threshold = fontsize_h * threshold / 1000; } if (threshold && (spacing && spacing >= threshold)) { - charinfo.m_Unicode = TEXT_BLANK_CHAR; + charinfo.m_Unicode = TEXT_SPACE_CHAR; charinfo.m_Flag = FPDFTEXT_CHAR_GENERATED; charinfo.m_pTextObj = pTextObj; charinfo.m_Index = m_TextBuf.GetLength(); - m_TempTextBuf.AppendChar(TEXT_BLANK_CHAR); + m_TempTextBuf.AppendChar(TEXT_SPACE_CHAR); charinfo.m_CharCode = CPDF_Font::kInvalidCharCode; charinfo.m_Matrix.Copy(formMatrix); matrix.Transform(item.m_OriginX, item.m_OriginY, charinfo.m_OriginX, @@ -1433,7 +1430,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { } else if (i == 0) { CFX_WideString str = m_TempTextBuf.MakeString(); if (!str.IsEmpty() && - str.GetAt(str.GetLength() - 1) == TEXT_BLANK_CHAR) { + str.GetAt(str.GetLength() - 1) == TEXT_SPACE_CHAR) { m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); m_TempCharList.pop_back(); } @@ -1445,12 +1442,12 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { } } -int32_t CPDF_TextPage::GetTextObjectWritingMode( - const CPDF_TextObject* pTextObj) { +CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode( + const CPDF_TextObject* pTextObj) const { int32_t nChars = pTextObj->CountChars(); - if (nChars == 1) { + if (nChars == 1) return m_TextlineDir; - } + CPDF_TextObjectItem first, last; pTextObj->GetCharInfo(0, &first); pTextObj->GetCharInfo(nChars - 1, &last); @@ -1460,19 +1457,20 @@ int32_t CPDF_TextPage::GetTextObjectWritingMode( textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY); FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX); FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY); - if (dX <= 0.0001f && dY <= 0.0001f) { - return -1; - } + if (dX <= 0.0001f && dY <= 0.0001f) + return TextOrientation::Unknown; + CFX_VectorF v(dX, dY); v.Normalize(); - if (v.y <= 0.0872f) { - return v.x <= 0.0872f ? m_TextlineDir : 0; - } - if (v.x <= 0.0872f) { - return 1; - } + if (v.y <= 0.0872f) + return v.x <= 0.0872f ? m_TextlineDir : TextOrientation::Horizontal; + + if (v.x <= 0.0872f) + return TextOrientation::Vertical; + return m_TextlineDir; } + FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) { CFX_WideString strCurText = m_TempTextBuf.MakeString(); if (strCurText.GetLength() == 0) { @@ -1510,14 +1508,14 @@ FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) { return FALSE; } -int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, - const CFX_Matrix& formMatrix) { +CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject( + const CPDF_TextObject* pObj, + const CFX_Matrix& formMatrix) { FindPreviousTextObject(); - FX_BOOL bNewline = FALSE; - int WritingMode = GetTextObjectWritingMode(pObj); - if (WritingMode == -1) { + TextOrientation WritingMode = GetTextObjectWritingMode(pObj); + if (WritingMode == TextOrientation::Unknown) WritingMode = GetTextObjectWritingMode(m_pPreTextObj); - } + CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top); CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, @@ -1532,20 +1530,18 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, wstrItem += (FX_WCHAR)item.m_CharCode; } FX_WCHAR curChar = wstrItem.GetAt(0); - if (WritingMode == 0) { + if (WritingMode == TextOrientation::Horizontal) { if (this_rect.Height() > 4.5 && prev_rect.Height() > 4.5) { FX_FLOAT top = this_rect.top < prev_rect.top ? this_rect.top : prev_rect.top; FX_FLOAT bottom = this_rect.bottom > prev_rect.bottom ? this_rect.bottom : prev_rect.bottom; if (bottom >= top) { - if (IsHyphen(curChar)) { - return 3; - } - return 2; + return IsHyphen(curChar) ? GenerateCharacter::Hyphen + : GenerateCharacter::LineBreak; } } - } else if (WritingMode == 1) { + } else if (WritingMode == TextOrientation::Vertical) { if (this_rect.Width() > pObj->GetFontSize() * 0.1f && prev_rect.Width() > m_pPreTextObj->GetFontSize() * 0.1f) { FX_FLOAT left = this_rect.left > m_CurlineRect.left ? this_rect.left @@ -1554,10 +1550,8 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, ? this_rect.right : m_CurlineRect.right; if (right <= left) { - if (IsHyphen(curChar)) { - return 3; - } - return 2; + return IsHyphen(curChar) ? GenerateCharacter::Hyphen + : GenerateCharacter::LineBreak; } } } @@ -1581,17 +1575,18 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, if (last_width < this_width) { threshold = prev_reverse.TransformDistance(threshold); } - CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, - m_pPreTextObj->m_Right, pObj->m_Top); - CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, - m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); - CFX_FloatRect rect3 = rect1; - rect1.Intersect(rect2); - if (WritingMode == 0) { + bool bNewline = false; + if (WritingMode == TextOrientation::Horizontal) { + CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom, + m_pPreTextObj->m_Right, pObj->m_Top); + CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom, + m_pPreTextObj->m_Right, m_pPreTextObj->m_Top); + CFX_FloatRect rect3 = rect1; + rect1.Intersect(rect2); if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) || ((y > threshold * 2 || y < threshold * -3) && (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : TRUE))) { - bNewline = TRUE; + bNewline = true; if (nItem > 1) { CPDF_TextObjectItem tempItem; m_pPreTextObj->GetItemInfo(0, &tempItem); @@ -1604,25 +1599,27 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000, m_pPreTextObj->m_Top); if (re.Contains(pObj->GetPosX(), pObj->GetPosY())) { - bNewline = FALSE; + bNewline = false; } else { CFX_FloatRect rect(0, pObj->m_Bottom, 1000, pObj->m_Top); if (rect.Contains(m_pPreTextObj->GetPosX(), m_pPreTextObj->GetPosY())) { - bNewline = FALSE; + bNewline = false; } } } } } } - if (bNewline) - return IsHyphen(curChar) ? 3 : 2; + if (bNewline) { + return IsHyphen(curChar) ? GenerateCharacter::Hyphen + : GenerateCharacter::LineBreak; + } int32_t nChars = pObj->CountChars(); if (nChars == 1 && (0x2D == curChar || 0xAD == curChar) && IsHyphen(curChar)) { - return 3; + return GenerateCharacter::Hyphen; } CFX_WideString PrevStr = m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode); @@ -1653,18 +1650,18 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj, if (curChar != L' ' && preChar != L' ') { if ((x - last_pos - last_width) > threshold || (last_pos - x - last_width) > threshold) { - return 1; + return GenerateCharacter::Space; } if (x < 0 && (last_pos - x - last_width) > threshold) { - return 1; + return GenerateCharacter::Space; } if ((x - last_pos - last_width) > this_width || (x - last_pos - this_width) > last_width) { - return 1; + return GenerateCharacter::Space; } } } - return 0; + return GenerateCharacter::None; } FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, @@ -1914,7 +1911,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() { if (csWord.IsEmpty()) { if (iWord == nCount - 1) { FX_WCHAR strInsert = m_strText.GetAt(nStartPos); - if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_BLANK_CHAR || + if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_SPACE_CHAR || strInsert == TEXT_RETURN_CHAR || strInsert == 160) { nResultPos = nStartPos + 1; break; @@ -1948,7 +1945,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() { } for (int d = PreResEndPos; d < nResultPos; d++) { FX_WCHAR strInsert = m_strText.GetAt(d); - if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && + if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) { bMatch = FALSE; break; @@ -1957,7 +1954,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() { } else if (bSpaceStart) { if (nResultPos > 0) { FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1); - if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_BLANK_CHAR && + if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) { bMatch = FALSE; m_resStart = nResultPos; @@ -2048,7 +2045,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { while (1) { CFX_WideString csWord = TEXT_EMPTY; int ret = - ExtractSubString(csWord, findwhat.c_str(), index, TEXT_BLANK_CHAR); + ExtractSubString(csWord, findwhat.c_str(), index, TEXT_SPACE_CHAR); if (csWord.IsEmpty()) { if (ret) { m_csFindWhatArray.push_back(L""); diff --git a/core/fpdftext/include/cpdf_textpage.h b/core/fpdftext/include/cpdf_textpage.h index c7c673c23e..0c1efdf34a 100644 --- a/core/fpdftext/include/cpdf_textpage.h +++ b/core/fpdftext/include/cpdf_textpage.h @@ -99,6 +99,19 @@ class CPDF_TextPage { FX_BOOL bContains = FALSE); private: + enum class TextOrientation { + Unknown, + Horizontal, + Vertical, + }; + + enum class GenerateCharacter { + None, + Space, + LineBreak, + Hyphen, + }; + FX_BOOL IsHyphen(FX_WCHAR curChar); bool IsControlChar(const PAGECHAR_INFO& charInfo); void ProcessObject(); @@ -109,8 +122,8 @@ class CPDF_TextPage { const CFX_Matrix& formMatrix, const CPDF_PageObjectList* pObjList, CPDF_PageObjectList::const_iterator ObjPos); - int ProcessInsertObject(const CPDF_TextObject* pObj, - const CFX_Matrix& formMatrix); + GenerateCharacter ProcessInsertObject(const CPDF_TextObject* pObj, + const CFX_Matrix& formMatrix); FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, const CPDF_PageObjectList* pObjList, @@ -122,11 +135,13 @@ class CPDF_TextPage { FPDFText_MarkedContent PreMarkedContent(PDFTEXT_Obj pObj); void ProcessMarkedContent(PDFTEXT_Obj pObj); void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; - void FindPreviousTextObject(void); + void FindPreviousTextObject(); void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info); void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info); - int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); - int32_t FindTextlineFlowDirection(); + TextOrientation GetTextObjectWritingMode( + const CPDF_TextObject* pTextObj) const; + TextOrientation FindTextlineFlowOrientation() const; + void AppendGeneratedCharacter(FX_WCHAR unicode, const CFX_Matrix& formMatrix); void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj, @@ -147,7 +162,7 @@ class CPDF_TextPage { CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments; std::vector<CFX_FloatRect> m_SelRects; CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj; - int32_t m_TextlineDir; + TextOrientation m_TextlineDir; CFX_FloatRect m_CurlineRect; }; |