summary refs log tree commit diff
path: root/core/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- core/fpdftext/cpdf_textpage.cpp 63
-rw-r--r-- core/fpdftext/cpdf_textpage.h 2
2 files changed, 35 insertions, 30 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 3006ebce2b..ee1d51bfd8 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -106,6 +106,10 @@ float MaskPercentFilled(const std::vector<bool>& mask,
return count / (end - start);
}
+bool IsHyphenCode(wchar_t c) {  // True when |c| is one of the two hyphen codes below.
+ return c == 0x2D || c == 0xAD;  // U+002D HYPHEN-MINUS, U+00AD SOFT HYPHEN.
+}
+
} // namespace
PDFTEXT_Obj::PDFTEXT_Obj() {}
@@ -1215,36 +1219,37 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
return m_TextlineDir;
}
-bool CPDF_TextPage::IsHyphen(wchar_t curChar) {
- CFX_WideString strCurText = m_TempTextBuf.MakeString();
- if (strCurText.IsEmpty())
- strCurText = m_TextBuf.AsStringC();
- FX_STRSIZE nCount = strCurText.GetLength();
- if (nCount < 1)
+bool CPDF_TextPage::IsHyphen(wchar_t curChar) const {  // Does |curChar| follow a trailing hyphen in the buffered text?
+ CFX_WideStringC curText;  // Non-owning view: m_TempTextBuf if non-empty, otherwise m_TextBuf.
+ if (!m_TempTextBuf.IsEmpty())
+ curText = m_TempTextBuf.AsStringC();
+ else if (!m_TextBuf.IsEmpty())
+ curText = m_TextBuf.AsStringC();
+ else
return false;
- FX_STRSIZE nIndex = nCount - 1;
- wchar_t wcTmp = strCurText[nIndex];
- while (wcTmp == 0x20 && nIndex > 0 && nIndex <= nCount - 1)
- wcTmp = strCurText[--nIndex];
- if (0x2D == wcTmp || 0xAD == wcTmp) {
- if (--nIndex > 0) {
- wchar_t preChar = strCurText[nIndex];
- if (FXSYS_iswalpha(preChar) && FXSYS_iswalpha(curChar))
- return true;
- }
- const PAGECHAR_INFO* preInfo;
- if (!m_TempCharList.empty())
- preInfo = &m_TempCharList.back();
- else if (!m_CharList.empty())
- preInfo = &m_CharList.back();
- else
- return false;
- if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
- (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) {
- return true;
- }
- }
- return false;
+ // Trailing spaces (0x20) are ignored; at least a hyphen and one character before it must remain.
+ curText = curText.TrimmedRight(0x20);
+ if (curText.GetLength() < 2)
+ return false;
+
+ // Only the last two characters matter: [preceding char][candidate hyphen].
+ curText = curText.Right(2);
+ if (!IsHyphenCode(curText.Last()))
+ return false;
+ // Each predicate takes its own argument: letter before the hyphen, alphanumeric |curChar| after it.
+ if (FXSYS_iswalpha(curText.First()) && FXSYS_iswalnum(curChar))
+ return true;
+ // Otherwise inspect the most recent char record: m_TempCharList first, then m_CharList.
+ const PAGECHAR_INFO* preInfo;
+ if (!m_TempCharList.empty())
+ preInfo = &m_TempCharList.back();
+ else if (!m_CharList.empty())
+ preInfo = &m_CharList.back();
+ else
+ return false;
+
+ return FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
+ IsHyphenCode(preInfo->m_Unicode);
}
CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h
index 6a4c87368d..41892ea97c 100644
--- a/core/fpdftext/cpdf_textpage.h
+++ b/core/fpdftext/cpdf_textpage.h
@@ -132,7 +132,7 @@ class CPDF_TextPage {
Hyphen,
};
- bool IsHyphen(wchar_t curChar);
+ bool IsHyphen(wchar_t curChar) const;
bool IsControlChar(const PAGECHAR_INFO& charInfo);
void ProcessObject();
void ProcessFormObject(CPDF_FormObject* pFormObj,