summaryrefslogtreecommitdiff
path: root/core/fpdftext/cpdf_textpage.cpp
diff options
context:
space:
mode:
authorRyan Harrison <rharrison@chromium.org>2017-09-13 11:16:32 -0400
committerChromium commit bot <commit-bot@chromium.org>2017-09-13 15:42:13 +0000
commit06c6855258bf25246c46a1f628b8a8a8185029a7 (patch)
tree7db9bd2cae8672a06834105542cef5c70bc9db08 /core/fpdftext/cpdf_textpage.cpp
parentf2ca50ffa2d26a6c023add24e92adbe6b28bfcc9 (diff)
downloadpdfium-06c6855258bf25246c46a1f628b8a8a8185029a7.tar.xz
Rewrite IsHyphen using string operations
The existing code did end of range checks by making sure that the value was never less then 0. This isn't correct when using an unsigned type, since 0 - 1 will wrap around to the max possible value, and thus still be less then 0. Additionally the existing code was hard to follow due to the complexity of some of the low level operations being performed. It has been rewritten using higher level string operations to make it clearer and correct. BUG=chromium:763256 Change-Id: Ib8bf5ca0e29e73724c4a1c4781362e8a8fc30149 Reviewed-on: https://pdfium-review.googlesource.com/13690 Commit-Queue: Ryan Harrison <rharrison@chromium.org> Reviewed-by: Tom Sepez <tsepez@chromium.org>
Diffstat (limited to 'core/fpdftext/cpdf_textpage.cpp')
-rw-r--r--core/fpdftext/cpdf_textpage.cpp63
1 files changed, 34 insertions, 29 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 3006ebce2b..ee1d51bfd8 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -106,6 +106,10 @@ float MaskPercentFilled(const std::vector<bool>& mask,
return count / (end - start);
}
+bool IsHyphenCode(wchar_t c) {
+ return c == 0x2D || c == 0xAD;
+}
+
} // namespace
PDFTEXT_Obj::PDFTEXT_Obj() {}
@@ -1215,36 +1219,37 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
return m_TextlineDir;
}
-bool CPDF_TextPage::IsHyphen(wchar_t curChar) {
- CFX_WideString strCurText = m_TempTextBuf.MakeString();
- if (strCurText.IsEmpty())
- strCurText = m_TextBuf.AsStringC();
- FX_STRSIZE nCount = strCurText.GetLength();
- if (nCount < 1)
+bool CPDF_TextPage::IsHyphen(wchar_t curChar) const {
+ CFX_WideStringC curText;
+ if (!m_TempTextBuf.IsEmpty())
+ curText = m_TempTextBuf.AsStringC();
+ else if (!m_TextBuf.IsEmpty())
+ curText = m_TextBuf.AsStringC();
+ else
return false;
- FX_STRSIZE nIndex = nCount - 1;
- wchar_t wcTmp = strCurText[nIndex];
- while (wcTmp == 0x20 && nIndex > 0 && nIndex <= nCount - 1)
- wcTmp = strCurText[--nIndex];
- if (0x2D == wcTmp || 0xAD == wcTmp) {
- if (--nIndex > 0) {
- wchar_t preChar = strCurText[nIndex];
- if (FXSYS_iswalpha(preChar) && FXSYS_iswalpha(curChar))
- return true;
- }
- const PAGECHAR_INFO* preInfo;
- if (!m_TempCharList.empty())
- preInfo = &m_TempCharList.back();
- else if (!m_CharList.empty())
- preInfo = &m_CharList.back();
- else
- return false;
- if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
- (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) {
- return true;
- }
- }
- return false;
+
+ curText = curText.TrimmedRight(0x20);
+ if (curText.GetLength() < 2)
+ return false;
+
+ // Extracting the last 2 characters, since they are all that matter
+ curText = curText.Right(2);
+ if (!IsHyphenCode(curText.Last()))
+ return false;
+
+ if (FXSYS_iswalpha(curText.First() && FXSYS_iswalnum(curChar)))
+ return true;
+
+ const PAGECHAR_INFO* preInfo;
+ if (!m_TempCharList.empty())
+ preInfo = &m_TempCharList.back();
+ else if (!m_CharList.empty())
+ preInfo = &m_CharList.back();
+ else
+ return false;
+
+ return FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
+ IsHyphenCode(preInfo->m_Unicode);
}
CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(