summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyan Harrison <rharrison@chromium.org>2017-09-13 11:16:32 -0400
committerChromium commit bot <commit-bot@chromium.org>2017-09-13 15:42:13 +0000
commit06c6855258bf25246c46a1f628b8a8a8185029a7 (patch)
tree7db9bd2cae8672a06834105542cef5c70bc9db08
parentf2ca50ffa2d26a6c023add24e92adbe6b28bfcc9 (diff)
downloadpdfium-06c6855258bf25246c46a1f628b8a8a8185029a7.tar.xz
Rewrite IsHyphen using string operations
The existing code did end of range checks by making sure that the value was never less then 0. This isn't correct when using an unsigned type, since 0 - 1 will wrap around to the max possible value, and thus still be less then 0. Additionally the existing code was hard to follow due to the complexity of some of the low level operations being performed. It has been rewritten using higher level string operations to make it clearer and correct. BUG=chromium:763256 Change-Id: Ib8bf5ca0e29e73724c4a1c4781362e8a8fc30149 Reviewed-on: https://pdfium-review.googlesource.com/13690 Commit-Queue: Ryan Harrison <rharrison@chromium.org> Reviewed-by: Tom Sepez <tsepez@chromium.org>
-rw-r--r--core/fpdftext/cpdf_textpage.cpp63
-rw-r--r--core/fpdftext/cpdf_textpage.h2
-rw-r--r--core/fxcrt/cfx_binarybuf.cpp4
-rw-r--r--core/fxcrt/cfx_binarybuf.h4
-rw-r--r--core/fxcrt/cfx_bytestring_unittest.cpp8
-rw-r--r--core/fxcrt/cfx_string_c_template.h14
-rw-r--r--core/fxcrt/cfx_widestring_unittest.cpp8
-rw-r--r--core/fxcrt/cfx_widetextbuf.cpp4
-rw-r--r--core/fxcrt/cfx_widetextbuf.h2
9 files changed, 77 insertions, 32 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 3006ebce2b..ee1d51bfd8 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -106,6 +106,10 @@ float MaskPercentFilled(const std::vector<bool>& mask,
return count / (end - start);
}
+bool IsHyphenCode(wchar_t c) {
+ return c == 0x2D || c == 0xAD;
+}
+
} // namespace
PDFTEXT_Obj::PDFTEXT_Obj() {}
@@ -1215,36 +1219,37 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
return m_TextlineDir;
}
-bool CPDF_TextPage::IsHyphen(wchar_t curChar) {
- CFX_WideString strCurText = m_TempTextBuf.MakeString();
- if (strCurText.IsEmpty())
- strCurText = m_TextBuf.AsStringC();
- FX_STRSIZE nCount = strCurText.GetLength();
- if (nCount < 1)
+bool CPDF_TextPage::IsHyphen(wchar_t curChar) const {
+ CFX_WideStringC curText;
+ if (!m_TempTextBuf.IsEmpty())
+ curText = m_TempTextBuf.AsStringC();
+ else if (!m_TextBuf.IsEmpty())
+ curText = m_TextBuf.AsStringC();
+ else
return false;
- FX_STRSIZE nIndex = nCount - 1;
- wchar_t wcTmp = strCurText[nIndex];
- while (wcTmp == 0x20 && nIndex > 0 && nIndex <= nCount - 1)
- wcTmp = strCurText[--nIndex];
- if (0x2D == wcTmp || 0xAD == wcTmp) {
- if (--nIndex > 0) {
- wchar_t preChar = strCurText[nIndex];
- if (FXSYS_iswalpha(preChar) && FXSYS_iswalpha(curChar))
- return true;
- }
- const PAGECHAR_INFO* preInfo;
- if (!m_TempCharList.empty())
- preInfo = &m_TempCharList.back();
- else if (!m_CharList.empty())
- preInfo = &m_CharList.back();
- else
- return false;
- if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
- (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) {
- return true;
- }
- }
- return false;
+
+ curText = curText.TrimmedRight(0x20);
+ if (curText.GetLength() < 2)
+ return false;
+
+ // Extracting the last 2 characters, since they are all that matter
+ curText = curText.Right(2);
+ if (!IsHyphenCode(curText.Last()))
+ return false;
+
+ if (FXSYS_iswalpha(curText.First() && FXSYS_iswalnum(curChar)))
+ return true;
+
+ const PAGECHAR_INFO* preInfo;
+ if (!m_TempCharList.empty())
+ preInfo = &m_TempCharList.back();
+ else if (!m_CharList.empty())
+ preInfo = &m_CharList.back();
+ else
+ return false;
+
+ return FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
+ IsHyphenCode(preInfo->m_Unicode);
}
CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h
index 6a4c87368d..41892ea97c 100644
--- a/core/fpdftext/cpdf_textpage.h
+++ b/core/fpdftext/cpdf_textpage.h
@@ -132,7 +132,7 @@ class CPDF_TextPage {
Hyphen,
};
- bool IsHyphen(wchar_t curChar);
+ bool IsHyphen(wchar_t curChar) const;
bool IsControlChar(const PAGECHAR_INFO& charInfo);
void ProcessObject();
void ProcessFormObject(CPDF_FormObject* pFormObj,
diff --git a/core/fxcrt/cfx_binarybuf.cpp b/core/fxcrt/cfx_binarybuf.cpp
index 73fe945fd6..b826fdd5b5 100644
--- a/core/fxcrt/cfx_binarybuf.cpp
+++ b/core/fxcrt/cfx_binarybuf.cpp
@@ -28,6 +28,10 @@ void CFX_BinaryBuf::Delete(FX_STRSIZE start_index, FX_STRSIZE count) {
m_DataSize -= count;
}
+FX_STRSIZE CFX_BinaryBuf::GetLength() const {
+ return m_DataSize;
+}
+
void CFX_BinaryBuf::Clear() {
m_DataSize = 0;
}
diff --git a/core/fxcrt/cfx_binarybuf.h b/core/fxcrt/cfx_binarybuf.h
index 3081d02902..4c795eda0b 100644
--- a/core/fxcrt/cfx_binarybuf.h
+++ b/core/fxcrt/cfx_binarybuf.h
@@ -17,10 +17,12 @@ class CFX_BinaryBuf {
public:
CFX_BinaryBuf();
explicit CFX_BinaryBuf(FX_STRSIZE size);
- ~CFX_BinaryBuf();
+ virtual ~CFX_BinaryBuf();
uint8_t* GetBuffer() const { return m_pBuffer.get(); }
FX_STRSIZE GetSize() const { return m_DataSize; }
+ virtual FX_STRSIZE GetLength() const;
+ bool IsEmpty() const { return GetLength() == 0; }
void Clear();
void EstimateSize(FX_STRSIZE size, FX_STRSIZE alloc_step = 0);
diff --git a/core/fxcrt/cfx_bytestring_unittest.cpp b/core/fxcrt/cfx_bytestring_unittest.cpp
index dcb8577b65..a4c5187c11 100644
--- a/core/fxcrt/cfx_bytestring_unittest.cpp
+++ b/core/fxcrt/cfx_bytestring_unittest.cpp
@@ -999,6 +999,14 @@ TEST(fxcrt, ByteStringCMid) {
EXPECT_EQ("", longer_string.Mid(4, 3));
}
+TEST(fxcrt, ByteStringCTrimmedRight) {
+ CFX_ByteStringC fred("FRED");
+ EXPECT_EQ("FRED", fred.TrimmedRight('E'));
+ EXPECT_EQ("FRE", fred.TrimmedRight('D'));
+ CFX_ByteStringC fredd("FREDD");
+ EXPECT_EQ("FRE", fred.TrimmedRight('D'));
+}
+
TEST(fxcrt, ByteStringCElementAccess) {
// CFX_ByteStringC includes the NUL terminator for non-empty strings.
CFX_ByteStringC abc("abc");
diff --git a/core/fxcrt/cfx_string_c_template.h b/core/fxcrt/cfx_string_c_template.h
index 8585d73ae6..b0e17a1222 100644
--- a/core/fxcrt/cfx_string_c_template.h
+++ b/core/fxcrt/cfx_string_c_template.h
@@ -176,6 +176,20 @@ class CFX_StringCTemplate {
return Mid(GetLength() - count, count);
}
+ CFX_StringCTemplate TrimmedRight(CharType ch) const {
+ if (IsEmpty())
+ return CFX_StringCTemplate();
+
+ FX_STRSIZE pos = GetLength();
+ while (pos && CharAt(pos - 1) == ch)
+ pos--;
+
+ if (pos == 0)
+ return CFX_StringCTemplate();
+
+ return CFX_StringCTemplate(m_Ptr.Get(), pos);
+ }
+
bool operator<(const CFX_StringCTemplate& that) const {
int result = FXSYS_cmp(reinterpret_cast<const CharType*>(m_Ptr.Get()),
reinterpret_cast<const CharType*>(that.m_Ptr.Get()),
diff --git a/core/fxcrt/cfx_widestring_unittest.cpp b/core/fxcrt/cfx_widestring_unittest.cpp
index 7b12d50601..1c8aca9aca 100644
--- a/core/fxcrt/cfx_widestring_unittest.cpp
+++ b/core/fxcrt/cfx_widestring_unittest.cpp
@@ -1038,6 +1038,14 @@ TEST(fxcrt, WideStringCAnyAllNoneOf) {
EXPECT_FALSE(pdfium::ContainsValue(str, L'z'));
}
+TEST(fxcrt, WideStringCTrimmedRight) {
+ CFX_WideStringC fred(L"FRED");
+ EXPECT_EQ(L"FRED", fred.TrimmedRight(L'E'));
+ EXPECT_EQ(L"FRE", fred.TrimmedRight(L'D'));
+ CFX_WideStringC fredd(L"FREDD");
+ EXPECT_EQ(L"FRE", fred.TrimmedRight(L'D'));
+}
+
TEST(fxcrt, WideStringFormatWidth) {
{
CFX_WideString str;
diff --git a/core/fxcrt/cfx_widetextbuf.cpp b/core/fxcrt/cfx_widetextbuf.cpp
index 246124b95f..81b5fd4b05 100644
--- a/core/fxcrt/cfx_widetextbuf.cpp
+++ b/core/fxcrt/cfx_widetextbuf.cpp
@@ -6,6 +6,10 @@
#include "core/fxcrt/cfx_widetextbuf.h"
+FX_STRSIZE CFX_WideTextBuf::GetLength() const {
+ return m_DataSize / sizeof(wchar_t);
+}
+
void CFX_WideTextBuf::AppendChar(wchar_t ch) {
ExpandBuf(sizeof(wchar_t));
*(wchar_t*)(m_pBuffer.get() + m_DataSize) = ch;
diff --git a/core/fxcrt/cfx_widetextbuf.h b/core/fxcrt/cfx_widetextbuf.h
index 0c9b6ff99d..389f2e52a3 100644
--- a/core/fxcrt/cfx_widetextbuf.h
+++ b/core/fxcrt/cfx_widetextbuf.h
@@ -14,7 +14,7 @@
class CFX_WideTextBuf : public CFX_BinaryBuf {
public:
void AppendChar(wchar_t wch);
- FX_STRSIZE GetLength() const { return m_DataSize / sizeof(wchar_t); }
+ FX_STRSIZE GetLength() const override;
wchar_t* GetBuffer() const {
return reinterpret_cast<wchar_t*>(m_pBuffer.get());
}