diff options
-rw-r--r-- | core/fpdftext/cpdf_textpage.cpp | 63 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpage.h | 2 | ||||
-rw-r--r-- | core/fxcrt/cfx_binarybuf.cpp | 4 | ||||
-rw-r--r-- | core/fxcrt/cfx_binarybuf.h | 4 | ||||
-rw-r--r-- | core/fxcrt/cfx_bytestring_unittest.cpp | 8 | ||||
-rw-r--r-- | core/fxcrt/cfx_string_c_template.h | 14 | ||||
-rw-r--r-- | core/fxcrt/cfx_widestring_unittest.cpp | 8 | ||||
-rw-r--r-- | core/fxcrt/cfx_widetextbuf.cpp | 4 | ||||
-rw-r--r-- | core/fxcrt/cfx_widetextbuf.h | 2 |
9 files changed, 77 insertions, 32 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index 3006ebce2b..ee1d51bfd8 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -106,6 +106,10 @@ float MaskPercentFilled(const std::vector<bool>& mask, return count / (end - start); } +bool IsHyphenCode(wchar_t c) { + return c == 0x2D || c == 0xAD; +} + } // namespace PDFTEXT_Obj::PDFTEXT_Obj() {} @@ -1215,36 +1219,37 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode( return m_TextlineDir; } -bool CPDF_TextPage::IsHyphen(wchar_t curChar) { - CFX_WideString strCurText = m_TempTextBuf.MakeString(); - if (strCurText.IsEmpty()) - strCurText = m_TextBuf.AsStringC(); - FX_STRSIZE nCount = strCurText.GetLength(); - if (nCount < 1) +bool CPDF_TextPage::IsHyphen(wchar_t curChar) const { + CFX_WideStringC curText; + if (!m_TempTextBuf.IsEmpty()) + curText = m_TempTextBuf.AsStringC(); + else if (!m_TextBuf.IsEmpty()) + curText = m_TextBuf.AsStringC(); + else return false; - FX_STRSIZE nIndex = nCount - 1; - wchar_t wcTmp = strCurText[nIndex]; - while (wcTmp == 0x20 && nIndex > 0 && nIndex <= nCount - 1) - wcTmp = strCurText[--nIndex]; - if (0x2D == wcTmp || 0xAD == wcTmp) { - if (--nIndex > 0) { - wchar_t preChar = strCurText[nIndex]; - if (FXSYS_iswalpha(preChar) && FXSYS_iswalpha(curChar)) - return true; - } - const PAGECHAR_INFO* preInfo; - if (!m_TempCharList.empty()) - preInfo = &m_TempCharList.back(); - else if (!m_CharList.empty()) - preInfo = &m_CharList.back(); - else - return false; - if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag && - (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) { - return true; - } - } - return false; + + curText = curText.TrimmedRight(0x20); + if (curText.GetLength() < 2) + return false; + + // Extracting the last 2 characters, since they are all that matter + curText = curText.Right(2); + if (!IsHyphenCode(curText.Last())) + return false; + + if (FXSYS_iswalpha(curText.First() && FXSYS_iswalnum(curChar))) + return true; + + const PAGECHAR_INFO* preInfo; + if (!m_TempCharList.empty()) + preInfo = &m_TempCharList.back(); + else if (!m_CharList.empty()) + preInfo = &m_CharList.back(); + else + return false; + + return FPDFTEXT_CHAR_PIECE == preInfo->m_Flag && + IsHyphenCode(preInfo->m_Unicode); } CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject( diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h index 6a4c87368d..41892ea97c 100644 --- a/core/fpdftext/cpdf_textpage.h +++ b/core/fpdftext/cpdf_textpage.h @@ -132,7 +132,7 @@ class CPDF_TextPage { Hyphen, }; - bool IsHyphen(wchar_t curChar); + bool IsHyphen(wchar_t curChar) const; bool IsControlChar(const PAGECHAR_INFO& charInfo); void ProcessObject(); void ProcessFormObject(CPDF_FormObject* pFormObj, diff --git a/core/fxcrt/cfx_binarybuf.cpp b/core/fxcrt/cfx_binarybuf.cpp index 73fe945fd6..b826fdd5b5 100644 --- a/core/fxcrt/cfx_binarybuf.cpp +++ b/core/fxcrt/cfx_binarybuf.cpp @@ -28,6 +28,10 @@ void CFX_BinaryBuf::Delete(FX_STRSIZE start_index, FX_STRSIZE count) { m_DataSize -= count; } +FX_STRSIZE CFX_BinaryBuf::GetLength() const { + return m_DataSize; +} + void CFX_BinaryBuf::Clear() { m_DataSize = 0; } diff --git a/core/fxcrt/cfx_binarybuf.h b/core/fxcrt/cfx_binarybuf.h index 3081d02902..4c795eda0b 100644 --- a/core/fxcrt/cfx_binarybuf.h +++ b/core/fxcrt/cfx_binarybuf.h @@ -17,10 +17,12 @@ class CFX_BinaryBuf { public: CFX_BinaryBuf(); explicit CFX_BinaryBuf(FX_STRSIZE size); - ~CFX_BinaryBuf(); + virtual ~CFX_BinaryBuf(); uint8_t* GetBuffer() const { return m_pBuffer.get(); } FX_STRSIZE GetSize() const { return m_DataSize; } + virtual FX_STRSIZE GetLength() const; + bool IsEmpty() const { return GetLength() == 0; } void Clear(); void EstimateSize(FX_STRSIZE size, FX_STRSIZE alloc_step = 0); diff --git a/core/fxcrt/cfx_bytestring_unittest.cpp b/core/fxcrt/cfx_bytestring_unittest.cpp index dcb8577b65..a4c5187c11 100644 --- a/core/fxcrt/cfx_bytestring_unittest.cpp +++ b/core/fxcrt/cfx_bytestring_unittest.cpp @@ -999,6 +999,14 @@ TEST(fxcrt, ByteStringCMid) { EXPECT_EQ("", longer_string.Mid(4, 3)); } +TEST(fxcrt, ByteStringCTrimmedRight) { + CFX_ByteStringC fred("FRED"); + EXPECT_EQ("FRED", fred.TrimmedRight('E')); + EXPECT_EQ("FRE", fred.TrimmedRight('D')); + CFX_ByteStringC fredd("FREDD"); + EXPECT_EQ("FRE", fred.TrimmedRight('D')); +} + TEST(fxcrt, ByteStringCElementAccess) { // CFX_ByteStringC includes the NUL terminator for non-empty strings. CFX_ByteStringC abc("abc"); diff --git a/core/fxcrt/cfx_string_c_template.h b/core/fxcrt/cfx_string_c_template.h index 8585d73ae6..b0e17a1222 100644 --- a/core/fxcrt/cfx_string_c_template.h +++ b/core/fxcrt/cfx_string_c_template.h @@ -176,6 +176,20 @@ class CFX_StringCTemplate { return Mid(GetLength() - count, count); } + CFX_StringCTemplate TrimmedRight(CharType ch) const { + if (IsEmpty()) + return CFX_StringCTemplate(); + + FX_STRSIZE pos = GetLength(); + while (pos && CharAt(pos - 1) == ch) + pos--; + + if (pos == 0) + return CFX_StringCTemplate(); + + return CFX_StringCTemplate(m_Ptr.Get(), pos); + } + bool operator<(const CFX_StringCTemplate& that) const { int result = FXSYS_cmp(reinterpret_cast<const CharType*>(m_Ptr.Get()), reinterpret_cast<const CharType*>(that.m_Ptr.Get()), diff --git a/core/fxcrt/cfx_widestring_unittest.cpp b/core/fxcrt/cfx_widestring_unittest.cpp index 7b12d50601..1c8aca9aca 100644 --- a/core/fxcrt/cfx_widestring_unittest.cpp +++ b/core/fxcrt/cfx_widestring_unittest.cpp @@ -1038,6 +1038,14 @@ TEST(fxcrt, WideStringCAnyAllNoneOf) { EXPECT_FALSE(pdfium::ContainsValue(str, L'z')); } +TEST(fxcrt, WideStringCTrimmedRight) { + CFX_WideStringC fred(L"FRED"); + EXPECT_EQ(L"FRED", fred.TrimmedRight(L'E')); + EXPECT_EQ(L"FRE", fred.TrimmedRight(L'D')); + CFX_WideStringC fredd(L"FREDD"); + EXPECT_EQ(L"FRE", fred.TrimmedRight(L'D')); +} + TEST(fxcrt, WideStringFormatWidth) { { CFX_WideString str; diff --git a/core/fxcrt/cfx_widetextbuf.cpp b/core/fxcrt/cfx_widetextbuf.cpp index 246124b95f..81b5fd4b05 100644 --- a/core/fxcrt/cfx_widetextbuf.cpp +++ b/core/fxcrt/cfx_widetextbuf.cpp @@ -6,6 +6,10 @@ #include "core/fxcrt/cfx_widetextbuf.h" +FX_STRSIZE CFX_WideTextBuf::GetLength() const { + return m_DataSize / sizeof(wchar_t); +} + void CFX_WideTextBuf::AppendChar(wchar_t ch) { ExpandBuf(sizeof(wchar_t)); *(wchar_t*)(m_pBuffer.get() + m_DataSize) = ch; diff --git a/core/fxcrt/cfx_widetextbuf.h b/core/fxcrt/cfx_widetextbuf.h index 0c9b6ff99d..389f2e52a3 100644 --- a/core/fxcrt/cfx_widetextbuf.h +++ b/core/fxcrt/cfx_widetextbuf.h @@ -14,7 +14,7 @@ class CFX_WideTextBuf : public CFX_BinaryBuf { public: void AppendChar(wchar_t wch); - FX_STRSIZE GetLength() const { return m_DataSize / sizeof(wchar_t); } + FX_STRSIZE GetLength() const override; wchar_t* GetBuffer() const { return reinterpret_cast<wchar_t*>(m_pBuffer.get()); } |