summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/fpdftext/cpdf_textpage.cpp63
-rw-r--r--core/fpdftext/cpdf_textpage.h2
-rw-r--r--core/fxcrt/cfx_binarybuf.cpp4
-rw-r--r--core/fxcrt/cfx_binarybuf.h4
-rw-r--r--core/fxcrt/cfx_bytestring_unittest.cpp8
-rw-r--r--core/fxcrt/cfx_string_c_template.h14
-rw-r--r--core/fxcrt/cfx_widestring_unittest.cpp8
-rw-r--r--core/fxcrt/cfx_widetextbuf.cpp4
-rw-r--r--core/fxcrt/cfx_widetextbuf.h2
9 files changed, 77 insertions, 32 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 3006ebce2b..ee1d51bfd8 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -106,6 +106,10 @@ float MaskPercentFilled(const std::vector<bool>& mask,
return count / (end - start);
}
+bool IsHyphenCode(wchar_t c) {
+ return c == 0x2D || c == 0xAD;
+}
+
} // namespace
PDFTEXT_Obj::PDFTEXT_Obj() {}
@@ -1215,36 +1219,37 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
return m_TextlineDir;
}
-bool CPDF_TextPage::IsHyphen(wchar_t curChar) {
- CFX_WideString strCurText = m_TempTextBuf.MakeString();
- if (strCurText.IsEmpty())
- strCurText = m_TextBuf.AsStringC();
- FX_STRSIZE nCount = strCurText.GetLength();
- if (nCount < 1)
+bool CPDF_TextPage::IsHyphen(wchar_t curChar) const {
+ CFX_WideStringC curText;
+ if (!m_TempTextBuf.IsEmpty())
+ curText = m_TempTextBuf.AsStringC();
+ else if (!m_TextBuf.IsEmpty())
+ curText = m_TextBuf.AsStringC();
+ else
return false;
- FX_STRSIZE nIndex = nCount - 1;
- wchar_t wcTmp = strCurText[nIndex];
- while (wcTmp == 0x20 && nIndex > 0 && nIndex <= nCount - 1)
- wcTmp = strCurText[--nIndex];
- if (0x2D == wcTmp || 0xAD == wcTmp) {
- if (--nIndex > 0) {
- wchar_t preChar = strCurText[nIndex];
- if (FXSYS_iswalpha(preChar) && FXSYS_iswalpha(curChar))
- return true;
- }
- const PAGECHAR_INFO* preInfo;
- if (!m_TempCharList.empty())
- preInfo = &m_TempCharList.back();
- else if (!m_CharList.empty())
- preInfo = &m_CharList.back();
- else
- return false;
- if (FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
- (0xAD == preInfo->m_Unicode || 0x2D == preInfo->m_Unicode)) {
- return true;
- }
- }
- return false;
+
+ curText = curText.TrimmedRight(0x20);
+ if (curText.GetLength() < 2)
+ return false;
+
+ // Extracting the last 2 characters, since they are all that matter
+ curText = curText.Right(2);
+ if (!IsHyphenCode(curText.Last()))
+ return false;
+
+ if (FXSYS_iswalpha(curText.First() && FXSYS_iswalnum(curChar)))
+ return true;
+
+ const PAGECHAR_INFO* preInfo;
+ if (!m_TempCharList.empty())
+ preInfo = &m_TempCharList.back();
+ else if (!m_CharList.empty())
+ preInfo = &m_CharList.back();
+ else
+ return false;
+
+ return FPDFTEXT_CHAR_PIECE == preInfo->m_Flag &&
+ IsHyphenCode(preInfo->m_Unicode);
}
CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h
index 6a4c87368d..41892ea97c 100644
--- a/core/fpdftext/cpdf_textpage.h
+++ b/core/fpdftext/cpdf_textpage.h
@@ -132,7 +132,7 @@ class CPDF_TextPage {
Hyphen,
};
- bool IsHyphen(wchar_t curChar);
+ bool IsHyphen(wchar_t curChar) const;
bool IsControlChar(const PAGECHAR_INFO& charInfo);
void ProcessObject();
void ProcessFormObject(CPDF_FormObject* pFormObj,
diff --git a/core/fxcrt/cfx_binarybuf.cpp b/core/fxcrt/cfx_binarybuf.cpp
index 73fe945fd6..b826fdd5b5 100644
--- a/core/fxcrt/cfx_binarybuf.cpp
+++ b/core/fxcrt/cfx_binarybuf.cpp
@@ -28,6 +28,10 @@ void CFX_BinaryBuf::Delete(FX_STRSIZE start_index, FX_STRSIZE count) {
m_DataSize -= count;
}
+FX_STRSIZE CFX_BinaryBuf::GetLength() const {
+ return m_DataSize;
+}
+
void CFX_BinaryBuf::Clear() {
m_DataSize = 0;
}
diff --git a/core/fxcrt/cfx_binarybuf.h b/core/fxcrt/cfx_binarybuf.h
index 3081d02902..4c795eda0b 100644
--- a/core/fxcrt/cfx_binarybuf.h
+++ b/core/fxcrt/cfx_binarybuf.h
@@ -17,10 +17,12 @@ class CFX_BinaryBuf {
public:
CFX_BinaryBuf();
explicit CFX_BinaryBuf(FX_STRSIZE size);
- ~CFX_BinaryBuf();
+ virtual ~CFX_BinaryBuf();
uint8_t* GetBuffer() const { return m_pBuffer.get(); }
FX_STRSIZE GetSize() const { return m_DataSize; }
+ virtual FX_STRSIZE GetLength() const;
+ bool IsEmpty() const { return GetLength() == 0; }
void Clear();
void EstimateSize(FX_STRSIZE size, FX_STRSIZE alloc_step = 0);
diff --git a/core/fxcrt/cfx_bytestring_unittest.cpp b/core/fxcrt/cfx_bytestring_unittest.cpp
index dcb8577b65..a4c5187c11 100644
--- a/core/fxcrt/cfx_bytestring_unittest.cpp
+++ b/core/fxcrt/cfx_bytestring_unittest.cpp
@@ -999,6 +999,14 @@ TEST(fxcrt, ByteStringCMid) {
EXPECT_EQ("", longer_string.Mid(4, 3));
}
+TEST(fxcrt, ByteStringCTrimmedRight) {
+ CFX_ByteStringC fred("FRED");
+ EXPECT_EQ("FRED", fred.TrimmedRight('E'));
+ EXPECT_EQ("FRE", fred.TrimmedRight('D'));
+ CFX_ByteStringC fredd("FREDD");
+ EXPECT_EQ("FRE", fred.TrimmedRight('D'));
+}
+
TEST(fxcrt, ByteStringCElementAccess) {
// CFX_ByteStringC includes the NUL terminator for non-empty strings.
CFX_ByteStringC abc("abc");
diff --git a/core/fxcrt/cfx_string_c_template.h b/core/fxcrt/cfx_string_c_template.h
index 8585d73ae6..b0e17a1222 100644
--- a/core/fxcrt/cfx_string_c_template.h
+++ b/core/fxcrt/cfx_string_c_template.h
@@ -176,6 +176,20 @@ class CFX_StringCTemplate {
return Mid(GetLength() - count, count);
}
+ CFX_StringCTemplate TrimmedRight(CharType ch) const {
+ if (IsEmpty())
+ return CFX_StringCTemplate();
+
+ FX_STRSIZE pos = GetLength();
+ while (pos && CharAt(pos - 1) == ch)
+ pos--;
+
+ if (pos == 0)
+ return CFX_StringCTemplate();
+
+ return CFX_StringCTemplate(m_Ptr.Get(), pos);
+ }
+
bool operator<(const CFX_StringCTemplate& that) const {
int result = FXSYS_cmp(reinterpret_cast<const CharType*>(m_Ptr.Get()),
reinterpret_cast<const CharType*>(that.m_Ptr.Get()),
diff --git a/core/fxcrt/cfx_widestring_unittest.cpp b/core/fxcrt/cfx_widestring_unittest.cpp
index 7b12d50601..1c8aca9aca 100644
--- a/core/fxcrt/cfx_widestring_unittest.cpp
+++ b/core/fxcrt/cfx_widestring_unittest.cpp
@@ -1038,6 +1038,14 @@ TEST(fxcrt, WideStringCAnyAllNoneOf) {
EXPECT_FALSE(pdfium::ContainsValue(str, L'z'));
}
+TEST(fxcrt, WideStringCTrimmedRight) {
+ CFX_WideStringC fred(L"FRED");
+ EXPECT_EQ(L"FRED", fred.TrimmedRight(L'E'));
+ EXPECT_EQ(L"FRE", fred.TrimmedRight(L'D'));
+ CFX_WideStringC fredd(L"FREDD");
+ EXPECT_EQ(L"FRE", fred.TrimmedRight(L'D'));
+}
+
TEST(fxcrt, WideStringFormatWidth) {
{
CFX_WideString str;
diff --git a/core/fxcrt/cfx_widetextbuf.cpp b/core/fxcrt/cfx_widetextbuf.cpp
index 246124b95f..81b5fd4b05 100644
--- a/core/fxcrt/cfx_widetextbuf.cpp
+++ b/core/fxcrt/cfx_widetextbuf.cpp
@@ -6,6 +6,10 @@
#include "core/fxcrt/cfx_widetextbuf.h"
+FX_STRSIZE CFX_WideTextBuf::GetLength() const {
+ return m_DataSize / sizeof(wchar_t);
+}
+
void CFX_WideTextBuf::AppendChar(wchar_t ch) {
ExpandBuf(sizeof(wchar_t));
*(wchar_t*)(m_pBuffer.get() + m_DataSize) = ch;
diff --git a/core/fxcrt/cfx_widetextbuf.h b/core/fxcrt/cfx_widetextbuf.h
index 0c9b6ff99d..389f2e52a3 100644
--- a/core/fxcrt/cfx_widetextbuf.h
+++ b/core/fxcrt/cfx_widetextbuf.h
@@ -14,7 +14,7 @@
class CFX_WideTextBuf : public CFX_BinaryBuf {
public:
void AppendChar(wchar_t wch);
- FX_STRSIZE GetLength() const { return m_DataSize / sizeof(wchar_t); }
+ FX_STRSIZE GetLength() const override;
wchar_t* GetBuffer() const {
return reinterpret_cast<wchar_t*>(m_pBuffer.get());
}