diff options
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- | core/fpdftext/cpdf_linkextract.cpp | 31 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpage.h | 2 |
2 files changed, 28 insertions, 5 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp index 686b6a23b8..47d0754bd2 100644 --- a/core/fpdftext/cpdf_linkextract.cpp +++ b/core/fpdftext/cpdf_linkextract.cpp @@ -31,18 +31,36 @@ void CPDF_LinkExtract::ExtractLinks() { } void CPDF_LinkExtract::ParseLink() { - int start = 0, pos = 0; - int TotalChar = m_pTextPage->CountChars(); - while (pos < TotalChar) { + int start = 0; + int pos = 0; + int nTotalChar = m_pTextPage->CountChars(); + bool bAfterHyphen = false; + bool bLineBreak = false; + while (pos < nTotalChar) { FPDF_CHAR_INFO pageChar; m_pTextPage->GetCharInfo(pos, &pageChar); if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED || - pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { + pageChar.m_Unicode == TEXT_SPACE_CHAR || pos == nTotalChar - 1) { int nCount = pos - start; - if (pos == TotalChar - 1) + if (pos == nTotalChar - 1) { nCount++; + } else if (bAfterHyphen && (pageChar.m_Unicode == TEXT_LINEFEED_CHAR || + pageChar.m_Unicode == TEXT_RETURN_CHAR)) { + // Handle text breaks with a hyphen to the next line. + bLineBreak = true; + pos++; + continue; + } CFX_WideString strBeCheck; strBeCheck = m_pTextPage->GetPageText(start, nCount); + if (bLineBreak) { + strBeCheck.Remove(TEXT_LINEFEED_CHAR); + strBeCheck.Remove(TEXT_RETURN_CHAR); + bLineBreak = false; + } + // Replace the generated code with the hyphen char. + strBeCheck.Replace(L"\xfffe", TEXT_HYPHEN); + if (strBeCheck.GetLength() > 5) { while (strBeCheck.GetLength() > 0) { wchar_t ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1); @@ -60,6 +78,9 @@ void CPDF_LinkExtract::ParseLink() { } start = ++pos; } else { + bAfterHyphen = (pageChar.m_Flag == FPDFTEXT_CHAR_HYPHEN || + (pageChar.m_Flag == FPDFTEXT_CHAR_NORMAL && + pageChar.m_Unicode == TEXT_HYPHEN_CHAR)); pos++; } } diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h index ebe58eb7ff..d7e29edf3b 100644 --- a/core/fpdftext/cpdf_textpage.h +++ b/core/fpdftext/cpdf_textpage.h @@ -34,10 +34,12 @@ class CPDF_TextObject; #define TEXT_SPACE_CHAR L' ' #define TEXT_LINEFEED_CHAR L'\n' #define TEXT_RETURN_CHAR L'\r' +#define TEXT_HYPHEN_CHAR L'-' #define TEXT_EMPTY L"" #define TEXT_SPACE L" " #define TEXT_RETURN_LINEFEED L"\r\n" #define TEXT_LINEFEED L"\n" +#define TEXT_HYPHEN L"-" #define TEXT_CHARRATIO_GAPDELTA 0.070 enum class FPDFText_MarkedContent { Pass = 0, Done, Delay }; |