diff options
Diffstat (limited to 'core/fpdftext/cpdf_linkextract.cpp')
-rw-r--r-- | core/fpdftext/cpdf_linkextract.cpp | 31 |
1 file changed, 26 insertions, 5 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index 686b6a23b8..47d0754bd2 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -31,18 +31,36 @@ void CPDF_LinkExtract::ExtractLinks() {
 }
 
 void CPDF_LinkExtract::ParseLink() {
-  int start = 0, pos = 0;
-  int TotalChar = m_pTextPage->CountChars();
-  while (pos < TotalChar) {
+  int start = 0;
+  int pos = 0;
+  int nTotalChar = m_pTextPage->CountChars();
+  bool bAfterHyphen = false;
+  bool bLineBreak = false;
+  while (pos < nTotalChar) {
     FPDF_CHAR_INFO pageChar;
     m_pTextPage->GetCharInfo(pos, &pageChar);
     if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED ||
-        pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) {
+        pageChar.m_Unicode == TEXT_SPACE_CHAR || pos == nTotalChar - 1) {
       int nCount = pos - start;
-      if (pos == TotalChar - 1)
+      if (pos == nTotalChar - 1) {
         nCount++;
+      } else if (bAfterHyphen && (pageChar.m_Unicode == TEXT_LINEFEED_CHAR ||
+                                  pageChar.m_Unicode == TEXT_RETURN_CHAR)) {
+        // Handle text breaks with a hyphen to the next line.
+        bLineBreak = true;
+        pos++;
+        continue;
+      }
       CFX_WideString strBeCheck;
       strBeCheck = m_pTextPage->GetPageText(start, nCount);
+      if (bLineBreak) {
+        strBeCheck.Remove(TEXT_LINEFEED_CHAR);
+        strBeCheck.Remove(TEXT_RETURN_CHAR);
+        bLineBreak = false;
+      }
+      // Replace the generated code with the hyphen char.
+      strBeCheck.Replace(L"\xfffe", TEXT_HYPHEN);
+
       if (strBeCheck.GetLength() > 5) {
         while (strBeCheck.GetLength() > 0) {
           wchar_t ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);
@@ -60,6 +78,9 @@ void CPDF_LinkExtract::ParseLink() {
       }
       start = ++pos;
     } else {
+      bAfterHyphen = (pageChar.m_Flag == FPDFTEXT_CHAR_HYPHEN ||
+                      (pageChar.m_Flag == FPDFTEXT_CHAR_NORMAL &&
+                       pageChar.m_Unicode == TEXT_HYPHEN_CHAR));
       pos++;
     }
   }