summary | refs | log | tree | commit | diff
path: root/core/fpdftext/cpdf_linkextract.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext/cpdf_linkextract.cpp')
-rw-r--r-- core/fpdftext/cpdf_linkextract.cpp 31
1 files changed, 26 insertions, 5 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index 686b6a23b8..47d0754bd2 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -31,18 +31,36 @@ void CPDF_LinkExtract::ExtractLinks() {
}
void CPDF_LinkExtract::ParseLink() {
- int start = 0, pos = 0;
- int TotalChar = m_pTextPage->CountChars();
- while (pos < TotalChar) {
+ int start = 0;
+ int pos = 0;
+ int nTotalChar = m_pTextPage->CountChars();
+ bool bAfterHyphen = false;
+ bool bLineBreak = false;
+ while (pos < nTotalChar) {
FPDF_CHAR_INFO pageChar;
m_pTextPage->GetCharInfo(pos, &pageChar);
if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED ||
- pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) {
+ pageChar.m_Unicode == TEXT_SPACE_CHAR || pos == nTotalChar - 1) {
int nCount = pos - start;
- if (pos == TotalChar - 1)
+ if (pos == nTotalChar - 1) {
nCount++;
+ } else if (bAfterHyphen && (pageChar.m_Unicode == TEXT_LINEFEED_CHAR ||
+ pageChar.m_Unicode == TEXT_RETURN_CHAR)) {
+ // Handle text breaks with a hyphen to the next line.
+ bLineBreak = true;
+ pos++;
+ continue;
+ }
CFX_WideString strBeCheck;
strBeCheck = m_pTextPage->GetPageText(start, nCount);
+ if (bLineBreak) {
+ strBeCheck.Remove(TEXT_LINEFEED_CHAR);
+ strBeCheck.Remove(TEXT_RETURN_CHAR);
+ bLineBreak = false;
+ }
+ // Replace the generated code with the hyphen char.
+ strBeCheck.Replace(L"\xfffe", TEXT_HYPHEN);
+
if (strBeCheck.GetLength() > 5) {
while (strBeCheck.GetLength() > 0) {
wchar_t ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);
@@ -60,6 +78,9 @@ void CPDF_LinkExtract::ParseLink() {
}
start = ++pos;
} else {
+ bAfterHyphen = (pageChar.m_Flag == FPDFTEXT_CHAR_HYPHEN ||
+ (pageChar.m_Flag == FPDFTEXT_CHAR_NORMAL &&
+ pageChar.m_Unicode == TEXT_HYPHEN_CHAR));
pos++;
}
}