summary | refs | log | tree | commit | diff
path: root/core/src/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/fpdftext')
-rw-r--r-- core/src/fpdftext/fpdf_text_int.cpp 190
-rw-r--r-- core/src/fpdftext/text_int.h 3
2 files changed, 168 insertions, 25 deletions
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp
index c1aaad8b5c..6755939ca2 100644
--- a/core/src/fpdftext/fpdf_text_int.cpp
+++ b/core/src/fpdftext/fpdf_text_int.cpp
@@ -42,11 +42,9 @@ FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold)
{
if (threshold < 300) {
return threshold / 2.0f;
- }
- if (threshold < 500) {
+ } else if (threshold < 500) {
return threshold / 4.0f;
- }
- if (threshold < 700) {
+ } else if (threshold < 700) {
return threshold / 5.0f;
}
return threshold / 6.0f;
@@ -161,9 +159,12 @@ void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize)
{
m_ParseOptions.m_bNormalizeObjs = bNormalize;
}
-bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo)
+FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo)
{
- switch (charInfo.m_Unicode) {
+ if(!pCharInfo) {
+ return FALSE;
+ }
+ switch(pCharInfo->m_Unicode) {
case 0x2:
case 0x3:
case 0x93:
@@ -172,9 +173,13 @@ bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo)
case 0x97:
case 0x98:
case 0xfffe:
- return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
+ if(pCharInfo->m_Flag == FPDFTEXT_CHAR_HYPHEN) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
default:
- return false;
+ return FALSE;
}
}
FX_BOOL CPDF_TextPage::ParseTextPage()
@@ -202,7 +207,7 @@ FX_BOOL CPDF_TextPage::ParseTextPage()
if(charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
bNormal = TRUE;
}
- else if(charinfo.m_Unicode == 0 || IsControlChar(charinfo))
+ else if(charinfo.m_Unicode == 0 || IsControlChar(&charinfo))
bNormal = FALSE;
else {
bNormal = TRUE;
@@ -487,6 +492,141 @@ int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX
CPDF_Point point(x, y);
return GetIndexAtPos(point, xTorelance, yTorelance);
}
+// Returns the index in m_charList of the character that neighbours the
+// character at |order| when moving in |direction| (one of FPDFTEXT_LEFT,
+// FPDFTEXT_RIGHT, FPDFTEXT_UP, FPDFTEXT_DOWN).
+//
+// Return values:
+//   >= 0  index of the neighbouring character.
+//   -1    FPDFTEXT_UP only: no character on a different line precedes |order|.
+//   -2    FPDFTEXT_RIGHT: stepping ran past the end of m_charList;
+//         FPDFTEXT_DOWN: no character on a different line follows |order|.
+//   -3    the page is not parsed, only char codes were requested, |direction|
+//         is not one of the four directions above, or |order| is out of range
+//         for an UP/DOWN lookup.
+// For FPDFTEXT_LEFT/FPDFTEXT_RIGHT a stepped index that drops below zero is
+// returned unchanged.
+int CPDF_TextPage::GetOrderByDirection(int order, int direction) const
+{
+    if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) {
+        return -3;
+    }
+    const int nCount = m_charList.GetSize();
+    if (direction == FPDFTEXT_RIGHT || direction == FPDFTEXT_LEFT) {
+        // The direction constant itself is used as the index step
+        // (presumably -1 / +1 -- confirm against the FPDFTEXT_* definitions).
+        order += direction;
+        while (order >= 0 && order < nCount) {
+            PAGECHAR_INFO* pInfo = (PAGECHAR_INFO*)m_charList.GetAt(order);
+            // Only generated line-feed / carriage-return entries are skipped;
+            // anything else is the answer.
+            if (pInfo->m_Flag != FPDFTEXT_CHAR_GENERATED ||
+                (pInfo->m_Unicode != TEXT_LINEFEED_CHAR && pInfo->m_Unicode != TEXT_RETURN_CHAR)) {
+                break;
+            }
+            order += direction;
+        }
+        if (order >= nCount) {
+            order = -2;
+        }
+        return order;
+    }
+    // Compare against the argument explicitly; the previous code tested the
+    // FPDFTEXT_DOWN constant, so the branch did not depend on |direction|.
+    if (direction != FPDFTEXT_UP && direction != FPDFTEXT_DOWN) {
+        return -3;
+    }
+    // m_charList.GetAt(order) is dereferenced below, so |order| must be valid.
+    if (order < 0 || order >= nCount) {
+        return -3;
+    }
+    // UP walks towards index 0 and DOWN towards the end of the list; apart
+    // from the step and the "nothing found" code the two scans are identical.
+    const int step = (direction == FPDFTEXT_UP) ? -1 : 1;
+    const int noLineResult = (direction == FPDFTEXT_UP) ? -1 : -2;
+    PAGECHAR_INFO* pInfo = (PAGECHAR_INFO*)m_charList.GetAt(order);
+    CPDF_Point curPos(pInfo->m_OriginX, pInfo->m_OriginY);
+    const FX_FLOAT height = pInfo->m_CharBox.Height();
+    int index = order;
+    // Phase 1: find the first character whose origin is vertically further
+    // away than half of the taller of the two character boxes, i.e. the first
+    // character treated as lying on another line.
+    while (1) {
+        index += step;
+        if (index < 0 || index >= nCount) {
+            return noLineResult;
+        }
+        pInfo = (PAGECHAR_INFO*)m_charList.GetAt(index);
+        if (FXSYS_fabs(pInfo->m_OriginY - curPos.y) > FX_MAX(height, pInfo->m_CharBox.Height()) / 2) {
+            break;
+        }
+    }
+    const FX_FLOAT difPosY = pInfo->m_OriginY;
+    int minIndex = index;
+    FX_FLOAT PreXdif = pInfo->m_OriginX - curPos.x;
+    FX_FLOAT minXdif = PreXdif;
+    if (PreXdif == 0) {
+        return index;
+    }
+    // Phase 2: keep walking while the origin Y stays exactly equal to that of
+    // the character found above, looking for the horizontally closest one.
+    for (index += step; index >= 0 && index < nCount; index += step) {
+        pInfo = (PAGECHAR_INFO*)m_charList.GetAt(index);
+        if (difPosY != pInfo->m_OriginY) {
+            break;
+        }
+        const FX_FLOAT curXdif = pInfo->m_OriginX - curPos.x;
+        if (curXdif == 0) {
+            return index;
+        }
+        const int signflag = (curXdif > 0) ? 1 : -1;
+        if (signflag * PreXdif < 0) {
+            // The horizontal offset changed sign between the previously
+            // visited character (index - step) and this one, so the start
+            // position lies between them: return the nearer of the two.
+            if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) {
+                return index - step;
+            }
+            return index;
+        }
+        if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) {
+            minIndex = index;
+            minXdif = curXdif;
+        }
+        PreXdif = curXdif;
+    }
+    return minIndex;
+}
void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const
{
if(m_ParseOptions.m_bGetCharCodeOnly) {
@@ -812,6 +952,7 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const
return breakPos;
}
}
+ return breakPos;
} else if (direction == FPDFTEXT_RIGHT) {
while (++breakPos < m_charList.GetSize()) {
charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos);
@@ -819,6 +960,7 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const
return breakPos;
}
}
+ return breakPos;
}
return breakPos;
}
@@ -1019,7 +1161,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i)
{
PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
FX_WCHAR wChar = str.GetAt(i);
- if(!IsControlChar(Info)) {
+ if(!IsControlChar(&Info)) {
Info.m_Index = m_TextBuf.GetLength();
if (wChar >= 0xFB00 && wChar <= 0xFB06) {
FX_WCHAR* pDst = NULL;
@@ -1051,7 +1193,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i)
void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i)
{
PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
- if(!IsControlChar(Info)) {
+ if(!IsControlChar(&Info)) {
Info.m_Index = m_TextBuf.GetLength();
FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE);
FX_WCHAR* pDst = NULL;
@@ -1070,8 +1212,9 @@ void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i)
}
FX_Free(pDst);
return;
+ } else {
+ Info.m_Unicode = wChar;
}
- Info.m_Unicode = wChar;
m_TextBuf.AppendChar(Info.m_Unicode);
} else {
Info.m_Index = -1;
@@ -1772,9 +1915,11 @@ int32_t CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj)
v.Set(dX, dY);
v.Normalize();
if (v.y <= 0.0872f) {
- return v.x <= 0.0872f ? m_TextlineDir : 0;
- }
- if (v.x <= 0.0872f) {
+ if (v.x <= 0.0872f) {
+ return m_TextlineDir;
+ }
+ return 0;
+ } else if (v.x <= 0.0872f) {
return 1;
}
return m_TextlineDir;
@@ -2553,25 +2698,22 @@ FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck)
if (str.Find(L"http://www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
return TRUE;
- }
- if (str.Find(L"http://") != -1) {
+ } else if (str.Find(L"http://") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
return TRUE;
- }
- if (str.Find(L"https://www.") != -1) {
+ } else if (str.Find(L"https://www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
return TRUE;
- }
- if (str.Find(L"https://") != -1) {
+ } else if (str.Find(L"https://") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
return TRUE;
- }
- if (str.Find(L"www.") != -1) {
+ } else if (str.Find(L"www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
strBeCheck = L"http://" + strBeCheck;
return TRUE;
+ } else {
+ return FALSE;
}
- return FALSE;
}
FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str)
{
diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h
index ce52371df0..e2d6af6e98 100644
--- a/core/src/fpdftext/text_int.h
+++ b/core/src/fpdftext/text_int.h
@@ -66,6 +66,7 @@ public:
FX_FLOAT yTorelance) const;
virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const;
virtual void GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const;
+ virtual int GetOrderByDirection(int order, int direction) const;
virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const;
virtual int CountRects(int start, int nCount);
@@ -86,7 +87,7 @@ public:
static FX_BOOL IsLetter(FX_WCHAR unicode);
private:
FX_BOOL IsHyphen(FX_WCHAR curChar);
- bool IsControlChar(const PAGECHAR_INFO& charInfo);
+ FX_BOOL IsControlChar(PAGECHAR_INFO* pCharInfo);
FX_BOOL GetBaselineRotate(int start, int end, int& Rotate);
void ProcessObject();
void ProcessFormObject(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix);