summary | refs | log | tree | commit | diff
path: root/core/src/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/fpdftext')
-rw-r--r-- core/src/fpdftext/fpdf_text_int.cpp 190
-rw-r--r-- core/src/fpdftext/text_int.h 3
2 files changed, 25 insertions, 168 deletions
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp
index 6755939ca2..c1aaad8b5c 100644
--- a/core/src/fpdftext/fpdf_text_int.cpp
+++ b/core/src/fpdftext/fpdf_text_int.cpp
@@ -42,9 +42,11 @@ FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold)
{
if (threshold < 300) {
return threshold / 2.0f;
- } else if (threshold < 500) {
+ }
+ if (threshold < 500) {
return threshold / 4.0f;
- } else if (threshold < 700) {
+ }
+ if (threshold < 700) {
return threshold / 5.0f;
}
return threshold / 6.0f;
@@ -159,12 +161,9 @@ void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize)
{
m_ParseOptions.m_bNormalizeObjs = bNormalize;
}
-FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo)
+bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo)
{
- if(!pCharInfo) {
- return FALSE;
- }
- switch(pCharInfo->m_Unicode) {
+ switch (charInfo.m_Unicode) {
case 0x2:
case 0x3:
case 0x93:
@@ -173,13 +172,9 @@ FX_BOOL CPDF_TextPage::IsControlChar(PAGECHAR_INFO* pCharInfo)
case 0x97:
case 0x98:
case 0xfffe:
- if(pCharInfo->m_Flag == FPDFTEXT_CHAR_HYPHEN) {
- return FALSE;
- } else {
- return TRUE;
- }
+ return charInfo.m_Flag != FPDFTEXT_CHAR_HYPHEN;
default:
- return FALSE;
+ return false;
}
}
FX_BOOL CPDF_TextPage::ParseTextPage()
@@ -207,7 +202,7 @@ FX_BOOL CPDF_TextPage::ParseTextPage()
if(charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
bNormal = TRUE;
}
- else if(charinfo.m_Unicode == 0 || IsControlChar(&charinfo))
+ else if(charinfo.m_Unicode == 0 || IsControlChar(charinfo))
bNormal = FALSE;
else {
bNormal = TRUE;
@@ -492,141 +487,6 @@ int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX
CPDF_Point point(x, y);
return GetIndexAtPos(point, xTorelance, yTorelance);
}
-int CPDF_TextPage::GetOrderByDirection(int order, int direction) const
-{
- if(m_ParseOptions.m_bGetCharCodeOnly) {
- return -3;
- }
- if (!m_IsParsered) {
- return -3;
- }
- if (direction == FPDFTEXT_RIGHT || direction == FPDFTEXT_LEFT) {
- order += direction;
- while(order >= 0 && order < m_charList.GetSize()) {
- PAGECHAR_INFO cinfo = *(PAGECHAR_INFO*)m_charList.GetAt(order);
- if (cinfo.m_Flag != FPDFTEXT_CHAR_GENERATED) {
- break;
- } else {
- if (cinfo.m_Unicode == TEXT_LINEFEED_CHAR || cinfo.m_Unicode == TEXT_RETURN_CHAR) {
- order += direction;
- } else {
- break;
- }
- }
- }
- if (order >= m_charList.GetSize()) {
- order = -2;
- }
- return order;
- }
- PAGECHAR_INFO charinfo;
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(order);
- CPDF_Point curPos(charinfo.m_OriginX, charinfo.m_OriginY);
- FX_FLOAT difPosY = 0.0, minXdif = 1000;
- int minIndex = -2;
- int index = order;
- FX_FLOAT height = charinfo.m_CharBox.Height();
- if (direction == FPDFTEXT_UP) {
- minIndex = -1;
- while (1) {
- if (--index < 0) {
- return -1;
- }
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
- if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) {
- difPosY = charinfo.m_OriginY;
- minIndex = index;
- break;
- }
- }
- FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x;
- minXdif = PreXdif;
- if (PreXdif == 0) {
- return index;
- }
- FX_FLOAT curXdif = 0;
- while (--index >= 0) {
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
- if (difPosY != charinfo.m_OriginY) {
- break;
- }
- curXdif = charinfo.m_OriginX - curPos.x;
- if (curXdif == 0) {
- return index;
- }
- int signflag = 0;
- if (curXdif > 0) {
- signflag = 1;
- } else {
- signflag = -1;
- }
- if (signflag * PreXdif < 0) {
- if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) {
- return index + 1;
- } else {
- return index;
- }
- }
- if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) {
- minIndex = index;
- minXdif = curXdif;
- }
- PreXdif = curXdif;
- if (difPosY != charinfo.m_OriginY) {
- break;
- }
- }
- return minIndex;
- } else if(FPDFTEXT_DOWN) {
- minIndex = -2;
- while (1) {
- if (++index > m_charList.GetSize() - 1) {
- return minIndex;
- }
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
- if (FXSYS_fabs(charinfo.m_OriginY - curPos.y) > FX_MAX(height, charinfo.m_CharBox.Height()) / 2) {
- difPosY = charinfo.m_OriginY;
- minIndex = index;
- break;
- }
- }
- FX_FLOAT PreXdif = charinfo.m_OriginX - curPos.x;
- minXdif = PreXdif;
- if (PreXdif == 0) {
- return index;
- }
- FX_FLOAT curXdif = 0;
- while (++index < m_charList.GetSize()) {
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
- if (difPosY != charinfo.m_OriginY) {
- break;
- }
- curXdif = charinfo.m_OriginX - curPos.x;
- if (curXdif == 0) {
- return index;
- }
- int signflag = 0;
- if (curXdif > 0) {
- signflag = 1;
- } else {
- signflag = -1;
- }
- if (signflag * PreXdif < 0) {
- if (FXSYS_fabs(PreXdif) < FXSYS_fabs(curXdif)) {
- return index - 1;
- } else {
- return index;
- }
- }
- if (FXSYS_fabs(curXdif) < FXSYS_fabs(minXdif)) {
- minXdif = curXdif;
- minIndex = index;
- }
- PreXdif = curXdif;
- }
- return minIndex;
- }
-}
void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO & info) const
{
if(m_ParseOptions.m_bGetCharCodeOnly) {
@@ -952,7 +812,6 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const
return breakPos;
}
}
- return breakPos;
} else if (direction == FPDFTEXT_RIGHT) {
while (++breakPos < m_charList.GetSize()) {
charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(breakPos);
@@ -960,7 +819,6 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const
return breakPos;
}
}
- return breakPos;
}
return breakPos;
}
@@ -1161,7 +1019,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i)
{
PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
FX_WCHAR wChar = str.GetAt(i);
- if(!IsControlChar(&Info)) {
+ if(!IsControlChar(Info)) {
Info.m_Index = m_TextBuf.GetLength();
if (wChar >= 0xFB00 && wChar <= 0xFB06) {
FX_WCHAR* pDst = NULL;
@@ -1193,7 +1051,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i)
void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i)
{
PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i);
- if(!IsControlChar(&Info)) {
+ if(!IsControlChar(Info)) {
Info.m_Index = m_TextBuf.GetLength();
FX_WCHAR wChar = FX_GetMirrorChar(str.GetAt(i), TRUE, FALSE);
FX_WCHAR* pDst = NULL;
@@ -1212,9 +1070,8 @@ void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i)
}
FX_Free(pDst);
return;
- } else {
- Info.m_Unicode = wChar;
}
+ Info.m_Unicode = wChar;
m_TextBuf.AppendChar(Info.m_Unicode);
} else {
Info.m_Index = -1;
@@ -1915,11 +1772,9 @@ int32_t CPDF_TextPage::GetTextObjectWritingMode(const CPDF_TextObject* pTextObj)
v.Set(dX, dY);
v.Normalize();
if (v.y <= 0.0872f) {
- if (v.x <= 0.0872f) {
- return m_TextlineDir;
- }
- return 0;
- } else if (v.x <= 0.0872f) {
+ return v.x <= 0.0872f ? m_TextlineDir : 0;
+ }
+ if (v.x <= 0.0872f) {
return 1;
}
return m_TextlineDir;
@@ -2698,22 +2553,25 @@ FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck)
if (str.Find(L"http://www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
return TRUE;
- } else if (str.Find(L"http://") != -1) {
+ }
+ if (str.Find(L"http://") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
return TRUE;
- } else if (str.Find(L"https://www.") != -1) {
+ }
+ if (str.Find(L"https://www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
return TRUE;
- } else if (str.Find(L"https://") != -1) {
+ }
+ if (str.Find(L"https://") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
return TRUE;
- } else if (str.Find(L"www.") != -1) {
+ }
+ if (str.Find(L"www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
strBeCheck = L"http://" + strBeCheck;
return TRUE;
- } else {
- return FALSE;
}
+ return FALSE;
}
FX_BOOL CPDF_LinkExtract::CheckMailLink(CFX_WideString& str)
{
diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h
index e2d6af6e98..ce52371df0 100644
--- a/core/src/fpdftext/text_int.h
+++ b/core/src/fpdftext/text_int.h
@@ -66,7 +66,6 @@ public:
FX_FLOAT yTorelance) const;
virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const;
virtual void GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const;
- virtual int GetOrderByDirection(int order, int direction) const;
virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const;
virtual int CountRects(int start, int nCount);
@@ -87,7 +86,7 @@ public:
static FX_BOOL IsLetter(FX_WCHAR unicode);
private:
FX_BOOL IsHyphen(FX_WCHAR curChar);
- FX_BOOL IsControlChar(PAGECHAR_INFO* pCharInfo);
+ bool IsControlChar(const PAGECHAR_INFO& charInfo);
FX_BOOL GetBaselineRotate(int start, int end, int& Rotate);
void ProcessObject();
void ProcessFormObject(CPDF_FormObject* pFormObj, const CFX_AffineMatrix& formMatrix);