summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- core/include/fpdftext/fpdf_text.h 8
-rw-r--r-- core/src/fpdftext/fpdf_text_int.cpp 84
-rw-r--r-- core/src/fpdftext/text_int.h 2
-rw-r--r-- fpdfsdk/src/fpdftext.cpp 8
-rw-r--r-- fpdfsdk/src/fpdftext_embeddertest.cpp 28
5 files changed, 80 insertions, 50 deletions
diff --git a/core/include/fpdftext/fpdf_text.h b/core/include/fpdftext/fpdf_text.h
index 1b3ac0da00..a7fc6bdfbb 100644
--- a/core/include/fpdftext/fpdf_text.h
+++ b/core/include/fpdftext/fpdf_text.h
@@ -42,7 +42,8 @@ CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc,
#define CHAR_NORMAL 0
#define CHAR_GENERATED 1
#define CHAR_UNUNICODE 2
-typedef struct {
+
+struct FPDF_CHAR_INFO {
FX_WCHAR m_Unicode;
FX_WCHAR m_Charcode;
int32_t m_Flag;
@@ -52,7 +53,8 @@ typedef struct {
CFX_FloatRect m_CharBox;
CPDF_TextObject* m_pTextObj;
CFX_Matrix m_Matrix;
-} FPDF_CHAR_INFO;
+};
+
typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray;
#define FPDFTEXT_LRTB 0
#define FPDFTEXT_RLTB 1
@@ -92,7 +94,7 @@ class IPDF_TextPage {
virtual int CountChars() const = 0;
- virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const = 0;
+ virtual void GetCharInfo(int index, FPDF_CHAR_INFO* info) const = 0;
virtual void GetRectArray(int start,
int nCount,
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp
index f527f48814..3b633a623f 100644
--- a/core/src/fpdftext/fpdf_text_int.cpp
+++ b/core/src/fpdftext/fpdf_text_int.cpp
@@ -77,6 +77,8 @@ FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj,
return baseSpace;
}
+const FX_FLOAT kDefaultFontSize = 1.0f;
+
} // namespace
CPDFText_ParseOptions::CPDFText_ParseOptions()
@@ -458,28 +460,31 @@ int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x,
CPDF_Point point(x, y);
return GetIndexAtPos(point, xTolerance, yTolerance);
}
-void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const {
+
+void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const {
if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
return;
if (index < 0 || index >= m_charList.GetSize())
return;
- PAGECHAR_INFO charinfo;
- charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
- info.m_Charcode = charinfo.m_CharCode;
- info.m_OriginX = charinfo.m_OriginX;
- info.m_OriginY = charinfo.m_OriginY;
- info.m_Unicode = charinfo.m_Unicode;
- info.m_Flag = charinfo.m_Flag;
- info.m_CharBox = charinfo.m_CharBox;
- info.m_pTextObj = charinfo.m_pTextObj;
- if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) {
- info.m_FontSize = charinfo.m_pTextObj->GetFontSize();
- }
- info.m_Matrix.Copy(charinfo.m_Matrix);
- return;
+ const PAGECHAR_INFO* charinfo =
+ static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index));
+ info->m_Charcode = charinfo->m_CharCode;
+ info->m_OriginX = charinfo->m_OriginX;
+ info->m_OriginY = charinfo->m_OriginY;
+ info->m_Unicode = charinfo->m_Unicode;
+ info->m_Flag = charinfo->m_Flag;
+ info->m_CharBox = charinfo->m_CharBox;
+ info->m_pTextObj = charinfo->m_pTextObj;
+ if (charinfo->m_pTextObj && charinfo->m_pTextObj->GetFont()) {
+ info->m_FontSize = charinfo->m_pTextObj->GetFontSize();
+ } else {
+ info->m_FontSize = kDefaultFontSize;
+ }
+ info->m_Matrix.Copy(charinfo->m_Matrix);
}
+
void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
int32_t& nCount) const {
PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start);
@@ -594,6 +599,7 @@ void CPDF_TextPage::GetRect(int rectIndex,
right = m_SelRects.GetAt(rectIndex).right;
bottom = m_SelRects.GetAt(rectIndex).bottom;
}
+
FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
if (m_ParseOptions.m_bGetCharCodeOnly) {
return FALSE;
@@ -601,19 +607,18 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
if (end == start) {
return FALSE;
}
- FX_FLOAT dx, dy;
- FPDF_CHAR_INFO info1, info2;
- GetCharInfo(start, info1);
- GetCharInfo(end, info2);
- while (info2.m_CharBox.Width() == 0 || info2.m_CharBox.Height() == 0) {
- end--;
- if (end <= start) {
+ FPDF_CHAR_INFO info_start;
+ FPDF_CHAR_INFO info_end;
+ GetCharInfo(start, &info_start);
+ GetCharInfo(end, &info_end);
+ while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) {
+ if (--end <= start)
return FALSE;
- }
- GetCharInfo(end, info2);
+
+ GetCharInfo(end, &info_end);
}
- dx = (info2.m_OriginX - info1.m_OriginX);
- dy = (info2.m_OriginY - info1.m_OriginY);
+ FX_FLOAT dx = (info_end.m_OriginX - info_start.m_OriginX);
+ FX_FLOAT dy = (info_end.m_OriginY - info_start.m_OriginY);
if (dx == 0) {
if (dy > 0) {
Rotate = 90;
@@ -633,6 +638,7 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
}
return TRUE;
}
+
FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,
int& Rotate) {
if (m_ParseOptions.m_bGetCharCodeOnly) {
@@ -2053,6 +2059,7 @@ FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
}
return FALSE;
}
+
FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) {
int size = m_TempCharList.GetSize();
PAGECHAR_INFO preChar;
@@ -2071,24 +2078,21 @@ FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) {
info.m_CharCode = -1;
info.m_Flag = FPDFTEXT_CHAR_GENERATED;
int preWidth = 0;
- if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) {
+ if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1)
preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont());
- }
- FX_FLOAT fs = 0;
- if (preChar.m_pTextObj) {
- fs = preChar.m_pTextObj->GetFontSize();
- } else {
- fs = preChar.m_CharBox.Height();
- }
- if (!fs) {
- fs = 1;
- }
- info.m_OriginX = preChar.m_OriginX + preWidth * (fs) / 1000;
+
+ FX_FLOAT fFontSize = preChar.m_pTextObj ? preChar.m_pTextObj->GetFontSize()
+ : preChar.m_CharBox.Height();
+ if (!fFontSize)
+ fFontSize = kDefaultFontSize;
+
+ info.m_OriginX = preChar.m_OriginX + preWidth * (fFontSize) / 1000;
info.m_OriginY = preChar.m_OriginY;
info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX,
info.m_OriginY);
return TRUE;
}
+
FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1,
const CFX_FloatRect& rect2) {
CFX_FloatRect rect = rect1;
@@ -2124,7 +2128,7 @@ CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
}
for (int i = 0; i < nCount; i++) {
FPDF_CHAR_INFO info;
- pTextPage->GetCharInfo(i, info);
+ pTextPage->GetCharInfo(i, &info);
int indexSize = m_CharIndex.GetSize();
if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) {
if (indexSize % 2) {
@@ -2550,7 +2554,7 @@ void CPDF_LinkExtract::ParseLink() {
int TotalChar = m_pTextPage->CountChars();
while (pos < TotalChar) {
FPDF_CHAR_INFO pageChar;
- m_pTextPage->GetCharInfo(pos, pageChar);
+ m_pTextPage->GetCharInfo(pos, &pageChar);
if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 ||
pos == TotalChar - 1) {
int nCount = pos - start;
diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h
index 0fe43f2c40..4aff0a12a4 100644
--- a/core/src/fpdftext/text_int.h
+++ b/core/src/fpdftext/text_int.h
@@ -61,7 +61,7 @@ class CPDF_TextPage : public IPDF_TextPage {
int CharIndexFromTextIndex(int TextIndex) const override;
int TextIndexFromCharIndex(int CharIndex) const override;
int CountChars() const override;
- void GetCharInfo(int index, FPDF_CHAR_INFO& info) const override;
+ void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override;
void GetRectArray(int start,
int nCount,
CFX_RectArray& rectArray) const override;
diff --git a/fpdfsdk/src/fpdftext.cpp b/fpdfsdk/src/fpdftext.cpp
index ed34ecdbf7..4659951c3c 100644
--- a/fpdfsdk/src/fpdftext.cpp
+++ b/fpdfsdk/src/fpdftext.cpp
@@ -33,6 +33,7 @@ DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
return textpage->CountChars();
}
+
DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
int index) {
if (!text_page)
@@ -43,9 +44,10 @@ DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
return 0;
FPDF_CHAR_INFO charinfo;
- textpage->GetCharInfo(index, charinfo);
+ textpage->GetCharInfo(index, &charinfo);
return charinfo.m_Unicode;
}
+
DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
int index) {
if (!text_page)
@@ -56,7 +58,7 @@ DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
return 0;
FPDF_CHAR_INFO charinfo;
- textpage->GetCharInfo(index, charinfo);
+ textpage->GetCharInfo(index, &charinfo);
return charinfo.m_FontSize;
}
@@ -73,7 +75,7 @@ DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
if (index < 0 || index >= textpage->CountChars())
return;
FPDF_CHAR_INFO charinfo;
- textpage->GetCharInfo(index, charinfo);
+ textpage->GetCharInfo(index, &charinfo);
*left = charinfo.m_CharBox.left;
*right = charinfo.m_CharBox.right;
*bottom = charinfo.m_CharBox.bottom;
diff --git a/fpdfsdk/src/fpdftext_embeddertest.cpp b/fpdfsdk/src/fpdftext_embeddertest.cpp
index 4653db32ad..e84a96e966 100644
--- a/fpdfsdk/src/fpdftext_embeddertest.cpp
+++ b/fpdfsdk/src/fpdftext_embeddertest.cpp
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "core/include/fxcrt/fx_basic.h"
#include "public/fpdf_text.h"
#include "public/fpdfview.h"
#include "testing/embedder_test.h"
@@ -10,9 +11,9 @@
namespace {
-static bool check_unsigned_shorts(const char* expected,
- const unsigned short* actual,
- size_t length) {
+bool check_unsigned_shorts(const char* expected,
+ const unsigned short* actual,
+ size_t length) {
if (length > strlen(expected) + 1) {
return false;
}
@@ -367,3 +368,24 @@ TEST_F(FPDFTextEmbeddertest, WebLinks) {
FPDFText_ClosePage(textpage);
UnloadPage(page);
}
+
+TEST_F(FPDFTextEmbeddertest, GetFontSize) {
+ EXPECT_TRUE(OpenDocument("hello_world.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ EXPECT_NE(nullptr, page);
+
+ FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+ EXPECT_NE(nullptr, textpage);
+
+ const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 1, 1, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
+
+ int count = FPDFText_CountChars(textpage);
+ ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), count);
+ for (int i = 0; i < count; ++i)
+ EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i;
+
+ FPDFText_ClosePage(textpage);
+ UnloadPage(page);
+}