diff options
-rw-r--r-- | core/include/fpdftext/fpdf_text.h | 8 | ||||
-rw-r--r-- | core/src/fpdftext/fpdf_text_int.cpp | 84 | ||||
-rw-r--r-- | core/src/fpdftext/text_int.h | 2 | ||||
-rw-r--r-- | fpdfsdk/src/fpdftext.cpp | 8 | ||||
-rw-r--r-- | fpdfsdk/src/fpdftext_embeddertest.cpp | 28 |
5 files changed, 80 insertions, 50 deletions
diff --git a/core/include/fpdftext/fpdf_text.h b/core/include/fpdftext/fpdf_text.h index 1b3ac0da00..a7fc6bdfbb 100644 --- a/core/include/fpdftext/fpdf_text.h +++ b/core/include/fpdftext/fpdf_text.h @@ -42,7 +42,8 @@ CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, #define CHAR_NORMAL 0 #define CHAR_GENERATED 1 #define CHAR_UNUNICODE 2 -typedef struct { + +struct FPDF_CHAR_INFO { FX_WCHAR m_Unicode; FX_WCHAR m_Charcode; int32_t m_Flag; @@ -52,7 +53,8 @@ typedef struct { CFX_FloatRect m_CharBox; CPDF_TextObject* m_pTextObj; CFX_Matrix m_Matrix; -} FPDF_CHAR_INFO; +}; + typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray; #define FPDFTEXT_LRTB 0 #define FPDFTEXT_RLTB 1 @@ -92,7 +94,7 @@ class IPDF_TextPage { virtual int CountChars() const = 0; - virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const = 0; + virtual void GetCharInfo(int index, FPDF_CHAR_INFO* info) const = 0; virtual void GetRectArray(int start, int nCount, diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp index f527f48814..3b633a623f 100644 --- a/core/src/fpdftext/fpdf_text_int.cpp +++ b/core/src/fpdftext/fpdf_text_int.cpp @@ -77,6 +77,8 @@ FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj, return baseSpace; } +const FX_FLOAT kDefaultFontSize = 1.0f; + } // namespace CPDFText_ParseOptions::CPDFText_ParseOptions() @@ -458,28 +460,31 @@ int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, CPDF_Point point(x, y); return GetIndexAtPos(point, xTolerance, yTolerance); } -void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { + +void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) return; if (index < 0 || index >= m_charList.GetSize()) return; - PAGECHAR_INFO charinfo; - charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); - info.m_Charcode = charinfo.m_CharCode; - info.m_OriginX = charinfo.m_OriginX; - info.m_OriginY = charinfo.m_OriginY; - info.m_Unicode = charinfo.m_Unicode; - info.m_Flag = charinfo.m_Flag; - info.m_CharBox = charinfo.m_CharBox; - info.m_pTextObj = charinfo.m_pTextObj; - if (charinfo.m_pTextObj && charinfo.m_pTextObj->GetFont()) { - info.m_FontSize = charinfo.m_pTextObj->GetFontSize(); - } - info.m_Matrix.Copy(charinfo.m_Matrix); - return; + const PAGECHAR_INFO* charinfo = + static_cast<PAGECHAR_INFO*>(m_charList.GetAt(index)); + info->m_Charcode = charinfo->m_CharCode; + info->m_OriginX = charinfo->m_OriginX; + info->m_OriginY = charinfo->m_OriginY; + info->m_Unicode = charinfo->m_Unicode; + info->m_Flag = charinfo->m_Flag; + info->m_CharBox = charinfo->m_CharBox; + info->m_pTextObj = charinfo->m_pTextObj; + if (charinfo->m_pTextObj && charinfo->m_pTextObj->GetFont()) { + info->m_FontSize = charinfo->m_pTextObj->GetFontSize(); + } else { + info->m_FontSize = kDefaultFontSize; + } + info->m_Matrix.Copy(charinfo->m_Matrix); } + void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, int32_t& nCount) const { PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(start); @@ -594,6 +599,7 @@ void CPDF_TextPage::GetRect(int rectIndex, right = m_SelRects.GetAt(rectIndex).right; bottom = m_SelRects.GetAt(rectIndex).bottom; } + FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { if (m_ParseOptions.m_bGetCharCodeOnly) { return FALSE; @@ -601,19 +607,18 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { if (end == start) { return FALSE; } - FX_FLOAT dx, dy; - FPDF_CHAR_INFO info1, info2; - GetCharInfo(start, info1); - GetCharInfo(end, info2); - while (info2.m_CharBox.Width() == 0 || info2.m_CharBox.Height() == 0) { - end--; - if (end <= start) { + FPDF_CHAR_INFO info_start; + FPDF_CHAR_INFO info_end; + GetCharInfo(start, &info_start); + GetCharInfo(end, &info_end); + while (info_end.m_CharBox.Width() == 0 || info_end.m_CharBox.Height() == 0) { + if (--end <= start) return FALSE; - } - GetCharInfo(end, info2); + + GetCharInfo(end, &info_end); } - dx = (info2.m_OriginX - info1.m_OriginX); - dy = (info2.m_OriginY - info1.m_OriginY); + FX_FLOAT dx = (info_end.m_OriginX - info_start.m_OriginX); + FX_FLOAT dy = (info_end.m_OriginY - info_start.m_OriginY); if (dx == 0) { if (dy > 0) { Rotate = 90; @@ -633,6 +638,7 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { } return TRUE; } + FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) { if (m_ParseOptions.m_bGetCharCodeOnly) { @@ -2053,6 +2059,7 @@ FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj, } return FALSE; } + FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { int size = m_TempCharList.GetSize(); PAGECHAR_INFO preChar; @@ -2071,24 +2078,21 @@ FX_BOOL CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) { info.m_CharCode = -1; info.m_Flag = FPDFTEXT_CHAR_GENERATED; int preWidth = 0; - if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) { + if (preChar.m_pTextObj && preChar.m_CharCode != (FX_DWORD)-1) preWidth = GetCharWidth(preChar.m_CharCode, preChar.m_pTextObj->GetFont()); - } - FX_FLOAT fs = 0; - if (preChar.m_pTextObj) { - fs = preChar.m_pTextObj->GetFontSize(); - } else { - fs = preChar.m_CharBox.Height(); - } - if (!fs) { - fs = 1; - } - info.m_OriginX = preChar.m_OriginX + preWidth * (fs) / 1000; + + FX_FLOAT fFontSize = preChar.m_pTextObj ? preChar.m_pTextObj->GetFontSize() + : preChar.m_CharBox.Height(); + if (!fFontSize) + fFontSize = kDefaultFontSize; + + info.m_OriginX = preChar.m_OriginX + preWidth * (fFontSize) / 1000; info.m_OriginY = preChar.m_OriginY; info.m_CharBox = CFX_FloatRect(info.m_OriginX, info.m_OriginY, info.m_OriginX, info.m_OriginY); return TRUE; } + FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1, const CFX_FloatRect& rect2) { CFX_FloatRect rect = rect1; @@ -2124,7 +2128,7 @@ CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage) } for (int i = 0; i < nCount; i++) { FPDF_CHAR_INFO info; - pTextPage->GetCharInfo(i, info); + pTextPage->GetCharInfo(i, &info); int indexSize = m_CharIndex.GetSize(); if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { if (indexSize % 2) { @@ -2550,7 +2554,7 @@ void CPDF_LinkExtract::ParseLink() { int TotalChar = m_pTextPage->CountChars(); while (pos < TotalChar) { FPDF_CHAR_INFO pageChar; - m_pTextPage->GetCharInfo(pos, pageChar); + m_pTextPage->GetCharInfo(pos, &pageChar); if (pageChar.m_Flag == CHAR_GENERATED || pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) { int nCount = pos - start; diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h index 0fe43f2c40..4aff0a12a4 100644 --- a/core/src/fpdftext/text_int.h +++ b/core/src/fpdftext/text_int.h @@ -61,7 +61,7 @@ class CPDF_TextPage : public IPDF_TextPage { int CharIndexFromTextIndex(int TextIndex) const override; int TextIndexFromCharIndex(int CharIndex) const override; int CountChars() const override; - void GetCharInfo(int index, FPDF_CHAR_INFO& info) const override; + void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override; void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const override; diff --git a/fpdfsdk/src/fpdftext.cpp b/fpdfsdk/src/fpdftext.cpp index ed34ecdbf7..4659951c3c 100644 --- a/fpdfsdk/src/fpdftext.cpp +++ b/fpdfsdk/src/fpdftext.cpp @@ -33,6 +33,7 @@ DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) { IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; return textpage->CountChars(); } + DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index) { if (!text_page) @@ -43,9 +44,10 @@ DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, return 0; FPDF_CHAR_INFO charinfo; - textpage->GetCharInfo(index, charinfo); + textpage->GetCharInfo(index, &charinfo); return charinfo.m_Unicode; } + DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index) { if (!text_page) @@ -56,7 +58,7 @@ DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, return 0; FPDF_CHAR_INFO charinfo; - textpage->GetCharInfo(index, charinfo); + textpage->GetCharInfo(index, &charinfo); return charinfo.m_FontSize; } @@ -73,7 +75,7 @@ DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, if (index < 0 || index >= textpage->CountChars()) return; FPDF_CHAR_INFO charinfo; - textpage->GetCharInfo(index, charinfo); + textpage->GetCharInfo(index, &charinfo); *left = charinfo.m_CharBox.left; *right = charinfo.m_CharBox.right; *bottom = charinfo.m_CharBox.bottom; diff --git a/fpdfsdk/src/fpdftext_embeddertest.cpp b/fpdfsdk/src/fpdftext_embeddertest.cpp index 4653db32ad..e84a96e966 100644 --- a/fpdfsdk/src/fpdftext_embeddertest.cpp +++ b/fpdfsdk/src/fpdftext_embeddertest.cpp @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include "core/include/fxcrt/fx_basic.h" #include "public/fpdf_text.h" #include "public/fpdfview.h" #include "testing/embedder_test.h" @@ -10,9 +11,9 @@ namespace { -static bool check_unsigned_shorts(const char* expected, - const unsigned short* actual, - size_t length) { +bool check_unsigned_shorts(const char* expected, + const unsigned short* actual, + size_t length) { if (length > strlen(expected) + 1) { return false; } @@ -367,3 +368,24 @@ TEST_F(FPDFTextEmbeddertest, WebLinks) { FPDFText_ClosePage(textpage); UnloadPage(page); } + +TEST_F(FPDFTextEmbeddertest, GetFontSize) { + EXPECT_TRUE(OpenDocument("hello_world.pdf")); + FPDF_PAGE page = LoadPage(0); + EXPECT_NE(nullptr, page); + + FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); + EXPECT_NE(nullptr, textpage); + + const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 1, 1, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; + + int count = FPDFText_CountChars(textpage); + ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), count); + for (int i = 0; i < count; ++i) + EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i; + + FPDFText_ClosePage(textpage); + UnloadPage(page); +} |