diff options
Diffstat (limited to 'fpdfsdk/fpdf_text.cpp')
-rw-r--r-- | fpdfsdk/fpdf_text.cpp | 393 |
1 files changed, 393 insertions, 0 deletions
diff --git a/fpdfsdk/fpdf_text.cpp b/fpdfsdk/fpdf_text.cpp new file mode 100644 index 0000000000..7778696931 --- /dev/null +++ b/fpdfsdk/fpdf_text.cpp @@ -0,0 +1,393 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "public/fpdf_text.h" + +#include <algorithm> +#include <vector> + +#include "core/fpdfapi/page/cpdf_page.h" +#include "core/fpdfdoc/cpdf_viewerpreferences.h" +#include "core/fpdftext/cpdf_linkextract.h" +#include "core/fpdftext/cpdf_textpage.h" +#include "core/fpdftext/cpdf_textpagefind.h" +#include "fpdfsdk/cpdfsdk_helpers.h" +#include "third_party/base/numerics/safe_conversions.h" +#include "third_party/base/stl_util.h" + +#ifdef PDF_ENABLE_XFA +#include "fpdfsdk/fpdfxfa/cpdfxfa_context.h" +#include "fpdfsdk/fpdfxfa/cpdfxfa_page.h" +#endif // PDF_ENABLE_XFA + +#ifdef _WIN32 +#include <tchar.h> +#endif + +namespace { + +constexpr size_t kBytesPerCharacter = sizeof(unsigned short); + +CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) { + return static_cast<CPDF_TextPage*>(text_page); +} + +CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) { + return static_cast<CPDF_TextPageFind*>(handle); +} + +CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) { + return static_cast<CPDF_LinkExtract*>(link); +} + +} // namespace + +FPDF_EXPORT FPDF_TEXTPAGE FPDF_CALLCONV FPDFText_LoadPage(FPDF_PAGE page) { + CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page); + if (!pPDFPage) + return nullptr; + +#ifdef PDF_ENABLE_XFA + CPDFXFA_Page* pPage = (CPDFXFA_Page*)page; + CPDFXFA_Context* pContext = pPage->GetContext(); + CPDF_ViewerPreferences viewRef(pContext->GetPDFDoc()); +#else // PDF_ENABLE_XFA + CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument.Get()); +#endif // PDF_ENABLE_XFA + + CPDF_TextPage* textpage = new CPDF_TextPage( + pPDFPage, viewRef.IsDirectionR2L() ? FPDFText_Direction::Right + : FPDFText_Direction::Left); + textpage->ParseTextPage(); + return textpage; +} + +FPDF_EXPORT void FPDF_CALLCONV FPDFText_ClosePage(FPDF_TEXTPAGE text_page) { + delete CPDFTextPageFromFPDFTextPage(text_page); +} + +FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountChars(FPDF_TEXTPAGE text_page) { + if (!text_page) + return -1; + + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + return textpage->CountChars(); +} + +FPDF_EXPORT unsigned int FPDF_CALLCONV +FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index) { + if (!text_page) + return 0; + + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + if (index < 0 || index >= textpage->CountChars()) + return 0; + + FPDF_CHAR_INFO charinfo; + textpage->GetCharInfo(index, &charinfo); + return charinfo.m_Unicode; +} + +FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, + int index) { + if (!text_page) + return 0; + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + + if (index < 0 || index >= textpage->CountChars()) + return 0; + + FPDF_CHAR_INFO charinfo; + textpage->GetCharInfo(index, &charinfo); + return charinfo.m_FontSize; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, + int index, + double* left, + double* right, + double* bottom, + double* top) { + if (!text_page || index < 0) + return false; + + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + if (index >= textpage->CountChars()) + return false; + + FPDF_CHAR_INFO charinfo; + textpage->GetCharInfo(index, &charinfo); + *left = charinfo.m_CharBox.left; + *right = charinfo.m_CharBox.right; + *bottom = charinfo.m_CharBox.bottom; + *top = charinfo.m_CharBox.top; + return true; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +FPDFText_GetCharOrigin(FPDF_TEXTPAGE text_page, + int index, + double* x, + double* y) { + if (!text_page) + return false; + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + + if (index < 0 || index >= textpage->CountChars()) + return false; + FPDF_CHAR_INFO charinfo; + textpage->GetCharInfo(index, &charinfo); + *x = charinfo.m_Origin.x; + *y = charinfo.m_Origin.y; + return true; +} + +// select +FPDF_EXPORT int FPDF_CALLCONV +FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, + double x, + double y, + double xTolerance, + double yTolerance) { + if (!text_page) + return -3; + + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + return textpage->GetIndexAtPos( + CFX_PointF(static_cast<float>(x), static_cast<float>(y)), + CFX_SizeF(static_cast<float>(xTolerance), + static_cast<float>(yTolerance))); +} + +FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE page, + int char_start, + int char_count, + unsigned short* result) { + if (!page || char_start < 0 || char_count < 0 || !result) + return 0; + + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(page); + int char_available = textpage->CountChars() - char_start; + if (char_available <= 0) + return 0; + + char_count = std::min(char_count, char_available); + if (char_count == 0) { + // Writing out "", which has a character count of 1 due to the NUL. + *result = '\0'; + return 1; + } + + WideString str = textpage->GetPageText(char_start, char_count); + + if (str.GetLength() > static_cast<size_t>(char_count)) + str = str.Left(static_cast<size_t>(char_count)); + + // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected + // the number of items to stay the same. + ByteString byte_str = str.UTF16LE_Encode(); + size_t byte_str_len = byte_str.GetLength(); + int ret_count = byte_str_len / kBytesPerCharacter; + + ASSERT(ret_count <= char_count + 1); // +1 to account for the NUL terminator. + memcpy(result, byte_str.GetBuffer(byte_str_len), byte_str_len); + return ret_count; +} + +FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page, + int start, + int count) { + if (!text_page) + return 0; + + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + return textpage->CountRects(start, count); +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetRect(FPDF_TEXTPAGE text_page, + int rect_index, + double* left, + double* top, + double* right, + double* bottom) { + if (!text_page) + return false; + + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + CFX_FloatRect rect; + bool result = textpage->GetRect(rect_index, &rect); + + *left = rect.left; + *top = rect.top; + *right = rect.right; + *bottom = rect.bottom; + return result; +} + +FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page, + double left, + double top, + double right, + double bottom, + unsigned short* buffer, + int buflen) { + if (!text_page) + return 0; + + CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); + CFX_FloatRect rect((float)left, (float)bottom, (float)right, (float)top); + WideString str = textpage->GetTextByRect(rect); + + if (buflen <= 0 || !buffer) + return str.GetLength(); + + ByteString cbUTF16Str = str.UTF16LE_Encode(); + int len = cbUTF16Str.GetLength() / sizeof(unsigned short); + int size = buflen > len ? len : buflen; + memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)), + size * sizeof(unsigned short)); + cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short)); + + return size; +} + +// Search +// -1 for end +FPDF_EXPORT FPDF_SCHHANDLE FPDF_CALLCONV +FPDFText_FindStart(FPDF_TEXTPAGE text_page, + FPDF_WIDESTRING findwhat, + unsigned long flags, + int start_index) { + if (!text_page) + return nullptr; + + CPDF_TextPageFind* textpageFind = + new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page)); + size_t len = WideString::WStringLength(findwhat); + textpageFind->FindFirst( + WideString::FromUTF16LE(findwhat, len), flags, + start_index >= 0 ? Optional<size_t>(start_index) : Optional<size_t>()); + return textpageFind; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindNext(FPDF_SCHHANDLE handle) { + if (!handle) + return false; + + CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); + return textpageFind->FindNext(); +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindPrev(FPDF_SCHHANDLE handle) { + if (!handle) + return false; + + CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); + return textpageFind->FindPrev(); +} + +FPDF_EXPORT int FPDF_CALLCONV +FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) { + if (!handle) + return 0; + + CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); + return textpageFind->GetCurOrder(); +} + +FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetSchCount(FPDF_SCHHANDLE handle) { + if (!handle) + return 0; + + CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); + return textpageFind->GetMatchedCount(); +} + +FPDF_EXPORT void FPDF_CALLCONV FPDFText_FindClose(FPDF_SCHHANDLE handle) { + if (!handle) + return; + + CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); + delete textpageFind; + handle = nullptr; +} + +// web link +FPDF_EXPORT FPDF_PAGELINK FPDF_CALLCONV +FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) { + if (!text_page) + return nullptr; + + CPDF_LinkExtract* pageLink = + new CPDF_LinkExtract(CPDFTextPageFromFPDFTextPage(text_page)); + pageLink->ExtractLinks(); + return pageLink; +} + +FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) { + if (!link_page) + return 0; + + CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); + return pdfium::base::checked_cast<int>(pageLink->CountLinks()); +} + +FPDF_EXPORT int FPDF_CALLCONV FPDFLink_GetURL(FPDF_PAGELINK link_page, + int link_index, + unsigned short* buffer, + int buflen) { + WideString wsUrl(L""); + if (link_page && link_index >= 0) { + CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); + wsUrl = pageLink->GetURL(link_index); + } + ByteString cbUTF16URL = wsUrl.UTF16LE_Encode(); + int required = cbUTF16URL.GetLength() / sizeof(unsigned short); + if (!buffer || buflen <= 0) + return required; + + int size = std::min(required, buflen); + if (size > 0) { + int buf_size = size * sizeof(unsigned short); + memcpy(buffer, cbUTF16URL.GetBuffer(buf_size), buf_size); + } + return size; +} + +FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountRects(FPDF_PAGELINK link_page, + int link_index) { + if (!link_page || link_index < 0) + return 0; + + CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); + return pdfium::CollectionSize<int>(pageLink->GetRects(link_index)); +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFLink_GetRect(FPDF_PAGELINK link_page, + int link_index, + int rect_index, + double* left, + double* top, + double* right, + double* bottom) { + if (!link_page || link_index < 0 || rect_index < 0) + return false; + + CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); + std::vector<CFX_FloatRect> rectArray = pageLink->GetRects(link_index); + if (rect_index >= pdfium::CollectionSize<int>(rectArray)) + return false; + + *left = rectArray[rect_index].left; + *right = rectArray[rect_index].right; + *top = rectArray[rect_index].top; + *bottom = rectArray[rect_index].bottom; + return true; +} + +FPDF_EXPORT void FPDF_CALLCONV FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) { + delete CPDFLinkExtractFromFPDFPageLink(link_page); +} |