summaryrefslogtreecommitdiff
path: root/fpdfsdk/fpdf_text.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fpdfsdk/fpdf_text.cpp')
-rw-r--r--fpdfsdk/fpdf_text.cpp393
1 files changed, 393 insertions, 0 deletions
diff --git a/fpdfsdk/fpdf_text.cpp b/fpdfsdk/fpdf_text.cpp
new file mode 100644
index 0000000000..7778696931
--- /dev/null
+++ b/fpdfsdk/fpdf_text.cpp
@@ -0,0 +1,393 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "public/fpdf_text.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "core/fpdfapi/page/cpdf_page.h"
+#include "core/fpdfdoc/cpdf_viewerpreferences.h"
+#include "core/fpdftext/cpdf_linkextract.h"
+#include "core/fpdftext/cpdf_textpage.h"
+#include "core/fpdftext/cpdf_textpagefind.h"
+#include "fpdfsdk/cpdfsdk_helpers.h"
+#include "third_party/base/numerics/safe_conversions.h"
+#include "third_party/base/stl_util.h"
+
+#ifdef PDF_ENABLE_XFA
+#include "fpdfsdk/fpdfxfa/cpdfxfa_context.h"
+#include "fpdfsdk/fpdfxfa/cpdfxfa_page.h"
+#endif // PDF_ENABLE_XFA
+
+#ifdef _WIN32
+#include <tchar.h>
+#endif
+
+namespace {
+
+// Width in bytes of one text unit as exchanged through the public API, which
+// passes text in |unsigned short| buffers.
+constexpr size_t kBytesPerCharacter = sizeof(unsigned short);
+
+// The public handle types are opaque pointers. The helpers below cast them
+// back to the implementation classes. They perform no validation; a null
+// handle simply becomes a null pointer.
+
+// Converts an FPDF_TEXTPAGE handle to the CPDF_TextPage it refers to.
+CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) {
+  return static_cast<CPDF_TextPage*>(text_page);
+}
+
+// Converts an FPDF_SCHHANDLE search handle to its CPDF_TextPageFind.
+CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) {
+  return static_cast<CPDF_TextPageFind*>(handle);
+}
+
+// Converts an FPDF_PAGELINK handle to its CPDF_LinkExtract.
+CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) {
+  return static_cast<CPDF_LinkExtract*>(link);
+}
+
+}  // namespace
+
+// Creates and parses a CPDF_TextPage for |page|.
+//
+// Returns nullptr when |page| does not resolve to a CPDF_Page. Otherwise the
+// result is allocated with |new| here and is released by
+// FPDFText_ClosePage().
+FPDF_EXPORT FPDF_TEXTPAGE FPDF_CALLCONV FPDFText_LoadPage(FPDF_PAGE page) {
+  CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
+  if (!pPDFPage)
+    return nullptr;
+
+  // The viewer preferences are read from the page's document. XFA builds
+  // obtain that document through the XFA context of the page.
+#ifdef PDF_ENABLE_XFA
+  CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
+  CPDFXFA_Context* pContext = pPage->GetContext();
+  CPDF_ViewerPreferences viewRef(pContext->GetPDFDoc());
+#else   // PDF_ENABLE_XFA
+  CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument.Get());
+#endif  // PDF_ENABLE_XFA
+
+  // A right-to-left viewer preference selects the Right text direction.
+  const auto direction = viewRef.IsDirectionR2L() ? FPDFText_Direction::Right
+                                                  : FPDFText_Direction::Left;
+  CPDF_TextPage* textpage = new CPDF_TextPage(pPDFPage, direction);
+  textpage->ParseTextPage();
+  return textpage;
+}
+
+// Destroys the text page behind |text_page|. A null handle is harmless
+// because deleting a null pointer does nothing.
+FPDF_EXPORT void FPDF_CALLCONV FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
+  delete textpage;
+}
+
+// Returns the character count reported by the text page, or -1 when
+// |text_page| is null.
+FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
+  return text_page ? CPDFTextPageFromFPDFTextPage(text_page)->CountChars()
+                   : -1;
+}
+
+// Returns the m_Unicode field of the character info at |index|, or 0 when
+// |text_page| is null or |index| is outside [0, CountChars()).
+FPDF_EXPORT unsigned int FPDF_CALLCONV
+FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index) {
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
+  const bool in_range =
+      textpage && index >= 0 && index < textpage->CountChars();
+  if (!in_range)
+    return 0;
+
+  FPDF_CHAR_INFO info;
+  textpage->GetCharInfo(index, &info);
+  return info.m_Unicode;
+}
+
+// Returns the m_FontSize field of the character info at |index|, or 0 when
+// |text_page| is null or |index| is outside [0, CountChars()).
+FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
+                                                      int index) {
+  if (!text_page || index < 0)
+    return 0;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
+  if (index >= textpage->CountChars())
+    return 0;
+
+  FPDF_CHAR_INFO info;
+  textpage->GetCharInfo(index, &info);
+  return info.m_FontSize;
+}
+
+// Retrieves the bounding box of the character at |index|.
+//
+// Returns false, leaving every output untouched, when |text_page| is null,
+// when any of the output pointers is null, or when |index| is outside
+// [0, CountChars()). Otherwise the edges of the character box are stored
+// through |left|, |right|, |bottom| and |top| and true is returned.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
+                                                        int index,
+                                                        double* left,
+                                                        double* right,
+                                                        double* bottom,
+                                                        double* top) {
+  // All four outputs are written unconditionally below, so refuse null
+  // pointers up front instead of crashing inside a public entry point.
+  if (!left || !right || !bottom || !top)
+    return false;
+
+  if (!text_page || index < 0)
+    return false;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
+  if (index >= textpage->CountChars())
+    return false;
+
+  FPDF_CHAR_INFO charinfo;
+  textpage->GetCharInfo(index, &charinfo);
+  *left = charinfo.m_CharBox.left;
+  *right = charinfo.m_CharBox.right;
+  *bottom = charinfo.m_CharBox.bottom;
+  *top = charinfo.m_CharBox.top;
+  return true;
+}
+
+// Retrieves the origin of the character at |index|.
+//
+// Returns false, leaving |x| and |y| untouched, when |text_page| is null,
+// when |x| or |y| is null, or when |index| is outside [0, CountChars()).
+// Otherwise the origin coordinates are stored through |x| and |y| and true is
+// returned.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+FPDFText_GetCharOrigin(FPDF_TEXTPAGE text_page,
+                       int index,
+                       double* x,
+                       double* y) {
+  // Both outputs are dereferenced below; reject null pointers rather than
+  // crashing inside a public entry point.
+  if (!text_page || !x || !y)
+    return false;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
+  if (index < 0 || index >= textpage->CountChars())
+    return false;
+
+  FPDF_CHAR_INFO charinfo;
+  textpage->GetCharInfo(index, &charinfo);
+  *x = charinfo.m_Origin.x;
+  *y = charinfo.m_Origin.y;
+  return true;
+}
+
+// Selection support: maps a position to a character index.
+//
+// Returns -3 when |text_page| is null. Otherwise forwards the point (x, y)
+// and the tolerance pair, each narrowed to float, to
+// CPDF_TextPage::GetIndexAtPos() and returns its result unchanged.
+FPDF_EXPORT int FPDF_CALLCONV
+FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
+                           double x,
+                           double y,
+                           double xTolerance,
+                           double yTolerance) {
+  if (!text_page)
+    return -3;
+
+  const CFX_PointF point(static_cast<float>(x), static_cast<float>(y));
+  const CFX_SizeF tolerance(static_cast<float>(xTolerance),
+                            static_cast<float>(yTolerance));
+  return CPDFTextPageFromFPDFTextPage(text_page)->GetIndexAtPos(point,
+                                                                tolerance);
+}
+
+// Copies page text, encoded as UTF-16LE, into |result|.
+//
+// At most |char_count| characters starting at |char_start| are extracted.
+// Returns 0 when |page| or |result| is null, when |char_start| or
+// |char_count| is negative, or when |char_start| is at or past the end of the
+// page text. Otherwise returns the number of 16-bit units copied to |result|;
+// as the assertion below notes, that may be one more than the clamped
+// |char_count| because of the NUL terminator.
+FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE page,
+                                               int char_start,
+                                               int char_count,
+                                               unsigned short* result) {
+  if (!page || !result || char_start < 0 || char_count < 0)
+    return 0;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(page);
+  const int remaining = textpage->CountChars() - char_start;
+  if (remaining <= 0)
+    return 0;
+
+  // Never read past the end of the page text.
+  char_count = std::min(char_count, remaining);
+  if (char_count == 0) {
+    // Writing out "", which has a character count of 1 due to the NUL.
+    *result = '\0';
+    return 1;
+  }
+
+  const size_t max_chars = static_cast<size_t>(char_count);
+  WideString text = textpage->GetPageText(char_start, char_count);
+  if (text.GetLength() > max_chars)
+    text = text.Left(max_chars);
+
+  // UTF16LE_Encode doesn't handle surrogate pairs properly, so it is expected
+  // the number of items to stay the same.
+  ByteString encoded = text.UTF16LE_Encode();
+  const size_t byte_len = encoded.GetLength();
+  const int ret_count = byte_len / kBytesPerCharacter;
+
+  ASSERT(ret_count <= char_count + 1);  // +1 to account for the NUL terminator.
+  memcpy(result, encoded.GetBuffer(byte_len), byte_len);
+  return ret_count;
+}
+
+// Returns CPDF_TextPage::CountRects(start, count) for the page, or 0 when
+// |text_page| is null.
+FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page,
+                                                  int start,
+                                                  int count) {
+  return text_page ? CPDFTextPageFromFPDFTextPage(text_page)->CountRects(
+                         start, count)
+                   : 0;
+}
+
+// Retrieves the rectangle at |rect_index| from the text page.
+//
+// Returns false without touching the outputs when |text_page| or any output
+// pointer is null. Otherwise the outputs are always written with the contents
+// of the local rectangle passed to CPDF_TextPage::GetRect(), and the return
+// value is whatever that call reported.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetRect(FPDF_TEXTPAGE text_page,
+                                                     int rect_index,
+                                                     double* left,
+                                                     double* top,
+                                                     double* right,
+                                                     double* bottom) {
+  // The outputs are written unconditionally below, so refuse null pointers
+  // instead of crashing inside a public entry point.
+  if (!text_page || !left || !top || !right || !bottom)
+    return false;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
+  CFX_FloatRect rect;
+  const bool result = textpage->GetRect(rect_index, &rect);
+
+  *left = rect.left;
+  *top = rect.top;
+  *right = rect.right;
+  *bottom = rect.bottom;
+  return result;
+}
+
+// Extracts the text inside the rectangle (left, bottom, right, top).
+//
+// Returns 0 when |text_page| is null. When |buffer| is null or |buflen| is
+// not positive, nothing is copied and the length of the extracted wide string
+// is returned. Otherwise the text is encoded as UTF-16LE, at most |buflen|
+// 16-bit units are copied into |buffer|, and the number copied is returned.
+FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
+                                                      double left,
+                                                      double top,
+                                                      double right,
+                                                      double bottom,
+                                                      unsigned short* buffer,
+                                                      int buflen) {
+  if (!text_page)
+    return 0;
+
+  CFX_FloatRect bounds(static_cast<float>(left), static_cast<float>(bottom),
+                       static_cast<float>(right), static_cast<float>(top));
+  WideString text =
+      CPDFTextPageFromFPDFTextPage(text_page)->GetTextByRect(bounds);
+
+  if (!buffer || buflen <= 0)
+    return text.GetLength();
+
+  ByteString encoded = text.UTF16LE_Encode();
+  const int available = encoded.GetLength() / sizeof(unsigned short);
+  // Copy no more than the caller's buffer can hold.
+  const int copied = std::min(buflen, available);
+  const size_t copied_bytes = copied * sizeof(unsigned short);
+  memcpy(buffer, encoded.GetBuffer(copied_bytes), copied_bytes);
+  encoded.ReleaseBuffer(copied_bytes);
+
+  return copied;
+}
+
+// Search support: creates a search context over a text page.
+//
+// Returns nullptr when |text_page| is null. Otherwise a CPDF_TextPageFind is
+// allocated with |new| and FindFirst() is invoked on it with the search
+// string, |flags| and the start position. A negative |start_index| is passed
+// on as an empty Optional; the original note here reads "-1 for end".
+// FPDFText_FindClose() deletes the returned object.
+FPDF_EXPORT FPDF_SCHHANDLE FPDF_CALLCONV
+FPDFText_FindStart(FPDF_TEXTPAGE text_page,
+                   FPDF_WIDESTRING findwhat,
+                   unsigned long flags,
+                   int start_index) {
+  if (!text_page)
+    return nullptr;
+
+  const Optional<size_t> start_pos = start_index >= 0
+                                         ? Optional<size_t>(start_index)
+                                         : Optional<size_t>();
+  const size_t len = WideString::WStringLength(findwhat);
+
+  CPDF_TextPageFind* finder =
+      new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page));
+  finder->FindFirst(WideString::FromUTF16LE(findwhat, len), flags, start_pos);
+  return finder;
+}
+
+// Forwards to CPDF_TextPageFind::FindNext() and returns its result, or false
+// when |handle| is null.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindNext(FPDF_SCHHANDLE handle) {
+  return handle ? CPDFTextPageFindFromFPDFSchHandle(handle)->FindNext()
+                : false;
+}
+
+// Forwards to CPDF_TextPageFind::FindPrev() and returns its result, or false
+// when |handle| is null.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
+  return handle ? CPDFTextPageFindFromFPDFSchHandle(handle)->FindPrev()
+                : false;
+}
+
+// Returns CPDF_TextPageFind::GetCurOrder() for the search context, or 0 when
+// |handle| is null.
+FPDF_EXPORT int FPDF_CALLCONV
+FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
+  return handle ? CPDFTextPageFindFromFPDFSchHandle(handle)->GetCurOrder() : 0;
+}
+
+// Returns CPDF_TextPageFind::GetMatchedCount() for the search context, or 0
+// when |handle| is null.
+FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
+  return handle
+             ? CPDFTextPageFindFromFPDFSchHandle(handle)->GetMatchedCount()
+             : 0;
+}
+
+// Destroys the search context behind |handle|.
+//
+// |handle| is received by value, so this function cannot clear the caller's
+// copy; callers must not use the handle after this call. A null handle is
+// harmless because deleting a null pointer does nothing.
+FPDF_EXPORT void FPDF_CALLCONV FPDFText_FindClose(FPDF_SCHHANDLE handle) {
+  delete CPDFTextPageFindFromFPDFSchHandle(handle);
+}
+
+// Web link support: creates a link extractor over a text page.
+//
+// Returns nullptr when |text_page| is null. Otherwise a CPDF_LinkExtract is
+// allocated with |new|, ExtractLinks() is run on it, and it is returned as an
+// opaque handle. FPDFLink_CloseWebLinks() deletes it.
+FPDF_EXPORT FPDF_PAGELINK FPDF_CALLCONV
+FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
+  if (!text_page)
+    return nullptr;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
+  CPDF_LinkExtract* extractor = new CPDF_LinkExtract(textpage);
+  extractor->ExtractLinks();
+  return extractor;
+}
+
+// Returns the extractor's CountLinks() value converted to int through
+// checked_cast, or 0 when |link_page| is null.
+FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
+  if (!link_page)
+    return 0;
+
+  return pdfium::base::checked_cast<int>(
+      CPDFLinkExtractFromFPDFPageLink(link_page)->CountLinks());
+}
+
+// Fetches the URL of the link at |link_index| as UTF-16LE.
+//
+// The URL is treated as empty when |link_page| is null or |link_index| is
+// negative. When |buffer| is null or |buflen| is not positive, nothing is
+// copied and the number of 16-bit units in the encoded URL is returned.
+// Otherwise at most |buflen| units are copied into |buffer| and the number
+// copied is returned.
+FPDF_EXPORT int FPDF_CALLCONV FPDFLink_GetURL(FPDF_PAGELINK link_page,
+                                              int link_index,
+                                              unsigned short* buffer,
+                                              int buflen) {
+  WideString url(L"");
+  if (link_page && link_index >= 0)
+    url = CPDFLinkExtractFromFPDFPageLink(link_page)->GetURL(link_index);
+
+  ByteString encoded = url.UTF16LE_Encode();
+  const int required = encoded.GetLength() / sizeof(unsigned short);
+  if (!buffer || buflen <= 0)
+    return required;
+
+  // Copy no more than the caller's buffer can hold.
+  const int copied = std::min(required, buflen);
+  if (copied > 0) {
+    const int byte_count = copied * sizeof(unsigned short);
+    memcpy(buffer, encoded.GetBuffer(byte_count), byte_count);
+  }
+  return copied;
+}
+
+// Returns the number of rectangles the extractor reports for the link at
+// |link_index|, or 0 when |link_page| is null or |link_index| is negative.
+FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountRects(FPDF_PAGELINK link_page,
+                                                  int link_index) {
+  const bool usable = link_page && link_index >= 0;
+  if (!usable)
+    return 0;
+
+  return pdfium::CollectionSize<int>(
+      CPDFLinkExtractFromFPDFPageLink(link_page)->GetRects(link_index));
+}
+
+// Retrieves rectangle |rect_index| of the link at |link_index|.
+//
+// Returns false, leaving the outputs untouched, when |link_page| or any
+// output pointer is null, when either index is negative, or when
+// |rect_index| is not below the number of rectangles returned by GetRects()
+// for that link. Otherwise the rectangle's edges are stored through the
+// outputs and true is returned.
+FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFLink_GetRect(FPDF_PAGELINK link_page,
+                                                     int link_index,
+                                                     int rect_index,
+                                                     double* left,
+                                                     double* top,
+                                                     double* right,
+                                                     double* bottom) {
+  // The outputs are dereferenced below; reject null pointers rather than
+  // crashing inside a public entry point.
+  if (!left || !top || !right || !bottom)
+    return false;
+
+  if (!link_page || link_index < 0 || rect_index < 0)
+    return false;
+
+  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
+  const std::vector<CFX_FloatRect> rects = pageLink->GetRects(link_index);
+  if (rect_index >= pdfium::CollectionSize<int>(rects))
+    return false;
+
+  const CFX_FloatRect& rect = rects[rect_index];
+  *left = rect.left;
+  *right = rect.right;
+  *top = rect.top;
+  *bottom = rect.bottom;
+  return true;
+}
+
+// Destroys the link extractor behind |link_page|. A null handle is harmless
+// because deleting a null pointer does nothing.
+FPDF_EXPORT void FPDF_CALLCONV FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
+  CPDF_LinkExtract* extractor = CPDFLinkExtractFromFPDFPageLink(link_page);
+  delete extractor;
+}