diff options
Diffstat (limited to 'public/fpdf_text.h')
-rw-r--r-- | public/fpdf_text.h | 320 |
1 files changed, 320 insertions, 0 deletions
diff --git a/public/fpdf_text.h b/public/fpdf_text.h new file mode 100644 index 0000000000..d37715f6b8 --- /dev/null +++ b/public/fpdf_text.h @@ -0,0 +1,320 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef _FPDFTEXT_H_ +#define _FPDFTEXT_H_ + +#include "fpdfview.h" + +// Exported Functions +#ifdef __cplusplus +extern "C" { +#endif + +// Function: FPDFText_LoadPage +// Prepare information about all characters in a page. +// Parameters: +// page - Handle to the page. Returned by FPDF_LoadPage function (in FPDFVIEW module). +// Return value: +// A handle to the text page information structure. +// NULL if something goes wrong. +// Comments: +// The application must call FPDFText_ClosePage to release the text page information. +// +DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); + +// Function: FPDFText_ClosePage +// Release all resources allocated for a text page information structure. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// Return Value: +// None. +// +DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); + +// Function: FPDFText_CountChars +// Get the number of characters in a page. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// Return value: +// Number of characters in the page, or -1 on error. +// Generated characters, such as additional space characters and new line characters, are also counted. +// Comments: +// Characters in a page form a "stream"; inside the stream, each character has an index. +// Index parameters are used by many of the FPDFTEXT functions. The first character in the page +// has an index value of zero. 
+// +DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); + +// Function: FPDFText_GetUnicode +// Get the Unicode value of a character in a page. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// index - Zero-based index of the character. +// Return value: +// The Unicode value of the particular character. +// If a character is not encoded in Unicode and the Foxit engine can't convert it to Unicode, +// the return value will be zero. +// +DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index); + +// Function: FPDFText_GetFontSize +// Get the font size of a particular character. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// index - Zero-based index of the character. +// Return value: +// The font size of the particular character, measured in points (about 1/72 inch). +// This is the typographic size of the font (the so-called "em size"). +// +DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index); + +// Function: FPDFText_GetCharBox +// Get the bounding box of a particular character. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// index - Zero-based index of the character. +// left - Pointer to a double number receiving the left position of the character box. +// right - Pointer to a double number receiving the right position of the character box. +// bottom - Pointer to a double number receiving the bottom position of the character box. +// top - Pointer to a double number receiving the top position of the character box. +// Return Value: +// None. +// Comments: +// All positions are measured in PDF "user space". 
+// +DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, double* left, + double* right, double* bottom, double* top); + +// Function: FPDFText_GetCharIndexAtPos +// Get the index of a character at or nearby a certain position on the page. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// x - X position in PDF "user space". +// y - Y position in PDF "user space". +// xTolerance - An x-axis tolerance value for character hit detection, in point unit. +// yTolerance - A y-axis tolerance value for character hit detection, in point unit. +// Return Value: +// The zero-based index of the character at, or nearby the point (x,y). +// If there is no character at or nearby the point, return value will be -1. +// If an error occurs, -3 will be returned. +// +DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, + double x, double y, double xTorelance, double yTolerance); + +// Function: FPDFText_GetText +// Extract unicode text string from the page. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// start_index - Index for the start characters. +// count - Number of characters to be extracted. +// result - A buffer (allocated by application) receiving the extracted unicodes. +// The size of the buffer must be able to hold the number of characters plus a terminator. +// Return Value: +// Number of characters written into the result buffer, including the trailing terminator. +// Comments: +// This function ignores characters without unicode information. +// +DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result); + +// Function: FPDFText_CountRects +// Count number of rectangular areas occupied by a segment of texts. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 
+// start_index - Index of the starting character. +// count - Number of characters. +// Return value: +// Number of rectangles. Zero for error. +// Comments: +// This function, along with FPDFText_GetRect, can be used by applications to detect the position +// on the page of a text segment, so the proper areas can be highlighted. +// FPDFTEXT will automatically merge small character boxes into bigger ones if those characters +// are on the same line and use the same font settings. +// +DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_index, int count); + +// Function: FPDFText_GetRect +// Get a rectangular area from the result generated by FPDFText_CountRects. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// rect_index - Zero-based index for the rectangle. +// left - Pointer to a double value receiving the rectangle left boundary. +// top - Pointer to a double value receiving the rectangle top boundary. +// right - Pointer to a double value receiving the rectangle right boundary. +// bottom - Pointer to a double value receiving the rectangle bottom boundary. +// Return Value: +// None. +// +DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index, double* left, double* top, + double* right, double* bottom); + +// Function: FPDFText_GetBoundedText +// Extract unicode text within a rectangular boundary on the page. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// left - Left boundary. +// top - Top boundary. +// right - Right boundary. +// bottom - Bottom boundary. +// buffer - A unicode buffer. +// buflen - Number of characters (not bytes) for the buffer, excluding an additional terminator. +// Return Value: +// If buffer is NULL or buflen is zero, return the number of characters (not bytes) of text present within +// the rectangle, excluding a terminating NUL. 
Generally you should pass a buffer at least one larger +// than this if you want a terminating NUL, which will be provided if space is available. +// Otherwise, return the number of characters copied into the buffer, including the terminating NUL +// when space for it is available. +// Comments: +// If the buffer is too small, as much text as will fit is copied into it. +// +DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top, + double right, double bottom,unsigned short* buffer,int buflen); + + +// Flags used by the FPDFText_FindStart function. +#define FPDF_MATCHCASE 0x00000001 // If not set, the search does not match case. +#define FPDF_MATCHWHOLEWORD 0x00000002 // If not set, the search does not match whole words only. + +// Function: FPDFText_FindStart +// Start a search. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// findwhat - A unicode match pattern. +// flags - Option flags (FPDF_MATCHCASE, FPDF_MATCHWHOLEWORD). +// start_index - Start from this character. -1 for end of the page. +// Return Value: +// A handle for the search context. FPDFText_FindClose must be called to release this handle. +// +DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat, + unsigned long flags, int start_index); + +// Function: FPDFText_FindNext +// Search in the direction from page start to end. +// Parameters: +// handle - A search context handle returned by FPDFText_FindStart. +// Return Value: +// Whether a match is found. +// +DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle); + +// Function: FPDFText_FindPrev +// Search in the direction from page end to start. +// Parameters: +// handle - A search context handle returned by FPDFText_FindStart. +// Return Value: +// Whether a match is found. 
+// +DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle); + +// Function: FPDFText_GetSchResultIndex +// Get the starting character index of the search result. +// Parameters: +// handle - A search context handle returned by FPDFText_FindStart. +// Return Value: +// Index of the starting character. +// +DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle); + +// Function: FPDFText_GetSchCount +// Get the number of matched characters in the search result. +// Parameters: +// handle - A search context handle returned by FPDFText_FindStart. +// Return Value: +// Number of matched characters. +// +DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle); + +// Function: FPDFText_FindClose +// Release a search context. +// Parameters: +// handle - A search context handle returned by FPDFText_FindStart. +// Return Value: +// None. +// +DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); + +// Function: FPDFLink_LoadWebLinks +// Prepare information about weblinks in a page. +// Parameters: +// text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. +// Return Value: +// A handle to the page's links information structure. +// NULL if something goes wrong. +// Comments: +// Weblinks are those links implicitly embedded in PDF pages. PDF also has a type of +// annotation called "link"; FPDFTEXT doesn't deal with that kind of link. +// The FPDFTEXT weblink feature is useful for automatically detecting links in the page +// contents. For example, things like "http://www.foxitsoftware.com" will be detected, +// so applications can allow the user to click on those characters to activate the link, +// even if the PDF doesn't come with link annotations. +// +// FPDFLink_CloseWebLinks must be called to release resources. +// +DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page); + +// Function: FPDFLink_CountWebLinks +// Count the number of detected web links. 
+// Parameters: +// link_page - Handle returned by FPDFLink_LoadWebLinks. +// Return Value: +// Number of detected web links. +// +DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); + +// Function: FPDFLink_GetURL +// Fetch the URL information for a detected web link. +// Parameters: +// link_page - Handle returned by FPDFLink_LoadWebLinks. +// link_index - Zero-based index for the link. +// buffer - A unicode buffer. +// buflen - Number of characters (not bytes) for the buffer, including an additional terminator. +// Return Value: +// If buffer is NULL or buflen is zero, return the number of characters (not bytes) needed, including the additional terminator; +// otherwise, return the number of characters copied into the buffer. +// +DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,int buflen); + +// Function: FPDFLink_CountRects +// Count the number of rectangular areas for the link. +// Parameters: +// link_page - Handle returned by FPDFLink_LoadWebLinks. +// link_index - Zero-based index for the link. +// Return Value: +// Number of rectangular areas for the link. +// +DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index); + +// Function: FPDFLink_GetRect +// Fetch the boundaries of a rectangle for a link. +// Parameters: +// link_page - Handle returned by FPDFLink_LoadWebLinks. +// link_index - Zero-based index for the link. +// rect_index - Zero-based index for a rectangle. +// left - Pointer to a double value receiving the rectangle left boundary. +// top - Pointer to a double value receiving the rectangle top boundary. +// right - Pointer to a double value receiving the rectangle right boundary. +// bottom - Pointer to a double value receiving the rectangle bottom boundary. +// Return Value: +// None. 
+// +DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index, + double* left, double* top, double* right, double* bottom); + +// Function: FPDFLink_CloseWebLinks +// Release resources used by the weblink feature. +// Parameters: +// link_page - Handle returned by FPDFLink_LoadWebLinks. +// Return Value: +// None. +// +DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); + + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // _FPDFTEXT_H_ |