diff options
Diffstat (limited to 'core/include/fpdftext/fpdf_text.h')
-rw-r--r-- | core/include/fpdftext/fpdf_text.h | 229 |
1 files changed, 122 insertions, 107 deletions
diff --git a/core/include/fpdftext/fpdf_text.h b/core/include/fpdftext/fpdf_text.h index 04922c4b7f..eb16ca9a90 100644 --- a/core/include/fpdftext/fpdf_text.h +++ b/core/include/fpdftext/fpdf_text.h @@ -17,153 +17,168 @@ class IPDF_ReflowedPage; class IPDF_TextPage; class IPDF_TextPageFind; -#define PDF2TXT_AUTO_ROTATE 1 -#define PDF2TXT_AUTO_WIDTH 2 -#define PDF2TXT_KEEP_COLUMN 4 -#define PDF2TXT_USE_OCR 8 -#define PDF2TXT_INCLUDE_INVISIBLE 16 -void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage, - int iMinWidth, FX_DWORD flags); -void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage, - int iMinWidth, FX_DWORD flags); -void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage, +#define PDF2TXT_AUTO_ROTATE 1 +#define PDF2TXT_AUTO_WIDTH 2 +#define PDF2TXT_KEEP_COLUMN 4 +#define PDF2TXT_USE_OCR 8 +#define PDF2TXT_INCLUDE_INVISIBLE 16 +void PDF_GetPageText(CFX_ByteStringArray& lines, + CPDF_Document* pDoc, + CPDF_Dictionary* pPage, + int iMinWidth, + FX_DWORD flags); +void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, + CPDF_Document* pDoc, + CPDF_Dictionary* pPage, + int iMinWidth, + FX_DWORD flags); +void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, + CPDF_Document* pDoc, + CPDF_Dictionary* pPage, FX_DWORD flags); -CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage); -#define CHAR_ERROR -1 -#define CHAR_NORMAL 0 -#define CHAR_GENERATED 1 -#define CHAR_UNUNICODE 2 +CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, + CPDF_Dictionary* pPage); +#define CHAR_ERROR -1 +#define CHAR_NORMAL 0 +#define CHAR_GENERATED 1 +#define CHAR_UNUNICODE 2 typedef struct { - FX_WCHAR m_Unicode; - FX_WCHAR m_Charcode; - int32_t m_Flag; - FX_FLOAT m_FontSize; - FX_FLOAT m_OriginX; - FX_FLOAT m_OriginY; - CFX_FloatRect m_CharBox; - CPDF_TextObject* m_pTextObj; - CFX_AffineMatrix m_Matrix; + FX_WCHAR m_Unicode; + FX_WCHAR m_Charcode; + int32_t m_Flag; + FX_FLOAT m_FontSize; + FX_FLOAT m_OriginX; + FX_FLOAT m_OriginY; + CFX_FloatRect m_CharBox; + CPDF_TextObject* m_pTextObj; + CFX_AffineMatrix m_Matrix; } FPDF_CHAR_INFO; -typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray; -#define FPDFTEXT_LRTB 0 -#define FPDFTEXT_RLTB 1 -#define FPDFTEXT_TBRL 2 -#define FPDFTEXT_LEFT -1 -#define FPDFTEXT_RIGHT 1 -#define FPDFTEXT_UP -2 -#define FPDFTEXT_DOWN 2 -#define FPDFTEXT_WRITINGMODE_UNKNOW 0 -#define FPDFTEXT_WRITINGMODE_LRTB 1 -#define FPDFTEXT_WRITINGMODE_RLTB 2 -#define FPDFTEXT_WRITINGMODE_TBRL 3 -class CPDFText_ParseOptions -{ -public: - - CPDFText_ParseOptions(); - FX_BOOL m_bGetCharCodeOnly; - FX_BOOL m_bNormalizeObjs; - FX_BOOL m_bOutputHyphen; +typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray; +#define FPDFTEXT_LRTB 0 +#define FPDFTEXT_RLTB 1 +#define FPDFTEXT_TBRL 2 +#define FPDFTEXT_LEFT -1 +#define FPDFTEXT_RIGHT 1 +#define FPDFTEXT_UP -2 +#define FPDFTEXT_DOWN 2 +#define FPDFTEXT_WRITINGMODE_UNKNOW 0 +#define FPDFTEXT_WRITINGMODE_LRTB 1 +#define FPDFTEXT_WRITINGMODE_RLTB 2 +#define FPDFTEXT_WRITINGMODE_TBRL 3 +class CPDFText_ParseOptions { + public: + CPDFText_ParseOptions(); + FX_BOOL m_bGetCharCodeOnly; + FX_BOOL m_bNormalizeObjs; + FX_BOOL m_bOutputHyphen; }; -class IPDF_TextPage -{ -public: +class IPDF_TextPage { + public: + virtual ~IPDF_TextPage() {} + static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, + CPDFText_ParseOptions ParserOptions); + static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, int flags = 0); + static IPDF_TextPage* CreateTextPage(const CPDF_PageObjects* pObjs, + int flags = 0); + static IPDF_TextPage* CreateReflowTextPage(IPDF_ReflowedPage* pRefPage); - virtual ~IPDF_TextPage() {} - static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); - static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, int flags = 0); - static IPDF_TextPage* CreateTextPage(const CPDF_PageObjects* pObjs, int flags = 0); - static IPDF_TextPage* CreateReflowTextPage(IPDF_ReflowedPage* pRefPage); + virtual void NormalizeObjects(FX_BOOL bNormalize) = 0; - virtual void NormalizeObjects(FX_BOOL bNormalize) = 0; + virtual FX_BOOL ParseTextPage() = 0; - virtual FX_BOOL ParseTextPage() = 0; + virtual FX_BOOL IsParsered() const = 0; + public: + virtual int CharIndexFromTextIndex(int TextIndex) const = 0; - virtual FX_BOOL IsParsered() const = 0; -public: + virtual int TextIndexFromCharIndex(int CharIndex) const = 0; - virtual int CharIndexFromTextIndex(int TextIndex) const = 0; + virtual int CountChars() const = 0; - virtual int TextIndexFromCharIndex(int CharIndex) const = 0; + virtual void GetCharInfo(int index, FPDF_CHAR_INFO& info) const = 0; + virtual void GetRectArray(int start, + int nCount, + CFX_RectArray& rectArray) const = 0; - virtual int CountChars() const = 0; + virtual int GetIndexAtPos(CPDF_Point point, + FX_FLOAT xTorelance, + FX_FLOAT yTorelance) const = 0; - virtual void GetCharInfo(int index, FPDF_CHAR_INFO & info) const = 0; + virtual int GetIndexAtPos(FX_FLOAT x, + FX_FLOAT y, + FX_FLOAT xTorelance, + FX_FLOAT yTorelance) const = 0; - virtual void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const = 0; + virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const = 0; + virtual void GetRectsArrayByRect(const CFX_FloatRect& rect, + CFX_RectArray& resRectArray) const = 0; + virtual int CountRects(int start, int nCount) = 0; - virtual int GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0; + virtual void GetRect(int rectIndex, + FX_FLOAT& left, + FX_FLOAT& top, + FX_FLOAT& right, + FX_FLOAT& bottom) const = 0; - virtual int GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0; + virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) = 0; - virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const = 0; + virtual FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) = 0; - virtual void GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const = 0; + virtual int CountBoundedSegments(FX_FLOAT left, + FX_FLOAT top, + FX_FLOAT right, + FX_FLOAT bottom, + FX_BOOL bContains = FALSE) = 0; + virtual void GetBoundedSegment(int index, int& start, int& count) const = 0; - virtual int CountRects(int start, int nCount) = 0; + virtual int GetWordBreak(int index, int direction) const = 0; - virtual void GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const = 0; - - virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) = 0; - - virtual FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) = 0; - - virtual int CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE) = 0; - - virtual void GetBoundedSegment(int index, int& start, int& count) const = 0; - - - virtual int GetWordBreak(int index, int direction) const = 0; - - virtual CFX_WideString GetPageText(int start = 0, int nCount = -1 ) const = 0; + virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const = 0; }; -#define FPDFTEXT_MATCHCASE 0x00000001 +#define FPDFTEXT_MATCHCASE 0x00000001 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002 -#define FPDFTEXT_CONSECUTIVE 0x00000004 -class IPDF_TextPageFind -{ -public: +#define FPDFTEXT_CONSECUTIVE 0x00000004 +class IPDF_TextPageFind { + public: + virtual ~IPDF_TextPageFind() {} - virtual ~IPDF_TextPageFind() {} + static IPDF_TextPageFind* CreatePageFind(const IPDF_TextPage* pTextPage); - static IPDF_TextPageFind* CreatePageFind(const IPDF_TextPage* pTextPage); -public: + public: + virtual FX_BOOL FindFirst(const CFX_WideString& findwhat, + int flags, + int startPos = 0) = 0; - virtual FX_BOOL FindFirst(const CFX_WideString& findwhat, int flags, int startPos = 0) = 0; + virtual FX_BOOL FindNext() = 0; - virtual FX_BOOL FindNext() = 0; + virtual FX_BOOL FindPrev() = 0; - virtual FX_BOOL FindPrev() = 0; + virtual void GetRectArray(CFX_RectArray& rects) const = 0; - virtual void GetRectArray(CFX_RectArray& rects) const = 0; + virtual int GetCurOrder() const = 0; - virtual int GetCurOrder() const = 0; - - virtual int GetMatchedCount() const = 0; + virtual int GetMatchedCount() const = 0; }; -class IPDF_LinkExtract -{ -public: - - virtual ~IPDF_LinkExtract() {} +class IPDF_LinkExtract { + public: + virtual ~IPDF_LinkExtract() {} - static IPDF_LinkExtract* CreateLinkExtract(); + static IPDF_LinkExtract* CreateLinkExtract(); - virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) = 0; -public: + virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) = 0; - virtual int CountLinks() const = 0; + public: + virtual int CountLinks() const = 0; - virtual CFX_WideString GetURL(int index) const = 0; + virtual CFX_WideString GetURL(int index) const = 0; - virtual void GetBoundedSegment(int index, int& start, int& count) const = 0; + virtual void GetBoundedSegment(int index, int& start, int& count) const = 0; - virtual void GetRects(int index, CFX_RectArray& rects) const = 0; + virtual void GetRects(int index, CFX_RectArray& rects) const = 0; }; #endif // CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_ |