diff options
Diffstat (limited to 'core/src/fpdftext/text_int.h')
-rw-r--r-- | core/src/fpdftext/text_int.h | 430 |
1 files changed, 215 insertions, 215 deletions
diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h index a0af51eb60..39dc721eee 100644 --- a/core/src/fpdftext/text_int.h +++ b/core/src/fpdftext/text_int.h @@ -1,215 +1,215 @@ -// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef _PDF_TEXT_INT_H_
-#define _PDF_TEXT_INT_H_
-class CPDF_TextParseOptions : public CFX_Object
-{
-public:
- CPDF_TextParseOptions();
- FX_BOOL m_bCheckObjectOrder;
- FX_BOOL m_bCheckDirection;
- int m_nCheckSameObject;
-};
-class CPDF_TextPage;
-class CPDF_LinkExtract;
-class CPDF_TextPageFind;
-class CPDF_DocProgressiveSearch;
-#define FPDFTEXT_CHAR_ERROR -1
-#define FPDFTEXT_CHAR_NORMAL 0
-#define FPDFTEXT_CHAR_GENERATED 1
-#define FPDFTEXT_CHAR_UNUNICODE 2
-#define FPDFTEXT_CHAR_HYPHEN 3
-#define FPDFTEXT_CHAR_PIECE 4
-#define FPDFTEXT_MC_PASS 0
-#define FPDFTEXT_MC_DONE 1
-#define FPDFTEXT_MC_DELAY 2
-typedef struct _PAGECHAR_INFO: public CFX_Object {
- int m_CharCode;
- FX_WCHAR m_Unicode;
- FX_FLOAT m_OriginX;
- FX_FLOAT m_OriginY;
- FX_INT32 m_Flag;
- CFX_FloatRect m_CharBox;
- CPDF_TextObject* m_pTextObj;
- CFX_AffineMatrix m_Matrix;
- int m_Index;
-} PAGECHAR_INFO;
-typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray;
-typedef struct {
- int m_Start;
- int m_nCount;
-} FPDF_SEGMENT;
-typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array;
-typedef struct {
- CPDF_TextObject* m_pTextObj;
- CFX_AffineMatrix m_formMatrix;
-} PDFTEXT_Obj;
-typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ;
-class CPDF_TextPage: public IPDF_TextPage
-{
-public:
- CPDF_TextPage(const CPDF_Page* pPage, int flags = 0);
- CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0);
- CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
- virtual FX_BOOL ParseTextPage();
- virtual void NormalizeObjects(FX_BOOL bNormalize);
- virtual FX_BOOL IsParsered() const
- {
- return m_IsParsered;
- }
- virtual ~CPDF_TextPage() {};
-public:
- virtual int CharIndexFromTextIndex(int TextIndex)const ;
- virtual int TextIndexFromCharIndex(int CharIndex)const;
- virtual int CountChars() const;
- virtual void GetCharInfo(int index, FPDF_CHAR_INFO & info) const;
- virtual void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const;
- virtual int GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const;
- virtual int GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance,
- FX_FLOAT yTorelance) const;
- virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const;
- virtual void GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const;
- virtual int GetOrderByDirection(int order, int direction) const;
- virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const;
-
- virtual int CountRects(int start, int nCount);
- virtual void GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top
- , FX_FLOAT& right, FX_FLOAT &bottom) const;
- virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate);
- virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate);
- virtual int CountBoundedSegments(FX_FLOAT left, FX_FLOAT top,
- FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE);
- virtual void GetBoundedSegment(int index, int& start, int& count) const;
- virtual int GetWordBreak(int index, int direction) const;
-public:
- const PAGECHAR_InfoArray* GetCharList() const
- {
- return &m_charList;
- }
- static FX_BOOL IsRectIntersect(CFX_FloatRect rect1, CFX_FloatRect rect2);
- static FX_BOOL IsLetter(FX_WCHAR unicode);
-private:
- FX_BOOL IsHyphen(FX_WCHAR curChar);
- FX_BOOL IsControlChar(PAGECHAR_INFO* pCharInfo);
- FX_BOOL GetBaselineRotate(int start, int end, int& Rotate);
- void ProcessObject();
- void ProcessFormObject(CPDF_FormObject* pFormObj, CFX_AffineMatrix formMatrix);
- void ProcessTextObject(PDFTEXT_Obj pObj);
- void ProcessTextObject(CPDF_TextObject* pTextObj, CFX_AffineMatrix formMatrix, FX_POSITION ObjPos);
- int ProcessInsertObject(const CPDF_TextObject* pObj, CFX_AffineMatrix formMatrix);
- FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
- FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos);
- FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2);
- int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const;
- void CloseTempLine();
- void OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str);
- FX_INT32 PreMarkedContent(PDFTEXT_Obj pObj);
- void ProcessMarkedContent(PDFTEXT_Obj pObj);
- void CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const;
- void FindPreviousTextObject(void);
- void AddCharInfoByLRDirection(CFX_WideString& str, int i);
- void AddCharInfoByRLDirection(CFX_WideString& str, int i);
- FX_INT32 GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
- FX_INT32 FindTextlineFlowDirection();
-protected:
- CPDFText_ParseOptions m_ParseOptions;
- CFX_WordArray m_CharIndex;
- const CPDF_PageObjects* m_pPage;
- PAGECHAR_InfoArray m_charList;
- CFX_WideTextBuf m_TextBuf;
- PAGECHAR_InfoArray m_TempCharList;
- CFX_WideTextBuf m_TempTextBuf;
- int m_parserflag;
- CPDF_TextObject* m_pPreTextObj;
- CFX_AffineMatrix m_perMatrix;
- FX_BOOL m_IsParsered;
- CFX_AffineMatrix m_DisplayMatrix;
-
- SEGMENT_Array m_Segment;
- CFX_RectArray m_SelRects;
- LINEOBJ m_LineObj;
- FX_BOOL m_TextlineDir;
- CFX_FloatRect m_CurlineRect;
-};
-class CPDF_TextPageFind: public IPDF_TextPageFind
-{
-public:
- CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
- virtual ~CPDF_TextPageFind() {};
-public:
- virtual FX_BOOL FindFirst(CFX_WideString findwhat, int flags, int startPos = 0);
- virtual FX_BOOL FindNext();
- virtual FX_BOOL FindPrev();
-
- virtual void GetRectArray(CFX_RectArray& rects) const;
- virtual int GetCurOrder() const;
- virtual int GetMatchedCount()const;
-protected:
- void ExtractFindWhat(CFX_WideString findwhat);
- FX_BOOL IsMatchWholeWord(CFX_WideString csPageText, int startPos, int endPos);
- FX_BOOL ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString,
- int iSubString, FX_WCHAR chSep);
- CFX_WideString MakeReverse(const CFX_WideString str);
- int ReverseFind(CFX_WideString csPageText, CFX_WideString csWord, int nStartPos, int& WordLength);
- int GetCharIndex(int index) const;
-private:
- CFX_WordArray m_CharIndex;
- const IPDF_TextPage* m_pTextPage;
- CFX_WideString m_strText;
- CFX_WideString m_findWhat;
- int m_flags;
- CFX_WideStringArray m_csFindWhatArray;
- int m_findNextStart;
- int m_findPreStart;
- FX_BOOL m_bMatchCase;
- FX_BOOL m_bMatchWholeWord;
- int m_resStart;
- int m_resEnd;
- CFX_RectArray m_resArray;
- FX_BOOL m_IsFind;
-};
-class CPDF_LinkExt: public CFX_Object
-{
-public:
- CPDF_LinkExt() {};
- int m_Start;
- int m_Count;
- CFX_WideString m_strUrl;
- virtual ~CPDF_LinkExt() {};
-};
-typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray;
-class CPDF_LinkExtract: public IPDF_LinkExtract
-{
-public:
- CPDF_LinkExtract();
- virtual ~CPDF_LinkExtract();
- virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage);
- virtual FX_BOOL IsExtract() const
- {
- return m_IsParserd;
- }
-public:
- virtual int CountLinks() const;
- virtual CFX_WideString GetURL(int index) const;
- virtual void GetBoundedSegment(int index, int& start, int& count) const;
- virtual void GetRects(int index, CFX_RectArray& rects)const;
-protected:
- void parserLink();
- void DeleteLinkList();
- FX_BOOL CheckWebLink(CFX_WideString& strBeCheck);
- FX_BOOL CheckMailLink(CFX_WideString& str);
- FX_BOOL AppendToLinkList(int start, int count, CFX_WideString strUrl);
-private:
- LINK_InfoArray m_LinkList;
- const CPDF_TextPage* m_pTextPage;
- CFX_WideString m_strPageText;
- FX_BOOL m_IsParserd;
-};
-FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst);
-void NormalizeString(CFX_WideString& str);
-void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest);
-#endif
+// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef _PDF_TEXT_INT_H_ +#define _PDF_TEXT_INT_H_ +class CPDF_TextParseOptions : public CFX_Object +{ +public: + CPDF_TextParseOptions(); + FX_BOOL m_bCheckObjectOrder; + FX_BOOL m_bCheckDirection; + int m_nCheckSameObject; +}; +class CPDF_TextPage; +class CPDF_LinkExtract; +class CPDF_TextPageFind; +class CPDF_DocProgressiveSearch; +#define FPDFTEXT_CHAR_ERROR -1 +#define FPDFTEXT_CHAR_NORMAL 0 +#define FPDFTEXT_CHAR_GENERATED 1 +#define FPDFTEXT_CHAR_UNUNICODE 2 +#define FPDFTEXT_CHAR_HYPHEN 3 +#define FPDFTEXT_CHAR_PIECE 4 +#define FPDFTEXT_MC_PASS 0 +#define FPDFTEXT_MC_DONE 1 +#define FPDFTEXT_MC_DELAY 2 +typedef struct _PAGECHAR_INFO: public CFX_Object { + int m_CharCode; + FX_WCHAR m_Unicode; + FX_FLOAT m_OriginX; + FX_FLOAT m_OriginY; + FX_INT32 m_Flag; + CFX_FloatRect m_CharBox; + CPDF_TextObject* m_pTextObj; + CFX_AffineMatrix m_Matrix; + int m_Index; +} PAGECHAR_INFO; +typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray; +typedef struct { + int m_Start; + int m_nCount; +} FPDF_SEGMENT; +typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; +typedef struct { + CPDF_TextObject* m_pTextObj; + CFX_AffineMatrix m_formMatrix; +} PDFTEXT_Obj; +typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; +class CPDF_TextPage: public IPDF_TextPage +{ +public: + CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); + CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); + CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); + virtual FX_BOOL ParseTextPage(); + virtual void NormalizeObjects(FX_BOOL bNormalize); + virtual FX_BOOL IsParsered() const + { + return m_IsParsered; + } + virtual ~CPDF_TextPage() {}; +public: + virtual int CharIndexFromTextIndex(int TextIndex)const ; + virtual int TextIndexFromCharIndex(int CharIndex)const; + virtual int CountChars() const; + virtual void GetCharInfo(int index, FPDF_CHAR_INFO & info) const; + virtual void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const; + virtual int GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; + virtual int GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, + FX_FLOAT yTorelance) const; + virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const; + virtual void GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const; + virtual int GetOrderByDirection(int order, int direction) const; + virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const; + + virtual int CountRects(int start, int nCount); + virtual void GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top + , FX_FLOAT& right, FX_FLOAT &bottom) const; + virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); + virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate); + virtual int CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, + FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE); + virtual void GetBoundedSegment(int index, int& start, int& count) const; + virtual int GetWordBreak(int index, int direction) const; +public: + const PAGECHAR_InfoArray* GetCharList() const + { + return &m_charList; + } + static FX_BOOL IsRectIntersect(CFX_FloatRect rect1, CFX_FloatRect rect2); + static FX_BOOL IsLetter(FX_WCHAR unicode); +private: + FX_BOOL IsHyphen(FX_WCHAR curChar); + FX_BOOL IsControlChar(PAGECHAR_INFO* pCharInfo); + FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); + void ProcessObject(); + void ProcessFormObject(CPDF_FormObject* pFormObj, CFX_AffineMatrix formMatrix); + void ProcessTextObject(PDFTEXT_Obj pObj); + void ProcessTextObject(CPDF_TextObject* pTextObj, CFX_AffineMatrix formMatrix, FX_POSITION ObjPos); + int ProcessInsertObject(const CPDF_TextObject* pObj, CFX_AffineMatrix formMatrix); + FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); + FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos); + FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2); + int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const; + void CloseTempLine(); + void OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str); + FX_INT32 PreMarkedContent(PDFTEXT_Obj pObj); + void ProcessMarkedContent(PDFTEXT_Obj pObj); + void CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const; + void FindPreviousTextObject(void); + void AddCharInfoByLRDirection(CFX_WideString& str, int i); + void AddCharInfoByRLDirection(CFX_WideString& str, int i); + FX_INT32 GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); + FX_INT32 FindTextlineFlowDirection(); +protected: + CPDFText_ParseOptions m_ParseOptions; + CFX_WordArray m_CharIndex; + const CPDF_PageObjects* m_pPage; + PAGECHAR_InfoArray m_charList; + CFX_WideTextBuf m_TextBuf; + PAGECHAR_InfoArray m_TempCharList; + CFX_WideTextBuf m_TempTextBuf; + int m_parserflag; + CPDF_TextObject* m_pPreTextObj; + CFX_AffineMatrix m_perMatrix; + FX_BOOL m_IsParsered; + CFX_AffineMatrix m_DisplayMatrix; + + SEGMENT_Array m_Segment; + CFX_RectArray m_SelRects; + LINEOBJ m_LineObj; + FX_BOOL m_TextlineDir; + CFX_FloatRect m_CurlineRect; +}; +class CPDF_TextPageFind: public IPDF_TextPageFind +{ +public: + CPDF_TextPageFind(const IPDF_TextPage* pTextPage); + virtual ~CPDF_TextPageFind() {}; +public: + virtual FX_BOOL FindFirst(CFX_WideString findwhat, int flags, int startPos = 0); + virtual FX_BOOL FindNext(); + virtual FX_BOOL FindPrev(); + + virtual void GetRectArray(CFX_RectArray& rects) const; + virtual int GetCurOrder() const; + virtual int GetMatchedCount()const; +protected: + void ExtractFindWhat(CFX_WideString findwhat); + FX_BOOL IsMatchWholeWord(CFX_WideString csPageText, int startPos, int endPos); + FX_BOOL ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString, + int iSubString, FX_WCHAR chSep); + CFX_WideString MakeReverse(const CFX_WideString str); + int ReverseFind(CFX_WideString csPageText, CFX_WideString csWord, int nStartPos, int& WordLength); + int GetCharIndex(int index) const; +private: + CFX_WordArray m_CharIndex; + const IPDF_TextPage* m_pTextPage; + CFX_WideString m_strText; + CFX_WideString m_findWhat; + int m_flags; + CFX_WideStringArray m_csFindWhatArray; + int m_findNextStart; + int m_findPreStart; + FX_BOOL m_bMatchCase; + FX_BOOL m_bMatchWholeWord; + int m_resStart; + int m_resEnd; + CFX_RectArray m_resArray; + FX_BOOL m_IsFind; +}; +class CPDF_LinkExt: public CFX_Object +{ +public: + CPDF_LinkExt() {}; + int m_Start; + int m_Count; + CFX_WideString m_strUrl; + virtual ~CPDF_LinkExt() {}; +}; +typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; +class CPDF_LinkExtract: public IPDF_LinkExtract +{ +public: + CPDF_LinkExtract(); + virtual ~CPDF_LinkExtract(); + virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage); + virtual FX_BOOL IsExtract() const + { + return m_IsParserd; + } +public: + virtual int CountLinks() const; + virtual CFX_WideString GetURL(int index) const; + virtual void GetBoundedSegment(int index, int& start, int& count) const; + virtual void GetRects(int index, CFX_RectArray& rects)const; +protected: + void parserLink(); + void DeleteLinkList(); + FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); + FX_BOOL CheckMailLink(CFX_WideString& str); + FX_BOOL AppendToLinkList(int start, int count, CFX_WideString strUrl); +private: + LINK_InfoArray m_LinkList; + const CPDF_TextPage* m_pTextPage; + CFX_WideString m_strPageText; + FX_BOOL m_IsParserd; +}; +FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst); +void NormalizeString(CFX_WideString& str); +void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); +#endif |