summaryrefslogtreecommitdiff
path: root/core/src/fpdftext/text_int.h
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/fpdftext/text_int.h')
-rw-r--r--core/src/fpdftext/text_int.h215
1 files changed, 215 insertions, 0 deletions
diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h
new file mode 100644
index 0000000000..a0af51eb60
--- /dev/null
+++ b/core/src/fpdftext/text_int.h
@@ -0,0 +1,215 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef _PDF_TEXT_INT_H_
+#define _PDF_TEXT_INT_H_
+class CPDF_TextParseOptions : public CFX_Object // Option bundle: two FX_BOOL flags and one int setting. NOTE(review): CPDF_TextPage in this header uses CPDFText_ParseOptions, a differently spelled type not declared here - confirm they are distinct on purpose.
+{
+public:
+ CPDF_TextParseOptions(); // Defined out of line; the initial values of the members are not visible in this header.
+ FX_BOOL m_bCheckObjectOrder; // presumably toggles an object-order check - TODO confirm against the parser
+ FX_BOOL m_bCheckDirection; // presumably toggles a text-direction check - TODO confirm
+ int m_nCheckSameObject; // int rather than FX_BOOL; the meaning of its values is not shown here
+};
+class CPDF_TextPage; // forward declaration; the full class appears later in this header
+class CPDF_LinkExtract; // forward declaration; the full class appears later in this header
+class CPDF_TextPageFind; // forward declaration; the full class appears later in this header
+class CPDF_DocProgressiveSearch; // forward declaration only; no definition appears in this header
+#define FPDFTEXT_CHAR_ERROR -1 // FPDFTEXT_CHAR_* group, the only negative value; presumably these are the values of PAGECHAR_INFO::m_Flag - TODO confirm
+#define FPDFTEXT_CHAR_NORMAL 0 // presumably an ordinary character taken from the page - TODO confirm
+#define FPDFTEXT_CHAR_GENERATED 1 // presumably a character synthesized by the extractor - TODO confirm
+#define FPDFTEXT_CHAR_UNUNICODE 2 // presumably a character with no Unicode mapping - TODO confirm
+#define FPDFTEXT_CHAR_HYPHEN 3 // presumably marks a hyphen (cf. CPDF_TextPage::IsHyphen) - TODO confirm
+#define FPDFTEXT_CHAR_PIECE 4 // presumably related to CPDF_TextPage::OnPiece - TODO confirm
+#define FPDFTEXT_MC_PASS 0 // FPDFTEXT_MC_* group; presumably the FX_INT32 results of CPDF_TextPage::PreMarkedContent - TODO confirm
+#define FPDFTEXT_MC_DONE 1 // meaning not visible in this header
+#define FPDFTEXT_MC_DELAY 2 // meaning not visible in this header
+typedef struct _PAGECHAR_INFO: public CFX_Object { // Per-character record; element type of PAGECHAR_InfoArray.
+ int m_CharCode; // presumably the font-level character code - TODO confirm
+ FX_WCHAR m_Unicode; // wide-character value of this entry
+ FX_FLOAT m_OriginX; // X of the character origin; coordinate space is not shown here
+ FX_FLOAT m_OriginY; // Y of the character origin; coordinate space is not shown here
+ FX_INT32 m_Flag; // presumably one of the FPDFTEXT_CHAR_* values - TODO confirm
+ CFX_FloatRect m_CharBox; // rectangle associated with the character
+ CPDF_TextObject* m_pTextObj; // raw pointer to a text object; ownership is not expressed in the type
+ CFX_AffineMatrix m_Matrix; // matrix stored per character; which transform it holds is not shown here
+ int m_Index; // integer index; what it indexes into is not shown here
+} PAGECHAR_INFO;
+typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray; // array of PAGECHAR_INFO stored by value
+typedef struct { // A (start, count) pair; element type of SEGMENT_Array.
+ int m_Start; // presumably the first index of the segment - TODO confirm
+ int m_nCount; // presumably the number of items in the segment - TODO confirm
+} FPDF_SEGMENT;
+typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array; // array of FPDF_SEGMENT stored by value
+typedef struct { // Pairs a text object pointer with a matrix; element type of LINEOBJ.
+ CPDF_TextObject* m_pTextObj; // raw pointer; ownership is not expressed in the type
+ CFX_AffineMatrix m_formMatrix; // presumably the matrix of the enclosing form object - TODO confirm
+} PDFTEXT_Obj;
+typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ; // array of PDFTEXT_Obj stored by value
+class CPDF_TextPage: public IPDF_TextPage // Derives from IPDF_TextPage; only IsParsered, the destructor and GetCharList have bodies in this header.
+{
+public:
+ CPDF_TextPage(const CPDF_Page* pPage, int flags = 0); // flags defaults to 0; the meaning of individual flag values is not shown here
+ CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0); // same shape, taking the pointer type that m_pPage is declared with
+ CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions); // options passed by value; NOTE(review): CPDFText_ParseOptions is not declared in this header
+ virtual FX_BOOL ParseTextPage();
+ virtual void NormalizeObjects(FX_BOOL bNormalize);
+ virtual FX_BOOL IsParsered() const // inline accessor; "Parsered" is the spelling used throughout this class
+ {
+ return m_IsParsered;
+ }
+ virtual ~CPDF_TextPage() {}; // empty inline destructor
+public:
+ virtual int CharIndexFromTextIndex(int TextIndex)const ; // index conversion; counterpart of TextIndexFromCharIndex
+ virtual int TextIndexFromCharIndex(int CharIndex)const; // index conversion; counterpart of CharIndexFromTextIndex
+ virtual int CountChars() const;
+ virtual void GetCharInfo(int index, FPDF_CHAR_INFO & info) const; // result is written into the info out-parameter
+ virtual void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const; // result is written into rectArray
+ virtual int GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const; // "Torelance" (sic) is the parameter spelling used here
+ virtual int GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance,
+ FX_FLOAT yTorelance) const; // overload taking separate x/y instead of a CPDF_Point
+ virtual CFX_WideString GetTextByRect(CFX_FloatRect rect) const;
+ virtual void GetRectsArrayByRect(CFX_FloatRect rect, CFX_RectArray& resRectArray) const; // result is written into resRectArray
+ virtual int GetOrderByDirection(int order, int direction) const;
+ virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const; // defaults are start 0 and nCount -1; presumably -1 means "to the end" - TODO confirm
+
+ virtual int CountRects(int start, int nCount); // non-const, unlike GetRect; presumably fills m_SelRects - TODO confirm
+ virtual void GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top
+ , FX_FLOAT& right, FX_FLOAT &bottom) const; // the four edges are returned through reference parameters
+ virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); // Rotate is an out-parameter; a private (start, end) overload also exists
+ virtual FX_BOOL GetBaselineRotate(CFX_FloatRect rect, int& Rotate); // overload keyed by rectangle instead of rect index
+ virtual int CountBoundedSegments(FX_FLOAT left, FX_FLOAT top,
+ FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE); // non-const; bContains defaults to FALSE; presumably fills m_Segment - TODO confirm
+ virtual void GetBoundedSegment(int index, int& start, int& count) const; // start and count are out-parameters
+ virtual int GetWordBreak(int index, int direction) const;
+public:
+ const PAGECHAR_InfoArray* GetCharList() const // returns the address of the m_charList member, so the pointer is only usable while this object exists
+ {
+ return &m_charList;
+ }
+ static FX_BOOL IsRectIntersect(CFX_FloatRect rect1, CFX_FloatRect rect2); // static: uses no instance state
+ static FX_BOOL IsLetter(FX_WCHAR unicode); // static: uses no instance state
+private:
+ FX_BOOL IsHyphen(FX_WCHAR curChar);
+ FX_BOOL IsControlChar(PAGECHAR_INFO* pCharInfo);
+ FX_BOOL GetBaselineRotate(int start, int end, int& Rotate); // private overload taking a (start, end) pair
+ void ProcessObject();
+ void ProcessFormObject(CPDF_FormObject* pFormObj, CFX_AffineMatrix formMatrix);
+ void ProcessTextObject(PDFTEXT_Obj pObj); // overload taking the object/matrix pair by value
+ void ProcessTextObject(CPDF_TextObject* pTextObj, CFX_AffineMatrix formMatrix, FX_POSITION ObjPos); // overload taking the pieces separately plus a position
+ int ProcessInsertObject(const CPDF_TextObject* pObj, CFX_AffineMatrix formMatrix); // meaning of the int result is not shown here
+ FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info); // info is an in/out reference
+ FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos);
+ FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2);
+ int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const;
+ void CloseTempLine(); // presumably flushes m_TempCharList / m_TempTextBuf - TODO confirm
+ void OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str);
+ FX_INT32 PreMarkedContent(PDFTEXT_Obj pObj); // presumably returns one of the FPDFTEXT_MC_* values - TODO confirm
+ void ProcessMarkedContent(PDFTEXT_Obj pObj);
+ void CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const; // both arguments are in/out references
+ void FindPreviousTextObject(void);
+ void AddCharInfoByLRDirection(CFX_WideString& str, int i);
+ void AddCharInfoByRLDirection(CFX_WideString& str, int i);
+ FX_INT32 GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
+ FX_INT32 FindTextlineFlowDirection();
+protected:
+ CPDFText_ParseOptions m_ParseOptions; // stored by value; same type as the third constructor's parameter
+ CFX_WordArray m_CharIndex;
+ const CPDF_PageObjects* m_pPage; // non-owning as far as the type shows; lifetime is managed elsewhere - TODO confirm
+ PAGECHAR_InfoArray m_charList; // exposed read-only through GetCharList()
+ CFX_WideTextBuf m_TextBuf;
+ PAGECHAR_InfoArray m_TempCharList; // same element type as m_charList; presumably a staging list - TODO confirm
+ CFX_WideTextBuf m_TempTextBuf; // same type as m_TextBuf; presumably a staging buffer - TODO confirm
+ int m_parserflag; // presumably holds the constructors' flags argument - TODO confirm
+ CPDF_TextObject* m_pPreTextObj; // presumably the previously processed text object - TODO confirm
+ CFX_AffineMatrix m_perMatrix; // NOTE(review): "per" may be a typo for "pre" (cf. m_pPreTextObj) - confirm
+ FX_BOOL m_IsParsered; // value returned by IsParsered()
+ CFX_AffineMatrix m_DisplayMatrix;
+
+ SEGMENT_Array m_Segment;
+ CFX_RectArray m_SelRects;
+ LINEOBJ m_LineObj;
+ FX_BOOL m_TextlineDir; // NOTE(review): FX_BOOL here, while FindTextlineFlowDirection() returns FX_INT32 - confirm the intended value range
+ CFX_FloatRect m_CurlineRect;
+};
+class CPDF_TextPageFind: public IPDF_TextPageFind // Derives from IPDF_TextPageFind; every member function except the destructor is defined outside this header.
+{
+public:
+ CPDF_TextPageFind(const IPDF_TextPage* pTextPage); // takes the page through the IPDF_TextPage interface pointer
+ virtual ~CPDF_TextPageFind() {}; // empty inline destructor
+public:
+ virtual FX_BOOL FindFirst(CFX_WideString findwhat, int flags, int startPos = 0); // startPos defaults to 0; flag values are not shown in this header
+ virtual FX_BOOL FindNext();
+ virtual FX_BOOL FindPrev();
+
+ virtual void GetRectArray(CFX_RectArray& rects) const; // result is written into rects
+ virtual int GetCurOrder() const;
+ virtual int GetMatchedCount()const;
+protected:
+ void ExtractFindWhat(CFX_WideString findwhat); // presumably populates m_csFindWhatArray - TODO confirm
+ FX_BOOL IsMatchWholeWord(CFX_WideString csPageText, int startPos, int endPos);
+ FX_BOOL ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString,
+ int iSubString, FX_WCHAR chSep); // rString is the out-parameter; chSep is a separator character
+ CFX_WideString MakeReverse(const CFX_WideString str); // argument is taken by const value, so each call copies the string object
+ int ReverseFind(CFX_WideString csPageText, CFX_WideString csWord, int nStartPos, int& WordLength); // WordLength is an out-parameter
+ int GetCharIndex(int index) const;
+private:
+ CFX_WordArray m_CharIndex;
+ const IPDF_TextPage* m_pTextPage; // raw interface pointer; ownership is not expressed in the type
+ CFX_WideString m_strText;
+ CFX_WideString m_findWhat;
+ int m_flags; // presumably the flags passed to FindFirst - TODO confirm
+ CFX_WideStringArray m_csFindWhatArray;
+ int m_findNextStart;
+ int m_findPreStart;
+ FX_BOOL m_bMatchCase; // presumably derived from m_flags - TODO confirm
+ FX_BOOL m_bMatchWholeWord; // presumably derived from m_flags - TODO confirm
+ int m_resStart;
+ int m_resEnd;
+ CFX_RectArray m_resArray;
+ FX_BOOL m_IsFind;
+};
+class CPDF_LinkExt: public CFX_Object // One link record: two ints (start, count) plus a URL string; LINK_InfoArray stores pointers to these.
+{
+public:
+ CPDF_LinkExt() : m_Start(0), m_Count(0) {} // zero both ints so a freshly constructed record never holds indeterminate values
+ int m_Start; // presumably the first character index of the link text - TODO confirm
+ int m_Count; // presumably the number of characters the link covers - TODO confirm
+ CFX_WideString m_strUrl; // URL string of the link
+ virtual ~CPDF_LinkExt() {} // virtual, so deleting through a base pointer is well defined
+};
+typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray; // array of raw CPDF_LinkExt pointers; ownership is not expressed in the type
+class CPDF_LinkExtract: public IPDF_LinkExtract // Derives from IPDF_LinkExtract; only IsExtract has a body in this header.
+{
+public:
+ CPDF_LinkExtract();
+ virtual ~CPDF_LinkExtract(); // defined out of line; presumably releases m_LinkList entries - TODO confirm
+ virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage); // takes the interface type, while m_pTextPage below is the concrete CPDF_TextPage type
+ virtual FX_BOOL IsExtract() const // inline accessor for m_IsParserd
+ {
+ return m_IsParserd;
+ }
+public:
+ virtual int CountLinks() const;
+ virtual CFX_WideString GetURL(int index) const;
+ virtual void GetBoundedSegment(int index, int& start, int& count) const; // start and count are out-parameters
+ virtual void GetRects(int index, CFX_RectArray& rects)const; // result is written into rects
+protected:
+ void parserLink();
+ void DeleteLinkList(); // presumably frees the CPDF_LinkExt objects held in m_LinkList - TODO confirm
+ FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); // takes a non-const reference, so the string may be modified
+ FX_BOOL CheckMailLink(CFX_WideString& str); // takes a non-const reference, so the string may be modified
+ FX_BOOL AppendToLinkList(int start, int count, CFX_WideString strUrl); // arguments mirror the three CPDF_LinkExt data members
+private:
+ LINK_InfoArray m_LinkList; // holds raw CPDF_LinkExt pointers
+ const CPDF_TextPage* m_pTextPage; // NOTE(review): concrete type, although ExtractLinks receives IPDF_TextPage* - implies a downcast in the definition; confirm
+ CFX_WideString m_strPageText;
+ FX_BOOL m_IsParserd; // value returned by IsExtract(); spelled "Parserd" here versus "Parsered" in CPDF_TextPage
+};
+FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst); // declaration only; presumably writes the normalized form of wch to pDst and returns its length - TODO confirm, including required pDst capacity
+void NormalizeString(CFX_WideString& str); // declaration only; str is a non-const reference, so it is presumably modified in place - TODO confirm
+void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest); // declaration only; sDest is the output reference; whether it is appended to or overwritten is not shown here
+#endif