// Source path: core/fpdftext/include/cpdf_textpage.h
// (blob 1a60a2351d46467ccff6ea147669ef90dd820809)
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_
#define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_

#include <deque>
#include <vector>

#include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h"
#include "core/fxcrt/include/fx_basic.h"
#include "core/fxcrt/include/fx_coordinates.h"
#include "core/fxcrt/include/fx_string.h"

class CFX_BidiChar;
class CPDF_Font;
class CPDF_FormObject;
class CPDF_Page;
class CPDF_TextObject;

// Plain record describing one character of a text page. A pointer to this
// struct is the |info| argument of CPDF_TextPage::GetCharInfo().
// The struct has no constructor or member initializers, so a default-
// initialized instance has indeterminate scalar fields.
// Field notes below are inferred from names and types only — confirm against
// the implementation before relying on them.
struct FPDF_CHAR_INFO {
  // Presumably the Unicode value of the character.
  FX_WCHAR m_Unicode;
  // Presumably the font-specific character code.
  // NOTE(review): PAGECHAR_INFO spells the corresponding field m_CharCode and
  // declares it as int, while this one is FX_WCHAR — confirm the difference
  // in type and spelling is intended.
  FX_WCHAR m_Charcode;
  // Flag value; the set of valid values is not defined in this header.
  int32_t m_Flag;
  // Presumably the font size of the character — units not visible here.
  FX_FLOAT m_FontSize;
  // Presumably the character origin; coordinate space not visible here.
  FX_FLOAT m_OriginX;
  FX_FLOAT m_OriginY;
  // Presumably the bounding box of the character.
  CFX_FloatRect m_CharBox;
  // Raw pointer to a text object; nothing in this header indicates that the
  // struct owns it.
  CPDF_TextObject* m_pTextObj;
  // Matrix associated with the character — TODO confirm which transform.
  CFX_Matrix m_Matrix;
};

// A (start, count) pair. CPDF_TextPage keeps an array of these in m_Segments.
// Presumably identifies a run of characters by first index and length —
// confirm against the code that fills m_Segments.
struct FPDF_SEGMENT {
  // Presumably the index of the first item in the run.
  int m_Start;
  // Presumably the number of items in the run.
  int m_nCount;
};

// Per-character record used inside CPDF_TextPage: it is the element type of
// the m_CharList and m_TempCharList deques and is passed to helpers such as
// GenerateCharInfo(), IsControlChar() and AddCharInfoBy{LR,RL}Direction().
// The struct has no constructor or member initializers, so a default-
// initialized instance has indeterminate scalar fields.
// Field notes below are inferred from names and types only — confirm against
// the implementation before relying on them.
struct PAGECHAR_INFO {
  // Presumably the font-specific character code (int here, whereas
  // FPDF_CHAR_INFO::m_Charcode is FX_WCHAR).
  int m_CharCode;
  // Presumably the Unicode value of the character.
  FX_WCHAR m_Unicode;
  // Presumably the character origin; coordinate space not visible here.
  FX_FLOAT m_OriginX;
  FX_FLOAT m_OriginY;
  // Flag value; the set of valid values is not defined in this header.
  int32_t m_Flag;
  // Presumably the bounding box of the character.
  CFX_FloatRect m_CharBox;
  // Raw pointer to a text object; nothing in this header indicates that the
  // struct owns it.
  CPDF_TextObject* m_pTextObj;
  // Matrix associated with the character — TODO confirm which transform.
  CFX_Matrix m_Matrix;
  // An index value; what it indexes is not visible here — TODO confirm.
  int m_Index;
};

// Pairs a text object pointer with a matrix. CPDF_TextPage stores these in
// m_LineObj and takes them by value in ProcessTextObject(PDFTEXT_Obj),
// PreMarkedContent() and ProcessMarkedContent().
struct PDFTEXT_Obj {
  // Raw pointer to a text object; nothing in this header indicates that the
  // struct owns it.
  CPDF_TextObject* m_pTextObj;
  // Presumably the matrix of the enclosing form at the time the object was
  // recorded (compare the |formMatrix| parameters on CPDF_TextPage) — confirm.
  CFX_Matrix m_formMatrix;
};

// Text-oriented view of a single CPDF_Page: exposes character, rectangle and
// plain-text queries over the page.
//
// The page pointer (m_pPage) and the parser flags (m_parserflag) are const
// members, so they are fixed at construction and the implicit copy-assignment
// operator is deleted.
//
// Only declarations are visible in this header. Anything marked "presumably"
// below is inferred from names and signatures and must be checked against the
// implementation before being relied on.
class CPDF_TextPage {
 public:
  // |pPage| and |flags| presumably initialize m_pPage and m_parserflag.
  // Nothing in this header indicates that the object takes ownership of
  // |pPage|.
  CPDF_TextPage(const CPDF_Page* pPage, int flags);
  ~CPDF_TextPage() {}

  // Parsing and character-level queries.
  // NOTE(review): the label on this group was "IPDF_TextPage:", but no base
  // class is declared on this class, so none of these are overrides.
  void ParseTextPage();
  // Returns the m_bIsParsed flag.
  bool IsParsed() const { return m_bIsParsed; }
  // Conversions between two index spaces ("text" and "char"); the exact
  // mapping and the value returned for an unmapped index are defined in the
  // implementation — TODO confirm.
  int CharIndexFromTextIndex(int TextIndex) const;
  int TextIndexFromCharIndex(int CharIndex) const;
  int CountChars() const;
  // Takes a caller-provided FPDF_CHAR_INFO by pointer for entry |index|.
  void GetCharInfo(int index, FPDF_CHAR_INFO* info) const;
  std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const;
  // Two overloads of the same lookup: the position is given either as a
  // CFX_FloatPoint or as separate x/y values, each with per-axis tolerances.
  int GetIndexAtPos(CFX_FloatPoint point,
                    FX_FLOAT xTolerance,
                    FX_FLOAT yTolerance) const;
  int GetIndexAtPos(FX_FLOAT x,
                    FX_FLOAT y,
                    FX_FLOAT xTolerance,
                    FX_FLOAT yTolerance) const;
  CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const;
  std::vector<CFX_FloatRect> GetRectsArrayByRect(
      const CFX_FloatRect& rect) const;
  // Defaults are start = 0 and nCount = -1; -1 presumably means "through the
  // end of the page" — confirm in the implementation.
  CFX_WideString GetPageText(int start = 0, int nCount = -1) const;
  // Non-const, unlike the queries above. Presumably recomputes m_SelRects,
  // which GetRect() then reads by |rectIndex| — TODO confirm the required
  // call order.
  int CountRects(int start, int nCount);
  // Reports one rectangle through the four reference parameters.
  void GetRect(int rectIndex,
               FX_FLOAT& left,
               FX_FLOAT& top,
               FX_FLOAT& right,
               FX_FLOAT& bottom) const;
  // Non-const; |bContains| defaults to FALSE. Presumably fills m_Segments —
  // TODO confirm.
  int CountBoundedSegments(FX_FLOAT left,
                           FX_FLOAT top,
                           FX_FLOAT right,
                           FX_FLOAT bottom,
                           FX_BOOL bContains = FALSE);

  // The accepted values of |direction| are defined in the implementation.
  int GetWordBreak(int index, int direction) const;

  // Stateless helpers, callable without a CPDF_TextPage instance.
  static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1,
                                 const CFX_FloatRect& rect2);
  static FX_BOOL IsLetter(FX_WCHAR unicode);

 private:
  // Character classification helpers.
  FX_BOOL IsHyphen(FX_WCHAR curChar);
  bool IsControlChar(const PAGECHAR_INFO& charInfo);
  // Writes its result through |Rotate|; the FX_BOOL return presumably reports
  // whether a value was produced — confirm.
  FX_BOOL GetBaselineRotate(int start, int end, int& Rotate);

  // Page-object processing steps.
  void ProcessObject();
  void ProcessFormObject(CPDF_FormObject* pFormObj,
                         const CFX_Matrix& formMatrix);
  void ProcessTextObject(PDFTEXT_Obj pObj);
  void ProcessTextObject(CPDF_TextObject* pTextObj,
                         const CFX_Matrix& formMatrix,
                         const CPDF_PageObjectList* pObjList,
                         CPDF_PageObjectList::const_iterator ObjPos);
  // The meaning of the int result is defined in the implementation.
  int ProcessInsertObject(const CPDF_TextObject* pObj,
                          const CFX_Matrix& formMatrix);
  // Takes |info| by non-const reference, so it can modify the caller's record.
  FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
  FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
                                const CPDF_PageObjectList* pObjList,
                                CPDF_PageObjectList::const_iterator ObjPos);
  FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1,
                           CPDF_TextObject* pTextObj2);
  int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const;
  void CloseTempLine();
  void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str);
  int32_t PreMarkedContent(PDFTEXT_Obj pObj);
  void ProcessMarkedContent(PDFTEXT_Obj pObj);
  // Both arguments are in/out references even though the method is const.
  void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const;
  void FindPreviousTextObject();
  // |info| is taken by value, so the caller's record is not modified.
  void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
  void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
  int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
  int32_t FindTextlineFlowDirection();

  void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend);
  FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
                        const CPDF_Font* pFont,
                        int nItems) const;

  // Data members. Their order is left unchanged on purpose: it determines
  // both object layout and member initialization order.

  // The page this object describes; a const pointer to const.
  const CPDF_Page* const m_pPage;
  // NOTE(review): elements are uint16_t, so no value above 65535 can be
  // stored. If these are character indices, a page with more characters
  // than that would be truncated — confirm and widen if so.
  std::vector<uint16_t> m_CharIndex;
  std::deque<PAGECHAR_INFO> m_CharList;
  // "Temp" list and buffer presumably hold the line currently being built
  // (see CloseTempLine() / SwapTempTextBuf()) — confirm.
  std::deque<PAGECHAR_INFO> m_TempCharList;
  CFX_WideTextBuf m_TextBuf;
  CFX_WideTextBuf m_TempTextBuf;
  const int m_parserflag;
  // Raw pointer to a text object; nothing here indicates it is owned.
  // NOTE(review): the implicit copy constructor is still available and would
  // copy this pointer as-is — confirm copies are never made.
  CPDF_TextObject* m_pPreTextObj;
  CFX_Matrix m_perMatrix;
  // Value returned by IsParsed().
  bool m_bIsParsed;
  CFX_Matrix m_DisplayMatrix;
  CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments;
  std::vector<CFX_FloatRect> m_SelRects;
  CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj;
  int32_t m_TextlineDir;
  CFX_FloatRect m_CurlineRect;
};

#endif  // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_