15 files changed, 436 insertions, 525 deletions
diff --git a/BUILD.gn b/BUILD.gn
index f57bec3177..74ffa178ef 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -477,13 +477,10 @@ static_library("fpdfapi") {
 static_library("fpdftext") {
   sources = [
     "core/fpdftext/fpdf_text_int.cpp",
-    "core/fpdftext/fpdf_text_int.h",
-    "core/fpdftext/include/ipdf_linkextract.h",
-    "core/fpdftext/include/ipdf_textpage.h",
-    "core/fpdftext/include/ipdf_textpagefind.h",
+    "core/fpdftext/include/cpdf_linkextract.h",
+    "core/fpdftext/include/cpdf_textpage.h",
+    "core/fpdftext/include/cpdf_textpagefind.h",
     "core/fpdftext/text_int.h",
-    "core/fpdftext/unicodenormalization.cpp",
-    "core/fpdftext/unicodenormalization.h",
     "core/fpdftext/unicodenormalizationdata.cpp",
     "core/fpdftext/unicodenormalizationdata.h",
   ]
diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/fpdf_text_int.cpp
index 8e8686c4a1..741331fb77 100644
--- a/core/fpdftext/fpdf_text_int.cpp
+++ b/core/fpdftext/fpdf_text_int.cpp
@@ -4,8 +4,6 @@
 
 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
 
-#include "core/fpdftext/fpdf_text_int.h"
-
 #include <algorithm>
 #include <cctype>
 #include <cwctype>
@@ -14,15 +12,17 @@
 #include <vector>
 
 #include "core/fpdfapi/fpdf_font/include/cpdf_font.h"
+#include "core/fpdfapi/fpdf_page/include/cpdf_form.h"
 #include "core/fpdfapi/fpdf_page/include/cpdf_formobject.h"
+#include "core/fpdfapi/fpdf_page/include/cpdf_page.h"
 #include "core/fpdfapi/fpdf_page/include/cpdf_pageobject.h"
 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h"
 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h"
 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h"
-#include "core/fpdftext/include/ipdf_linkextract.h"
-#include "core/fpdftext/include/ipdf_textpage.h"
-#include "core/fpdftext/include/ipdf_textpagefind.h"
-#include "core/fpdftext/unicodenormalization.h"
+#include "core/fpdftext/include/cpdf_linkextract.h"
+#include "core/fpdftext/include/cpdf_textpage.h"
+#include "core/fpdftext/include/cpdf_textpagefind.h"
+#include "core/fpdftext/unicodenormalizationdata.h"
 #include "core/fxcrt/fx_bidi.h"
 #include "core/fxcrt/include/fx_ext.h"
 #include "core/fxcrt/include/fx_ucd.h"
@@ -36,9 +36,24 @@
 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002
 #define FPDFTEXT_CONSECUTIVE 0x00000004
 
+#define FPDFTEXT_CHAR_ERROR -1
+#define FPDFTEXT_CHAR_NORMAL 0
+#define FPDFTEXT_CHAR_GENERATED 1
+#define FPDFTEXT_CHAR_UNUNICODE 2
+#define FPDFTEXT_CHAR_HYPHEN 3
+#define FPDFTEXT_CHAR_PIECE 4
+#define FPDFTEXT_MC_PASS 0
+#define FPDFTEXT_MC_DONE 1
+#define FPDFTEXT_MC_DELAY 2
+
 namespace {
 
-FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) {
+const FX_FLOAT kDefaultFontSize = 1.0f;
+const uint16_t* const g_UnicodeData_Normalization_Maps[5] = {
+    nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2,
+    g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4};
+
+FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) {
   if (curChar < 255) {
     return FALSE;
   }
@@ -55,7 +70,7 @@ FX_BOOL _IsIgnoreSpaceCharacter(FX_WCHAR curChar) {
   return TRUE;
 }
 
-FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) {
+FX_FLOAT NormalizeThreshold(FX_FLOAT threshold) {
   if (threshold < 300) {
     return threshold / 2.0f;
   }
@@ -68,8 +83,8 @@ FX_FLOAT _NormalizeThreshold(FX_FLOAT threshold) {
   return threshold / 6.0f;
 }
 
-FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj,
-                             const CFX_Matrix& matrix) {
+FX_FLOAT CalculateBaseSpace(const CPDF_TextObject* pTextObj,
+                            const CFX_Matrix& matrix) {
   FX_FLOAT baseSpace = 0.0;
   const int nItems = pTextObj->CountItems();
   if (pTextObj->m_TextState.GetObject()->m_CharSpace && nItems >= 3) {
@@ -94,23 +109,39 @@ FX_FLOAT _CalculateBaseSpace(const CPDF_TextObject* pTextObj,
   return baseSpace;
 }
 
-const FX_FLOAT kDefaultFontSize = 1.0f;
-
-}  // namespace
-
-IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage,
-                                             int flags) {
-  return new CPDF_TextPage(pPage, flags);
-}
-
-IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind(
-    const IPDF_TextPage* pTextPage) {
-  return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr;
+FX_STRSIZE Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) {
+  wch = wch & 0xFFFF;  // Only the low 16 bits are used as the table index.
+  FX_WCHAR wFind = g_UnicodeData_Normalization[wch];
+  if (!wFind) {  // Zero entry: |wch| is passed through as a single unit.
+    if (pDst) {
+      *pDst = wch;
+    }
+    return 1;
+  }
+  if (wFind >= 0x8000) {  // Entry >= 0x8000: one unit, offset into map 1.
+    wch = wFind - 0x8000;
+    wFind = 1;
+  } else {  // Otherwise: low 12 bits are the offset, the rest picks the map.
+    wch = wFind & 0x0FFF;
+    wFind >>= 12;  // NOTE(review): can be 0..7, table has 5 slots -- confirm.
+  }
+  const uint16_t* pMap = g_UnicodeData_Normalization_Maps[wFind];
+  if (pMap == g_UnicodeData_Normalization_Map4) {
+    pMap = g_UnicodeData_Normalization_Map4 + wch;
+    wFind = (FX_WCHAR)(*pMap++);  // Map4 entries start with their length.
+  } else {
+    pMap += wch;  // For the other maps |wFind| doubles as the length.
+  }
+  if (pDst) {  // A null |pDst| makes the call a pure length query.
+    FX_WCHAR n = wFind;
+    while (n--) {
+      *pDst++ = *pMap++;
+    }
+  }
+  return (FX_STRSIZE)wFind;  // Number of units written (or required).
 }
 
-IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
-  return new CPDF_LinkExtract();
-}
+}  // namespace
 
 #define TEXT_BLANK_CHAR L' '
 #define TEXT_LINEFEED_CHAR L'\n'
@@ -932,10 +963,10 @@ void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
     info.m_Index = m_TextBuf.GetLength();
     if (wChar >= 0xFB00 && wChar <= 0xFB06) {
       FX_WCHAR* pDst = NULL;
-      FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
+      FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
       if (nCount >= 1) {
         pDst = FX_Alloc(FX_WCHAR, nCount);
-        FX_Unicode_GetNormalization(wChar, pDst);
+        Unicode_GetNormalization(wChar, pDst);
         for (int nIndex = 0; nIndex < nCount; nIndex++) {
           PAGECHAR_INFO info2 = info;
           info2.m_Unicode = pDst[nIndex];
@@ -960,10 +991,10 @@ void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
     info.m_Index = m_TextBuf.GetLength();
     wChar = FX_GetMirrorChar(wChar, TRUE, FALSE);
     FX_WCHAR* pDst = NULL;
-    FX_STRSIZE nCount = FX_Unicode_GetNormalization(wChar, pDst);
+    FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
     if (nCount >= 1) {
       pDst = FX_Alloc(FX_WCHAR, nCount);
-      FX_Unicode_GetNormalization(wChar, pDst);
+      Unicode_GetNormalization(wChar, pDst);
       for (int nIndex = 0; nIndex < nCount; nIndex++) {
         PAGECHAR_INFO info2 = info;
         info2.m_Unicode = pDst[nIndex];
@@ -1377,7 +1408,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
   m_pPreTextObj = pTextObj;
   m_perMatrix.Copy(formMatrix);
   int nItems = pTextObj->CountItems();
-  FX_FLOAT baseSpace = _CalculateBaseSpace(pTextObj, matrix);
+  FX_FLOAT baseSpace = CalculateBaseSpace(pTextObj, matrix);
 
   const FX_BOOL bR2L = IsRightToLeft(pTextObj, pFont, nItems);
   const FX_BOOL bIsBidiAndMirrorInverse =
@@ -1430,7 +1461,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
         int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
         threshold = this_width > last_width ? (FX_FLOAT)this_width
                                             : (FX_FLOAT)last_width;
-        threshold = _NormalizeThreshold(threshold);
+        threshold = NormalizeThreshold(threshold);
         threshold = fontsize_h * threshold / 1000;
       }
       if (threshold && (spacing && spacing >= threshold)) {
@@ -1898,7 +1929,7 @@ FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) {
   return TRUE;
 }
 
-CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
+CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage)
     : m_pTextPage(pTextPage),
       m_flags(0),
       m_findNextStart(-1),
@@ -2054,8 +2085,8 @@ FX_BOOL CPDF_TextPageFind::FindNext() {
       CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
       int lastChar = lastWord.GetAt(lastWord.GetLength() - 1);
       if (nStartPos == nResultPos &&
-          !(_IsIgnoreSpaceCharacter(lastChar) ||
-            _IsIgnoreSpaceCharacter(curChar))) {
+          !(IsIgnoreSpaceCharacter(lastChar) ||
+            IsIgnoreSpaceCharacter(curChar))) {
         bMatch = FALSE;
       }
       for (int d = PreResEndPos; d < nResultPos; d++) {
@@ -2174,7 +2205,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
     while (pos < csWord.GetLength()) {
       CFX_WideString curStr = csWord.Mid(pos, 1);
       FX_WCHAR curChar = csWord.GetAt(pos);
-      if (_IsIgnoreSpaceCharacter(curChar)) {
+      if (IsIgnoreSpaceCharacter(curChar)) {
         if (pos > 0 && curChar == 0x2019) {
           pos++;
           continue;
@@ -2306,7 +2337,7 @@ CPDF_LinkExtract::~CPDF_LinkExtract() {
   DeleteLinkList();
 }
 
-FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) {
+FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) {
   if (!pTextPage || !pTextPage->IsParsed())
     return FALSE;
 
diff --git a/core/fpdftext/fpdf_text_int.h b/core/fpdftext/fpdf_text_int.h
deleted file mode 100644
index 7acab55ccb..0000000000
--- a/core/fpdftext/fpdf_text_int.h
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef CORE_FPDFTEXT_FPDF_TEXT_INT_H_
-#define CORE_FPDFTEXT_FPDF_TEXT_INT_H_
-
-#include <deque>
-#include <vector>
-
-#include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h"
-#include "core/fpdfapi/fpdf_page/include/cpdf_form.h"
-#include "core/fpdfapi/fpdf_page/include/cpdf_page.h"
-#include "core/fpdftext/include/ipdf_linkextract.h"
-#include "core/fpdftext/include/ipdf_textpage.h"
-#include "core/fpdftext/include/ipdf_textpagefind.h"
-#include "core/fxcrt/include/fx_basic.h"
-
-class CFX_BidiChar;
-class CPDF_FormObject;
-class CPDF_LinkExtract;
-class CPDF_TextPageFind;
-class CPDF_Font;
-
-#define FPDFTEXT_CHAR_ERROR -1
-#define FPDFTEXT_CHAR_NORMAL 0
-#define FPDFTEXT_CHAR_GENERATED 1
-#define FPDFTEXT_CHAR_UNUNICODE 2
-#define FPDFTEXT_CHAR_HYPHEN 3
-#define FPDFTEXT_CHAR_PIECE 4
-#define FPDFTEXT_MC_PASS 0
-#define FPDFTEXT_MC_DONE 1
-#define FPDFTEXT_MC_DELAY 2
-
-struct PAGECHAR_INFO {
-  int m_CharCode;
-  FX_WCHAR m_Unicode;
-  FX_FLOAT m_OriginX;
-  FX_FLOAT m_OriginY;
-  int32_t m_Flag;
-  CFX_FloatRect m_CharBox;
-  CPDF_TextObject* m_pTextObj;
-  CFX_Matrix m_Matrix;
-  int m_Index;
-};
-
-struct FPDF_SEGMENT {
-  int m_Start;
-  int m_nCount;
-};
-
-struct PDFTEXT_Obj {
-  CPDF_TextObject* m_pTextObj;
-  CFX_Matrix m_formMatrix;
-};
-
-class CPDF_TextPage : public IPDF_TextPage {
- public:
-  CPDF_TextPage(const CPDF_Page* pPage, int flags);
-  ~CPDF_TextPage() override {}
-
-  // IPDF_TextPage:
-  void ParseTextPage() override;
-  bool IsParsed() const override { return m_bIsParsed; }
-  int CharIndexFromTextIndex(int TextIndex) const override;
-  int TextIndexFromCharIndex(int CharIndex) const override;
-  int CountChars() const override;
-  void GetCharInfo(int index, FPDF_CHAR_INFO* info) const override;
-  void GetRectArray(int start,
-                    int nCount,
-                    CFX_RectArray& rectArray) const override;
-  int GetIndexAtPos(CFX_FloatPoint point,
-                    FX_FLOAT xTolerance,
-                    FX_FLOAT yTolerance) const override;
-  int GetIndexAtPos(FX_FLOAT x,
-                    FX_FLOAT y,
-                    FX_FLOAT xTolerance,
-                    FX_FLOAT yTolerance) const override;
-  CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const override;
-  void GetRectsArrayByRect(const CFX_FloatRect& rect,
-                           CFX_RectArray& resRectArray) const override;
-  CFX_WideString GetPageText(int start = 0, int nCount = -1) const override;
-  int CountRects(int start, int nCount) override;
-  void GetRect(int rectIndex,
-               FX_FLOAT& left,
-               FX_FLOAT& top,
-               FX_FLOAT& right,
-               FX_FLOAT& bottom) const override;
-  FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) override;
-  FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) override;
-  int CountBoundedSegments(FX_FLOAT left,
-                           FX_FLOAT top,
-                           FX_FLOAT right,
-                           FX_FLOAT bottom,
-                           FX_BOOL bContains = FALSE) override;
-  void GetBoundedSegment(int index, int& start, int& count) const override;
-  int GetWordBreak(int index, int direction) const override;
-
-  static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1,
-                                 const CFX_FloatRect& rect2);
-  static FX_BOOL IsLetter(FX_WCHAR unicode);
-
- private:
-  FX_BOOL IsHyphen(FX_WCHAR curChar);
-  bool IsControlChar(const PAGECHAR_INFO& charInfo);
-  FX_BOOL GetBaselineRotate(int start, int end, int& Rotate);
-  void ProcessObject();
-  void ProcessFormObject(CPDF_FormObject* pFormObj,
-                         const CFX_Matrix& formMatrix);
-  void ProcessTextObject(PDFTEXT_Obj pObj);
-  void ProcessTextObject(CPDF_TextObject* pTextObj,
-                         const CFX_Matrix& formMatrix,
-                         const CPDF_PageObjectList* pObjList,
-                         CPDF_PageObjectList::const_iterator ObjPos);
-  int ProcessInsertObject(const CPDF_TextObject* pObj,
-                          const CFX_Matrix& formMatrix);
-  FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
-  FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
-                                const CPDF_PageObjectList* pObjList,
-                                CPDF_PageObjectList::const_iterator ObjPos);
-  FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1,
-                           CPDF_TextObject* pTextObj2);
-  int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const;
-  void CloseTempLine();
-  void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str);
-  int32_t PreMarkedContent(PDFTEXT_Obj pObj);
-  void ProcessMarkedContent(PDFTEXT_Obj pObj);
-  void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const;
-  void FindPreviousTextObject(void);
-  void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
-  void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
-  int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
-  int32_t FindTextlineFlowDirection();
-
-  void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend);
-  FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
-                        const CPDF_Font* pFont,
-                        int nItems) const;
-
-  const CPDF_Page* const m_pPage;
-  std::vector<uint16_t> m_CharIndex;
-  std::deque<PAGECHAR_INFO> m_CharList;
-  std::deque<PAGECHAR_INFO> m_TempCharList;
-  CFX_WideTextBuf m_TextBuf;
-  CFX_WideTextBuf m_TempTextBuf;
-  const int m_parserflag;
-  CPDF_TextObject* m_pPreTextObj;
-  CFX_Matrix m_perMatrix;
-  bool m_bIsParsed;
-  CFX_Matrix m_DisplayMatrix;
-  CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments;
-  CFX_RectArray m_SelRects;
-  CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj;
-  int32_t m_TextlineDir;
-  CFX_FloatRect m_CurlineRect;
-};
-
-class CPDF_TextPageFind : public IPDF_TextPageFind {
- public:
-  explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
-  ~CPDF_TextPageFind() override {}
-
-  // IPDF_TextPageFind
-  FX_BOOL FindFirst(const CFX_WideString& findwhat,
-                    int flags,
-                    int startPos = 0) override;
-  FX_BOOL FindNext() override;
-  FX_BOOL FindPrev() override;
-  void GetRectArray(CFX_RectArray& rects) const override;
-  int GetCurOrder() const override;
-  int GetMatchedCount() const override;
-
- protected:
-  void ExtractFindWhat(const CFX_WideString& findwhat);
-  FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText,
-                           int startPos,
-                           int endPos);
-  FX_BOOL ExtractSubString(CFX_WideString& rString,
-                           const FX_WCHAR* lpszFullString,
-                           int iSubString,
-                           FX_WCHAR chSep);
-  CFX_WideString MakeReverse(const CFX_WideString& str);
-  int ReverseFind(const CFX_WideString& csPageText,
-                  const CFX_WideString& csWord,
-                  int nStartPos,
-                  int& WordLength);
-  int GetCharIndex(int index) const;
-
- private:
-  std::vector<uint16_t> m_CharIndex;
-  const IPDF_TextPage* m_pTextPage;
-  CFX_WideString m_strText;
-  CFX_WideString m_findWhat;
-  int m_flags;
-  std::vector<CFX_WideString> m_csFindWhatArray;
-  int m_findNextStart;
-  int m_findPreStart;
-  FX_BOOL m_bMatchCase;
-  FX_BOOL m_bMatchWholeWord;
-  int m_resStart;
-  int m_resEnd;
-  CFX_RectArray m_resArray;
-  FX_BOOL m_IsFind;
-};
-
-class CPDF_LinkExt {
- public:
-  CPDF_LinkExt() {}
-  int m_Start;
-  int m_Count;
-  CFX_WideString m_strUrl;
-  virtual ~CPDF_LinkExt() {}
-};
-
-typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray;
-
-class CPDF_LinkExtract : public IPDF_LinkExtract {
- public:
-  CPDF_LinkExtract();
-  ~CPDF_LinkExtract() override;
-
-  // IPDF_LinkExtract
-  FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) override;
-  int CountLinks() const override;
-  CFX_WideString GetURL(int index) const override;
-  void GetBoundedSegment(int index, int& start, int& count) const override;
-  void GetRects(int index, CFX_RectArray& rects) const override;
-
-  FX_BOOL IsExtract() const { return m_bIsParsed; }
-
- protected:
-  void ParseLink();
-  void DeleteLinkList();
-  FX_BOOL CheckWebLink(CFX_WideString& strBeCheck);
-  bool CheckMailLink(CFX_WideString& str);
-  void AppendToLinkList(int start, int count, const CFX_WideString& strUrl);
-
- private:
-  LINK_InfoArray m_LinkList;
-  const CPDF_TextPage* m_pTextPage;
-  CFX_WideString m_strPageText;
-  bool m_bIsParsed;
-};
-
-#endif  // CORE_FPDFTEXT_FPDF_TEXT_INT_H_
diff --git a/core/fpdftext/fpdf_text_int_unittest.cpp b/core/fpdftext/fpdf_text_int_unittest.cpp
index e62e885d4b..e1dd0f7504 100644
--- a/core/fpdftext/fpdf_text_int_unittest.cpp
+++ b/core/fpdftext/fpdf_text_int_unittest.cpp
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include "core/fpdftext/fpdf_text_int.h"
+#include "core/fpdftext/include/cpdf_linkextract.h"
 
 #include "testing/gtest/include/gtest/gtest.h"
 
diff --git a/core/fpdftext/include/cpdf_linkextract.h b/core/fpdftext/include/cpdf_linkextract.h
new file mode 100644
index 0000000000..263768ee5d
--- /dev/null
+++ b/core/fpdftext/include/cpdf_linkextract.h
@@ -0,0 +1,54 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_
+#define CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_
+
+#include "core/fxcrt/include/fx_basic.h"
+#include "core/fxcrt/include/fx_coordinates.h"
+#include "core/fxcrt/include/fx_string.h"
+#include "core/fxcrt/include/fx_system.h"
+
+class CPDF_TextPage;
+
+class CPDF_LinkExt {
+ public:
+  CPDF_LinkExt() : m_Start(0), m_Count(0) {}  // No indeterminate ints.
+  ~CPDF_LinkExt() {}
+
+  int m_Start;  // presumably the link's first char index -- TODO confirm
+  int m_Count;  // presumably the link's length in chars -- TODO confirm
+  CFX_WideString m_strUrl;
+};
+
+class CPDF_LinkExtract {
+ public:
+  CPDF_LinkExtract();
+  ~CPDF_LinkExtract();  // Runs DeleteLinkList().
+
+  FX_BOOL ExtractLinks(const CPDF_TextPage* pTextPage);  // FALSE if null or unparsed.
+  int CountLinks() const;
+  CFX_WideString GetURL(int index) const;
+  void GetBoundedSegment(int index, int& start, int& count) const;
+  void GetRects(int index, CFX_RectArray& rects) const;
+
+  FX_BOOL IsExtract() const { return m_bIsParsed; }
+
+ protected:
+  void ParseLink();
+  void DeleteLinkList();
+  FX_BOOL CheckWebLink(CFX_WideString& strBeCheck);
+  bool CheckMailLink(CFX_WideString& str);
+  void AppendToLinkList(int start, int count, const CFX_WideString& strUrl);
+
+ private:
+  CFX_ArrayTemplate<CPDF_LinkExt*> m_LinkList;  // NOTE(review): raw pointers; presumably freed by DeleteLinkList() -- confirm.
+  const CPDF_TextPage* m_pTextPage;
+  CFX_WideString m_strPageText;
+  bool m_bIsParsed;
+};
+
+#endif  // CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_
diff --git a/core/fpdftext/include/cpdf_textpage.h b/core/fpdftext/include/cpdf_textpage.h
new file mode 100644
index 0000000000..19e8791b5a
--- /dev/null
+++ b/core/fpdftext/include/cpdf_textpage.h
@@ -0,0 +1,157 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_
+#define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_
+
+#include <deque>
+#include <vector>
+
+#include "core/fpdfapi/fpdf_page/cpdf_pageobjectlist.h"
+#include "core/fxcrt/include/fx_basic.h"
+#include "core/fxcrt/include/fx_coordinates.h"
+#include "core/fxcrt/include/fx_string.h"
+
+class CFX_BidiChar;
+class CPDF_Font;
+class CPDF_FormObject;
+class CPDF_Page;
+class CPDF_TextObject;
+
+struct FPDF_CHAR_INFO {
+  FX_WCHAR m_Unicode;
+  FX_WCHAR m_Charcode;
+  int32_t m_Flag;
+  FX_FLOAT m_FontSize;
+  FX_FLOAT m_OriginX;
+  FX_FLOAT m_OriginY;
+  CFX_FloatRect m_CharBox;
+  CPDF_TextObject* m_pTextObj;
+  CFX_Matrix m_Matrix;
+};
+
+struct FPDF_SEGMENT {
+  int m_Start;
+  int m_nCount;
+};
+
+struct PAGECHAR_INFO {
+  int m_CharCode;
+  FX_WCHAR m_Unicode;
+  FX_FLOAT m_OriginX;
+  FX_FLOAT m_OriginY;
+  int32_t m_Flag;
+  CFX_FloatRect m_CharBox;
+  CPDF_TextObject* m_pTextObj;
+  CFX_Matrix m_Matrix;
+  int m_Index;
+};
+
+struct PDFTEXT_Obj {
+  CPDF_TextObject* m_pTextObj;
+  CFX_Matrix m_formMatrix;
+};
+
+class CPDF_TextPage {
+ public:
+  CPDF_TextPage(const CPDF_Page* pPage, int flags);
+  ~CPDF_TextPage() {}
+
+  // Public API (formerly declared by the removed IPDF_TextPage interface):
+  void ParseTextPage();
+  bool IsParsed() const { return m_bIsParsed; }
+  int CharIndexFromTextIndex(int TextIndex) const;
+  int TextIndexFromCharIndex(int CharIndex) const;
+  int CountChars() const;
+  void GetCharInfo(int index, FPDF_CHAR_INFO* info) const;
+  void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const;
+  int GetIndexAtPos(CFX_FloatPoint point,
+                    FX_FLOAT xTolerance,
+                    FX_FLOAT yTolerance) const;
+  int GetIndexAtPos(FX_FLOAT x,
+                    FX_FLOAT y,
+                    FX_FLOAT xTolerance,
+                    FX_FLOAT yTolerance) const;
+  CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const;
+  void GetRectsArrayByRect(const CFX_FloatRect& rect,
+                           CFX_RectArray& resRectArray) const;
+  CFX_WideString GetPageText(int start = 0, int nCount = -1) const;
+  int CountRects(int start, int nCount);
+  void GetRect(int rectIndex,
+               FX_FLOAT& left,
+               FX_FLOAT& top,
+               FX_FLOAT& right,
+               FX_FLOAT& bottom) const;
+  FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate);
+  FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate);
+  int CountBoundedSegments(FX_FLOAT left,
+                           FX_FLOAT top,
+                           FX_FLOAT right,
+                           FX_FLOAT bottom,
+                           FX_BOOL bContains = FALSE);
+  void GetBoundedSegment(int index, int& start, int& count) const;
+  int GetWordBreak(int index, int direction) const;
+
+  static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1,
+                                 const CFX_FloatRect& rect2);
+  static FX_BOOL IsLetter(FX_WCHAR unicode);
+
+ private:
+  FX_BOOL IsHyphen(FX_WCHAR curChar);
+  bool IsControlChar(const PAGECHAR_INFO& charInfo);
+  FX_BOOL GetBaselineRotate(int start, int end, int& Rotate);
+  void ProcessObject();
+  void ProcessFormObject(CPDF_FormObject* pFormObj,
+                         const CFX_Matrix& formMatrix);
+  void ProcessTextObject(PDFTEXT_Obj pObj);
+  void ProcessTextObject(CPDF_TextObject* pTextObj,
+                         const CFX_Matrix& formMatrix,
+                         const CPDF_PageObjectList* pObjList,
+                         CPDF_PageObjectList::const_iterator ObjPos);
+  int ProcessInsertObject(const CPDF_TextObject* pObj,
+                          const CFX_Matrix& formMatrix);
+  FX_BOOL GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
+  FX_BOOL IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
+                                const CPDF_PageObjectList* pObjList,
+                                CPDF_PageObjectList::const_iterator ObjPos);
+  FX_BOOL IsSameTextObject(CPDF_TextObject* pTextObj1,
+                           CPDF_TextObject* pTextObj2);
+  int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const;
+  void CloseTempLine();
+  void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str);
+  int32_t PreMarkedContent(PDFTEXT_Obj pObj);
+  void ProcessMarkedContent(PDFTEXT_Obj pObj);
+  void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const;
+  void FindPreviousTextObject(void);
+  void AddCharInfoByLRDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
+  void AddCharInfoByRLDirection(FX_WCHAR wChar, PAGECHAR_INFO info);
+  int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
+  int32_t FindTextlineFlowDirection();
+
+  void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend);
+  FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
+                        const CPDF_Font* pFont,
+                        int nItems) const;
+
+  const CPDF_Page* const m_pPage;
+  std::vector<uint16_t> m_CharIndex;
+  std::deque<PAGECHAR_INFO> m_CharList;
+  std::deque<PAGECHAR_INFO> m_TempCharList;
+  CFX_WideTextBuf m_TextBuf;
+  CFX_WideTextBuf m_TempTextBuf;
+  const int m_parserflag;
+  CPDF_TextObject* m_pPreTextObj;
+  CFX_Matrix m_perMatrix;
+  bool m_bIsParsed;
+  CFX_Matrix m_DisplayMatrix;
+  CFX_ArrayTemplate<FPDF_SEGMENT> m_Segments;
+  CFX_RectArray m_SelRects;
+  CFX_ArrayTemplate<PDFTEXT_Obj> m_LineObj;
+  int32_t m_TextlineDir;
+  CFX_FloatRect m_CurlineRect;
+};
+
+#endif  // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGE_H_
diff --git a/core/fpdftext/include/cpdf_textpagefind.h b/core/fpdftext/include/cpdf_textpagefind.h
new file mode 100644
index 0000000000..ec739e4896
--- /dev/null
+++ b/core/fpdftext/include/cpdf_textpagefind.h
@@ -0,0 +1,65 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGEFIND_H_
+#define CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGEFIND_H_
+
+#include <vector>
+
+#include "core/fxcrt/include/fx_coordinates.h"
+#include "core/fxcrt/include/fx_string.h"
+#include "core/fxcrt/include/fx_system.h"
+
+class CPDF_TextPage;
+
+class CPDF_TextPageFind {
+ public:
+  explicit CPDF_TextPageFind(const CPDF_TextPage* pTextPage);
+  ~CPDF_TextPageFind() {}
+
+  FX_BOOL FindFirst(const CFX_WideString& findwhat,
+                    int flags,
+                    int startPos = 0);
+  FX_BOOL FindNext();
+  FX_BOOL FindPrev();
+  void GetRectArray(CFX_RectArray& rects) const;
+  int GetCurOrder() const;
+  int GetMatchedCount() const;
+
+ protected:
+  void ExtractFindWhat(const CFX_WideString& findwhat);
+  FX_BOOL IsMatchWholeWord(const CFX_WideString& csPageText,
+                           int startPos,
+                           int endPos);
+  FX_BOOL ExtractSubString(CFX_WideString& rString,
+                           const FX_WCHAR* lpszFullString,
+                           int iSubString,
+                           FX_WCHAR chSep);
+  CFX_WideString MakeReverse(const CFX_WideString& str);
+  int ReverseFind(const CFX_WideString& csPageText,
+                  const CFX_WideString& csWord,
+                  int nStartPos,
+                  int& WordLength);
+  int GetCharIndex(int index) const;
+
+ private:
+  std::vector<uint16_t> m_CharIndex;
+  const CPDF_TextPage* m_pTextPage;
+  CFX_WideString m_strText;
+  CFX_WideString m_findWhat;
+  int m_flags;
+  std::vector<CFX_WideString> m_csFindWhatArray;
+  int m_findNextStart;
+  int m_findPreStart;
+  FX_BOOL m_bMatchCase;
+  FX_BOOL m_bMatchWholeWord;
+  int m_resStart;
+  int m_resEnd;
+  CFX_RectArray m_resArray;
+  FX_BOOL m_IsFind;
+};
+
+#endif  // CORE_FPDFTEXT_INCLUDE_CPDF_TEXTPAGEFIND_H_
diff --git a/core/fpdftext/include/ipdf_linkextract.h b/core/fpdftext/include/ipdf_linkextract.h
deleted file mode 100644
index c1a5f2f04e..0000000000
--- a/core/fpdftext/include/ipdf_linkextract.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2016 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef CORE_FPDFTEXT_INCLUDE_IPDF_LINKEXTRACT_H_
-#define CORE_FPDFTEXT_INCLUDE_IPDF_LINKEXTRACT_H_
-
-#include "core/fpdftext/include/ipdf_textpage.h"
-#include "core/fxcrt/include/fx_coordinates.h"
-#include "core/fxcrt/include/fx_system.h"
-
-class IPDF_LinkExtract {
- public:
-  static IPDF_LinkExtract* CreateLinkExtract();
-  virtual ~IPDF_LinkExtract() {}
-
-  virtual FX_BOOL ExtractLinks(const IPDF_TextPage* pTextPage) = 0;
-  virtual int CountLinks() const = 0;
-  virtual CFX_WideString GetURL(int index) const = 0;
-  virtual void GetBoundedSegment(int index, int& start, int& count) const = 0;
-  virtual void GetRects(int index, CFX_RectArray& rects) const = 0;
-};
-
-#endif  // CORE_FPDFTEXT_INCLUDE_IPDF_LINKEXTRACT_H_
diff --git a/core/fpdftext/include/ipdf_textpage.h b/core/fpdftext/include/ipdf_textpage.h
deleted file mode 100644
index 3849cd4004..0000000000
--- a/core/fpdftext/include/ipdf_textpage.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2016 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef CORE_FPDFTEXT_INCLUDE_IPDF_TEXTPAGE_H_
-#define CORE_FPDFTEXT_INCLUDE_IPDF_TEXTPAGE_H_
-
-#include "core/fxcrt/include/fx_coordinates.h"
-#include "core/fxcrt/include/fx_system.h"
-
-class CPDF_TextObject;
-class CPDF_Page;
-
-struct FPDF_CHAR_INFO {
-  FX_WCHAR m_Unicode;
-  FX_WCHAR m_Charcode;
-  int32_t m_Flag;
-  FX_FLOAT m_FontSize;
-  FX_FLOAT m_OriginX;
-  FX_FLOAT m_OriginY;
-  CFX_FloatRect m_CharBox;
-  CPDF_TextObject* m_pTextObj;
-  CFX_Matrix m_Matrix;
-};
-
-class IPDF_TextPage {
- public:
-  static IPDF_TextPage* CreateTextPage(const CPDF_Page* pPage, int flags = 0);
-  virtual ~IPDF_TextPage() {}
-
-  virtual void ParseTextPage() = 0;
-  virtual bool IsParsed() const = 0;
-  virtual int CharIndexFromTextIndex(int TextIndex) const = 0;
-  virtual int TextIndexFromCharIndex(int CharIndex) const = 0;
-  virtual int CountChars() const = 0;
-  virtual void GetCharInfo(int index, FPDF_CHAR_INFO* info) const = 0;
-  virtual void GetRectArray(int start,
-                            int nCount,
-                            CFX_RectArray& rectArray) const = 0;
-  virtual int GetIndexAtPos(CFX_FloatPoint point,
-                            FX_FLOAT xTolerance,
-                            FX_FLOAT yTolerance) const = 0;
-  virtual int GetIndexAtPos(FX_FLOAT x,
-                            FX_FLOAT y,
-                            FX_FLOAT xTolerance,
-                            FX_FLOAT yTolerance) const = 0;
-  virtual CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const = 0;
-  virtual void GetRectsArrayByRect(const CFX_FloatRect& rect,
-                                   CFX_RectArray& resRectArray) const = 0;
-  virtual int CountRects(int start, int nCount) = 0;
-  virtual void GetRect(int rectIndex,
-                       FX_FLOAT& left,
-                       FX_FLOAT& top,
-                       FX_FLOAT& right,
-                       FX_FLOAT& bottom) const = 0;
-  virtual FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate) = 0;
-  virtual FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) = 0;
-  virtual int CountBoundedSegments(FX_FLOAT left,
-                                   FX_FLOAT top,
-                                   FX_FLOAT right,
-                                   FX_FLOAT bottom,
-                                   FX_BOOL bContains = FALSE) = 0;
-  virtual void GetBoundedSegment(int index, int& start, int& count) const = 0;
-  virtual int GetWordBreak(int index, int direction) const = 0;
-  virtual CFX_WideString GetPageText(int start = 0, int nCount = -1) const = 0;
-};
-
-#endif  // CORE_FPDFTEXT_INCLUDE_IPDF_TEXTPAGE_H_
diff --git a/core/fpdftext/include/ipdf_textpagefind.h b/core/fpdftext/include/ipdf_textpagefind.h
deleted file mode 100644
index b13432b59b..0000000000
--- a/core/fpdftext/include/ipdf_textpagefind.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2016 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef CORE_FPDFTEXT_INCLUDE_IPDF_TEXTPAGEFIND_H_
-#define CORE_FPDFTEXT_INCLUDE_IPDF_TEXTPAGEFIND_H_
-
-#include "core/fpdftext/include/ipdf_textpage.h"
-#include "core/fxcrt/include/fx_coordinates.h"
-#include "core/fxcrt/include/fx_string.h"
-
-class IPDF_TextPageFind {
- public:
-  static IPDF_TextPageFind* CreatePageFind(const IPDF_TextPage* pTextPage);
-  virtual ~IPDF_TextPageFind() {}
-
-  virtual FX_BOOL FindFirst(const CFX_WideString& findwhat,
-                            int flags,
-                            int startPos = 0) = 0;
-  virtual FX_BOOL FindNext() = 0;
-  virtual FX_BOOL FindPrev() = 0;
-  virtual void GetRectArray(CFX_RectArray& rects) const = 0;
-  virtual int GetCurOrder() const = 0;
-  virtual int GetMatchedCount() const = 0;
-};
-
-#endif  // CORE_FPDFTEXT_INCLUDE_IPDF_TEXTPAGEFIND_H_
diff --git a/core/fpdftext/unicodenormalization.cpp b/core/fpdftext/unicodenormalization.cpp
deleted file mode 100644
index 67ab57cb5d..0000000000
--- a/core/fpdftext/unicodenormalization.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#include "core/fpdftext/unicodenormalization.h"
-
-#include "core/fpdftext/unicodenormalizationdata.h"
-#include "core/fxcrt/include/fx_string.h"
-
-namespace {
-
-const uint16_t* const g_UnicodeData_Normalization_Maps[5] = {
-    nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2,
-    g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4};
-
-}  // namespace
-
-FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) {
-  wch = wch & 0xFFFF;
-  FX_WCHAR wFind = g_UnicodeData_Normalization[wch];
-  if (!wFind) {
-    if (pDst) {
-      *pDst = wch;
-    }
-    return 1;
-  }
-  if (wFind >= 0x8000) {
-    wch = wFind - 0x8000;
-    wFind = 1;
-  } else {
-    wch = wFind & 0x0FFF;
-    wFind >>= 12;
-  }
-  const uint16_t* pMap = g_UnicodeData_Normalization_Maps[wFind];
-  if (pMap == g_UnicodeData_Normalization_Map4) {
-    pMap = g_UnicodeData_Normalization_Map4 + wch;
-    wFind = (FX_WCHAR)(*pMap++);
-  } else {
-    pMap += wch;
-  }
-  if (pDst) {
-    FX_WCHAR n = wFind;
-    while (n--) {
-      *pDst++ = *pMap++;
-    }
-  }
-  return (FX_STRSIZE)wFind;
-}
diff --git a/core/fpdftext/unicodenormalization.h b/core/fpdftext/unicodenormalization.h
deleted file mode 100644
index ee3c8b2024..0000000000
--- a/core/fpdftext/unicodenormalization.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef CORE_FPDFTEXT_UNICODENORMALIZATION_H_
-#define CORE_FPDFTEXT_UNICODENORMALIZATION_H_
-
-#include "core/fxcrt/include/fx_system.h"
-
-FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst);
-
-#endif  // CORE_FPDFTEXT_UNICODENORMALIZATION_H_
diff --git a/fpdfsdk/fpdf_searchex.cpp b/fpdfsdk/fpdf_searchex.cpp
index ab2baf529c..9c987f2691 100644
--- a/fpdfsdk/fpdf_searchex.cpp
+++ b/fpdfsdk/fpdf_searchex.cpp
@@ -6,11 +6,12 @@
 
 #include "public/fpdf_searchex.h"
 
-#include "core/fpdftext/include/ipdf_textpage.h"
+#include "core/fpdftext/include/cpdf_textpage.h"
 
 DLLEXPORT int STDCALL
 FPDFText_GetCharIndexFromTextIndex(FPDF_TEXTPAGE text_page, int nTextIndex) {
   if (!text_page)
     return -1;
-  return ((IPDF_TextPage*)text_page)->CharIndexFromTextIndex(nTextIndex);
+  return static_cast<CPDF_TextPage*>(text_page)
+      ->CharIndexFromTextIndex(nTextIndex);
 }
diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp
index 0d0e86f12d..007e18274e 100644
--- a/fpdfsdk/fpdftext.cpp
+++ b/fpdfsdk/fpdftext.cpp
@@ -8,9 +8,9 @@
 
 #include "core/fpdfapi/fpdf_page/include/cpdf_page.h"
 #include "core/fpdfdoc/include/fpdf_doc.h"
-#include "core/fpdftext/include/ipdf_linkextract.h"
-#include "core/fpdftext/include/ipdf_textpage.h"
-#include "core/fpdftext/include/ipdf_textpagefind.h"
+#include "core/fpdftext/include/cpdf_linkextract.h"
+#include "core/fpdftext/include/cpdf_textpage.h"
+#include "core/fpdftext/include/cpdf_textpagefind.h"
 #include "fpdfsdk/include/fsdk_define.h"
 
 #ifdef PDF_ENABLE_XFA
@@ -22,10 +22,27 @@
 #include <tchar.h>
 #endif
 
+namespace {
+
+CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) {
+  return static_cast<CPDF_TextPage*>(text_page);
+}
+
+CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) {
+  return static_cast<CPDF_TextPageFind*>(handle);
+}
+
+CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) {
+  return static_cast<CPDF_LinkExtract*>(link);
+}
+
+}  // namespace
+
 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
   CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
   if (!pPDFPage)
     return nullptr;
+
 #ifdef PDF_ENABLE_XFA
   CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
   CPDFXFA_Document* pDoc = pPage->GetDocument();
@@ -33,18 +50,22 @@ DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
 #else  // PDF_ENABLE_XFA
   CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
 #endif  // PDF_ENABLE_XFA
-  IPDF_TextPage* textpage =
-      IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L());
+
+  CPDF_TextPage* textpage =
+      new CPDF_TextPage(pPDFPage, viewRef.IsDirectionR2L());
   textpage->ParseTextPage();
   return textpage;
 }
+
 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
-  delete (IPDF_TextPage*)text_page;
+  delete CPDFTextPageFromFPDFTextPage(text_page);
 }
+
 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
   if (!text_page)
     return -1;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
   return textpage->CountChars();
 }
 
@@ -52,8 +73,8 @@ DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
                                                    int index) {
   if (!text_page)
     return 0;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
 
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
   if (index < 0 || index >= textpage->CountChars())
     return 0;
 
@@ -66,7 +87,7 @@ DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
                                               int index) {
   if (!text_page)
     return 0;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
 
   if (index < 0 || index >= textpage->CountChars())
     return 0;
@@ -84,7 +105,7 @@ DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
                                            double* top) {
   if (!text_page)
     return;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
 
   if (index < 0 || index >= textpage->CountChars())
     return;
@@ -104,7 +125,8 @@ DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
                                                  double yTolerance) {
   if (!text_page)
     return -3;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
   return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
                                  (FX_FLOAT)yTolerance);
 }
@@ -115,8 +137,8 @@ DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
                                        unsigned short* result) {
   if (!text_page)
     return 0;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
 
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
   if (start >= textpage->CountChars())
     return 0;
 
@@ -137,9 +159,11 @@ DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
                                           int count) {
   if (!text_page)
     return 0;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
   return textpage->CountRects(start, count);
 }
+
 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
                                         int rect_index,
                                         double* left,
@@ -148,7 +172,8 @@ DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
                                         double* bottom) {
   if (!text_page)
     return;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
   CFX_FloatRect rect;
   textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
   *left = rect.left;
@@ -166,14 +191,14 @@ DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
                                               int buflen) {
   if (!text_page)
     return 0;
-  IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
+
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
   CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
                      (FX_FLOAT)top);
   CFX_WideString str = textpage->GetTextByRect(rect);
 
-  if (buflen <= 0 || !buffer) {
+  if (buflen <= 0 || !buffer)
     return str.GetLength();
-  }
 
   CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
   int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
@@ -192,91 +217,110 @@ DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
                                                     unsigned long flags,
                                                     int start_index) {
   if (!text_page)
-    return NULL;
-  IPDF_TextPageFind* textpageFind = NULL;
-  textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
+    return nullptr;
+
+  CPDF_TextPageFind* textpageFind =
+      new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page));
   FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
   textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
                           start_index);
   return textpageFind;
 }
+
 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
   if (!handle)
     return FALSE;
-  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
+
+  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
   return textpageFind->FindNext();
 }
+
 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
   if (!handle)
     return FALSE;
-  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
+
+  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
   return textpageFind->FindPrev();
 }
+
 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
   if (!handle)
     return 0;
-  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
+
+  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
   return textpageFind->GetCurOrder();
 }
+
 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
   if (!handle)
     return 0;
-  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
+
+  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
   return textpageFind->GetMatchedCount();
 }
+
 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
   if (!handle)
     return;
-  IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
+
+  CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
   delete textpageFind;
-  handle = NULL;
+  handle = nullptr;
 }
 
 // web link
 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
   if (!text_page)
-    return NULL;
-  IPDF_LinkExtract* pageLink = NULL;
-  pageLink = IPDF_LinkExtract::CreateLinkExtract();
-  pageLink->ExtractLinks((IPDF_TextPage*)text_page);
+    return nullptr;
+
+  CPDF_LinkExtract* pageLink = new CPDF_LinkExtract;
+  pageLink->ExtractLinks(CPDFTextPageFromFPDFTextPage(text_page));
   return pageLink;
 }
+
 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
   if (!link_page)
     return 0;
-  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
+
+  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
   return pageLink->CountLinks();
 }
+
 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
                                       int link_index,
                                       unsigned short* buffer,
                                       int buflen) {
   if (!link_page)
     return 0;
-  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
+
+  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
   CFX_WideString url = pageLink->GetURL(link_index);
 
   CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
   int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
   if (!buffer || buflen <= 0)
     return len;
+
   int size = len < buflen ? len : buflen;
   if (size > 0) {
-    FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
-                 size * sizeof(unsigned short));
-    cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
+    int buf_size = size * sizeof(unsigned short);
+    FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(buf_size), buf_size);
+    cbUTF16URL.ReleaseBuffer(buf_size);
   }
   return size;
 }
+
 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
                                           int link_index) {
   if (!link_page)
     return 0;
-  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
+
+  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
   CFX_RectArray rectArray;
   pageLink->GetRects(link_index, rectArray);
   return rectArray.GetSize();
 }
+
 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
                                         int link_index,
                                         int rect_index,
@@ -286,7 +330,8 @@ DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
                                         double* bottom) {
   if (!link_page)
     return;
-  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
+
+  CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
   CFX_RectArray rectArray;
   pageLink->GetRects(link_index, rectArray);
   if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
@@ -298,5 +343,5 @@ DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
   }
 }
 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
-  delete (IPDF_LinkExtract*)link_page;
+  delete CPDFLinkExtractFromFPDFPageLink(link_page);
 }
diff --git a/pdfium.gyp b/pdfium.gyp
index a34872b99b..9ca615d796 100644
--- a/pdfium.gyp
+++ b/pdfium.gyp
@@ -499,13 +499,10 @@
       'target_name': 'fpdftext',
       'type': 'static_library',
       'sources': [
-        'core/fpdftext/include/ipdf_linkextract.h',
-        'core/fpdftext/include/ipdf_textpage.h',
-        'core/fpdftext/include/ipdf_textpagefind.h',
+        'core/fpdftext/include/cpdf_linkextract.h',
+        'core/fpdftext/include/cpdf_textpage.h',
+        'core/fpdftext/include/cpdf_textpagefind.h',
         'core/fpdftext/fpdf_text_int.cpp',
-        'core/fpdftext/fpdf_text_int.h',
-        'core/fpdftext/unicodenormalization.cpp',
-        'core/fpdftext/unicodenormalization.h',
         'core/fpdftext/unicodenormalizationdata.cpp',
         'core/fpdftext/unicodenormalizationdata.h',
       ],