diff options
-rw-r--r-- | core/include/fpdftext/fpdf_text.h | 9 | ||||
-rw-r--r-- | core/src/fpdftext/fpdf_text_int.cpp | 201 | ||||
-rw-r--r-- | core/src/fpdftext/text_int.h | 4 |
3 files changed, 77 insertions, 137 deletions
diff --git a/core/include/fpdftext/fpdf_text.h b/core/include/fpdftext/fpdf_text.h index 77b0c29a6b..20efb9171f 100644 --- a/core/include/fpdftext/fpdf_text.h +++ b/core/include/fpdftext/fpdf_text.h @@ -67,13 +67,6 @@ typedef CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray; #define FPDFTEXT_WRITINGMODE_LRTB 1 #define FPDFTEXT_WRITINGMODE_RLTB 2 #define FPDFTEXT_WRITINGMODE_TBRL 3 -class CPDFText_ParseOptions { - public: - CPDFText_ParseOptions(); - FX_BOOL m_bGetCharCodeOnly; - FX_BOOL m_bNormalizeObjs; - FX_BOOL m_bOutputHyphen; -}; class IPDF_TextPage { public: @@ -82,8 +75,6 @@ class IPDF_TextPage { virtual ~IPDF_TextPage() {} - virtual void NormalizeObjects(FX_BOOL bNormalize) = 0; - virtual FX_BOOL ParseTextPage() = 0; virtual bool IsParsed() const = 0; diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp index 3b633a623f..67411e31b8 100644 --- a/core/src/fpdftext/fpdf_text_int.cpp +++ b/core/src/fpdftext/fpdf_text_int.cpp @@ -81,11 +81,6 @@ const FX_FLOAT kDefaultFontSize = 1.0f; } // namespace -CPDFText_ParseOptions::CPDFText_ParseOptions() - : m_bGetCharCodeOnly(FALSE), - m_bNormalizeObjs(TRUE), - m_bOutputHyphen(FALSE) {} - IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_Page* pPage, int flags) { return new CPDF_TextPage(pPage, flags); @@ -123,9 +118,6 @@ CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) (int)pPage->GetPageHeight(), 0); } -void CPDF_TextPage::NormalizeObjects(FX_BOOL bNormalize) { - m_ParseOptions.m_bNormalizeObjs = bNormalize; -} bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { switch (charInfo.m_Unicode) { case 0x2: @@ -151,55 +143,49 @@ FX_BOOL CPDF_TextPage::ParseTextPage() { m_pPreTextObj = NULL; ProcessObject(); m_bIsParsed = true; - if (!m_ParseOptions.m_bGetCharCodeOnly) { - m_CharIndex.RemoveAll(); - int nCount = m_charList.GetSize(); - if (nCount) { - m_CharIndex.Add(0); - } - for (int i = 0; i < nCount; i++) { - int indexSize = m_CharIndex.GetSize(); - FX_BOOL bNormal = FALSE; - PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); - if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { - bNormal = TRUE; - } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { - bNormal = FALSE; + m_CharIndex.RemoveAll(); + int nCount = m_charList.GetSize(); + if (nCount) { + m_CharIndex.Add(0); + } + for (int i = 0; i < nCount; i++) { + int indexSize = m_CharIndex.GetSize(); + FX_BOOL bNormal = FALSE; + PAGECHAR_INFO charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(i); + if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { + bNormal = TRUE; + } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { + bNormal = FALSE; + } else { + bNormal = TRUE; + } + if (bNormal) { + if (indexSize % 2) { + m_CharIndex.Add(1); } else { - bNormal = TRUE; + if (indexSize <= 0) { + continue; + } + m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); } - if (bNormal) { - if (indexSize % 2) { - m_CharIndex.Add(1); - } else { - if (indexSize <= 0) { - continue; - } - m_CharIndex.SetAt(indexSize - 1, - m_CharIndex.GetAt(indexSize - 1) + 1); + } else { + if (indexSize % 2) { + if (indexSize <= 0) { + continue; } + m_CharIndex.SetAt(indexSize - 1, i + 1); } else { - if (indexSize % 2) { - if (indexSize <= 0) { - continue; - } - m_CharIndex.SetAt(indexSize - 1, i + 1); - } else { - m_CharIndex.Add(i + 1); - } + m_CharIndex.Add(i + 1); } } - int indexSize = m_CharIndex.GetSize(); - if (indexSize % 2) { - m_CharIndex.RemoveAt(indexSize - 1); - } + } + int indexSize = m_CharIndex.GetSize(); + if (indexSize % 2) { + m_CharIndex.RemoveAt(indexSize - 1); } return TRUE; } int CPDF_TextPage::CountChars() const { - if (m_ParseOptions.m_bGetCharCodeOnly) { - return m_TextBuf.GetSize(); - } return m_charList.GetSize(); } int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { @@ -232,9 +218,6 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { void CPDF_TextPage::GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const { - if (m_ParseOptions.m_bGetCharCodeOnly) { - return; - } if (start < 0 || nCount == 0) { return; } @@ -321,7 +304,7 @@ void CPDF_TextPage::GetRectArray(int start, int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, FX_FLOAT xTolerance, FX_FLOAT yTolerance) const { - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) + if (!m_bIsParsed) return -3; int pos = 0; @@ -366,7 +349,7 @@ int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, } CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { CFX_WideString strText; - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) + if (!m_bIsParsed) return strText; int nCount = m_charList.GetSize(); @@ -404,7 +387,7 @@ CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { } void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const { - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) + if (!m_bIsParsed) return; CFX_FloatRect curRect; @@ -454,15 +437,12 @@ int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTolerance, FX_FLOAT yTolerance) const { - if (m_ParseOptions.m_bGetCharCodeOnly) { - return -3; - } CPDF_Point point(x, y); return GetIndexAtPos(point, xTolerance, yTolerance); } void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) + if (!m_bIsParsed) return; if (index < 0 || index >= m_charList.GetSize()) @@ -573,7 +553,7 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { return m_TextBuf.GetWideString().Mid(startindex, nCount); } int CPDF_TextPage::CountRects(int start, int nCount) { - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) + if (!m_bIsParsed || start < 0) return -1; if (nCount == -1 || nCount + start > m_charList.GetSize()) { @@ -588,7 +568,7 @@ void CPDF_TextPage::GetRect(int rectIndex, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT& bottom) const { - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) + if (!m_bIsParsed) return; if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) @@ -601,9 +581,6 @@ void CPDF_TextPage::GetRect(int rectIndex, } FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { - if (m_ParseOptions.m_bGetCharCodeOnly) { - return FALSE; - } if (end == start) { return FALSE; } @@ -641,9 +618,6 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) { - if (m_ParseOptions.m_bGetCharCodeOnly) { - return FALSE; - } int start, end, count, n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, TRUE); @@ -661,10 +635,10 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, return GetBaselineRotate(start, end, Rotate); } FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) + if (!m_bIsParsed) return FALSE; - if (rectIndex < 0 || rectIndex > m_SelRects.GetSize()) + if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) return FALSE; CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); @@ -675,9 +649,6 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains) { - if (m_ParseOptions.m_bGetCharCodeOnly) - return -1; - m_Segment.RemoveAll(); if (!m_bIsParsed) return -1; @@ -751,9 +722,6 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, return m_Segment.GetSize(); } void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { - if (m_ParseOptions.m_bGetCharCodeOnly) { - return; - } if (index < 0 || index >= m_Segment.GetSize()) { return; } @@ -761,7 +729,7 @@ void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { count = m_Segment.GetAt(index).m_nCount; } int CPDF_TextPage::GetWordBreak(int index, int direction) const { - if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) + if (!m_bIsParsed) return -1; if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) @@ -1007,9 +975,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) { Info2.m_Unicode = pDst[nIndex]; Info2.m_Flag = FPDFTEXT_CHAR_PIECE; m_TextBuf.AppendChar(Info2.m_Unicode); - if (!m_ParseOptions.m_bGetCharCodeOnly) { - m_charList.Add(Info2); - } + m_charList.Add(Info2); } FX_Free(pDst); return; @@ -1019,9 +985,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(CFX_WideString& str, int i) { } else { Info.m_Index = -1; } - if (!m_ParseOptions.m_bGetCharCodeOnly) { - m_charList.Add(Info); - } + m_charList.Add(Info); } void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { PAGECHAR_INFO Info = *(PAGECHAR_INFO*)m_TempCharList.GetAt(i); @@ -1038,9 +1002,7 @@ void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { Info2.m_Unicode = pDst[nIndex]; Info2.m_Flag = FPDFTEXT_CHAR_PIECE; m_TextBuf.AppendChar(Info2.m_Unicode); - if (!m_ParseOptions.m_bGetCharCodeOnly) { - m_charList.Add(Info2); - } + m_charList.Add(Info2); } FX_Free(pDst); return; @@ -1050,9 +1012,7 @@ void CPDF_TextPage::AddCharInfoByRLDirection(CFX_WideString& str, int i) { } else { Info.m_Index = -1; } - if (!m_ParseOptions.m_bGetCharCodeOnly) { - m_charList.Add(Info); - } + m_charList.Add(Info); } void CPDF_TextPage::CloseTempLine() { int count1 = m_TempCharList.GetSize(); @@ -1251,31 +1211,28 @@ void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, return; } int i = 0; - if (m_ParseOptions.m_bNormalizeObjs) { - for (i = count - 1; i >= 0; i--) { - PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i); - CFX_Matrix prev_matrix; - prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); - FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), - Prev_y = prev_Obj.m_pTextObj->GetPosY(); - prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y); - m_DisplayMatrix.Transform(Prev_x, Prev_y); - if (this_x >= Prev_x) { - if (i == count - 1) { - m_LineObj.Add(Obj); - } else { - m_LineObj.InsertAt(i + 1, Obj); - } - break; + for (i = count - 1; i >= 0; i--) { + PDFTEXT_Obj prev_Obj = m_LineObj.GetAt(i); + CFX_Matrix prev_matrix; + prev_Obj.m_pTextObj->GetTextMatrix(&prev_matrix); + FX_FLOAT Prev_x = prev_Obj.m_pTextObj->GetPosX(), + Prev_y = prev_Obj.m_pTextObj->GetPosY(); + prev_Obj.m_formMatrix.Transform(Prev_x, Prev_y); + m_DisplayMatrix.Transform(Prev_x, Prev_y); + if (this_x >= Prev_x) { + if (i == count - 1) { + m_LineObj.Add(Obj); + } else { + m_LineObj.InsertAt(i + 1, Obj); } + break; } - if (i < 0) { - m_LineObj.InsertAt(0, Obj); - } - } else { - m_LineObj.Add(Obj); + } + if (i < 0) { + m_LineObj.InsertAt(0, Obj); } } + int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { CPDF_TextObject* pTextObj = Obj.m_pTextObj; CPDF_ContentMarkData* pMarkData = @@ -1524,27 +1481,22 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { } else if (result == 2) { CloseTempLine(); if (m_TextBuf.GetSize()) { - if (m_ParseOptions.m_bGetCharCodeOnly) { + if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { m_TextBuf.AppendChar(TEXT_RETURN_CHAR); - m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); - } else { - if (GenerateCharInfo(TEXT_RETURN_CHAR, generateChar)) { - m_TextBuf.AppendChar(TEXT_RETURN_CHAR); - if (!formMatrix.IsIdentity()) { - generateChar.m_Matrix.Copy(formMatrix); - } - m_charList.Add(generateChar); + if (!formMatrix.IsIdentity()) { + generateChar.m_Matrix.Copy(formMatrix); } - if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { - m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); - if (!formMatrix.IsIdentity()) { - generateChar.m_Matrix.Copy(formMatrix); - } - m_charList.Add(generateChar); + m_charList.Add(generateChar); + } + if (GenerateCharInfo(TEXT_LINEFEED_CHAR, generateChar)) { + m_TextBuf.AppendChar(TEXT_LINEFEED_CHAR); + if (!formMatrix.IsIdentity()) { + generateChar.m_Matrix.Copy(formMatrix); } + m_charList.Add(generateChar); } } - } else if (result == 3 && !m_ParseOptions.m_bOutputHyphen) { + } else if (result == 3) { int32_t nChars = pTextObj->CountChars(); if (nChars == 1) { CPDF_TextObjectItem item; @@ -1985,8 +1937,7 @@ FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1, pTextObj2->m_Right, pTextObj2->m_Top); CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_Right, pTextObj1->m_Top); - if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty() && - !m_ParseOptions.m_bGetCharCodeOnly) { + if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) { FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left); int nCount = m_charList.GetSize(); if (nCount >= 2) { diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h index 3d179bab4c..2526a4abf6 100644 --- a/core/src/fpdftext/text_int.h +++ b/core/src/fpdftext/text_int.h @@ -54,9 +54,8 @@ class CPDF_TextPage : public IPDF_TextPage { CPDF_TextPage(const CPDF_Page* pPage, int flags); ~CPDF_TextPage() override {} - // IPDF_TextPage + // IPDF_TextPage: FX_BOOL ParseTextPage() override; - void NormalizeObjects(FX_BOOL bNormalize) override; bool IsParsed() const override { return m_bIsParsed; } int CharIndexFromTextIndex(int TextIndex) const override; int TextIndexFromCharIndex(int CharIndex) const override; @@ -130,7 +129,6 @@ class CPDF_TextPage : public IPDF_TextPage { const CPDF_Font* pFont, int nItems) const; - CPDFText_ParseOptions m_ParseOptions; CFX_WordArray m_CharIndex; const CPDF_PageObjectList* const m_pPage; PAGECHAR_InfoArray m_charList; |