summary refs log tree commit diff
path: root/core/src/fpdftext/fpdf_text_search.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/fpdftext/fpdf_text_search.cpp')
-rw-r--r-- core/src/fpdftext/fpdf_text_search.cpp 650
1 files changed, 325 insertions, 325 deletions
diff --git a/core/src/fpdftext/fpdf_text_search.cpp b/core/src/fpdftext/fpdf_text_search.cpp
index b029f69048..8c81ad9f66 100644
--- a/core/src/fpdftext/fpdf_text_search.cpp
+++ b/core/src/fpdftext/fpdf_text_search.cpp
@@ -1,325 +1,325 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#include "../../include/fpdfapi/fpdf_pageobj.h"
-#include "../../include/fpdftext/fpdf_text.h"
-#include "../../include/fpdfapi/fpdf_page.h"
-class CPDF_TextStream : public CFX_Object
-{
-public:
- CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjArray);
- ~CPDF_TextStream() {}
- FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine);
- CFX_WideTextBuf& m_Buffer;
- FX_BOOL m_bUseLF;
- CFX_PtrArray* m_pObjArray;
- const CPDF_TextObject* m_pLastObj;
-};
-CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjArray) : m_Buffer(buffer)
-{
- m_pLastObj = NULL;
- m_bUseLF = bUseLF;
- m_pObjArray = pObjArray;
-}
-FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_TextObject* pTextObj2)
-{
- if (!pTextObj1 || !pTextObj2) {
- return FALSE;
- }
- CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_Right, pTextObj2->m_Top);
- CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_Right, pTextObj1->m_Top);
- if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) {
- return TRUE;
- }
- if (!rcPreObj.IsEmpty() || !rcCurObj.IsEmpty()) {
- rcPreObj.Intersect(rcCurObj);
- if (rcPreObj.IsEmpty()) {
- return FALSE;
- }
- if (FXSYS_fabs(rcPreObj.Width() - rcCurObj.Width()) > rcCurObj.Width() / 2) {
- return FALSE;
- }
- if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) {
- return FALSE;
- }
- }
- int nPreCount = pTextObj2->CountItems();
- int nCurCount = pTextObj1->CountItems();
- if (nPreCount != nCurCount) {
- return FALSE;
- }
- for (int i = 0; i < nPreCount; i++) {
- CPDF_TextObjectItem itemPer, itemCur;
- pTextObj2->GetItemInfo(i, &itemPer);
- pTextObj1->GetItemInfo(i, &itemCur);
- if (itemCur.m_CharCode != itemPer.m_CharCode) {
- return FALSE;
- }
- }
- return TRUE;
-}
-int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont)
-{
- if(charCode == -1) {
- return 0;
- }
- int w = pFont->GetCharWidthF(charCode);
- if(w == 0) {
- CFX_ByteString str;
- pFont->AppendChar(str, charCode);
- w = pFont->GetStringWidth(str, 1);
- if(w == 0) {
- FX_RECT BBox;
- pFont->GetCharBBox(charCode, BBox);
- w = BBox.right - BBox.left;
- }
- }
- return w;
-}
-int FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, const CPDF_TextObject* pObj)
-{
- if(FPDFText_IsSameTextObject(pPrevObj, pObj)) {
- return -1;
- }
- CPDF_TextObjectItem item;
- int nItem = pPrevObj->CountItems();
- pPrevObj->GetItemInfo(nItem - 1, &item);
- FX_WCHAR preChar = 0, curChar = 0;
- CFX_WideString wstr = pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
- if(wstr.GetLength()) {
- preChar = wstr.GetAt(0);
- }
- FX_FLOAT last_pos = item.m_OriginX;
- int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont());
- FX_FLOAT last_width = nLastWidth * pPrevObj->GetFontSize() / 1000;
- last_width = FXSYS_fabs(last_width);
- pObj->GetItemInfo(0, &item);
- wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
- if(wstr.GetLength()) {
- curChar = wstr.GetAt(0);
- }
- int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont());
- FX_FLOAT this_width = nThisWidth * pObj->GetFontSize() / 1000;
- this_width = FXSYS_fabs(this_width);
- FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width / 4;
- CFX_AffineMatrix prev_matrix, prev_reverse;
- pPrevObj->GetTextMatrix(&prev_matrix);
- prev_reverse.SetReverse(prev_matrix);
- FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY();
- prev_reverse.Transform(x, y);
- if (FXSYS_fabs(y) > threshold * 2) {
- return 2;
- }
- threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth);
- threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 : threshold / 5) : (threshold / 2);
- threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) : FXSYS_fabs(pObj->GetFontSize());
- threshold /= 1000;
- if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && preChar != L' ')
- if(curChar != L' ' && preChar != L' ') {
- if((x - last_pos - last_width) > threshold || (last_pos - x - last_width) > threshold) {
- return 1;
- }
- if(x < 0 && (last_pos - x - last_width) > threshold) {
- return 1;
- }
- if((x - last_pos - last_width) > this_width || (x - last_pos - this_width) > last_width ) {
- return 1;
- }
- }
- if(last_pos + last_width > x + this_width && curChar == L' ') {
- return 3;
- }
- return 0;
-}
-FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine)
-{
- if(pObj->m_Bottom > 380 && pObj->m_Left < 45 && pObj->m_Top < 402) {
- int i = 0;
- }
- CPDF_Font* pFont = pObj->GetFont();
- CFX_AffineMatrix matrix;
- pObj->GetTextMatrix(&matrix);
- FX_FLOAT fs = pObj->GetFontSize();
- int item_index = 0;
- if (m_pLastObj) {
- int result = FPDFText_ProcessInterObj(m_pLastObj, pObj);
- if (result == 2) {
- int len = m_Buffer.GetLength();
- if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') {
- m_Buffer.Delete(len - 1, 1);
- if (m_pObjArray) {
- m_pObjArray->RemoveAt((len - 1) * 2, 2);
- }
- } else {
- if (bFirstLine) {
- return TRUE;
- }
- if (m_bUseLF) {
- m_Buffer.AppendChar(L'\r');
- m_Buffer.AppendChar(L'\n');
- if (m_pObjArray) {
- for (int i = 0; i < 4; i ++) {
- m_pObjArray->Add(NULL);
- }
- }
- } else {
- m_Buffer.AppendChar(' ');
- if (m_pObjArray) {
- m_pObjArray->Add(NULL);
- m_pObjArray->Add(NULL);
- }
- }
- }
- } else if (result == 1) {
- m_Buffer.AppendChar(L' ');
- if (m_pObjArray) {
- m_pObjArray->Add(NULL);
- m_pObjArray->Add(NULL);
- }
- } else if (result == -1) {
- m_pLastObj = pObj;
- return FALSE;
- } else if (result == 3) {
- item_index = 1;
- }
- }
- m_pLastObj = pObj;
- int nItems = pObj->CountItems();
- FX_FLOAT Ignorekerning = 0;
- for(int i = 1; i < nItems - 1; i += 2) {
- CPDF_TextObjectItem item;
- pObj->GetItemInfo(i, &item);
- if (item.m_CharCode == (FX_DWORD) - 1) {
- if(i == 1) {
- Ignorekerning = item.m_OriginX;
- } else if(Ignorekerning > item.m_OriginX) {
- Ignorekerning = item.m_OriginX;
- }
- } else {
- Ignorekerning = 0;
- break;
- }
- }
- FX_FLOAT spacing = 0;
- for (; item_index < nItems; item_index ++) {
- CPDF_TextObjectItem item;
- pObj->GetItemInfo(item_index, &item);
- if (item.m_CharCode == (FX_DWORD) - 1) {
- CFX_WideString wstr = m_Buffer.GetWideString();
- if (wstr.IsEmpty() || wstr.GetAt(wstr.GetLength() - 1) == L' ') {
- continue;
- }
- FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH();
- spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000;
- continue;
- }
- FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace;
- if(nItems > 3 && !spacing) {
- charSpace = 0;
- }
- if((spacing || charSpace) && item_index > 0) {
- int last_width = 0;
- FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH();
- FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
- FX_FLOAT threshold = 0;
- if (space_charcode != -1) {
- threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000 ;
- }
- if(threshold > fontsize_h / 3) {
- threshold = 0;
- } else {
- threshold /= 2;
- }
- if (threshold == 0) {
- threshold = fontsize_h;
- int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
- threshold = this_width > last_width ? (FX_FLOAT)this_width : (FX_FLOAT)last_width;
- int nDivide = 6;
- if (threshold < 300) {
- nDivide = 2;
- } else if (threshold < 500) {
- nDivide = 4;
- } else if (threshold < 700) {
- nDivide = 5;
- }
- threshold = threshold / nDivide;
- threshold = fontsize_h * threshold / 1000;
- }
- if(charSpace > 0.001) {
- spacing += matrix.TransformDistance(charSpace);
- } else if(charSpace < -0.001) {
- spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace));
- }
- if (threshold && (spacing && spacing >= threshold) ) {
- m_Buffer.AppendChar(L' ');
- if (m_pObjArray) {
- m_pObjArray->Add(NULL);
- m_pObjArray->Add(NULL);
- }
- }
- if (item.m_CharCode == (FX_DWORD) - 1) {
- continue;
- }
- spacing = 0;
- }
- CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode);
- if (unicode_str.IsEmpty()) {
- m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode);
- if (m_pObjArray) {
- m_pObjArray->Add((void*)pObj);
- m_pObjArray->Add((void*)(FX_INTPTR)item_index);
- }
- } else {
- m_Buffer << unicode_str;
- if (m_pObjArray) {
- for (int i = 0; i < unicode_str.GetLength(); i ++) {
- m_pObjArray->Add((void*)pObj);
- m_pObjArray->Add((void*)(FX_INTPTR)item_index);
- }
- }
- }
- }
- return FALSE;
-}
-void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage, FX_BOOL bUseLF,
- CFX_PtrArray* pObjArray)
-{
- CPDF_TextStream textstream(buffer, bUseLF, pObjArray);
- FX_POSITION pos = pPage->GetFirstObjectPosition();
- while (pos) {
- CPDF_PageObject* pObject = pPage->GetNextObject(pos);
- if (pObject == NULL) {
- continue;
- }
- if (pObject->m_Type != PDFPAGE_TEXT) {
- continue;
- }
- textstream.ProcessObject((CPDF_TextObject*)pObject, FALSE);
- }
-}
-CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage)
-{
- CFX_WideTextBuf buffer;
- buffer.EstimateSize(0, 1024);
- CPDF_Page page;
- page.Load(pDoc, pPage);
- CPDF_ParseOptions options;
- options.m_bTextOnly = TRUE;
- options.m_bSeparateForm = FALSE;
- page.ParseContent(&options);
- CPDF_TextStream textstream(buffer, FALSE, NULL);
- FX_POSITION pos = page.GetFirstObjectPosition();
- while (pos) {
- CPDF_PageObject* pObject = page.GetNextObject(pos);
- if (pObject->m_Type != PDFPAGE_TEXT) {
- continue;
- }
- if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) {
- break;
- }
- }
- return buffer.GetWideString();
-}
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "../../include/fpdfapi/fpdf_pageobj.h"
+#include "../../include/fpdftext/fpdf_text.h"
+#include "../../include/fpdfapi/fpdf_page.h"
+// Accumulates the Unicode text of successive text objects into a caller-owned
+// buffer, inserting separators between objects as it goes.
+class CPDF_TextStream : public CFX_Object
+{
+public:
+    // buffer    - output text; stored by reference, so it must outlive the stream.
+    // bUseLF    - when TRUE a line change is written as "\r\n" (or, if the buffer
+    //             ends in '-', that hyphen is removed instead); when FALSE a
+    //             line change is written as a single space.
+    // pObjArray - may be NULL. When set, it receives two entries per character
+    //             written: the source text object and the item index, or two
+    //             NULLs for an inserted separator.
+    CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjArray);
+    ~CPDF_TextStream() {}
+    // Appends the text of pObj to the buffer. Returns TRUE only when bFirstLine
+    // is set and pObj is judged to lie on a different line than the previous
+    // object (and no trailing hyphen was removed); FALSE otherwise.
+    FX_BOOL ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine);
+    CFX_WideTextBuf& m_Buffer;
+    FX_BOOL m_bUseLF;
+    CFX_PtrArray* m_pObjArray;
+    // Object handled by the previous ProcessObject call; NULL before the first.
+    const CPDF_TextObject* m_pLastObj;
+};
+// Members are initialised in declaration order in the mem-initializer list
+// rather than assigned in the constructor body.
+CPDF_TextStream::CPDF_TextStream(CFX_WideTextBuf& buffer, FX_BOOL bUseLF, CFX_PtrArray* pObjArray)
+    : m_Buffer(buffer)
+    , m_bUseLF(bUseLF)
+    , m_pObjArray(pObjArray)
+    , m_pLastObj(NULL)
+{
+}
+// Decides whether two text objects should be treated as the same text.
+// Returns FALSE if either pointer is NULL and TRUE straight away when both
+// bounding boxes are empty. Otherwise the boxes must intersect, the width of
+// the intersection must differ from the width of pTextObj1's box by at most
+// half of the latter, the font sizes must be equal, and both objects must
+// hold the same number of items with identical character codes.
+FX_BOOL FPDFText_IsSameTextObject(const CPDF_TextObject* pTextObj1, const CPDF_TextObject* pTextObj2)
+{
+    if (pTextObj1 == NULL || pTextObj2 == NULL) {
+        return FALSE;
+    }
+    CFX_FloatRect rcPrev(pTextObj2->m_Left, pTextObj2->m_Bottom, pTextObj2->m_Right, pTextObj2->m_Top);
+    CFX_FloatRect rcCur(pTextObj1->m_Left, pTextObj1->m_Bottom, pTextObj1->m_Right, pTextObj1->m_Top);
+    if (rcPrev.IsEmpty() && rcCur.IsEmpty()) {
+        return TRUE;
+    }
+    // From here on at least one box is non-empty, so the geometric checks
+    // always apply. rcPrev is narrowed to the overlap of the two boxes.
+    rcPrev.Intersect(rcCur);
+    if (rcPrev.IsEmpty()) {
+        return FALSE;
+    }
+    if (FXSYS_fabs(rcPrev.Width() - rcCur.Width()) > rcCur.Width() / 2) {
+        return FALSE;
+    }
+    if (pTextObj2->GetFontSize() != pTextObj1->GetFontSize()) {
+        return FALSE;
+    }
+    const int nPrevItems = pTextObj2->CountItems();
+    const int nCurItems = pTextObj1->CountItems();
+    if (nPrevItems != nCurItems) {
+        return FALSE;
+    }
+    // Compare the character codes item by item.
+    for (int idx = 0; idx < nPrevItems; ++idx) {
+        CPDF_TextObjectItem prevItem, curItem;
+        pTextObj2->GetItemInfo(idx, &prevItem);
+        pTextObj1->GetItemInfo(idx, &curItem);
+        if (curItem.m_CharCode != prevItem.m_CharCode) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+// Returns a width for charCode in pFont, trying three sources in turn and
+// stopping at the first non-zero one: the font's per-character width, the
+// width of the one-character string, and the horizontal extent of the
+// character's bounding box. The marker code (FX_DWORD)-1 always yields 0.
+int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont)
+{
+    // Spell the sentinel as an FX_DWORD, as the rest of the file does, instead
+    // of comparing an unsigned value with the signed literal -1.
+    if (charCode == (FX_DWORD)-1) {
+        return 0;
+    }
+    int w = pFont->GetCharWidthF(charCode);
+    if (w != 0) {
+        return w;
+    }
+    CFX_ByteString str;
+    pFont->AppendChar(str, charCode);
+    w = pFont->GetStringWidth(str, 1);
+    if (w != 0) {
+        return w;
+    }
+    // Last resort: derive the width from the bounding box.
+    FX_RECT BBox;
+    pFont->GetCharBBox(charCode, BBox);
+    return BBox.right - BBox.left;
+}
+// Classifies how pObj follows pPrevObj. Return values:
+//   -1  the two objects are the same text (FPDFText_IsSameTextObject)
+//    2  pObj's origin, mapped into pPrevObj's text space, is vertically
+//       further away than the line threshold
+//    1  there is a horizontal gap wide enough to be treated as a space
+//    3  pObj begins with a space that lies inside the previous character's
+//       extent
+//    0  none of the above
+// NOTE(review): the last item of pPrevObj and the first item of pObj are read
+// unconditionally, so both objects are assumed to hold at least one item --
+// TODO confirm against callers.
+int FPDFText_ProcessInterObj(const CPDF_TextObject* pPrevObj, const CPDF_TextObject* pObj)
+{
+    if (FPDFText_IsSameTextObject(pPrevObj, pObj)) {
+        return -1;
+    }
+    // Last character of the previous object.
+    CPDF_TextObjectItem item;
+    int nItem = pPrevObj->CountItems();
+    pPrevObj->GetItemInfo(nItem - 1, &item);
+    FX_WCHAR preChar = 0, curChar = 0;
+    CFX_WideString wstr = pPrevObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
+    if (wstr.GetLength()) {
+        preChar = wstr.GetAt(0);
+    }
+    FX_FLOAT last_pos = item.m_OriginX;
+    int nLastWidth = GetCharWidth(item.m_CharCode, pPrevObj->GetFont());
+    FX_FLOAT last_width = FXSYS_fabs(nLastWidth * pPrevObj->GetFontSize() / 1000);
+    // First character of the current object.
+    pObj->GetItemInfo(0, &item);
+    wstr = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
+    if (wstr.GetLength()) {
+        curChar = wstr.GetAt(0);
+    }
+    int nThisWidth = GetCharWidth(item.m_CharCode, pObj->GetFont());
+    FX_FLOAT this_width = FXSYS_fabs(nThisWidth * pObj->GetFontSize() / 1000);
+    // Line test: a quarter of the wider character, doubled in the comparison.
+    FX_FLOAT threshold = last_width > this_width ? last_width / 4 : this_width / 4;
+    CFX_AffineMatrix prev_matrix, prev_reverse;
+    pPrevObj->GetTextMatrix(&prev_matrix);
+    prev_reverse.SetReverse(prev_matrix);
+    FX_FLOAT x = pObj->GetPosX(), y = pObj->GetPosY();
+    prev_reverse.Transform(x, y);
+    if (FXSYS_fabs(y) > threshold * 2) {
+        return 2;
+    }
+    // Gap test: the threshold is a fraction of the wider raw width, scaled by
+    // that character's font size.
+    threshold = (FX_FLOAT)(nLastWidth > nThisWidth ? nLastWidth : nThisWidth);
+    threshold = threshold > 400 ? (threshold < 700 ? threshold / 4 : threshold / 5) : (threshold / 2);
+    threshold *= nLastWidth > nThisWidth ? FXSYS_fabs(pPrevObj->GetFontSize()) : FXSYS_fabs(pObj->GetFontSize());
+    threshold /= 1000;
+    // The original wrapped this in a second, brace-less "if" that repeated the
+    // space test, and followed the first gap check with an "x < 0" check on the
+    // same expression that could never be reached. Both are folded away here.
+    if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' && preChar != L' ') {
+        if ((x - last_pos - last_width) > threshold || (last_pos - x - last_width) > threshold) {
+            return 1;
+        }
+        if ((x - last_pos - last_width) > this_width || (x - last_pos - this_width) > last_width) {
+            return 1;
+        }
+    }
+    if (last_pos + last_width > x + this_width && curChar == L' ') {
+        return 3;
+    }
+    return 0;
+}
+// Appends the separator character ch to buffer and, when pObjArray is
+// non-NULL, two NULL entries marking a character that has no source object.
+static void FPDFText_AppendSeparator(CFX_WideTextBuf& buffer, CFX_PtrArray* pObjArray, FX_WCHAR ch)
+{
+    buffer.AppendChar(ch);
+    if (pObjArray) {
+        pObjArray->Add(NULL);
+        pObjArray->Add(NULL);
+    }
+}
+// Appends the text of pObj to m_Buffer, first writing whatever separator
+// FPDFText_ProcessInterObj calls for relative to the previous object, then
+// inserting spaces inside the object where kerning or character spacing
+// exceeds a width-derived threshold. When m_pObjArray is set, two entries are
+// added per character written (object pointer and item index, or two NULLs
+// for a separator).
+// Returns TRUE only when bFirstLine is set and pObj is on a different line
+// than the previous object (nothing is appended in that case); FALSE
+// otherwise, including when pObj duplicates the previous object and is skipped.
+FX_BOOL CPDF_TextStream::ProcessObject(const CPDF_TextObject* pObj, FX_BOOL bFirstLine)
+{
+    CPDF_Font* pFont = pObj->GetFont();
+    CFX_AffineMatrix matrix;
+    pObj->GetTextMatrix(&matrix);
+    int item_index = 0;
+    if (m_pLastObj) {
+        int result = FPDFText_ProcessInterObj(m_pLastObj, pObj);
+        if (result == 2) {
+            // Different line from the previous object.
+            int len = m_Buffer.GetLength();
+            if (len && m_bUseLF && m_Buffer.GetBuffer()[len - 1] == L'-') {
+                // Remove the trailing hyphen and its two array entries; no
+                // line break is written.
+                m_Buffer.Delete(len - 1, 1);
+                if (m_pObjArray) {
+                    m_pObjArray->RemoveAt((len - 1) * 2, 2);
+                }
+            } else {
+                if (bFirstLine) {
+                    return TRUE;
+                }
+                if (m_bUseLF) {
+                    FPDFText_AppendSeparator(m_Buffer, m_pObjArray, L'\r');
+                    FPDFText_AppendSeparator(m_Buffer, m_pObjArray, L'\n');
+                } else {
+                    FPDFText_AppendSeparator(m_Buffer, m_pObjArray, L' ');
+                }
+            }
+        } else if (result == 1) {
+            FPDFText_AppendSeparator(m_Buffer, m_pObjArray, L' ');
+        } else if (result == -1) {
+            // Duplicate of the previous object: remember it but emit nothing.
+            m_pLastObj = pObj;
+            return FALSE;
+        } else if (result == 3) {
+            // Skip the leading item of this object.
+            item_index = 1;
+        }
+    }
+    m_pLastObj = pObj;
+    int nItems = pObj->CountItems();
+    // Scan the odd-indexed items (excluding the last). If every one of them is
+    // a (FX_DWORD)-1 marker, Ignorekerning becomes the smallest m_OriginX among
+    // them; as soon as one is not a marker it is reset to 0.
+    FX_FLOAT Ignorekerning = 0;
+    for (int i = 1; i < nItems - 1; i += 2) {
+        CPDF_TextObjectItem item;
+        pObj->GetItemInfo(i, &item);
+        if (item.m_CharCode != (FX_DWORD)-1) {
+            Ignorekerning = 0;
+            break;
+        }
+        if (i == 1 || Ignorekerning > item.m_OriginX) {
+            Ignorekerning = item.m_OriginX;
+        }
+    }
+    FX_FLOAT spacing = 0;
+    for (; item_index < nItems; item_index++) {
+        CPDF_TextObjectItem item;
+        pObj->GetItemInfo(item_index, &item);
+        if (item.m_CharCode == (FX_DWORD)-1) {
+            // Marker item: record its adjustment as pending spacing unless the
+            // buffer is empty or already ends in a space. The buffer is
+            // inspected in place instead of copying it into a string each time.
+            int len = m_Buffer.GetLength();
+            if (len == 0 || m_Buffer.GetBuffer()[len - 1] == L' ') {
+                continue;
+            }
+            FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH();
+            spacing = -fontsize_h * (item.m_OriginX - Ignorekerning) / 1000;
+            continue;
+        }
+        FX_FLOAT charSpace = pObj->m_TextState.GetObject()->m_CharSpace;
+        if (nItems > 3 && !spacing) {
+            charSpace = 0;
+        }
+        if ((spacing || charSpace) && item_index > 0) {
+            FX_FLOAT fontsize_h = pObj->m_TextState.GetFontSizeH();
+            FX_DWORD space_charcode = pFont->CharCodeFromUnicode(' ');
+            // First choice: half the scaled width of the font's space
+            // character, unless that width exceeds a third of the font size.
+            FX_FLOAT threshold = 0;
+            if (space_charcode != (FX_DWORD)-1) {
+                threshold = fontsize_h * pFont->GetCharWidthF(space_charcode) / 1000;
+            }
+            if (threshold > fontsize_h / 3) {
+                threshold = 0;
+            } else {
+                threshold /= 2;
+            }
+            if (threshold == 0) {
+                // Fallback: a fraction of this character's own width; the
+                // divisor grows with the width.
+                int this_width = FXSYS_abs(GetCharWidth(item.m_CharCode, pFont));
+                threshold = (FX_FLOAT)this_width;
+                int nDivide = 6;
+                if (threshold < 300) {
+                    nDivide = 2;
+                } else if (threshold < 500) {
+                    nDivide = 4;
+                } else if (threshold < 700) {
+                    nDivide = 5;
+                }
+                threshold = threshold / nDivide;
+                threshold = fontsize_h * threshold / 1000;
+            }
+            if (charSpace > 0.001) {
+                spacing += matrix.TransformDistance(charSpace);
+            } else if (charSpace < -0.001) {
+                spacing -= matrix.TransformDistance(FXSYS_fabs(charSpace));
+            }
+            if (threshold && (spacing && spacing >= threshold)) {
+                FPDFText_AppendSeparator(m_Buffer, m_pObjArray, L' ');
+            }
+            spacing = 0;
+        }
+        CFX_WideString unicode_str = pFont->UnicodeFromCharCode(item.m_CharCode);
+        if (unicode_str.IsEmpty()) {
+            // No Unicode mapping: write the raw character code.
+            m_Buffer.AppendChar((FX_WCHAR)item.m_CharCode);
+            if (m_pObjArray) {
+                m_pObjArray->Add((void*)pObj);
+                m_pObjArray->Add((void*)(FX_INTPTR)item_index);
+            }
+        } else {
+            m_Buffer << unicode_str;
+            if (m_pObjArray) {
+                // One (object, index) pair per character written.
+                for (int i = 0; i < unicode_str.GetLength(); i++) {
+                    m_pObjArray->Add((void*)pObj);
+                    m_pObjArray->Add((void*)(FX_INTPTR)item_index);
+                }
+            }
+        }
+    }
+    return FALSE;
+}
+// Walks every object of pPage in order and feeds each text object to a
+// CPDF_TextStream writing into buffer. NULL entries and objects of any other
+// type are skipped. bUseLF and pObjArray are passed straight to the stream.
+void _PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_PageObjects* pPage, FX_BOOL bUseLF,
+                                CFX_PtrArray* pObjArray)
+{
+    CPDF_TextStream textstream(buffer, bUseLF, pObjArray);
+    for (FX_POSITION pos = pPage->GetFirstObjectPosition(); pos; ) {
+        // GetNextObject moves pos on to the following object.
+        CPDF_PageObject* pPageObj = pPage->GetNextObject(pos);
+        if (pPageObj != NULL && pPageObj->m_Type == PDFPAGE_TEXT) {
+            textstream.ProcessObject((CPDF_TextObject*)pPageObj, FALSE);
+        }
+    }
+}
+// Loads the page described by pPage from pDoc, parses its content with the
+// text-only option set, and returns the text gathered from its text objects
+// up to the point where CPDF_TextStream::ProcessObject reports the start of
+// a second line.
+CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage)
+{
+    CFX_WideTextBuf buffer;
+    buffer.EstimateSize(0, 1024);
+    CPDF_Page page;
+    page.Load(pDoc, pPage);
+    CPDF_ParseOptions options;
+    options.m_bTextOnly = TRUE;
+    options.m_bSeparateForm = FALSE;
+    page.ParseContent(&options);
+    // Line changes are written as a space (bUseLF is FALSE); no object array.
+    CPDF_TextStream textstream(buffer, FALSE, NULL);
+    FX_POSITION pos = page.GetFirstObjectPosition();
+    while (pos) {
+        CPDF_PageObject* pObject = page.GetNextObject(pos);
+        // Skip NULL entries as _PDF_GetTextStream_Unicode does; the original
+        // dereferenced pObject here without checking it.
+        if (pObject == NULL) {
+            continue;
+        }
+        if (pObject->m_Type != PDFPAGE_TEXT) {
+            continue;
+        }
+        if (textstream.ProcessObject((CPDF_TextObject*)pObject, TRUE)) {
+            break;
+        }
+    }
+    return buffer.GetWideString();
+}