summaryrefslogtreecommitdiff
path: root/core/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext')
-rw-r--r--core/fpdftext/cpdf_textpage.cpp198
1 files changed, 90 insertions, 108 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index d3dc461992..21a102910d 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -55,7 +55,7 @@ FX_FLOAT CalculateBaseSpace(const CPDF_TextObject* pTextObj,
pTextObj->GetItemInfo(i, &item);
if (item.m_CharCode == static_cast<uint32_t>(-1)) {
FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
- FX_FLOAT kerning = -fontsize_h * item.m_OriginX / 1000;
+ FX_FLOAT kerning = -fontsize_h * item.m_Origin.x / 1000;
baseSpace = std::min(baseSpace, kerning + spacing);
bAllChar = false;
}
@@ -239,30 +239,33 @@ std::vector<CFX_FloatRect> CPDF_TextPage::GetRectArray(int start,
bFlagNewRect = true;
}
if (bFlagNewRect) {
- FX_FLOAT orgX = info_curchar.m_OriginX, orgY = info_curchar.m_OriginY;
CFX_Matrix matrix = info_curchar.m_pTextObj->GetTextMatrix();
matrix.Concat(info_curchar.m_Matrix);
CFX_Matrix matrix_reverse;
matrix_reverse.SetReverse(matrix);
- matrix_reverse.TransformPoint(orgX, orgY);
+
+ CFX_PointF origin =
+ CFX_PointF(info_curchar.m_OriginX, info_curchar.m_OriginY);
+ origin = matrix_reverse.Transform(origin);
+
rect.left = info_curchar.m_CharBox.left;
rect.right = info_curchar.m_CharBox.right;
if (pCurObj->GetFont()->GetTypeDescent()) {
- rect.bottom = orgY +
+ rect.bottom = origin.y +
pCurObj->GetFont()->GetTypeDescent() *
pCurObj->GetFontSize() / 1000;
- FX_FLOAT xPosTemp = orgX;
+ FX_FLOAT xPosTemp = origin.x;
matrix.TransformPoint(xPosTemp, rect.bottom);
} else {
rect.bottom = info_curchar.m_CharBox.bottom;
}
if (pCurObj->GetFont()->GetTypeAscent()) {
rect.top =
- orgY +
+ origin.y +
pCurObj->GetFont()->GetTypeAscent() * pCurObj->GetFontSize() / 1000;
FX_FLOAT xPosTemp =
- orgX +
+ origin.x +
GetCharWidth(info_curchar.m_CharCode, pCurObj->GetFont()) *
pCurObj->GetFontSize() / 1000;
matrix.TransformPoint(xPosTemp, rect.top);
@@ -297,7 +300,7 @@ int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point,
while (pos < pdfium::CollectionSize<int>(m_CharList)) {
PAGECHAR_INFO charinfo = m_CharList[pos];
CFX_FloatRect charrect = charinfo.m_CharBox;
- if (charrect.Contains(point.x, point.y))
+ if (charrect.Contains(point))
break;
if (xTolerance > 0 || yTolerance > 0) {
CFX_FloatRect charRectExt;
@@ -306,7 +309,7 @@ int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point,
charRectExt.right = charrect.right + xTolerance / 2;
charRectExt.top = charrect.top + yTolerance / 2;
charRectExt.bottom = charrect.bottom - yTolerance / 2;
- if (charRectExt.Contains(point.x, point.y)) {
+ if (charRectExt.Contains(point)) {
double curXdif, curYdif;
curXdif = FXSYS_fabs(point.x - charrect.left) <
FXSYS_fabs(point.x - charrect.right)
@@ -738,10 +741,9 @@ void CPDF_TextPage::ProcessTextObject(
const CFX_Matrix& formMatrix,
const CPDF_PageObjectList* pObjList,
CPDF_PageObjectList::const_iterator ObjPos) {
- CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right,
- pTextObj->m_Top);
if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f)
return;
+
size_t count = m_LineObj.size();
PDFTEXT_Obj Obj;
Obj.m_pTextObj = pTextObj;
@@ -752,6 +754,7 @@ void CPDF_TextPage::ProcessTextObject(
}
if (IsSameAsPreTextObject(pTextObj, pObjList, ObjPos))
return;
+
PDFTEXT_Obj prev_Obj = m_LineObj[count - 1];
CPDF_TextObjectItem item;
int nItem = prev_Obj.m_pTextObj->CountItems();
@@ -776,34 +779,29 @@ void CPDF_TextPage::ProcessTextObject(
FX_FLOAT threshold =
prev_width > this_width ? prev_width / 4 : this_width / 4;
- FX_FLOAT prev_x = prev_Obj.m_pTextObj->GetPosX(),
- prev_y = prev_Obj.m_pTextObj->GetPosY();
- prev_Obj.m_formMatrix.TransformPoint(prev_x, prev_y);
- m_DisplayMatrix.TransformPoint(prev_x, prev_y);
- FX_FLOAT this_x = pTextObj->GetPosX(), this_y = pTextObj->GetPosY();
- formMatrix.TransformPoint(this_x, this_y);
- m_DisplayMatrix.TransformPoint(this_x, this_y);
- if (FXSYS_fabs(this_y - prev_y) > threshold * 2) {
+ CFX_PointF prev_pos = m_DisplayMatrix.Transform(
+ prev_Obj.m_formMatrix.Transform(prev_Obj.m_pTextObj->GetPos()));
+ CFX_PointF this_pos =
+ m_DisplayMatrix.Transform(formMatrix.Transform(pTextObj->GetPos()));
+ if (FXSYS_fabs(this_pos.y - prev_pos.y) > threshold * 2) {
for (size_t i = 0; i < count; i++)
ProcessTextObject(m_LineObj[i]);
m_LineObj.clear();
m_LineObj.push_back(Obj);
return;
}
- size_t i;
- for (i = count; i > 0; --i) {
+
+ for (size_t i = count; i > 0; --i) {
PDFTEXT_Obj prev_text_obj = m_LineObj[i - 1];
- FX_FLOAT Prev_x = prev_text_obj.m_pTextObj->GetPosX(),
- Prev_y = prev_text_obj.m_pTextObj->GetPosY();
- prev_text_obj.m_formMatrix.TransformPoint(Prev_x, Prev_y);
- m_DisplayMatrix.TransformPoint(Prev_x, Prev_y);
- if (this_x >= Prev_x) {
+ CFX_PointF new_prev_pos =
+ m_DisplayMatrix.Transform(prev_text_obj.m_formMatrix.Transform(
+ prev_text_obj.m_pTextObj->GetPos()));
+ if (this_pos.x >= new_prev_pos.x) {
m_LineObj.insert(m_LineObj.begin() + i, Obj);
- break;
+ return;
}
}
- if (i == 0)
- m_LineObj.insert(m_LineObj.begin(), Obj);
+ m_LineObj.insert(m_LineObj.begin(), Obj);
}
FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
@@ -893,14 +891,6 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
CFX_Matrix matrix = pTextObj->GetTextMatrix();
matrix.Concat(Obj.m_formMatrix);
- FX_FLOAT fPosX = pTextObj->GetPosX();
- FX_FLOAT fPosY = pTextObj->GetPosY();
- int nCharInfoIndex = m_TextBuf.GetLength();
- CFX_FloatRect charBox;
- charBox.top = pTextObj->m_Top;
- charBox.left = pTextObj->m_Left;
- charBox.right = pTextObj->m_Right;
- charBox.bottom = pTextObj->m_Bottom;
for (FX_STRSIZE k = 0; k < nItems; k++) {
FX_WCHAR wChar = actText.GetAt(k);
if (wChar <= 0x80 && !isprint(wChar))
@@ -909,17 +899,14 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
continue;
PAGECHAR_INFO charinfo;
- charinfo.m_OriginX = fPosX;
- charinfo.m_OriginY = fPosY;
- charinfo.m_Index = nCharInfoIndex;
+ charinfo.m_OriginX = pTextObj->GetPos().x;
+ charinfo.m_OriginY = pTextObj->GetPos().y;
+ charinfo.m_Index = m_TextBuf.GetLength();
charinfo.m_Unicode = wChar;
charinfo.m_CharCode = pFont->CharCodeFromUnicode(wChar);
charinfo.m_Flag = FPDFTEXT_CHAR_PIECE;
charinfo.m_pTextObj = pTextObj;
- charinfo.m_CharBox.top = charBox.top;
- charinfo.m_CharBox.left = charBox.left;
- charinfo.m_CharBox.right = charBox.right;
- charinfo.m_CharBox.bottom = charBox.bottom;
+ charinfo.m_CharBox = pTextObj->GetRect();
charinfo.m_Matrix = matrix;
m_TempTextBuf.AppendChar(wChar);
m_TempCharList.push_back(charinfo);
@@ -989,15 +976,11 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
GenerateCharacter result = GenerateCharacter::None;
if (m_pPreTextObj) {
result = ProcessInsertObject(pTextObj, formMatrix);
- if (result == GenerateCharacter::LineBreak) {
- m_CurlineRect =
- CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
- Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
- } else {
- m_CurlineRect.Union(
- CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
- Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top));
- }
+ if (result == GenerateCharacter::LineBreak)
+ m_CurlineRect = Obj.m_pTextObj->GetRect();
+ else
+ m_CurlineRect.Union(Obj.m_pTextObj->GetRect());
+
switch (result) {
case GenerateCharacter::None:
break;
@@ -1044,10 +1027,9 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
break;
}
} else {
- m_CurlineRect =
- CFX_FloatRect(Obj.m_pTextObj->m_Left, Obj.m_pTextObj->m_Bottom,
- Obj.m_pTextObj->m_Right, Obj.m_pTextObj->m_Top);
+ m_CurlineRect = Obj.m_pTextObj->GetRect();
}
+
if (ePreMKC == FPDFText_MarkedContent::Delay) {
ProcessMarkedContent(Obj);
m_pPreTextObj = pTextObj;
@@ -1081,7 +1063,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
continue;
FX_FLOAT fontsize_h = pTextObj->m_TextState.GetFontSizeH();
- spacing = -fontsize_h * item.m_OriginX / 1000;
+ spacing = -fontsize_h * item.m_Origin.x / 1000;
continue;
}
FX_FLOAT charSpace = pTextObj->m_TextState.GetCharSpace();
@@ -1118,8 +1100,8 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
charinfo.m_CharCode = CPDF_Font::kInvalidCharCode;
charinfo.m_Matrix = formMatrix;
- charinfo.m_OriginX = item.m_OriginX;
- charinfo.m_OriginY = item.m_OriginY;
+ charinfo.m_OriginX = item.m_Origin.x;
+ charinfo.m_OriginY = item.m_Origin.y;
matrix.TransformPoint(charinfo.m_OriginX, charinfo.m_OriginY);
charinfo.m_CharBox =
CFX_FloatRect(charinfo.m_OriginX, charinfo.m_OriginY,
@@ -1144,20 +1126,20 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
charinfo.m_Flag = FPDFTEXT_CHAR_NORMAL;
charinfo.m_pTextObj = pTextObj;
- charinfo.m_OriginX = item.m_OriginX;
- charinfo.m_OriginY = item.m_OriginY;
+ charinfo.m_OriginX = item.m_Origin.x;
+ charinfo.m_OriginY = item.m_Origin.y;
matrix.TransformPoint(charinfo.m_OriginX, charinfo.m_OriginY);
FX_RECT rect =
charinfo.m_pTextObj->GetFont()->GetCharBBox(charinfo.m_CharCode);
charinfo.m_CharBox.top =
- rect.top * pTextObj->GetFontSize() / 1000 + item.m_OriginY;
+ rect.top * pTextObj->GetFontSize() / 1000 + item.m_Origin.y;
charinfo.m_CharBox.left =
- rect.left * pTextObj->GetFontSize() / 1000 + item.m_OriginX;
+ rect.left * pTextObj->GetFontSize() / 1000 + item.m_Origin.x;
charinfo.m_CharBox.right =
- rect.right * pTextObj->GetFontSize() / 1000 + item.m_OriginX;
+ rect.right * pTextObj->GetFontSize() / 1000 + item.m_Origin.x;
charinfo.m_CharBox.bottom =
- rect.bottom * pTextObj->GetFontSize() / 1000 + item.m_OriginY;
+ rect.bottom * pTextObj->GetFontSize() / 1000 + item.m_Origin.y;
if (fabsf(charinfo.m_CharBox.top - charinfo.m_CharBox.bottom) < 0.01f) {
charinfo.m_CharBox.top =
charinfo.m_CharBox.bottom + pTextObj->GetFontSize();
@@ -1227,10 +1209,11 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
pTextObj->GetCharInfo(nChars - 1, &last);
CFX_Matrix textMatrix = pTextObj->GetTextMatrix();
- textMatrix.TransformPoint(first.m_OriginX, first.m_OriginY);
- textMatrix.TransformPoint(last.m_OriginX, last.m_OriginY);
- FX_FLOAT dX = FXSYS_fabs(last.m_OriginX - first.m_OriginX);
- FX_FLOAT dY = FXSYS_fabs(last.m_OriginY - first.m_OriginY);
+ first.m_Origin = textMatrix.Transform(first.m_Origin);
+ last.m_Origin = textMatrix.Transform(last.m_Origin);
+
+ FX_FLOAT dX = FXSYS_fabs(last.m_Origin.x - first.m_Origin.x);
+ FX_FLOAT dY = FXSYS_fabs(last.m_Origin.y - first.m_Origin.y);
if (dX <= 0.0001f && dY <= 0.0001f)
return TextOrientation::Unknown;
@@ -1287,11 +1270,10 @@ CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
if (WritingMode == TextOrientation::Unknown)
WritingMode = GetTextObjectWritingMode(m_pPreTextObj);
- CFX_FloatRect this_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
- pObj->m_Top);
- CFX_FloatRect prev_rect(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom,
- m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
- CPDF_TextObjectItem PrevItem, item;
+ CFX_FloatRect this_rect = pObj->GetRect();
+ CFX_FloatRect prev_rect = m_pPreTextObj->GetRect();
+ CPDF_TextObjectItem PrevItem;
+ CPDF_TextObjectItem item;
int nItem = m_pPreTextObj->CountItems();
m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem);
pObj->GetItemInfo(0, &item);
@@ -1325,7 +1307,8 @@ CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
}
}
}
- FX_FLOAT last_pos = PrevItem.m_OriginX;
+
+ FX_FLOAT last_pos = PrevItem.m_Origin.x;
int nLastWidth = GetCharWidth(PrevItem.m_CharCode, m_pPreTextObj->GetFont());
FX_FLOAT last_width = nLastWidth * m_pPreTextObj->GetFontSize() / 1000;
last_width = FXSYS_fabs(last_width);
@@ -1340,42 +1323,39 @@ CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
CFX_Matrix prev_reverse;
prev_reverse.SetReverse(prev_matrix);
- FX_FLOAT x = pObj->GetPosX();
- FX_FLOAT y = pObj->GetPosY();
- formMatrix.TransformPoint(x, y);
- prev_reverse.TransformPoint(x, y);
+
+ CFX_PointF pos = prev_reverse.Transform(formMatrix.Transform(pObj->GetPos()));
if (last_width < this_width)
threshold = prev_reverse.TransformDistance(threshold);
+
bool bNewline = false;
if (WritingMode == TextOrientation::Horizontal) {
CFX_FloatRect rect1(m_pPreTextObj->m_Left, pObj->m_Bottom,
m_pPreTextObj->m_Right, pObj->m_Top);
- CFX_FloatRect rect2(m_pPreTextObj->m_Left, m_pPreTextObj->m_Bottom,
- m_pPreTextObj->m_Right, m_pPreTextObj->m_Top);
+ CFX_FloatRect rect2 = m_pPreTextObj->GetRect();
CFX_FloatRect rect3 = rect1;
rect1.Intersect(rect2);
if ((rect1.IsEmpty() && rect2.Height() > 5 && rect3.Height() > 5) ||
- ((y > threshold * 2 || y < threshold * -3) &&
- (FXSYS_fabs(y) < 1 ? FXSYS_fabs(x) < FXSYS_fabs(y) : true))) {
+ ((pos.y > threshold * 2 || pos.y < threshold * -3) &&
+ (FXSYS_fabs(pos.y) < 1 ? FXSYS_fabs(pos.x) < FXSYS_fabs(pos.y)
+ : true))) {
bNewline = true;
if (nItem > 1) {
CPDF_TextObjectItem tempItem;
m_pPreTextObj->GetItemInfo(0, &tempItem);
CFX_Matrix m = m_pPreTextObj->GetTextMatrix();
- if (PrevItem.m_OriginX > tempItem.m_OriginX &&
+ if (PrevItem.m_Origin.x > tempItem.m_Origin.x &&
m_DisplayMatrix.a > 0.9 && m_DisplayMatrix.b < 0.1 &&
m_DisplayMatrix.c < 0.1 && m_DisplayMatrix.d < -0.9 && m.b < 0.1 &&
m.c < 0.1) {
CFX_FloatRect re(0, m_pPreTextObj->m_Bottom, 1000,
m_pPreTextObj->m_Top);
- if (re.Contains(pObj->GetPosX(), pObj->GetPosY())) {
+ if (re.Contains(pObj->GetPos())) {
bNewline = false;
} else {
CFX_FloatRect rect(0, pObj->m_Bottom, 1000, pObj->m_Top);
- if (rect.Contains(m_pPreTextObj->GetPosX(),
- m_pPreTextObj->GetPosY())) {
+ if (rect.Contains(m_pPreTextObj->GetPos()))
bNewline = false;
- }
}
}
}
@@ -1415,17 +1395,17 @@ CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
(threshold < 1.39001 && threshold > 1.38999)) {
threshold *= 1.5;
}
- if (FXSYS_fabs(last_pos + last_width - x) > threshold && curChar != L' ' &&
- preChar != L' ') {
+ if (FXSYS_fabs(last_pos + last_width - pos.x) > threshold &&
+ curChar != L' ' && preChar != L' ') {
if (curChar != L' ' && preChar != L' ') {
- if ((x - last_pos - last_width) > threshold ||
- (last_pos - x - last_width) > threshold) {
+ if ((pos.x - last_pos - last_width) > threshold ||
+ (last_pos - pos.x - last_width) > threshold) {
return GenerateCharacter::Space;
}
- if (x < 0 && (last_pos - x - last_width) > threshold)
+ if (pos.x < 0 && (last_pos - pos.x - last_width) > threshold)
return GenerateCharacter::Space;
- if ((x - last_pos - last_width) > this_width ||
- (x - last_pos - this_width) > last_width) {
+ if ((pos.x - last_pos - last_width) > this_width ||
+ (pos.x - last_pos - this_width) > last_width) {
return GenerateCharacter::Space;
}
}
@@ -1437,10 +1417,9 @@ bool CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
CPDF_TextObject* pTextObj2) {
if (!pTextObj1 || !pTextObj2)
return false;
- CFX_FloatRect rcPreObj(pTextObj2->m_Left, pTextObj2->m_Bottom,
- pTextObj2->m_Right, pTextObj2->m_Top);
- CFX_FloatRect rcCurObj(pTextObj1->m_Left, pTextObj1->m_Bottom,
- pTextObj1->m_Right, pTextObj1->m_Top);
+
+ CFX_FloatRect rcPreObj = pTextObj2->GetRect();
+ CFX_FloatRect rcCurObj = pTextObj1->GetRect();
if (rcPreObj.IsEmpty() && rcCurObj.IsEmpty()) {
FX_FLOAT dbXdif = FXSYS_fabs(rcPreObj.left - rcCurObj.left);
size_t nCount = m_CharList.size();
@@ -1470,21 +1449,22 @@ bool CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
if (!nPreCount)
return true;
- CPDF_TextObjectItem itemPer = {0, 0.0f, 0.0f};
- CPDF_TextObjectItem itemCur = {0, 0.0f, 0.0f};
+ CPDF_TextObjectItem itemPer;
+ CPDF_TextObjectItem itemCur;
for (int i = 0; i < nPreCount; i++) {
pTextObj2->GetItemInfo(i, &itemPer);
pTextObj1->GetItemInfo(i, &itemCur);
if (itemCur.m_CharCode != itemPer.m_CharCode)
return false;
}
- if (FXSYS_fabs(pTextObj1->GetPosX() - pTextObj2->GetPosX()) >
- GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont()) *
- pTextObj2->GetFontSize() / 1000 * 0.9 ||
- FXSYS_fabs(pTextObj1->GetPosY() - pTextObj2->GetPosY()) >
- std::max(std::max(rcPreObj.Height(), rcPreObj.Width()),
- pTextObj2->GetFontSize()) /
- 8) {
+
+ CFX_PointF diff = pTextObj1->GetPos() - pTextObj2->GetPos();
+ FX_FLOAT font_size = pTextObj2->GetFontSize();
+ FX_FLOAT char_size = GetCharWidth(itemPer.m_CharCode, pTextObj2->GetFont());
+ FX_FLOAT max_pre_size =
+ std::max(std::max(rcPreObj.Height(), rcPreObj.Width()), font_size);
+ if (FXSYS_fabs(diff.x) > char_size * font_size / 1000 * 0.9 ||
+ FXSYS_fabs(diff.y) > max_pre_size / 8) {
return false;
}
return true;
@@ -1515,11 +1495,13 @@ bool CPDF_TextPage::GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info) {
preChar = &m_CharList.back();
else
return false;
+
info.m_Index = m_TextBuf.GetLength();
info.m_Unicode = unicode;
info.m_pTextObj = nullptr;
info.m_CharCode = CPDF_Font::kInvalidCharCode;
info.m_Flag = FPDFTEXT_CHAR_GENERATED;
+
int preWidth = 0;
if (preChar->m_pTextObj && preChar->m_CharCode != -1) {
preWidth =