From 2ae87d2e8ddff79d0e96aad3db97e090db21fb99 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Mon, 17 Aug 2015 18:00:48 -0700 Subject: Clean up IFX_BidiChar - Replace IFX_BidiChar with just CFX_BidiChar - Document implementation - Change out parameters to pointers - Remove dead code - Add an enum for bidi directions - Move several externs to a header - Add unit tests R=tsepez@chromium.org Review URL: https://codereview.chromium.org/1197643002 . --- BUILD.gn | 6 +- core/include/fxcrt/fx_arb.h | 24 ------- core/include/fxcrt/fx_bidi.h | 59 ++++++++++++++++ core/include/fxcrt/fx_ucd.h | 114 ++++++++---------------------- core/src/fpdftext/fpdf_text.cpp | 16 ++--- core/src/fpdftext/fpdf_text_int.cpp | 36 +++++----- core/src/fpdftext/text_int.h | 2 +- core/src/fxcrt/fx_arabic.cpp | 84 ---------------------- core/src/fxcrt/fx_arabic.h | 33 --------- core/src/fxcrt/fx_bidi.cpp | 66 +++++++++++++++++ core/src/fxcrt/fx_bidi_unittest.cpp | 136 ++++++++++++++++++++++++++++++++++++ core/src/fxcrt/fx_ucddata.cpp | 19 +++-- core/src/fxcrt/fx_unicode.cpp | 55 ++++----------- pdfium.gyp | 6 +- 14 files changed, 351 insertions(+), 305 deletions(-) delete mode 100644 core/include/fxcrt/fx_arb.h create mode 100644 core/include/fxcrt/fx_bidi.h delete mode 100644 core/src/fxcrt/fx_arabic.cpp delete mode 100644 core/src/fxcrt/fx_arabic.h create mode 100644 core/src/fxcrt/fx_bidi.cpp create mode 100644 core/src/fxcrt/fx_bidi_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index 3cc46180a6..d3c092e468 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -396,8 +396,8 @@ static_library("fxcodec") { static_library("fxcrt") { sources = [ - "core/include/fxcrt/fx_arb.h", "core/include/fxcrt/fx_basic.h", + "core/include/fxcrt/fx_bidi.h", "core/include/fxcrt/fx_coordinates.h", "core/include/fxcrt/fx_ext.h", "core/include/fxcrt/fx_memory.h", @@ -408,8 +408,6 @@ static_library("fxcrt") { "core/include/fxcrt/fx_ucd.h", "core/include/fxcrt/fx_xml.h", "core/src/fxcrt/extension.h", - "core/src/fxcrt/fx_arabic.cpp", 
- "core/src/fxcrt/fx_arabic.h", "core/src/fxcrt/fx_basic_array.cpp", "core/src/fxcrt/fx_basic_bstring.cpp", "core/src/fxcrt/fx_basic_buffer.cpp", @@ -422,6 +420,7 @@ static_library("fxcrt") { "core/src/fxcrt/fx_basic_utf.cpp", "core/src/fxcrt/fx_basic_util.cpp", "core/src/fxcrt/fx_basic_wstring.cpp", + "core/src/fxcrt/fx_bidi.cpp", "core/src/fxcrt/fx_extension.cpp", "core/src/fxcrt/fx_ucddata.cpp", "core/src/fxcrt/fx_unicode.cpp", @@ -739,6 +738,7 @@ test("pdfium_unittests") { "core/src/fxcrt/fx_basic_bstring_unittest.cpp", "core/src/fxcrt/fx_basic_memmgr_unittest.cpp", "core/src/fxcrt/fx_basic_wstring_unittest.cpp", + "core/src/fxcrt/fx_bidi_unittest.cpp", "core/src/fxcrt/fx_system_unittest.cpp", "testing/fx_string_testhelpers.cpp", "testing/fx_string_testhelpers.h", diff --git a/core/include/fxcrt/fx_arb.h b/core/include/fxcrt/fx_arb.h deleted file mode 100644 index 0f6de64b32..0000000000 --- a/core/include/fxcrt/fx_arb.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_INCLUDE_FXCRT_FX_ARB_H_ -#define CORE_INCLUDE_FXCRT_FX_ARB_H_ - -#include "fx_system.h" - -class IFX_BidiChar { - public: - static IFX_BidiChar* Create(); - virtual ~IFX_BidiChar() {} - - virtual void SetPolicy(FX_BOOL bSeparateNeutral = TRUE) = 0; - virtual FX_BOOL AppendChar(FX_WCHAR wch) = 0; - virtual FX_BOOL EndChar() = 0; - virtual int32_t GetBidiInfo(int32_t& iStart, int32_t& iCount) = 0; - virtual void Reset() = 0; -}; - -#endif // CORE_INCLUDE_FXCRT_FX_ARB_H_ diff --git a/core/include/fxcrt/fx_bidi.h b/core/include/fxcrt/fx_bidi.h new file mode 100644 index 0000000000..a55ce6cfd2 --- /dev/null +++ b/core/include/fxcrt/fx_bidi.h @@ -0,0 +1,59 @@ +// Copyright 2014 PDFium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_INCLUDE_FXCRT_FX_BIDI_H_ +#define CORE_INCLUDE_FXCRT_FX_BIDI_H_ + +#include "fx_system.h" + +// Processes characters and groups them into segments based on text direction. +class CFX_BidiChar { + public: + enum Direction { NEUTRAL, LEFT, RIGHT }; + + CFX_BidiChar(); + ~CFX_BidiChar(); + + // Append a character and classify it as left, right, or neutral. + // Returns true if the character has a different direction than the + // existing direction to indicate there is a segment to process. + bool AppendChar(FX_WCHAR wch); + + // Call this after the last character has been appended. AppendChar() + // must not be called after this. + // Returns true if there is still a segment to process. + bool EndChar(); + + // Get information about the segment to process. + // The segment's start position and character count are returned in |iStart| + // and |iCount|, respectively. Pass in null pointers if the information is + // not needed. + // Returns the segment direction. + Direction GetBidiInfo(int32_t* iStart, int32_t* iCount) const; + + private: + void SaveCurrentStateToLastState(); + + // Position of the current segment. + int32_t m_iCurStart; + + // Number of characters appended so far (end of the current segment). + int32_t m_iCurCount; + + // Direction of the current segment. + Direction m_CurBidi; + + // Position of the last segment. + int32_t m_iLastStart; + + // Number of characters in the last segment. + int32_t m_iLastCount; + + // Direction of the last segment. 
+ Direction m_LastBidi; +}; + +#endif // CORE_INCLUDE_FXCRT_FX_BIDI_H_ diff --git a/core/include/fxcrt/fx_ucd.h b/core/include/fxcrt/fx_ucd.h index 8bc5930dd4..62c2dfdf67 100644 --- a/core/include/fxcrt/fx_ucd.h +++ b/core/include/fxcrt/fx_ucd.h @@ -9,97 +9,41 @@ #include "fx_system.h" -enum FX_CHARBREAKPROP { - FX_CBP_OP = 0, - FX_CBP_CL = 1, - FX_CBP_QU = 2, - FX_CBP_GL = 3, - FX_CBP_NS = 4, - FX_CBP_EX = 5, - FX_CBP_SY = 6, - FX_CBP_IS = 7, - FX_CBP_PR = 8, - FX_CBP_PO = 9, - FX_CBP_NU = 10, - FX_CBP_AL = 11, - FX_CBP_ID = 12, - FX_CBP_IN = 13, - FX_CBP_HY = 14, - FX_CBP_BA = 15, - FX_CBP_BB = 16, - FX_CBP_B2 = 17, - FX_CBP_ZW = 18, - FX_CBP_CM = 19, - FX_CBP_WJ = 20, - FX_CBP_H2 = 21, - FX_CBP_H3 = 22, - FX_CBP_JL = 23, - FX_CBP_JV = 24, - FX_CBP_JT = 25, - - FX_CBP_BK = 26, - FX_CBP_CR = 27, - FX_CBP_LF = 28, - FX_CBP_NL = 29, - FX_CBP_SA = 30, - FX_CBP_SG = 31, - FX_CBP_CB = 32, - FX_CBP_XX = 33, - FX_CBP_AI = 34, - FX_CBP_SP = 35, - FX_CBP_TB = 37, - FX_CBP_NONE = 36, -}; #define FX_BIDICLASSBITS 6 #define FX_BIDICLASSBITSMASK (31 << FX_BIDICLASSBITS) enum FX_BIDICLASS { - FX_BIDICLASS_ON = 0, - FX_BIDICLASS_L = 1, - FX_BIDICLASS_R = 2, - FX_BIDICLASS_AN = 3, - FX_BIDICLASS_EN = 4, - FX_BIDICLASS_AL = 5, - FX_BIDICLASS_NSM = 6, - FX_BIDICLASS_CS = 7, - FX_BIDICLASS_ES = 8, - FX_BIDICLASS_ET = 9, - FX_BIDICLASS_BN = 10, - FX_BIDICLASS_S = 11, - FX_BIDICLASS_WS = 12, - FX_BIDICLASS_B = 13, - FX_BIDICLASS_RLO = 14, - FX_BIDICLASS_RLE = 15, - FX_BIDICLASS_LRO = 16, - FX_BIDICLASS_LRE = 17, - FX_BIDICLASS_PDF = 18, + FX_BIDICLASS_ON = 0, // Other Neutral + FX_BIDICLASS_L = 1, // Left Letter + FX_BIDICLASS_R = 2, // Right Letter + FX_BIDICLASS_AN = 3, // Arabic Number + FX_BIDICLASS_EN = 4, // European Number + FX_BIDICLASS_AL = 5, // Arabic Letter + FX_BIDICLASS_NSM = 6, // Non-spacing Mark + FX_BIDICLASS_CS = 7, // Common Number Separator + FX_BIDICLASS_ES = 8, // European Separator + FX_BIDICLASS_ET = 9, // European Number Terminator + FX_BIDICLASS_BN = 10, // 
Boundary Neutral + FX_BIDICLASS_S = 11, // Segment Separator + FX_BIDICLASS_WS = 12, // Whitespace + FX_BIDICLASS_B = 13, // Paragraph Separator + FX_BIDICLASS_RLO = 14, // Right-to-Left Override + FX_BIDICLASS_RLE = 15, // Right-to-Left Embedding + FX_BIDICLASS_LRO = 16, // Left-to-Right Override + FX_BIDICLASS_LRE = 17, // Left-to-Right Embedding + FX_BIDICLASS_PDF = 18, // Pop Directional Format FX_BIDICLASS_N = FX_BIDICLASS_ON, }; -#define FX_CHARTYPEBITS 11 -#define FX_CHARTYPEBITSMASK (15 << FX_CHARTYPEBITS) -enum FX_CHARTYPE { - FX_CHARTYPE_Unknown = 0, - FX_CHARTYPE_Tab = (1 << FX_CHARTYPEBITS), - FX_CHARTYPE_Space = (2 << FX_CHARTYPEBITS), - FX_CHARTYPE_Control = (3 << FX_CHARTYPEBITS), - FX_CHARTYPE_Combination = (4 << FX_CHARTYPEBITS), - FX_CHARTYPE_Numeric = (5 << FX_CHARTYPEBITS), - FX_CHARTYPE_Normal = (6 << FX_CHARTYPEBITS), - FX_CHARTYPE_ArabicAlef = (7 << FX_CHARTYPEBITS), - FX_CHARTYPE_ArabicSpecial = (8 << FX_CHARTYPEBITS), - FX_CHARTYPE_ArabicDistortion = (9 << FX_CHARTYPEBITS), - FX_CHARTYPE_ArabicNormal = (10 << FX_CHARTYPEBITS), - FX_CHARTYPE_ArabicForm = (11 << FX_CHARTYPEBITS), - FX_CHARTYPE_Arabic = (12 << FX_CHARTYPEBITS), -}; + +extern const FX_DWORD kTextLayoutCodeProperties[]; +extern const size_t kTextLayoutCodePropertiesSize; + +extern const FX_WCHAR kFXTextLayoutVerticalMirror[]; +extern const size_t kFXTextLayoutVerticalMirrorSize; + +extern const FX_WCHAR kFXTextLayoutBidiMirror[]; +extern const size_t kFXTextLayoutBidiMirrorSize; + FX_DWORD FX_GetUnicodeProperties(FX_WCHAR wch); -FX_BOOL FX_IsCtrlCode(FX_WCHAR ch); -FX_BOOL FX_IsRotationCode(FX_WCHAR ch); -FX_BOOL FX_IsCombinationChar(FX_WCHAR wch); -FX_BOOL FX_IsBidiChar(FX_WCHAR wch); FX_WCHAR FX_GetMirrorChar(FX_WCHAR wch, FX_BOOL bRTL, FX_BOOL bVertical); -FX_WCHAR FX_GetMirrorChar(FX_WCHAR wch, - FX_DWORD dwProps, - FX_BOOL bRTL, - FX_BOOL bVertical); #endif // CORE_INCLUDE_FXCRT_FX_UCD_H_ diff --git a/core/src/fpdftext/fpdf_text.cpp b/core/src/fpdftext/fpdf_text.cpp index 
e7ca6c136f..af944d965e 100644 --- a/core/src/fpdftext/fpdf_text.cpp +++ b/core/src/fpdftext/fpdf_text.cpp @@ -9,7 +9,7 @@ #include "../../include/fpdfapi/fpdf_pageobj.h" #include "../../include/fpdfapi/fpdf_resource.h" #include "../../include/fpdftext/fpdf_text.h" -#include "../../include/fxcrt/fx_arb.h" +#include "../../include/fxcrt/fx_bidi.h" #include "../../include/fxcrt/fx_ucd.h" #include "text_int.h" #include "txtproc.h" @@ -321,35 +321,35 @@ void NormalizeString(CFX_WideString& str) { return; } CFX_WideString sBuffer; - nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); + nonstd::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); CFX_WordArray order; FX_BOOL bR2L = FALSE; int32_t start = 0, count = 0, i = 0; int nR2L = 0, nL2R = 0; for (i = 0; i < str.GetLength(); i++) { if (pBidiChar->AppendChar(str.GetAt(i))) { - int32_t ret = pBidiChar->GetBidiInfo(start, count); + CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); order.Add(start); order.Add(count); order.Add(ret); if (!bR2L) { - if (ret == 2) { + if (ret == CFX_BidiChar::RIGHT) { nR2L++; - } else if (ret == 1) { + } else if (ret == CFX_BidiChar::LEFT) { nL2R++; } } } } if (pBidiChar->EndChar()) { - int32_t ret = pBidiChar->GetBidiInfo(start, count); + CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); order.Add(start); order.Add(count); order.Add(ret); if (!bR2L) { - if (ret == 2) { + if (ret == CFX_BidiChar::RIGHT) { nR2L++; - } else if (ret == 1) { + } else if (ret == CFX_BidiChar::LEFT) { nL2R++; } } diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp index ce673447a4..462f1369dd 100644 --- a/core/src/fpdftext/fpdf_text_int.cpp +++ b/core/src/fpdftext/fpdf_text_int.cpp @@ -13,7 +13,7 @@ #include "../../include/fpdfapi/fpdf_pageobj.h" #include "../../include/fpdfapi/fpdf_resource.h" #include "../../include/fpdftext/fpdf_text.h" -#include "../../include/fxcrt/fx_arb.h" +#include "../../include/fxcrt/fx_bidi.h" #include 
"../../include/fxcrt/fx_ucd.h" #include "text_int.h" @@ -1020,10 +1020,10 @@ int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const { } return w; } -void CPDF_TextPage::OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str) { +void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) { int32_t start, count; - int32_t ret = pBidi->GetBidiInfo(start, count); - if (ret == 2) { + CFX_BidiChar::Direction ret = pBidi->GetBidiInfo(&start, &count); + if (ret == CFX_BidiChar::RIGHT) { for (int i = start + count - 1; i >= start; i--) { m_TextBuf.AppendChar(str.GetAt(i)); m_charList.Add(*(PAGECHAR_INFO*)m_TempCharList.GetAt(i)); @@ -1104,7 +1104,7 @@ void CPDF_TextPage::CloseTempLine() { if (count1 <= 0) { return; } - nonstd::unique_ptr<IFX_BidiChar> pBidiChar(IFX_BidiChar::Create()); + nonstd::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); CFX_WideString str = m_TempTextBuf.GetWideString(); CFX_WordArray order; FX_BOOL bR2L = FALSE; @@ -1126,28 +1126,28 @@ void CPDF_TextPage::CloseTempLine() { bPrevSpace = FALSE; } if (pBidiChar->AppendChar(str.GetAt(i))) { - int32_t ret = pBidiChar->GetBidiInfo(start, count); + CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); order.Add(start); order.Add(count); order.Add(ret); if (!bR2L) { - if (ret == 2) { + if (ret == CFX_BidiChar::RIGHT) { nR2L++; - } else if (ret == 1) { + } else if (ret == CFX_BidiChar::LEFT) { nL2R++; } } } } if (pBidiChar->EndChar()) { - int32_t ret = pBidiChar->GetBidiInfo(start, count); + CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); order.Add(start); order.Add(count); order.Add(ret); if (!bR2L) { - if (ret == 2) { + if (ret == CFX_BidiChar::RIGHT) { nR2L++; - } else if (ret == 1) { + } else if (ret == CFX_BidiChar::LEFT) { nL2R++; } } @@ -1762,7 +1762,7 @@ void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj, const CPDF_Font* pFont, int nItems) const { - nonstd::unique_ptr<IFX_BidiChar> 
pBidiChar(IFX_BidiChar::Create()); + nonstd::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); int32_t nR2L = 0; int32_t nL2R = 0; int32_t start = 0, count = 0; @@ -1781,19 +1781,19 @@ FX_BOOL CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj, continue; } if (pBidiChar->AppendChar(wChar)) { - int32_t ret = pBidiChar->GetBidiInfo(start, count); - if (ret == 2) { + CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); + if (ret == CFX_BidiChar::RIGHT) { nR2L++; - } else if (ret == 1) { + } else if (ret == CFX_BidiChar::LEFT) { nL2R++; } } } if (pBidiChar->EndChar()) { - int32_t ret = pBidiChar->GetBidiInfo(start, count); - if (ret == 2) { + CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); + if (ret == CFX_BidiChar::RIGHT) { nR2L++; - } else if (ret == 1) { + } else if (ret == CFX_BidiChar::LEFT) { nL2R++; } } diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h index 091ee241d4..3f1023ac8a 100644 --- a/core/src/fpdftext/text_int.h +++ b/core/src/fpdftext/text_int.h @@ -111,7 +111,7 @@ class CPDF_TextPage : public IPDF_TextPage { CPDF_TextObject* pTextObj2); int GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const; void CloseTempLine(); - void OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str); + void OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str); int32_t PreMarkedContent(PDFTEXT_Obj pObj); void ProcessMarkedContent(PDFTEXT_Obj pObj); void CheckMarkedContentObject(int32_t& start, int32_t& nCount) const; diff --git a/core/src/fxcrt/fx_arabic.cpp b/core/src/fxcrt/fx_arabic.cpp deleted file mode 100644 index 3b7d0c1c71..0000000000 --- a/core/src/fxcrt/fx_arabic.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "../../include/fxcrt/fx_ucd.h" -#include "fx_arabic.h" - -extern const FX_DWORD gs_FX_TextLayout_CodeProperties[65536]; -IFX_BidiChar* IFX_BidiChar::Create() { - return new CFX_BidiChar; -} -CFX_BidiChar::CFX_BidiChar() - : m_bSeparateNeutral(TRUE), - m_iCurStart(0), - m_iCurCount(0), - m_iCurBidi(0), - m_iLastBidi(0), - m_iLastStart(0), - m_iLastCount(0) {} -void CFX_BidiChar::SetPolicy(FX_BOOL bSeparateNeutral) { - m_bSeparateNeutral = bSeparateNeutral; -} - -FX_BOOL CFX_BidiChar::AppendChar(FX_WCHAR wch) { - FX_DWORD dwProps = gs_FX_TextLayout_CodeProperties[(FX_WORD)wch]; - int32_t iBidiCls = (dwProps & FX_BIDICLASSBITSMASK) >> FX_BIDICLASSBITS; - int32_t iContext = 0; - switch (iBidiCls) { - case FX_BIDICLASS_L: - case FX_BIDICLASS_AN: - case FX_BIDICLASS_EN: - iContext = 1; - break; - case FX_BIDICLASS_R: - case FX_BIDICLASS_AL: - iContext = 2; - break; - } - FX_BOOL bRet = FALSE; - if (iContext != m_iCurBidi) { - if (m_bSeparateNeutral) { - bRet = TRUE; - } else { - if (m_iCurBidi == 0) { - bRet = (m_iCurCount > 0); - } else { - bRet = (iContext != 0); - } - } - if (bRet) { - m_iLastBidi = m_iCurBidi; - m_iLastStart = m_iCurStart; - m_iCurStart = m_iCurCount; - m_iLastCount = m_iCurCount - m_iLastStart; - } - if (m_bSeparateNeutral || iContext != 0) { - m_iCurBidi = iContext; - } - } - m_iCurCount++; - return bRet; -} -FX_BOOL CFX_BidiChar::EndChar() { - m_iLastBidi = m_iCurBidi; - m_iLastStart = m_iCurStart; - m_iCurStart = m_iCurCount; - m_iLastCount = m_iCurCount - m_iLastStart; - return m_iLastCount > 0; -} -int32_t CFX_BidiChar::GetBidiInfo(int32_t& iStart, int32_t& iCount) { - iStart = m_iLastStart; - iCount = m_iLastCount; - return m_iLastBidi; -} -void CFX_BidiChar::Reset() { - m_iCurStart = 0; - m_iCurCount = 0; - m_iCurBidi = 0; - m_iLastBidi = 0; - m_iLastStart = 0; - m_iLastCount = 0; -} diff --git a/core/src/fxcrt/fx_arabic.h b/core/src/fxcrt/fx_arabic.h deleted file mode 100644 index 
0230d40579..0000000000 --- a/core/src/fxcrt/fx_arabic.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_SRC_FXCRT_FX_ARABIC_H_ -#define CORE_SRC_FXCRT_FX_ARABIC_H_ - -#include "../../include/fxcrt/fx_arb.h" - -class CFX_BidiChar final : public IFX_BidiChar { - public: - CFX_BidiChar(); - ~CFX_BidiChar() override {} - - void SetPolicy(FX_BOOL bSeparateNeutral = TRUE) override; - FX_BOOL AppendChar(FX_WCHAR wch) override; - FX_BOOL EndChar() override; - int32_t GetBidiInfo(int32_t& iStart, int32_t& iCount) override; - void Reset() override; - - private: - FX_BOOL m_bSeparateNeutral; - int32_t m_iCurStart; - int32_t m_iCurCount; - int32_t m_iCurBidi; - int32_t m_iLastBidi; - int32_t m_iLastStart; - int32_t m_iLastCount; -}; - -#endif // CORE_SRC_FXCRT_FX_ARABIC_H_ diff --git a/core/src/fxcrt/fx_bidi.cpp b/core/src/fxcrt/fx_bidi.cpp new file mode 100644 index 0000000000..0310fa0e94 --- /dev/null +++ b/core/src/fxcrt/fx_bidi.cpp @@ -0,0 +1,66 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "../../include/fxcrt/fx_bidi.h" +#include "../../include/fxcrt/fx_ucd.h" + +CFX_BidiChar::CFX_BidiChar() + : m_iCurStart(0), + m_iCurCount(0), + m_CurBidi(NEUTRAL), + m_iLastStart(0), + m_iLastCount(0), + m_LastBidi(NEUTRAL) { +} + +CFX_BidiChar::~CFX_BidiChar() { +} + +bool CFX_BidiChar::AppendChar(FX_WCHAR wch) { + FX_DWORD dwProps = FX_GetUnicodeProperties(wch); + int32_t iBidiCls = (dwProps & FX_BIDICLASSBITSMASK) >> FX_BIDICLASSBITS; + Direction bidi = NEUTRAL; + switch (iBidiCls) { + case FX_BIDICLASS_L: + case FX_BIDICLASS_AN: + case FX_BIDICLASS_EN: + bidi = LEFT; + break; + case FX_BIDICLASS_R: + case FX_BIDICLASS_AL: + bidi = RIGHT; + break; + } + + bool bRet = (bidi != m_CurBidi); + if (bRet) { + SaveCurrentStateToLastState(); + m_CurBidi = bidi; + } + m_iCurCount++; + return bRet; +} + +bool CFX_BidiChar::EndChar() { + SaveCurrentStateToLastState(); + return m_iLastCount > 0; +} + +CFX_BidiChar::Direction CFX_BidiChar::GetBidiInfo(int32_t* iStart, + int32_t* iCount) const { + if (iStart) + *iStart = m_iLastStart; + if (iCount) + *iCount = m_iLastCount; + return m_LastBidi; +} + +void CFX_BidiChar::SaveCurrentStateToLastState() { + m_LastBidi = m_CurBidi; + m_iLastStart = m_iCurStart; + m_iCurStart = m_iCurCount; + m_iLastCount = m_iCurCount - m_iLastStart; +} diff --git a/core/src/fxcrt/fx_bidi_unittest.cpp b/core/src/fxcrt/fx_bidi_unittest.cpp new file mode 100644 index 0000000000..c629cbbdc6 --- /dev/null +++ b/core/src/fxcrt/fx_bidi_unittest.cpp @@ -0,0 +1,136 @@ +// Copyright 2015 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "testing/gtest/include/gtest/gtest.h" +#include "../../include/fxcrt/fx_bidi.h" + +namespace { + +const FX_WCHAR kNeutralChar = 32; +const FX_WCHAR kLeftChar = 65; +const FX_WCHAR kRightChar = 1424; + +} // namespace + +TEST(fxcrt, BidiCharEmpty) { + int32_t start = -1; + int32_t count = -1; + CFX_BidiChar bidi; + CFX_BidiChar::Direction dir = bidi.GetBidiInfo(nullptr, nullptr); + EXPECT_EQ(CFX_BidiChar::NEUTRAL, dir); + + dir = bidi.GetBidiInfo(&start, nullptr); + EXPECT_EQ(CFX_BidiChar::NEUTRAL, dir); + EXPECT_EQ(0, start); + + dir = bidi.GetBidiInfo(nullptr, &count); + EXPECT_EQ(CFX_BidiChar::NEUTRAL, dir); + EXPECT_EQ(0, count); + + start = -1; + count = -1; + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(CFX_BidiChar::NEUTRAL, dir); + EXPECT_EQ(0, start); + EXPECT_EQ(0, count); + + EXPECT_FALSE(bidi.EndChar()); +} + +TEST(fxcrt, BidiCharLeft) { + int32_t start = -1; + int32_t count = -1; + CFX_BidiChar bidi; + + EXPECT_TRUE(bidi.AppendChar(kLeftChar)); + CFX_BidiChar::Direction dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(0, start); + EXPECT_EQ(0, count); + + EXPECT_FALSE(bidi.AppendChar(kLeftChar)); + EXPECT_FALSE(bidi.AppendChar(kLeftChar)); + + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(CFX_BidiChar::NEUTRAL, dir); + EXPECT_EQ(0, start); + EXPECT_EQ(0, count); + + EXPECT_TRUE(bidi.EndChar()); + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(CFX_BidiChar::LEFT, dir); + EXPECT_EQ(0, start); + EXPECT_EQ(3, count); + + EXPECT_FALSE(bidi.EndChar()); +} + +TEST(fxcrt, BidiCharLeftNeutralRight) { + int32_t start = -1; + int32_t count = -1; + CFX_BidiChar bidi; + + EXPECT_TRUE(bidi.AppendChar(kLeftChar)); + CFX_BidiChar::Direction dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(0, start); + EXPECT_EQ(0, count); + + EXPECT_FALSE(bidi.AppendChar(kLeftChar)); + EXPECT_FALSE(bidi.AppendChar(kLeftChar)); + EXPECT_TRUE(bidi.AppendChar(kNeutralChar)); + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(0, start); + EXPECT_EQ(3, 
count); + + EXPECT_FALSE(bidi.AppendChar(kNeutralChar)); + EXPECT_FALSE(bidi.AppendChar(kNeutralChar)); + EXPECT_FALSE(bidi.AppendChar(kNeutralChar)); + EXPECT_TRUE(bidi.AppendChar(kRightChar)); + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(CFX_BidiChar::NEUTRAL, dir); + EXPECT_EQ(3, start); + EXPECT_EQ(4, count); + + EXPECT_TRUE(bidi.EndChar()); + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(CFX_BidiChar::RIGHT, dir); + EXPECT_EQ(7, start); + EXPECT_EQ(1, count); + + EXPECT_FALSE(bidi.EndChar()); +} + +TEST(fxcrt, BidiCharLeftRightLeft) { + int32_t start = -1; + int32_t count = -1; + CFX_BidiChar bidi; + + EXPECT_TRUE(bidi.AppendChar(kLeftChar)); + CFX_BidiChar::Direction dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(0, start); + EXPECT_EQ(0, count); + + EXPECT_FALSE(bidi.AppendChar(kLeftChar)); + EXPECT_FALSE(bidi.AppendChar(kLeftChar)); + EXPECT_TRUE(bidi.AppendChar(kRightChar)); + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(0, start); + EXPECT_EQ(3, count); + + EXPECT_FALSE(bidi.AppendChar(kRightChar)); + EXPECT_FALSE(bidi.AppendChar(kRightChar)); + EXPECT_FALSE(bidi.AppendChar(kRightChar)); + EXPECT_TRUE(bidi.AppendChar(kLeftChar)); + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(CFX_BidiChar::RIGHT, dir); + EXPECT_EQ(3, start); + EXPECT_EQ(4, count); + + EXPECT_TRUE(bidi.EndChar()); + dir = bidi.GetBidiInfo(&start, &count); + EXPECT_EQ(CFX_BidiChar::LEFT, dir); + EXPECT_EQ(7, start); + EXPECT_EQ(1, count); + + EXPECT_FALSE(bidi.EndChar()); +} diff --git a/core/src/fxcrt/fx_ucddata.cpp b/core/src/fxcrt/fx_ucddata.cpp index 8b35f31e2b..13c5da1419 100644 --- a/core/src/fxcrt/fx_ucddata.cpp +++ b/core/src/fxcrt/fx_ucddata.cpp @@ -4,9 +4,10 @@ // Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com -#include "../../include/fxcrt/fx_system.h" +#include "../../include/fxcrt/fx_basic.h" +#include "../../include/fxcrt/fx_ucd.h" -extern const FX_DWORD gs_FX_TextLayout_CodeProperties[65536] = { +const FX_DWORD kTextLayoutCodeProperties[] = { 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, 0xfffe8ae5, 0xfffe9b5c, 0xfffe9ada, 0xfffe9b1a, 0xfffe9b5b, 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, 0xfffe9a93, @@ -10931,7 +10932,11 @@ extern const FX_DWORD gs_FX_TextLayout_CodeProperties[65536] = { 0xfffe02a4, 0xfffe02a4, 0xfffe02a4, 0xfffe3013, 0xfffe3013, 0xfffe3013, 0xfffe3011, 0xfffe3022, 0xfffe1aa4, 0xfffe1aa4, }; -extern const FX_WCHAR gs_FX_TextLayout_VerticalMirror[64] = { + +const size_t kTextLayoutCodePropertiesSize = + FX_ArraySize(kTextLayoutCodeProperties); + +const FX_WCHAR kFXTextLayoutVerticalMirror[] = { 0xFE33, 0xFE32, 0xFE31, 0xFE41, 0xFE42, 0xFE43, 0xFE44, 0xFE3F, 0xFE40, 0xFE3D, 0xFE3E, 0xFE41, 0xFE42, 0xFE43, 0xFE44, 0xFE3B, 0xFE3C, 0xFE39, 0xFE3A, 0xFE34, 0xFE35, 0xFE36, 0xFE37, 0xFE38, @@ -10941,7 +10946,10 @@ extern const FX_WCHAR gs_FX_TextLayout_VerticalMirror[64] = { 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, }; -extern const FX_WCHAR gs_FX_TextLayout_BidiMirror[512] = { +const size_t kFXTextLayoutVerticalMirrorSize = + FX_ArraySize(kFXTextLayoutVerticalMirror); + +const FX_WCHAR kFXTextLayoutBidiMirror[] = { 0x0029, 0x0028, 0x003E, 0x003C, 0x005D, 0x005B, 0x007D, 0x007B, 0x00BB, 0x00AB, 0x0F3B, 0x0F3A, 0x0F3D, 0x0F3C, 0x169C, 0x169B, 0x2019, 0x2018, 0x201D, 0x201C, 0x203A, 0x2039, 0x2046, 0x2045, 0x207E, 0x207D, 0x208E, @@ -11000,3 +11008,6 @@ extern const FX_WCHAR gs_FX_TextLayout_BidiMirror[512] = { 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, 0xFEFF, }; + +const size_t kFXTextLayoutBidiMirrorSize 
= + FX_ArraySize(kFXTextLayoutBidiMirror); diff --git a/core/src/fxcrt/fx_unicode.cpp b/core/src/fxcrt/fx_unicode.cpp index f05aeb5efb..c7ab618200 100644 --- a/core/src/fxcrt/fx_unicode.cpp +++ b/core/src/fxcrt/fx_unicode.cpp @@ -6,58 +6,29 @@ #include "../../include/fxcrt/fx_ucd.h" -extern const FX_DWORD gs_FX_TextLayout_CodeProperties[65536]; -extern const FX_WCHAR gs_FX_TextLayout_VerticalMirror[64]; -extern const FX_WCHAR gs_FX_TextLayout_BidiMirror[512]; FX_DWORD FX_GetUnicodeProperties(FX_WCHAR wch) { - return gs_FX_TextLayout_CodeProperties[(FX_WORD)wch]; -} -FX_BOOL FX_IsCtrlCode(FX_WCHAR ch) { - FX_DWORD dwRet = - (gs_FX_TextLayout_CodeProperties[(FX_WORD)ch] & FX_CHARTYPEBITSMASK); - return dwRet == FX_CHARTYPE_Tab || dwRet == FX_CHARTYPE_Control; -} -FX_BOOL FX_IsRotationCode(FX_WCHAR ch) { - return (gs_FX_TextLayout_CodeProperties[(FX_WORD)ch] & 0x8000) != 0; -} -FX_BOOL FX_IsCombinationChar(FX_WCHAR wch) { - FX_DWORD dwProps = - (gs_FX_TextLayout_CodeProperties[(FX_WORD)wch] & FX_CHARTYPEBITSMASK); - return dwProps == FX_CHARTYPE_Combination; -} -FX_BOOL FX_IsBidiChar(FX_WCHAR wch) { - FX_DWORD dwProps = gs_FX_TextLayout_CodeProperties[(FX_WORD)wch]; - int32_t iBidiCls = (dwProps & FX_BIDICLASSBITSMASK) >> FX_BIDICLASSBITS; - return (FX_BIDICLASS_R == iBidiCls || FX_BIDICLASS_AL == iBidiCls); + size_t idx = static_cast<size_t>(wch); + if (idx < kTextLayoutCodePropertiesSize) + return kTextLayoutCodeProperties[(FX_WORD)wch]; + return 0; } + FX_WCHAR FX_GetMirrorChar(FX_WCHAR wch, FX_BOOL bRTL, FX_BOOL bVertical) { - FX_DWORD dwProps = gs_FX_TextLayout_CodeProperties[(FX_WORD)wch]; + FX_DWORD dwProps = FX_GetUnicodeProperties(wch); FX_DWORD dwTemp = (dwProps & 0xFF800000); if (bRTL && dwTemp < 0xFF800000) { - wch = gs_FX_TextLayout_BidiMirror[dwTemp >> 23]; - dwProps = gs_FX_TextLayout_CodeProperties[(FX_WORD)wch]; - } - if (bVertical) { - dwTemp = (dwProps & 0x007E0000); - if (dwTemp < 0x007E0000) { - wch = gs_FX_TextLayout_VerticalMirror[dwTemp >> 17]; + 
size_t idx = dwTemp >> 23; + if (idx < kFXTextLayoutBidiMirrorSize) { + wch = kFXTextLayoutBidiMirror[idx]; + dwProps = FX_GetUnicodeProperties(wch); } } - return wch; -} -FX_WCHAR FX_GetMirrorChar(FX_WCHAR wch, - FX_DWORD dwProps, - FX_BOOL bRTL, - FX_BOOL bVertical) { - FX_DWORD dwTemp = (dwProps & 0xFF800000); - if (bRTL && dwTemp < 0xFF800000) { - wch = gs_FX_TextLayout_BidiMirror[dwTemp >> 23]; - dwProps = gs_FX_TextLayout_CodeProperties[(FX_WORD)wch]; - } if (bVertical) { dwTemp = (dwProps & 0x007E0000); if (dwTemp < 0x007E0000) { - wch = gs_FX_TextLayout_VerticalMirror[dwTemp >> 17]; + size_t idx = dwTemp >> 17; + if (idx < kFXTextLayoutVerticalMirrorSize) + wch = kFXTextLayoutVerticalMirror[idx]; } } return wch; diff --git a/pdfium.gyp b/pdfium.gyp index d7f00bc8b4..6e48c34435 100644 --- a/pdfium.gyp +++ b/pdfium.gyp @@ -392,8 +392,8 @@ 'type': 'static_library', 'ldflags': [ '-L<(PRODUCT_DIR)',], 'sources': [ - 'core/include/fxcrt/fx_arb.h', 'core/include/fxcrt/fx_basic.h', + 'core/include/fxcrt/fx_bidi.h', 'core/include/fxcrt/fx_coordinates.h', 'core/include/fxcrt/fx_ext.h', 'core/include/fxcrt/fx_memory.h', @@ -410,8 +410,6 @@ 'core/src/fxcrt/fxcrt_posix.h', 'core/src/fxcrt/fxcrt_windows.cpp', 'core/src/fxcrt/fxcrt_windows.h', - 'core/src/fxcrt/fx_arabic.cpp', - 'core/src/fxcrt/fx_arabic.h', 'core/src/fxcrt/fx_basic_array.cpp', 'core/src/fxcrt/fx_basic_bstring.cpp', 'core/src/fxcrt/fx_basic_buffer.cpp', @@ -424,6 +422,7 @@ 'core/src/fxcrt/fx_basic_utf.cpp', 'core/src/fxcrt/fx_basic_util.cpp', 'core/src/fxcrt/fx_basic_wstring.cpp', + 'core/src/fxcrt/fx_bidi.cpp', 'core/src/fxcrt/fx_extension.cpp', 'core/src/fxcrt/fx_ucddata.cpp', 'core/src/fxcrt/fx_unicode.cpp', @@ -723,6 +722,7 @@ 'core/src/fxcrt/fx_basic_bstring_unittest.cpp', 'core/src/fxcrt/fx_basic_memmgr_unittest.cpp', 'core/src/fxcrt/fx_basic_wstring_unittest.cpp', + 'core/src/fxcrt/fx_bidi_unittest.cpp', 'core/src/fxcrt/fx_system_unittest.cpp', 'testing/fx_string_testhelpers.h', 
'testing/fx_string_testhelpers.cpp', -- cgit v1.2.3