From 47a90b894ecca2d3547226169602d7f8729d564f Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Thu, 31 Aug 2017 14:53:44 -0400 Subject: More BIDI code shuffling Change-Id: I6df7e7d1283541ea2e6b9bcf3de172f6a886054a Reviewed-on: https://pdfium-review.googlesource.com/12731 Reviewed-by: Tom Sepez Commit-Queue: dsinclair --- core/fxcrt/fx_bidi.cpp | 32 ++++++++++++++++-- core/fxcrt/fx_ucd.h | 73 +++------------------------------------- xfa/fgas/layout/cfx_rtfbreak.cpp | 14 ++++---- xfa/fgas/layout/cfx_txtbreak.cpp | 5 +-- 4 files changed, 45 insertions(+), 79 deletions(-) diff --git a/core/fxcrt/fx_bidi.cpp b/core/fxcrt/fx_bidi.cpp index 8b20cfbf46..5b5b1982cc 100644 --- a/core/fxcrt/fx_bidi.cpp +++ b/core/fxcrt/fx_bidi.cpp @@ -13,9 +13,37 @@ #ifdef PDF_ENABLE_XFA #include "core/fxcrt/fx_extension.h" +#endif // PDF_ENABLE_XFA namespace { +enum FX_BIDICLASS { + FX_BIDICLASS_ON = 0, // Other Neutral + FX_BIDICLASS_L = 1, // Left Letter + FX_BIDICLASS_R = 2, // Right Letter + FX_BIDICLASS_AN = 3, // Arabic Number + FX_BIDICLASS_EN = 4, // European Number + FX_BIDICLASS_AL = 5, // Arabic Letter + FX_BIDICLASS_NSM = 6, // Non-spacing Mark + FX_BIDICLASS_CS = 7, // Common Number Separator + FX_BIDICLASS_ES = 8, // European Separator + FX_BIDICLASS_ET = 9, // European Number Terminator + FX_BIDICLASS_BN = 10, // Boundary Neutral + FX_BIDICLASS_S = 11, // Segment Separator + FX_BIDICLASS_WS = 12, // Whitespace + FX_BIDICLASS_B = 13, // Paragraph Separator + FX_BIDICLASS_RLO = 14, // Right-to-Left Override + FX_BIDICLASS_RLE = 15, // Right-to-Left Embedding + FX_BIDICLASS_LRO = 16, // Left-to-Right Override + FX_BIDICLASS_LRE = 17, // Left-to-Right Embedding + FX_BIDICLASS_PDF = 18, // Pop Directional Format + FX_BIDICLASS_N = FX_BIDICLASS_ON, +}; +constexpr uint32_t FX_BIDICLASSBITS = 6; +constexpr uint32_t FX_BIDICLASSBITSMASK = 0x1F << FX_BIDICLASSBITS; + +#ifdef PDF_ENABLE_XFA + #ifndef NDEBUG constexpr int32_t kBidiMaxLevel = 61; #endif // NDEBUG @@ -510,10 +538,10 @@ class CFX_BidiLine { } }; -} // namespace - #endif // PDF_ENABLE_XFA +} // namespace + CFX_BidiChar::CFX_BidiChar() : m_CurrentSegment({0, 0, NEUTRAL}), m_LastSegment({0, 0, NEUTRAL}) {} diff --git a/core/fxcrt/fx_ucd.h b/core/fxcrt/fx_ucd.h index 50f3801c87..bca830fc0f 100644 --- a/core/fxcrt/fx_ucd.h +++ b/core/fxcrt/fx_ucd.h @@ -9,32 +9,6 @@ #include "core/fxcrt/cfx_retain_ptr.h" -constexpr uint32_t FX_BIDICLASSBITS = 6; -constexpr uint32_t FX_BIDICLASSBITSMASK = 0x1F << FX_BIDICLASSBITS; - -enum FX_BIDICLASS { - FX_BIDICLASS_ON = 0, // Other Neutral - FX_BIDICLASS_L = 1, // Left Letter - FX_BIDICLASS_R = 2, // Right Letter - FX_BIDICLASS_AN = 3, // Arabic Number - FX_BIDICLASS_EN = 4, // European Number - FX_BIDICLASS_AL = 5, // Arabic Letter - FX_BIDICLASS_NSM = 6, // Non-spacing Mark - FX_BIDICLASS_CS = 7, // Common Number Separator - FX_BIDICLASS_ES = 8, // European Separator - FX_BIDICLASS_ET = 9, // European Number Terminator - FX_BIDICLASS_BN = 10, // Boundary Neutral - FX_BIDICLASS_S = 11, // Segment Separator - FX_BIDICLASS_WS = 12, // Whitespace - FX_BIDICLASS_B = 13, // Paragraph Separator - FX_BIDICLASS_RLO = 14, // Right-to-Left Override - FX_BIDICLASS_RLE = 15, // Right-to-Left Embedding - FX_BIDICLASS_LRO = 16, // Left-to-Right Override - FX_BIDICLASS_LRE = 17, // Left-to-Right Embedding - FX_BIDICLASS_PDF = 18, // Pop Directional Format - FX_BIDICLASS_N = FX_BIDICLASS_ON, -}; - extern const uint32_t kTextLayoutCodeProperties[]; extern const size_t kTextLayoutCodePropertiesSize; @@ -42,53 +16,13 @@ extern const uint16_t kFXTextLayoutBidiMirror[]; extern const size_t kFXTextLayoutBidiMirrorSize; uint32_t FX_GetUnicodeProperties(wchar_t wch); - wchar_t FX_GetMirrorChar(wchar_t wch); #ifdef PDF_ENABLE_XFA -// As defined in http://www.unicode.org/reports/tr14/ -enum FXCHAR_BREAKPROP { - FX_CBP_OP = 0, // Opening Punctuation - FX_CBP_CL = 1, // Closing Punctuation - FX_CBP_QU = 2, // Ambiguous Quotation - FX_CBP_GL = 3, // Non-breaking ("Glue") - FX_CBP_NS = 4, // Non Starter - FX_CBP_EX = 5, // Exclamation/Interrogation - FX_CBP_SY = 6, // Symbols Allowing Breaks - FX_CBP_IS = 7, // Infix Separator (Numeric) - FX_CBP_PR = 8, // Prefix (Numeric) - FX_CBP_PO = 9, // Postfix (Numeric) - FX_CBP_NU = 10, // Numeric - FX_CBP_AL = 11, // Ordinary Alphabetic and Symbol Characters - FX_CBP_ID = 12, // Ideographic - FX_CBP_IN = 13, // Inseparable - FX_CBP_HY = 14, // Hyphen - FX_CBP_BA = 15, // Break Opportunity After - FX_CBP_BB = 16, // Break Opportunity Before - FX_CBP_B2 = 17, // Break Opportunity Before and After - FX_CBP_ZW = 18, // Zero Width Space - FX_CBP_CM = 19, // Attached Characters and Combining Marks - FX_CBP_WJ = 20, // Word Joiner - FX_CBP_H2 = 21, // Hangul LV Syllable - FX_CBP_H3 = 22, // Hangul LVT Syllable - FX_CBP_JL = 23, // Hangul Leading Jamo - FX_CBP_JV = 24, // Hangul Vowel Jamo - FX_CBP_JT = 25, // Hangul Trailing Jamo - - FX_CBP_BK = 26, // Mandatory Break - FX_CBP_CR = 27, // Carriage Return - FX_CBP_LF = 28, // Line Feed - FX_CBP_NL = 29, // Next Line - FX_CBP_SA = 30, // Complex Context (South East Asian) - FX_CBP_SG = 31, // Surrogate - FX_CBP_CB = 32, // Contingent Break Opportunity - FX_CBP_XX = 33, // Unknown - FX_CBP_AI = 34, // Ambiguous (Alphabetic or Ideographic) - FX_CBP_SP = 35, // Space - FX_CBP_NONE = 36, - FX_CBP_TB = 37, // ? -}; +// As defined in http://www.unicode.org/reports/tr14 +constexpr uint8_t kBreakPropertySpace = 35; +constexpr uint8_t kBreakPropertyTB = 37; // Don't know what this is ... constexpr uint32_t FX_CHARTYPEBITS = 11; constexpr uint32_t FX_CHARTYPEBITSMASK = 0xF << FX_CHARTYPEBITS; @@ -108,6 +42,7 @@ enum FX_CHARTYPE { FX_CHARTYPE_ArabicForm = (11 << FX_CHARTYPEBITS), FX_CHARTYPE_Arabic = (12 << FX_CHARTYPEBITS), }; + inline FX_CHARTYPE GetCharTypeFromProp(uint32_t prop) { return static_cast(prop & FX_CHARTYPEBITSMASK); } diff --git a/xfa/fgas/layout/cfx_rtfbreak.cpp b/xfa/fgas/layout/cfx_rtfbreak.cpp index a097361d95..be5780fc7a 100644 --- a/xfa/fgas/layout/cfx_rtfbreak.cpp +++ b/xfa/fgas/layout/cfx_rtfbreak.cpp @@ -565,16 +565,18 @@ int32_t CFX_RTFBreak::GetBreakPos(std::vector& tca, uint32_t nCur = nCodeProp & 0x003F; bool bNeedBreak = false; FX_LINEBREAKTYPE eType; - if (nCur == FX_CBP_TB) { + if (nCur == kBreakPropertyTB) { bNeedBreak = true; - eType = nNext == FX_CBP_TB ? FX_LBT_PROHIBITED_BRK - : gs_FX_LineBreak_PairTable[nCur][nNext]; + eType = nNext == kBreakPropertyTB + ? FX_LBT_PROHIBITED_BRK + : gs_FX_LineBreak_PairTable[nCur][nNext]; } else { - if (nCur == FX_CBP_SP) + if (nCur == kBreakPropertySpace) bNeedBreak = true; - eType = nNext == FX_CBP_SP ? FX_LBT_PROHIBITED_BRK - : gs_FX_LineBreak_PairTable[nCur][nNext]; + eType = nNext == kBreakPropertySpace + ? FX_LBT_PROHIBITED_BRK + : gs_FX_LineBreak_PairTable[nCur][nNext]; } if (bAllChars) pCur->m_nBreakType = eType; diff --git a/xfa/fgas/layout/cfx_txtbreak.cpp b/xfa/fgas/layout/cfx_txtbreak.cpp index 3ba7640904..2ea64d2bd5 100644 --- a/xfa/fgas/layout/cfx_txtbreak.cpp +++ b/xfa/fgas/layout/cfx_txtbreak.cpp @@ -548,14 +548,15 @@ int32_t CFX_TxtBreak::GetBreakPos(std::vector& ca, pCur = &ca[iLength]; nCodeProp = pCur->char_props(); nCur = nCodeProp & 0x003F; - if (nNext == FX_CBP_SP) + if (nNext == kBreakPropertySpace) eType = FX_LBT_PROHIBITED_BRK; else eType = gs_FX_LineBreak_PairTable[nCur][nNext]; if (bAllChars) pCur->m_nBreakType = static_cast(eType); if (!bOnlyBrk) { - if (m_bSingleLine || iEndPos <= m_iLineWidth || nCur == FX_CBP_SP) { + if (m_bSingleLine || iEndPos <= m_iLineWidth || + nCur == kBreakPropertySpace) { if (eType == FX_LBT_DIRECT_BRK && iBreak < 0) { iBreak = iLength; iBreakPos = iEndPos; -- cgit v1.2.3