From 5278cebc468e6975f217d0c016501a554d10fd97 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Mon, 18 Sep 2017 16:21:03 -0400 Subject: Cleanup word break properties This CL attempts to clarify the contents of the gs_FX_WordBreak_Table and adds static_asserts that each entry in the WordBreakProperty table has the value we expect. Change-Id: I33c1f12a9e18240b01969be9902204eba5074eb7 Reviewed-on: https://pdfium-review.googlesource.com/13430 Commit-Queue: dsinclair Reviewed-by: Henrique Nakashima --- xfa/fde/cfde_texteditengine.cpp | 27 ++++------ xfa/fde/cfde_wordbreak_data.cpp | 109 ++++++++++++++++++++++++++++++++++++++-- xfa/fde/cfde_wordbreak_data.h | 16 ++++++ 3 files changed, 131 insertions(+), 21 deletions(-) diff --git a/xfa/fde/cfde_texteditengine.cpp b/xfa/fde/cfde_texteditengine.cpp index 999380b401..8075ea7a98 100644 --- a/xfa/fde/cfde_texteditengine.cpp +++ b/xfa/fde/cfde_texteditengine.cpp @@ -18,22 +18,6 @@ constexpr size_t kMaxEditOperations = 128; constexpr size_t kGapSize = 128; constexpr size_t kPageWidthMax = 0xffff; -enum class WordBreakProperty { - kNone = 0, - kCR, - kLF, - kNewLine, - kExtend, - kFormat, - kKataKana, - kALetter, - kMidLetter, - kMidNum, - kMidNumLet, - kNumeric, - kExtendNumLet, -}; - class InsertOperation : public CFDE_TextEditEngine::Operation { public: InsertOperation(CFDE_TextEditEngine* engine, @@ -110,6 +94,14 @@ class ReplaceOperation : public CFDE_TextEditEngine::Operation { DeleteOperation delete_op_; }; +bool CheckStateChangeForWordBreak(WordBreakProperty from, + WordBreakProperty to) { + ASSERT(static_cast(from) < 13); + + return !!(gs_FX_WordBreak_Table[static_cast(from)] & + static_cast(1 << static_cast(to))); +} + WordBreakProperty GetWordBreakProperty(wchar_t wcCodePoint) { uint8_t dwProperty = gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1]; return static_cast((wcCodePoint & 1) ? 
(dwProperty & 0x0F) @@ -1043,8 +1035,7 @@ void CFDE_TextEditEngine::Iterator::FindNextBreakPos(bool bPrev) { Next(bPrev); WordBreakProperty eNextType = GetWordBreakProperty(GetChar()); - uint16_t wBreak = gs_FX_WordBreak_Table[static_cast(eCurType)] & - ((uint16_t)(1 << static_cast(eNextType))); + bool wBreak = CheckStateChangeForWordBreak(eCurType, eNextType); if (wBreak) { if (IsEOF(!bPrev)) { Next(!bPrev); diff --git a/xfa/fde/cfde_wordbreak_data.cpp b/xfa/fde/cfde_wordbreak_data.cpp index 35c097e932..3c4864be69 100644 --- a/xfa/fde/cfde_wordbreak_data.cpp +++ b/xfa/fde/cfde_wordbreak_data.cpp @@ -6,9 +6,112 @@ #include "xfa/fde/cfde_wordbreak_data.h" -const uint16_t gs_FX_WordBreak_Table[16] = { - 0xFFFF, 0xFFF9, 0xFFFB, 0xFFFB, 0xFFFB, 0xFFFB, 0xEFBB, 0xE77B, - 0xFFFB, 0xFFFB, 0xFFFB, 0xE77B, 0xE73B, 0xFFFB, 0xFFFB, 0xFFFB, +namespace { + +enum WordBreakValue : uint16_t { + kWordBreakValueNone = 1 << 0, + kWordBreakValueCR = 1 << 1, + kWordBreakValueLF = 1 << 2, + kWordBreakValueNewLine = 1 << 3, + kWordBreakValueExtend = 1 << 4, + kWordBreakValueFormat = 1 << 5, + kWordBreakValueKataKana = 1 << 6, + kWordBreakValueALetter = 1 << 7, + kWordBreakValueMidLetter = 1 << 8, + kWordBreakValueMidNum = 1 << 9, + kWordBreakValueMidNumLet = 1 << 10, + kWordBreakValueNumeric = 1 << 11, + kWordBreakValueExtendNumLet = 1 << 12, +}; + +static_assert(kWordBreakValueNone == + (1 << static_cast(WordBreakProperty::kNone)), + "WordBreakValue must match"); +static_assert(kWordBreakValueCR == + (1 << static_cast(WordBreakProperty::kCR)), + "WordBreakValue must match"); +static_assert(kWordBreakValueLF == + (1 << static_cast(WordBreakProperty::kLF)), + "WordBreakValue must match"); +static_assert(kWordBreakValueNewLine == + (1 << static_cast(WordBreakProperty::kNewLine)), + "WordBreakValue must match"); +static_assert(kWordBreakValueExtend == + (1 << static_cast(WordBreakProperty::kExtend)), + "WordBreakValue must match"); +static_assert(kWordBreakValueFormat == + (1 << 
static_cast(WordBreakProperty::kFormat)), + "WordBreakValue must match"); +static_assert(kWordBreakValueKataKana == + (1 << static_cast(WordBreakProperty::kKataKana)), + "WordBreakValue must match"); +static_assert(kWordBreakValueALetter == + (1 << static_cast(WordBreakProperty::kALetter)), + "WordBreakValue must match"); +static_assert(kWordBreakValueMidLetter == + (1 << static_cast(WordBreakProperty::kMidLetter)), + "WordBreakValue must match"); +static_assert(kWordBreakValueMidNum == + (1 << static_cast(WordBreakProperty::kMidNum)), + "WordBreakValue must match"); +static_assert(kWordBreakValueMidNumLet == + (1 << static_cast(WordBreakProperty::kMidNumLet)), + "WordBreakValue must match"); +static_assert(kWordBreakValueNumeric == + (1 << static_cast(WordBreakProperty::kNumeric)), + "WordBreakValue must match"); +static_assert(kWordBreakValueExtendNumLet == + (1 << static_cast(WordBreakProperty::kExtendNumLet)), + "WordBreakValue must match"); + +} // namespace + +const uint16_t gs_FX_WordBreak_Table[] = { + // WordBreakProperty::kNone + 0xFFFF, + + // WordBreakProperty::kCR + static_cast(~(kWordBreakValueLF | kWordBreakValueCR)), + + // WordBreakProperty::kLF + static_cast(~(kWordBreakValueLF)), + + // WordBreakProperty::kNewLine + static_cast(~(kWordBreakValueLF)), + + // WordBreakProperty::kExtend + static_cast(~(kWordBreakValueLF)), + + // WordBreakProperty::kFormat + static_cast(~(kWordBreakValueLF)), + + // WordBreakProperty::kKataKana + static_cast(~(kWordBreakValueLF | kWordBreakValueKataKana | + kWordBreakValueExtendNumLet)), + + // WordBreakProperty::kALetter + static_cast(~(kWordBreakValueLF | kWordBreakValueALetter | + kWordBreakValueNumeric | + kWordBreakValueExtendNumLet)), + + // WordBreakProperty::kMidLetter + static_cast(~(kWordBreakValueLF)), + + // WordBreakProperty::kMidNum + static_cast(~(kWordBreakValueLF)), + + // WordBreakProperty::kMidNumLet + static_cast(~(kWordBreakValueLF)), + + // WordBreakProperty::kNumeric + 
static_cast(~(kWordBreakValueLF | kWordBreakValueALetter | + kWordBreakValueNumeric | + kWordBreakValueExtendNumLet)), + + // WordBreakProperty::kExtendNumLet + static_cast(~(kWordBreakValueLF | kWordBreakValueKataKana | + kWordBreakValueALetter | kWordBreakValueNumeric | + kWordBreakValueExtendNumLet)), }; const uint8_t gs_FX_WordBreak_CodePointProperties[(0xFFFF - 1) / 2 + 1] = { diff --git a/xfa/fde/cfde_wordbreak_data.h b/xfa/fde/cfde_wordbreak_data.h index 8cc13e3f4b..28e26cc4fe 100644 --- a/xfa/fde/cfde_wordbreak_data.h +++ b/xfa/fde/cfde_wordbreak_data.h @@ -9,6 +9,22 @@ #include +enum class WordBreakProperty : uint8_t { + kNone = 0, + kCR, + kLF, + kNewLine, + kExtend, + kFormat, + kKataKana, + kALetter, + kMidLetter, + kMidNum, + kMidNumLet, + kNumeric, + kExtendNumLet, +}; + extern const uint16_t gs_FX_WordBreak_Table[]; extern const uint8_t gs_FX_WordBreak_CodePointProperties[]; -- cgit v1.2.3