summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dan Sinclair <dsinclair@chromium.org> 2017-09-18 16:21:03 -0400
committer Chromium commit bot <commit-bot@chromium.org> 2017-09-18 20:35:25 +0000
commit 5278cebc468e6975f217d0c016501a554d10fd97 (patch)
tree 5e92b047e2bdd2c0b58a50f71e4445c001eff0d0
parent 91c2f7cdcf8545d764a8b6543b42f0819f8d4ad3 (diff)
download pdfium-5278cebc468e6975f217d0c016501a554d10fd97.tar.xz
Cleanup word break properties
This CL attempts to clarify the contents of the gs_FX_WordBreak_Table and adds static_asserts that each entry in the WordBreakProperty table has the value we expect.

Change-Id: I33c1f12a9e18240b01969be9902204eba5074eb7
Reviewed-on: https://pdfium-review.googlesource.com/13430
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
-rw-r--r-- xfa/fde/cfde_texteditengine.cpp 27
-rw-r--r-- xfa/fde/cfde_wordbreak_data.cpp 109
-rw-r--r-- xfa/fde/cfde_wordbreak_data.h 16
3 files changed, 131 insertions, 21 deletions
diff --git a/xfa/fde/cfde_texteditengine.cpp b/xfa/fde/cfde_texteditengine.cpp
index 999380b401..8075ea7a98 100644
--- a/xfa/fde/cfde_texteditengine.cpp
+++ b/xfa/fde/cfde_texteditengine.cpp
@@ -18,22 +18,6 @@ constexpr size_t kMaxEditOperations = 128;
constexpr size_t kGapSize = 128;
constexpr size_t kPageWidthMax = 0xffff;
-enum class WordBreakProperty {
- kNone = 0,
- kCR,
- kLF,
- kNewLine,
- kExtend,
- kFormat,
- kKataKana,
- kALetter,
- kMidLetter,
- kMidNum,
- kMidNumLet,
- kNumeric,
- kExtendNumLet,
-};
-
class InsertOperation : public CFDE_TextEditEngine::Operation {
public:
InsertOperation(CFDE_TextEditEngine* engine,
@@ -110,6 +94,14 @@ class ReplaceOperation : public CFDE_TextEditEngine::Operation {
DeleteOperation delete_op_;
};
+bool CheckStateChangeForWordBreak(WordBreakProperty from,
+ WordBreakProperty to) {
+ ASSERT(static_cast<int>(from) < 13);
+
+ return !!(gs_FX_WordBreak_Table[static_cast<int>(from)] &
+ static_cast<uint16_t>(1 << static_cast<int>(to)));
+}
+
WordBreakProperty GetWordBreakProperty(wchar_t wcCodePoint) {
uint8_t dwProperty = gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1];
return static_cast<WordBreakProperty>((wcCodePoint & 1) ? (dwProperty & 0x0F)
@@ -1043,8 +1035,7 @@ void CFDE_TextEditEngine::Iterator::FindNextBreakPos(bool bPrev) {
Next(bPrev);
WordBreakProperty eNextType = GetWordBreakProperty(GetChar());
- uint16_t wBreak = gs_FX_WordBreak_Table[static_cast<int>(eCurType)] &
- ((uint16_t)(1 << static_cast<int>(eNextType)));
+ bool wBreak = CheckStateChangeForWordBreak(eCurType, eNextType);
if (wBreak) {
if (IsEOF(!bPrev)) {
Next(!bPrev);
diff --git a/xfa/fde/cfde_wordbreak_data.cpp b/xfa/fde/cfde_wordbreak_data.cpp
index 35c097e932..3c4864be69 100644
--- a/xfa/fde/cfde_wordbreak_data.cpp
+++ b/xfa/fde/cfde_wordbreak_data.cpp
@@ -6,9 +6,112 @@
#include "xfa/fde/cfde_wordbreak_data.h"
-const uint16_t gs_FX_WordBreak_Table[16] = {
- 0xFFFF, 0xFFF9, 0xFFFB, 0xFFFB, 0xFFFB, 0xFFFB, 0xEFBB, 0xE77B,
- 0xFFFB, 0xFFFB, 0xFFFB, 0xE77B, 0xE73B, 0xFFFB, 0xFFFB, 0xFFFB,
+namespace {
+
+enum WordBreakValue : uint16_t {
+ kWordBreakValueNone = 1 << 0,
+ kWordBreakValueCR = 1 << 1,
+ kWordBreakValueLF = 1 << 2,
+ kWordBreakValueNewLine = 1 << 3,
+ kWordBreakValueExtend = 1 << 4,
+ kWordBreakValueFormat = 1 << 5,
+ kWordBreakValueKataKana = 1 << 6,
+ kWordBreakValueALetter = 1 << 7,
+ kWordBreakValueMidLetter = 1 << 8,
+ kWordBreakValueMidNum = 1 << 9,
+ kWordBreakValueMidNumLet = 1 << 10,
+ kWordBreakValueNumeric = 1 << 11,
+ kWordBreakValueExtendNumLet = 1 << 12,
+};
+
+static_assert(kWordBreakValueNone ==
+ (1 << static_cast<int>(WordBreakProperty::kNone)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueCR ==
+ (1 << static_cast<int>(WordBreakProperty::kCR)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueLF ==
+ (1 << static_cast<int>(WordBreakProperty::kLF)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueNewLine ==
+ (1 << static_cast<int>(WordBreakProperty::kNewLine)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueExtend ==
+ (1 << static_cast<int>(WordBreakProperty::kExtend)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueFormat ==
+ (1 << static_cast<int>(WordBreakProperty::kFormat)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueKataKana ==
+ (1 << static_cast<int>(WordBreakProperty::kKataKana)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueALetter ==
+ (1 << static_cast<int>(WordBreakProperty::kALetter)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueMidLetter ==
+ (1 << static_cast<int>(WordBreakProperty::kMidLetter)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueMidNum ==
+ (1 << static_cast<int>(WordBreakProperty::kMidNum)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueMidNumLet ==
+ (1 << static_cast<int>(WordBreakProperty::kMidNumLet)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueNumeric ==
+ (1 << static_cast<int>(WordBreakProperty::kNumeric)),
+ "WordBreakValue must match");
+static_assert(kWordBreakValueExtendNumLet ==
+ (1 << static_cast<int>(WordBreakProperty::kExtendNumLet)),
+ "WordBreakValue must match");
+
+} // namespace
+
+const uint16_t gs_FX_WordBreak_Table[] = {
+ // WordBreakProperty::kNone
+ 0xFFFF,
+
+ // WordBreakProperty::kCR
+ static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueCR)),
+
+ // WordBreakProperty::kLF
+ static_cast<uint16_t>(~(kWordBreakValueLF)),
+
+ // WordBreakProperty::kNewLine
+ static_cast<uint16_t>(~(kWordBreakValueLF)),
+
+ // WordBreakProperty::kExtend
+ static_cast<uint16_t>(~(kWordBreakValueLF)),
+
+ // WordBreakProperty::kFormat
+ static_cast<uint16_t>(~(kWordBreakValueLF)),
+
+ // WordBreakProperty::kKataKana
+ static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueKataKana |
+ kWordBreakValueExtendNumLet)),
+
+ // WordBreakProperty::kALetter
+ static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueALetter |
+ kWordBreakValueNumeric |
+ kWordBreakValueExtendNumLet)),
+
+ // WordBreakProperty::kMidLetter
+ static_cast<uint16_t>(~(kWordBreakValueLF)),
+
+ // WordBreakProperty::kMidNum
+ static_cast<uint16_t>(~(kWordBreakValueLF)),
+
+ // WordBreakProperty::kMidNumLet
+ static_cast<uint16_t>(~(kWordBreakValueLF)),
+
+ // WordBreakProperty::kNumeric
+ static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueALetter |
+ kWordBreakValueNumeric |
+ kWordBreakValueExtendNumLet)),
+
+ // WordBreakProperty::kExtendNumLet
+ static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueKataKana |
+ kWordBreakValueALetter | kWordBreakValueNumeric |
+ kWordBreakValueExtendNumLet)),
};
const uint8_t gs_FX_WordBreak_CodePointProperties[(0xFFFF - 1) / 2 + 1] = {
diff --git a/xfa/fde/cfde_wordbreak_data.h b/xfa/fde/cfde_wordbreak_data.h
index 8cc13e3f4b..28e26cc4fe 100644
--- a/xfa/fde/cfde_wordbreak_data.h
+++ b/xfa/fde/cfde_wordbreak_data.h
@@ -9,6 +9,22 @@
#include <stdint.h>
+enum class WordBreakProperty : uint8_t {
+ kNone = 0,
+ kCR,
+ kLF,
+ kNewLine,
+ kExtend,
+ kFormat,
+ kKataKana,
+ kALetter,
+ kMidLetter,
+ kMidNum,
+ kMidNumLet,
+ kNumeric,
+ kExtendNumLet,
+};
+
extern const uint16_t gs_FX_WordBreak_Table[];
extern const uint8_t gs_FX_WordBreak_CodePointProperties[];