diff options
author | Robin Watts <robin.watts@artifex.com> | 2016-01-19 18:00:49 +0000 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2016-01-20 16:34:38 +0100 |
commit | 201388486324758d08ab3b1be4cc0a84b8b1ae9a (patch) | |
tree | c15d24e6b6e31c21a9fb8d8235a1993a99299c2f | |
parent | ac27f6c40787c34830b5273ca51fb915d341c697 (diff) | |
download | mupdf-201388486324758d08ab3b1be4cc0a84b8b1ae9a.tar.xz |
Tidy bidirectional source.
Make the import follow mupdf style (better, if not perfect).
Use ucdn where possible to avoid duplicating tables.
Shrink the types, make them explicit (e.g. use fz_bidi_level rather
than int) and make tables const.
Use 32-bit integers for text.
-rw-r--r-- | include/mupdf/fitz/bidi.h | 262 | ||||
-rw-r--r-- | source/fitz/bidi-impl.h | 60 | ||||
-rw-r--r-- | source/fitz/bidi-std.c | 1764 | ||||
-rw-r--r-- | source/fitz/bidi.c | 955 | ||||
-rw-r--r-- | source/fitz/stext-device.c | 4 | ||||
-rw-r--r-- | source/html/html-layout.c | 68 |
6 files changed, 1259 insertions, 1854 deletions
diff --git a/include/mupdf/fitz/bidi.h b/include/mupdf/fitz/bidi.h index 978696ef..dfa439f3 100644 --- a/include/mupdf/fitz/bidi.h +++ b/include/mupdf/fitz/bidi.h @@ -1,13 +1,12 @@ -/** - * Bidirectional text processing. - * - */ -/* Copyright (C) Picsel, 2004-2008. All Rights Reserved. */ -/** @defgroup BidiText - * @ingroup EpageText - * @{ +/* * Bidirectional text processing. * + * Derived from the SmartOffice code, which is itself derived + * from the example unicode standard code. Original copyright + * messages follow: + * + * Copyright (C) Picsel, 2004-2008. All Rights Reserved. + * * Processes Unicode text by arranging the characters into an order suitable * for display. E.g. Hebrew text will be arranged from right-to-left and * any English within the text will remain in the left-to-right order. @@ -19,242 +18,79 @@ * on the reference implementation found on Unicode.org. */ -#ifndef BIDI_BIDI_H -#define BIDI_BIDI_H +#ifndef FITZ_BIDI_H +#define FITZ_BIDI_H #include "mupdf/fitz/system.h" - -/* Bidirectional Character Types - * as defined by the Unicode Bidirectional Algorithm Table 3-7. - * The list of bidirectional character types here is not grouped the - * same way as the table 3-7, since the numeric values for the types - * are chosen to keep the state and action tables compact. - */ -enum -{ - /* input types */ - /* ON MUST be zero, code relies on ON = N = 0 */ - BDI_ON = 0, /**< Other Neutral */ - BDI_L, /**< Left-to-right Letter */ - BDI_R, /**< Right-to-left Letter */ - BDI_AN, /**< Arabic Number */ - BDI_EN, /**< European Number */ - BDI_AL, /**< Arabic Letter (Right-to-left) */ - BDI_NSM, /**< Non-spacing Mark */ - BDI_CS, /**< Common Separator */ - BDI_ES, /**< European Separator */ - BDI_ET, /**< European Terminator (post/prefix e.g. $ and %) */ - - /* resolved types */ - BDI_BN, /**< Boundary neutral (type of RLE etc after explicit levels)*/ - - /* input types, */ - BDI_S, /**< Segment Separator (TAB) used only in L1 */ - BDI_WS, /**< White space used only in L1 */ - BDI_B, /**< Paragraph Separator (aka as PS) */ - - /* types for explicit controls */ - BDI_RLO, /**< these are used only in X1-X9 */ - BDI_RLE, - BDI_LRO, - BDI_LRE, - BDI_PDF, - - /* resolved types, also resolved directions */ - BDI_N = BDI_ON /**< alias, where ON, WS and S are treated the same */ -}; - -typedef enum Bidi_Direction +typedef enum fz_bidi_direction_e { - Bidi_LeftToRight = 0, - Bidi_RightToLeft = 1, - Bidi_Neutral = 2 + BIDI_LEFT_TO_RIGHT = 0, + BIDI_RIGHT_TO_LEFT = 1, + BIDI_NEUTRAL = 2 } -Bidi_Direction; +fz_bidi_direction; -typedef enum Bidi_Flags +typedef enum fz_bidi_flags_e { - Bidi_classifyWhiteSpace = 1, - Bidi_replaceTab = 2 + BIDI_CLASSIFY_WHITE_SPACE = 1, + BIDI_REPLACE_TAB = 2 } -Bidi_Flags; +fz_bidi_flags; -typedef int Bidi_CharType; /**< Same as in the third-party library */ -typedef int Bidi_Level; /**< Same as in the third-party library. - * Note: Max level is 61 */ +typedef uint8_t fz_bidi_chartype; +typedef int fz_bidi_level; /* Note: Max level is 125 */ enum { - Bidi_LevelMax = 61 + BIDI_LEVEL_MAX = 125 /* Updated for 6.3.0 */ }; - - -/** - * Prototype for function supplied to Bidi_processLine and - * invoked for each fragment. - * - * @param offset first character in fragment - * @param fragmentLen number of characters in fragment - * @param rightToLeft true if fragment is right-to-left - * @param mirror The mirror code of the fragment if it exists - * @param arg data from caller of Bidi_processLine - * @param isRtlNumber true if fragment contains only RTL numbers. - */ -typedef void (Bidi_PL_Fragment_Callback)(unsigned int offset, - size_t fragmentLen, - int rightToLeft, - uint16_t mirror, - void *arg, - int isRtlNumber); - - - -/** - * Prototype for function supplied to Bidi_processLine, which invokes it - * repeatedly to build a single 'string' to represent the entire line. - * - * @param arg data from caller of Bidi_processLine. - * @param objText receives pointer to either the object's text or - * NULL if there are no more objects on the line. - * @param objLength receives length of object's text. - * @param more receives true if there is more text on this 'line'. - * @param explicitDirection receives Explicit direction of object'stext. - * Bidi_Neutral can be passed if no Direction is specified. - */ -typedef void (Bidi_PL_NextObj_Callback)(void *arg, - const uint16_t **objText, - size_t *objLength, - int *more, - Bidi_Direction *explicitDirection); - - - -/** - * Converts text from logical to display order. The conversion - * is in-place, i.e. the given string is changed by this function. - * - * @param text input/output text - * @param[in] baseDir base direction - * @param[in] outputDir render direction - * @param[in] len string length - * - * @return error - */ -void Bidi_processText(fz_context *ctx, - uint16_t *text, - Bidi_Direction baseDir, - Bidi_Direction outputDir, - int len); - - - -/** - * Determines the bidi class for a single character - * - * @param[in] ch The character - * - * @return The bidi character type - */ -Bidi_CharType Bidi_classFromChN(uint16_t ch); - - - -/** - * Determines the character classes for given text - * - * @param[in] text input text - * @param[out] types output types - * @param[in] len text length - * @param[in] flags classifyWhiteSpace, forceRightToLeft - */ -void Bidi_classifyCharacters(const uint16_t *text, - Bidi_CharType *types, - int len, - Bidi_Flags flags); - - - -/** - * Iterates over a collection of strings, and rearranges them - * according to their directions. - * - * @param[in] nextObjCb function invoked repeatedly to describe the - * collection of strings on the line. - * @param[in] fragmentCb function to be invoked for each fragment. - * @param[in] callerData caller-defined structure which will be passed - * to each of the callback functions. - * @param[in] bidiFlag Bidi flag passed to the function. - * @param[out] more Location to place true if the "line" has more - * data - * - * @returns direction of paragraph - */ -Bidi_Direction Bidi_processLine(fz_context *ctx, - Bidi_PL_NextObj_Callback nextObjCb, - Bidi_PL_Fragment_Callback fragmentCb, - void *callerData, - int bidiFlag, - int *more); - - -int Bidi_isEuropeanNumber(const uint16_t *str, unsigned int len); - -/** - * returns a character's mirrored equivalent - * - * @param u Unicode character to process - */ -uint16_t Bidi_mirrorChar(const uint16_t u); - - - /** * Prototype for callback function supplied to Bidi_fragmentText. * - * @param fragment first character in fragment - * @param fragmentLen number of characters in fragment - * @param rightToLeft true if fragment is right-to-left - * @param mirror The mirror code of the fragment if it exists - * @param arg data from caller of Bidi_fragmentText + * @param fragment first character in fragment + * @param fragmentLen number of characters in fragment + * @param block_r2l true if block should concatenate with other blocks + * as right-to-left + * @param char_r2l true if characters within block should be laid out + * as right-to-left + * @param mirror The mirror code of the fragment if it exists + * @param arg data from caller of Bidi_fragmentText */ -typedef void (Bidi_Fragment_Callback)(const uint16_t *fragment, +typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment, size_t fragmentLen, - int rightToLeft, - uint16_t mirror, + int block_r2l, + int char_r2l, + uint32_t mirror, void *arg); - - /** * Partitions the given Unicode sequence into one or more unidirectional * fragments and invokes the given callback function for each fragment. * * For example, if directionality of text is: - * 0123456789 - * rrlllrrrrr, + * 0123456789 + * rrlllrrrrr, * we'll invoke callback with: - * &text[0], length == 2, rightToLeft == true - * &text[2], length == 3, rightToLeft == false - * &text[5], length == 5, rightToLeft == true. + * &text[0], length == 2, rightToLeft == true + * &text[2], length == 3, rightToLeft == false + * &text[5], length == 5, rightToLeft == true. * - * @param[in] text start of Unicode sequence + * @param[in] text start of Unicode sequence * @param[in] textlen number of Unicodes to analyse - * @param[in] baseDir direction of paragraph (specify Bidi_Neutral - * to force auto-detection) + * @param[in] baseDir direction of paragraph (specify BIDI_NEUTRAL + * to force auto-detection) * @param[in] callback function to be called for each fragment - * @param[in] arg data to be passed to the callback function - * @param[in] bidiFlag flag to be passed to the callback function + * @param[in] arg data to be passed to the callback function + * @param[in] flags flags to control operation (see fz_bidi_flags above) */ -void Bidi_fragmentText(fz_context *ctx, - const uint16_t *text, +void fz_bidi_fragment_text(fz_context *ctx, + const uint32_t *text, size_t textlen, - Bidi_Direction *baseDir, - Bidi_Fragment_Callback callback, + fz_bidi_direction *baseDir, + fz_bidi_fragment_callback *callback, void *arg, - int bidiFlag); - -#endif /* BIDI_BIDI_H */ + int flags); -/** @} */ +#endif /* FITZ_BIDI_H */ diff --git a/source/fitz/bidi-impl.h b/source/fitz/bidi-impl.h index 86aeb8d1..0de3458b 100644 --- a/source/fitz/bidi-impl.h +++ b/source/fitz/bidi-impl.h @@ -1,7 +1,5 @@ -// File: Bidi.h -// -/* For use with Bidi Reference Implementation - For more information see the associated file bidi.cpp +/* For use with Bidi Reference Implementation + For more information see the associated file bidi-std.c Credits: ------- @@ -22,16 +20,52 @@ OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE SOFTWARE. - - The files bidi.rc, and resource.h are distributed together with this file and are included - in the above definition of software. */ -// Copyright (C) 1999-2009, ASMUS, Inc. All Rights Reserved #include "mupdf/fitz.h" -void Bidi_resolveNeutrals(int baselevel, int *pcls, const int *plevel, int cch); -void Bidi_resolveImplicit(const int * pcls, int * plevel, int cch); -void Bidi_resolveWeak(fz_context *ctx, int baselevel, int *pcls, int *plevel, int cch); -void Bidi_resolveWhitespace(int baselevel, const int *pcls, int *plevel, int cch); -int Bidi_resolveExplicit(int level, int dir, int * pcls, int * plevel, int cch, int nNest); +/* Bidirectional Character Types + * as defined by the Unicode Bidirectional Algorithm Table 3-7. + * The list of bidirectional character types here is not grouped the + * same way as the table 3-7, since the numeric values for the types + * are chosen to keep the state and action tables compact. + */ +enum +{ + /* input types */ + /* ON MUST be zero, code relies on ON = N = 0 */ + BDI_ON = 0, /* Other Neutral */ + BDI_L, /* Left-to-right Letter */ + BDI_R, /* Right-to-left Letter */ + BDI_AN, /* Arabic Number */ + BDI_EN, /* European Number */ + BDI_AL, /* Arabic Letter (Right-to-left) */ + BDI_NSM, /* Non-spacing Mark */ + BDI_CS, /* Common Separator */ + BDI_ES, /* European Separator */ + BDI_ET, /* European Terminator (post/prefix e.g. $ and %) */ + + /* resolved types */ + BDI_BN, /* Boundary neutral (type of RLE etc after explicit levels)*/ + + /* input types, */ + BDI_S, /* Segment Separator (TAB) used only in L1 */ + BDI_WS, /* White space used only in L1 */ + BDI_B, /* Paragraph Separator (aka as PS) */ + + /* types for explicit controls */ + BDI_RLO, /* these are used only in X1-X9 */ + BDI_RLE, + BDI_LRO, + BDI_LRE, + BDI_PDF, + + /* resolved types, also resolved directions */ + BDI_N = BDI_ON /* alias, where ON, WS and S are treated the same */ +}; + +void fz_bidi_resolve_neutrals(fz_bidi_level baselevel, fz_bidi_chartype *pcls, const fz_bidi_level *plevel, int cch); +void fz_bidi_resolve_implicit(const fz_bidi_chartype *pcls, fz_bidi_level *plevel, int cch); +void fz_bidi_resolve_weak(fz_context *ctx, fz_bidi_level baselevel, fz_bidi_chartype *pcls, fz_bidi_level *plevel, int cch); +void fz_bidi_resolve_whitespace(fz_bidi_level baselevel, const fz_bidi_chartype *pcls, fz_bidi_level *plevel, int cch); +int fz_bidi_resolve_explicit(fz_bidi_level level, fz_bidi_chartype dir, fz_bidi_chartype *pcls, fz_bidi_level *plevel, int cch, fz_bidi_level nNest); diff --git a/source/fitz/bidi-std.c b/source/fitz/bidi-std.c index e8282b96..5d74d361 100644 --- a/source/fitz/bidi-std.c +++ b/source/fitz/bidi-std.c @@ -14,256 +14,256 @@ #endif /*------------------------------------------------------------------------ - File: Bidi.Cpp + File: Bidi.Cpp + + Description + ----------- + + Sample Implementation of the Unicode Bidirectional Algorithm as it + was revised by Revision 5 of the Uniode Technical Report # 9 + (1999-8-17) + + Verified for changes to the algorithm up through Unicode 5.2.0 (2009). + + This implementation is organized into several passes, each implemen- + ting one or more of the rules of the Unicode Bidi Algorithm. The + resolution of Weak Types and of Neutrals each use a state table + approach. + + Both a printf based interface and a Windows DlgProc are provided for + interactive testing. + + A stress harness comparing this implementation (v24) to a Java based + implementation was used by Doug Felt to verify that the two + implementations produce identical results for all strings up to six + bidi classes and stochastic strings up to length 20. + + Version 26 was verified by the author against the Unicode 5.2.0 + file BidiTest.txt, which provides an exhaustive text of strings of + length 4 or less, but covers some important cases where the language + in UAX#9 had been clarified. + + To see this code running in an actual Windows program, + download the free Unibook uitlity from http://unicode.org/unibook + The bidi demo is executed from the tools menu. It is build from + this source file. + + Build Notes + ----------- + + To compile the sample implementation please set the #define + directives above so the correct headers get included. Not all the + files are needed for all purposes. For the commandline version + only bidi.h and bidi.cpp are needed. + + The Win32 version is provided as a dialog procedure. To use as a + standalone program compile with the the lbmain.cpp file. If all you + need is the ability to run the code "as is", you can instead download + the unibook utility from http://uincode.org/unibook/ which contains + the bidi demo compiled from this source file. + + This code uses an extension to C++ that gives variables declared in + a for() statement function the same scope as the for() statement. + If your compiler does not support this extension, you may need to + move the declaration, e.g. int ich = 0; in front of the for statement. + + Implementation Note + ------------------- + + NOTE: The Unicode Bidirectional Algorithm removes all explicit + formatting codes in rule X9, but states that this can be + simulated by conformant implementations. This implementation + attempts to demonstrate such a simulation - Description - ----------- - - Sample Implementation of the Unicode Bidirectional Algorithm as it - was revised by Revision 5 of the Uniode Technical Report # 9 - (1999-8-17) - - Verified for changes to the algorithm up through Unicode 5.2.0 (2009). + To demonstrate this, the current implementation does the + following: - This implementation is organized into several passes, each implemen- - ting one or more of the rules of the Unicode Bidi Algorithm. The - resolution of Weak Types and of Neutrals each use a state table - approach. - - Both a printf based interface and a Windows DlgProc are provided for - interactive testing. - - A stress harness comparing this implementation (v24) to a Java based - implementation was used by Doug Felt to verify that the two - implementations produce identical results for all strings up to six - bidi classes and stochastic strings up to length 20. - - Version 26 was verified by the author against the Unicode 5.2.0 - file BidiTest.txt, which provides an exhaustive text of strings of - length 4 or less, but covers some important cases where the language - in UAX#9 had been clarified. - - To see this code running in an actual Windows program, - download the free Unibook uitlity from http://unicode.org/unibook - The bidi demo is executed from the tools menu. It is build from - this source file. - - Build Notes - ----------- - - To compile the sample implementation please set the #define - directives above so the correct headers get included. Not all the - files are needed for all purposes. For the commandline version - only bidi.h and bidi.cpp are needed. - - The Win32 version is provided as a dialog procedure. To use as a - standalone program compile with the the lbmain.cpp file. If all you - need is the ability to run the code "as is", you can instead download - the unibook utility from http://uincode.org/unibook/ which contains - the bidi demo compiled from this source file. - - This code uses an extension to C++ that gives variables declared in - a for() statement function the same scope as the for() statement. - If your compiler does not support this extension, you may need to - move the declaration, e.g. int ich = 0; in front of the for statement. - - Implementation Note - ------------------- - - NOTE: The Unicode Bidirectional Algorithm removes all explicit - formatting codes in rule X9, but states that this can be - simulated by conformant implementations. This implementation - attempts to demonstrate such a simulation + in resolveExplicit() + - change LRE, LRO, RLE, RLO, PDF to BN + - assign nested levels to BN - To demonstrate this, the current implementation does the - following: + in resolveWeak and resolveNeutrals + - assign L and R to BN's where they exist in place of + sor and eor by changing the last BN in front of a + level change to a strong type + - skip over BN's for the purpose of determining actions + - include BN in the count of deferred runs + which will resolve some of them to EN, AN and N - in resolveExplicit() - - change LRE, LRO, RLE, RLO, PDF to BN - - assign nested levels to BN + in resolveWhiteSpace + - set the level of any surviving BN to the base level, + or the level of the preceding character + - include LRE,LRO, RLE, RLO, PDF and BN in the count + whitespace to be reset - in resolveWeak and resolveNeutrals - - assign L and R to BN's where they exist in place of - sor and eor by changing the last BN in front of a - level change to a strong type - - skip over BN's for the purpose of determining actions - - include BN in the count of deferred runs - which will resolve some of them to EN, AN and N + This will result in the same order for non-BN characters as + if the BN characters had been removed. - in resolveWhiteSpace - - set the level of any surviving BN to the base level, - or the level of the preceding character - - include LRE,LRO, RLE, RLO, PDF and BN in the count - whitespace to be reset + The clean() function can be used to remove boundary marks for + verification purposes. - This will result in the same order for non-BN characters as - if the BN characters had been removed. + Notation + -------- + Pointer variables generally start with the letter p + Counter variables generally start with the letter c + Index variables generally start with the letter i + Boolean variables generally start with the letter f - The clean() function can be used to remove boundary marks for - verification purposes. + The enumerated bidirectional types have the same name as in the + description for the Unicode Bidirectional Algorithm - Notation - -------- - Pointer variables generally start with the letter p - Counter variables generally start with the letter c - Index variables generally start with the letter i - Boolean variables generally start with the letter f + Using this code outside a demo context + -------------------------------------- - The enumerated bidirectional types have the same name as in the - description for the Unicode Bidirectional Algorithm + The way the functions are broken down in this demo code is based + on the needs of the demo to show the evolution in internal state + as the algorithm proceeds. This obscures how the algorithm would + be used in practice. These are the steps: + 1. Allocate a pair of arrays large enough to hold bidi class + and calculated levels (one for each input character) - Using this code outside a demo context - -------------------------------------- + 2. Provide your own function to assign directional types (bidi + classes) corresponding to each character in the input, + conflating ON, WS, S to N. Unlike the classify function in this + demo, the input would be actual Unicode characters. - The way the functions are broken down in this demo code is based - on the needs of the demo to show the evolution in internal state - as the algorithm proceeds. This obscures how the algorithm would - be used in practice. These are the steps: + 3. Process the first paragraph by calling BidiParagraph. That + function changes B into BN and returns a length including the + paragraph separator. (The iteration over multiple paragraphs + is left as excercise for the reader). - 1. Allocate a pair of arrays large enough to hold bidi class - and calculated levels (one for each input character) + 4. Assign directional types again, but now assign specific types + to whitespace characters. - 2. Provide your own function to assign directional types (bidi - classes) corresponding to each character in the input, - conflating ON, WS, S to N. Unlike the classify function in this - demo, the input would be actual Unicode characters. + 5. Instead of reordering the input in place it is often desirable + to calculate an array of offsets indicating the reordering. + To that end, allocate such an array here and use it instead + of the input array in the next step. - 3. Process the first paragraph by calling BidiParagraph. That - function changes B into BN and returns a length including the - paragraph separator. (The iteration over multiple paragraphs - is left as excercise for the reader). - - 4. Assign directional types again, but now assign specific types - to whitespace characters. + 6. Resolve and reorder the lines by calling BidiLines. That + function 'breaks' lines on LS characters. Provide an optional + array of flags indicating the location of other line breaks as + needed. - 5. Instead of reordering the input in place it is often desirable - to calculate an array of offsets indicating the reordering. - To that end, allocate such an array here and use it instead - of the input array in the next step. - - 6. Resolve and reorder the lines by calling BidiLines. That - function 'breaks' lines on LS characters. Provide an optional - array of flags indicating the location of other line breaks as - needed. + Update History + -------------- + Version 24 is the initial published and verified version of this + reference implementation. Version 25 and its updates fix various + minor issues with the scaffolding used for demonstrating the + algorithm using pseudo-alphabets from the command line or dialog + box. No changes to the implementation of the actual bidi algrithm + are made in any of the minor updates to version 25. Version 26 + also makes no change to the actual algorithm but was verified + against the official BidiTest.txt file for Unicode 5.2.0. + - updated pseudo-alphabet - Update History - -------------- - Version 24 is the initial published and verified version of this - reference implementation. Version 25 and its updates fix various - minor issues with the scaffolding used for demonstrating the - algorithm using pseudo-alphabets from the command line or dialog - box. No changes to the implementation of the actual bidi algrithm - are made in any of the minor updates to version 25. Version 26 - also makes no change to the actual algorithm but was verified - against the official BidiTest.txt file for Unicode 5.2.0. + - Last Revised 12-10-99 (25) - - updated pseudo-alphabet + - enable demo mode for release builds - no other changes - - Last Revised 12-10-99 (25) + - Last Revised 12-10-00 (25a) - - enable demo mode for release builds - no other changes + - fix regression in pseudo alphabet use for Windows UI - - Last Revised 12-10-00 (25a) + - Last Revised 02-01-01 (25b) - - fix regression in pseudo alphabet use for Windows UI + - fixed a few comments, renamed a variable - - Last Revised 02-01-01 (25b) + - Last Revised 03-04-01 (25c) - - fixed a few comments, renamed a variable + - make base level settable, enable mirror by default, + fix dialog size - - Last Revised 03-04-01 (25c) + - Last Revised 06-02-01 (25e) - - make base level settable, enable mirror by default, - fix dialog size + - fixed some comments - - Last Revised 06-02-01 (25e) + - Last Revised 09-29-01 (25f) - - fixed some comments + - fixed classification for LS,RLM,LRM in pseudo alphabet, + focus issues in UI, regression fix to commandline from 25(e) + fix DEMO switch - - Last Revised 09-29-01 (25f) + - Last Revised 11-07-01 (25g) - - fixed classification for LS,RLM,LRM in pseudo alphabet, - focus issues in UI, regression fix to commandline from 25(e) - fix DEMO switch + - fixed classification for plus/minus in pseudo alphabet + to track changes made in Unicode 4.0.1 - - Last Revised 11-07-01 (25g) + - Last Revised 12-03-04 (25h) - - fixed classification for plus/minus in pseudo alphabet - to track changes made in Unicode 4.0.1 + - now compiles as dialog-only program for WINDOWS_UI==1 + using new bidimain.cpp - - Last Revised 12-03-04 (25h) + - Last Revised 12-02-07 (25i) - - now compiles as dialog-only program for WINDOWS_UI==1 - using new bidimain.cpp + - cleaned up whitespace and indenting in the source, + fixed two comments (table headers) - - Last Revised 12-02-07 (25i) + - Last Revised 15-03-07 (25j) - - cleaned up whitespace and indenting in the source, - fixed two comments (table headers) + - named enumerations - - Last Revised 15-03-07 (25j) + - Last Revised 30-05-07 (25k) - - named enumerations + - added usage notes, minor edits to comments, indentation, etc + throughout. Added the bidiParagraph function. Checked against + changes in the Unicode Bidi Algorithm for Unicode 5.2.0. No + changes needed to this implementation to match the values in + the BidiTest.txt file in the Unicode Character Database. + Minor fixes to dialog/windows proc, updated preprocessor directives. - - Last Revised 30-05-07 (25k) + - Last Revised 03-08-09 (26) - - added usage notes, minor edits to comments, indentation, etc - throughout. Added the bidiParagraph function. Checked against - changes in the Unicode Bidi Algorithm for Unicode 5.2.0. No - changes needed to this implementation to match the values in - the BidiTest.txt file in the Unicode Character Database. - Minor fixes to dialog/windows proc, updated preprocessor directives. + Credits: + ------- + Written by: Asmus Freytag + Command line interface by: Rick McGowan + Verification (v24): Doug Felt - - Last Revised 03-08-09 (26) + Disclaimer and legal rights: + --------------------------- + Copyright (C) 1999-2009, ASMUS, Inc. All Rights Reserved. + Distributed under the Terms of Use in http://www.unicode.org/copyright.html. - Credits: - ------- - Written by: Asmus Freytag - Command line interface by: Rick McGowan - Verification (v24): Doug Felt + THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE + BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, + OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE SOFTWARE. - Disclaimer and legal rights: - --------------------------- - Copyright (C) 1999-2009, ASMUS, Inc. All Rights Reserved. - Distributed under the Terms of Use in http://www.unicode.org/copyright.html. - - THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE - BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, - OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, - ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE SOFTWARE. - - The file bid.rc is included in the software covered by the above. + The file bid.rc is included in the software covered by the above. ------------------------------------------------------------------------*/ - /* === HELPER FUNCTIONS AND DECLARATIONS ================================= */ #define odd(x) ((x) & 1) /*---------------------------------------------------------------------- - The following array maps character codes to types for the purpose of - this sample implementation. The legend string gives a human readable - explanation of the pseudo alphabet. + The following array maps character codes to types for the purpose of + this sample implementation. The legend string gives a human readable + explanation of the pseudo alphabet. - For simplicity, characters entered by buttons are given a 1:1 mapping - between their type and pseudo character value. Pseudo characters that - can be typed from the keyboard are explained in the legend string. + For simplicity, characters entered by buttons are given a 1:1 mapping + between their type and pseudo character value. Pseudo characters that + can be typed from the keyboard are explained in the legend string. - Use the Unicode Character Database for the real values in real use. + Use the Unicode Character Database for the real values in real use. ---------------------------------------------------------------------*/ -const int chLS = 0x15; - -int TypesFromChar[] = +enum { + chLS = 0x15 +}; -// 0 1 2 3 4 5 6 7 8 9 a b c d e f +#if 0 +static const fz_bidi_chartype types_from_char[] = +{ +// 0 1 2 3 4 5 6 7 8 9 a b c d e f BDI_BN, BDI_BN, BDI_BN, BDI_BN, BDI_L, BDI_R, BDI_BN, BDI_BN, BDI_BN, BDI_S, BDI_B, BDI_S, BDI_WS, BDI_B, BDI_BN, BDI_BN, /*00-0f*/ BDI_LRO,BDI_LRE,BDI_PDF,BDI_RLO,BDI_RLE,BDI_WS, BDI_L, BDI_R, BDI_BN, BDI_BN, BDI_BN, BDI_BN, BDI_B, BDI_B, BDI_B, BDI_S, /*10-1f*/ BDI_WS, BDI_ON, BDI_ON, BDI_ET, BDI_ET, BDI_ET, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ES, BDI_CS, BDI_ES, BDI_CS, BDI_ES, /*20-2f*/ @@ -273,87 +273,75 @@ BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_NSM,BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, /*60-6f*/ BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_LRO,BDI_B, BDI_RLO,BDI_BN, BDI_ON, /*70-7f*/ }; - +#endif /*************************************** - Reverse, human readable reference: - - LRM: 0x4 - RLM: 0x5 - L: 0x16,a-z - LRE: 0x11,[ - LRO: 0x10,{ - R: 0x17,G-Z - AL: A-F - RLE: 0x14,] - RLO: 0x13,} - PDF: 0x12,^ - EN: 0-5 - ES: /,+,[hyphen] - ET: #,$,% - AN: 6-9 - CS: [comma],.,: - NSM: ` - BN: 0x0-0x8,0xe,0xf,0x18-0x1b,~ - B: 0xa,0xd,0x1c-0x1e,| - S: 0x9,0xb,0x1f,_ - WS: 0xc,0x15,[space] - ON: !,",&,',(,),*,;,<,=,>,?,@,\,0x7f + Reverse, human readable reference: + + LRM: 0x4 + RLM: 0x5 + L: 0x16,a-z + LRE: 0x11,[ + LRO: 0x10,{ + R: 0x17,G-Z + AL: A-F + RLE: 0x14,] + RLO: 0x13,} + PDF: 0x12,^ + EN: 0-5 + ES: /,+,[hyphen] + ET: #,$,% + AN: 6-9 + CS: [comma],.,: + NSM: ` + BN: 0x0-0x8,0xe,0xf,0x18-0x1b,~ + B: 0xa,0xd,0x1c-0x1e,| + S: 0x9,0xb,0x1f,_ + WS: 0xc,0x15,[space] + ON: !,",&,',(,),*,;,<,=,>,?,@,\,0x7f ****************************************/ -// WS, LS and S are not explicitly needed except for L1. Therefore this -// table conflates ON, S, WS, and LS to N, all others unchanged -int NTypes[] = { - BDI_N, // ON, - BDI_L, // L, - BDI_R, // R, - BDI_AN, // AN, - BDI_EN, // EN, - BDI_AL, // AL - BDI_NSM, // NSM - BDI_CS, // CS - BDI_ES, // ES - BDI_ET, // ET - BDI_BN, // BN - BDI_N, // S - BDI_N, // WS - BDI_B, // B - BDI_RLO, // RLO - BDI_RLE, // RLE - BDI_LRO, // LRO - BDI_LRE, // LRE - BDI_PDF, // PDF - BDI_ON, // LS -}; - // === HELPER FUNCTIONS ================================================ +#ifdef BIDI_LINE_AT_A_TIME // reverse cch characters static -void reverse(uint16_t * psz, int cch) +void reverse(uint32_t *psz, int cch) { - uint16_t chTemp; - int ich; - - for (ich = 0; ich < --cch; ich++) - { - chTemp = psz[ich]; - psz[ich] = psz[cch]; - psz[cch] = chTemp; - } + uint32_t ch_temp; + int ich; + + for (ich = 0; ich < --cch; ich++) + { + ch_temp = psz[ich]; + psz[ich] = psz[cch]; + psz[cch] = ch_temp; + } } +#endif // Set a run of cval values at locations all prior to, but not including // iStart, to the new value nval. static -void SetDeferredRun(int *pval, int cval, int iStart, int nval) +void set_deferred_run(fz_bidi_chartype *pval, int cval, int iStart, fz_bidi_chartype nval) +{ + int i; + + for (i = iStart - 1; i >= iStart - cval; i--) + { + pval[i] = nval; + } +} + +static +void set_deferred_level_run(fz_bidi_level *pval, int cval, int iStart, fz_bidi_level nval) { - int i; + int i; - for (i = iStart - 1; i >= iStart - cval; i--) - { - pval[i] = nval; - } + for (i = iStart - 1; i >= iStart - cval; i--) + { + pval[i] = nval; + } } // === ASSIGNING BIDI CLASSES ============================================ @@ -361,459 +349,451 @@ void SetDeferredRun(int *pval, int cval, int iStart, int nval) // === THE PARAGRAPH LEVEL =============================================== /*------------------------------------------------------------------------ - Function: resolveParagraphs + Function: resolve_paragraphs - Resolves the input strings into blocks over which the algorithm - is then applied. + Resolves the input strings into blocks over which the algorithm + is then applied. - Implements Rule P1 of the Unicode Bidi Algorithm + Implements Rule P1 of the Unicode Bidi Algorithm - Input: Text string - Character count + Input: Text string + Character count - Output: revised character count + Output: revised character count - Note: This is a very simplistic function. In effect it restricts - the action of the algorithm to the first paragraph in the input - where a paragraph ends at the end of the first block separator - or at the end of the input text. + Note: This is a very simplistic function. In effect it restricts + the action of the algorithm to the first paragraph in the input + where a paragraph ends at the end of the first block separator + or at the end of the input text. ------------------------------------------------------------------------*/ - -static -int resolveParagraphs(int * types, int cch) +#if 0 +static int resolve_paragraphs(fz_bidi_chartype *types, int cch) { - int ich; - - // skip characters not of type B - for(ich = 0; ich < cch && types[ich] != BDI_B; ich++) - ; - // stop after first B, make it a BN for use in the next steps - if (ich < cch && types[ich] == BDI_B) - types[ich++] = BDI_BN; - return ich; + int ich; + + // skip characters not of type B + for(ich = 0; ich < cch && types[ich] != BDI_B; ich++) + ; + // stop after first B, make it a BN for use in the next steps + if (ich < cch && types[ich] == BDI_B) + types[ich++] = BDI_BN; + return ich; } /*------------------------------------------------------------------------ - Function: baseLevel + Function: base_level - Determines the base level + Determines the base level - Implements rule P2 of the Unicode Bidi Algorithm. + Implements rule P2 of the Unicode Bidi Algorithm. - Input: Array of directional classes - Character count + Input: Array of directional classes + Character count - Note: Ignores explicit embeddings + Note: Ignores explicit embeddings ------------------------------------------------------------------------*/ -static -int baseLevel(const int * pcls, int cch) +static int base_level(const fz_bidi_chartype *pcls, int cch) { - int ich; - - for (ich = 0; ich < cch; ich++) - { - switch (pcls[ich]) - { - // strong left - case BDI_L: - return 0; - - // strong right - case BDI_R: - case BDI_AL: - return 1; - } - } - return 0; + int ich; + + for (ich = 0; ich < cch; ich++) + { + switch (pcls[ich]) + { + // strong left + case BDI_L: + return 0; + + // strong right + case BDI_R: + case BDI_AL: + return 1; + } + } + return 0; } +#endif //====== RESOLVE EXPLICIT ================================================ -static -int GreaterEven(int i) +static fz_bidi_level greater_even(fz_bidi_level i) { - return odd(i) ? i + 1 : i + 2; + return odd(i) ? i + 1 : i + 2; } -static -int GreaterOdd(int i) +static fz_bidi_level greater_odd(fz_bidi_level i) { - return odd(i) ? i + 2 : i + 1; + return odd(i) ? i + 2 : i + 1; } -static -int EmbeddingDirection(int level) +static fz_bidi_chartype embedding_direction(fz_bidi_chartype level) { - return odd(level) ? BDI_R : BDI_L; + return odd(level) ? BDI_R : BDI_L; } - /*------------------------------------------------------------------------ - Function: resolveExplicit - - Recursively resolves explicit embedding levels and overrides. - Implements rules X1-X9, of the Unicode Bidirectional Algorithm. + Function: resolveExplicit - Input: Base embedding level and direction - Character count + Recursively resolves explicit embedding levels and overrides. + Implements rules X1-X9, of the Unicode Bidirectional Algorithm. - Output: Array of embedding levels - Caller must allocate (one level per input character) + Input: Base embedding level and direction + Character count - In/Out: Array of direction classes + Output: Array of embedding levels + Caller must allocate (one level per input character) + In/Out: Array of direction classes - Note: The function uses two simple counters to keep track of - matching explicit codes and PDF. Use the default argument for - the outermost call. The nesting counter counts the recursion - depth and not the embedding level. + Note: The function uses two simple counters to keep track of + matching explicit codes and PDF. Use the default argument for + the outermost call. The nesting counter counts the recursion + depth and not the embedding level. ------------------------------------------------------------------------*/ -const int MAX_LEVEL = 61; // the real value - -int Bidi_resolveExplicit(int level, int dir, int * pcls, int * plevel, int cch, - int nNest) +int fz_bidi_resolve_explicit(fz_bidi_level level, fz_bidi_chartype dir, fz_bidi_chartype *pcls, fz_bidi_level *plevel, int cch, + fz_bidi_level n_nest) { - int ich; - - // always called with a valid nesting level - // nesting levels are != embedding levels - int nLastValid = nNest; - - // check input values - assert(nNest >= 0 && level >= 0 && level <= MAX_LEVEL); - - // process the text - for (ich = 0; ich < cch; ich++) - { - int cls = pcls[ich]; - switch (cls) - { - case BDI_LRO: - case BDI_LRE: - nNest++; - if (GreaterEven(level) <= MAX_LEVEL) - { - plevel[ich] = GreaterEven(level); - pcls[ich] = BDI_BN; - ich += Bidi_resolveExplicit(plevel[ich], (cls == BDI_LRE ? BDI_N : BDI_L), - &pcls[ich+1], &plevel[ich+1], - cch - (ich+1), nNest); - nNest--; - continue; - } - cls = pcls[ich] = BDI_BN; - break; - - case BDI_RLO: - case BDI_RLE: - nNest++; - if (GreaterOdd(level) <= MAX_LEVEL) - { - plevel[ich] = GreaterOdd(level); - pcls[ich] = BDI_BN; - ich += Bidi_resolveExplicit(plevel[ich], (cls == BDI_RLE ? BDI_N : BDI_R), - &pcls[ich+1], &plevel[ich+1], - cch - (ich+1), nNest); - nNest--; - continue; - } - cls = pcls[ich] = BDI_BN; - break; - - case BDI_PDF: - cls = pcls[ich] = BDI_BN; - if (nNest) - { - if (nLastValid < nNest) - { - nNest--; - } - else - { - cch = ich; // break the loop, but complete body - } - } - break; - } - - // Apply the override - if (dir != BDI_N) - { - cls = dir; - } - plevel[ich] = level; - if (pcls[ich] != BDI_BN) - pcls[ich] = cls; - } - - return ich; + int ich; + + // always called with a valid nesting level + // nesting levels are != embedding levels + int nLastValid = n_nest; + + // check input values + assert(n_nest >= 0 && level >= 0 && level <= BIDI_LEVEL_MAX); + + // process the text + for (ich = 0; ich < cch; ich++) + { + fz_bidi_chartype cls = pcls[ich]; + switch (cls) + { + case BDI_LRO: + case BDI_LRE: + n_nest++; + if (greater_even(level) <= BIDI_LEVEL_MAX) + { + plevel[ich] = greater_even(level); + pcls[ich] = BDI_BN; + ich += fz_bidi_resolve_explicit(plevel[ich], (cls == BDI_LRE ? BDI_N : BDI_L), + &pcls[ich+1], &plevel[ich+1], + cch - (ich+1), n_nest); + n_nest--; + continue; + } + cls = pcls[ich] = BDI_BN; + break; + + case BDI_RLO: + case BDI_RLE: + n_nest++; + if (greater_odd(level) <= BIDI_LEVEL_MAX) + { + plevel[ich] = greater_odd(level); + pcls[ich] = BDI_BN; + ich += fz_bidi_resolve_explicit(plevel[ich], (cls == BDI_RLE ? BDI_N : BDI_R), + &pcls[ich+1], &plevel[ich+1], + cch - (ich+1), n_nest); + n_nest--; + continue; + } + cls = pcls[ich] = BDI_BN; + break; + + case BDI_PDF: + cls = pcls[ich] = BDI_BN; + if (n_nest) + { + if (nLastValid < n_nest) + { + n_nest--; + } + else + { + cch = ich; // break the loop, but complete body + } + } + break; + } + + // Apply the override + if (dir != BDI_N) + { + cls = dir; + } + plevel[ich] = level; + if (pcls[ich] != BDI_BN) + pcls[ich] = cls; + } + + return ich; } // === RESOLVE WEAK TYPES ================================================ enum bidi_state // possible states { - xa, // arabic letter - xr, // right leter - xl, // left letter - - ao, // arabic lett. foll by ON - ro, // right lett. foll by ON - lo, // left lett. foll by ON - - rt, // ET following R - lt, // ET following L - - cn, // EN, AN following AL - ra, // arabic number foll R - re, // european number foll R - la, // arabic number foll L - le, // european number foll L - - ac, // CS following cn - rc, // CS following ra - rs, // CS,ES following re - lc, // CS following la - ls, // CS,ES following le - - ret, // ET following re - let // ET following le + xa, // arabic letter + xr, // right leter + xl, // left letter + + ao, // arabic lett. foll by ON + ro, // right lett. foll by ON + lo, // left lett. foll by ON + + rt, // ET following R + lt, // ET following L + + cn, // EN, AN following AL + ra, // arabic number foll R + re, // european number foll R + la, // arabic number foll L + le, // european number foll L + + ac, // CS following cn + rc, // CS following ra + rs, // CS,ES following re + lc, // CS following la + ls, // CS,ES following le + + ret, // ET following re + let // ET following le } ; -int stateWeak[][10] = +const unsigned char state_weak[][10] = { - // N, L, R, AN, EN, AL,NSM, CS, ES, ET, -/*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* arabic letter */ -/*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */ -/*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */ + // N, L, R, AN, EN, AL,NSM, CS, ES, ET, +/*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* arabic letter */ +/*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */ +/*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */ /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* arabic lett. foll by ON*/ /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */ /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */ -/*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */ -/*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */ +/*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */ +/*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */ -/*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */ +/*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */ /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* arabic number foll R */ /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* european number foll R */ /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* arabic number foll L */ /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* european number foll L */ -/*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */ -/*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */ -/*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */ -/*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */ -/*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */ - -/*ret*/ { ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */ -/*let*/ { lo, xl, xr, la, le, xa,let, lo, lo,let } /* ET following le */ +/*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */ +/*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */ +/*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */ +/*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */ +/*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */ +/*ret*/ { ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */ +/*let*/ { lo, xl, xr, la, le, xa,let, lo, lo,let } /* ET following le */ }; enum bidi_action // possible actions { - // primitives - IX = 0x100, // increment - XX = 0xF, // no-op - - // actions - xxx = (XX << 4) + XX, // no-op - xIx = IX + xxx, // increment run - xxN = (XX << 4) + BDI_ON, // set current to N - xxE = (XX << 4) + BDI_EN, // set current to EN - xxA = (XX << 4) + BDI_AN, // set current to AN - xxR = (XX << 4) + BDI_R, // set current to R - xxL = (XX << 4) + BDI_L, // set current to L - Nxx = (BDI_ON << 4) + 0xF, // set run to neutral - Axx = (BDI_AN << 4) + 0xF, // set run to AN - ExE = (BDI_EN << 4) + BDI_EN,// set run to EN, set current to EN - NIx = (BDI_ON << 4) + 0xF + IX,// set run to N, increment - NxN = (BDI_ON << 4) + BDI_ON,// set run to N, set current to N - NxR = (BDI_ON << 4) + BDI_R, // set run to N, set current to R - NxE = (BDI_ON << 4) + BDI_EN,// set run to N, set current to EN - - AxA = (BDI_AN << 4) + BDI_AN,// set run to AN, set current to AN - NxL = (BDI_ON << 4) + BDI_L, // set run to N, set current to L - LxL = (BDI_L << 4) + BDI_L // set run to L, set current to L + // primitives + IX = 0x100, // increment + XX = 0xF, // no-op + + // actions + xxx = (XX << 4) + XX, // no-op + xIx = IX + xxx, // increment run + xxN = (XX << 4) + BDI_ON, // set current to N + xxE = (XX << 4) + BDI_EN, // set current to EN + xxA = (XX << 4) + BDI_AN, // set current to AN + xxR = (XX << 4) + BDI_R, // set current to R + xxL = (XX << 4) + BDI_L, // set current to L + Nxx = (BDI_ON << 4) + 0xF, // set run to neutral + Axx = (BDI_AN << 4) + 0xF, // set run to AN + ExE = (BDI_EN << 4) + BDI_EN, // set run to EN, set current to EN + NIx = (BDI_ON << 4) + 0xF + IX, // set run to N, increment + NxN = (BDI_ON << 4) + BDI_ON, // set run to N, set current to N + NxR = (BDI_ON << 4) + BDI_R, // set run to N, set current to R + NxE = (BDI_ON << 4) + BDI_EN, // set run to N, set current to EN + + AxA = (BDI_AN << 4) + BDI_AN, // set run to AN, set current to AN + NxL = (BDI_ON << 4) + BDI_L, // set run to N, set current to L + LxL = (BDI_L << 4) + BDI_L // set run to L, set current to L }; +typedef uint16_t fz_bidi_action; -int actionWeak[][10] = +const fz_bidi_action action_weak[][10] = { - // N,.. L, R, AN, EN, AL, NSM, CS,..ES, ET, -/*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* arabic letter */ -/*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right leter */ -/*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */ - -/*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* arabic lett. foll by ON */ -/*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */ -/*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */ - -/*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */ -/*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */ - -/*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */ -/*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll R */ -/*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* european number foll R */ -/*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll L */ -/*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* european number foll L */ - -/*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */ -/*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */ -/*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */ -/*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */ -/*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */ - -/*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */ -/*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL } /* ET following le */ + // N,.. L, R, AN, EN, AL, NSM, CS,..ES, ET, +/*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* arabic letter */ +/*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right leter */ +/*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */ + +/*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* arabic lett. foll by ON */ +/*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */ +/*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */ + +/*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */ +/*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */ + +/*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */ +/*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll R */ +/*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* european number foll R */ +/*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll L */ +/*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* european number foll L */ + +/*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */ +/*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */ +/*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */ +/*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */ +/*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */ + +/*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */ +/*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL } /* ET following le */ }; static -int GetDeferredType(int action) +fz_bidi_chartype get_deferred_type(fz_bidi_action action) { - return (action >> 4) & 0xF; + return (action >> 4) & 0xF; } static -int GetResolvedType(int action) +fz_bidi_chartype get_resolved_type(fz_bidi_action action) { - return action & 0xF; + return action & 0xF; } /* Note on action table: - States can be of two kinds: - - Immediate Resolution State, where each input token - is resolved as soon as it is seen. These states havve - only single action codes (xxN) or the no-op (xxx) - for static input tokens. - - Deferred Resolution State, where input tokens either - either extend the run (xIx) or resolve its Type (e.g. Nxx). - - Input classes are of three kinds - - Static Input Token, where the class of the token remains - unchanged on output (AN, L, N, R) - - Replaced Input Token, where the class of the token is - always replaced on output (AL, BDI_BN, NSM, CS, ES, ET) - - Conditional Input Token, where the class of the token is - changed on output in some but not all cases (EN) - - Where tokens are subject to change, a double action - (e.g. NxA, or NxN) is _required_ after deferred states, - resolving both the deferred state and changing the current token. - - These properties of the table are verified by assertions below. - This code is needed only during debugging and maintenance + States can be of two kinds: + - Immediate Resolution State, where each input token + is resolved as soon as it is seen. These states havve + only single action codes (xxN) or the no-op (xxx) + for static input tokens. + - Deferred Resolution State, where input tokens either + either extend the run (xIx) or resolve its Type (e.g. Nxx). + + Input classes are of three kinds + - Static Input Token, where the class of the token remains + unchanged on output (AN, L, N, R) + - Replaced Input Token, where the class of the token is + always replaced on output (AL, BDI_BN, NSM, CS, ES, ET) + - Conditional Input Token, where the class of the token is + changed on output in some but not all cases (EN) + + Where tokens are subject to change, a double action + (e.g. NxA, or NxN) is _required_ after deferred states, + resolving both the deferred state and changing the current token. + + These properties of the table are verified by assertions below. + This code is needed only during debugging and maintenance */ /*------------------------------------------------------------------------ - Function: resolveWeak + Function: resolveWeak - Resolves the directionality of numeric and other weak character types + Resolves the directionality of numeric and other weak character types - Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm. + Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm. - Input: Array of embedding levels - Character count + Input: Array of embedding levels + Character count - In/Out: Array of directional classes + In/Out: Array of directional classes - Note: On input only these directional classes are expected - AL, HL, R, L, ON, BDI_BN, NSM, AN, EN, ES, ET, CS, + Note: On input only these directional classes are expected + AL, HL, R, L, ON, BDI_BN, NSM, AN, EN, ES, ET, CS, ------------------------------------------------------------------------*/ -void Bidi_resolveWeak(fz_context *ctx, int baselevel, int *pcls, int *plevel, int cch) +void fz_bidi_resolve_weak(fz_context *ctx, fz_bidi_level baselevel, fz_bidi_chartype *pcls, fz_bidi_level *plevel, int cch) { - int state = odd(baselevel) ? xr : xl; - int cls; - int ich; - int action; - int clsRun; - int clsNew; - - int level = baselevel; - - int cchRun = 0; - - for (ich = 0; ich < cch; ich++) - { - if (pcls[ich] > BDI_BN) { - fz_warn(ctx, "error: pcls[%d] > BN (%d)\n", ich, pcls[ich]); - } - - // ignore boundary neutrals - if (pcls[ich] == BDI_BN) - { - // must flatten levels unless at a level change; - plevel[ich] = level; - - // lookahead for level changes - if (ich + 1 == cch && level != baselevel) - { - // have to fixup last BN before end of the loop, since - // its fix-upped value will be needed below the assert - pcls[ich] = EmbeddingDirection(level); - } - else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BDI_BN) - { - // fixup LAST BN in front / after a level run to make - // it act like the SOR/EOR in rule X10 - int newlevel = plevel[ich+1]; - if (level > newlevel) { - newlevel = level; - } - plevel[ich] = newlevel; - - // must match assigned level - pcls[ich] = EmbeddingDirection(newlevel); - level = plevel[ich+1]; - } - else - { - // don't interrupt runs - if (cchRun) - { - cchRun++; - } - continue; - } - } - - assert(pcls[ich] <= BDI_BN); - cls = pcls[ich]; - - action = actionWeak[state][cls]; - - // resolve the directionality for deferred runs - clsRun = GetDeferredType(action); - if (clsRun != XX) - { - SetDeferredRun(pcls, cchRun, ich, clsRun); - cchRun = 0; - } - - // resolve the directionality class at the current location - clsNew = GetResolvedType(action); - if (clsNew != XX) - pcls[ich] = clsNew; - - // increment a deferred run - if (IX & action) - cchRun++; - - state = stateWeak[state][cls]; - } - - // resolve any deferred runs - // use the direction of the current level to emulate PDF - cls = EmbeddingDirection(level); - - // resolve the directionality for deferred runs - clsRun = GetDeferredType(actionWeak[state][cls]); - if (clsRun != XX) - SetDeferredRun(pcls, cchRun, ich, clsRun); + int state = odd(baselevel) ? xr : xl; + fz_bidi_chartype cls; + int ich; + fz_bidi_action action; + fz_bidi_chartype cls_run; + fz_bidi_chartype cls_new; + + fz_bidi_level level = baselevel; + + int cch_run = 0; + + for (ich = 0; ich < cch; ich++) + { + if (pcls[ich] > BDI_BN) { + fz_warn(ctx, "error: pcls[%d] > BN (%d)\n", ich, pcls[ich]); + } + + // ignore boundary neutrals + if (pcls[ich] == BDI_BN) + { + // must flatten levels unless at a level change; + plevel[ich] = level; + + // lookahead for level changes + if (ich + 1 == cch && level != baselevel) + { + // have to fixup last BN before end of the loop, since + // its fix-upped value will be needed below the assert + pcls[ich] = embedding_direction(level); + } + else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BDI_BN) + { + // fixup LAST BN in front / after a level run to make + // it act like the SOR/EOR in rule X10 + int newlevel = plevel[ich+1]; + if (level > newlevel) { + newlevel = level; + } + plevel[ich] = newlevel; + + // must match assigned level + pcls[ich] = embedding_direction(newlevel); + level = plevel[ich+1]; + } + else + { + // don't interrupt runs + if (cch_run) + { + cch_run++; + } + continue; + } + } + + assert(pcls[ich] <= BDI_BN); + cls = pcls[ich]; + + action = action_weak[state][cls]; + + // resolve the directionality for deferred runs + cls_run = get_deferred_type(action); + if (cls_run != XX) + { + set_deferred_run(pcls, cch_run, ich, cls_run); + cch_run = 0; + } + + // resolve the directionality class at the current location + cls_new = get_resolved_type(action); + if (cls_new != XX) + pcls[ich] = cls_new; + + // increment a deferred run + if (IX & action) + cch_run++; + + state = state_weak[state][cls]; + } + + // resolve any deferred runs + // use the direction of the current level to emulate PDF + cls = embedding_direction(level); + + // resolve the directionality for deferred runs + cls_run = get_deferred_type(action_weak[state][cls]); + if (cls_run != XX) + set_deferred_run(pcls, cch_run, ich, cls_run); } // === RESOLVE NEUTAL TYPES ============================================== @@ -821,365 +801,363 @@ void Bidi_resolveWeak(fz_context *ctx, int baselevel, int *pcls, int *plevel, in // action values enum neutral_action { - // action to resolve previous input - nL = BDI_L, // resolve EN to L - En = 3 << 4, // resolve neutrals run to embedding level direction - Rn = BDI_R << 4, // resolve neutrals run to strong right - Ln = BDI_L << 4, // resolved neutrals run to strong left - In = (1<<8), // increment count of deferred neutrals - LnL = (1<<4)+BDI_L // set run and EN to L + // action to resolve previous input + nL = BDI_L, // resolve EN to L + En = 3 << 4, // resolve neutrals run to embedding level direction + Rn = BDI_R << 4, // resolve neutrals run to strong right + Ln = BDI_L << 4, // resolved neutrals run to strong left + In = (1<<8), // increment count of deferred neutrals + LnL = (1<<4)+BDI_L // set run and EN to L }; -static -int GetDeferredNeutrals(int action, int level) +static fz_bidi_chartype +get_deferred_neutrals(fz_bidi_action action, fz_bidi_level level) { - action = (action >> 4) & 0xF; - if (action == (En >> 4)) - return EmbeddingDirection(level); - else - return action; + action = (action >> 4) & 0xF; + if (action == (En >> 4)) + return embedding_direction(level); + else + return action; } -static -int GetResolvedNeutrals(int action) +static fz_bidi_chartype get_resolved_neutrals(fz_bidi_action action) { - action = action & 0xF; - if (action == In) - return 0; - else - return action; + action = action & 0xF; + if (action == In) + return 0; + else + return action; } // state values enum neutral_state { - // new temporary class - r, // R and characters resolved to R - l, // L and characters resolved to L - rn, // N preceded by right - ln, // N preceded by left - a, // AN preceded by left (the abbrev 'la' is used up above) - na // N preceeded by a + // new temporary class + r, // R and characters resolved to R + l, // L and characters resolved to L + rn, // N preceded by right + ln, // N preceded by left + a, // AN preceded by left (the abbrev 'la' is used up above) + na // N preceeded by a } ; - /*------------------------------------------------------------------------ - Notes: + Notes: - By rule W7, whenever a EN is 'dominated' by an L (including start of - run with embedding direction = L) it is resolved to, and further treated - as L. + By rule W7, whenever a EN is 'dominated' by an L (including start of + run with embedding direction = L) it is resolved to, and further treated + as L. - This leads to the need for 'a' and 'na' states. + This leads to the need for 'a' and 'na' states. ------------------------------------------------------------------------*/ -int actionNeutrals[][5] = +const int action_neutrals[][5] = { -// N, L, R, AN, EN, = cls - // state = - {In, 0, 0, 0, 0}, // r right - {In, 0, 0, 0, BDI_L},// l left +// N, L, R, AN, EN, = cls + // state = + {In, 0, 0, 0, 0}, // r right + {In, 0, 0, 0, BDI_L}, // l left - {In, En, Rn, Rn, Rn}, // rn N preceded by right - {In, Ln, En, En, LnL}, // ln N preceded by left + {In, En, Rn, Rn, Rn}, // rn N preceded by right + {In, Ln, En, En, LnL}, // ln N preceded by left - {In, 0, 0, 0, BDI_L},// a AN preceded by left - {In, En, Rn, Rn, En} // na N preceded by a + {In, 0, 0, 0, BDI_L}, // a AN preceded by left + {In, En, Rn, Rn, En} // na N preceded by a } ; -int stateNeutrals[][5] = +const int state_neutrals[][5] = { -// N, L, R, AN, EN = cls - // state = - {rn, l, r, r, r}, // r right - {ln, l, r, a, l}, // l left +// N, L, R, AN, EN = cls + // state = + {rn, l, r, r, r}, // r right + {ln, l, r, a, l}, // l left - {rn, l, r, r, r}, // rn N preceded by right - {ln, l, r, a, l}, // ln N preceded by left + {rn, l, r, r, r}, // rn N preceded by right + {ln, l, r, a, l}, // ln N preceded by left - {na, l, r, a, l}, // a AN preceded by left - {na, l, r, a, l} // na N preceded by la + {na, l, r, a, l}, // a AN preceded by left + {na, l, r, a, l} // na N preceded by la } ; /*------------------------------------------------------------------------ - Function: resolveNeutrals + Function: resolveNeutrals - Resolves the directionality of neutral character types. + Resolves the directionality of neutral character types. - Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm. + Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm. - Input: Array of embedding levels - Character count - Baselevel + Input: Array of embedding levels + Character count + Baselevel - In/Out: Array of directional classes + In/Out: Array of directional classes - Note: On input only these directional classes are expected - R, L, N, AN, EN and BN + Note: On input only these directional classes are expected + R, L, N, AN, EN and BN - W8 resolves a number of ENs to L + W8 resolves a number of ENs to L ------------------------------------------------------------------------*/ -void Bidi_resolveNeutrals(int baselevel, int *pcls, const int *plevel, int cch) +void fz_bidi_resolve_neutrals(fz_bidi_level baselevel, fz_bidi_chartype *pcls, const fz_bidi_level *plevel, int cch) { - // the state at the start of text depends on the base level - int state = odd(baselevel) ? r : l; - int cls; - int ich; - int clsRun; - - int cchRun = 0; - int level = baselevel; - - for (ich = 0; ich < cch; ich++) - { - int action; - int clsNew; - - // ignore boundary neutrals - if (pcls[ich] == BDI_BN) - { - // include in the count for a deferred run - if (cchRun) - cchRun++; - - // skip any further processing - continue; - } - - assert(pcls[ich] < 5); // "Only N, L, R, AN, EN are allowed" - cls = pcls[ich]; - - action = actionNeutrals[state][cls]; - - // resolve the directionality for deferred runs - clsRun = GetDeferredNeutrals(action, level); - if (clsRun != BDI_N) - { - SetDeferredRun(pcls, cchRun, ich, clsRun); - cchRun = 0; - } - - // resolve the directionality class at the current location - clsNew = GetResolvedNeutrals(action); - if (clsNew != BDI_N) - pcls[ich] = clsNew; - - if (In & action) - cchRun++; - - state = stateNeutrals[state][cls]; - level = plevel[ich]; - } - - // resolve any deferred runs - cls = EmbeddingDirection(level); // eor has type of current level - - // resolve the directionality for deferred runs - clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level); - if (clsRun != BDI_N) - SetDeferredRun(pcls, cchRun, ich, clsRun); + // the state at the start of text depends on the base level + int state = odd(baselevel) ? r : l; + fz_bidi_chartype cls; + int ich; + fz_bidi_chartype cls_run; + + int cch_run = 0; + fz_bidi_level level = baselevel; + + for (ich = 0; ich < cch; ich++) + { + int action; + fz_bidi_chartype cls_new; + + // ignore boundary neutrals + if (pcls[ich] == BDI_BN) + { + // include in the count for a deferred run + if (cch_run) + cch_run++; + + // skip any further processing + continue; + } + + assert(pcls[ich] < 5); // "Only N, L, R, AN, EN are allowed" + cls = pcls[ich]; + + action = action_neutrals[state][cls]; + + // resolve the directionality for deferred runs + cls_run = get_deferred_neutrals(action, level); + if (cls_run != BDI_N) + { + set_deferred_run(pcls, cch_run, ich, cls_run); + cch_run = 0; + } + + // resolve the directionality class at the current location + cls_new = get_resolved_neutrals(action); + if (cls_new != BDI_N) + pcls[ich] = cls_new; + + if (In & action) + cch_run++; + + state = state_neutrals[state][cls]; + level = plevel[ich]; + } + + // resolve any deferred runs + cls = embedding_direction(level); // eor has type of current level + + // resolve the directionality for deferred runs + cls_run = get_deferred_neutrals(action_neutrals[state][cls], level); + if (cls_run != BDI_N) + set_deferred_run(pcls, cch_run, ich, cls_run); } // === RESOLVE IMPLLICIT ================================================= /*------------------------------------------------------------------------ - Function: resolveImplicit + Function: resolveImplicit - Recursively resolves implicit embedding levels. - Implements rules I1 and I2 of the Unicode Bidirectional Algorithm. + Recursively resolves implicit embedding levels. + Implements rules I1 and I2 of the Unicode Bidirectional Algorithm. - Input: Array of direction classes - Character count - Base level + Input: Array of direction classes + Character count + Base level - In/Out: Array of embedding levels + In/Out: Array of embedding levels - Note: levels may exceed 15 on output. - Accepted subset of direction classes - R, L, AN, EN + Note: levels may exceed 15 on output. + Accepted subset of direction classes + R, L, AN, EN ------------------------------------------------------------------------*/ -int addLevel[][4] = +const fz_bidi_level add_level[][4] = { - // L, R, AN, EN = cls - // level = -/* even */ { 0, 1, 2, 2 }, // EVEN -/* odd */ { 1, 0, 1, 1 } // ODD + // L, R, AN, EN = cls + // level = +/* even */ { 0, 1, 2, 2 }, // EVEN +/* odd */ { 1, 0, 1, 1 } // ODD }; -void Bidi_resolveImplicit(const int * pcls, int * plevel, int cch) +void fz_bidi_resolve_implicit(const fz_bidi_chartype *pcls, fz_bidi_level *plevel, int cch) { - int ich; - - for (ich = 0; ich < cch; ich++) - { - // cannot resolve bn here, since some bn were resolved to strong - // types in resolveWeak. To remove these we need the original - // types, which are available again in resolveWhiteSpace - if (pcls[ich] == BDI_BN) - { - continue; - } - assert(pcls[ich] > 0); // "No Neutrals allowed to survive here." - assert(pcls[ich] < 5); // "Out of range." - plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1]; - } + int ich; + + for (ich = 0; ich < cch; ich++) + { + // cannot resolve bn here, since some bn were resolved to strong + // types in resolveWeak. To remove these we need the original + // types, which are available again in resolveWhiteSpace + if (pcls[ich] == BDI_BN) + { + continue; + } + assert(pcls[ich] > 0); // "No Neutrals allowed to survive here." + assert(pcls[ich] < 5); // "Out of range." + plevel[ich] += add_level[odd(plevel[ich])][pcls[ich] - 1]; + } } +#if 0 // === REORDER =========================================================== /*------------------------------------------------------------------------ - Function: resolveLines + Function: resolve_lines - Breaks a paragraph into lines + Breaks a paragraph into lines - Input: Character count - Array of line break flags - In/Out: Array of characters + Input: Character count + Array of line break flags + In/Out: Array of characters - Returns the count of characters on the first line + Returns the count of characters on the first line - Note: This function only breaks lines at hard line breaks. Other - line breaks can be passed in. If pbrk[n] is true, then a break - occurs after the character in pszInput[n]. Breaks before the first - character are not allowed. + Note: This function only breaks lines at hard line breaks. Other + line breaks can be passed in. If pbrk[n] is true, then a break + occurs after the character in psz_input[n]. Breaks before the first + character are not allowed. ------------------------------------------------------------------------*/ -static -int resolveLines(uint16_t * pszInput, int * pbrk, int cch) +static int resolve_lines(uint32_t *psz_input, int *pbrk, int cch) { - int ich; - - // skip characters not of type LS - for(ich = 0; ich < cch; ich++) - { - if (pszInput[ich] == chLS || (pbrk && pbrk[ich])) - { - ich++; - break; - } - } - - return ich; + int ich; + + // skip characters not of type LS + for(ich = 0; ich < cch; ich++) + { + if (psz_input[ich] == chLS || (pbrk && pbrk[ich])) + { + ich++; + break; + } + } + + return ich; } +#endif /*------------------------------------------------------------------------ - Function: resolveWhiteSpace + Function: fz_bidi_resolve_whitespace - Resolves levels for WS and S - Implements rule L1 of the Unicode bidi Algorithm. + Resolves levels for WS and S + Implements rule L1 of the Unicode bidi Algorithm. - Input: Base embedding level - Character count - Array of direction classes (for one line of text) + Input: Base embedding level + Character count + Array of direction classes (for one line of text) - In/Out: Array of embedding levels (for one line of text) + In/Out: Array of embedding levels (for one line of text) - Note: this should be applied a line at a time. The default driver - code supplied in this file assumes a single line of text; for - a real implementation, cch and the initial pointer values - would have to be adjusted. + Note: this should be applied a line at a time. The default driver + code supplied in this file assumes a single line of text; for + a real implementation, cch and the initial pointer values + would have to be adjusted. ------------------------------------------------------------------------*/ -void Bidi_resolveWhitespace(int baselevel, const int *pcls, int *plevel, - int cch) +void fz_bidi_resolve_whitespace(fz_bidi_level baselevel, const fz_bidi_chartype *pcls, fz_bidi_level *plevel, + int cch) { - int cchrun = 0; - int oldlevel = baselevel; - int ich; - - for (ich = 0; ich < cch; ich++) - { - switch(pcls[ich]) - { - default: - cchrun = 0; // any other character breaks the run - break; - case BDI_WS: - cchrun++; - break; - - case BDI_RLE: - case BDI_LRE: - case BDI_LRO: - case BDI_RLO: - case BDI_PDF: - case BDI_BN: - plevel[ich] = oldlevel; - cchrun++; - break; - - case BDI_S: - case BDI_B: - // reset levels for WS before eot - SetDeferredRun(plevel, cchrun, ich, baselevel); - cchrun = 0; - plevel[ich] = baselevel; - break; - } - oldlevel = plevel[ich]; - } - // reset level before eot - SetDeferredRun(plevel, cchrun, ich, baselevel); + int cchrun = 0; + fz_bidi_level oldlevel = baselevel; + int ich; + + for (ich = 0; ich < cch; ich++) + { + switch(pcls[ich]) + { + default: + cchrun = 0; // any other character breaks the run + break; + case BDI_WS: + cchrun++; + break; + + case BDI_RLE: + case BDI_LRE: + case BDI_LRO: + case BDI_RLO: + case BDI_PDF: + case BDI_BN: + plevel[ich] = oldlevel; + cchrun++; + break; + + case BDI_S: + case BDI_B: + // reset levels for WS before eot + set_deferred_level_run(plevel, cchrun, ich, baselevel); + cchrun = 0; + plevel[ich] = baselevel; + break; + } + oldlevel = plevel[ich]; + } + // reset level before eot + set_deferred_level_run(plevel, cchrun, ich, baselevel); } #ifdef BIDI_LINE_AT_A_TIME /*------------------------------------------------------------------------ - Functions: reorder/reorderLevel + Functions: reorder/reorderLevel - Recursively reorders the display string - "From the highest level down, reverse all characters at that level and - higher, down to the lowest odd level" + Recursively reorders the display string + "From the highest level down, reverse all characters at that level and + higher, down to the lowest odd level" - Implements rule L2 of the Unicode bidi Algorithm. + Implements rule L2 of the Unicode bidi Algorithm. - Input: Array of embedding levels - Character count - Flag enabling reversal (set to false by initial caller) + Input: Array of embedding levels + Character count + Flag enabling reversal (set to false by initial caller) - In/Out: Text to reorder + In/Out: Text to reorder - Note: levels may exceed 15 resp. 61 on input. + Note: levels may exceed 15 resp. 61 on input. - Rule L3 - reorder combining marks is not implemented here - Rule L4 - glyph mirroring is implemented as a display option below + Rule L3 - reorder combining marks is not implemented here + Rule L4 - glyph mirroring is implemented as a display option below - Note: this should be applied a line at a time + Note: this should be applied a line at a time -------------------------------------------------------------------------*/ -static int reorderLevel(int level, uint16_t * pszText, const int * plevel, int cch, - int fReverse) +static int reorderLevel(fz_bidi_level level, uint32_t *psz_text, const fz_bidi_level *plevel, int cch, + int f_reverse) { - int ich; - - // true as soon as first odd level encountered - fReverse = fReverse || odd(level); - - for (ich = 0; ich < cch; ich++) - { - if (plevel[ich] < level) - { - break; - } - else if (plevel[ich] > level) - { - ich += reorderLevel(level + 1, pszText + ich, plevel + ich, - cch - ich, fReverse) - 1; - } - } - if (fReverse) - { - reverse(pszText, ich); - } - return ich; + int ich; + + // true as soon as first odd level encountered + f_reverse = f_reverse || odd(level); + + for (ich = 0; ich < cch; ich++) + { + if (plevel[ich] < level) + { + break; + } + else if (plevel[ich] > level) + { + ich += reorderLevel(level + 1, psz_text + ich, plevel + ich, + cch - ich, f_reverse) - 1; + } + } + if (f_reverse) + { + reverse(psz_text, ich); + } + return ich; } - -int Bidi_reorder(int baselevel, uint16_t * pszText, const int * plevel, int cch) +int Bidi_reorder(fz_bidi_level baselevel, uint32_t *psz_text, const fz_bidi_level *plevel, int cch) { - int ich = 0; - - while (ich < cch) - { - ich += reorderLevel(baselevel, pszText + ich, plevel + ich, - cch - ich, FALSE); - } - return ich; + int ich = 0; + + while (ich < cch) + { + ich += reorderLevel(baselevel, psz_text + ich, plevel + ich, + cch - ich, FALSE); + } + return ich; } #endif diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c index 59d1578a..e711e705 100644 --- a/source/fitz/bidi.c +++ b/source/fitz/bidi.c @@ -1,33 +1,34 @@ -/** - * Bidirectional text processing. - * - * Processes uint16_t text by arranging the characters into an order suitable - * for display. E.g. Hebrew text will be arranged from right-to-left and - * any English within the text will remain in the left-to-right order. - * Characters such as parenthesis will be substituted for their mirrored - * equivalents if they are part of text which must be reversed. - * - * This is an implementation of the uint16_t Bidirectional Algorithm which - * can be found here: http://www.uint16_t.org/reports/tr9/ and is based - * on the reference implementation of the algorithm found on that page. - * - * FIXME - Describe the role of this module from the point of view of EDR. - * - * For a nice overview of how it works, read this... - * http://www.w3.org/TR/REC-html40/struct/dirlang.html - * - * Copyright (C) Picsel, 2004. All Rights Reserved. - */ +/* + * Bidirectional text processing. + * + * Processes unicode text by arranging the characters into an order suitable + * for display. E.g. Hebrew text will be arranged from right-to-left and + * any English within the text will remain in the left-to-right order. + * Characters such as parenthesis will be substituted for their mirrored + * equivalents if they are part of text which must be reversed. + * + * This is an implementation of the unicode Bidirectional Algorithm which + * can be found here: http://www.unicode.org/reports/tr9/ and is based + * on the reference implementation of the algorithm found on that page. + * + * For a nice overview of how it works, read this... + * http://www.w3.org/TR/REC-html40/struct/dirlang.html + * + * Extracted from the SmartOffice code, where it was modified by Ian + * Beveridge. + * + * Copyright (C) Picsel, 2004. All Rights Reserved. + */ -/** - * Original copyright notice from uint16_t reference implementation. - * ---------------------------------------------------------------- - * Written by: Asmus Freytag - * C++ and Windows dependencies removed, and - * command line interface added by: Rick McGowan - * - * Copyright (C) 1999, ASMUS, Inc. All Rights Reserved - */ +/* + * Original copyright notice from unicode reference implementation. + * ---------------------------------------------------------------- + * Written by: Asmus Freytag + * C++ and Windows dependencies removed, and + * command line interface added by: Rick McGowan + * + * Copyright (C) 1999, ASMUS, Inc. All Rights Reserved + */ /* * Includes... @@ -42,10 +43,10 @@ #define ODD(x) ((x) & 1) -#define REPLACEABLE_TYPE(t)\ - (((t)==BDI_ES) || ((t)==BDI_ET )|| ((t)==BDI_CS )||\ - ((t)==BDI_NSM)|| ((t)==BDI_PDF)|| ((t)==BDI_BN )||\ - ((t)==BDI_S) || ((t)==BDI_WS )|| ((t)==BDI_N ) ) +#define REPLACEABLE_TYPE(t) ( \ + ((t)==BDI_ES) || ((t)==BDI_ET) || ((t)==BDI_CS) || \ + ((t)==BDI_NSM) || ((t)==BDI_PDF) || ((t)==BDI_BN) || \ + ((t)==BDI_S) || ((t)==BDI_WS) || ((t)==BDI_N) ) #ifdef DEBUG_BIDI_VERBOSE #define DBUGVF(params) do { fz_warn params; } while (0) @@ -59,29 +60,29 @@ #define DBUGH(params) do {} while (0) #endif -#define UNICODE_EOS ((uint16_t)0) -#define UNICODE_DIGIT_ZERO ((uint16_t)0x0030) -#define UNICODE_DIGIT_NINE ((uint16_t)0x0039) -#define UNICODE_SUPERSCRIPT_TWO ((uint16_t)0x00B2) -#define UNICODE_SUPERSCRIPT_THREE ((uint16_t)0x00B3) -#define UNICODE_SUPERSCRIPT_ONE ((uint16_t)0x00B9) -#define UNICODE_RTL_START ((uint16_t)0x0590) -#define UNICODE_RTL_END ((uint16_t)0x07BF) -#define UNICODE_ARABIC_INDIC_DIGIT_ZERO ((uint16_t)0x0660) -#define UNICODE_ARABIC_INDIC_DIGIT_NINE ((uint16_t)0x0669) -#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO ((uint16_t)0x06F0) -#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE ((uint16_t)0x06F9) -#define UNICODE_ZERO_WIDTH_NON_JOINER ((uint16_t)0x200C) -#define UNICODE_SUPERSCRIPT_ZERO ((uint16_t)0x2070) -#define UNICODE_SUPERSCRIPT_FOUR ((uint16_t)0x2074) -#define UNICODE_SUPERSCRIPT_NINE ((uint16_t)0x2079) -#define UNICODE_SUBSCRIPT_ZERO ((uint16_t)0x2080) -#define UNICODE_SUBSCRIPT_NINE ((uint16_t)0x2089) -#define UNICODE_CIRCLED_DIGIT_ONE ((uint16_t)0x2460) -#define UNICODE_NUMBER_TWENTY_FULL_STOP ((uint16_t)0x249B) -#define UNICODE_CIRCLED_DIGIT_ZERO ((uint16_t)0x24EA) -#define UNICODE_FULLWIDTH_DIGIT_ZERO ((uint16_t)0xFF10) -#define UNICODE_FULLWIDTH_DIGIT_NINE ((uint16_t)0xFF19) +#define UNICODE_EOS 0 +#define UNICODE_DIGIT_ZERO 0x0030 +#define UNICODE_DIGIT_NINE 0x0039 +#define UNICODE_SUPERSCRIPT_TWO 0x00B2 +#define UNICODE_SUPERSCRIPT_THREE 0x00B3 +#define UNICODE_SUPERSCRIPT_ONE 0x00B9 +#define UNICODE_RTL_START 0x0590 +#define UNICODE_RTL_END 0x07BF +#define UNICODE_ARABIC_INDIC_DIGIT_ZERO 0x0660 +#define UNICODE_ARABIC_INDIC_DIGIT_NINE 0x0669 +#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO 0x06F0 +#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE 0x06F9 +#define UNICODE_ZERO_WIDTH_NON_JOINER 0x200C +#define UNICODE_SUPERSCRIPT_ZERO 0x2070 +#define UNICODE_SUPERSCRIPT_FOUR 0x2074 +#define UNICODE_SUPERSCRIPT_NINE 0x2079 +#define UNICODE_SUBSCRIPT_ZERO 0x2080 +#define UNICODE_SUBSCRIPT_NINE 0x2089 +#define UNICODE_CIRCLED_DIGIT_ONE 0x2460 +#define UNICODE_NUMBER_TWENTY_FULL_STOP 0x249B +#define UNICODE_CIRCLED_DIGIT_ZERO 0x24EA +#define UNICODE_FULLWIDTH_DIGIT_ZERO 0xFF10 +#define UNICODE_FULLWIDTH_DIGIT_NINE 0xFF19 #ifndef TRUE #define TRUE (1) @@ -94,475 +95,97 @@ * Enumerations... */ - -#ifdef DEBUG +#ifdef DEBUG_BIDI_VERBOSE /* display support: */ -static const char charFromTypes[] = +static const char char_from_types[] = { - ' ', /* ON, */ - '>', /* L, */ - '<', /* R, */ - '9', /* AN, */ - '1', /* EN, */ - 'a', /* AL */ + ' ', /* ON */ + '>', /* L */ + '<', /* R */ + '9', /* AN */ + '1', /* EN */ + 'a', /* AL */ '@', /* NSM */ - '.', /* CS */ - ',', /* ES */ - '$', /* ET */ - ':', /* BN */ - 'X', /* S */ - '_', /* WS */ - 'B', /* B */ + '.', /* CS */ + ',', /* ES */ + '$', /* ET */ + ':', /* BN */ + 'X', /* S */ + '_', /* WS */ + 'B', /* B */ '+', /* RLO */ '+', /* RLE */ '+', /* LRO */ '+', /* LRE */ '-', /* PDF */ - '=' /* LS */ + '=' /* LS */ }; -#endif /* DEBUG */ - - - -typedef struct Bidi_ProcessLine_fragData -{ - uint16_t *entireText; - Bidi_PL_Fragment_Callback *callersCallback; - void *callersData; -} -Bidi_ProcessLine_fragData; - - -typedef struct BidiPropList -{ - uint16_t first; - uint16_t last; - Bidi_CharType type; -} BidiPropList; - - -/* The following two arrays were generated - * using the perl script unidata2array.pl - */ - -static const BidiPropList bidiPropList[] = -{ - {0x0000,0x0008,BDI_BN}, {0x0009,0x0009,BDI_S}, {0x000A,0x000A,BDI_B}, - {0x000B,0x000B,BDI_S}, {0x000C,0x000C,BDI_WS}, {0x000D,0x000D,BDI_B}, - {0x000E,0x001B,BDI_BN}, {0x001C,0x001E,BDI_B}, {0x001F,0x001F,BDI_S}, - {0x0020,0x0020,BDI_WS}, {0x0021,0x0022,BDI_ON}, {0x0023,0x0025,BDI_ET}, - {0x0026,0x002A,BDI_ON}, {0x002B,0x002B,BDI_ES}, {0x002C,0x002C,BDI_CS}, - {0x002D,0x002D,BDI_ES}, {0x002E,0x002E,BDI_CS}, {0x002F,0x002F,BDI_ES}, - {0x0030,0x0039,BDI_EN}, {0x003A,0x003A,BDI_CS}, {0x003B,0x0040,BDI_ON}, - {0x0041,0x005A,BDI_L}, {0x005B,0x0060,BDI_ON}, {0x0061,0x007A,BDI_L}, - {0x007B,0x007E,BDI_ON}, {0x007F,0x0084,BDI_BN}, {0x0085,0x0085,BDI_B}, - {0x0086,0x009F,BDI_BN}, {0x00A0,0x00A0,BDI_CS}, {0x00A1,0x00A1,BDI_ON}, - {0x00A2,0x00A5,BDI_ET}, {0x00A6,0x00A9,BDI_ON}, {0x00AA,0x00AA,BDI_L}, - {0x00AB,0x00AC,BDI_ON}, {0x00AD,0x00AD,BDI_BN}, {0x00AE,0x00AF,BDI_ON}, - {0x00B0,0x00B1,BDI_ET}, {0x00B2,0x00B3,BDI_EN}, {0x00B4,0x00B4,BDI_ON}, - {0x00B5,0x00B5,BDI_L}, {0x00B6,0x00B8,BDI_ON}, {0x00B9,0x00B9,BDI_EN}, - {0x00BA,0x00BA,BDI_L}, {0x00BB,0x00BF,BDI_ON}, {0x00C0,0x00D6,BDI_L}, - {0x00D7,0x00D7,BDI_ON}, {0x00D8,0x00F6,BDI_L}, {0x00F7,0x00F7,BDI_ON}, - {0x00F8,0x02B8,BDI_L}, {0x02B9,0x02BA,BDI_ON}, {0x02BB,0x02C1,BDI_L}, - {0x02C2,0x02CF,BDI_ON}, {0x02D0,0x02D1,BDI_L}, {0x02D2,0x02DF,BDI_ON}, - {0x02E0,0x02E4,BDI_L}, {0x02E5,0x02ED,BDI_ON}, {0x02EE,0x02EE,BDI_L}, - {0x02EF,0x02FF,BDI_ON}, {0x0300,0x036F,BDI_NSM},{0x0374,0x0375,BDI_ON}, - {0x0376,0x037D,BDI_L}, {0x037E,0x037E,BDI_ON}, {0x0384,0x0385,BDI_ON}, - {0x0386,0x0386,BDI_L}, {0x0387,0x0387,BDI_ON}, {0x0388,0x03F5,BDI_L}, - {0x03F6,0x03F6,BDI_ON}, {0x03F7,0x0482,BDI_L}, {0x0483,0x0486,BDI_NSM}, - {0x0488,0x0489,BDI_NSM},{0x048A,0x0589,BDI_L}, {0x058A,0x058A,BDI_ON}, - {0x0591,0x05BD,BDI_NSM},{0x05BE,0x05BE,BDI_R}, {0x05BF,0x05BF,BDI_NSM}, - {0x05C0,0x05C0,BDI_R}, {0x05C1,0x05C2,BDI_NSM},{0x05C3,0x05C3,BDI_R}, - {0x05C4,0x05C5,BDI_NSM},{0x05C6,0x05C6,BDI_R}, {0x05C7,0x05C7,BDI_NSM}, - {0x05D0,0x05EA,BDI_R}, {0x05F0,0x05F4,BDI_R}, {0x0600,0x060B,BDI_AL}, - {0x060C,0x060C,BDI_CS}, {0x060D,0x060D,BDI_AL}, {0x060E,0x060F,BDI_ON}, - {0x0610,0x0615,BDI_NSM},{0x0616,0x064A,BDI_AL}, {0x064B,0x065E,BDI_NSM}, - {0x065F,0x065F,BDI_AL}, {0x0660,0x0669,BDI_EN}, {0x066A,0x066A,BDI_ET}, - {0x066B,0x066C,BDI_EN}, {0x066D,0x066F,BDI_AL}, {0x0670,0x0670,BDI_NSM}, - {0x0671,0x06D5,BDI_AL}, {0x06D6,0x06DC,BDI_NSM},{0x06DD,0x06DD,BDI_AL}, - {0x06DE,0x06E4,BDI_NSM},{0x06E5,0x06E6,BDI_AL}, {0x06E7,0x06E8,BDI_NSM}, - {0x06E9,0x06E9,BDI_ON}, {0x06EA,0x06ED,BDI_NSM},{0x06EE,0x06EF,BDI_AL}, - {0x06F0,0x06F9,BDI_EN}, {0x06FA,0x070E,BDI_AL}, {0x070F,0x070F,BDI_BN}, - {0x0710,0x0710,BDI_AL}, {0x0711,0x0711,BDI_NSM},{0x0712,0x072F,BDI_AL}, - {0x0730,0x074A,BDI_NSM},{0x074B,0x07A5,BDI_AL}, {0x07A6,0x07B0,BDI_NSM}, - {0x07B1,0x07BF,BDI_AL}, {0x07C0,0x07EA,BDI_R}, {0x07EB,0x07F3,BDI_NSM}, - {0x07F4,0x07F5,BDI_R}, {0x07F6,0x07F9,BDI_ON}, {0x07FA,0x08FF,BDI_R}, - {0x0901,0x0902,BDI_NSM},{0x0903,0x093B,BDI_L}, {0x093C,0x093C,BDI_NSM}, - {0x093D,0x0940,BDI_L}, {0x0941,0x0948,BDI_NSM},{0x0949,0x094C,BDI_L}, - {0x094D,0x094D,BDI_NSM},{0x094E,0x0950,BDI_L}, {0x0951,0x0954,BDI_NSM}, - {0x0955,0x0961,BDI_L}, {0x0962,0x0963,BDI_NSM},{0x0964,0x0980,BDI_L}, - {0x0981,0x0981,BDI_NSM},{0x0982,0x09BB,BDI_L}, {0x09BC,0x09BC,BDI_NSM}, - {0x09BD,0x09C0,BDI_L}, {0x09C1,0x09C4,BDI_NSM},{0x09C5,0x09CC,BDI_L}, - {0x09CD,0x09CD,BDI_NSM},{0x09CE,0x09E1,BDI_L}, {0x09E2,0x09E3,BDI_NSM}, - {0x09E4,0x09F1,BDI_L}, {0x09F2,0x09F3,BDI_ET}, {0x09F4,0x0A00,BDI_L}, - {0x0A01,0x0A02,BDI_NSM},{0x0A03,0x0A3B,BDI_L}, {0x0A3C,0x0A3C,BDI_NSM}, - {0x0A3D,0x0A40,BDI_L}, {0x0A41,0x0A42,BDI_NSM},{0x0A47,0x0A48,BDI_NSM}, - {0x0A4B,0x0A4D,BDI_NSM},{0x0A4E,0x0A6F,BDI_L}, {0x0A70,0x0A71,BDI_NSM}, - {0x0A72,0x0A80,BDI_L}, {0x0A81,0x0A82,BDI_NSM},{0x0A83,0x0ABB,BDI_L}, - {0x0ABC,0x0ABC,BDI_NSM},{0x0ABD,0x0AC0,BDI_L}, {0x0AC1,0x0AC5,BDI_NSM}, - {0x0AC7,0x0AC8,BDI_NSM},{0x0AC9,0x0ACC,BDI_L}, {0x0ACD,0x0ACD,BDI_NSM}, - {0x0ACE,0x0AE1,BDI_L}, {0x0AE2,0x0AE3,BDI_NSM},{0x0AE4,0x0AF0,BDI_L}, - {0x0AF1,0x0AF1,BDI_ET}, {0x0B01,0x0B01,BDI_NSM},{0x0B02,0x0B3B,BDI_L}, - {0x0B3C,0x0B3C,BDI_NSM},{0x0B3D,0x0B3E,BDI_L}, {0x0B3F,0x0B3F,BDI_NSM}, - {0x0B40,0x0B40,BDI_L}, {0x0B41,0x0B43,BDI_NSM},{0x0B44,0x0B4C,BDI_L}, - {0x0B4D,0x0B4D,BDI_NSM},{0x0B56,0x0B56,BDI_NSM},{0x0B57,0x0B81,BDI_L}, - {0x0B82,0x0B82,BDI_NSM},{0x0B83,0x0BBF,BDI_L}, {0x0BC0,0x0BC0,BDI_NSM}, - {0x0BC1,0x0BCC,BDI_L}, {0x0BCD,0x0BCD,BDI_NSM},{0x0BCE,0x0BF2,BDI_L}, - {0x0BF3,0x0BF8,BDI_ON}, {0x0BF9,0x0BF9,BDI_ET}, {0x0BFA,0x0BFA,BDI_ON}, - {0x0BFB,0x0C3D,BDI_L}, {0x0C3E,0x0C40,BDI_NSM},{0x0C41,0x0C45,BDI_L}, - {0x0C46,0x0C48,BDI_NSM},{0x0C4A,0x0C4D,BDI_NSM},{0x0C55,0x0C56,BDI_NSM}, - {0x0C57,0x0CBB,BDI_L}, {0x0CBC,0x0CBC,BDI_NSM},{0x0CBD,0x0CCB,BDI_L}, - {0x0CCC,0x0CCD,BDI_NSM},{0x0CCE,0x0CE1,BDI_L}, {0x0CE2,0x0CE3,BDI_NSM}, - {0x0CE4,0x0CF0,BDI_L}, {0x0CF1,0x0CF2,BDI_ON}, {0x0CF3,0x0D40,BDI_L}, - {0x0D41,0x0D43,BDI_NSM},{0x0D44,0x0D4C,BDI_L}, {0x0D4D,0x0D4D,BDI_NSM}, - {0x0D4E,0x0DC9,BDI_L}, {0x0DCA,0x0DCA,BDI_NSM},{0x0DCB,0x0DD1,BDI_L}, - {0x0DD2,0x0DD4,BDI_NSM},{0x0DD6,0x0DD6,BDI_NSM},{0x0DD7,0x0E30,BDI_L}, - {0x0E31,0x0E31,BDI_NSM},{0x0E32,0x0E33,BDI_L}, {0x0E34,0x0E3A,BDI_NSM}, - {0x0E3F,0x0E3F,BDI_ET}, {0x0E40,0x0E46,BDI_L}, {0x0E47,0x0E4E,BDI_NSM}, - {0x0E4F,0x0EB0,BDI_L}, {0x0EB1,0x0EB1,BDI_NSM},{0x0EB2,0x0EB3,BDI_L}, - {0x0EB4,0x0EB9,BDI_NSM},{0x0EBB,0x0EBC,BDI_NSM},{0x0EBD,0x0EC7,BDI_L}, - {0x0EC8,0x0ECD,BDI_NSM},{0x0ECE,0x0F17,BDI_L}, {0x0F18,0x0F19,BDI_NSM}, - {0x0F1A,0x0F34,BDI_L}, {0x0F35,0x0F35,BDI_NSM},{0x0F36,0x0F36,BDI_L}, - {0x0F37,0x0F37,BDI_NSM},{0x0F38,0x0F38,BDI_L}, {0x0F39,0x0F39,BDI_NSM}, - {0x0F3A,0x0F3D,BDI_ON}, {0x0F3E,0x0F70,BDI_L}, {0x0F71,0x0F7E,BDI_NSM}, - {0x0F7F,0x0F7F,BDI_L}, {0x0F80,0x0F84,BDI_NSM},{0x0F85,0x0F85,BDI_L}, - {0x0F86,0x0F87,BDI_NSM},{0x0F88,0x0F8F,BDI_L}, {0x0F90,0x0F97,BDI_NSM}, - {0x0F99,0x0FBC,BDI_NSM},{0x0FBD,0x0FC5,BDI_L}, {0x0FC6,0x0FC6,BDI_NSM}, - {0x0FC7,0x102C,BDI_L}, {0x102D,0x1030,BDI_NSM},{0x1031,0x1031,BDI_L}, - {0x1032,0x1032,BDI_NSM},{0x1036,0x1037,BDI_NSM},{0x1038,0x1038,BDI_L}, - {0x1039,0x1039,BDI_NSM},{0x103A,0x1057,BDI_L}, {0x1058,0x1059,BDI_NSM}, - {0x105A,0x135E,BDI_L}, {0x135F,0x135F,BDI_NSM},{0x1360,0x138F,BDI_L}, - {0x1390,0x1399,BDI_ON}, {0x139A,0x167F,BDI_L}, {0x1680,0x1680,BDI_WS}, - {0x1681,0x169A,BDI_L}, {0x169B,0x169C,BDI_ON}, {0x169D,0x1711,BDI_L}, - {0x1712,0x1714,BDI_NSM},{0x1715,0x1731,BDI_L}, {0x1732,0x1734,BDI_NSM}, - {0x1735,0x1751,BDI_L}, {0x1752,0x1753,BDI_NSM},{0x1754,0x1771,BDI_L}, - {0x1772,0x1773,BDI_NSM},{0x1774,0x17B6,BDI_L}, {0x17B7,0x17BD,BDI_NSM}, - {0x17BE,0x17C5,BDI_L}, {0x17C6,0x17C6,BDI_NSM},{0x17C7,0x17C8,BDI_L}, - {0x17C9,0x17D3,BDI_NSM},{0x17D4,0x17DA,BDI_L}, {0x17DB,0x17DB,BDI_ET}, - {0x17DC,0x17DC,BDI_L}, {0x17DD,0x17DD,BDI_NSM},{0x17DE,0x17EF,BDI_L}, - {0x17F0,0x17F9,BDI_ON}, {0x1800,0x180A,BDI_ON}, {0x180B,0x180D,BDI_NSM}, - {0x180E,0x180E,BDI_WS}, {0x180F,0x18A8,BDI_L}, {0x18A9,0x18A9,BDI_NSM}, - {0x18AA,0x191F,BDI_L}, {0x1920,0x1922,BDI_NSM},{0x1923,0x1926,BDI_L}, - {0x1927,0x192B,BDI_NSM},{0x192C,0x1931,BDI_L}, {0x1932,0x1932,BDI_NSM}, - {0x1933,0x1938,BDI_L}, {0x1939,0x193B,BDI_NSM},{0x1940,0x1940,BDI_ON}, - {0x1944,0x1945,BDI_ON}, {0x1946,0x19DD,BDI_L}, {0x19DE,0x19FF,BDI_ON}, - {0x1A00,0x1A16,BDI_L}, {0x1A17,0x1A18,BDI_NSM},{0x1A19,0x1AFF,BDI_L}, - {0x1B00,0x1B03,BDI_NSM},{0x1B04,0x1B33,BDI_L}, {0x1B34,0x1B34,BDI_NSM}, - {0x1B35,0x1B35,BDI_L}, {0x1B36,0x1B3A,BDI_NSM},{0x1B3B,0x1B3B,BDI_L}, - {0x1B3C,0x1B3C,BDI_NSM},{0x1B3D,0x1B41,BDI_L}, {0x1B42,0x1B42,BDI_NSM}, - {0x1B43,0x1B6A,BDI_L}, {0x1B6B,0x1B73,BDI_NSM},{0x1B74,0x1DBF,BDI_L}, - {0x1DC0,0x1DCA,BDI_NSM},{0x1DFE,0x1DFF,BDI_NSM},{0x1E00,0x1FBC,BDI_L}, - {0x1FBD,0x1FBD,BDI_ON}, {0x1FBE,0x1FBE,BDI_L}, {0x1FBF,0x1FC1,BDI_ON}, - {0x1FC2,0x1FCC,BDI_L}, {0x1FCD,0x1FCF,BDI_ON}, {0x1FD0,0x1FDC,BDI_L}, - {0x1FDD,0x1FDF,BDI_ON}, {0x1FE0,0x1FEC,BDI_L}, {0x1FED,0x1FEF,BDI_ON}, - {0x1FF0,0x1FFC,BDI_L}, {0x1FFD,0x1FFE,BDI_ON}, {0x2000,0x200A,BDI_WS}, - {0x200B,0x200D,BDI_BN}, {0x200E,0x200E,BDI_L}, {0x200F,0x200F,BDI_R}, - {0x2010,0x2027,BDI_ON}, {0x2028,0x2028,BDI_WS}, {0x2029,0x2029,BDI_B}, - {0x202A,0x202A,BDI_LRE},{0x202B,0x202B,BDI_RLE},{0x202C,0x202C,BDI_PDF}, - {0x202D,0x202D,BDI_LRO},{0x202E,0x202E,BDI_RLO},{0x202F,0x202F,BDI_CS}, - {0x2030,0x2034,BDI_ET}, {0x2035,0x2043,BDI_ON}, {0x2044,0x2044,BDI_CS}, - {0x2045,0x205E,BDI_ON}, {0x205F,0x205F,BDI_WS}, {0x2060,0x2063,BDI_BN}, - {0x206A,0x206F,BDI_BN}, {0x2070,0x2070,BDI_EN}, {0x2071,0x2073,BDI_L}, - {0x2074,0x2079,BDI_EN}, {0x207A,0x207B,BDI_ES}, {0x207C,0x207E,BDI_ON}, - {0x207F,0x207F,BDI_L}, {0x2080,0x2089,BDI_EN}, {0x208A,0x208B,BDI_ES}, - {0x208C,0x208E,BDI_ON}, {0x208F,0x209F,BDI_L}, {0x20A0,0x20B5,BDI_ET}, - {0x20D0,0x20EF,BDI_NSM},{0x2100,0x2101,BDI_ON}, {0x2102,0x2102,BDI_L}, - {0x2103,0x2106,BDI_ON}, {0x2107,0x2107,BDI_L}, {0x2108,0x2109,BDI_ON}, - {0x210A,0x2113,BDI_L}, {0x2114,0x2114,BDI_ON}, {0x2115,0x2115,BDI_L}, - {0x2116,0x2118,BDI_ON}, {0x2119,0x211D,BDI_L}, {0x211E,0x2123,BDI_ON}, - {0x2124,0x2124,BDI_L}, {0x2125,0x2125,BDI_ON}, {0x2126,0x2126,BDI_L}, - {0x2127,0x2127,BDI_ON}, {0x2128,0x2128,BDI_L}, {0x2129,0x2129,BDI_ON}, - {0x212A,0x212D,BDI_L}, {0x212E,0x212E,BDI_ET}, {0x212F,0x2139,BDI_L}, - {0x213A,0x213B,BDI_ON}, {0x213C,0x213F,BDI_L}, {0x2140,0x2144,BDI_ON}, - {0x2145,0x2149,BDI_L}, {0x214A,0x214D,BDI_ON}, {0x214E,0x2152,BDI_L}, - {0x2153,0x215F,BDI_ON}, {0x2160,0x218F,BDI_L}, {0x2190,0x2211,BDI_ON}, - {0x2212,0x2212,BDI_ES}, {0x2213,0x2213,BDI_ET}, {0x2214,0x2335,BDI_ON}, - {0x2336,0x237A,BDI_L}, {0x237B,0x2394,BDI_ON}, {0x2395,0x2395,BDI_L}, - {0x2396,0x23E7,BDI_ON}, {0x2400,0x2426,BDI_ON}, {0x2440,0x244A,BDI_ON}, - {0x2460,0x2487,BDI_ON}, {0x2488,0x249B,BDI_EN}, {0x249C,0x24E9,BDI_L}, - {0x24EA,0x269C,BDI_ON}, {0x26A0,0x26AB,BDI_ON}, {0x26AC,0x26AC,BDI_L}, - {0x26AD,0x26B2,BDI_ON}, {0x2701,0x2704,BDI_ON}, {0x2706,0x2709,BDI_ON}, - {0x270C,0x2727,BDI_ON}, {0x2729,0x274B,BDI_ON}, {0x274D,0x274D,BDI_ON}, - {0x274F,0x2752,BDI_ON}, {0x2756,0x2756,BDI_ON}, {0x2758,0x275E,BDI_ON}, - {0x2761,0x2794,BDI_ON}, {0x2798,0x27AF,BDI_ON}, {0x27B1,0x27BE,BDI_ON}, - {0x27C0,0x27CA,BDI_ON}, {0x27D0,0x27EB,BDI_ON}, {0x27F0,0x27FF,BDI_ON}, - {0x2800,0x28FF,BDI_L}, {0x2900,0x2B1A,BDI_ON}, {0x2B20,0x2B23,BDI_ON}, - {0x2B24,0x2CE4,BDI_L}, {0x2CE5,0x2CEA,BDI_ON}, {0x2CF9,0x2CFF,BDI_ON}, - {0x2D00,0x2DFF,BDI_L}, {0x2E00,0x2E17,BDI_ON}, {0x2E1C,0x2E1D,BDI_ON}, - {0x2E80,0x2E99,BDI_ON}, {0x2E9B,0x2EF3,BDI_ON}, {0x2F00,0x2FD5,BDI_ON}, - {0x2FF0,0x2FFB,BDI_ON}, {0x3000,0x3000,BDI_WS}, {0x3001,0x3004,BDI_ON}, - {0x3005,0x3007,BDI_L}, {0x3008,0x3020,BDI_ON}, {0x3021,0x3029,BDI_L}, - {0x302A,0x302F,BDI_NSM},{0x3030,0x3030,BDI_ON}, {0x3031,0x3035,BDI_L}, - {0x3036,0x3037,BDI_ON}, {0x3038,0x303C,BDI_L}, {0x303D,0x303F,BDI_ON}, - {0x3040,0x3098,BDI_L}, {0x3099,0x309A,BDI_NSM},{0x309B,0x309C,BDI_ON}, - {0x309D,0x309F,BDI_L}, {0x30A0,0x30A0,BDI_ON}, {0x30A1,0x30FA,BDI_L}, - {0x30FB,0x30FB,BDI_ON}, {0x30FC,0x31BF,BDI_L}, {0x31C0,0x31CF,BDI_ON}, - {0x31D0,0x321C,BDI_L}, {0x321D,0x321E,BDI_ON}, {0x321F,0x324F,BDI_L}, - {0x3250,0x325F,BDI_ON}, {0x3260,0x327B,BDI_L}, {0x327C,0x327E,BDI_ON}, - {0x327F,0x32B0,BDI_L}, {0x32B1,0x32BF,BDI_ON}, {0x32C0,0x32CB,BDI_L}, - {0x32CC,0x32CF,BDI_ON}, {0x32D0,0x3376,BDI_L}, {0x3377,0x337A,BDI_ON}, - {0x337B,0x33DD,BDI_L}, {0x33DE,0x33DF,BDI_ON}, {0x33E0,0x33FE,BDI_L}, - {0x33FF,0x33FF,BDI_ON}, {0x3400,0x4DBF,BDI_L}, {0x4DC0,0x4DFF,BDI_ON}, - {0x4E00,0xA48F,BDI_L}, {0xA490,0xA4C6,BDI_ON}, {0xA700,0xA71A,BDI_ON}, - {0xA720,0xA721,BDI_ON}, {0xA722,0xA801,BDI_L}, {0xA802,0xA802,BDI_NSM}, - {0xA803,0xA805,BDI_L}, {0xA806,0xA806,BDI_NSM},{0xA807,0xA80A,BDI_L}, - {0xA80B,0xA80B,BDI_NSM},{0xA80C,0xA824,BDI_L}, {0xA825,0xA826,BDI_NSM}, - {0xA827,0xA827,BDI_L}, {0xA828,0xA82B,BDI_ON}, {0xA82C,0xA873,BDI_L}, - {0xA874,0xA877,BDI_ON}, {0xA878,0xFB1C,BDI_L}, {0xFB1D,0xFB1D,BDI_R}, - {0xFB1E,0xFB1E,BDI_NSM},{0xFB1F,0xFB28,BDI_R}, {0xFB29,0xFB29,BDI_ES}, - {0xFB2A,0xFB4F,BDI_R}, {0xFB50,0xFD3D,BDI_AL}, {0xFD3E,0xFD3F,BDI_ON}, - {0xFD40,0xFDFC,BDI_AL}, {0xFDFD,0xFDFD,BDI_ON}, {0xFDFE,0xFDFF,BDI_AL}, - {0xFE00,0xFE0F,BDI_NSM},{0xFE10,0xFE19,BDI_ON}, {0xFE20,0xFE23,BDI_NSM}, - {0xFE30,0xFE4F,BDI_ON}, {0xFE50,0xFE50,BDI_CS}, {0xFE51,0xFE51,BDI_ON}, - {0xFE52,0xFE52,BDI_CS}, {0xFE54,0xFE54,BDI_ON}, {0xFE55,0xFE55,BDI_CS}, - {0xFE56,0xFE5E,BDI_ON}, {0xFE5F,0xFE5F,BDI_ET}, {0xFE60,0xFE61,BDI_ON}, - {0xFE62,0xFE63,BDI_ES}, {0xFE64,0xFE66,BDI_ON}, {0xFE68,0xFE68,BDI_ON}, - {0xFE69,0xFE6A,BDI_ET}, {0xFE6B,0xFE6B,BDI_ON}, {0xFE70,0xFEFE,BDI_AL}, - {0xFEFF,0xFEFF,BDI_BN}, {0xFF01,0xFF02,BDI_ON}, {0xFF03,0xFF05,BDI_ET}, - {0xFF06,0xFF0A,BDI_ON}, {0xFF0B,0xFF0B,BDI_ES}, {0xFF0C,0xFF0C,BDI_CS}, - {0xFF0D,0xFF0D,BDI_ES}, {0xFF0E,0xFF0F,BDI_CS}, {0xFF10,0xFF19,BDI_EN}, - {0xFF1A,0xFF1A,BDI_CS}, {0xFF1B,0xFF20,BDI_ON}, {0xFF21,0xFF3A,BDI_L}, - {0xFF3B,0xFF40,BDI_ON}, {0xFF41,0xFF5A,BDI_L}, {0xFF5B,0xFF65,BDI_ON}, - {0xFF66,0xFFDF,BDI_L}, {0xFFE0,0xFFE1,BDI_ET}, {0xFFE2,0xFFE4,BDI_ON}, - {0xFFE5,0xFFE6,BDI_ET}, {0xFFE8,0xFFEE,BDI_ON}, {0xFFF9,0xFFFD,BDI_ON} -}; - -#define NUM_BDIPROP_SPANS ( sizeof( bidiPropList ) / sizeof( BidiPropList ) ) - -/* - *The original BidiMirrorList bidiMirrorList[] array has been replaced by - *uint32_t bidiOptMirrorList[] an array that represents a tree structure - *formed from the original mirror list by the Perl script - *create-mirrorlist.pl in Review #11191 - * - *below is the first part of the tree:- representing the uint16_t values - *0x0028, 0x0029, 0x003C, 0x003E, 0x005B, 0x005D - * - * 0 - * | - * ----0---- - * / | \ - * 2 3 5 - * / \ / \ / \ - * 8 9 C E B D - * - *Every uint16_t value that shares nybble ancestors share parent nodes - *on the tree. - *The tree is of fixed depth 4 and each node holds the following values - *{4-bits nybble, 12-bits next, 16-bits uint16_t mirror}. - *The tree is represented as an array of bit-masks. - * - *The nybble value is the value in the tree above. - *The next Offset is the index into the array which points to the next - *sibling for the current node (i.e. the next node horizintally to the right - *that shares the same parent). - *The mirror value is the code to be mapped. - */ -static const uint32_t bidiOptMirrorList[] = -{ - 0x00120000, 0x0FFF0000, 0x20050000, - 0x80040029, 0x9FFF0028, 0x30080000, - 0xC007003E, 0xEFFF003C, 0x500B0000, - 0xB00A005D, 0xDFFF005B, 0x700E0000, - 0xB00D007D, 0xDFFF007B, 0xA0100000, - 0xBFFF00BB, 0xBFFF0000, 0xBFFF00AB, - 0x215A0000, 0x00200000, 0x30170000, - 0x9016203A, 0xAFFF2039, 0x401A0000, - 0x50192046, 0x6FFF2045, 0x701D0000, - 0xD01C207E, 0xEFFF207D, 0x8FFF0000, - 0xD01F208E, 0xEFFF208D, 0x20A20000, - 0x00280000, 0x8023220B, 0x9024220C, - 0xA025220D, 0xB0262208, 0xC0272209, - 0xDFFF220A, 0x102A0000, 0x5FFF29F5, - 0x302D0000, 0xC02C223D, 0xDFFF223C, - 0x402F0000, 0x3FFF22CD, 0x50340000, - 0x20312253, 0x30322252, 0x40332255, - 0x5FFF2254, 0x603F0000, 0x40362265, - 0x50372264, 0x60382267, 0x70392266, - 0x803A2269, 0x903B2268, 0xA03C226B, - 0xB03D226A, 0xE03E226F, 0xFFFF226E, - 0x70500000, 0x00412271, 0x10422270, - 0x20432273, 0x30442272, 0x40452275, - 0x50462274, 0x60472277, 0x70482276, - 0x80492279, 0x904A2278, 0xA04B227B, - 0xB04C227A, 0xC04D227D, 0xD04E227C, - 0xE04F227F, 0xFFFF227E, 0x805E0000, - 0x00522281, 0x10532280, 0x20542283, - 0x30552282, 0x40562285, 0x50572284, - 0x60582287, 0x70592286, 0x805A2289, - 0x905B2288, 0xA05C228B, 0xB05D228A, - 0xFFFF2290, 0x90630000, 0x0060228F, - 0x10612292, 0x20622291, 0x8FFF29B8, - 0xA06A0000, 0x206522A3, 0x306622A2, - 0x60672ADE, 0x80682AE4, 0x90692AE3, - 0xBFFF2AE5, 0xB0730000, 0x006C22B1, - 0x106D22B0, 0x206E22B3, 0x306F22B2, - 0x407022B5, 0x507122B4, 0x607222B7, - 0x7FFF22B6, 0xC0790000, 0x907522CA, - 0xA07622C9, 0xB07722CC, 0xC07822CB, - 0xDFFF2243, 0xD0860000, 0x007B22D1, - 0x107C22D0, 0x607D22D7, 0x707E22D6, - 0x807F22D9, 0x908022D8, 0xA08122DB, - 0xB08222DA, 0xC08322DD, 0xD08422DC, - 0xE08522DF, 0xFFFF22DE, 0xE0950000, - 0x008822E1, 0x108922E0, 0x208A22E3, - 0x308B22E2, 0x408C22E5, 0x508D22E4, - 0x608E22E7, 0x708F22E6, 0x809022E9, - 0x909122E8, 0xA09222EB, 0xB09322EA, - 0xC09422ED, 0xDFFF22EC, 0xFFFF0000, - 0x009722F1, 0x109822F0, 0x209922FA, - 0x309A22FB, 0x409B22FC, 0x609C22FD, - 0x709D22FE, 0xA09E22F2, 0xB09F22F3, - 0xC0A022F4, 0xD0A122F6, 0xEFFF22F7, - 0x30AB0000, 0x00A80000, 0x80A52309, - 0x90A62308, 0xA0A7230B, 0xBFFF230A, - 0x2FFF0000, 0x90AA232A, 0xAFFF2329, - 0x70CC0000, 0x60B50000, 0x80AE2769, - 0x90AF2768, 0xA0B0276B, 0xB0B1276A, - 0xC0B2276D, 0xD0B3276C, 0xE0B4276F, - 0xFFFF276E, 0x70BC0000, 0x00B72771, - 0x10B82770, 0x20B92773, 0x30BA2772, - 0x40BB2775, 0x5FFF2774, 0xD0C10000, - 0x50BE27D6, 0x60BF27D5, 0xD0C027DE, - 0xEFFF27DD, 0xEFFF0000, 0x20C327E3, - 0x30C427E2, 0x40C527E5, 0x50C627E4, - 0x60C727E7, 0x70C827E6, 0x80C927E9, - 0x90CA27E8, 0xA0CB27EB, 0xBFFF27EA, - 0x90FD0000, 0x80DB0000, 0x30CF2984, - 0x40D02983, 0x50D12986, 0x60D22985, - 0x70D32988, 0x80D42987, 0x90D5298A, - 0xA0D62989, 0xB0D7298C, 0xC0D8298B, - 0xD0D92990, 0xE0DA298F, 0xFFFF298E, - 0x90E50000, 0x00DD298D, 0x10DE2992, - 0x20DF2991, 0x30E02994, 0x40E12993, - 0x50E22996, 0x60E32995, 0x70E42998, - 0x8FFF2997, 0xB0E70000, 0x8FFF2298, - 0xC0ED0000, 0x00E929C1, 0x10EA29C0, - 0x40EB29C5, 0x50EC29C4, 0xFFFF29D0, - 0xD0F70000, 0x00EF29CF, 0x10F029D2, - 0x20F129D1, 0x40F229D5, 0x50F329D4, - 0x80F429D9, 0x90F529D8, 0xA0F629DB, - 0xBFFF29DA, 0xFFFF0000, 0x50F92215, - 0x80FA29F9, 0x90FB29F8, 0xC0FC29FD, - 0xDFFF29FC, 0xAFFF0000, 0x21030000, - 0xB1002A2C, 0xC1012A2B, 0xD1022A2E, - 0xEFFF2A2D, 0x31080000, 0x41052A35, - 0x51062A34, 0xC1072A3D, 0xDFFF2A3C, - 0x610B0000, 0x410A2A65, 0x5FFF2A64, - 0x71110000, 0x910D2A7A, 0xA10E2A79, - 0xD10F2A7E, 0xE1102A7D, 0xFFFF2A80, - 0x81190000, 0x01132A7F, 0x11142A82, - 0x21152A81, 0x31162A84, 0x41172A83, - 0xB1182A8C, 0xCFFF2A8B, 0x91260000, - 0x111B2A92, 0x211C2A91, 0x311D2A94, - 0x411E2A93, 0x511F2A96, 0x61202A95, - 0x71212A98, 0x81222A97, 0x91232A9A, - 0xA1242A99, 0xB1252A9C, 0xCFFF2A9B, - 0xA1320000, 0x11282AA2, 0x21292AA1, - 0x612A2AA7, 0x712B2AA6, 0x812C2AA9, - 0x912D2AA8, 0xA12E2AAB, 0xB12F2AAA, - 0xC1302AAD, 0xD1312AAC, 0xFFFF2AB0, - 0xB13B0000, 0x01342AAF, 0x31352AB4, - 0x41362AB3, 0xB1372ABC, 0xC1382ABB, - 0xD1392ABE, 0xE13A2ABD, 0xFFFF2AC0, - 0xC1460000, 0x013D2ABF, 0x113E2AC2, - 0x213F2AC1, 0x31402AC4, 0x41412AC3, - 0x51422AC6, 0x61432AC5, 0xD1442ACE, - 0xE1452ACD, 0xFFFF2AD0, 0xD14F0000, - 0x01482ACF, 0x11492AD2, 0x214A2AD1, - 0x314B2AD4, 0x414C2AD3, 0x514D2AD6, - 0x614E2AD5, 0xEFFF22A6, 0xE1550000, - 0x315122A9, 0x415222A8, 0x515322AB, - 0xC1542AED, 0xDFFF2AEC, 0xFFFF0000, - 0x71572AF8, 0x81582AF7, 0x91592AFA, - 0xAFFF2AF9, 0x31700000, 0x0FFF0000, - 0x01650000, 0x815E3009, 0x915F3008, - 0xA160300B, 0xB161300A, 0xC162300D, - 0xD163300C, 0xE164300F, 0xFFFF300E, - 0x1FFF0000, 0x01673011, 0x11683010, - 0x41693015, 0x516A3014, 0x616B3017, - 0x716C3016, 0x816D3019, 0x916E3018, - 0xA16F301B, 0xBFFF301A, 0xFFFF0000, - 0xFFFF0000, 0x01750000, 0x8174FF09, - 0x9FFFFF08, 0x11780000, 0xC177FF1E, - 0xEFFFFF1C, 0x317B0000, 0xB17AFF3D, - 0xDFFFFF3B, 0x517F0000, 0xB17DFF5D, - 0xD17EFF5B, 0xFFFFFF60, 0x6FFF0000, - 0x0181FF5F, 0x2182FF63, 0x3FFFFF62 -}; - -#define NUM_BIDI_OPTMIRRORS (sizeof(bidiOptMirrorList)/sizeof(uint32_t)) - -#define MIRROR_NYBBLE_SHIFT 28 -#define MIRROR_NEXT_MASK 0x0FFF0000 -#define MIRROR_NEXT_SHIFT 16 -#define MIRROR_VALUE_MASK 0x0000FFFF -#define TRIPPLE_NYBBLE_NEG 4095 +#endif /* * Functions and static functions... */ - -/** Returns a directional type for different characters. The type is found in - * the lookup table bidiPropList using a binary search. Each element in the - * table is a range of code-points which share the same type, all arranged - * in ascending order. - */ -static Bidi_CharType classFromChWS(uint16_t ch) +/* UCDN uses a different ordering than Bidi does. We cannot + * change to the UCDN ordering, as the bidi-std.c code relies + * on the exact ordering (at least that N = ON = 0). We + * therefore map between the two using this small table. It + * also takes care of fudging LRI, RLI, FSI and PDI, that this + * code does not currently support. */ +static const uint8_t ucdn_to_bidi[] = { - int start = 0; - int end = NUM_BDIPROP_SPANS - 1; - int mid = end / 2; - - while (start <= end) - { - if (ch < bidiPropList[mid].first) - end = mid - 1; - else if (ch > bidiPropList[mid].last) - start = mid + 1; - else - { - if (bidiPropList[mid].type == BDI_B) - return BDI_ON; - return bidiPropList[mid].type; - } - - mid = (start + end) / 2; - } + BDI_L, /* UCDN_BIDI_CLASS_L = 0 */ + BDI_LRE, /* UCDN_BIDI_CLASS_LRE = 1 */ + BDI_LRO, /* UCDN_BIDI_CLASS_LRO = 2 */ + BDI_R, /* UCDN_BIDI_CLASS_R = 3 */ + BDI_AL, /* UCDN_BIDI_CLASS_AL = 4 */ + BDI_RLE, /* UCDN_BIDI_CLASS_RLE = 5 */ + BDI_RLO, /* UCDN_BIDI_CLASS_RLO = 6 */ + BDI_PDF, /* UCDN_BIDI_CLASS_PDF = 7 */ + BDI_EN, /* UCDN_BIDI_CLASS_EN = 8 */ + BDI_ES, /* UCDN_BIDI_CLASS_ES = 9 */ + BDI_ET, /* UCDN_BIDI_CLASS_ET = 10 */ + BDI_AN, /* UCDN_BIDI_CLASS_AN = 11 */ + BDI_CS, /* UCDN_BIDI_CLASS_CS = 12 */ + BDI_NSM, /* UCDN_BIDI_CLASS_NSM = 13 */ + BDI_BN, /* UCDN_BIDI_CLASS_BN = 14 */ + BDI_B, /* UCDN_BIDI_CLASS_B = 15 */ + BDI_S, /* UCDN_BIDI_CLASS_S = 16 */ + BDI_WS, /* UCDN_BIDI_CLASS_WS = 17 */ + BDI_ON, /* UCDN_BIDI_CLASS_ON = 18 */ + BDI_LRE, /* UCDN_BIDI_CLASS_LRI = 19 */ + BDI_RLE, /* UCDN_BIDI_CLASS_RLI = 20 */ + BDI_N, /* UCDN_BIDI_CLASS_FSI = 21 */ + BDI_N, /* UCDN_BIDI_CLASS_PDI = 22 */ +}; - return BDI_L; /* in the absence of anything better to do. */ -} +#define class_from_ch_ws(ch) (ucdn_to_bidi[ucdn_get_bidi_class(ch)]) -/** Return a direction for white-space on the second pass of the algorithm. - */ -Bidi_CharType Bidi_classFromChN(uint16_t ch) +/* Return a direction for white-space on the second pass of the algorithm. */ +static fz_bidi_chartype class_from_ch_n(uint32_t ch) { - int fromChWS = classFromChWS(ch); - - if (fromChWS == BDI_S || fromChWS == BDI_WS) + fz_bidi_chartype from_ch_ws = class_from_ch_ws(ch); + if (from_ch_ws == BDI_S || from_ch_ws == BDI_WS) return BDI_N; - - return fromChWS; + return from_ch_ws; } - -int Bidi_isEuropeanNumber(const uint16_t *str, unsigned int len) +static int +is_european_number(const uint32_t *str, unsigned int len) { - const uint16_t *end = str + len; + const uint32_t *end = str + len; for ( ; str != end; str++) { - const uint16_t u = *str; + const uint32_t u = *str; if ((u >= UNICODE_RTL_START && u < UNICODE_ARABIC_INDIC_DIGIT_ZERO) || - (u > UNICODE_ARABIC_INDIC_DIGIT_NINE && u < UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO) || - (u > UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE && u <= UNICODE_RTL_END)) + (u > UNICODE_ARABIC_INDIC_DIGIT_NINE && u < UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO) || + (u > UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE && u <= UNICODE_RTL_END)) { /* This is just a normal RTL character or accent */ return FALSE; } - else if(!( - (u >= UNICODE_DIGIT_ZERO && u <= UNICODE_DIGIT_NINE) || + else if (!((u >= UNICODE_DIGIT_ZERO && u <= UNICODE_DIGIT_NINE) || (u == UNICODE_SUPERSCRIPT_TWO) || (u == UNICODE_SUPERSCRIPT_THREE) || (u == UNICODE_SUPERSCRIPT_ONE) || @@ -574,9 +197,7 @@ int Bidi_isEuropeanNumber(const uint16_t *str, unsigned int len) (u >= UNICODE_CIRCLED_DIGIT_ONE && u <= UNICODE_NUMBER_TWENTY_FULL_STOP) || (u == UNICODE_CIRCLED_DIGIT_ZERO) || (u >= UNICODE_FULLWIDTH_DIGIT_ZERO && u <= UNICODE_FULLWIDTH_DIGIT_NINE) || - (u == UNICODE_ZERO_WIDTH_NON_JOINER) - ) - ) + (u == UNICODE_ZERO_WIDTH_NON_JOINER))) { return FALSE; } @@ -584,147 +205,101 @@ int Bidi_isEuropeanNumber(const uint16_t *str, unsigned int len) return TRUE; } -/** The Bidi_mirrorChar function tranverses the tree looking for nybble - * matches. - * - * EXAMPLE: - * If uint16_t value u=0x005D has been passed to Bidi_mirrorChar for testing, - * it is separated into nybbles 0,0,5,D - * The first node in the tree (index 0 in the array) is the starting point, - * and luckily the first 4-bits hold a 0, which matches. We can therefore - * drop down to the child to check that. The child of a node is stored in - * the next array position, in this case index 1. - * The nybble of index 1 is a 0 and matches the second nybble we are - * searching for. Because of the match, the next child index 2 is moved to. - * Now the nybble at index 2 of the tree is a 2 and doesn't match the 5 we - * are searching for. Bits 5-16 of the array represent the offset to the - * next sibling. In this case the offset is 5, when we move to this array - * value we see that it holds a 3 nybble, again this doesn't match, so we - * get the next offset, which is 8, and move to that sibling. - * The sibling holds the nybble 5, which is the next one we want; so - * increment the index to its child. The child doesn't match and holds a B - * instead of a D. So we move to the next sibling ... which matches!! - * This node holds the mirror character 0x005B in bits 17-32. - */ -uint16_t Bidi_mirrorChar(const uint16_t u) +static void +do_callback(const uint32_t *fragment, + size_t fragment_len, + int block_r2l, + uint32_t mirror, + void *arg, + fz_bidi_fragment_callback *callback) { - int i, index; - uint16_t uCpy; - uint8_t n, nybble; + char char_r2l = block_r2l; - if((u < UNICODE_RTL_START) || (u > UNICODE_RTL_END)) - { - /* uint16_t value lies outside of RTL character range - * and could therefore be a mirrorable charcter - */ - index = 0; - uCpy = (uint16_t)u; - /* look through each nybble of the uint16_t and search in tree - * for matches - */ - for(i = 0; i < 4; i++) - { - n = (uint8_t)(uCpy >> 12); /* the current nybble of the target */ - /* the nybble of the first child node */ - nybble = (uint8_t)(bidiOptMirrorList[index] >> MIRROR_NYBBLE_SHIFT); + char_r2l = block_r2l && !is_european_number(fragment, fragment_len); - /* search through node siblings for the matching nybble */ - while(nybble != n) - { - /* index to next sibling */ - index = (bidiOptMirrorList[index] & MIRROR_NEXT_MASK) >> MIRROR_NEXT_SHIFT; - if(index == TRIPPLE_NYBBLE_NEG) /* 12-bit next value is -1 */ - { - /* no more siblings */ - return UNICODE_EOS; - } - nybble = (uint8_t)(bidiOptMirrorList[index] >> MIRROR_NYBBLE_SHIFT); - } - uCpy <<= 4; - index++; - } - return (uint16_t)(bidiOptMirrorList[--index] & MIRROR_VALUE_MASK); - } - return UNICODE_EOS; + (*callback)(fragment, fragment_len, block_r2l, char_r2l, mirror, arg); } -/** Searches a RTL fragment for a mirror character +/* Searches a RTL fragment for a mirror character * When it finds one it creates a separate fragment for the * character and the surrounding fragments. It passes the mirrored - * uint16_t back through the callback. + * character back through the callback. */ -static void Bidi_createFragmentMirrors(const uint16_t *text, - int len, - Bidi_Fragment_Callback callback, - void *arg) +static void +create_fragment_mirrors(const uint32_t *text, + int len, + fz_bidi_fragment_callback *callback, + void *arg) { int i; int lastPtr; - uint16_t mirror; + uint32_t mirror; assert(text != NULL); assert(len > 0); lastPtr = 0; for (i = 0; i < len; i ++) { - mirror = Bidi_mirrorChar(text[i]); - if(mirror != UNICODE_EOS) + mirror = ucdn_mirror(text[i]); + if (mirror != UNICODE_EOS) { /* create preceding fragment */ - if(i > lastPtr) + if (i > lastPtr) { - (*callback)(&text[lastPtr], - i - lastPtr, - TRUE, - UNICODE_EOS, - arg); + do_callback(&text[lastPtr], + i - lastPtr, + TRUE, + UNICODE_EOS, + arg, + callback); DBUGVF(("create mirror fragment for %x\n",(int)text[i])); } /* create mirror fragment */ - (*callback)(&text[i], - 1, - TRUE, - mirror, - arg); + do_callback(&text[i], + 1, + TRUE, + mirror, + arg, + callback); lastPtr = i + 1; } } - if(lastPtr < len) + if (lastPtr < len) { /* create end fragment */ - (*callback)(&text[lastPtr], - len - lastPtr, - TRUE, - UNICODE_EOS, - arg); + do_callback(&text[lastPtr], + len - lastPtr, + TRUE, + UNICODE_EOS, + arg, + callback); } } - - -/** Determines the character classes for all following - * passes of the algorithm. A character class is basically the type of Bidi - * behaviour that the character exhibits. - */ -void Bidi_classifyCharacters(const uint16_t *text, - Bidi_CharType *types, - int len, - Bidi_Flags flags) +/* Determines the character classes for all following + * passes of the algorithm. A character class is basically the type of Bidi + * behaviour that the character exhibits. + */ +static void +classify_characters(const uint32_t *text, + fz_bidi_chartype *types, + int len, + fz_bidi_flags flags) { int i; - if ((flags & Bidi_classifyWhiteSpace)!=0) + if ((flags & BIDI_CLASSIFY_WHITE_SPACE)!=0) { for (i = 0; i < len; i++) { - types[i] = classFromChWS(text[i]); + types[i] = class_from_ch_ws(text[i]); } } else { #ifdef DEBUG_BIDI_VERBOSE - fprintf(stderr, "Text: "); + fprintf(stderr, "Text: "); for (i = 0; i < len; i++) { /* So that we can actually sort of read the debug string, any @@ -732,18 +307,16 @@ void Bidi_classifyCharacters(const uint16_t *text, * value from 0-9, making non-english characters appear * as numbers */ - fprintf(stderr, "%c", (text[i] <= 127 && text[i ]>= 32)? - text[i] - :(char)((text[i] % 9) + 48) - ); + fprintf(stderr, "%c", (text[i] <= 127 && text[i] >= 32) ? + text[i] : text[i] % 9 + '0'); } - fprintf(stderr, "\nTypes: "); + fprintf(stderr, "\nTypes: "); #endif for (i = 0; i < len; i++) { - types[i] = Bidi_classFromChN(text[i]); + types[i] = class_from_ch_n(text[i]); #ifdef DEBUG_BIDI_VERBOSE - fprintf(stderr, "%c", charFromTypes[(int)types[i]]); + fprintf(stderr, "%c", char_from_types[(int)types[i]]); #endif } #ifdef DEBUG_BIDI_VERBOSE @@ -752,13 +325,11 @@ void Bidi_classifyCharacters(const uint16_t *text, } } - - -/** Determines the base level of the text. - * Implements rule P2 of the uint16_t Bidi Algorithm. - * Note: Ignores explicit embeddings - */ -static Bidi_Level baseLevelFromText(Bidi_CharType *types, int len) +/* Determines the base level of the text. + * Implements rule P2 of the Unicode Bidi Algorithm. + * Note: Ignores explicit embeddings + */ +static fz_bidi_level base_level_from_text(fz_bidi_chartype *types, int len) { int i; @@ -768,41 +339,38 @@ static Bidi_Level baseLevelFromText(Bidi_CharType *types, int len) { /* strong left */ case BDI_L: - return Bidi_LeftToRight; + return BIDI_LEFT_TO_RIGHT; /* strong right */ case BDI_R: case BDI_AL: - return Bidi_RightToLeft; + return BIDI_RIGHT_TO_LEFT; } } - return Bidi_LeftToRight; + return BIDI_LEFT_TO_RIGHT; } - - -static Bidi_Direction directionFromType( Bidi_CharType type ) +static fz_bidi_direction direction_from_type(fz_bidi_chartype type) { - switch( type ) + switch (type) { case BDI_L: case BDI_EN: - return Bidi_LeftToRight; + return BIDI_LEFT_TO_RIGHT; case BDI_R: case BDI_AL: - return Bidi_RightToLeft; + return BIDI_RIGHT_TO_LEFT; default: - return Bidi_Neutral; + return BIDI_NEUTRAL; } } - - -static void classifyQuotedBlocks(const uint16_t *text, - Bidi_CharType *types, - size_t len) +static void +classify_quoted_blocks(const uint32_t *text, + fz_bidi_chartype *types, + size_t len) { size_t i; int inQuote = FALSE; @@ -815,13 +383,13 @@ static void classifyQuotedBlocks(const uint16_t *text, */ for (i = 0; i < len; i++) { - switch ( directionFromType(types[i]) ) + switch (direction_from_type(types[i])) { - case Bidi_LeftToRight: + case BIDI_LEFT_TO_RIGHT: ltrFound = TRUE; break; - case Bidi_RightToLeft: + case BIDI_RIGHT_TO_LEFT: rtlFound = TRUE; break; @@ -833,7 +401,7 @@ static void classifyQuotedBlocks(const uint16_t *text, /* Only make any changes if *both* LTR and RTL characters exist * in this text. */ - if ( !ltrFound || !rtlFound ) + if (!ltrFound || !rtlFound) { return; } @@ -845,10 +413,10 @@ static void classifyQuotedBlocks(const uint16_t *text, /* If we're already in a quote then terminate it, * else start a new block. */ - if ( inQuote ) + if (inQuote) { inQuote = FALSE; - if ( pdfNeeded ) + if (pdfNeeded) { pdfNeeded = FALSE; types[i] = BDI_PDF; @@ -857,21 +425,19 @@ static void classifyQuotedBlocks(const uint16_t *text, else { size_t j; - int done = FALSE; + int done = FALSE; inQuote = TRUE; /* Find the first strong right or left type and * use that to determine whether we should classify - * the quote as LRE or RLE. Or neither, if we + * the quote as LRE or RLE. Or neither, if we * hit another quote before any strongly-directional * character. */ - for ( j = i + 1; - !done && (j < len) && text[j] != '"'; - ++j ) + for (j = i + 1; !done && (j < len) && text[j] != '"'; ++j) { - switch( types[j] ) + switch(types[j]) { case BDI_RLE: case BDI_LRE: @@ -880,16 +446,16 @@ static void classifyQuotedBlocks(const uint16_t *text, case BDI_L: case BDI_EN: - types[i] = BDI_LRE; + types[i] = BDI_LRE; pdfNeeded = TRUE; - done = TRUE; + done = TRUE; break; case BDI_R: case BDI_AL: - types[i] = BDI_RLE; + types[i] = BDI_RLE; pdfNeeded = TRUE; - done = TRUE; + done = TRUE; break; default: @@ -901,23 +467,21 @@ static void classifyQuotedBlocks(const uint16_t *text, } } - - -/* Creates a buffer with an embedding level for every uint16_t in the - * given text. Also determines the base level and returns it in +/* Creates a buffer with an embedding level for every character in the + * given text. Also determines the base level and returns it in * *baseDir if *baseDir does not initially contain a valid direction. */ -static Bidi_Level * -createLevels(fz_context *ctx, - const uint16_t *text, +static fz_bidi_level * +create_levels(fz_context *ctx, + const uint32_t *text, size_t len, - Bidi_Direction *baseDir, + fz_bidi_direction *baseDir, int resolveWhiteSpace, - int bidiFlag) + int flags) { - Bidi_Level *levels; - Bidi_CharType *types = NULL; - Bidi_Level baseLevel; + fz_bidi_level *levels; + fz_bidi_chartype *types = NULL; + fz_bidi_level baseLevel; levels = fz_malloc(ctx, len * sizeof(*levels)); @@ -925,27 +489,27 @@ createLevels(fz_context *ctx, fz_try(ctx) { - types = fz_malloc(ctx, len * sizeof(Bidi_CharType)); + types = fz_malloc(ctx, len * sizeof(fz_bidi_chartype)); - Bidi_classifyCharacters(text, types, len, bidiFlag); + classify_characters(text, types, len, flags); - if (*baseDir != Bidi_LeftToRight && *baseDir != Bidi_RightToLeft) + if (*baseDir != BIDI_LEFT_TO_RIGHT && *baseDir != BIDI_RIGHT_TO_LEFT) { /* Derive the base level from the text and * update *baseDir in case the caller wants to know. */ - baseLevel = baseLevelFromText(types, len); - *baseDir = ODD(baseLevel)==1 ? Bidi_RightToLeft : Bidi_LeftToRight; + baseLevel = base_level_from_text(types, len); + *baseDir = ODD(baseLevel)==1 ? BIDI_RIGHT_TO_LEFT : BIDI_LEFT_TO_RIGHT; } else { - baseLevel = (Bidi_Level)*baseDir; + baseLevel = (fz_bidi_level)*baseDir; } { /* Replace tab with base direction, i.e. make tab appear as * 'strong left' if the base direction is left-to-right and - * 'strong right' if base direction is right-to-left. This + * 'strong right' if base direction is right-to-left. This * allows Layout to implicitly treat tabs as 'segment separators'. */ size_t i; @@ -954,28 +518,28 @@ createLevels(fz_context *ctx, { if (text[i]=='\t') { - types[i] = (*baseDir == Bidi_RightToLeft) ? BDI_R : BDI_L; + types[i] = (*baseDir == BIDI_RIGHT_TO_LEFT) ? BDI_R : BDI_L; } } } - /* Look for quotation marks. Classify them as RLE or LRE + /* Look for quotation marks. Classify them as RLE or LRE * or leave them alone, depending on what follows them. */ - classifyQuotedBlocks( text, types, len ); + classify_quoted_blocks(text, types, len); /* Work out the levels and character types... */ - (void)Bidi_resolveExplicit(baseLevel, BDI_N, types, levels, len, 0); - Bidi_resolveWeak(ctx, baseLevel, types, levels, len); - Bidi_resolveNeutrals(baseLevel,types, levels, len); - Bidi_resolveImplicit(types, levels, len); + (void)fz_bidi_resolve_explicit(baseLevel, BDI_N, types, levels, len, 0); + fz_bidi_resolve_weak(ctx, baseLevel, types, levels, len); + fz_bidi_resolve_neutrals(baseLevel,types, levels, len); + fz_bidi_resolve_implicit(types, levels, len); - Bidi_classifyCharacters(text, types, len, Bidi_classifyWhiteSpace); + classify_characters(text, types, len, BIDI_CLASSIFY_WHITE_SPACE); if (resolveWhiteSpace) { /* resolve whitespace */ - Bidi_resolveWhitespace(baseLevel, types, levels, len); + fz_bidi_resolve_whitespace(baseLevel, types, levels, len); } /* The levels buffer now has odd and even numbers indicating @@ -1005,32 +569,30 @@ createLevels(fz_context *ctx, return levels; } - - -/* Partitions the given uint16_t sequence into one or more unidirectional +/* Partitions the given character sequence into one or more unidirectional * fragments and invokes the given callback function for each fragment. */ -void Bidi_fragmentText(fz_context *ctx, - const uint16_t *text, - size_t textlen, - Bidi_Direction *baseDir, - Bidi_Fragment_Callback callback, - void *arg, - int bidiFlag) +void fz_bidi_fragment_text(fz_context *ctx, + const uint32_t *text, + size_t textlen, + fz_bidi_direction *baseDir, + fz_bidi_fragment_callback *callback, + void *arg, + int flags) { size_t startOfFragment; size_t i; - Bidi_Level *levels; + fz_bidi_level *levels; if (text == NULL || callback == NULL || textlen == 0) return; - DBUGH(("Bidi_fragmentText( '%S', len = %d )\n", text, textlen )); + DBUGH(("fz_bidi_fragment_text('%S', len = %d)\n", text, textlen)); - levels = createLevels(ctx, text, textlen, baseDir, FALSE, bidiFlag); + levels = create_levels(ctx, text, textlen, baseDir, FALSE, flags); /* We now have an array with an embedding level - * for each uint16_t in text. + * for each character in text. */ assert(levels != NULL); @@ -1045,31 +607,31 @@ void Bidi_fragmentText(fz_context *ctx, * Create a text object for it, then start * a new fragment. */ - if(ODD(levels[startOfFragment]) != 0) + if (ODD(levels[startOfFragment]) != 0) { /* if RTL check for mirrors and create sub-frags */ - Bidi_createFragmentMirrors(&text[startOfFragment], - i - startOfFragment, - callback, - arg); + create_fragment_mirrors(&text[startOfFragment], + i - startOfFragment, + callback, + arg); } else { - /* otherwise create 1 fragment */ - (*callback)(&text[startOfFragment], + do_callback(&text[startOfFragment], i - startOfFragment, ODD(levels[startOfFragment]), UNICODE_EOS, - arg); + arg, + callback); } startOfFragment = i; } } - /* Now i == textlen. Deal with the final (or maybe only) fragment. */ - if(ODD(levels[startOfFragment]) != 0) + /* Now i == textlen. Deal with the final (or maybe only) fragment. */ + if (ODD(levels[startOfFragment]) != 0) { /* if RTL check for mirrors and create sub-frags */ - Bidi_createFragmentMirrors(&text[startOfFragment], + create_fragment_mirrors(&text[startOfFragment], i - startOfFragment, callback, arg); @@ -1077,11 +639,12 @@ void Bidi_fragmentText(fz_context *ctx, else { /* otherwise create 1 fragment */ - (*callback)(&text[startOfFragment], + do_callback(&text[startOfFragment], i - startOfFragment, ODD(levels[startOfFragment]), UNICODE_EOS, - arg); + arg, + callback); } } fz_always(ctx) diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c index c15eaca2..3cf207c2 100644 --- a/source/fitz/stext-device.c +++ b/source/fitz/stext-device.c @@ -935,7 +935,7 @@ fz_stext_fill_image(fz_context *ctx, fz_device *dev, const fz_image *img, const } static int -fz_bidi_direction(int bidiclass, int curdir) +direction_from_bidi_class(int bidiclass, int curdir) { switch (bidiclass) { @@ -994,7 +994,7 @@ fz_bidi_reorder_span(fz_stext_span *span) curdir = 1; for (b = 0; b < span->len; b++) { - dir = fz_bidi_direction(ucdn_get_bidi_class(span->text[b].c), curdir); + dir = direction_from_bidi_class(ucdn_get_bidi_class(span->text[b].c), curdir); if (dir != curdir) { fz_bidi_reorder_run(span, a, b, curdir); diff --git a/source/html/html-layout.c b/source/html/html-layout.c index af480cbc..96cb6453 100644 --- a/source/html/html-layout.c +++ b/source/html/html-layout.c @@ -941,7 +941,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p t += fz_chartorune(&c, t); if (node->mirror) { - c = Bidi_mirrorChar(c); + c = ucdn_mirror(c); } g = fz_encode_character(ctx, node->style->font, c); if (g) @@ -969,7 +969,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p s += fz_chartorune(&c, s); if (node->mirror) { - c = Bidi_mirrorChar(c); + c = ucdn_mirror(c); } g = fz_encode_character(ctx, node->style->font, c); if (g) @@ -1478,8 +1478,8 @@ fz_layout_html(fz_context *ctx, fz_html *box, float w, float h, float em) typedef struct { - uint16_t *buffer; - size_t max; + uint32_t *data; + size_t cap; size_t len; } uni_buf; @@ -1504,26 +1504,25 @@ static size_t utf8len(const char *text) return len; } -static void newFragCb(const uint16_t *fragment, - size_t fragmentLen, - int rightToLeft, - uint16_t mirror, +static void newFragCb(const uint32_t *fragment, + size_t fragment_len, + int block_r2l, + int char_r2l, + uint32_t mirror, void *arg) { bidi_data *data = (bidi_data *)arg; - size_t fragmentOffset = fragment - data->buffer->buffer; - int charDirR2L = rightToLeft; + size_t fragment_offset = fragment - data->buffer->data; - if((fragmentOffset != 0) && - Bidi_isEuropeanNumber(fragment, fragmentLen)) - { - /* fragment contains digits only */ - charDirR2L = 0; - } + /* The Picsel code used to (effectively) do: + * if (fragment_offset == 0) char_r2l = block_r2l; + * but that makes no sense to me. All that could do is stop + * a european number being treated as l2r because it was the + * first thing on a line. */ /* We are guaranteed that fragmentOffset will be at the beginning * of flow. */ - while (fragmentLen > 0) + while (fragment_len > 0) { size_t len; @@ -1535,22 +1534,22 @@ static void newFragCb(const uint16_t *fragment, { /* Must be text */ len = utf8len(data->flow->content.text); - if (len > fragmentLen) + if (len > fragment_len) { /* We need to split this flow box */ - (void)split_flow(data->ctx, data->pool, data->flow, fragmentLen); + (void)split_flow(data->ctx, data->pool, data->flow, fragment_len); len = utf8len(data->flow->content.text); } } /* This flow box is entirely contained within this fragment. */ - data->flow->block_r2l = rightToLeft; - data->flow->char_r2l = charDirR2L; + data->flow->block_r2l = block_r2l; + data->flow->char_r2l = char_r2l; if (mirror != 0) data->flow->mirror = 1; data->flow = data->flow->next; - fragmentOffset += len; - fragmentLen -= len; + fragment_offset += len; + fragment_len -= len; } } @@ -1560,7 +1559,7 @@ detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_h fz_html_flow *end = flow; const char *text; bidi_data data; - Bidi_Direction baseDir = -1; + fz_bidi_direction baseDir = -1; /* Stage 1: Gather the text from the flow up into a single buffer */ buffer->len = 0; @@ -1589,13 +1588,13 @@ detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_h break; /* Make sure the buffer is large enough */ - if (buffer->len + len > buffer->max) + if (buffer->len + len > buffer->cap) { - size_t newmax = buffer->max * 2; - if (newmax == 0) - newmax = 128; /* Sensible small default */ - buffer->buffer = fz_resize_array(ctx, buffer->buffer, newmax, sizeof(uint16_t)); - buffer->max = newmax; + size_t newcap = buffer->cap * 2; + if (newcap == 0) + newcap = 128; /* Sensible small default */ + buffer->data = fz_resize_array(ctx, buffer->data, newcap, sizeof(uint32_t)); + buffer->cap = newcap; } /* Expand the utf8 text into Unicode and store it in the buffer */ @@ -1603,7 +1602,7 @@ detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_h { int rune; text += fz_chartorune(&rune, text); - buffer->buffer[buffer->len++] = (uint16_t)rune; + buffer->data[buffer->len++] = rune; } end = end->next; @@ -1614,12 +1613,7 @@ detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_h data.pool = pool; data.flow = flow; data.buffer = buffer; - Bidi_fragmentText(ctx, - buffer->buffer, - buffer->len, - &baseDir, - newFragCb, &data, - 0 /* Flags */); + fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &baseDir, &newFragCb, &data, 0 /* Flags */); } static void |