summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mupdf/fitz.h2
-rw-r--r--include/mupdf/fitz/bidi.h205
-rw-r--r--platform/win32/libmupdf.vcproj12
-rw-r--r--source/fitz/bidi-impl.h37
-rw-r--r--source/fitz/bidi-std.c1185
-rw-r--r--source/fitz/bidi.c1309
6 files changed, 2750 insertions, 0 deletions
diff --git a/include/mupdf/fitz.h b/include/mupdf/fitz.h
index cf78060c..ca075ebc 100644
--- a/include/mupdf/fitz.h
+++ b/include/mupdf/fitz.h
@@ -60,4 +60,6 @@
#include "mupdf/fitz/output-svg.h"
#include "mupdf/fitz/output-tga.h"
+#include "mupdf/fitz/bidi.h"
+
#endif
diff --git a/include/mupdf/fitz/bidi.h b/include/mupdf/fitz/bidi.h
new file mode 100644
index 00000000..0b5b9553
--- /dev/null
+++ b/include/mupdf/fitz/bidi.h
@@ -0,0 +1,205 @@
+/**
+ * Bidirectional text processing.
+ *
+ */
+/* Copyright (C) Picsel, 2004-2008. All Rights Reserved. */
+/** @defgroup BidiText
+ * @ingroup EpageText
+ * @{
+ * Bidirectional text processing.
+ *
+ * Processes Unicode text by arranging the characters into an order suitable
+ * for display. E.g. Hebrew text will be arranged from right-to-left and
+ * any English within the text will remain in the left-to-right order.
+ * Characters such as parenthesis will be substituted for their mirrored
+ * equivalents if they are part of text which must be reversed.
+ *
+ * This is an implementation of the Unicode Bidirectional Algorithm which
+ * can be found here: http://www.unicode.org/reports/tr9/ and is based
+ * on the reference implementation found on Unicode.org.
+ */
+
+#ifndef BIDI_BIDI_H
+#define BIDI_BIDI_H
+
+#include "mupdf/fitz/system.h"
+
+
+/* Bidirectional Character Types
+ * as defined by the Unicode Bidirectional Algorithm Table 3-7.
+ * The list of bidirectional character types here is not grouped the
+ * same way as the table 3-7, since the numeric values for the types
+ * are chosen to keep the state and action tables compact.
+ */
+enum
+{
+ /* input types */
+ /* ON MUST be zero, code relies on ON = N = 0 */
+ BDI_ON = 0, /**< Other Neutral */
+ BDI_L, /**< Left-to-right Letter */
+ BDI_R, /**< Right-to-left Letter */
+ BDI_AN, /**< Arabic Number */
+ BDI_EN, /**< European Number */
+ BDI_AL, /**< Arabic Letter (Right-to-left) */
+ BDI_NSM, /**< Non-spacing Mark */
+ BDI_CS, /**< Common Separator */
+ BDI_ES, /**< European Separator */
+ BDI_ET, /**< European Terminator (post/prefix e.g. $ and %) */
+
+ /* resolved types */
+ BDI_BN, /**< Boundary neutral (type of RLE etc after explicit levels)*/
+
+ /* input types, */
+ BDI_S, /**< Segment Separator (TAB) used only in L1 */
+ BDI_WS, /**< White space used only in L1 */
+ BDI_B, /**< Paragraph Separator (aka as PS) */
+
+ /* types for explicit controls */
+ BDI_RLO, /**< these are used only in X1-X9 */
+ BDI_RLE,
+ BDI_LRO,
+ BDI_LRE,
+ BDI_PDF,
+
+ /* resolved types, also resolved directions */
+ BDI_N = BDI_ON /**< alias, where ON, WS and S are treated the same */
+};
+
+typedef enum Bidi_Direction
+{
+ Bidi_LeftToRight = 0,
+ Bidi_RightToLeft = 1,
+ Bidi_Neutral = 2
+}
+Bidi_Direction;
+
+typedef enum Bidi_Flags
+{
+ Bidi_classifyWhiteSpace = 1,
+ Bidi_replaceTab = 2
+}
+Bidi_Flags;
+
+typedef int Bidi_CharType; /**< Same as in the third-party library */
+typedef int Bidi_Level; /**< Same as in the third-party library.
+ * Note: Max level is 61 */
+
+enum
+{
+ Bidi_LevelMax = 61
+};
+
+
+
+/**
+ * Prototype for function supplied to Bidi_processLine and
+ * invoked for each fragment.
+ *
+ * @param offset first character in fragment
+ * @param fragmentLen number of characters in fragment
+ * @param rightToLeft true if fragment is right-to-left
+ * @param mirror The mirror code of the fragment if it exists
+ * @param arg data from caller of Bidi_processLine
+ * @param isRtlNumber true if fragment contains only RTL numbers.
+ */
+typedef void (Bidi_PL_Fragment_Callback)(unsigned int offset,
+ size_t fragmentLen,
+ int rightToLeft,
+ uint16_t mirror,
+ void *arg,
+ int isRtlNumber);
+
+
+
+/**
+ * Prototype for function supplied to Bidi_processLine, which invokes it
+ * repeatedly to build a single 'string' to represent the entire line.
+ *
+ * @param arg data from caller of Bidi_processLine.
+ * @param objText receives pointer to either the object's text or
+ * NULL if there are no more objects on the line.
+ * @param objLength receives length of object's text.
+ * @param more receives true if there is more text on this 'line'.
+ * @param explicitDirection receives Explicit direction of object'stext.
+ * Bidi_Neutral can be passed if no Direction is specified.
+ */
+typedef void (Bidi_PL_NextObj_Callback)(void *arg,
+ const uint16_t **objText,
+ size_t *objLength,
+ int *more,
+ Bidi_Direction *explicitDirection);
+
+
+
+/**
+ * Converts text from logical to display order. The conversion
+ * is in-place, i.e. the given string is changed by this function.
+ *
+ * @param text input/output text
+ * @param[in] baseDir base direction
+ * @param[in] outputDir render direction
+ * @param[in] len string length
+ *
+ * @return error
+ */
+void Bidi_processText(fz_context *ctx,
+ uint16_t *text,
+ Bidi_Direction baseDir,
+ Bidi_Direction outputDir,
+ int len);
+
+
+
+/**
+ * Determines the bidi class for a single character
+ *
+ * @param[in] ch The character
+ *
+ * @return The bidi character type
+ */
+Bidi_CharType Bidi_classFromChN(uint16_t ch);
+
+
+
+/**
+ * Determines the character classes for given text
+ *
+ * @param[in] text input text
+ * @param[out] types output types
+ * @param[in] len text length
+ * @param[in] flags classifyWhiteSpace, forceRightToLeft
+ */
+void Bidi_classifyCharacters(const uint16_t *text,
+ Bidi_CharType *types,
+ int len,
+ Bidi_Flags flags);
+
+
+
+/**
+ * Iterates over a collection of strings, and rearranges them
+ * according to their directions.
+ *
+ * @param[in] nextObjCb function invoked repeatedly to describe the
+ * collection of strings on the line.
+ * @param[in] fragmentCb function to be invoked for each fragment.
+ * @param[in] callerData caller-defined structure which will be passed
+ * to each of the callback functions.
+ * @param[in] bidiFlag Bidi flag passed to the function.
+ * @param[out] more Location to place true if the "line" has more
+ * data
+ *
+ * @returns direction of paragraph
+ */
+Bidi_Direction Bidi_processLine(fz_context *ctx,
+ Bidi_PL_NextObj_Callback nextObjCb,
+ Bidi_PL_Fragment_Callback fragmentCb,
+ void *callerData,
+ int bidiFlag,
+ int *more);
+
+
+
+#endif /* BIDI_BIDI_H */
+
+/** @} */
diff --git a/platform/win32/libmupdf.vcproj b/platform/win32/libmupdf.vcproj
index 15d921bd..460a7442 100644
--- a/platform/win32/libmupdf.vcproj
+++ b/platform/win32/libmupdf.vcproj
@@ -727,6 +727,14 @@
>
</File>
<File
+ RelativePath="..\..\source\fitz\bidi-std.c"
+ >
+ </File>
+ <File
+ RelativePath="..\..\source\fitz\bidi.c"
+ >
+ </File>
+ <File
RelativePath="..\..\source\fitz\bitmap.c"
>
</File>
@@ -1386,6 +1394,10 @@
>
</File>
<File
+ RelativePath="..\..\include\mupdf\fitz\bidi.h"
+ >
+ </File>
+ <File
RelativePath="..\..\include\mupdf\fitz\bitmap.h"
>
</File>
diff --git a/source/fitz/bidi-impl.h b/source/fitz/bidi-impl.h
new file mode 100644
index 00000000..86aeb8d1
--- /dev/null
+++ b/source/fitz/bidi-impl.h
@@ -0,0 +1,37 @@
+// File: Bidi.h
+//
+/* For use with Bidi Reference Implementation
+ For more information see the associated file bidi.cpp
+
+ Credits:
+ -------
+ Written by: Asmus Freytag
+ Command line interface by: Rick McGowan
+ Verification (v24): Doug Felt
+
+ Disclaimer and legal rights:
+ ---------------------------
+ Copyright (C) 1999-2009, ASMUS, Inc. All Rights Reserved.
+ Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
+
+ THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+ BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+ OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+
+ The files bidi.rc, and resource.h are distributed together with this file and are included
+ in the above definition of software.
+*/
+// Copyright (C) 1999-2009, ASMUS, Inc. All Rights Reserved
+
+#include "mupdf/fitz.h"
+
+void Bidi_resolveNeutrals(int baselevel, int *pcls, const int *plevel, int cch);
+void Bidi_resolveImplicit(const int * pcls, int * plevel, int cch);
+void Bidi_resolveWeak(fz_context *ctx, int baselevel, int *pcls, int *plevel, int cch);
+void Bidi_resolveWhitespace(int baselevel, const int *pcls, int *plevel, int cch);
+int Bidi_resolveExplicit(int level, int dir, int * pcls, int * plevel, int cch, int nNest);
diff --git a/source/fitz/bidi-std.c b/source/fitz/bidi-std.c
new file mode 100644
index 00000000..e8282b96
--- /dev/null
+++ b/source/fitz/bidi-std.c
@@ -0,0 +1,1185 @@
+// Extracted from Bidi.cpp - version 26
+
+// Reference implementation for Unicode Bidirectional Algorithm
+
+// Bidi include file
+#include "mupdf/fitz.h"
+#include "bidi-impl.h"
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+/*------------------------------------------------------------------------
+ File: Bidi.Cpp
+
+ Description
+ -----------
+
+ Sample Implementation of the Unicode Bidirectional Algorithm as it
+ was revised by Revision 5 of the Uniode Technical Report # 9
+ (1999-8-17)
+
+ Verified for changes to the algorithm up through Unicode 5.2.0 (2009).
+
+ This implementation is organized into several passes, each implemen-
+ ting one or more of the rules of the Unicode Bidi Algorithm. The
+ resolution of Weak Types and of Neutrals each use a state table
+ approach.
+
+ Both a printf based interface and a Windows DlgProc are provided for
+ interactive testing.
+
+ A stress harness comparing this implementation (v24) to a Java based
+ implementation was used by Doug Felt to verify that the two
+ implementations produce identical results for all strings up to six
+ bidi classes and stochastic strings up to length 20.
+
+ Version 26 was verified by the author against the Unicode 5.2.0
+ file BidiTest.txt, which provides an exhaustive text of strings of
+ length 4 or less, but covers some important cases where the language
+ in UAX#9 had been clarified.
+
+ To see this code running in an actual Windows program,
+ download the free Unibook uitlity from http://unicode.org/unibook
+ The bidi demo is executed from the tools menu. It is build from
+ this source file.
+
+ Build Notes
+ -----------
+
+ To compile the sample implementation please set the #define
+ directives above so the correct headers get included. Not all the
+ files are needed for all purposes. For the commandline version
+ only bidi.h and bidi.cpp are needed.
+
+ The Win32 version is provided as a dialog procedure. To use as a
+ standalone program compile with the the lbmain.cpp file. If all you
+ need is the ability to run the code "as is", you can instead download
+ the unibook utility from http://uincode.org/unibook/ which contains
+ the bidi demo compiled from this source file.
+
+ This code uses an extension to C++ that gives variables declared in
+ a for() statement function the same scope as the for() statement.
+ If your compiler does not support this extension, you may need to
+ move the declaration, e.g. int ich = 0; in front of the for statement.
+
+ Implementation Note
+ -------------------
+
+ NOTE: The Unicode Bidirectional Algorithm removes all explicit
+ formatting codes in rule X9, but states that this can be
+ simulated by conformant implementations. This implementation
+ attempts to demonstrate such a simulation
+
+ To demonstrate this, the current implementation does the
+ following:
+
+ in resolveExplicit()
+ - change LRE, LRO, RLE, RLO, PDF to BN
+ - assign nested levels to BN
+
+ in resolveWeak and resolveNeutrals
+ - assign L and R to BN's where they exist in place of
+ sor and eor by changing the last BN in front of a
+ level change to a strong type
+ - skip over BN's for the purpose of determining actions
+ - include BN in the count of deferred runs
+ which will resolve some of them to EN, AN and N
+
+ in resolveWhiteSpace
+ - set the level of any surviving BN to the base level,
+ or the level of the preceding character
+ - include LRE,LRO, RLE, RLO, PDF and BN in the count
+ whitespace to be reset
+
+ This will result in the same order for non-BN characters as
+ if the BN characters had been removed.
+
+ The clean() function can be used to remove boundary marks for
+ verification purposes.
+
+ Notation
+ --------
+ Pointer variables generally start with the letter p
+ Counter variables generally start with the letter c
+ Index variables generally start with the letter i
+ Boolean variables generally start with the letter f
+
+ The enumerated bidirectional types have the same name as in the
+ description for the Unicode Bidirectional Algorithm
+
+
+ Using this code outside a demo context
+ --------------------------------------
+
+ The way the functions are broken down in this demo code is based
+ on the needs of the demo to show the evolution in internal state
+ as the algorithm proceeds. This obscures how the algorithm would
+ be used in practice. These are the steps:
+
+ 1. Allocate a pair of arrays large enough to hold bidi class
+ and calculated levels (one for each input character)
+
+ 2. Provide your own function to assign directional types (bidi
+ classes) corresponding to each character in the input,
+ conflating ON, WS, S to N. Unlike the classify function in this
+ demo, the input would be actual Unicode characters.
+
+ 3. Process the first paragraph by calling BidiParagraph. That
+ function changes B into BN and returns a length including the
+ paragraph separator. (The iteration over multiple paragraphs
+ is left as excercise for the reader).
+
+ 4. Assign directional types again, but now assign specific types
+ to whitespace characters.
+
+ 5. Instead of reordering the input in place it is often desirable
+ to calculate an array of offsets indicating the reordering.
+ To that end, allocate such an array here and use it instead
+ of the input array in the next step.
+
+ 6. Resolve and reorder the lines by calling BidiLines. That
+ function 'breaks' lines on LS characters. Provide an optional
+ array of flags indicating the location of other line breaks as
+ needed.
+
+
+ Update History
+ --------------
+ Version 24 is the initial published and verified version of this
+ reference implementation. Version 25 and its updates fix various
+ minor issues with the scaffolding used for demonstrating the
+ algorithm using pseudo-alphabets from the command line or dialog
+ box. No changes to the implementation of the actual bidi algrithm
+ are made in any of the minor updates to version 25. Version 26
+ also makes no change to the actual algorithm but was verified
+ against the official BidiTest.txt file for Unicode 5.2.0.
+
+ - updated pseudo-alphabet
+
+ - Last Revised 12-10-99 (25)
+
+ - enable demo mode for release builds - no other changes
+
+ - Last Revised 12-10-00 (25a)
+
+ - fix regression in pseudo alphabet use for Windows UI
+
+ - Last Revised 02-01-01 (25b)
+
+ - fixed a few comments, renamed a variable
+
+ - Last Revised 03-04-01 (25c)
+
+ - make base level settable, enable mirror by default,
+ fix dialog size
+
+ - Last Revised 06-02-01 (25e)
+
+ - fixed some comments
+
+ - Last Revised 09-29-01 (25f)
+
+ - fixed classification for LS,RLM,LRM in pseudo alphabet,
+ focus issues in UI, regression fix to commandline from 25(e)
+ fix DEMO switch
+
+ - Last Revised 11-07-01 (25g)
+
+ - fixed classification for plus/minus in pseudo alphabet
+ to track changes made in Unicode 4.0.1
+
+ - Last Revised 12-03-04 (25h)
+
+ - now compiles as dialog-only program for WINDOWS_UI==1
+ using new bidimain.cpp
+
+ - Last Revised 12-02-07 (25i)
+
+ - cleaned up whitespace and indenting in the source,
+ fixed two comments (table headers)
+
+ - Last Revised 15-03-07 (25j)
+
+ - named enumerations
+
+ - Last Revised 30-05-07 (25k)
+
+ - added usage notes, minor edits to comments, indentation, etc
+ throughout. Added the bidiParagraph function. Checked against
+ changes in the Unicode Bidi Algorithm for Unicode 5.2.0. No
+ changes needed to this implementation to match the values in
+ the BidiTest.txt file in the Unicode Character Database.
+ Minor fixes to dialog/windows proc, updated preprocessor directives.
+
+ - Last Revised 03-08-09 (26)
+
+ Credits:
+ -------
+ Written by: Asmus Freytag
+ Command line interface by: Rick McGowan
+ Verification (v24): Doug Felt
+
+ Disclaimer and legal rights:
+ ---------------------------
+ Copyright (C) 1999-2009, ASMUS, Inc. All Rights Reserved.
+ Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
+
+ THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+ BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+ OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+
+ The file bid.rc is included in the software covered by the above.
+------------------------------------------------------------------------*/
+
+
+/* === HELPER FUNCTIONS AND DECLARATIONS ================================= */
+
+#define odd(x) ((x) & 1)
+
+/*----------------------------------------------------------------------
+ The following array maps character codes to types for the purpose of
+ this sample implementation. The legend string gives a human readable
+ explanation of the pseudo alphabet.
+
+ For simplicity, characters entered by buttons are given a 1:1 mapping
+ between their type and pseudo character value. Pseudo characters that
+ can be typed from the keyboard are explained in the legend string.
+
+ Use the Unicode Character Database for the real values in real use.
+ ---------------------------------------------------------------------*/
+
+const int chLS = 0x15;
+
+int TypesFromChar[] =
+{
+
+// 0 1 2 3 4 5 6 7 8 9 a b c d e f
+BDI_BN, BDI_BN, BDI_BN, BDI_BN, BDI_L, BDI_R, BDI_BN, BDI_BN, BDI_BN, BDI_S, BDI_B, BDI_S, BDI_WS, BDI_B, BDI_BN, BDI_BN, /*00-0f*/
+BDI_LRO,BDI_LRE,BDI_PDF,BDI_RLO,BDI_RLE,BDI_WS, BDI_L, BDI_R, BDI_BN, BDI_BN, BDI_BN, BDI_BN, BDI_B, BDI_B, BDI_B, BDI_S, /*10-1f*/
+BDI_WS, BDI_ON, BDI_ON, BDI_ET, BDI_ET, BDI_ET, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ES, BDI_CS, BDI_ES, BDI_CS, BDI_ES, /*20-2f*/
+BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_AN, BDI_AN, BDI_AN, BDI_AN, BDI_CS, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ON, /*30-3f*/
+BDI_ON, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, /*40-4f*/
+BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_LRE,BDI_ON, BDI_RLE,BDI_PDF,BDI_S, /*50-5f*/
+BDI_NSM,BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, /*60-6f*/
+BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_LRO,BDI_B, BDI_RLO,BDI_BN, BDI_ON, /*70-7f*/
+};
+
+
+/***************************************
+ Reverse, human readable reference:
+
+ LRM: 0x4
+ RLM: 0x5
+ L: 0x16,a-z
+ LRE: 0x11,[
+ LRO: 0x10,{
+ R: 0x17,G-Z
+ AL: A-F
+ RLE: 0x14,]
+ RLO: 0x13,}
+ PDF: 0x12,^
+ EN: 0-5
+ ES: /,+,[hyphen]
+ ET: #,$,%
+ AN: 6-9
+ CS: [comma],.,:
+ NSM: `
+ BN: 0x0-0x8,0xe,0xf,0x18-0x1b,~
+ B: 0xa,0xd,0x1c-0x1e,|
+ S: 0x9,0xb,0x1f,_
+ WS: 0xc,0x15,[space]
+ ON: !,",&,',(,),*,;,<,=,>,?,@,\,0x7f
+****************************************/
+
+// WS, LS and S are not explicitly needed except for L1. Therefore this
+// table conflates ON, S, WS, and LS to N, all others unchanged
+int NTypes[] = {
+ BDI_N, // ON,
+ BDI_L, // L,
+ BDI_R, // R,
+ BDI_AN, // AN,
+ BDI_EN, // EN,
+ BDI_AL, // AL
+ BDI_NSM, // NSM
+ BDI_CS, // CS
+ BDI_ES, // ES
+ BDI_ET, // ET
+ BDI_BN, // BN
+ BDI_N, // S
+ BDI_N, // WS
+ BDI_B, // B
+ BDI_RLO, // RLO
+ BDI_RLE, // RLE
+ BDI_LRO, // LRO
+ BDI_LRE, // LRE
+ BDI_PDF, // PDF
+ BDI_ON, // LS
+};
+
+// === HELPER FUNCTIONS ================================================
+
+// reverse cch characters
+static
+void reverse(uint16_t * psz, int cch)
+{
+ uint16_t chTemp;
+ int ich;
+
+ for (ich = 0; ich < --cch; ich++)
+ {
+ chTemp = psz[ich];
+ psz[ich] = psz[cch];
+ psz[cch] = chTemp;
+ }
+}
+
+// Set a run of cval values at locations all prior to, but not including
+// iStart, to the new value nval.
+static
+void SetDeferredRun(int *pval, int cval, int iStart, int nval)
+{
+ int i;
+
+ for (i = iStart - 1; i >= iStart - cval; i--)
+ {
+ pval[i] = nval;
+ }
+}
+
+// === ASSIGNING BIDI CLASSES ============================================
+
+// === THE PARAGRAPH LEVEL ===============================================
+
+/*------------------------------------------------------------------------
+ Function: resolveParagraphs
+
+ Resolves the input strings into blocks over which the algorithm
+ is then applied.
+
+ Implements Rule P1 of the Unicode Bidi Algorithm
+
+ Input: Text string
+ Character count
+
+ Output: revised character count
+
+ Note: This is a very simplistic function. In effect it restricts
+ the action of the algorithm to the first paragraph in the input
+ where a paragraph ends at the end of the first block separator
+ or at the end of the input text.
+
+------------------------------------------------------------------------*/
+
+static
+int resolveParagraphs(int * types, int cch)
+{
+ int ich;
+
+ // skip characters not of type B
+ for(ich = 0; ich < cch && types[ich] != BDI_B; ich++)
+ ;
+ // stop after first B, make it a BN for use in the next steps
+ if (ich < cch && types[ich] == BDI_B)
+ types[ich++] = BDI_BN;
+ return ich;
+}
+
+/*------------------------------------------------------------------------
+ Function: baseLevel
+
+ Determines the base level
+
+ Implements rule P2 of the Unicode Bidi Algorithm.
+
+ Input: Array of directional classes
+ Character count
+
+ Note: Ignores explicit embeddings
+------------------------------------------------------------------------*/
+static
+int baseLevel(const int * pcls, int cch)
+{
+ int ich;
+
+ for (ich = 0; ich < cch; ich++)
+ {
+ switch (pcls[ich])
+ {
+ // strong left
+ case BDI_L:
+ return 0;
+
+ // strong right
+ case BDI_R:
+ case BDI_AL:
+ return 1;
+ }
+ }
+ return 0;
+}
+
+//====== RESOLVE EXPLICIT ================================================
+
+static
+int GreaterEven(int i)
+{
+ return odd(i) ? i + 1 : i + 2;
+}
+
+static
+int GreaterOdd(int i)
+{
+ return odd(i) ? i + 2 : i + 1;
+}
+
+static
+int EmbeddingDirection(int level)
+{
+ return odd(level) ? BDI_R : BDI_L;
+}
+
+
+/*------------------------------------------------------------------------
+ Function: resolveExplicit
+
+ Recursively resolves explicit embedding levels and overrides.
+ Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
+
+ Input: Base embedding level and direction
+ Character count
+
+ Output: Array of embedding levels
+ Caller must allocate (one level per input character)
+
+ In/Out: Array of direction classes
+
+
+ Note: The function uses two simple counters to keep track of
+ matching explicit codes and PDF. Use the default argument for
+ the outermost call. The nesting counter counts the recursion
+ depth and not the embedding level.
+------------------------------------------------------------------------*/
+const int MAX_LEVEL = 61; // the real value
+
+int Bidi_resolveExplicit(int level, int dir, int * pcls, int * plevel, int cch,
+ int nNest)
+{
+ int ich;
+
+ // always called with a valid nesting level
+ // nesting levels are != embedding levels
+ int nLastValid = nNest;
+
+ // check input values
+ assert(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
+
+ // process the text
+ for (ich = 0; ich < cch; ich++)
+ {
+ int cls = pcls[ich];
+ switch (cls)
+ {
+ case BDI_LRO:
+ case BDI_LRE:
+ nNest++;
+ if (GreaterEven(level) <= MAX_LEVEL)
+ {
+ plevel[ich] = GreaterEven(level);
+ pcls[ich] = BDI_BN;
+ ich += Bidi_resolveExplicit(plevel[ich], (cls == BDI_LRE ? BDI_N : BDI_L),
+ &pcls[ich+1], &plevel[ich+1],
+ cch - (ich+1), nNest);
+ nNest--;
+ continue;
+ }
+ cls = pcls[ich] = BDI_BN;
+ break;
+
+ case BDI_RLO:
+ case BDI_RLE:
+ nNest++;
+ if (GreaterOdd(level) <= MAX_LEVEL)
+ {
+ plevel[ich] = GreaterOdd(level);
+ pcls[ich] = BDI_BN;
+ ich += Bidi_resolveExplicit(plevel[ich], (cls == BDI_RLE ? BDI_N : BDI_R),
+ &pcls[ich+1], &plevel[ich+1],
+ cch - (ich+1), nNest);
+ nNest--;
+ continue;
+ }
+ cls = pcls[ich] = BDI_BN;
+ break;
+
+ case BDI_PDF:
+ cls = pcls[ich] = BDI_BN;
+ if (nNest)
+ {
+ if (nLastValid < nNest)
+ {
+ nNest--;
+ }
+ else
+ {
+ cch = ich; // break the loop, but complete body
+ }
+ }
+ break;
+ }
+
+ // Apply the override
+ if (dir != BDI_N)
+ {
+ cls = dir;
+ }
+ plevel[ich] = level;
+ if (pcls[ich] != BDI_BN)
+ pcls[ich] = cls;
+ }
+
+ return ich;
+}
+
+// === RESOLVE WEAK TYPES ================================================
+
+enum bidi_state // possible states
+{
+ xa, // arabic letter
+ xr, // right leter
+ xl, // left letter
+
+ ao, // arabic lett. foll by ON
+ ro, // right lett. foll by ON
+ lo, // left lett. foll by ON
+
+ rt, // ET following R
+ lt, // ET following L
+
+ cn, // EN, AN following AL
+ ra, // arabic number foll R
+ re, // european number foll R
+ la, // arabic number foll L
+ le, // european number foll L
+
+ ac, // CS following cn
+ rc, // CS following ra
+ rs, // CS,ES following re
+ lc, // CS following la
+ ls, // CS,ES following le
+
+ ret, // ET following re
+ let // ET following le
+} ;
+
+int stateWeak[][10] =
+{
+ // N, L, R, AN, EN, AL,NSM, CS, ES, ET,
+/*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* arabic letter */
+/*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
+/*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
+
+/*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* arabic lett. foll by ON*/
+/*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
+/*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */
+
+/*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */
+/*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */
+
+/*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */
+/*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* arabic number foll R */
+/*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* european number foll R */
+/*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* arabic number foll L */
+/*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* european number foll L */
+
+/*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */
+/*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */
+/*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */
+/*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */
+/*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */
+
+/*ret*/ { ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */
+/*let*/ { lo, xl, xr, la, le, xa,let, lo, lo,let } /* ET following le */
+
+
+};
+
+enum bidi_action // possible actions
+{
+ // primitives
+ IX = 0x100, // increment
+ XX = 0xF, // no-op
+
+ // actions
+ xxx = (XX << 4) + XX, // no-op
+ xIx = IX + xxx, // increment run
+ xxN = (XX << 4) + BDI_ON, // set current to N
+ xxE = (XX << 4) + BDI_EN, // set current to EN
+ xxA = (XX << 4) + BDI_AN, // set current to AN
+ xxR = (XX << 4) + BDI_R, // set current to R
+ xxL = (XX << 4) + BDI_L, // set current to L
+ Nxx = (BDI_ON << 4) + 0xF, // set run to neutral
+ Axx = (BDI_AN << 4) + 0xF, // set run to AN
+ ExE = (BDI_EN << 4) + BDI_EN,// set run to EN, set current to EN
+ NIx = (BDI_ON << 4) + 0xF + IX,// set run to N, increment
+ NxN = (BDI_ON << 4) + BDI_ON,// set run to N, set current to N
+ NxR = (BDI_ON << 4) + BDI_R, // set run to N, set current to R
+ NxE = (BDI_ON << 4) + BDI_EN,// set run to N, set current to EN
+
+ AxA = (BDI_AN << 4) + BDI_AN,// set run to AN, set current to AN
+ NxL = (BDI_ON << 4) + BDI_L, // set run to N, set current to L
+ LxL = (BDI_L << 4) + BDI_L // set run to L, set current to L
+};
+
+
+int actionWeak[][10] =
+{
+ // N,.. L, R, AN, EN, AL, NSM, CS,..ES, ET,
+/*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* arabic letter */
+/*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right leter */
+/*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
+
+/*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* arabic lett. foll by ON */
+/*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */
+/*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */
+
+/*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */
+/*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */
+
+/*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */
+/*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll R */
+/*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* european number foll R */
+/*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll L */
+/*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* european number foll L */
+
+/*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */
+/*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */
+/*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */
+/*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */
+/*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */
+
+/*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */
+/*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL } /* ET following le */
+};
+
+static
+int GetDeferredType(int action)
+{
+ return (action >> 4) & 0xF;
+}
+
+static
+int GetResolvedType(int action)
+{
+ return action & 0xF;
+}
+
+/* Note on action table:
+
+ States can be of two kinds:
+ - Immediate Resolution State, where each input token
+ is resolved as soon as it is seen. These states havve
+ only single action codes (xxN) or the no-op (xxx)
+ for static input tokens.
+ - Deferred Resolution State, where input tokens either
+ either extend the run (xIx) or resolve its Type (e.g. Nxx).
+
+ Input classes are of three kinds
+ - Static Input Token, where the class of the token remains
+ unchanged on output (AN, L, N, R)
+ - Replaced Input Token, where the class of the token is
+ always replaced on output (AL, BDI_BN, NSM, CS, ES, ET)
+ - Conditional Input Token, where the class of the token is
+ changed on output in some but not all cases (EN)
+
+ Where tokens are subject to change, a double action
+ (e.g. NxA, or NxN) is _required_ after deferred states,
+ resolving both the deferred state and changing the current token.
+
+ These properties of the table are verified by assertions below.
+ This code is needed only during debugging and maintenance
+*/
+
+/*------------------------------------------------------------------------
+ Function: resolveWeak
+
+ Resolves the directionality of numeric and other weak character types
+
+ Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
+
+ Input: Array of embedding levels
+ Character count
+
+ In/Out: Array of directional classes
+
+ Note: On input only these directional classes are expected
+ AL, HL, R, L, ON, BDI_BN, NSM, AN, EN, ES, ET, CS,
+------------------------------------------------------------------------*/
+void Bidi_resolveWeak(fz_context *ctx, int baselevel, int *pcls, int *plevel, int cch)
+{
+ int state = odd(baselevel) ? xr : xl;
+ int cls;
+ int ich;
+ int action;
+ int clsRun;
+ int clsNew;
+
+ int level = baselevel;
+
+ int cchRun = 0;
+
+ for (ich = 0; ich < cch; ich++)
+ {
+ if (pcls[ich] > BDI_BN) {
+ fz_warn(ctx, "error: pcls[%d] > BN (%d)\n", ich, pcls[ich]);
+ }
+
+ // ignore boundary neutrals
+ if (pcls[ich] == BDI_BN)
+ {
+ // must flatten levels unless at a level change;
+ plevel[ich] = level;
+
+ // lookahead for level changes
+ if (ich + 1 == cch && level != baselevel)
+ {
+ // have to fixup last BN before end of the loop, since
+ // its fix-upped value will be needed below the assert
+ pcls[ich] = EmbeddingDirection(level);
+ }
+ else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BDI_BN)
+ {
+ // fixup LAST BN in front / after a level run to make
+ // it act like the SOR/EOR in rule X10
+ int newlevel = plevel[ich+1];
+ if (level > newlevel) {
+ newlevel = level;
+ }
+ plevel[ich] = newlevel;
+
+ // must match assigned level
+ pcls[ich] = EmbeddingDirection(newlevel);
+ level = plevel[ich+1];
+ }
+ else
+ {
+ // don't interrupt runs
+ if (cchRun)
+ {
+ cchRun++;
+ }
+ continue;
+ }
+ }
+
+ assert(pcls[ich] <= BDI_BN);
+ cls = pcls[ich];
+
+ action = actionWeak[state][cls];
+
+ // resolve the directionality for deferred runs
+ clsRun = GetDeferredType(action);
+ if (clsRun != XX)
+ {
+ SetDeferredRun(pcls, cchRun, ich, clsRun);
+ cchRun = 0;
+ }
+
+ // resolve the directionality class at the current location
+ clsNew = GetResolvedType(action);
+ if (clsNew != XX)
+ pcls[ich] = clsNew;
+
+ // increment a deferred run
+ if (IX & action)
+ cchRun++;
+
+ state = stateWeak[state][cls];
+ }
+
+ // resolve any deferred runs
+ // use the direction of the current level to emulate PDF
+ cls = EmbeddingDirection(level);
+
+ // resolve the directionality for deferred runs
+ clsRun = GetDeferredType(actionWeak[state][cls]);
+ if (clsRun != XX)
+ SetDeferredRun(pcls, cchRun, ich, clsRun);
+}
+
+// === RESOLVE NEUTAL TYPES ==============================================
+
+// action values
+enum neutral_action
+{
+ // action to resolve previous input
+ nL = BDI_L, // resolve EN to L
+ En = 3 << 4, // resolve neutrals run to embedding level direction
+ Rn = BDI_R << 4, // resolve neutrals run to strong right
+ Ln = BDI_L << 4, // resolved neutrals run to strong left
+ In = (1<<8), // increment count of deferred neutrals
+ LnL = (1<<4)+BDI_L // set run and EN to L
+};
+
+static
+int GetDeferredNeutrals(int action, int level)
+{
+ action = (action >> 4) & 0xF;
+ if (action == (En >> 4))
+ return EmbeddingDirection(level);
+ else
+ return action;
+}
+
+static
+int GetResolvedNeutrals(int action)
+{
+ action = action & 0xF;
+ if (action == In)
+ return 0;
+ else
+ return action;
+}
+
+// state values
+enum neutral_state
+{
+ // new temporary class
+ r, // R and characters resolved to R
+ l, // L and characters resolved to L
+ rn, // N preceded by right
+ ln, // N preceded by left
+ a, // AN preceded by left (the abbrev 'la' is used up above)
+ na // N preceeded by a
+} ;
+
+
+/*------------------------------------------------------------------------
+ Notes:
+
+ By rule W7, whenever a EN is 'dominated' by an L (including start of
+ run with embedding direction = L) it is resolved to, and further treated
+ as L.
+
+ This leads to the need for 'a' and 'na' states.
+------------------------------------------------------------------------*/
+
+int actionNeutrals[][5] =
+{
+// N, L, R, AN, EN, = cls
+ // state =
+ {In, 0, 0, 0, 0}, // r right
+ {In, 0, 0, 0, BDI_L},// l left
+
+ {In, En, Rn, Rn, Rn}, // rn N preceded by right
+ {In, Ln, En, En, LnL}, // ln N preceded by left
+
+ {In, 0, 0, 0, BDI_L},// a AN preceded by left
+ {In, En, Rn, Rn, En} // na N preceded by a
+} ;
+
+int stateNeutrals[][5] =
+{
+// N, L, R, AN, EN = cls
+ // state =
+ {rn, l, r, r, r}, // r right
+ {ln, l, r, a, l}, // l left
+
+ {rn, l, r, r, r}, // rn N preceded by right
+ {ln, l, r, a, l}, // ln N preceded by left
+
+ {na, l, r, a, l}, // a AN preceded by left
+ {na, l, r, a, l} // na N preceded by la
+} ;
+
+/*------------------------------------------------------------------------
+ Function: resolveNeutrals
+
+ Resolves the directionality of neutral character types.
+
+ Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
+
+ Input: Array of embedding levels
+ Character count
+ Baselevel
+
+ In/Out: Array of directional classes
+
+ Note: On input only these directional classes are expected
+ R, L, N, AN, EN and BN
+
+ W8 resolves a number of ENs to L
+------------------------------------------------------------------------*/
+void Bidi_resolveNeutrals(int baselevel, int *pcls, const int *plevel, int cch)
+{
+ // the state at the start of text depends on the base level
+ int state = odd(baselevel) ? r : l;
+ int cls;
+ int ich;
+ int clsRun;
+
+ int cchRun = 0;
+ int level = baselevel;
+
+ for (ich = 0; ich < cch; ich++)
+ {
+ int action;
+ int clsNew;
+
+ // ignore boundary neutrals
+ if (pcls[ich] == BDI_BN)
+ {
+ // include in the count for a deferred run
+ if (cchRun)
+ cchRun++;
+
+ // skip any further processing
+ continue;
+ }
+
+ assert(pcls[ich] < 5); // "Only N, L, R, AN, EN are allowed"
+ cls = pcls[ich];
+
+ action = actionNeutrals[state][cls];
+
+ // resolve the directionality for deferred runs
+ clsRun = GetDeferredNeutrals(action, level);
+ if (clsRun != BDI_N)
+ {
+ SetDeferredRun(pcls, cchRun, ich, clsRun);
+ cchRun = 0;
+ }
+
+ // resolve the directionality class at the current location
+ clsNew = GetResolvedNeutrals(action);
+ if (clsNew != BDI_N)
+ pcls[ich] = clsNew;
+
+ if (In & action)
+ cchRun++;
+
+ state = stateNeutrals[state][cls];
+ level = plevel[ich];
+ }
+
+ // resolve any deferred runs
+ cls = EmbeddingDirection(level); // eor has type of current level
+
+ // resolve the directionality for deferred runs
+ clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
+ if (clsRun != BDI_N)
+ SetDeferredRun(pcls, cchRun, ich, clsRun);
+}
+
+// === RESOLVE IMPLLICIT =================================================
+
+/*------------------------------------------------------------------------
+ Function: resolveImplicit
+
+ Recursively resolves implicit embedding levels.
+ Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
+
+ Input: Array of direction classes
+ Character count
+ Base level
+
+ In/Out: Array of embedding levels
+
+ Note: levels may exceed 15 on output.
+ Accepted subset of direction classes
+ R, L, AN, EN
+------------------------------------------------------------------------*/
+int addLevel[][4] =
+{
+ // L, R, AN, EN = cls
+ // level =
+/* even */ { 0, 1, 2, 2 }, // EVEN
+/* odd */ { 1, 0, 1, 1 } // ODD
+
+};
+
+void Bidi_resolveImplicit(const int * pcls, int * plevel, int cch)
+{
+ int ich;
+
+ for (ich = 0; ich < cch; ich++)
+ {
+ // cannot resolve bn here, since some bn were resolved to strong
+ // types in resolveWeak. To remove these we need the original
+ // types, which are available again in resolveWhiteSpace
+ if (pcls[ich] == BDI_BN)
+ {
+ continue;
+ }
+ assert(pcls[ich] > 0); // "No Neutrals allowed to survive here."
+ assert(pcls[ich] < 5); // "Out of range."
+ plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
+ }
+}
+
+// === REORDER ===========================================================
+/*------------------------------------------------------------------------
+ Function: resolveLines
+
+ Breaks a paragraph into lines
+
+ Input: Character count
+ Array of line break flags
+ In/Out: Array of characters
+
+ Returns the count of characters on the first line
+
+ Note: This function only breaks lines at hard line breaks. Other
+ line breaks can be passed in. If pbrk[n] is true, then a break
+ occurs after the character in pszInput[n]. Breaks before the first
+ character are not allowed.
+------------------------------------------------------------------------*/
+static
+int resolveLines(uint16_t * pszInput, int * pbrk, int cch)
+{
+ int ich;
+
+ // skip characters not of type LS
+ for(ich = 0; ich < cch; ich++)
+ {
+ if (pszInput[ich] == chLS || (pbrk && pbrk[ich]))
+ {
+ ich++;
+ break;
+ }
+ }
+
+ return ich;
+}
+
+/*------------------------------------------------------------------------
+ Function: resolveWhiteSpace
+
+ Resolves levels for WS and S
+ Implements rule L1 of the Unicode bidi Algorithm.
+
+ Input: Base embedding level
+ Character count
+ Array of direction classes (for one line of text)
+
+ In/Out: Array of embedding levels (for one line of text)
+
+ Note: this should be applied a line at a time. The default driver
+ code supplied in this file assumes a single line of text; for
+ a real implementation, cch and the initial pointer values
+ would have to be adjusted.
+------------------------------------------------------------------------*/
+void Bidi_resolveWhitespace(int baselevel, const int *pcls, int *plevel,
+ int cch)
+{
+ int cchrun = 0;
+ int oldlevel = baselevel;
+ int ich;
+
+ for (ich = 0; ich < cch; ich++)
+ {
+ switch(pcls[ich])
+ {
+ default:
+ cchrun = 0; // any other character breaks the run
+ break;
+ case BDI_WS:
+ cchrun++;
+ break;
+
+ case BDI_RLE:
+ case BDI_LRE:
+ case BDI_LRO:
+ case BDI_RLO:
+ case BDI_PDF:
+ case BDI_BN:
+ plevel[ich] = oldlevel;
+ cchrun++;
+ break;
+
+ case BDI_S:
+ case BDI_B:
+ // reset levels for WS before eot
+ SetDeferredRun(plevel, cchrun, ich, baselevel);
+ cchrun = 0;
+ plevel[ich] = baselevel;
+ break;
+ }
+ oldlevel = plevel[ich];
+ }
+ // reset level before eot
+ SetDeferredRun(plevel, cchrun, ich, baselevel);
+}
+
+#ifdef BIDI_LINE_AT_A_TIME
+/*------------------------------------------------------------------------
+ Functions: reorder/reorderLevel
+
+ Recursively reorders the display string
+ "From the highest level down, reverse all characters at that level and
+ higher, down to the lowest odd level"
+
+ Implements rule L2 of the Unicode bidi Algorithm.
+
+ Input: Array of embedding levels
+ Character count
+ Flag enabling reversal (set to false by initial caller)
+
+ In/Out: Text to reorder
+
+ Note: levels may exceed 15 resp. 61 on input.
+
+ Rule L3 - reorder combining marks is not implemented here
+ Rule L4 - glyph mirroring is implemented as a display option below
+
+ Note: this should be applied a line at a time
+-------------------------------------------------------------------------*/
+static int reorderLevel(int level, uint16_t * pszText, const int * plevel, int cch,
+ int fReverse)
+{
+ int ich;
+
+ // true as soon as first odd level encountered
+ fReverse = fReverse || odd(level);
+
+ for (ich = 0; ich < cch; ich++)
+ {
+ if (plevel[ich] < level)
+ {
+ break;
+ }
+ else if (plevel[ich] > level)
+ {
+ ich += reorderLevel(level + 1, pszText + ich, plevel + ich,
+ cch - ich, fReverse) - 1;
+ }
+ }
+ if (fReverse)
+ {
+ reverse(pszText, ich);
+ }
+ return ich;
+}
+
+
+int Bidi_reorder(int baselevel, uint16_t * pszText, const int * plevel, int cch)
+{
+ int ich = 0;
+
+ while (ich < cch)
+ {
+ ich += reorderLevel(baselevel, pszText + ich, plevel + ich,
+ cch - ich, FALSE);
+ }
+ return ich;
+}
+#endif
diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c
new file mode 100644
index 00000000..a3bbbc1d
--- /dev/null
+++ b/source/fitz/bidi.c
@@ -0,0 +1,1309 @@
+/**
+ * Bidirectional text processing.
+ *
+ * Processes uint16_t text by arranging the characters into an order suitable
+ * for display. E.g. Hebrew text will be arranged from right-to-left and
+ * any English within the text will remain in the left-to-right order.
+ * Characters such as parenthesis will be substituted for their mirrored
+ * equivalents if they are part of text which must be reversed.
+ *
+ * This is an implementation of the uint16_t Bidirectional Algorithm which
+ * can be found here: http://www.uint16_t.org/reports/tr9/ and is based
+ * on the reference implementation of the algorithm found on that page.
+ *
+ * FIXME - Describe the role of this module from the point of view of EDR.
+ *
+ * For a nice overview of how it works, read this...
+ * http://www.w3.org/TR/REC-html40/struct/dirlang.html
+ *
+ * Copyright (C) Picsel, 2004. All Rights Reserved.
+ */
+
+/**
+ * Original copyright notice from uint16_t reference implementation.
+ * ----------------------------------------------------------------
+ * Written by: Asmus Freytag
+ * C++ and Windows dependencies removed, and
+ * command line interface added by: Rick McGowan
+ *
+ * Copyright (C) 1999, ASMUS, Inc. All Rights Reserved
+ */
+
+/*
+ * Includes...
+ */
+
+#include "mupdf/fitz.h"
+#include "bidi-impl.h" /* standard bidi code interface */
+#include "bidi-internal.h"
+
+/*
+ * Macros...
+ */
+
+#define ODD(x) ((x) & 1)
+
+#define REPLACEABLE_TYPE(t)\
+ (((t)==BDI_ES) || ((t)==BDI_ET )|| ((t)==BDI_CS )||\
+ ((t)==BDI_NSM)|| ((t)==BDI_PDF)|| ((t)==BDI_BN )||\
+ ((t)==BDI_S) || ((t)==BDI_WS )|| ((t)==BDI_N ) )
+
+#ifdef DEBUG_BIDI_VERBOSE
+#define DBUGVF(params) do { fz_warn params; } while (0)
+#else
+#define DBUGVF(params) do {} while (0)
+#endif
+
+#ifdef DEBUG_BIDI_OUTLINE
+#define DBUGH(params) do { fz_warn params; } while (0)
+#else
+#define DBUGH(params) do {} while (0)
+#endif
+
+#define UNICODE_EOS ((uint16_t)0)
+#define UNICODE_DIGIT_ZERO ((uint16_t)0x0030)
+#define UNICODE_DIGIT_NINE ((uint16_t)0x0039)
+#define UNICODE_SUPERSCRIPT_TWO ((uint16_t)0x00B2)
+#define UNICODE_SUPERSCRIPT_THREE ((uint16_t)0x00B3)
+#define UNICODE_SUPERSCRIPT_ONE ((uint16_t)0x00B9)
+#define UNICODE_RTL_START ((uint16_t)0x0590)
+#define UNICODE_RTL_END ((uint16_t)0x07BF)
+#define UNICODE_ARABIC_INDIC_DIGIT_ZERO ((uint16_t)0x0660)
+#define UNICODE_ARABIC_INDIC_DIGIT_NINE ((uint16_t)0x0669)
+#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO ((uint16_t)0x06F0)
+#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE ((uint16_t)0x06F9)
+#define UNICODE_ZERO_WIDTH_NON_JOINER ((uint16_t)0x200C)
+#define UNICODE_SUPERSCRIPT_ZERO ((uint16_t)0x2070)
+#define UNICODE_SUPERSCRIPT_FOUR ((uint16_t)0x2074)
+#define UNICODE_SUPERSCRIPT_NINE ((uint16_t)0x2079)
+#define UNICODE_SUBSCRIPT_ZERO ((uint16_t)0x2080)
+#define UNICODE_SUBSCRIPT_NINE ((uint16_t)0x2089)
+#define UNICODE_CIRCLED_DIGIT_ONE ((uint16_t)0x2460)
+#define UNICODE_NUMBER_TWENTY_FULL_STOP ((uint16_t)0x249B)
+#define UNICODE_CIRCLED_DIGIT_ZERO ((uint16_t)0x24EA)
+#define UNICODE_FULLWIDTH_DIGIT_ZERO ((uint16_t)0xFF10)
+#define UNICODE_FULLWIDTH_DIGIT_NINE ((uint16_t)0xFF19)
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+/*
+ * Enumerations...
+ */
+
+
+#ifdef DEBUG
+/* display support: */
+static const char charFromTypes[] =
+{
+ ' ', /* ON, */
+ '>', /* L, */
+ '<', /* R, */
+ '9', /* AN, */
+ '1', /* EN, */
+ 'a', /* AL */
+ '@', /* NSM */
+ '.', /* CS */
+ ',', /* ES */
+ '$', /* ET */
+ ':', /* BN */
+ 'X', /* S */
+ '_', /* WS */
+ 'B', /* B */
+ '+', /* RLO */
+ '+', /* RLE */
+ '+', /* LRO */
+ '+', /* LRE */
+ '-', /* PDF */
+ '=' /* LS */
+};
+#endif /* DEBUG */
+
+
+
+typedef struct Bidi_ProcessLine_fragData
+{
+ uint16_t *entireText;
+ Bidi_PL_Fragment_Callback *callersCallback;
+ void *callersData;
+}
+Bidi_ProcessLine_fragData;
+
+
+typedef struct BidiPropList
+{
+ uint16_t first;
+ uint16_t last;
+ Bidi_CharType type;
+} BidiPropList;
+
+
+/* The following two arrays were generated
+ * using the perl script unidata2array.pl
+ */
+
+static const BidiPropList bidiPropList[] =
+{
+ {0x0000,0x0008,BDI_BN}, {0x0009,0x0009,BDI_S}, {0x000A,0x000A,BDI_B},
+ {0x000B,0x000B,BDI_S}, {0x000C,0x000C,BDI_WS}, {0x000D,0x000D,BDI_B},
+ {0x000E,0x001B,BDI_BN}, {0x001C,0x001E,BDI_B}, {0x001F,0x001F,BDI_S},
+ {0x0020,0x0020,BDI_WS}, {0x0021,0x0022,BDI_ON}, {0x0023,0x0025,BDI_ET},
+ {0x0026,0x002A,BDI_ON}, {0x002B,0x002B,BDI_ES}, {0x002C,0x002C,BDI_CS},
+ {0x002D,0x002D,BDI_ES}, {0x002E,0x002E,BDI_CS}, {0x002F,0x002F,BDI_ES},
+ {0x0030,0x0039,BDI_EN}, {0x003A,0x003A,BDI_CS}, {0x003B,0x0040,BDI_ON},
+ {0x0041,0x005A,BDI_L}, {0x005B,0x0060,BDI_ON}, {0x0061,0x007A,BDI_L},
+ {0x007B,0x007E,BDI_ON}, {0x007F,0x0084,BDI_BN}, {0x0085,0x0085,BDI_B},
+ {0x0086,0x009F,BDI_BN}, {0x00A0,0x00A0,BDI_CS}, {0x00A1,0x00A1,BDI_ON},
+ {0x00A2,0x00A5,BDI_ET}, {0x00A6,0x00A9,BDI_ON}, {0x00AA,0x00AA,BDI_L},
+ {0x00AB,0x00AC,BDI_ON}, {0x00AD,0x00AD,BDI_BN}, {0x00AE,0x00AF,BDI_ON},
+ {0x00B0,0x00B1,BDI_ET}, {0x00B2,0x00B3,BDI_EN}, {0x00B4,0x00B4,BDI_ON},
+ {0x00B5,0x00B5,BDI_L}, {0x00B6,0x00B8,BDI_ON}, {0x00B9,0x00B9,BDI_EN},
+ {0x00BA,0x00BA,BDI_L}, {0x00BB,0x00BF,BDI_ON}, {0x00C0,0x00D6,BDI_L},
+ {0x00D7,0x00D7,BDI_ON}, {0x00D8,0x00F6,BDI_L}, {0x00F7,0x00F7,BDI_ON},
+ {0x00F8,0x02B8,BDI_L}, {0x02B9,0x02BA,BDI_ON}, {0x02BB,0x02C1,BDI_L},
+ {0x02C2,0x02CF,BDI_ON}, {0x02D0,0x02D1,BDI_L}, {0x02D2,0x02DF,BDI_ON},
+ {0x02E0,0x02E4,BDI_L}, {0x02E5,0x02ED,BDI_ON}, {0x02EE,0x02EE,BDI_L},
+ {0x02EF,0x02FF,BDI_ON}, {0x0300,0x036F,BDI_NSM},{0x0374,0x0375,BDI_ON},
+ {0x0376,0x037D,BDI_L}, {0x037E,0x037E,BDI_ON}, {0x0384,0x0385,BDI_ON},
+ {0x0386,0x0386,BDI_L}, {0x0387,0x0387,BDI_ON}, {0x0388,0x03F5,BDI_L},
+ {0x03F6,0x03F6,BDI_ON}, {0x03F7,0x0482,BDI_L}, {0x0483,0x0486,BDI_NSM},
+ {0x0488,0x0489,BDI_NSM},{0x048A,0x0589,BDI_L}, {0x058A,0x058A,BDI_ON},
+ {0x0591,0x05BD,BDI_NSM},{0x05BE,0x05BE,BDI_R}, {0x05BF,0x05BF,BDI_NSM},
+ {0x05C0,0x05C0,BDI_R}, {0x05C1,0x05C2,BDI_NSM},{0x05C3,0x05C3,BDI_R},
+ {0x05C4,0x05C5,BDI_NSM},{0x05C6,0x05C6,BDI_R}, {0x05C7,0x05C7,BDI_NSM},
+ {0x05D0,0x05EA,BDI_R}, {0x05F0,0x05F4,BDI_R}, {0x0600,0x060B,BDI_AL},
+ {0x060C,0x060C,BDI_CS}, {0x060D,0x060D,BDI_AL}, {0x060E,0x060F,BDI_ON},
+ {0x0610,0x0615,BDI_NSM},{0x0616,0x064A,BDI_AL}, {0x064B,0x065E,BDI_NSM},
+ {0x065F,0x065F,BDI_AL}, {0x0660,0x0669,BDI_EN}, {0x066A,0x066A,BDI_ET},
+ {0x066B,0x066C,BDI_EN}, {0x066D,0x066F,BDI_AL}, {0x0670,0x0670,BDI_NSM},
+ {0x0671,0x06D5,BDI_AL}, {0x06D6,0x06DC,BDI_NSM},{0x06DD,0x06DD,BDI_AL},
+ {0x06DE,0x06E4,BDI_NSM},{0x06E5,0x06E6,BDI_AL}, {0x06E7,0x06E8,BDI_NSM},
+ {0x06E9,0x06E9,BDI_ON}, {0x06EA,0x06ED,BDI_NSM},{0x06EE,0x06EF,BDI_AL},
+ {0x06F0,0x06F9,BDI_EN}, {0x06FA,0x070E,BDI_AL}, {0x070F,0x070F,BDI_BN},
+ {0x0710,0x0710,BDI_AL}, {0x0711,0x0711,BDI_NSM},{0x0712,0x072F,BDI_AL},
+ {0x0730,0x074A,BDI_NSM},{0x074B,0x07A5,BDI_AL}, {0x07A6,0x07B0,BDI_NSM},
+ {0x07B1,0x07BF,BDI_AL}, {0x07C0,0x07EA,BDI_R}, {0x07EB,0x07F3,BDI_NSM},
+ {0x07F4,0x07F5,BDI_R}, {0x07F6,0x07F9,BDI_ON}, {0x07FA,0x08FF,BDI_R},
+ {0x0901,0x0902,BDI_NSM},{0x0903,0x093B,BDI_L}, {0x093C,0x093C,BDI_NSM},
+ {0x093D,0x0940,BDI_L}, {0x0941,0x0948,BDI_NSM},{0x0949,0x094C,BDI_L},
+ {0x094D,0x094D,BDI_NSM},{0x094E,0x0950,BDI_L}, {0x0951,0x0954,BDI_NSM},
+ {0x0955,0x0961,BDI_L}, {0x0962,0x0963,BDI_NSM},{0x0964,0x0980,BDI_L},
+ {0x0981,0x0981,BDI_NSM},{0x0982,0x09BB,BDI_L}, {0x09BC,0x09BC,BDI_NSM},
+ {0x09BD,0x09C0,BDI_L}, {0x09C1,0x09C4,BDI_NSM},{0x09C5,0x09CC,BDI_L},
+ {0x09CD,0x09CD,BDI_NSM},{0x09CE,0x09E1,BDI_L}, {0x09E2,0x09E3,BDI_NSM},
+ {0x09E4,0x09F1,BDI_L}, {0x09F2,0x09F3,BDI_ET}, {0x09F4,0x0A00,BDI_L},
+ {0x0A01,0x0A02,BDI_NSM},{0x0A03,0x0A3B,BDI_L}, {0x0A3C,0x0A3C,BDI_NSM},
+ {0x0A3D,0x0A40,BDI_L}, {0x0A41,0x0A42,BDI_NSM},{0x0A47,0x0A48,BDI_NSM},
+ {0x0A4B,0x0A4D,BDI_NSM},{0x0A4E,0x0A6F,BDI_L}, {0x0A70,0x0A71,BDI_NSM},
+ {0x0A72,0x0A80,BDI_L}, {0x0A81,0x0A82,BDI_NSM},{0x0A83,0x0ABB,BDI_L},
+ {0x0ABC,0x0ABC,BDI_NSM},{0x0ABD,0x0AC0,BDI_L}, {0x0AC1,0x0AC5,BDI_NSM},
+ {0x0AC7,0x0AC8,BDI_NSM},{0x0AC9,0x0ACC,BDI_L}, {0x0ACD,0x0ACD,BDI_NSM},
+ {0x0ACE,0x0AE1,BDI_L}, {0x0AE2,0x0AE3,BDI_NSM},{0x0AE4,0x0AF0,BDI_L},
+ {0x0AF1,0x0AF1,BDI_ET}, {0x0B01,0x0B01,BDI_NSM},{0x0B02,0x0B3B,BDI_L},
+ {0x0B3C,0x0B3C,BDI_NSM},{0x0B3D,0x0B3E,BDI_L}, {0x0B3F,0x0B3F,BDI_NSM},
+ {0x0B40,0x0B40,BDI_L}, {0x0B41,0x0B43,BDI_NSM},{0x0B44,0x0B4C,BDI_L},
+ {0x0B4D,0x0B4D,BDI_NSM},{0x0B56,0x0B56,BDI_NSM},{0x0B57,0x0B81,BDI_L},
+ {0x0B82,0x0B82,BDI_NSM},{0x0B83,0x0BBF,BDI_L}, {0x0BC0,0x0BC0,BDI_NSM},
+ {0x0BC1,0x0BCC,BDI_L}, {0x0BCD,0x0BCD,BDI_NSM},{0x0BCE,0x0BF2,BDI_L},
+ {0x0BF3,0x0BF8,BDI_ON}, {0x0BF9,0x0BF9,BDI_ET}, {0x0BFA,0x0BFA,BDI_ON},
+ {0x0BFB,0x0C3D,BDI_L}, {0x0C3E,0x0C40,BDI_NSM},{0x0C41,0x0C45,BDI_L},
+ {0x0C46,0x0C48,BDI_NSM},{0x0C4A,0x0C4D,BDI_NSM},{0x0C55,0x0C56,BDI_NSM},
+ {0x0C57,0x0CBB,BDI_L}, {0x0CBC,0x0CBC,BDI_NSM},{0x0CBD,0x0CCB,BDI_L},
+ {0x0CCC,0x0CCD,BDI_NSM},{0x0CCE,0x0CE1,BDI_L}, {0x0CE2,0x0CE3,BDI_NSM},
+ {0x0CE4,0x0CF0,BDI_L}, {0x0CF1,0x0CF2,BDI_ON}, {0x0CF3,0x0D40,BDI_L},
+ {0x0D41,0x0D43,BDI_NSM},{0x0D44,0x0D4C,BDI_L}, {0x0D4D,0x0D4D,BDI_NSM},
+ {0x0D4E,0x0DC9,BDI_L}, {0x0DCA,0x0DCA,BDI_NSM},{0x0DCB,0x0DD1,BDI_L},
+ {0x0DD2,0x0DD4,BDI_NSM},{0x0DD6,0x0DD6,BDI_NSM},{0x0DD7,0x0E30,BDI_L},
+ {0x0E31,0x0E31,BDI_NSM},{0x0E32,0x0E33,BDI_L}, {0x0E34,0x0E3A,BDI_NSM},
+ {0x0E3F,0x0E3F,BDI_ET}, {0x0E40,0x0E46,BDI_L}, {0x0E47,0x0E4E,BDI_NSM},
+ {0x0E4F,0x0EB0,BDI_L}, {0x0EB1,0x0EB1,BDI_NSM},{0x0EB2,0x0EB3,BDI_L},
+ {0x0EB4,0x0EB9,BDI_NSM},{0x0EBB,0x0EBC,BDI_NSM},{0x0EBD,0x0EC7,BDI_L},
+ {0x0EC8,0x0ECD,BDI_NSM},{0x0ECE,0x0F17,BDI_L}, {0x0F18,0x0F19,BDI_NSM},
+ {0x0F1A,0x0F34,BDI_L}, {0x0F35,0x0F35,BDI_NSM},{0x0F36,0x0F36,BDI_L},
+ {0x0F37,0x0F37,BDI_NSM},{0x0F38,0x0F38,BDI_L}, {0x0F39,0x0F39,BDI_NSM},
+ {0x0F3A,0x0F3D,BDI_ON}, {0x0F3E,0x0F70,BDI_L}, {0x0F71,0x0F7E,BDI_NSM},
+ {0x0F7F,0x0F7F,BDI_L}, {0x0F80,0x0F84,BDI_NSM},{0x0F85,0x0F85,BDI_L},
+ {0x0F86,0x0F87,BDI_NSM},{0x0F88,0x0F8F,BDI_L}, {0x0F90,0x0F97,BDI_NSM},
+ {0x0F99,0x0FBC,BDI_NSM},{0x0FBD,0x0FC5,BDI_L}, {0x0FC6,0x0FC6,BDI_NSM},
+ {0x0FC7,0x102C,BDI_L}, {0x102D,0x1030,BDI_NSM},{0x1031,0x1031,BDI_L},
+ {0x1032,0x1032,BDI_NSM},{0x1036,0x1037,BDI_NSM},{0x1038,0x1038,BDI_L},
+ {0x1039,0x1039,BDI_NSM},{0x103A,0x1057,BDI_L}, {0x1058,0x1059,BDI_NSM},
+ {0x105A,0x135E,BDI_L}, {0x135F,0x135F,BDI_NSM},{0x1360,0x138F,BDI_L},
+ {0x1390,0x1399,BDI_ON}, {0x139A,0x167F,BDI_L}, {0x1680,0x1680,BDI_WS},
+ {0x1681,0x169A,BDI_L}, {0x169B,0x169C,BDI_ON}, {0x169D,0x1711,BDI_L},
+ {0x1712,0x1714,BDI_NSM},{0x1715,0x1731,BDI_L}, {0x1732,0x1734,BDI_NSM},
+ {0x1735,0x1751,BDI_L}, {0x1752,0x1753,BDI_NSM},{0x1754,0x1771,BDI_L},
+ {0x1772,0x1773,BDI_NSM},{0x1774,0x17B6,BDI_L}, {0x17B7,0x17BD,BDI_NSM},
+ {0x17BE,0x17C5,BDI_L}, {0x17C6,0x17C6,BDI_NSM},{0x17C7,0x17C8,BDI_L},
+ {0x17C9,0x17D3,BDI_NSM},{0x17D4,0x17DA,BDI_L}, {0x17DB,0x17DB,BDI_ET},
+ {0x17DC,0x17DC,BDI_L}, {0x17DD,0x17DD,BDI_NSM},{0x17DE,0x17EF,BDI_L},
+ {0x17F0,0x17F9,BDI_ON}, {0x1800,0x180A,BDI_ON}, {0x180B,0x180D,BDI_NSM},
+ {0x180E,0x180E,BDI_WS}, {0x180F,0x18A8,BDI_L}, {0x18A9,0x18A9,BDI_NSM},
+ {0x18AA,0x191F,BDI_L}, {0x1920,0x1922,BDI_NSM},{0x1923,0x1926,BDI_L},
+ {0x1927,0x192B,BDI_NSM},{0x192C,0x1931,BDI_L}, {0x1932,0x1932,BDI_NSM},
+ {0x1933,0x1938,BDI_L}, {0x1939,0x193B,BDI_NSM},{0x1940,0x1940,BDI_ON},
+ {0x1944,0x1945,BDI_ON}, {0x1946,0x19DD,BDI_L}, {0x19DE,0x19FF,BDI_ON},
+ {0x1A00,0x1A16,BDI_L}, {0x1A17,0x1A18,BDI_NSM},{0x1A19,0x1AFF,BDI_L},
+ {0x1B00,0x1B03,BDI_NSM},{0x1B04,0x1B33,BDI_L}, {0x1B34,0x1B34,BDI_NSM},
+ {0x1B35,0x1B35,BDI_L}, {0x1B36,0x1B3A,BDI_NSM},{0x1B3B,0x1B3B,BDI_L},
+ {0x1B3C,0x1B3C,BDI_NSM},{0x1B3D,0x1B41,BDI_L}, {0x1B42,0x1B42,BDI_NSM},
+ {0x1B43,0x1B6A,BDI_L}, {0x1B6B,0x1B73,BDI_NSM},{0x1B74,0x1DBF,BDI_L},
+ {0x1DC0,0x1DCA,BDI_NSM},{0x1DFE,0x1DFF,BDI_NSM},{0x1E00,0x1FBC,BDI_L},
+ {0x1FBD,0x1FBD,BDI_ON}, {0x1FBE,0x1FBE,BDI_L}, {0x1FBF,0x1FC1,BDI_ON},
+ {0x1FC2,0x1FCC,BDI_L}, {0x1FCD,0x1FCF,BDI_ON}, {0x1FD0,0x1FDC,BDI_L},
+ {0x1FDD,0x1FDF,BDI_ON}, {0x1FE0,0x1FEC,BDI_L}, {0x1FED,0x1FEF,BDI_ON},
+ {0x1FF0,0x1FFC,BDI_L}, {0x1FFD,0x1FFE,BDI_ON}, {0x2000,0x200A,BDI_WS},
+ {0x200B,0x200D,BDI_BN}, {0x200E,0x200E,BDI_L}, {0x200F,0x200F,BDI_R},
+ {0x2010,0x2027,BDI_ON}, {0x2028,0x2028,BDI_WS}, {0x2029,0x2029,BDI_B},
+ {0x202A,0x202A,BDI_LRE},{0x202B,0x202B,BDI_RLE},{0x202C,0x202C,BDI_PDF},
+ {0x202D,0x202D,BDI_LRO},{0x202E,0x202E,BDI_RLO},{0x202F,0x202F,BDI_CS},
+ {0x2030,0x2034,BDI_ET}, {0x2035,0x2043,BDI_ON}, {0x2044,0x2044,BDI_CS},
+ {0x2045,0x205E,BDI_ON}, {0x205F,0x205F,BDI_WS}, {0x2060,0x2063,BDI_BN},
+ {0x206A,0x206F,BDI_BN}, {0x2070,0x2070,BDI_EN}, {0x2071,0x2073,BDI_L},
+ {0x2074,0x2079,BDI_EN}, {0x207A,0x207B,BDI_ES}, {0x207C,0x207E,BDI_ON},
+ {0x207F,0x207F,BDI_L}, {0x2080,0x2089,BDI_EN}, {0x208A,0x208B,BDI_ES},
+ {0x208C,0x208E,BDI_ON}, {0x208F,0x209F,BDI_L}, {0x20A0,0x20B5,BDI_ET},
+ {0x20D0,0x20EF,BDI_NSM},{0x2100,0x2101,BDI_ON}, {0x2102,0x2102,BDI_L},
+ {0x2103,0x2106,BDI_ON}, {0x2107,0x2107,BDI_L}, {0x2108,0x2109,BDI_ON},
+ {0x210A,0x2113,BDI_L}, {0x2114,0x2114,BDI_ON}, {0x2115,0x2115,BDI_L},
+ {0x2116,0x2118,BDI_ON}, {0x2119,0x211D,BDI_L}, {0x211E,0x2123,BDI_ON},
+ {0x2124,0x2124,BDI_L}, {0x2125,0x2125,BDI_ON}, {0x2126,0x2126,BDI_L},
+ {0x2127,0x2127,BDI_ON}, {0x2128,0x2128,BDI_L}, {0x2129,0x2129,BDI_ON},
+ {0x212A,0x212D,BDI_L}, {0x212E,0x212E,BDI_ET}, {0x212F,0x2139,BDI_L},
+ {0x213A,0x213B,BDI_ON}, {0x213C,0x213F,BDI_L}, {0x2140,0x2144,BDI_ON},
+ {0x2145,0x2149,BDI_L}, {0x214A,0x214D,BDI_ON}, {0x214E,0x2152,BDI_L},
+ {0x2153,0x215F,BDI_ON}, {0x2160,0x218F,BDI_L}, {0x2190,0x2211,BDI_ON},
+ {0x2212,0x2212,BDI_ES}, {0x2213,0x2213,BDI_ET}, {0x2214,0x2335,BDI_ON},
+ {0x2336,0x237A,BDI_L}, {0x237B,0x2394,BDI_ON}, {0x2395,0x2395,BDI_L},
+ {0x2396,0x23E7,BDI_ON}, {0x2400,0x2426,BDI_ON}, {0x2440,0x244A,BDI_ON},
+ {0x2460,0x2487,BDI_ON}, {0x2488,0x249B,BDI_EN}, {0x249C,0x24E9,BDI_L},
+ {0x24EA,0x269C,BDI_ON}, {0x26A0,0x26AB,BDI_ON}, {0x26AC,0x26AC,BDI_L},
+ {0x26AD,0x26B2,BDI_ON}, {0x2701,0x2704,BDI_ON}, {0x2706,0x2709,BDI_ON},
+ {0x270C,0x2727,BDI_ON}, {0x2729,0x274B,BDI_ON}, {0x274D,0x274D,BDI_ON},
+ {0x274F,0x2752,BDI_ON}, {0x2756,0x2756,BDI_ON}, {0x2758,0x275E,BDI_ON},
+ {0x2761,0x2794,BDI_ON}, {0x2798,0x27AF,BDI_ON}, {0x27B1,0x27BE,BDI_ON},
+ {0x27C0,0x27CA,BDI_ON}, {0x27D0,0x27EB,BDI_ON}, {0x27F0,0x27FF,BDI_ON},
+ {0x2800,0x28FF,BDI_L}, {0x2900,0x2B1A,BDI_ON}, {0x2B20,0x2B23,BDI_ON},
+ {0x2B24,0x2CE4,BDI_L}, {0x2CE5,0x2CEA,BDI_ON}, {0x2CF9,0x2CFF,BDI_ON},
+ {0x2D00,0x2DFF,BDI_L}, {0x2E00,0x2E17,BDI_ON}, {0x2E1C,0x2E1D,BDI_ON},
+ {0x2E80,0x2E99,BDI_ON}, {0x2E9B,0x2EF3,BDI_ON}, {0x2F00,0x2FD5,BDI_ON},
+ {0x2FF0,0x2FFB,BDI_ON}, {0x3000,0x3000,BDI_WS}, {0x3001,0x3004,BDI_ON},
+ {0x3005,0x3007,BDI_L}, {0x3008,0x3020,BDI_ON}, {0x3021,0x3029,BDI_L},
+ {0x302A,0x302F,BDI_NSM},{0x3030,0x3030,BDI_ON}, {0x3031,0x3035,BDI_L},
+ {0x3036,0x3037,BDI_ON}, {0x3038,0x303C,BDI_L}, {0x303D,0x303F,BDI_ON},
+ {0x3040,0x3098,BDI_L}, {0x3099,0x309A,BDI_NSM},{0x309B,0x309C,BDI_ON},
+ {0x309D,0x309F,BDI_L}, {0x30A0,0x30A0,BDI_ON}, {0x30A1,0x30FA,BDI_L},
+ {0x30FB,0x30FB,BDI_ON}, {0x30FC,0x31BF,BDI_L}, {0x31C0,0x31CF,BDI_ON},
+ {0x31D0,0x321C,BDI_L}, {0x321D,0x321E,BDI_ON}, {0x321F,0x324F,BDI_L},
+ {0x3250,0x325F,BDI_ON}, {0x3260,0x327B,BDI_L}, {0x327C,0x327E,BDI_ON},
+ {0x327F,0x32B0,BDI_L}, {0x32B1,0x32BF,BDI_ON}, {0x32C0,0x32CB,BDI_L},
+ {0x32CC,0x32CF,BDI_ON}, {0x32D0,0x3376,BDI_L}, {0x3377,0x337A,BDI_ON},
+ {0x337B,0x33DD,BDI_L}, {0x33DE,0x33DF,BDI_ON}, {0x33E0,0x33FE,BDI_L},
+ {0x33FF,0x33FF,BDI_ON}, {0x3400,0x4DBF,BDI_L}, {0x4DC0,0x4DFF,BDI_ON},
+ {0x4E00,0xA48F,BDI_L}, {0xA490,0xA4C6,BDI_ON}, {0xA700,0xA71A,BDI_ON},
+ {0xA720,0xA721,BDI_ON}, {0xA722,0xA801,BDI_L}, {0xA802,0xA802,BDI_NSM},
+ {0xA803,0xA805,BDI_L}, {0xA806,0xA806,BDI_NSM},{0xA807,0xA80A,BDI_L},
+ {0xA80B,0xA80B,BDI_NSM},{0xA80C,0xA824,BDI_L}, {0xA825,0xA826,BDI_NSM},
+ {0xA827,0xA827,BDI_L}, {0xA828,0xA82B,BDI_ON}, {0xA82C,0xA873,BDI_L},
+ {0xA874,0xA877,BDI_ON}, {0xA878,0xFB1C,BDI_L}, {0xFB1D,0xFB1D,BDI_R},
+ {0xFB1E,0xFB1E,BDI_NSM},{0xFB1F,0xFB28,BDI_R}, {0xFB29,0xFB29,BDI_ES},
+ {0xFB2A,0xFB4F,BDI_R}, {0xFB50,0xFD3D,BDI_AL}, {0xFD3E,0xFD3F,BDI_ON},
+ {0xFD40,0xFDFC,BDI_AL}, {0xFDFD,0xFDFD,BDI_ON}, {0xFDFE,0xFDFF,BDI_AL},
+ {0xFE00,0xFE0F,BDI_NSM},{0xFE10,0xFE19,BDI_ON}, {0xFE20,0xFE23,BDI_NSM},
+ {0xFE30,0xFE4F,BDI_ON}, {0xFE50,0xFE50,BDI_CS}, {0xFE51,0xFE51,BDI_ON},
+ {0xFE52,0xFE52,BDI_CS}, {0xFE54,0xFE54,BDI_ON}, {0xFE55,0xFE55,BDI_CS},
+ {0xFE56,0xFE5E,BDI_ON}, {0xFE5F,0xFE5F,BDI_ET}, {0xFE60,0xFE61,BDI_ON},
+ {0xFE62,0xFE63,BDI_ES}, {0xFE64,0xFE66,BDI_ON}, {0xFE68,0xFE68,BDI_ON},
+ {0xFE69,0xFE6A,BDI_ET}, {0xFE6B,0xFE6B,BDI_ON}, {0xFE70,0xFEFE,BDI_AL},
+ {0xFEFF,0xFEFF,BDI_BN}, {0xFF01,0xFF02,BDI_ON}, {0xFF03,0xFF05,BDI_ET},
+ {0xFF06,0xFF0A,BDI_ON}, {0xFF0B,0xFF0B,BDI_ES}, {0xFF0C,0xFF0C,BDI_CS},
+ {0xFF0D,0xFF0D,BDI_ES}, {0xFF0E,0xFF0F,BDI_CS}, {0xFF10,0xFF19,BDI_EN},
+ {0xFF1A,0xFF1A,BDI_CS}, {0xFF1B,0xFF20,BDI_ON}, {0xFF21,0xFF3A,BDI_L},
+ {0xFF3B,0xFF40,BDI_ON}, {0xFF41,0xFF5A,BDI_L}, {0xFF5B,0xFF65,BDI_ON},
+ {0xFF66,0xFFDF,BDI_L}, {0xFFE0,0xFFE1,BDI_ET}, {0xFFE2,0xFFE4,BDI_ON},
+ {0xFFE5,0xFFE6,BDI_ET}, {0xFFE8,0xFFEE,BDI_ON}, {0xFFF9,0xFFFD,BDI_ON}
+};
+
+#define NUM_BDIPROP_SPANS ( sizeof( bidiPropList ) / sizeof( BidiPropList ) )
+
+/*
+ *The original BidiMirrorList bidiMirrorList[] array has been replaced by
+ *uint32_t bidiOptMirrorList[] an array that represents a tree structure
+ *formed from the original mirror list by the Perl script
+ *create-mirrorlist.pl in Review #11191
+ *
+ *below is the first part of the tree:- representing the uint16_t values
+ *0x0028, 0x0029, 0x003C, 0x003E, 0x005B, 0x005D
+ *
+ * 0
+ * |
+ * ----0----
+ * / | \
+ * 2 3 5
+ * / \ / \ / \
+ * 8 9 C E B D
+ *
+ *Every uint16_t value that shares nybble ancestors share parent nodes
+ *on the tree.
+ *The tree is of fixed depth 4 and each node holds the following values
+ *{4-bits nybble, 12-bits next, 16-bits uint16_t mirror}.
+ *The tree is represented as an array of bit-masks.
+ *
+ *The nybble value is the value in the tree above.
+ *The next Offset is the index into the array which points to the next
+ *sibling for the current node (i.e. the next node horizintally to the right
+ *that shares the same parent).
+ *The mirror value is the code to be mapped.
+ */
+static const uint32_t bidiOptMirrorList[] =
+{
+ 0x00120000, 0x0FFF0000, 0x20050000,
+ 0x80040029, 0x9FFF0028, 0x30080000,
+ 0xC007003E, 0xEFFF003C, 0x500B0000,
+ 0xB00A005D, 0xDFFF005B, 0x700E0000,
+ 0xB00D007D, 0xDFFF007B, 0xA0100000,
+ 0xBFFF00BB, 0xBFFF0000, 0xBFFF00AB,
+ 0x215A0000, 0x00200000, 0x30170000,
+ 0x9016203A, 0xAFFF2039, 0x401A0000,
+ 0x50192046, 0x6FFF2045, 0x701D0000,
+ 0xD01C207E, 0xEFFF207D, 0x8FFF0000,
+ 0xD01F208E, 0xEFFF208D, 0x20A20000,
+ 0x00280000, 0x8023220B, 0x9024220C,
+ 0xA025220D, 0xB0262208, 0xC0272209,
+ 0xDFFF220A, 0x102A0000, 0x5FFF29F5,
+ 0x302D0000, 0xC02C223D, 0xDFFF223C,
+ 0x402F0000, 0x3FFF22CD, 0x50340000,
+ 0x20312253, 0x30322252, 0x40332255,
+ 0x5FFF2254, 0x603F0000, 0x40362265,
+ 0x50372264, 0x60382267, 0x70392266,
+ 0x803A2269, 0x903B2268, 0xA03C226B,
+ 0xB03D226A, 0xE03E226F, 0xFFFF226E,
+ 0x70500000, 0x00412271, 0x10422270,
+ 0x20432273, 0x30442272, 0x40452275,
+ 0x50462274, 0x60472277, 0x70482276,
+ 0x80492279, 0x904A2278, 0xA04B227B,
+ 0xB04C227A, 0xC04D227D, 0xD04E227C,
+ 0xE04F227F, 0xFFFF227E, 0x805E0000,
+ 0x00522281, 0x10532280, 0x20542283,
+ 0x30552282, 0x40562285, 0x50572284,
+ 0x60582287, 0x70592286, 0x805A2289,
+ 0x905B2288, 0xA05C228B, 0xB05D228A,
+ 0xFFFF2290, 0x90630000, 0x0060228F,
+ 0x10612292, 0x20622291, 0x8FFF29B8,
+ 0xA06A0000, 0x206522A3, 0x306622A2,
+ 0x60672ADE, 0x80682AE4, 0x90692AE3,
+ 0xBFFF2AE5, 0xB0730000, 0x006C22B1,
+ 0x106D22B0, 0x206E22B3, 0x306F22B2,
+ 0x407022B5, 0x507122B4, 0x607222B7,
+ 0x7FFF22B6, 0xC0790000, 0x907522CA,
+ 0xA07622C9, 0xB07722CC, 0xC07822CB,
+ 0xDFFF2243, 0xD0860000, 0x007B22D1,
+ 0x107C22D0, 0x607D22D7, 0x707E22D6,
+ 0x807F22D9, 0x908022D8, 0xA08122DB,
+ 0xB08222DA, 0xC08322DD, 0xD08422DC,
+ 0xE08522DF, 0xFFFF22DE, 0xE0950000,
+ 0x008822E1, 0x108922E0, 0x208A22E3,
+ 0x308B22E2, 0x408C22E5, 0x508D22E4,
+ 0x608E22E7, 0x708F22E6, 0x809022E9,
+ 0x909122E8, 0xA09222EB, 0xB09322EA,
+ 0xC09422ED, 0xDFFF22EC, 0xFFFF0000,
+ 0x009722F1, 0x109822F0, 0x209922FA,
+ 0x309A22FB, 0x409B22FC, 0x609C22FD,
+ 0x709D22FE, 0xA09E22F2, 0xB09F22F3,
+ 0xC0A022F4, 0xD0A122F6, 0xEFFF22F7,
+ 0x30AB0000, 0x00A80000, 0x80A52309,
+ 0x90A62308, 0xA0A7230B, 0xBFFF230A,
+ 0x2FFF0000, 0x90AA232A, 0xAFFF2329,
+ 0x70CC0000, 0x60B50000, 0x80AE2769,
+ 0x90AF2768, 0xA0B0276B, 0xB0B1276A,
+ 0xC0B2276D, 0xD0B3276C, 0xE0B4276F,
+ 0xFFFF276E, 0x70BC0000, 0x00B72771,
+ 0x10B82770, 0x20B92773, 0x30BA2772,
+ 0x40BB2775, 0x5FFF2774, 0xD0C10000,
+ 0x50BE27D6, 0x60BF27D5, 0xD0C027DE,
+ 0xEFFF27DD, 0xEFFF0000, 0x20C327E3,
+ 0x30C427E2, 0x40C527E5, 0x50C627E4,
+ 0x60C727E7, 0x70C827E6, 0x80C927E9,
+ 0x90CA27E8, 0xA0CB27EB, 0xBFFF27EA,
+ 0x90FD0000, 0x80DB0000, 0x30CF2984,
+ 0x40D02983, 0x50D12986, 0x60D22985,
+ 0x70D32988, 0x80D42987, 0x90D5298A,
+ 0xA0D62989, 0xB0D7298C, 0xC0D8298B,
+ 0xD0D92990, 0xE0DA298F, 0xFFFF298E,
+ 0x90E50000, 0x00DD298D, 0x10DE2992,
+ 0x20DF2991, 0x30E02994, 0x40E12993,
+ 0x50E22996, 0x60E32995, 0x70E42998,
+ 0x8FFF2997, 0xB0E70000, 0x8FFF2298,
+ 0xC0ED0000, 0x00E929C1, 0x10EA29C0,
+ 0x40EB29C5, 0x50EC29C4, 0xFFFF29D0,
+ 0xD0F70000, 0x00EF29CF, 0x10F029D2,
+ 0x20F129D1, 0x40F229D5, 0x50F329D4,
+ 0x80F429D9, 0x90F529D8, 0xA0F629DB,
+ 0xBFFF29DA, 0xFFFF0000, 0x50F92215,
+ 0x80FA29F9, 0x90FB29F8, 0xC0FC29FD,
+ 0xDFFF29FC, 0xAFFF0000, 0x21030000,
+ 0xB1002A2C, 0xC1012A2B, 0xD1022A2E,
+ 0xEFFF2A2D, 0x31080000, 0x41052A35,
+ 0x51062A34, 0xC1072A3D, 0xDFFF2A3C,
+ 0x610B0000, 0x410A2A65, 0x5FFF2A64,
+ 0x71110000, 0x910D2A7A, 0xA10E2A79,
+ 0xD10F2A7E, 0xE1102A7D, 0xFFFF2A80,
+ 0x81190000, 0x01132A7F, 0x11142A82,
+ 0x21152A81, 0x31162A84, 0x41172A83,
+ 0xB1182A8C, 0xCFFF2A8B, 0x91260000,
+ 0x111B2A92, 0x211C2A91, 0x311D2A94,
+ 0x411E2A93, 0x511F2A96, 0x61202A95,
+ 0x71212A98, 0x81222A97, 0x91232A9A,
+ 0xA1242A99, 0xB1252A9C, 0xCFFF2A9B,
+ 0xA1320000, 0x11282AA2, 0x21292AA1,
+ 0x612A2AA7, 0x712B2AA6, 0x812C2AA9,
+ 0x912D2AA8, 0xA12E2AAB, 0xB12F2AAA,
+ 0xC1302AAD, 0xD1312AAC, 0xFFFF2AB0,
+ 0xB13B0000, 0x01342AAF, 0x31352AB4,
+ 0x41362AB3, 0xB1372ABC, 0xC1382ABB,
+ 0xD1392ABE, 0xE13A2ABD, 0xFFFF2AC0,
+ 0xC1460000, 0x013D2ABF, 0x113E2AC2,
+ 0x213F2AC1, 0x31402AC4, 0x41412AC3,
+ 0x51422AC6, 0x61432AC5, 0xD1442ACE,
+ 0xE1452ACD, 0xFFFF2AD0, 0xD14F0000,
+ 0x01482ACF, 0x11492AD2, 0x214A2AD1,
+ 0x314B2AD4, 0x414C2AD3, 0x514D2AD6,
+ 0x614E2AD5, 0xEFFF22A6, 0xE1550000,
+ 0x315122A9, 0x415222A8, 0x515322AB,
+ 0xC1542AED, 0xDFFF2AEC, 0xFFFF0000,
+ 0x71572AF8, 0x81582AF7, 0x91592AFA,
+ 0xAFFF2AF9, 0x31700000, 0x0FFF0000,
+ 0x01650000, 0x815E3009, 0x915F3008,
+ 0xA160300B, 0xB161300A, 0xC162300D,
+ 0xD163300C, 0xE164300F, 0xFFFF300E,
+ 0x1FFF0000, 0x01673011, 0x11683010,
+ 0x41693015, 0x516A3014, 0x616B3017,
+ 0x716C3016, 0x816D3019, 0x916E3018,
+ 0xA16F301B, 0xBFFF301A, 0xFFFF0000,
+ 0xFFFF0000, 0x01750000, 0x8174FF09,
+ 0x9FFFFF08, 0x11780000, 0xC177FF1E,
+ 0xEFFFFF1C, 0x317B0000, 0xB17AFF3D,
+ 0xDFFFFF3B, 0x517F0000, 0xB17DFF5D,
+ 0xD17EFF5B, 0xFFFFFF60, 0x6FFF0000,
+ 0x0181FF5F, 0x2182FF63, 0x3FFFFF62
+};
+
+#define NUM_BIDI_OPTMIRRORS (sizeof(bidiOptMirrorList)/sizeof(uint32_t))
+
+#define MIRROR_NYBBLE_SHIFT 28
+#define MIRROR_NEXT_MASK 0x0FFF0000
+#define MIRROR_NEXT_SHIFT 16
+#define MIRROR_VALUE_MASK 0x0000FFFF
+#define TRIPPLE_NYBBLE_NEG 4095
+
+/*
+ * Functions and static functions...
+ */
+
+
+/** Returns a directional type for different characters. The type is found in
+ * the lookup table bidiPropList using a binary search. Each element in the
+ * table is a range of code-points which share the same type, all arranged
+ * in ascending order.
+ */
+static Bidi_CharType classFromChWS(uint16_t ch)
+{
+ int start = 0;
+ int end = NUM_BDIPROP_SPANS - 1;
+ int mid = end / 2;
+
+ while (start <= end)
+ {
+ if (ch < bidiPropList[mid].first)
+ end = mid - 1;
+ else if (ch > bidiPropList[mid].last)
+ start = mid + 1;
+ else
+ {
+ if (bidiPropList[mid].type == BDI_B)
+ return BDI_ON;
+ return bidiPropList[mid].type;
+ }
+
+ mid = (start + end) / 2;
+ }
+
+ return BDI_L; /* in the absence of anything better to do. */
+}
+
+/** Return a direction for white-space on the second pass of the algorithm.
+ */
+Bidi_CharType Bidi_classFromChN(uint16_t ch)
+{
+ int fromChWS = classFromChWS(ch);
+
+ if (fromChWS == BDI_S || fromChWS == BDI_WS)
+ return BDI_N;
+
+ return fromChWS;
+}
+
+
+static
+int Bidi_isEuropeanNumber(const uint16_t *str, unsigned int len)
+{
+ const uint16_t *end = str + len;
+
+ for ( ; str != end; str++)
+ {
+ const uint16_t u = *str;
+ if ((u >= UNICODE_RTL_START && u < UNICODE_ARABIC_INDIC_DIGIT_ZERO) ||
+ (u > UNICODE_ARABIC_INDIC_DIGIT_NINE && u < UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO) ||
+ (u > UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE && u <= UNICODE_RTL_END))
+ {
+ /* This is just a normal RTL character or accent */
+ return FALSE;
+ }
+ else if(!(
+ (u >= UNICODE_DIGIT_ZERO && u <= UNICODE_DIGIT_NINE) ||
+ (u == UNICODE_SUPERSCRIPT_TWO) ||
+ (u == UNICODE_SUPERSCRIPT_THREE) ||
+ (u == UNICODE_SUPERSCRIPT_ONE) ||
+ (u >= UNICODE_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_ARABIC_INDIC_DIGIT_NINE) ||
+ (u >= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE) ||
+ (u == UNICODE_SUPERSCRIPT_ZERO) ||
+ (u >= UNICODE_SUPERSCRIPT_FOUR && u <= UNICODE_SUPERSCRIPT_NINE) ||
+ (u >= UNICODE_SUBSCRIPT_ZERO && u <= UNICODE_SUBSCRIPT_NINE) ||
+ (u >= UNICODE_CIRCLED_DIGIT_ONE && u <= UNICODE_NUMBER_TWENTY_FULL_STOP) ||
+ (u == UNICODE_CIRCLED_DIGIT_ZERO) ||
+ (u >= UNICODE_FULLWIDTH_DIGIT_ZERO && u <= UNICODE_FULLWIDTH_DIGIT_NINE) ||
+ (u == UNICODE_ZERO_WIDTH_NON_JOINER)
+ )
+ )
+ {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+/** The Bidi_mirrorChar function tranverses the tree looking for nybble
+ * matches.
+ *
+ * EXAMPLE:
+ * If uint16_t value u=0x005D has been passed to Bidi_mirrorChar for testing,
+ * it is separated into nybbles 0,0,5,D
+ * The first node in the tree (index 0 in the array) is the starting point,
+ * and luckily the first 4-bits hold a 0, which matches. We can therefore
+ * drop down to the child to check that. The child of a node is stored in
+ * the next array position, in this case index 1.
+ * The nybble of index 1 is a 0 and matches the second nybble we are
+ * searching for. Because of the match, the next child index 2 is moved to.
+ * Now the nybble at index 2 of the tree is a 2 and doesn't match the 5 we
+ * are searching for. Bits 5-16 of the array represent the offset to the
+ * next sibling. In this case the offset is 5, when we move to this array
+ * value we see that it holds a 3 nybble, again this doesn't match, so we
+ * get the next offset, which is 8, and move to that sibling.
+ * The sibling holds the nybble 5, which is the next one we want; so
+ * increment the index to its child. The child doesn't match and holds a B
+ * instead of a D. So we move to the next sibling ... which matches!!
+ * This node holds the mirror character 0x005B in bits 17-32.
+ */
+uint16_t Bidi_mirrorChar(const uint16_t u)
+{
+ int i, index;
+ uint16_t uCpy;
+ uint8_t n, nybble;
+
+ if((u < UNICODE_RTL_START) || (u > UNICODE_RTL_END))
+ {
+ /* uint16_t value lies outside of RTL character range
+ * and could therefore be a mirrorable charcter
+ */
+ index = 0;
+ uCpy = (uint16_t)u;
+ /* look through each nybble of the uint16_t and search in tree
+ * for matches
+ */
+ for(i = 0; i < 4; i++)
+ {
+ n = (uint8_t)(uCpy >> 12); /* the current nybble of the target */
+ /* the nybble of the first child node */
+ nybble = (uint8_t)(bidiOptMirrorList[index] >> MIRROR_NYBBLE_SHIFT);
+
+ /* search through node siblings for the matching nybble */
+ while(nybble != n)
+ {
+ /* index to next sibling */
+ index = (bidiOptMirrorList[index] & MIRROR_NEXT_MASK) >> MIRROR_NEXT_SHIFT;
+ if(index == TRIPPLE_NYBBLE_NEG) /* 12-bit next value is -1 */
+ {
+ /* no more siblings */
+ return UNICODE_EOS;
+ }
+ nybble = (uint8_t)(bidiOptMirrorList[index] >> MIRROR_NYBBLE_SHIFT);
+ }
+ uCpy <<= 4;
+ index++;
+ }
+ return (uint16_t)(bidiOptMirrorList[--index] & MIRROR_VALUE_MASK);
+ }
+ return UNICODE_EOS;
+}
+
+#ifdef BIDI_LINE_AT_A_TIME
+/** Subsitutes characters with their mirrored equivalents, e.g. swaps
+ * '(' with ')'. Implements rule L4 of the uint16_t Bidirectional
+ * algorithm. Mirrored characters are found using a binary search through
+ * the lookup table bidiOptMirrorList.
+ */
+static
+void Bidi_mirror(uint16_t *text,
+ const Bidi_Level *poslevel,
+ int len)
+{
+ int i;
+ uint16_t out;
+
+ for (i = 0; i < len; i ++)
+ {
+ if (ODD(poslevel[i]) == 0)
+ continue;
+ out = Bidi_mirrorChar((const uint16_t)text[i]);
+ if(out != UNICODE_EOS)
+ text[i] = out;
+ }
+}
+#endif
+
+
+/** Searches a RTL fragment for a mirror character
+ * When it finds one it creates a separate fragment for the
+ * character and the surrounding fragments. It passes the mirrored
+ * uint16_t back through the callback.
+ */
+static void Bidi_createFragmentMirrors(const uint16_t *text,
+ int len,
+ Bidi_Fragment_Callback callback,
+ void *arg)
+{
+ int i;
+ int lastPtr;
+ uint16_t mirror;
+
+ assert(text != NULL);
+ assert(len > 0);
+ lastPtr = 0;
+ for (i = 0; i < len; i ++)
+ {
+ mirror = Bidi_mirrorChar(text[i]);
+ if(mirror != UNICODE_EOS)
+ {
+ /* create preceding fragment */
+ if(i > lastPtr)
+ {
+ (*callback)(&text[lastPtr],
+ i - lastPtr,
+ TRUE,
+ UNICODE_EOS,
+ arg);
+ DBUGVF(("create mirror fragment for %x\n",(int)text[i]));
+ }
+ /* create mirror fragment */
+ (*callback)(&text[i],
+ 1,
+ TRUE,
+ mirror,
+ arg);
+ lastPtr = i + 1;
+ }
+ }
+
+ if(lastPtr < len)
+ {
+ /* create end fragment */
+ (*callback)(&text[lastPtr],
+ len - lastPtr,
+ TRUE,
+ UNICODE_EOS,
+ arg);
+ }
+}
+
+
+
+/** Determines the character classes for all following
+ * passes of the algorithm. A character class is basically the type of Bidi
+ * behaviour that the character exhibits.
+ */
+void Bidi_classifyCharacters(const uint16_t *text,
+ Bidi_CharType *types,
+ int len,
+ Bidi_Flags flags)
+{
+ int i;
+
+ if ((flags & Bidi_classifyWhiteSpace)!=0)
+ {
+ for (i = 0; i < len; i++)
+ {
+ types[i] = classFromChWS(text[i]);
+ }
+ }
+ else
+ {
+#ifdef DEBUG_BIDI_VERBOSE
+ fprintf(stderr, "Text: ");
+ for (i = 0; i < len; i++)
+ {
+ /* So that we can actually sort of read the debug string, any
+ * non-ascii characters are replaced with a 1-digit hash
+ * value from 0-9, making non-english characters appear
+ * as numbers
+ */
+ fprintf(stderr, "%c", (text[i] <= 127 && text[i ]>= 32)?
+ text[i]
+ :(char)((text[i] % 9) + 48)
+ );
+ }
+ fprintf(stderr, "\nTypes: ");
+ for (i = 0; i < len; i++)
+ {
+ types[i] = Bidi_classFromChN(text[i]);
+ fprintf(stderr, "%c", charFromTypes[(int)types[i]]);
+ }
+ fprintf(stderr, "\n");
+#endif
+ }
+}
+
+
+
+/** Determines the base level of the text.
+ * Implements rule P2 of the uint16_t Bidi Algorithm.
+ * Note: Ignores explicit embeddings
+ */
+static Bidi_Level baseLevelFromText(Bidi_CharType *types, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ {
+ switch (types[i])
+ {
+ /* strong left */
+ case BDI_L:
+ return Bidi_LeftToRight;
+
+ /* strong right */
+ case BDI_R:
+ case BDI_AL:
+ return Bidi_RightToLeft;
+ }
+ }
+ return Bidi_LeftToRight;
+}
+
+
+
+static Bidi_Direction directionFromType( Bidi_CharType type )
+{
+ switch( type )
+ {
+ case BDI_L:
+ case BDI_EN:
+ return Bidi_LeftToRight;
+
+ case BDI_R:
+ case BDI_AL:
+ return Bidi_RightToLeft;
+
+ default:
+ return Bidi_Neutral;
+ }
+}
+
+
+
+static void classifyQuotedBlocks(const uint16_t *text,
+ Bidi_CharType *types,
+ size_t len)
+{
+ size_t i;
+ int inQuote = FALSE;
+ int pdfNeeded = FALSE;
+ int ltrFound = FALSE;
+ int rtlFound = FALSE;
+
+ /* Only do anything special here if there is mixed content
+ * (LTR *and* RTL) in the text.
+ */
+ for (i = 0; i < len; i++)
+ {
+ switch ( directionFromType(types[i]) )
+ {
+ case Bidi_LeftToRight:
+ ltrFound = TRUE;
+ break;
+
+ case Bidi_RightToLeft:
+ rtlFound = TRUE;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Only make any changes if *both* LTR and RTL characters exist
+ * in this text.
+ */
+ if ( !ltrFound || !rtlFound )
+ {
+ return;
+ }
+
+ for (i = 0; i < len; i++)
+ {
+ if (text[i]=='"')
+ {
+ /* If we're already in a quote then terminate it,
+ * else start a new block.
+ */
+ if ( inQuote )
+ {
+ inQuote = FALSE;
+ if ( pdfNeeded )
+ {
+ pdfNeeded = FALSE;
+ types[i] = BDI_PDF;
+ }
+ }
+ else
+ {
+ size_t j;
+ int done = FALSE;
+
+ inQuote = TRUE;
+
+ /* Find the first strong right or left type and
+ * use that to determine whether we should classify
+ * the quote as LRE or RLE. Or neither, if we
+ * hit another quote before any strongly-directional
+ * character.
+ */
+ for ( j = i + 1;
+ !done && (j < len) && text[j] != '"';
+ ++j )
+ {
+ switch( types[j] )
+ {
+ case BDI_RLE:
+ case BDI_LRE:
+ done = TRUE;
+ break;
+
+ case BDI_L:
+ case BDI_EN:
+ types[i] = BDI_LRE;
+ pdfNeeded = TRUE;
+ done = TRUE;
+ break;
+
+ case BDI_R:
+ case BDI_AL:
+ types[i] = BDI_RLE;
+ pdfNeeded = TRUE;
+ done = TRUE;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+
+/* Creates a buffer with an embedding level for every uint16_t in the
+ * given text. Also determines the base level and returns it in
+ * *baseDir if *baseDir does not initially contain a valid direction.
+ */
+static Bidi_Level *
+createLevels(fz_context *ctx,
+ const uint16_t *text,
+ size_t len,
+ Bidi_Direction *baseDir,
+ int resolveWhiteSpace,
+ int bidiFlag)
+{
+ Bidi_Level *levels;
+ Bidi_CharType *types = NULL;
+ Bidi_Level baseLevel;
+
+ levels = fz_malloc(ctx, len * sizeof(*levels));
+
+ fz_var(types);
+
+ fz_try(ctx)
+ {
+ types = fz_malloc(ctx, len * sizeof(Bidi_CharType));
+
+ Bidi_classifyCharacters(text, types, len, bidiFlag);
+
+ if (*baseDir != Bidi_LeftToRight && *baseDir != Bidi_RightToLeft)
+ {
+ /* Derive the base level from the text and
+ * update *baseDir in case the caller wants to know.
+ */
+ baseLevel = baseLevelFromText(types, len);
+ *baseDir = ODD(baseLevel)==1 ? Bidi_RightToLeft : Bidi_LeftToRight;
+ }
+ else
+ {
+ baseLevel = (Bidi_Level)*baseDir;
+ }
+
+ {
+ /* Replace tab with base direction, i.e. make tab appear as
+ * 'strong left' if the base direction is left-to-right and
+ * 'strong right' if base direction is right-to-left. This
+ * allows Layout to implicitly treat tabs as 'segment separators'.
+ */
+ size_t i;
+
+ for (i = 0u; i < len; i++)
+ {
+ if (text[i]=='\t')
+ {
+ types[i] = (*baseDir == Bidi_RightToLeft) ? BDI_R : BDI_L;
+ }
+ }
+ }
+
+ /* Look for quotation marks. Classify them as RLE or LRE
+ * or leave them alone, depending on what follows them.
+ */
+ classifyQuotedBlocks( text, types, len );
+
+ /* Work out the levels and character types... */
+ (void)Bidi_resolveExplicit(baseLevel, BDI_N, types, levels, len, 0);
+ Bidi_resolveWeak(ctx, baseLevel, types, levels, len);
+ Bidi_resolveNeutrals(baseLevel,types, levels, len);
+ Bidi_resolveImplicit(types, levels, len);
+
+ Bidi_classifyCharacters(text, types, len, Bidi_classifyWhiteSpace);
+
+ if (resolveWhiteSpace)
+ {
+ /* resolve whitespace */
+ Bidi_resolveWhitespace(baseLevel, types, levels, len);
+ }
+
+ /* The levels buffer now has odd and even numbers indicating
+ * rtl or ltr characters, respectively.
+ */
+#ifdef DEBUG_BIDI_VERBOSE
+ fprintf(stderr, "Levels: ");
+ {
+ size_t i;
+ for (i = 0; i < len; i++)
+ {
+ fprintf(stderr, "%d", levels[i]>9?0:levels[i]);
+ }
+ fprintf(stderr, "\n");
+ }
+#endif
+ }
+ fz_always(ctx)
+ {
+ fz_free(ctx, types);
+ }
+ fz_catch(ctx)
+ {
+ fz_free(ctx, levels);
+ fz_rethrow(ctx);
+ }
+ return levels;
+}
+
+
+
+/* Partitions the given uint16_t sequence into one or more unidirectional
+ * fragments and invokes the given callback function for each fragment.
+ */
+void Bidi_fragmentText(fz_context *ctx,
+ const uint16_t *text,
+ size_t textlen,
+ Bidi_Direction *baseDir,
+ Bidi_Fragment_Callback callback,
+ void *arg,
+ int bidiFlag)
+{
+ size_t startOfFragment;
+ size_t i;
+ Bidi_Level *levels;
+
+ if (text == NULL || callback == NULL || textlen == 0)
+ return;
+
+ DBUGH(("Bidi_fragmentText( '%S', len = %d )\n", text, textlen ));
+
+ levels = createLevels(ctx, text, textlen, baseDir, FALSE, bidiFlag);
+
+ /* We now have an array with an embedding level
+ * for each uint16_t in text.
+ */
+ assert(levels != NULL);
+
+ fz_try(ctx)
+ {
+ startOfFragment = 0;
+ for (i = 1; i < textlen; i++)
+ {
+ if (levels[i] != levels[i-1])
+ {
+ /* We've gone past the end of the fragment.
+ * Create a text object for it, then start
+ * a new fragment.
+ */
+ if(ODD(levels[startOfFragment]) != 0)
+ {
+ /* if RTL check for mirrors and create sub-frags */
+ Bidi_createFragmentMirrors(&text[startOfFragment],
+ i - startOfFragment,
+ callback,
+ arg);
+ }
+ else
+ {
+ /* otherwise create 1 fragment */
+ (*callback)(&text[startOfFragment],
+ i - startOfFragment,
+ ODD(levels[startOfFragment]),
+ UNICODE_EOS,
+ arg);
+ }
+ startOfFragment = i;
+ }
+ }
+ /* Now i == textlen. Deal with the final (or maybe only) fragment. */
+ if(ODD(levels[startOfFragment]) != 0)
+ {
+ /* if RTL check for mirrors and create sub-frags */
+ Bidi_createFragmentMirrors(&text[startOfFragment],
+ i - startOfFragment,
+ callback,
+ arg);
+ }
+ else
+ {
+ /* otherwise create 1 fragment */
+ (*callback)(&text[startOfFragment],
+ i - startOfFragment,
+ ODD(levels[startOfFragment]),
+ UNICODE_EOS,
+ arg);
+ }
+ }
+ fz_always(ctx)
+ {
+ fz_free(ctx, levels);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
+
+
+
+static void newFragCb(const uint16_t *fragment,
+ size_t fragmentLen,
+ int rightToLeft,
+ uint16_t mirror,
+ void *arg)
+{
+ size_t fragOffset;
+ int isRtlNumber = FALSE;
+ Bidi_ProcessLine_fragData *fragData = arg;
+
+ assert(fragData != NULL);
+
+ fragOffset = fragment - fragData->entireText;
+
+ if((fragOffset != 0) &&
+ Bidi_isEuropeanNumber(fragment,fragmentLen))
+ {
+ /* fragment contains digits only */
+ isRtlNumber = TRUE;
+ }
+ fragData->callersCallback(fragOffset,
+ fragmentLen,
+ rightToLeft,
+ mirror,
+ fragData->callersData,
+ isRtlNumber);
+}
+
+
+
+/* Currently this creates a text representation of all the objects
+ * on a line, then passes it to Bidi_fragmentText. In future, we
+ * should create the levels array directly from the objects supplied.
+ */
+Bidi_Direction Bidi_processLine(fz_context *ctx,
+ Bidi_PL_NextObj_Callback nextObjCb,
+ Bidi_PL_Fragment_Callback fragmentCb,
+ void *callerData,
+ int bidiFlag,
+ int *more)
+{
+ uint16_t *entireText = NULL;
+ size_t entireLength = 0;
+ size_t lengthIncrement = 100;
+ size_t freeSpace = 0;
+ const uint16_t *objText = NULL;
+ size_t objLength = 0;
+ Bidi_Direction explicitDirection = Bidi_Neutral;
+
+ fz_var(entireText);
+
+ fz_try(ctx)
+ {
+ do
+ {
+ /* Call the client-supplied function to get the next object. */
+ nextObjCb(callerData, &objText, &objLength, more, &explicitDirection);
+ if (objText == NULL || objLength == 0)
+ {
+ /* End of sequence, or caller is giving us rubbish. */
+ break;
+ }
+
+ if (freeSpace < objLength)
+ {
+ /* We need a bigger buffer. */
+ uint16_t *newBuffer = NULL;
+ size_t increment = (objLength < lengthIncrement) ?
+ lengthIncrement : objLength;
+ /* Accommodate the string terminator that ustrncat will write */
+ if (entireText == NULL)
+ {
+ /* First Time. Add extra byte */
+ entireLength += increment + 1;
+ }
+ else
+ {
+ entireLength += increment;
+ }
+ newBuffer = fz_resize_array(ctx, entireText, entireLength, sizeof(uint16_t));
+ if (entireText == NULL)
+ {
+ /* First time. Pretend buffer holds an empty string. */
+ newBuffer[0] = UNICODE_EOS;
+ }
+ entireText = newBuffer;
+ freeSpace += increment;
+ }
+ ustrncat (entireText, objText, objLength);
+
+ freeSpace -= objLength;
+ }
+ while (objText != NULL);
+
+ if (entireText != NULL)
+ {
+ size_t i;
+ size_t length = entireLength - 1 - freeSpace;
+ Bidi_ProcessLine_fragData fragData;
+ /* replace TAB character with SPACE for html Documents */
+ if((bidiFlag & Bidi_replaceTab) != 0)
+ {
+ for (i = 0; i < length; i++)
+ {
+ if (entireText[i]=='\t')
+ {
+ entireText[i] = ' ';
+ }
+ }
+ }
+
+ fragData.entireText = entireText;
+ fragData.callersCallback = fragmentCb;
+ fragData.callersData = callerData;
+ Bidi_fragmentText(ctx,
+ entireText,
+ length, &explicitDirection,
+ newFragCb, &fragData,
+ bidiFlag);
+ }
+ }
+ fz_always(ctx)
+ {
+ fz_free(ctx, entireText);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+
+ return explicitDirection;
+}
+
+
+#ifdef BIDI_LINE_AT_A_TIME
+/* This changes the given text by reordering and possibly mirroring
+ * characters. Suitable only for a single line of text.
+ */
+void Bidi_processText(fz_context *ctx,
+ uint16_t *text,
+ Bidi_Direction baseDirection,
+ Bidi_Direction outputDirection,
+ int len)
+{
+ Bidi_Level *levels;
+
+ if (text == NULL || len == 0)
+ {
+ return;
+ }
+
+ levels = createLevels(ctx, text, (size_t)len, &baseDirection, TRUE, 0);
+
+ assert (levels != NULL);
+
+ /* resolve mirrored characters, e.g. swap '(' with ')' */
+ Bidi_mirror(text, levels, len);
+
+ (void)Bidi_reorder(baseDirection, text, levels, len);
+
+ /* reverse the string */
+ if (outputDirection == Bidi_RightToLeft)
+ ustrnreverse(text, len);
+
+#ifdef DEBUG_BIDI_VERBOSE
+ fprintf(stderr, "Done: ");
+ {
+ int i;
+ for (i = 0; i < len; i++)
+ {
+ /* So that we can actually sort of read the debug string, any
+ * non-ascii characters are replaced with a 1-digit hash
+ * value from 0-9, making non-english characters appear
+ * as numbers
+ */
+ fprintf(stderr, "%c", (text[i] <= 127 && text[i ]>= 32)?
+ text[i]
+ :(char)((text[i] % 9) + 48)
+ );
+ }
+ fprintf(stderr, "\n\n");
+ }
+#endif
+
+ fz_free(ctx, levels);
+}
+#endif