summaryrefslogtreecommitdiff
path: root/source/fitz/strtof.c
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2016-02-03 14:52:43 +0000
committerRobin Watts <robin.watts@artifex.com>2016-02-03 16:34:24 +0000
commit36e57486f01fc026fd1e8886b32f91e385befedc (patch)
tree43c2cf9f99e70539aea65c7b889427fd84a4270e /source/fitz/strtof.c
parentd9c24f4fea589ab65a6046769c0642206557e60d (diff)
downloadmupdf-36e57486f01fc026fd1e8886b32f91e385befedc.tar.xz
Bug 696546: Add fast strtof
Take on a (slightly tweaked) version of Simon Reinhardt's patch. The actual logic is left entirely unchanged; minor changes have been made to the names of functions/types to avoid clashing in the cmapdump.c repeated inclusion. Currently this should really only affect xps files, as strtof is only used as fz_atof, and that's (effectively) all xps for now. I will look at updating lex_number to call this in future.
Diffstat (limited to 'source/fitz/strtof.c')
-rw-r--r--source/fitz/strtof.c471
1 files changed, 471 insertions, 0 deletions
diff --git a/source/fitz/strtof.c b/source/fitz/strtof.c
new file mode 100644
index 00000000..71345258
--- /dev/null
+++ b/source/fitz/strtof.c
@@ -0,0 +1,471 @@
+#include "mupdf/fitz.h"
+
+#ifndef INFINITY
+#define INFINITY (DBL_MAX+DBL_MAX)
+#endif
+#ifndef NAN
+#define NAN (INFINITY-INFINITY)
+#endif
+
+/*
+ We use "Algorithm D" from "Contributions to a Proposed Standard for Binary
+ Floating-Point Arithmetic" by Jerome Coonen (1984).
+
+ The implementation uses a self-made floating point type, 'strtof_fp_t', with
+ a 32-bit significand. The steps of the algorithm are
+
+ INPUT: Up to 9 decimal digits d1 , ... d9 and an exponent dexp.
+ OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp.
+
+ 1) Convert the integer d1 ... d9 to an strtof_fp_t x.
+ 2) Lookup the strtof_fp_t power = 10 ^ |dexp|.
+ 3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'.
+ 4) Round x to a float using rounding mode 'to even'.
+
+ Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer.
+ In the case |dexp| <= 13 the cached power is exact and the algoritm returns
+ the exactly rounded result (with rounding mode 'to even').
+ There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'.
+
+ For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp.
+ This is small enough to ensure that binary to decimal to binary conversion
+ is the identity if the decimal format uses 9 correctly rounded significant digits.
+*/
+typedef struct strtof_fp_t
+{
+ uint32_t f;
+ int e;
+} strtof_fp_t;
+
+/* Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. */
+
+static strtof_fp_t
+strtof_multiply(strtof_fp_t x, strtof_fp_t y)
+{
+ uint64_t tmp;
+ strtof_fp_t res;
+
+ assert(x.f & y.f & 0x80000000);
+
+ res.e = x.e + y.e + 32;
+ tmp = (uint64_t) x.f * y.f;
+ /* Normalize. */
+ if ((tmp < ((uint64_t) 1 << 63)))
+ {
+ tmp <<= 1;
+ --res.e;
+ }
+
+ res.f = tmp >> 32;
+
+ /* Set the last bit of the significand to 1 if the result is
+ inexact. */
+ if (tmp & 0xffffffff)
+ res.f |= 1;
+ return res;
+}
+
+static strtof_fp_t
+divide(strtof_fp_t x, strtof_fp_t y)
+{
+ uint64_t product, quotient;
+ uint32_t remainder;
+ strtof_fp_t res;
+
+ res.e = x.e - y.e - 32;
+ product = (uint64_t) x.f << 32;
+ quotient = product / y.f;
+ remainder = product % y.f;
+ /* 2^31 <= quotient <= 2^33 - 2. */
+ if (quotient <= 0xffffffff)
+ res.f = quotient;
+ else
+ {
+ ++res.e;
+ /* If quotient % 2 != 0 we have remainder != 0. */
+ res.f = quotient >> 1;
+ }
+ if (remainder)
+ res.f |= 1;
+ return res;
+}
+
+
+
+/* From 10^0 to 10^54. Generated with GNU MPFR. */
+static const uint32_t strtof_powers_ten[55] = {
+ 0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000,
+ 0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740,
+ 0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f,
+ 0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f,
+ 0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7,
+ 0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96,
+ 0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9,
+ 0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e,
+ 0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367,
+ 0xa70c3c41
+};
+static const int strtof_powers_ten_e[55] = {
+ -31, -28, -25, -22, -18, -15, -12, -8, -5, -2,
+ 2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65,
+ 68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121,
+ 125, 128, 131, 135, 138, 141, 145, 148
+};
+
+static strtof_fp_t
+strtof_cached_power(int i)
+{
+ strtof_fp_t result;
+ assert (i >= 0 && i <= 54);
+ result.f = strtof_powers_ten[i];
+ result.e = strtof_powers_ten_e[i];
+ return result;
+}
+
+/* Find number of leading zero bits in an uint32_t. Derived from the
+ "Bit Twiddling Hacks" at graphics.stanford.edu/~seander/bithacks.html. */
+static unsigned char clz_table[256] = {
+ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+# define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ sixteen_times (3) sixteen_times (2) sixteen_times (2)
+ sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1)
+ /* Zero for the rest. */
+};
+static unsigned
+leading_zeros (uint32_t x)
+{
+ unsigned tmp1, tmp2;
+
+ tmp1 = x >> 16;
+ if (tmp1)
+ {
+ tmp2 = tmp1 >> 8;
+ if (tmp2)
+ return clz_table[tmp2];
+ else
+ return 8 + clz_table[tmp1];
+ }
+ else
+ {
+ tmp1 = x >> 8;
+ if (tmp1)
+ return 16 + clz_table[tmp1];
+ else
+ return 24 + clz_table[x];
+ }
+}
+
+static strtof_fp_t
+uint32_to_diy (uint32_t x)
+{
+ strtof_fp_t result = {x, 0};
+ unsigned shift = leading_zeros(x);
+
+ result.f <<= shift;
+ result.e -= shift;
+ return result;
+}
+
+
+#define SP_SIGNIFICAND_SIZE 23
+#define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE)
+#define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS)
+#define SP_EXPONENT_MASK 0x7f800000
+#define SP_SIGNIFICAND_MASK 0x7fffff
+#define SP_HIDDEN_BIT 0x800000 /* 2^23 */
+
+/* Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'.
+ See "Implementing IEEE 754-2008 Rounding" in the
+ "Handbook of Floating-Point Arithmetik".
+*/
+static float
+diy_to_float(strtof_fp_t x, int negative)
+{
+ uint32_t result;
+ union
+ {
+ float f;
+ uint32_t n;
+ } tmp;
+
+ assert(x.f & 0x80000000);
+
+ /* We have 2^32 - 2^7 = 0xffffff80. */
+ if (x.e > 96 || (x.e == 96 && x.f >= 0xffffff80))
+ {
+ /* Overflow. Set result to infinity. */
+ errno = ERANGE;
+ result = 0xff << SP_SIGNIFICAND_SIZE;
+ }
+ /* We have 2^32 - 2^8 = 0xffffff00. */
+ else if (x.e > -158)
+ {
+ /* x is greator or equal to FLT_MAX. So we get a normalized number. */
+ result = (uint32_t) (x.e + 158) << SP_SIGNIFICAND_SIZE;
+ result |= (x.f >> 8) & SP_SIGNIFICAND_MASK;
+
+ if (x.f & 0x80)
+ {
+ /* Round-bit is set. */
+ if (x.f & 0x7f)
+ /* Sticky-bit is set. */
+ ++result;
+ else if (x.f & 0x100)
+ /* Significand is odd. */
+ ++result;
+ }
+ }
+ else if (x.e == -158 && x.f >= 0xffffff00)
+ {
+ /* x is in the range (2^32, 2^32 - 2^8] * 2^-158, so its smaller than
+ FLT_MIN but still rounds to it. */
+ result = 1U << SP_SIGNIFICAND_SIZE;
+ }
+ else if (x.e > -181)
+ {
+ /* Non-zero Denormal. */
+ int shift = -149 - x.e; /* 9 <= shift <= 31. */
+
+ result = x.f >> shift;
+
+ if (x.f & (1U << (shift - 1)))
+ /* Round-bit is set. */
+ {
+ if (x.f & ((1U << (shift - 1)) - 1))
+ /* Sticky-bit is set. */
+ ++result;
+ else if (x.f & 1U << shift)
+ /* Significand is odd. */
+ ++result;
+ }
+ }
+ else if (x.e == -181 && x.f > 0x80000000)
+ {
+ /* x is in the range (0.5,1) * 2^-149 so it rounds to the smallest
+ denormal. Cant't handle this in the previous case as shifting a
+ uint32_t 32 bits to the right is undefined behaviour. */
+ result = 1;
+ }
+ else
+ {
+ /* Underflow. */
+ errno = ERANGE;
+ result = 0;
+ }
+
+ if (negative)
+ result |= 0x80000000;
+
+ tmp.n = result;
+ return tmp.f;
+}
+
+static float
+scale_integer_to_float(uint32_t M, int N, int negative)
+{
+ strtof_fp_t result, x, power;
+
+ if (M == 0)
+ return negative ? -0.f : 0.f;
+ if (N > 38)
+ {
+ /* Overflow. */
+ errno = ERANGE;
+ return negative ? -INFINITY : INFINITY;
+ }
+ if (N < -54)
+ {
+ /* Underflow. */
+ errno = ERANGE;
+ return negative ? -0.f : 0.f;
+ }
+ /* If N is in the range {-13, ..., 13} the conversion is exact.
+ Try to scale N into this region. */
+ while (N > 13 && M <= 0xffffffff / 10)
+ {
+ M *= 10;
+ --N;
+ }
+
+ while (N < -13 && M % 10 == 0)
+ {
+ M /= 10;
+ ++N;
+ }
+
+ x = uint32_to_diy (M);
+ if (N >= 0)
+ {
+ power = strtof_cached_power(N);
+ result = strtof_multiply(x, power);
+ }
+ else
+ {
+ power = strtof_cached_power(-N);
+ result = divide(x, power);
+ }
+
+ return diy_to_float(result, negative);
+}
+
+/* Return non-zero if *s starts with string (must be uppercase), ignoring case,
+ and increment *s by its length. */
+static int
+starts_with(const char **s, const char *string)
+{
+ const char *x = *s, *y = string;
+ while (*x && *y && (*x == *y || *x == *y + 32))
+ ++x, ++y;
+ if (*y == 0)
+ {
+ /* Match. */
+ *s = x;
+ return 1;
+ }
+ else
+ return 0;
+}
+#define SET_TAILPTR(tailptr, s) \
+ do \
+ if (tailptr) \
+ *tailptr = (char *) s; \
+ while (0)
+
+static float
+strtof_internal(const char *string, char **tailptr, int exp_format)
+{
+ /* FIXME: error (1/2 + 1/256) ulp */
+ const char *s;
+ uint32_t M = 0;
+ int N = 0;
+ /* If decimal_digits gets 9 we truncate all following digits. */
+ int decimal_digits = 0;
+ int negative = 0;
+ const char *number_start = 0;
+
+ /* Skip leading whitespace (isspace in "C" locale). */
+ s = string;
+ while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t'
+ || *s == '\v')
+ ++s;
+
+ /* Parse sign. */
+ if (*s == '+')
+ ++s;
+ if (*s == '-')
+ {
+ negative = 1;
+ ++s;
+ }
+ number_start = s;
+ /* Parse digits before decimal point. */
+ while (*s >= '0' && *s <= '9')
+ {
+ if (decimal_digits)
+ {
+ if (decimal_digits < 9)
+ {
+ ++decimal_digits;
+ M = M * 10 + *s - '0';
+ }
+ /* Really arcane strings might overflow N. */
+ else if (N < 1000)
+ ++N;
+ }
+ else if (*s > '0')
+ {
+ M = *s - '0';
+ ++decimal_digits;
+ }
+ ++s;
+ }
+
+ /* Parse decimal point. */
+ if (*s == '.')
+ ++s;
+
+ /* Parse digits after decimal point. */
+ while (*s >= '0' && *s <= '9')
+ {
+ if (decimal_digits < 9)
+ {
+ if (decimal_digits || *s > '0')
+ {
+ ++decimal_digits;
+ M = M * 10 + *s - '0';
+ }
+ --N;
+ }
+ ++s;
+ }
+ if ((s == number_start + 1 && *number_start == '.') || number_start == s)
+ {
+ /* No Number. Check for INF and NAN strings. */
+ s = number_start;
+ if (starts_with(&s, "INFINITY") || starts_with(&s, "INF"))
+ {
+ errno = ERANGE;
+ SET_TAILPTR(tailptr, s);
+ return negative ? -INFINITY : +INFINITY;
+ }
+ else if (starts_with(&s, "NAN"))
+ {
+ SET_TAILPTR(tailptr, s);
+ return (float)NAN;
+ }
+ else
+ {
+ SET_TAILPTR(tailptr, string);
+ return 0.f;
+ }
+ }
+
+ /* Parse exponent. */
+ if (exp_format && (*s == 'e' || *s == 'E'))
+ {
+ int exp_negative = 0;
+ int exp = 0;
+ const char *int_start;
+ const char *exp_start = s;
+
+ ++s;
+ if (*s == '+')
+ ++s;
+ else if (*s == '-')
+ {
+ ++s;
+ exp_negative = 1;
+ }
+ int_start = s;
+ /* Parse integer. */
+ while (*s >= '0' && *s <= '9')
+ {
+ /* Make sure exp does not get overflowed. */
+ if (exp < 100)
+ exp = exp * 10 + *s - '0';
+ ++s;
+ }
+ if (exp_negative)
+ exp = -exp;
+ if (s == int_start)
+ /* No Number. */
+ s = exp_start;
+ else
+ N += exp;
+ }
+
+ SET_TAILPTR(tailptr, s);
+ return scale_integer_to_float(M, N, negative);
+}
+
+float
+fz_strtof(const char *string, char **tailptr)
+{
+ return strtof_internal(string, tailptr, 1);
+}
+
+float
+fz_strtof_no_exp(const char *string, char **tailptr)
+{
+ return strtof_internal(string, tailptr, 0);
+}