author    Tor Andersson <tor.andersson@artifex.com>    2013-06-19 15:29:44 +0200
committer Tor Andersson <tor.andersson@artifex.com>    2013-06-20 16:45:35 +0200
commit    0a927854a10e1e6b9770a81e2e1d9f3093631757 (patch)
tree      3d65d820d9fdba2d0d394d99c36290c851b78ca0 /source/fitz/draw-scale-simple.c
parent    1ae8f19179c5f0f8c6352b3c7855465325d5449a (diff)
Rearrange source files.
Diffstat (limited to 'source/fitz/draw-scale-simple.c')
 source/fitz/draw-scale-simple.c | 1509 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 1509 insertions(+), 0 deletions(-)
diff --git a/source/fitz/draw-scale-simple.c b/source/fitz/draw-scale-simple.c
new file mode 100644
index 00000000..08dedf0b
--- /dev/null
+++ b/source/fitz/draw-scale-simple.c
@@ -0,0 +1,1509 @@
+/*
+This code does smooth scaling of a pixmap.
+
+This function returns a new pixmap representing the area starting at (0,0)
+obtained by taking the source pixmap src, scaling it to width w and height
+h, and then positioning it at (frac(x),frac(y)).
+
+This is a cut-down version of draw_scale.c that only copes with filters
+that return values strictly in the 0..1 range, and uses bytes for
+intermediate results rather than ints.
+*/
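+
+/*
+Worked example (editorial note, not part of the original commit): with
+x=3.25, y=7.5, w=100 and h=50, the integer parts give the output origin
+(3,7), the sub-pixel offsets are frac(3.25)=0.25 and frac(7.5)=0.5, and
+the resulting pixmap covers the whole-pixel bounding box of
+ceil(0.25+100) x ceil(0.5+50) = 101x51 pixels. The fractional offsets
+are absorbed into the filter weights rather than the stored origin.
+*/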
+
+#include "mupdf/fitz.h"
+#include "draw-imp.h"
+
+/* Do we special case handling of single pixel high/wide images? The
+ * 'purest' handling is given by not special casing them, but certain
+ * files that use such images 'stack' them to give full images. Not
+ * special casing them results in them being fainter and giving noticeable
+ * rounding errors.
+ */
+#define SINGLE_PIXEL_SPECIALS
+
+#ifdef DEBUG_SCALING
+#ifdef WIN32
+#include <windows.h>
+static void debug_print(const char *fmt, ...)
+{
+ va_list args;
+ char text[256];
+ va_start(args, fmt);
+ vsnprintf(text, sizeof(text), fmt, args);
+ va_end(args);
+ OutputDebugStringA(text);
+ printf("%s", text);
+}
+#else
+static void debug_print(const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+}
+#endif
+#endif
+#ifdef DEBUG_SCALING
+#define DBUG(A) debug_print A
+#else
+#define DBUG(A) do {} while(0)
+#endif
+
+/*
+Consider a row of source samples, src, of width src_w, positioned at x,
+scaled to width dst_w.
+
+src[i] is centred at: x + (i + 0.5)*dst_w/src_w
+
+Therefore the distance between the centre of the jth output pixel and
+the centre of the ith source sample is:
+
+dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)
+
+When scaling up, therefore:
+
+dst[j] = SUM(filter(dist[j,i]) * src[i])
+ (for all ints i)
+
+This can be simplified by noticing that filters are only non-zero within
+a given filter width (henceforth called W). So:
+
+dst[j] = SUM(filter(dist[j,i]) * src[i])
+ (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)
+
+When scaling down, each filtered source sample is stretched to be wider
+to avoid aliasing issues. This effectively reduces the distance between
+centres.
+
+dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
+ (where F = dst_w/src_w)
+ (for ints i, s.t. (j-W)/F < i < (j+W)/F)
+
+*/
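+
+/*
+Worked example (editorial note, not part of the original commit): take
+src_w=2, dst_w=4, x=0 and the triangle filter (W=1). We are scaling up,
+so the code below uses F=1 and G=src_w/dst_w=0.5, where G converts dist
+from destination units into source-sample units. For output pixel j=1:
+
+	dist[1,0] = 1 + 0.5 - (0 + 0.5)*4/2 = 0.5, so filter(0.5*0.5) = 0.75
+	dist[1,1] = 1 + 0.5 - (1 + 0.5)*4/2 = -1.5, so filter(1.5*0.5) = 0.25
+
+giving dst[1] = 0.75*src[0] + 0.25*src[1]; the weights sum to 1, as they
+should for a fully covered output pixel.
+*/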
+
+typedef struct fz_scale_filter_s fz_scale_filter;
+
+struct fz_scale_filter_s
+{
+ int width;
+ float (*fn)(fz_scale_filter *, float);
+};
+
+/* Image scale filters */
+
+static float
+triangle(fz_scale_filter *filter, float f)
+{
+ if (f >= 1)
+ return 0;
+ return 1-f;
+}
+
+static float
+box(fz_scale_filter *filter, float f)
+{
+ if (f >= 0.5f)
+ return 0;
+ return 1;
+}
+
+static float
+simple(fz_scale_filter *filter, float x)
+{
+ if (x >= 1)
+ return 0;
+ return 1 + (2*x - 3)*x*x;
+}
+
+fz_scale_filter fz_scale_filter_box = { 1, box };
+fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
+fz_scale_filter fz_scale_filter_simple = { 1, simple };
+
+/*
+We build ourselves a set of tables to contain the precalculated weights
+for a given set of scale settings.
+
+The first dst_w entries in index are offsets into index itself, each
+pointing to the set of weights for the corresponding destination pixel.
+
+Each of the sets of weights is a set of values consisting of:
+ the minimum source pixel index used for this destination pixel
+ the number of weights used for this destination pixel
+ the weights themselves
+
+So to calculate dst[i] we do the following:
+
+ weights = &index[index[i]];
+ min = *weights++;
+ len = *weights++;
+ dst[i] = 0;
+ while (len-- > 0)
+ dst[i] += src[min++] * *weights++
+
+In addition, we guarantee that at the end of this process weights will
+point to the set of weights for dst pixel i+1.
+
+In the simplest version of this algorithm, we would scale the whole image
+horizontally first into a temporary buffer, then scale that temporary
+buffer again vertically to give us our result. Using such a simple
+algorithm would mean that we could use the same style of weights for both
+horizontal and vertical scaling.
+
+Unfortunately, this would also require a large temporary buffer,
+particularly in the case where we are scaling up.
+
+We therefore modify the algorithm as follows: we scale scanlines from the
+source image horizontally into a temporary buffer, until we have all the
+contributors for a given output scanline. We then produce that output
+scanline from the temporary buffer. In this way we restrict the height
+of the temporary buffer to a small fraction of the final size.
+
+Unfortunately, this means that the pseudo code for recombining a
+scanline of fully scaled pixels is as follows:
+
+ weights = &index[index[y]];
+ min = *weights++;
+ len = *weights++;
+ for (x=0 to dst_w)
+ min2 = min
+ len2 = len
+ weights2 = weights
+ dst[x] = 0;
+ while (len2-- > 0)
+ dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++
+
+i.e. it requires a % operation for every source pixel - this is typically
+expensive.
+
+To avoid this, we alter the order in which vertical weights are stored,
+so that they are ordered in the same order as the temporary buffer lines
+would appear. This simplifies the algorithm to:
+
+ weights = &index[index[y]];
+ min = *weights++;
+ len = *weights++;
+ for (x=0 to dst_w)
+ min2 = 0
+ len2 = len
+ weights2 = weights
+ dst[x] = 0;
+ while (len2-- > 0)
+ dst[x] += temp[x][min2++] * *weights2++
+
+This means that len may be larger than it needs to be (due to the
+possible inclusion of a zero weight row or two), but in practice this
+is only an increase of 1 or 2 at worst.
+
+We implement this by generating the weights as normal (but ensuring we
+leave enough space) and then reordering afterwards.
+
+*/
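+
+/*
+Editorial illustration (not part of the original commit): for a small
+horizontal (unreordered) table with src_w=2, dst_w=4, x=0, patch_l=0,
+patch_r=4 and the triangle filter, the layout described above works out
+as:
+
+	index[0..3]   = { 4, 7, 11, 15 }    row pointers for j=0..3
+	index[4..6]   = { 0, 1, 256 }       j=0: min=0, len=1, weights
+	index[7..10]  = { 0, 2, 192, 64 }   j=1: min=0, len=2, weights
+	index[11..14] = { 0, 2, 64, 192 }   j=2: min=0, len=2, weights
+	index[15..17] = { 1, 1, 256 }       j=3: min=1, len=1, weights
+
+Weights are 8.8 fixed point (256 == 1.0); the two edge pixels have been
+normalised up to 256 by check_weights (see below). Vertical tables would
+additionally be reordered as described above.
+*/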
+
+typedef struct fz_weights_s fz_weights;
+
+/* This structure is accessed from ARM code - bear this in mind before
+ * altering it! */
+struct fz_weights_s
+{
+ int flip; /* true if outputting reversed */
+ int count; /* number of output pixels we have records for in this table */
+ int max_len; /* Maximum number of weights for any one output pixel */
+ int n; /* number of components (src->n) */
+ int new_line; /* True if no weights for the current output pixel */
+ int patch_l; /* How many output pixels we skip over */
+ int index[1];
+};
+
+struct fz_scale_cache_s
+{
+ int src_w;
+ float x;
+ float dst_w;
+ fz_scale_filter *filter;
+ int vertical;
+ int dst_w_int;
+ int patch_l;
+ int patch_r;
+ int n;
+ int flip;
+ fz_weights *weights;
+};
+
+static fz_weights *
+new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
+{
+ int max_len;
+ fz_weights *weights;
+
+ if (src_w > dst_w)
+ {
+ /* Scaling down, so there will be a maximum of
+ * 2*filterwidth*src_w/dst_w src pixels
+ * contributing to each dst pixel. */
+ max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
+ if (max_len > src_w)
+ max_len = src_w;
+ }
+ else
+ {
+ /* Scaling up, so there will be a maximum of
+ * 2*filterwidth src pixels contributing to each dst pixel.
+ */
+ max_len = 2 * filter->width;
+ }
+ /* We need the size of the struct,
+ * plus patch_w*sizeof(int) for the index
+ * plus (2+max_len)*sizeof(int) for the weights
+ * plus room for an extra set of weights for reordering.
+ */
+ weights = fz_malloc(ctx, sizeof(*weights)+(max_len+3)*(patch_w+1)*sizeof(int));
+ if (!weights)
+ return NULL;
+ weights->count = -1;
+ weights->max_len = max_len;
+ weights->index[0] = patch_w;
+ weights->n = n;
+ weights->patch_l = patch_l;
+ weights->flip = flip;
+ return weights;
+}
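+
+/*
+Editorial note (not part of the original commit): as a concrete instance
+of the sizing above, downscaling src_w=100 to dst_w=30 with the triangle
+filter (width 1) gives max_len = ceil(2*1*100/30) = 7, so each output
+pixel stores at most 7 weights plus its min/len header.
+*/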
+
+/* j is destination pixel in the patch_l..patch_l+patch_w range */
+static void
+init_weights(fz_weights *weights, int j)
+{
+ int index;
+
+ j -= weights->patch_l;
+ assert(weights->count == j-1);
+ weights->count++;
+ weights->new_line = 1;
+ if (j == 0)
+ index = weights->index[0];
+ else
+ {
+ index = weights->index[j-1];
+ index += 2 + weights->index[index+1];
+ }
+ weights->index[j] = index; /* row pointer */
+ weights->index[index] = 0; /* min */
+ weights->index[index+1] = 0; /* len */
+}
+
+static void
+add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
+ float x, float F, float G, int src_w, float dst_w)
+{
+ float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
+ float f;
+ int min, len, index, weight;
+
+ dist *= G;
+ if (dist < 0)
+ dist = -dist;
+ f = filter->fn(filter, dist)*F;
+ weight = (int)(256*f+0.5f);
+
+ /* Ensure i is in range */
+ if (i < 0 || i >= src_w)
+ return;
+ if (weight == 0)
+ {
+ /* We add a fudge factor here to allow for extreme downscales
+ * where all the weights round to 0. Ensure that at least one
+ * (arbitrarily the first one) is non zero. */
+ if (weights->new_line && f > 0)
+ weight = 1;
+ else
+ return;
+ }
+
+ DBUG(("add_weight[%d][%d] = %d(%g) dist=%g\n",j,i,weight,f,dist));
+
+ /* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
+ j -= weights->patch_l;
+ if (weights->new_line)
+ {
+ /* New line */
+ weights->new_line = 0;
+ index = weights->index[j]; /* row pointer */
+ weights->index[index] = i; /* min */
+ weights->index[index+1] = 0; /* len */
+ }
+ index = weights->index[j];
+ min = weights->index[index++];
+ len = weights->index[index++];
+ while (i < min)
+ {
+ /* This only happens in rare cases, but we need to insert
+ * one earlier. In exceedingly rare cases we may need to
+ * insert more than one earlier. */
+ int k;
+
+ for (k = len; k > 0; k--)
+ {
+ weights->index[index+k] = weights->index[index+k-1];
+ }
+ weights->index[index] = 0;
+ min--;
+ len++;
+ weights->index[index-2] = min;
+ weights->index[index-1] = len;
+ }
+ if (i-min >= len)
+ {
+ /* The usual case */
+ while (i-min >= ++len)
+ {
+ weights->index[index+len-1] = 0;
+ }
+ assert(len-1 == i-min);
+ weights->index[index+i-min] = weight;
+ weights->index[index-1] = len;
+ assert(len <= weights->max_len);
+ }
+ else
+ {
+ /* Infrequent case */
+ weights->index[index+i-min] += weight;
+ }
+}
+
+static void
+reorder_weights(fz_weights *weights, int j, int src_w)
+{
+ int idx = weights->index[j - weights->patch_l];
+ int min = weights->index[idx++];
+ int len = weights->index[idx++];
+ int max = weights->max_len;
+ int tmp = idx+max;
+ int i, off;
+
+ /* Copy into the temporary area */
+ memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);
+
+ /* Pad out if required */
+ assert(len <= max);
+ assert(min+len <= src_w);
+ off = 0;
+ if (len < max)
+ {
+ memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
+ len = max;
+ if (min + len > src_w)
+ {
+ off = min + len - src_w;
+ min = src_w - len;
+ weights->index[idx-2] = min;
+ }
+ weights->index[idx-1] = len;
+ }
+
+ /* Copy back into the proper places */
+ for (i = 0; i < len; i++)
+ {
+ weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
+ }
+}
+
+/* Due to rounding and edge effects, the sums for the weights sometimes don't
+ * add up to 256. This causes visible rendering effects. Therefore, we take
+ * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
+ * 256 for all pixels that are completely covered. See bug #691629. */
+static void
+check_weights(fz_weights *weights, int j, int w, float x, float wf)
+{
+ int idx, len;
+ int sum = 0;
+ int max = -256;
+ int maxidx = 0;
+ int i;
+
+ idx = weights->index[j - weights->patch_l];
+ idx++; /* min */
+ len = weights->index[idx++];
+
+ for(i=0; i < len; i++)
+ {
+ int v = weights->index[idx++];
+ sum += v;
+ if (v > max)
+ {
+ max = v;
+ maxidx = idx;
+ }
+ }
+ /* If we aren't the first or last pixel, OR if the sum is too big
+ * then adjust it. */
+ if (((j != 0) && (j != w-1)) || (sum > 256))
+ weights->index[maxidx-1] += 256-sum;
+ /* Otherwise, if we are the first pixel, and it's fully covered, then
+ * adjust it. */
+ else if ((j == 0) && (x < 0.0001F) && (sum != 256))
+ weights->index[maxidx-1] += 256-sum;
+ /* Finally, if we are the last pixel, and it's fully covered, then
+ * adjust it. */
+ else if ((j == w-1) && ((float)w-wf < 0.0001F) && (sum != 256))
+ weights->index[maxidx-1] += 256-sum;
+ DBUG(("total weight %d = %d\n", j, sum));
+}
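+
+/*
+Editorial illustration (not part of the original commit): if an interior
+pixel's weights come out as { 127, 128 } (sum 255), check_weights bumps
+the largest entry to 129 so that the row sums to exactly 256; a sum of
+257 would likewise be trimmed back to 256.
+*/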
+
+static fz_weights *
+make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
+{
+ fz_weights *weights;
+ float F, G;
+ float window;
+ int j;
+
+ if (cache)
+ {
+ if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
+ cache->filter == filter && cache->vertical == vertical &&
+ cache->dst_w_int == dst_w_int &&
+ cache->patch_l == patch_l && cache->patch_r == patch_r &&
+ cache->n == n && cache->flip == flip)
+ {
+ return cache->weights;
+ }
+ cache->src_w = src_w;
+ cache->x = x;
+ cache->dst_w = dst_w;
+ cache->filter = filter;
+ cache->vertical = vertical;
+ cache->dst_w_int = dst_w_int;
+ cache->patch_l = patch_l;
+ cache->patch_r = patch_r;
+ cache->n = n;
+ cache->flip = flip;
+ fz_free(ctx, cache->weights);
+ cache->weights = NULL;
+ }
+
+ if (dst_w < src_w)
+ {
+ /* Scaling down */
+ F = dst_w / src_w;
+ G = 1;
+ }
+ else
+ {
+ /* Scaling up */
+ F = 1;
+ G = src_w / dst_w;
+ }
+ window = filter->width / F;
+ DBUG(("make_weights src_w=%d x=%g dst_w=%g patch_l=%d patch_r=%d F=%g window=%g\n", src_w, x, dst_w, patch_l, patch_r, F, window));
+ weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
+ if (!weights)
+ return NULL;
+ for (j = patch_l; j < patch_r; j++)
+ {
+ /* find the position of the centre of dst[j] in src space */
+ float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
+ int l, r;
+ l = ceilf(centre - window);
+ r = floorf(centre + window);
+ DBUG(("%d: centre=%g l=%d r=%d\n", j, centre, l, r));
+ init_weights(weights, j);
+ for (; l <= r; l++)
+ {
+ add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
+ }
+ check_weights(weights, j, dst_w_int, x, dst_w);
+ if (vertical)
+ {
+ reorder_weights(weights, j, src_w);
+ }
+ }
+ weights->count++; /* weights->count = dst_w_int now */
+ if (cache)
+ {
+ cache->weights = weights;
+ }
+ return weights;
+}
+
+static void
+scale_row_to_temp(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+ int *contrib = &weights->index[weights->index[0]];
+ int len, i, j, n;
+ unsigned char *min;
+ int tmp[FZ_MAX_COLORS];
+ int *t = tmp;
+
+ n = weights->n;
+ for (j = 0; j < n; j++)
+ tmp[j] = 128;
+ if (weights->flip)
+ {
+ dst += (weights->count-1)*n;
+ for (i=weights->count; i > 0; i--)
+ {
+ min = &src[n * *contrib++];
+ len = *contrib++;
+ while (len-- > 0)
+ {
+ for (j = n; j > 0; j--)
+ *t++ += *min++ * *contrib;
+ t -= n;
+ contrib++;
+ }
+ for (j = n; j > 0; j--)
+ {
+ *dst++ = (unsigned char)(*t>>8);
+ *t++ = 128;
+ }
+ t -= n;
+ dst -= n*2;
+ }
+ }
+ else
+ {
+ for (i=weights->count; i > 0; i--)
+ {
+ min = &src[n * *contrib++];
+ len = *contrib++;
+ while (len-- > 0)
+ {
+ for (j = n; j > 0; j--)
+ *t++ += *min++ * *contrib;
+ t -= n;
+ contrib++;
+ }
+ for (j = n; j > 0; j--)
+ {
+ *dst++ = (unsigned char)(*t>>8);
+ *t++ = 128;
+ }
+ t -= n;
+ }
+ }
+}
+
+#ifdef ARCH_ARM
+
+static void
+scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights)
+__attribute__((naked));
+
+static void
+scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
+__attribute__((naked));
+
+static void
+scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
+__attribute__((naked));
+
+static void
+scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row)
+__attribute__((naked));
+
+static void
+scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+ asm volatile(
+ ENTER_ARM
+ "stmfd r13!,{r4-r7,r9,r14} \n"
+ "@ r0 = dst \n"
+ "@ r1 = src \n"
+ "@ r2 = weights \n"
+ "ldr r12,[r2],#4 @ r12= flip \n"
+ "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
+ "ldr r4, [r2] @ r4 = index[0] \n"
+ "cmp r12,#0 @ if (flip) \n"
+ "beq 5f @ { \n"
+ "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
+ "add r0, r0, r3 @ dst += count \n"
+ "1: \n"
+ "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
+ "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
+ "mov r5, #128 @ r5 = a = 128 \n"
+ "add r4, r1, r4 @ r4 = min = &src[r4] \n"
+ "subs r9, r9, #1 @ len-- \n"
+ "blt 3f @ while (len >= 0) \n"
+ "2: @ { \n"
+ "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
+ "ldrgtb r7, [r4], #1 @ r7 = *min++ \n"
+ "ldr r12,[r2], #4 @ r12 = *contrib++ \n"
+ "ldrb r14,[r4], #1 @ r14 = *min++ \n"
+ "mlagt r5, r6, r7, r5 @ g += r6 * r7 \n"
+ "subs r9, r9, #2 @ r9 = len -= 2 \n"
+ "mla r5, r12,r14,r5 @ g += r14 * r12 \n"
+ "bge 2b @ } \n"
+ "3: \n"
+ "mov r5, r5, lsr #8 @ g >>= 8 \n"
+ "strb r5,[r0, #-1]! @ *--dst=a \n"
+ "subs r3, r3, #1 @ i-- \n"
+ "bgt 1b @ \n"
+ "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
+ "5:"
+ "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
+ "6:"
+ "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
+ "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
+ "mov r5, #128 @ r5 = a = 128 \n"
+ "add r4, r1, r4 @ r4 = min = &src[r4] \n"
+ "subs r9, r9, #1 @ len-- \n"
+ "blt 9f @ while (len > 0) \n"
+ "7: @ { \n"
+ "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
+ "ldrgtb r7, [r4], #1 @ r7 = *min++ \n"
+ "ldr r12,[r2], #4 @ r12 = *contrib++ \n"
+ "ldrb r14,[r4], #1 @ r14 = *min++ \n"
+ "mlagt r5, r6,r7,r5 @ a += r6 * r7 \n"
+ "subs r9, r9, #2 @ r9 = len -= 2 \n"
+ "mla r5, r12,r14,r5 @ a += r14 * r12 \n"
+ "bge 7b @ } \n"
+ "9: \n"
+ "mov r5, r5, LSR #8 @ a >>= 8 \n"
+ "strb r5, [r0], #1 @ *dst++=a \n"
+ "subs r3, r3, #1 @ i-- \n"
+ "bgt 6b @ \n"
+ "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
+ ENTER_THUMB
+ );
+}
+
+static void
+scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+ asm volatile(
+ ENTER_ARM
+ "stmfd r13!,{r4-r6,r9-r11,r14} \n"
+ "@ r0 = dst \n"
+ "@ r1 = src \n"
+ "@ r2 = weights \n"
+ "ldr r12,[r2],#4 @ r12= flip \n"
+ "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
+ "ldr r4, [r2] @ r4 = index[0] \n"
+ "cmp r12,#0 @ if (flip) \n"
+ "beq 4f @ { \n"
+ "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
+ "add r0, r0, r3, LSL #1 @ dst += 2*count \n"
+ "1: \n"
+ "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
+ "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
+ "mov r5, #128 @ r5 = g = 128 \n"
+ "mov r6, #128 @ r6 = a = 128 \n"
+ "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
+ "cmp r9, #0 @ while (len-- > 0) \n"
+ "beq 3f @ { \n"
+ "2: \n"
+ "ldr r14,[r2], #4 @ r14 = *contrib++ \n"
+ "ldrb r11,[r4], #1 @ r11 = *min++ \n"
+ "ldrb r12,[r4], #1 @ r12 = *min++ \n"
+ "subs r9, r9, #1 @ r9 = len-- \n"
+ "mla r5, r14,r11,r5 @ g += r11 * r14 \n"
+ "mla r6, r14,r12,r6 @ a += r12 * r14 \n"
+ "bgt 2b @ } \n"
+ "3: \n"
+ "mov r5, r5, lsr #8 @ g >>= 8 \n"
+ "mov r6, r6, lsr #8 @ a >>= 8 \n"
+ "strb r5, [r0, #-2]! @ *--dst=a \n"
+ "strb r6, [r0, #1] @ *--dst=g \n"
+ "subs r3, r3, #1 @ i-- \n"
+ "bgt 1b @ \n"
+ "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
+ "4:"
+ "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
+ "5:"
+ "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
+ "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
+ "mov r5, #128 @ r5 = g = 128 \n"
+ "mov r6, #128 @ r6 = a = 128 \n"
+ "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
+ "cmp r9, #0 @ while (len-- > 0) \n"
+ "beq 7f @ { \n"
+ "6: \n"
+ "ldr r14,[r2], #4 @ r10 = *contrib++ \n"
+ "ldrb r11,[r4], #1 @ r11 = *min++ \n"
+ "ldrb r12,[r4], #1 @ r12 = *min++ \n"
+ "subs r9, r9, #1 @ r9 = len-- \n"
+ "mla r5, r14,r11,r5 @ g += r11 * r14 \n"
+ "mla r6, r14,r12,r6 @ a += r12 * r14 \n"
+ "bgt 6b @ } \n"
+ "7: \n"
+ "mov r5, r5, lsr #8 @ g >>= 8 \n"
+ "mov r6, r6, lsr #8 @ a >>= 8 \n"
+ "strb r5, [r0], #1 @ *dst++=g \n"
+ "strb r6, [r0], #1 @ *dst++=a \n"
+ "subs r3, r3, #1 @ i-- \n"
+ "bgt 5b @ \n"
+ "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
+ ENTER_THUMB
+ );
+}
+
+static void
+scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+ asm volatile(
+ ENTER_ARM
+ "stmfd r13!,{r4-r11,r14} \n"
+ "@ r0 = dst \n"
+ "@ r1 = src \n"
+ "@ r2 = weights \n"
+ "ldr r12,[r2],#4 @ r12= flip \n"
+ "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
+ "ldr r4, [r2] @ r4 = index[0] \n"
+ "ldr r5,=0x00800080 @ r5 = rounding \n"
+ "ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n"
+ "cmp r12,#0 @ if (flip) \n"
+ "beq 4f @ { \n"
+ "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
+ "add r0, r0, r3, LSL #2 @ dst += 4*count \n"
+ "1: \n"
+ "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
+ "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
+ "mov r7, r5 @ r7 = b = rounding \n"
+ "mov r8, r5 @ r8 = a = rounding \n"
+ "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
+ "cmp r9, #0 @ while (len-- > 0) \n"
+ "beq 3f @ { \n"
+ "2: \n"
+ "ldr r11,[r4], #4 @ r11 = *min++ \n"
+ "ldr r10,[r2], #4 @ r10 = *contrib++ \n"
+ "subs r9, r9, #1 @ r9 = len-- \n"
+ "and r12,r6, r11 @ r12 = __22__00 \n"
+ "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
+ "mla r7, r10,r12,r7 @ b += r14 * r10 \n"
+ "mla r8, r10,r11,r8 @ a += r11 * r10 \n"
+ "bgt 2b @ } \n"
+ "3: \n"
+ "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
+ "bic r8, r8, r6 @ r8 = 33__11__ \n"
+ "orr r7, r7, r8 @ r7 = 33221100 \n"
+ "str r7, [r0, #-4]! @ *--dst=r \n"
+ "subs r3, r3, #1 @ i-- \n"
+ "bgt 1b @ \n"
+ "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
+ "4: \n"
+ "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
+ "5: \n"
+ "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
+ "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
+ "mov r7, r5 @ r7 = b = rounding \n"
+ "mov r8, r5 @ r8 = a = rounding \n"
+ "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
+ "cmp r9, #0 @ while (len-- > 0) \n"
+ "beq 7f @ { \n"
+ "6: \n"
+ "ldr r11,[r4], #4 @ r11 = *min++ \n"
+ "ldr r10,[r2], #4 @ r10 = *contrib++ \n"
+ "subs r9, r9, #1 @ r9 = len-- \n"
+ "and r12,r6, r11 @ r12 = __22__00 \n"
+ "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
+ "mla r7, r10,r12,r7 @ b += r14 * r10 \n"
+ "mla r8, r10,r11,r8 @ a += r11 * r10 \n"
+ "bgt 6b @ } \n"
+ "7: \n"
+ "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
+ "bic r8, r8, r6 @ r8 = 33__11__ \n"
+ "orr r7, r7, r8 @ r7 = 33221100 \n"
+ "str r7, [r0], #4 @ *dst++=r \n"
+ "subs r3, r3, #1 @ i-- \n"
+ "bgt 5b @ \n"
+ "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
+ ENTER_THUMB
+ );
+}
+
+static void
+scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row)
+{
+ asm volatile(
+ ENTER_ARM
+ "ldr r12,[r13] @ r12= row \n"
+ "add r2, r2, #24 @ r2 = weights->index \n"
+ "stmfd r13!,{r4-r11,r14} \n"
+ "@ r0 = dst \n"
+ "@ r1 = src \n"
+ "@ r2 = &weights->index[0] \n"
+ "@ r3 = width \n"
+ "@ r12= row \n"
+ "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
+ "add r2, r2, #4 @ r2 = &index[1] \n"
+ "subs r6, r3, #4 @ r6 = x = width-4 \n"
+ "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
+ " @ r14= len = *contrib \n"
+ "blt 4f @ while (x >= 0) { \n"
+#ifndef ARCH_ARM_CAN_LOAD_UNALIGNED
+ "tst r3, #3 @ if ((r3 & 3) \n"
+ "tsteq r1, #3 @ || (r1 & 3)) \n"
+ "bne 4f @ can't do fast code \n"
+#endif
+ "ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n"
+ "1: \n"
+ "ldr r7, =0x00800080 @ r5 = val0 = round \n"
+ "stmfd r13!,{r1,r2,r7} @ stash r1,r2,r5 \n"
+ " @ r1 = min = src \n"
+ " @ r2 = contrib2-4 \n"
+ "movs r8, r14 @ r8 = len2 = len \n"
+ "mov r5, r7 @ r7 = val1 = round \n"
+ "ble 3f @ while (len2-- > 0) { \n"
+ "2: \n"
+ "ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n"
+ "ldr r10,[r2, #4]! @ r10 = *contrib2++ \n"
+ "subs r8, r8, #1 @ len2-- \n"
+ "and r11,r9, r12 @ r11= __22__00 \n"
+ "and r12,r9, r12,LSR #8 @ r12= __33__11 \n"
+ "mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n"
+ "mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n"
+ "bgt 2b @ } \n"
+ "and r5, r9, r5, LSR #8 @ r5 = __22__00 \n"
+ "and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n"
+ "orr r5, r5, r7 @ r5 = 33221100 \n"
+ "3: \n"
+ "ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n"
+ "subs r6, r6, #4 @ x-- \n"
+ "add r1, r1, #4 @ src++ \n"
+ "str r5, [r0], #4 @ *dst++ = val \n"
+ "bge 1b @ \n"
+ "4: @ } (Less than 4 to go) \n"
+ "adds r6, r6, #4 @ r6 = x += 4 \n"
+ "beq 8f @ if (x == 0) done \n"
+ "5: \n"
+ "mov r5, r1 @ r5 = min = src \n"
+ "mov r7, #128 @ r7 = val = 128 \n"
+ "movs r8, r14 @ r8 = len2 = len \n"
+ "add r9, r2, #4 @ r9 = contrib2 \n"
+ "ble 7f @ while (len2-- > 0) { \n"
+ "6: \n"
+ "ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
+ "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n"
+ "subs r8, r8, #1 @ len2-- \n"
+ "@ stall r12 \n"
+ "mla r7, r10,r12,r7 @ val += r12 * r10 \n"
+ "bgt 6b @ } \n"
+ "7: \n"
+ "mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
+ "subs r6, r6, #1 @ x-- \n"
+ "add r1, r1, #1 @ src++ \n"
+ "strb r7, [r0], #1 @ *dst++ = val \n"
+ "bgt 5b @ \n"
+ "8: \n"
+ "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
+ ".ltorg \n"
+ ENTER_THUMB
+ );
+}
+#else
+
+static void
+scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+ int *contrib = &weights->index[weights->index[0]];
+ int len, i;
+ unsigned char *min;
+
+ assert(weights->n == 1);
+ if (weights->flip)
+ {
+ dst += weights->count;
+ for (i=weights->count; i > 0; i--)
+ {
+ int val = 128;
+ min = &src[*contrib++];
+ len = *contrib++;
+ while (len-- > 0)
+ {
+ val += *min++ * *contrib++;
+ }
+ *--dst = (unsigned char)(val>>8);
+ }
+ }
+ else
+ {
+ for (i=weights->count; i > 0; i--)
+ {
+ int val = 128;
+ min = &src[*contrib++];
+ len = *contrib++;
+ while (len-- > 0)
+ {
+ val += *min++ * *contrib++;
+ }
+ *dst++ = (unsigned char)(val>>8);
+ }
+ }
+}
+
+static void
+scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+ int *contrib = &weights->index[weights->index[0]];
+ int len, i;
+ unsigned char *min;
+
+ assert(weights->n == 2);
+ if (weights->flip)
+ {
+ dst += 2*weights->count;
+ for (i=weights->count; i > 0; i--)
+ {
+ int c1 = 128;
+ int c2 = 128;
+ min = &src[2 * *contrib++];
+ len = *contrib++;
+ while (len-- > 0)
+ {
+ c1 += *min++ * *contrib;
+ c2 += *min++ * *contrib++;
+ }
+ *--dst = (unsigned char)(c2>>8);
+ *--dst = (unsigned char)(c1>>8);
+ }
+ }
+ else
+ {
+ for (i=weights->count; i > 0; i--)
+ {
+ int c1 = 128;
+ int c2 = 128;
+ min = &src[2 * *contrib++];
+ len = *contrib++;
+ while (len-- > 0)
+ {
+ c1 += *min++ * *contrib;
+ c2 += *min++ * *contrib++;
+ }
+ *dst++ = (unsigned char)(c1>>8);
+ *dst++ = (unsigned char)(c2>>8);
+ }
+ }
+}
+
+static void
+scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+ int *contrib = &weights->index[weights->index[0]];
+ int len, i;
+ unsigned char *min;
+
+ assert(weights->n == 4);
+ if (weights->flip)
+ {
+ dst += 4*weights->count;
+ for (i=weights->count; i > 0; i--)
+ {
+ int r = 128;
+ int g = 128;
+ int b = 128;
+ int a = 128;
+ min = &src[4 * *contrib++];
+ len = *contrib++;
+ while (len-- > 0)
+ {
+ r += *min++ * *contrib;
+ g += *min++ * *contrib;
+ b += *min++ * *contrib;
+ a += *min++ * *contrib++;
+ }
+ *--dst = (unsigned char)(a>>8);
+ *--dst = (unsigned char)(b>>8);
+ *--dst = (unsigned char)(g>>8);
+ *--dst = (unsigned char)(r>>8);
+ }
+ }
+ else
+ {
+ for (i=weights->count; i > 0; i--)
+ {
+ int r = 128;
+ int g = 128;
+ int b = 128;
+ int a = 128;
+ min = &src[4 * *contrib++];
+ len = *contrib++;
+ while (len-- > 0)
+ {
+ r += *min++ * *contrib;
+ g += *min++ * *contrib;
+ b += *min++ * *contrib;
+ a += *min++ * *contrib++;
+ }
+ *dst++ = (unsigned char)(r>>8);
+ *dst++ = (unsigned char)(g>>8);
+ *dst++ = (unsigned char)(b>>8);
+ *dst++ = (unsigned char)(a>>8);
+ }
+ }
+}
+
+static void
+scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row)
+{
+ int *contrib = &weights->index[weights->index[row]];
+ int len, x;
+
+ contrib++; /* Skip min */
+ len = *contrib++;
+ for (x=width; x > 0; x--)
+ {
+ unsigned char *min = src;
+ int val = 128;
+ int len2 = len;
+ int *contrib2 = contrib;
+
+ while (len2-- > 0)
+ {
+ val += *min * *contrib2++;
+ min += width;
+ }
+ *dst++ = (unsigned char)(val>>8);
+ src++;
+ }
+}
+#endif
+
+#ifdef SINGLE_PIXEL_SPECIALS
+static void
+duplicate_single_pixel(unsigned char *dst, unsigned char *src, int n, int w, int h)
+{
+ int i;
+
+ for (i = n; i > 0; i--)
+ *dst++ = *src++;
+ for (i = (w*h-1)*n; i > 0; i--)
+ {
+ *dst = dst[-n];
+ dst++;
+ }
+}
+
+static void
+scale_single_row(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int h)
+{
+ int *contrib = &weights->index[weights->index[0]];
+ int min, len, i, j, n;
+ int tmp[FZ_MAX_COLORS];
+
+ n = weights->n;
+ /* Scale a single row */
+ for (j = 0; j < n; j++)
+ tmp[j] = 128;
+ if (weights->flip)
+ {
+ dst += (weights->count-1)*n;
+ for (i=weights->count; i > 0; i--)
+ {
+ min = *contrib++;
+ len = *contrib++;
+ min *= n;
+ while (len-- > 0)
+ {
+ for (j = 0; j < n; j++)
+ tmp[j] += src[min++] * *contrib;
+ contrib++;
+ }
+ for (j = 0; j < n; j++)
+ {
+ *dst++ = (unsigned char)(tmp[j]>>8);
+ tmp[j] = 128;
+ }
+ dst -= 2*n;
+ }
+ dst += n * (weights->count+1);
+ }
+ else
+ {
+ for (i=weights->count; i > 0; i--)
+ {
+ min = *contrib++;
+ len = *contrib++;
+ min *= n;
+ while (len-- > 0)
+ {
+ for (j = 0; j < n; j++)
+ tmp[j] += src[min++] * *contrib;
+ contrib++;
+ }
+ for (j = 0; j < n; j++)
+ {
+ *dst++ = (unsigned char)(tmp[j]>>8);
+ tmp[j] = 128;
+ }
+ }
+ }
+ /* And then duplicate it h times */
+ n *= weights->count;
+ while (--h > 0)
+ {
+ memcpy(dst, dst-n, n);
+ dst += n;
+ }
+}
+
+static void
+scale_single_col(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int n, int w, int flip_y)
+{
+ int *contrib = &weights->index[weights->index[0]];
+ int min, len, i, j;
+ int tmp[FZ_MAX_COLORS];
+
+ for (j = 0; j < n; j++)
+ tmp[j] = 128;
+ if (flip_y)
+ {
+ src_w = (src_w-1)*n;
+ w = (w-1)*n;
+ for (i=weights->count; i > 0; i--)
+ {
+ /* Scale the next pixel in the column */
+ min = *contrib++;
+ len = *contrib++;
+ min = src_w-min*n;
+ while (len-- > 0)
+ {
+ for (j = 0; j < n; j++)
+ tmp[j] += src[min+j] * *contrib;
+ min -= n;
+ contrib++;
+ }
+ for (j = 0; j < n; j++)
+ {
+ *dst++ = (unsigned char)(tmp[j]>>8);
+ tmp[j] = 128;
+ }
+ /* And then duplicate it across the row */
+ for (j = w; j > 0; j--)
+ {
+ *dst = dst[-n];
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ w = (w-1)*n;
+ for (i=weights->count; i > 0; i--)
+ {
+ /* Scale the next pixel in the column */
+ min = *contrib++;
+ len = *contrib++;
+ min *= n;
+ while (len-- > 0)
+ {
+ for (j = 0; j < n; j++)
+ tmp[j] += src[min++] * *contrib;
+ contrib++;
+ }
+ for (j = 0; j < n; j++)
+ {
+ *dst++ = (unsigned char)(tmp[j]>>8);
+ tmp[j] = 128;
+ }
+ /* And then duplicate it across the row */
+ for (j = w; j > 0; j--)
+ {
+ *dst = dst[-n];
+ dst++;
+ }
+ }
+ }
+}
+#endif /* SINGLE_PIXEL_SPECIALS */
+
+fz_pixmap *
+fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, fz_irect *clip)
+{
+ return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
+}
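+
+/*
+Editorial usage sketch (not part of the original commit), assuming a
+valid fz_context *ctx and a source pixmap src: scale to 100x50 at
+(3.25,7.5). Unclipped, the result is a 101x51 pixmap at (3,7); the
+function returns NULL for degenerate or fully clipped scales, so the
+result must be checked.
+
+	fz_pixmap *out = fz_scale_pixmap(ctx, src, 3.25f, 7.5f, 100.0f, 50.0f, NULL);
+	if (out)
+	{
+		... use out, then fz_drop_pixmap(ctx, out); ...
+	}
+*/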
+
+fz_pixmap *
+fz_scale_pixmap_cached(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
+{
+ fz_scale_filter *filter = &fz_scale_filter_simple;
+ fz_weights *contrib_rows = NULL;
+ fz_weights *contrib_cols = NULL;
+ fz_pixmap *output = NULL;
+ unsigned char *temp = NULL;
+ int max_row, temp_span, temp_rows, row;
+ int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
+ int flip_x, flip_y;
+ fz_irect patch;
+
+ fz_var(contrib_cols);
+ fz_var(contrib_rows);
+
+ DBUG(("Scale: (%d,%d) to (%g,%g) at (%g,%g)\n",src->w,src->h,w,h,x,y));
+
+ /* Avoid extreme scales where overflows become problematic. */
+ if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
+ return NULL;
+
+ /* Clamp small ranges of w and h */
+ if (w <= -1)
+ {
+ }
+ else if (w < 0)
+ {
+ w = -1;
+ }
+ else if (w < 1)
+ {
+ w = 1;
+ }
+ if (h <= -1)
+ {
+ }
+ else if (h < 0)
+ {
+ h = -1;
+ }
+ else if (h < 1)
+ {
+ h = 1;
+ }
+
+ /* Find the destination bbox, width/height, and sub pixel offset,
+ * allowing for whether we're flipping or not. */
+ /* The (x,y) position given describes where the top left corner
+ * of the source image should be mapped to (i.e. where (0,0) in image
+ * space ends up). Also there are differences in the way we scale
+ * horizontally and vertically. When scaling rows horizontally, we
+ * always read forwards through the source, and store either forwards
+ * or in reverse as required. When scaling vertically, we always store
+ * out forwards, but may feed source rows in a different order.
+ *
+ * Consider the image rectangle 'r' to which the image is mapped,
+ * and the (possibly) larger rectangle 'R', given by expanding 'r' to
+ * complete pixels.
+ *
+ * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
+ * the image is x flipped or not. Whatever happens, 0 <= x < 1.
+ * y is always R.ymax - r.ymax.
+ */
+ /* dst_x_int is calculated to be the left of the scaled image, and
+ * x (the sub pixel offset) is the distance in from either the left
+ * or right pixel expanded edge. */
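+ /* Editorial example (not part of the original commit): with x=3.2
+ * and w=-8.6, the flip_x branch below gives
+ * dst_x_int = floor(3.2-8.6) = -6, dst_w_int = ceil(3.2)-(-6) = 10,
+ * and a sub-pixel offset x = ceil(3.2)-3.2 = 0.8, measured in from
+ * the right-hand expanded edge. */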
+ flip_x = (w < 0);
+ if (flip_x)
+ {
+ float tmp;
+ w = -w;
+ dst_x_int = floorf(x-w);
+ tmp = ceilf(x);
+ dst_w_int = (int)tmp;
+ x = tmp - x;
+ dst_w_int -= dst_x_int;
+ }
+ else
+ {
+ dst_x_int = floorf(x);
+ x -= (float)dst_x_int;
+ dst_w_int = (int)ceilf(x + w);
+ }
+ /* dst_y_int is calculated to be the top of the scaled image, and
+ * y (the sub pixel offset) is the distance in from either the top
+ * or bottom pixel expanded edge.
+ */
+ flip_y = (h < 0);
+ if (flip_y)
+ {
+ float tmp;
+ h = -h;
+ dst_y_int = floorf(y-h);
+ tmp = ceilf(y);
+ dst_h_int = (int)tmp;
+ y = tmp - y;
+ dst_h_int -= dst_y_int;
+ }
+ else
+ {
+ dst_y_int = floorf(y);
+ y -= (float)dst_y_int;
+ dst_h_int = (int)ceilf(y + h);
+ }
+
+ DBUG(("Result image: (%d,%d) at (%d,%d) (subpix=%g,%g)\n", dst_w_int, dst_h_int, dst_x_int, dst_y_int, x, y));
+
+ /* Step 0: Calculate the patch */
+ patch.x0 = 0;
+ patch.y0 = 0;
+ patch.x1 = dst_w_int;
+ patch.y1 = dst_h_int;
+ if (clip)
+ {
+ if (flip_x)
+ {
+ if (dst_x_int + dst_w_int > clip->x1)
+ patch.x0 = dst_x_int + dst_w_int - clip->x1;
+ if (clip->x0 > dst_x_int)
+ {
+ patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
+ dst_x_int = clip->x0;
+ }
+ }
+ else
+ {
+ if (dst_x_int + dst_w_int > clip->x1)
+ patch.x1 = clip->x1 - dst_x_int;
+ if (clip->x0 > dst_x_int)
+ {
+ patch.x0 = clip->x0 - dst_x_int;
+ dst_x_int += patch.x0;
+ }
+ }
+
+ if (flip_y)
+ {
+ if (dst_y_int + dst_h_int > clip->y1)
+ patch.y1 = clip->y1 - dst_y_int;
+ if (clip->y0 > dst_y_int)
+ {
+ patch.y0 = clip->y0 - dst_y_int;
+ dst_y_int = clip->y0;
+ }
+ }
+ else
+ {
+ if (dst_y_int + dst_h_int > clip->y1)
+ patch.y1 = clip->y1 - dst_y_int;
+ if (clip->y0 > dst_y_int)
+ {
+ patch.y0 = clip->y0 - dst_y_int;
+ dst_y_int += patch.y0;
+ }
+ }
+ }
+ if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
+ return NULL;
+
+ fz_try(ctx)
+ {
+ /* Step 1: Calculate the weights for columns and rows */
+#ifdef SINGLE_PIXEL_SPECIALS
+ if (src->w == 1)
+ contrib_cols = NULL;
+ else
+#endif /* SINGLE_PIXEL_SPECIALS */
+ contrib_cols = make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x);
+#ifdef SINGLE_PIXEL_SPECIALS
+ if (src->h == 1)
+ contrib_rows = NULL;
+ else
+#endif /* SINGLE_PIXEL_SPECIALS */
+ contrib_rows = make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y);
+
+ output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0);
+ }
+ fz_catch(ctx)
+ {
+ if (!cache_x)
+ fz_free(ctx, contrib_cols);
+ if (!cache_y)
+ fz_free(ctx, contrib_rows);
+ fz_rethrow(ctx);
+ }
+ output->x = dst_x_int;
+ output->y = dst_y_int;
+
+ /* Step 2: Apply the weights */
+#ifdef SINGLE_PIXEL_SPECIALS
+ if (!contrib_rows)
+ {
+ /* Only 1 source pixel high. */
+ if (!contrib_cols)
+ {
+ /* Only 1 pixel in the entire image! */
+ duplicate_single_pixel(output->samples, src->samples, src->n, patch.x1-patch.x0, patch.y1-patch.y0);
+ }
+ else
+ {
+ /* Scale the row once, then copy it. */
+ scale_single_row(output->samples, src->samples, contrib_cols, src->w, patch.y1-patch.y0);
+ }
+ }
+ else if (!contrib_cols)
+ {
+ /* Only 1 source pixel wide. Scale the col and duplicate. */
+ scale_single_col(output->samples, src->samples, contrib_rows, src->h, src->n, patch.x1-patch.x0, flip_y);
+ }
+ else
+#endif /* SINGLE_PIXEL_SPECIALS */
+ {
+ void (*row_scale)(unsigned char *dst, unsigned char *src, fz_weights *weights);
+
+ temp_span = contrib_cols->count * src->n;
+ temp_rows = contrib_rows->max_len;
+ if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
+ goto cleanup;
+ fz_try(ctx)
+ {
+ temp = fz_calloc(ctx, temp_span*temp_rows, sizeof(unsigned char));
+ }
+ fz_catch(ctx)
+ {
+ fz_drop_pixmap(ctx, output);
+ if (!cache_x)
+ fz_free(ctx, contrib_cols);
+ if (!cache_y)
+ fz_free(ctx, contrib_rows);
+ fz_rethrow(ctx);
+ }
+ switch (src->n)
+ {
+ default:
+ row_scale = scale_row_to_temp;
+ break;
+ case 1: /* Image mask case */
+ row_scale = scale_row_to_temp1;
+ break;
+ case 2: /* Greyscale with alpha case */
+ row_scale = scale_row_to_temp2;
+ break;
+ case 4: /* RGBA */
+ row_scale = scale_row_to_temp4;
+ break;
+ }
+ max_row = contrib_rows->index[contrib_rows->index[0]];
+ for (row = 0; row < contrib_rows->count; row++)
+ {
+ /*
+ Which source rows do we need to have scaled into the
+ temporary buffer in order to be able to do the final
+ scale?
+ */
+ int row_index = contrib_rows->index[row];
+ int row_min = contrib_rows->index[row_index++];
+ int row_len = contrib_rows->index[row_index++];
+ while (max_row < row_min+row_len)
+ {
+ /* Scale another row */
+ assert(max_row < src->h);
+ DBUG(("scaling row %d to temp\n", max_row));
+ (*row_scale)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->w*src->n], contrib_cols);
+ max_row++;
+ }
+
+ DBUG(("scaling row %d from temp\n", row));
+ scale_row_from_temp(&output->samples[row*output->w*output->n], temp, contrib_rows, temp_span, row);
+ }
+ fz_free(ctx, temp);
+ }
+
+cleanup:
+ if (!cache_y)
+ fz_free(ctx, contrib_rows);
+ if (!cache_x)
+ fz_free(ctx, contrib_cols);
+ return output;
+}
+
+void
+fz_free_scale_cache(fz_context *ctx, fz_scale_cache *sc)
+{
+ if (!sc)
+ return;
+ fz_free(ctx, sc->weights);
+ fz_free(ctx, sc);
+}
+
+fz_scale_cache *
+fz_new_scale_cache(fz_context *ctx)
+{
+ return fz_malloc_struct(ctx, fz_scale_cache);
+}