/*
 * Debug trace helper for Windows builds with DEBUG_SCALING defined.
 *
 * Formats a printf-style message and sends it both to the debugger
 * (OutputDebugStringA) and to stdout.
 *
 * fmt: printf-style format string; the variadic arguments must match it.
 *
 * Messages longer than the local buffer are truncated rather than being
 * allowed to overrun it.
 */
static void debug_print(const char *fmt, ...)
{
	va_list args;
	char text[256];

	va_start(args, fmt);
	/* Bounded formatting: the original vsprintf could write past the
	 * end of 'text' for long messages. */
	vsnprintf(text, sizeof(text), fmt, args);
	va_end(args);
	/* Some older C runtimes do not terminate on truncation. */
	text[sizeof(text)-1] = 0;
	OutputDebugStringA(text);
	/* The text is already formatted; never reuse it as a format string
	 * (a '%' in the output would be interpreted again by printf). */
	fputs(text, stdout);
}
typedef struct fz_scale_filter_s fz_scale_filter;

/* A scaling kernel: the half-width of its support and the function that
 * evaluates it. */
struct fz_scale_filter_s
{
	int width;
	float (*fn)(fz_scale_filter *, float);
};

/* Image scale filters.
 *
 * Each kernel receives a non-negative distance (the callers in this file
 * take the absolute value first) and returns the weight for it. The
 * filter argument is unused by these three kernels. */

/* Linear falloff: 1 at distance 0, reaching 0 at distance 1. */
static float
triangle(fz_scale_filter *filter, float f)
{
	(void)filter;
	return (f >= 1) ? 0 : 1-f;
}

/* Nearest-sample box: 1 below half a sample, 0 from there on. */
static float
box(fz_scale_filter *filter, float f)
{
	(void)filter;
	return (f >= 0.5f) ? 0 : 1;
}

/* Smooth cubic 1 - 3x^2 + 2x^3 on [0,1), 0 beyond. */
static float
simple(fz_scale_filter *filter, float x)
{
	(void)filter;
	return (x >= 1) ? 0 : 1 + (2*x - 3)*x*x;
}

fz_scale_filter fz_scale_filter_box = { 1, box };
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
fz_scale_filter fz_scale_filter_simple = { 1, simple };
+ +The first dst_w entries in index are the offsets into index of the +sets of weights for each destination pixel. + +Each of the sets of weights is a set of values consisting of: + the minimum source pixel index used for this destination pixel + the number of weights used for this destination pixel + the weights themselves + +So to calculate dst[i] we do the following: + + weights = &index[index[i]]; + min = *weights++; + len = *weights++; + dst[i] = 0; + while (len-- > 0) + dst[i] += src[min++] * *weights++ + +In addition, we guarantee that at the end of this process weights will now +point to the weights value for dst pixel i+1. + +In the simplest version of this algorithm, we would scale the whole image +horizontally first into a temporary buffer, then scale that temporary +buffer again vertically to give us our result. Using such a simple +algorithm would mean that we could use the same style of weights for both +horizontal and vertical scaling. + +Unfortunately, this would also require a large temporary buffer, +particularly in the case where we are scaling up. + +We therefore modify the algorithm as follows; we scale scanlines from the +source image horizontally into a temporary buffer, until we have all the +contributors for a given output scanline. We then produce that output +scanline from the temporary buffer. In this way we restrict the height +of the temporary buffer to a small fraction of the final size. + +Unfortunately, this means that the pseudo code for recombining a +scanline of fully scaled pixels is as follows: + + weights = &index[index[y]]; + min = *weights++; + len = *weights++; + for (x=0 to dst_w) + min2 = min + len2 = len + weights2 = weights + dst[x] = 0; + while (len2-- > 0) + dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++ + +i.e. it requires a % operation for every source pixel - this is typically +expensive. 
typedef struct fz_weights_s fz_weights;

/*
 * Precalculated weights table for one scaling direction.
 *
 * This structure is accessed from ARM code - bear this in mind before
 * altering it! The hand-written assembly in this file loads 'flip' from
 * byte offset 0, 'count' from offset 4 and treats offset 24 as the start
 * of 'index' (i.e. it assumes six 4-byte ints precede the table), so the
 * order and size of the fields are part of that contract.
 */
struct fz_weights_s
{
	int flip; /* true if outputting reversed */
	int count; /* number of output pixels we have records for in this table */
	int max_len; /* Maximum number of weights for any one output pixel */
	int n; /* number of components (src->n) */
	int new_line; /* True if no weights for the current output pixel */
	int patch_l; /* How many output pixels we skip over */
	/* Variable-length table; new_weights() allocates the extra ints
	 * beyond this single declared element. It starts with one row
	 * pointer per output pixel (an offset into index itself), followed
	 * by the per-pixel records: min, len, then the weights. */
	int index[1];
};

/*
 * One-entry cache of a weights table. The scalar fields record the
 * make_weights() arguments the cached table was built for; make_weights()
 * compares all of them and returns 'weights' unchanged on a match,
 * otherwise it frees the old table and stores the newly built one.
 */
struct fz_scale_cache_s
{
	int src_w; /* source extent in pixels */
	float x; /* sub pixel offset */
	float dst_w; /* scaled extent */
	fz_scale_filter *filter; /* filter the weights were generated with */
	int vertical; /* non-zero if the weights were reordered for vertical use */
	int dst_w_int; /* scaled extent in whole pixels */
	int patch_l; /* first output pixel covered */
	int patch_r; /* one past the last output pixel covered */
	int n; /* number of components */
	int flip; /* copied into fz_weights.flip */
	fz_weights *weights; /* the cached table (owned by the cache) */
};
/*
 * Record the contribution of source sample i to destination pixel j.
 *
 * weights: table being filled in; init_weights() must already have been
 *          called for pixel j.
 * j:       destination pixel, in the patch_l..patch_l+patch_w range.
 * i:       source sample index; samples outside 0..src_w-1 are ignored.
 * filter:  kernel used to turn a distance into a weight.
 * x:       sub pixel offset of the scaled image.
 * F, G:    scale factors chosen by make_weights(): the kernel value is
 *          multiplied by F and the distance by G.
 * src_w:   number of source samples.
 * dst_w:   scaled width.
 *
 * Weights are stored as integers where 256 represents 1.0. The record for
 * a pixel is laid out as: min, len, weight[0..len-1], and is grown in
 * place as samples are added.
 */
static void
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
	float x, float F, float G, int src_w, float dst_w)
{
	/* Distance between the centre of dst[j] and the centre of src[i] */
	float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
	float f;
	int min, len, index, weight;

	dist *= G;
	/* The kernels are symmetric and only defined for dist >= 0 */
	if (dist < 0)
		dist = -dist;
	f = filter->fn(filter, dist)*F;
	/* Convert to fixed point, rounding to nearest */
	weight = (int)(256*f+0.5f);

	/* Ensure i is in range */
	if (i < 0 || i >= src_w)
		return;
	if (weight == 0)
	{
		/* We add a fudge factor here to allow for extreme downscales
		 * where all the weights round to 0. Ensure that at least one
		 * (arbitrarily the first one) is non zero. */
		if (weights->new_line && f > 0)
			weight = 1;
		else
			return;
	}

	DBUG(("add_weight[%d][%d] = %d(%g) dist=%g\n",j,i,weight,f,dist));

	/* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
	j -= weights->patch_l;
	if (weights->new_line)
	{
		/* New line: this is the first weight for the pixel, so the
		 * record starts at source sample i with no weights yet. */
		weights->new_line = 0;
		index = weights->index[j]; /* row pointer */
		weights->index[index] = i; /* min */
		weights->index[index+1] = 0; /* len */
	}
	/* From here on 'index' points at the first weight of the record;
	 * min lives at index-2 and len at index-1. */
	index = weights->index[j];
	min = weights->index[index++];
	len = weights->index[index++];
	while (i < min)
	{
		/* This only happens in rare cases, but we need to insert
		 * one earlier. In exceedingly rare cases we may need to
		 * insert more than one earlier. */
		int k;

		/* Shift the existing weights up by one slot... */
		for (k = len; k > 0; k--)
		{
			weights->index[index+k] = weights->index[index+k-1];
		}
		/* ...and open a zero weight for the new, lower min. */
		weights->index[index] = 0;
		min--;
		len++;
		weights->index[index-2] = min;
		weights->index[index-1] = len;
	}
	if (i-min >= len)
	{
		/* The usual case: i lies beyond the current end of the
		 * record, so extend it, zero filling any gap. */
		while (i-min >= ++len)
		{
			weights->index[index+len-1] = 0;
		}
		assert(len-1 == i-min);
		weights->index[index+i-min] = weight;
		weights->index[index-1] = len;
		assert(len <= weights->max_len);
	}
	else
	{
		/* Infrequent case: the slot for i already exists, accumulate. */
		weights->index[index+i-min] += weight;
	}
}
proper places */ + for (i = 0; i < len; i++) + { + weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i]; + } +} + +/* Due to rounding and edge effects, the sums for the weights sometimes don't + * add up to 256. This causes visible rendering effects. Therefore, we take + * pains to ensure that they 1) never exceed 256, and 2) add up to exactly + * 256 for all pixels that are completely covered. See bug #691629. */ +static void +check_weights(fz_weights *weights, int j, int w, float x, float wf) +{ + int idx, len; + int sum = 0; + int max = -256; + int maxidx = 0; + int i; + + idx = weights->index[j - weights->patch_l]; + idx++; /* min */ + len = weights->index[idx++]; + + for(i=0; i < len; i++) + { + int v = weights->index[idx++]; + sum += v; + if (v > max) + { + max = v; + maxidx = idx; + } + } + /* If we aren't the first or last pixel, OR if the sum is too big + * then adjust it. */ + if (((j != 0) && (j != w-1)) || (sum > 256)) + weights->index[maxidx-1] += 256-sum; + /* Otherwise, if we are the first pixel, and it's fully covered, then + * adjust it. */ + else if ((j == 0) && (x < 0.0001F) && (sum != 256)) + weights->index[maxidx-1] += 256-sum; + /* Finally, if we are the last pixel, and it's fully covered, then + * adjust it. 
*/ + else if ((j == w-1) && ((float)w-wf < 0.0001F) && (sum != 256)) + weights->index[maxidx-1] += 256-sum; + DBUG(("total weight %d = %d\n", j, sum)); +} + +static fz_weights * +make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache) +{ + fz_weights *weights; + float F, G; + float window; + int j; + + if (cache) + { + if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w && + cache->filter == filter && cache->vertical == vertical && + cache->dst_w_int == dst_w_int && + cache->patch_l == patch_l && cache->patch_r == patch_r && + cache->n == n && cache->flip == flip) + { + return cache->weights; + } + cache->src_w = src_w; + cache->x = x; + cache->dst_w = dst_w; + cache->filter = filter; + cache->vertical = vertical; + cache->dst_w_int = dst_w_int; + cache->patch_l = patch_l; + cache->patch_r = patch_r; + cache->n = n; + cache->flip = flip; + fz_free(ctx, cache->weights); + cache->weights = NULL; + } + + if (dst_w < src_w) + { + /* Scaling down */ + F = dst_w / src_w; + G = 1; + } + else + { + /* Scaling up */ + F = 1; + G = src_w / dst_w; + } + window = filter->width / F; + DBUG(("make_weights src_w=%d x=%g dst_w=%g patch_l=%d patch_r=%d F=%g window=%g\n", src_w, x, dst_w, patch_l, patch_r, F, window)); + weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l); + if (!weights) + return NULL; + for (j = patch_l; j < patch_r; j++) + { + /* find the position of the centre of dst[j] in src space */ + float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f; + int l, r; + l = ceilf(centre - window); + r = floorf(centre + window); + DBUG(("%d: centre=%g l=%d r=%d\n", j, centre, l, r)); + init_weights(weights, j); + for (; l <= r; l++) + { + add_weight(weights, j, l, filter, x, F, G, src_w, dst_w); + } + check_weights(weights, j, dst_w_int, x, dst_w); + if (vertical) + { + reorder_weights(weights, j, src_w); + } + 
} + weights->count++; /* weights->count = dst_w_int now */ + if (cache) + { + cache->weights = weights; + } + return weights; +} + +static void +scale_row_to_temp(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i, j, n; + unsigned char *min; + int tmp[FZ_MAX_COLORS]; + int *t = tmp; + + n = weights->n; + for (j = 0; j < n; j++) + tmp[j] = 128; + if (weights->flip) + { + dst += (weights->count-1)*n; + for (i=weights->count; i > 0; i--) + { + min = &src[n * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + for (j = n; j > 0; j--) + *t++ += *min++ * *contrib; + t -= n; + contrib++; + } + for (j = n; j > 0; j--) + { + *dst++ = (unsigned char)(*t>>8); + *t++ = 128; + } + t -= n; + dst -= n*2; + } + } + else + { + for (i=weights->count; i > 0; i--) + { + min = &src[n * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + for (j = n; j > 0; j--) + *t++ += *min++ * *contrib; + t -= n; + contrib++; + } + for (j = n; j > 0; j--) + { + *dst++ = (unsigned char)(*t>>8); + *t++ = 128; + } + t -= n; + } + } +} + +#ifdef ARCH_ARM + +static void +scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + +static void +scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + +static void +scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + +static void +scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row) +__attribute__((naked)); + +static void +scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r7,r9,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + 
"beq 5f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3 @ dst += count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = a = 128 \n" + "add r4, r1, r4 @ r4 = min = &src[r4] \n" + "subs r9, r9, #1 @ len-- \n" + "blt 3f @ while (len >= 0) \n" + "2: @ { \n" + "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n" + "ldrgtb r7, [r4], #1 @ r7 = *min++ \n" + "ldr r12,[r2], #4 @ r12 = *contrib++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "mlagt r5, r6, r7, r5 @ g += r6 * r7 \n" + "subs r9, r9, #2 @ r9 = len -= 2 \n" + "mla r5, r12,r14,r5 @ g += r14 * r12 \n" + "bge 2b @ } \n" + "3: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "strb r5,[r0, #-1]! @ *--dst=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" + "5:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "6:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = a = 128 \n" + "add r4, r1, r4 @ r4 = min = &src[r4] \n" + "subs r9, r9, #1 @ len-- \n" + "blt 9f @ while (len > 0) \n" + "7: @ { \n" + "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n" + "ldrgtb r7, [r4], #1 @ r7 = *min++ \n" + "ldr r12,[r2], #4 @ r12 = *contrib++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "mlagt r5, r6,r7,r5 @ a += r6 * r7 \n" + "subs r9, r9, #2 @ r9 = len -= 2 \n" + "mla r5, r12,r14,r5 @ a += r14 * r12 \n" + "bge 7b @ } \n" + "9: \n" + "mov r5, r5, LSR #8 @ a >>= 8 \n" + "strb r5, [r0], #1 @ *dst++=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 6b @ \n" + "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r6,r9-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" + "ldr 
r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #1 @ dst += 2*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = g = 128 \n" + "mov r6, #128 @ r6 = a = 128 \n" + "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r14,[r2], #4 @ r14 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r11,r5 @ g += r11 * r14 \n" + "mla r6, r14,r12,r6 @ a += r12 * r14 \n" + "bgt 2b @ } \n" + "3: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "mov r6, r6, lsr #8 @ a >>= 8 \n" + "strb r5, [r0, #-2]! @ *--dst=a \n" + "strb r6, [r0, #1] @ *--dst=g \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = g = 128 \n" + "mov r6, #128 @ r6 = a = 128 \n" + "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r14,[r2], #4 @ r10 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r11,r5 @ g += r11 * r14 \n" + "mla r6, r14,r12,r6 @ a += r12 * r14 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "mov r6, r6, lsr #8 @ a >>= 8 \n" + "strb r5, [r0], #1 @ *dst++=g \n" + "strb r6, [r0], #1 @ *dst++=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + asm volatile( + 
ENTER_ARM + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "ldr r5,=0x00800080 @ r5 = rounding \n" + "ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #2 @ dst += 4*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r7, r5 @ r7 = b = rounding \n" + "mov r8, r5 @ r8 = a = rounding \n" + "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r11,[r4], #4 @ r11 = *min++ \n" + "ldr r10,[r2], #4 @ r10 = *contrib++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "and r12,r6, r11 @ r12 = __22__00 \n" + "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" + "mla r7, r10,r12,r7 @ b += r14 * r10 \n" + "mla r8, r10,r11,r8 @ a += r11 * r10 \n" + "bgt 2b @ } \n" + "3: \n" + "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" + "bic r8, r8, r6 @ r8 = 33__11__ \n" + "orr r7, r7, r8 @ r7 = 33221100 \n" + "str r7, [r0, #-4]! 
/*
 * ARM assembly version of scale_row_from_temp: combine rows of the
 * temporary buffer vertically into one output row.
 *
 * Register usage on entry (see the embedded notes): r0 = dst, r1 = src,
 * r2 = weights, r3 = width, and the fifth argument (row) is read from the
 * top of the stack before any registers are pushed. r2 is advanced by 24
 * bytes to reach weights->index, so the layout of fz_weights must not
 * change without updating this code.
 *
 * Two loops are used:
 *  - a word-at-a-time loop that handles 4 bytes per iteration by
 *    splitting each loaded word with the 0x00FF00FF mask and accumulating
 *    the two halves separately, starting from the 0x00800080 rounding
 *    value;
 *  - a byte-at-a-time loop (labels 5..7) for the remaining bytes, or for
 *    everything when the fast loop cannot be used.
 * Unless ARCH_ARM_CAN_LOAD_UNALIGNED is defined, the fast loop is only
 * entered when both width and src are multiples of 4.
 * In both loops successive source samples are 'width' bytes apart.
 *
 * NOTE(review): only width (r3) and src (r1) are tested for alignment;
 * dst (r0) is stored to with word stores but is not tested - confirm the
 * callers guarantee its alignment.
 * NOTE(review): some of the embedded register notes do not match the
 * instruction they annotate (e.g. the rounding constant is loaded into r7
 * and then copied to r5, while the notes name them the other way round);
 * trust the instructions, not the notes.
 */
static void
scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row)
{
	asm volatile(
	ENTER_ARM
	"ldr r12,[r13] @ r12= row \n"
	"add r2, r2, #24 @ r2 = weights->index \n"
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = &weights->index[0] \n"
	"@ r3 = width \n"
	"@ r12= row \n"
	"ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
	"add r2, r2, #4 @ r2 = &index[1] \n"
	"subs r6, r3, #4 @ r6 = x = width-4 \n"
	"ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
	" @ r14= len = *contrib \n"
	"blt 4f @ while (x >= 0) { \n"
#ifndef ARCH_ARM_CAN_LOAD_UNALIGNED
	"tst r3, #3 @ if ((r3 & 3) \n"
	"tsteq r1, #3 @ || (r1 & 3)) \n"
	"bne 4f @ can't do fast code \n"
#endif
	"ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n"
	"1: \n"
	"ldr r7, =0x00800080 @ r5 = val0 = round \n"
	"stmfd r13!,{r1,r2,r7} @ stash r1,r2,r5 \n"
	" @ r1 = min = src \n"
	" @ r2 = contrib2-4 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"mov r5, r7 @ r7 = val1 = round \n"
	"ble 3f @ while (len2-- > 0) { \n"
	"2: \n"
	"ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n"
	"ldr r10,[r2, #4]! @ r10 = *contrib2++ \n"
	"subs r8, r8, #1 @ len2-- \n"
	"and r11,r9, r12 @ r11= __22__00 \n"
	"and r12,r9, r12,LSR #8 @ r12= __33__11 \n"
	"mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n"
	"mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n"
	"bgt 2b @ } \n"
	"and r5, r9, r5, LSR #8 @ r5 = __22__00 \n"
	"and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n"
	"orr r5, r5, r7 @ r5 = 33221100 \n"
	"3: \n"
	"ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n"
	"subs r6, r6, #4 @ x-- \n"
	"add r1, r1, #4 @ src++ \n"
	"str r5, [r0], #4 @ *dst++ = val \n"
	"bge 1b @ \n"
	"4: @ } (Less than 4 to go) \n"
	"adds r6, r6, #4 @ r6 = x += 4 \n"
	"beq 8f @ if (x == 0) done \n"
	"5: \n"
	"mov r5, r1 @ r5 = min = src \n"
	"mov r7, #128 @ r7 = val = 128 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"add r9, r2, #4 @ r9 = contrib2 \n"
	"ble 7f @ while (len2-- > 0) { \n"
	"6: \n"
	"ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
	"ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n"
	"subs r8, r8, #1 @ len2-- \n"
	"@ stall r12 \n"
	"mla r7, r10,r12,r7 @ val += r12 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
	"subs r6, r6, #1 @ x-- \n"
	"add r1, r1, #1 @ src++ \n"
	"strb r7, [r0], #1 @ *dst++ = val \n"
	"bgt 5b @ \n"
	"8: \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	".ltorg \n"
	ENTER_THUMB
	);
}
+#else + +static void +scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i; + unsigned char *min; + + assert(weights->n == 1); + if (weights->flip) + { + dst += weights->count; + for (i=weights->count; i > 0; i--) + { + int val = 128; + min = &src[*contrib++]; + len = *contrib++; + while (len-- > 0) + { + val += *min++ * *contrib++; + } + *--dst = (unsigned char)(val>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int val = 128; + min = &src[*contrib++]; + len = *contrib++; + while (len-- > 0) + { + val += *min++ * *contrib++; + } + *dst++ = (unsigned char)(val>>8); + } + } +} + +static void +scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i; + unsigned char *min; + + assert(weights->n == 2); + if (weights->flip) + { + dst += 2*weights->count; + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + min = &src[2 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + c1 += *min++ * *contrib; + c2 += *min++ * *contrib++; + } + *--dst = (unsigned char)(c2>>8); + *--dst = (unsigned char)(c1>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + min = &src[2 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + c1 += *min++ * *contrib; + c2 += *min++ * *contrib++; + } + *dst++ = (unsigned char)(c1>>8); + *dst++ = (unsigned char)(c2>>8); + } + } +} + +static void +scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i; + unsigned char *min; + + assert(weights->n == 4); + if (weights->flip) + { + dst += 4*weights->count; + for (i=weights->count; i > 0; i--) + { + int r = 128; + int g = 128; + int b = 128; + int a = 128; + min = &src[4 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + 
/*
 * Fill a w by h destination with copies of a single source pixel.
 *
 * dst: destination buffer of w*h pixels, n bytes each.
 * src: the one source pixel (n bytes).
 * n:   number of components per pixel.
 * w,h: destination size in pixels.
 */
static void
duplicate_single_pixel(unsigned char *dst, unsigned char *src, int n, int w, int h)
{
	/* Bytes still to fill once the first pixel is in place. */
	const int rest = (w*h-1)*n;
	int k;

	/* Seed the first pixel from the source... */
	for (k = 0; k < n; k++)
		dst[k] = src[k];

	/* ...then every following byte repeats the one n bytes before it. */
	for (k = 0; k < rest; k++)
		dst[n+k] = dst[k];
}
contrib++; + } + for (j = 0; j < n; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + dst -= 2*n; + } + dst += n * (weights->count+1); + } + else + { + for (i=weights->count; i > 0; i--) + { + min = *contrib++; + len = *contrib++; + min *= n; + while (len-- > 0) + { + for (j = 0; j < n; j++) + tmp[j] += src[min++] * *contrib; + contrib++; + } + for (j = 0; j < n; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + } + } + /* And then duplicate it h times */ + n *= weights->count; + while (--h > 0) + { + memcpy(dst, dst-n, n); + dst += n; + } +}

/*
	Scale a source image that is exactly one pixel wide.

	The single source column is scaled vertically using 'weights', and
	each resulting pixel is then replicated across the output row.

	dst: output samples; weights->count rows of w pixels of n bytes each
	are written.
	src: source samples; one pixel (n bytes) per source row.
	weights: vertical weights. Starting at index[index[0]] there is one
	record per output row: { min, len, len weight values }.
	src_w: despite the name, the number of source rows (the caller
	passes src->h).
	n: number of components (bytes) per pixel.
	w: output width in pixels.
	flip_y: non-zero if the image is flipped vertically. In that case
	'min' counts source rows from the bottom of the image, matching the
	order in which fz_scale_pixmap_cached feeds rows to the general
	scaler (row src->h-1-i for weight index i).
*/
static void
scale_single_col(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int n, int w, int flip_y)
{
	int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j;
	int tmp[FZ_MAX_COLORS];

	/* Accumulators start at 128 so that the final >>8 rounds to
	 * nearest (presumably the weights are scaled to sum to 256). */
	for (j = 0; j < n; j++)
		tmp[j] = 128;
	if (flip_y)
	{
		/* Byte offset of the last source pixel. */
		src_w = (src_w-1)*n;
		/* Number of bytes to replicate after the first pixel of a row. */
		w = (w-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			/* Convert the flipped row index into a byte offset in
			 * the unflipped source. */
			min = src_w-min*n;
			while (len-- > 0)
			{
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * *contrib;
				/* Successive weights apply to successive rows of the
				 * flipped image, i.e. we walk up the real source. */
				min -= n;
				contrib++;
			}
			for (j = 0; j < n; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = w; j > 0; j--)
			{
				*dst = dst[-n];
				dst++;
			}
		}
	}
	else
	{
		/* Number of bytes to replicate after the first pixel of a row. */
		w = (w-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				/* min++ walks through the n components and leaves
				 * min at the start of the next source row. */
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * *contrib;
				contrib++;
			}
			for (j = 0; j < n; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = w; j > 0; j--)
			{
				*dst = dst[-n];
				dst++;
			}
		}
	}
}
#endif /* SINGLE_PIXEL_SPECIALS */

/*
	Scale a pixmap without using weight caches.

	Equivalent to calling fz_scale_pixmap_cached with both caches NULL;
	see that function for the meaning of the arguments and the return
	value.
*/
fz_pixmap *
fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, fz_irect *clip)
{
	return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
}

/*
	Scale a pixmap to a new size and position.

	src: the pixmap to scale.
	x, y: where the (0,0) corner of the source image should end up.
	w, h: the size to scale to. A negative value flips the image along
	that axis. Magnitudes below 1 are clamped to 1.
	clip: optional (may be NULL) rectangle; only the part of the result
	inside it is generated.
	cache_x, cache_y: optional caches handed to make_weights. When a
	cache is supplied the corresponding weights are not freed here.

	Returns a new pixmap whose x/y fields hold the integer position of
	the generated patch, or NULL if the scale is too extreme or the
	patch is empty. Errors raised while building the weights or
	allocating memory are rethrown after local resources are released.
*/
fz_pixmap *
fz_scale_pixmap_cached(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
{
	fz_scale_filter *filter = &fz_scale_filter_simple;
	fz_weights *contrib_rows = NULL;
	fz_weights *contrib_cols = NULL;
	fz_pixmap *output = NULL;
	unsigned char *temp = NULL;
	int max_row, temp_span, temp_rows, row;
	int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
	int flip_x, flip_y;
	fz_rect patch;

	fz_var(contrib_cols);
	fz_var(contrib_rows);

	DBUG(("Scale: (%d,%d) to (%g,%g) at (%g,%g)\n",src->w,src->h,w,h,x,y));

	/* Avoid extreme scales where overflows become problematic. */
	if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
		return NULL;

	/* Clamp small ranges of w and h */
	if (w <= -1)
	{
		/* Large enough (flipped); leave alone. */
	}
	else if (w < 0)
	{
		w = -1;
	}
	else if (w < 1)
	{
		w = 1;
	}
	if (h <= -1)
	{
		/* Large enough (flipped); leave alone. */
	}
	else if (h < 0)
	{
		h = -1;
	}
	else if (h < 1)
	{
		h = 1;
	}

	/* Find the destination bbox, width/height, and sub pixel offset,
	 * allowing for whether we're flipping or not. */
	/* The (x,y) position given describes where the top left corner
	 * of the source image should be mapped to (i.e. where (0,0) in image
	 * space ends up). Also there are differences in the way we scale
	 * horizontally and vertically. When scaling rows horizontally, we
	 * always read forwards through the source, and store either forwards
	 * or in reverse as required. When scaling vertically, we always store
	 * out forwards, but may feed source rows in in a different order.
	 *
	 * Consider the image rectangle 'r' to which the image is mapped,
	 * and the (possibly) larger rectangle 'R', given by expanding 'r' to
	 * complete pixels.
	 *
	 * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
	 * the image is x flipped or not. Whatever happens 0 <= x < 1.
	 * y is always R.ymax - r.ymax.
	 */
	/* dst_x_int is calculated to be the left of the scaled image, and
	 * x (the sub pixel offset) is the distance in from either the left
	 * or right pixel expanded edge. */
	flip_x = (w < 0);
	if (flip_x)
	{
		float tmp;
		w = -w;
		dst_x_int = floorf(x-w);
		tmp = ceilf(x);
		dst_w_int = (int)tmp;
		x = tmp - x;
		dst_w_int -= dst_x_int;
	}
	else
	{
		dst_x_int = floorf(x);
		x -= (float)dst_x_int;
		dst_w_int = (int)ceilf(x + w);
	}
	/* dst_y_int is calculated to be the top of the scaled image, and
	 * y (the sub pixel offset) is the distance in from either the top
	 * or bottom pixel expanded edge.
	 */
	flip_y = (h < 0);
	if (flip_y)
	{
		float tmp;
		h = -h;
		dst_y_int = floorf(y-h);
		tmp = ceilf(y);
		dst_h_int = (int)tmp;
		y = tmp - y;
		dst_h_int -= dst_y_int;
	}
	else
	{
		dst_y_int = floorf(y);
		y -= (float)dst_y_int;
		dst_h_int = (int)ceilf(y + h);
	}

	DBUG(("Result image: (%d,%d) at (%d,%d) (subpix=%g,%g)\n", dst_w_int, dst_h_int, dst_x_int, dst_y_int, x, y));

	/* Step 0: Calculate the patch: the sub-rectangle of the scaled
	 * image (relative to its own origin) that we actually generate. */
	patch.x0 = 0;
	patch.y0 = 0;
	patch.x1 = dst_w_int;
	patch.y1 = dst_h_int;
	if (clip)
	{
		if (flip_x)
		{
			/* Horizontal weights are stored in reverse when flipped,
			 * so the patch is measured from the right hand edge. */
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x0 = dst_x_int + dst_w_int - clip->x1;
			if (clip->x0 > dst_x_int)
			{
				patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
				dst_x_int = clip->x0;
			}
		}
		else
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x1 = clip->x1 - dst_x_int;
			if (clip->x0 > dst_x_int)
			{
				patch.x0 = clip->x0 - dst_x_int;
				dst_x_int += patch.x0;
			}
		}

		/* Output rows are always stored forwards, so both branches
		 * below compute the same patch. */
		if (flip_y)
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int = clip->y0;
			}
		}
		else
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int += patch.y0;
			}
		}
	}
	/* Nothing visible after clipping. */
	if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
		return NULL;

	fz_try(ctx)
	{
		/* Step 1: Calculate the weights for columns and rows */
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->w == 1)
			contrib_cols = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_cols = make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x);
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->h == 1)
			contrib_rows = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_rows = make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y);

		output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0);
	}
	fz_catch(ctx)
	{
		/* Weights obtained via a cache are not freed here. */
		if (!cache_x)
			fz_free(ctx, contrib_cols);
		if (!cache_y)
			fz_free(ctx, contrib_rows);
		fz_rethrow(ctx);
	}
	output->x = dst_x_int;
	output->y = dst_y_int;

	/* Step 2: Apply the weights */
#ifdef SINGLE_PIXEL_SPECIALS
	if (!contrib_rows)
	{
		/* Only 1 source pixel high. */
		if (!contrib_cols)
		{
			/* Only 1 pixel in the entire image! */
			duplicate_single_pixel(output->samples, src->samples, src->n, patch.x1-patch.x0, patch.y1-patch.y0);
		}
		else
		{
			/* Scale the row once, then copy it. */
			scale_single_row(output->samples, src->samples, contrib_cols, src->w, patch.y1-patch.y0);
		}
	}
	else if (!contrib_cols)
	{
		/* Only 1 source pixel wide. Scale the col and duplicate. */
		scale_single_col(output->samples, src->samples, contrib_rows, src->h, src->n, patch.x1-patch.x0, flip_y);
	}
	else
#endif /* SINGLE_PIXEL_SPECIALS */
	{
		void (*row_scale)(unsigned char *dst, unsigned char *src, fz_weights *weights);

		/* temp holds temp_rows horizontally scaled source rows of
		 * temp_span bytes each, used as a ring buffer below. */
		temp_span = contrib_cols->count * src->n;
		temp_rows = contrib_rows->max_len;
		/* Refuse sizes whose product would overflow an int.
		 * NOTE(review): this path returns 'output' without having
		 * written its samples - confirm that callers tolerate this. */
		if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
			goto cleanup;
		fz_try(ctx)
		{
			temp = fz_calloc(ctx, temp_span*temp_rows, sizeof(unsigned char));
		}
		fz_catch(ctx)
		{
			fz_drop_pixmap(ctx, output);
			if (!cache_x)
				fz_free(ctx, contrib_cols);
			if (!cache_y)
				fz_free(ctx, contrib_rows);
			fz_rethrow(ctx);
		}
		/* Pick a horizontal scaler specialised on component count. */
		switch (src->n)
		{
		default:
			row_scale = scale_row_to_temp;
			break;
		case 1: /* Image mask case */
			row_scale = scale_row_to_temp1;
			break;
		case 2: /* Greyscale with alpha case */
			row_scale = scale_row_to_temp2;
			break;
		case 4: /* RGBA */
			row_scale = scale_row_to_temp4;
			break;
		}
		/* First source row needed by the first output row. */
		max_row = contrib_rows->index[contrib_rows->index[0]];
		for (row = 0; row < contrib_rows->count; row++)
		{
			/*
			Which source rows do we need to have scaled into the
			temporary buffer in order to be able to do the final
			scale?
			*/
			int row_index = contrib_rows->index[row];
			int row_min = contrib_rows->index[row_index++];
			int row_len = contrib_rows->index[row_index++];
			while (max_row < row_min+row_len)
			{
				/* Scale another row */
				assert(max_row < src->h);
				DBUG(("scaling row %d to temp\n", max_row));
				/* When flipped vertically, source rows are fed in
				 * from the bottom of the image upwards. */
				(*row_scale)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->w*src->n], contrib_cols);
				max_row++;
			}

			DBUG(("scaling row %d from temp\n", row));
			scale_row_from_temp(&output->samples[row*output->w*output->n], temp, contrib_rows, temp_span, row);
		}
		fz_free(ctx, temp);
	}

cleanup:
	/* Weights obtained via a cache are not freed here. */
	if (!cache_y)
		fz_free(ctx, contrib_rows);
	if (!cache_x)
		fz_free(ctx, contrib_cols);
	return output;
}

/*
	Free a scale cache and the weights it holds.

	sc: the cache to free; NULL is accepted and ignored.
*/
void
fz_free_scale_cache(fz_context *ctx, fz_scale_cache *sc)
{
	if (!sc)
		return;
	fz_free(ctx, sc->weights);
	fz_free(ctx, sc);
}

/*
	Allocate a new scale cache, for use with fz_scale_pixmap_cached.

	Release it with fz_free_scale_cache.
	NOTE(review): presumably fz_malloc_struct returns zeroed memory and
	throws on failure - confirm against its definition.
*/
fz_scale_cache *
fz_new_scale_cache(fz_context *ctx)
{
	return fz_malloc_struct(ctx, fz_scale_cache);
}
-- cgit v1.2.3