author | Robin Watts <robin.watts@artifex.com> | 2011-11-14 18:22:13 +0000
committer | Robin Watts <robin.watts@artifex.com> | 2011-11-15 15:20:54 +0000
commit | 9c0a49060475b2dea1e4c2668bebd1d566113a7b (patch)
tree | 49e45a691cf105f4266d5c6b7242a4a3256c1200 /draw
parent | 60c0544742931da63db623ad7a79ba3758704cc1 (diff)
parent | fd6def85f22b598d4c278e76138ab7dccbb84c36 (diff)
download | mupdf-9c0a49060475b2dea1e4c2668bebd1d566113a7b.tar.xz
Merge branch 'master' into context
Mostly redoing the xps_context to xps_document change and adding
contexts to newly written code.
Conflicts:
apps/pdfapp.c
apps/pdfapp.h
apps/x11_main.c
apps/xpsdraw.c
draw/draw_device.c
draw/draw_scale.c
fitz/base_object.c
fitz/fitz.h
pdf/mupdf.h
pdf/pdf_interpret.c
pdf/pdf_outline.c
pdf/pdf_page.c
xps/muxps.h
xps/xps_doc.c
xps/xps_xml.c
Diffstat (limited to 'draw')
-rw-r--r-- | draw/draw_affine.c | 158
-rw-r--r-- | draw/draw_device.c | 10
-rw-r--r-- | draw/draw_scale.c | 426
-rw-r--r-- | draw/draw_simple_scale.c | 1353
4 files changed, 1795 insertions(+), 152 deletions(-)
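The draw_affine.c hunk below replaces the old per-rectangle rounding with fz_gridfit_matrix(), which, for an axis-aligned matrix, snaps the translation down onto a pixel boundary, widens the scale by the amount moved so coverage never shrinks, and then rounds the scale up to a whole number of pixels, using MY_EPSILON rather than FLT_EPSILON so repeated calls stay idempotent. A minimal standalone sketch of that idea for the positive-scale case on one axis (the helper gridfit_axis and the sample values are illustrative only, not MuPDF API):

```c
#include <math.h>
#include <stdio.h>

#define MY_EPSILON 0.001f	/* same tolerance the patch adopts */

/* Illustrative helper: snap one axis (scale > 0) of an axis-aligned
 * transform so that [trans, trans+scale) covers whole pixels. */
static void gridfit_axis(float *scale, float *trans)
{
	float f;

	/* Move the low edge down onto a pixel boundary; widen the scale
	 * by the amount moved so the covered area never shrinks. */
	f = (float)(int)(*trans);
	if (f - *trans > MY_EPSILON)
		f -= 1.0f;		/* ensure we moved downwards */
	*scale += *trans - f;
	*trans = f;

	/* Round the scale up to an integer so the high edge also lands
	 * on a pixel boundary. */
	f = (float)(int)(*scale);
	if (*scale - f > MY_EPSILON)
		f += 1.0f;
	*scale = f;
}

int main(void)
{
	float a = 99.7f, e = 10.4f;	/* width and x-translation */
	gridfit_axis(&a, &e);
	printf("a=%g e=%g\n", a, e);	/* expect a=101 e=10 */
	return 0;
}
```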
diff --git a/draw/draw_affine.c b/draw/draw_affine.c index 95d28bea..c1ee88f8 100644 --- a/draw/draw_affine.c +++ b/draw/draw_affine.c @@ -454,6 +454,146 @@ fz_paint_affine_color_near(byte *dp, byte *sp, int sw, int sh, int u, int v, int } } +/* RJW: The following code was originally written to be sensitive to + * FLT_EPSILON. Given the way the 'minimum representable difference' + * between 2 floats changes size as we scale, we now pick a larger + * value to ensure idempotency even with rounding problems. The + * value we pick is still far smaller than would ever show up with + * antialiasing. + */ +#define MY_EPSILON 0.001 + +void +fz_gridfit_matrix(fz_matrix *m) +{ + if (fabsf(m->b) < FLT_EPSILON && fabsf(m->c) < FLT_EPSILON) + { + if (m->a > 0) + { + float f; + /* Adjust left hand side onto pixel boundary */ + f = (float)(int)(m->e); + if (f - m->e > MY_EPSILON) + f -= 1.0; /* Ensure it moves left */ + m->a += m->e - f; /* width gets wider as f <= m->e */ + m->e = f; + /* Adjust right hand side onto pixel boundary */ + f = (float)(int)(m->a); + if (m->a - f > MY_EPSILON) + f += 1.0; /* Ensure it moves right */ + m->a = f; + } + else if (m->a < 0) + { + float f; + /* Adjust right hand side onto pixel boundary */ + f = (float)(int)(m->e); + if (m->e - f > MY_EPSILON) + f += 1.0; /* Ensure it moves right */ + m->a += m->e - f; /* width gets wider (more -ve) */ + m->e = f; + /* Adjust left hand side onto pixel boundary */ + f = (float)(int)(m->a); + if (f - m->a > MY_EPSILON) + f -= 1.0; /* Ensure it moves left */ + m->a = f; + } + if (m->d > 0) + { + float f; + /* Adjust top onto pixel boundary */ + f = (float)(int)(m->f); + if (f - m->f > MY_EPSILON) + f -= 1.0; /* Ensure it moves upwards */ + m->d += m->f - f; /* width gets wider as f <= m->f */ + m->f = f; + /* Adjust bottom onto pixel boundary */ + f = (float)(int)(m->d); + if (m->d - f > MY_EPSILON) + f += 1.0; /* Ensure it moves down */ + m->d = f; + } + else if (m->d < 0) + { + float f; + /* Adjust bottom onto pixel boundary */ + f = (float)(int)(m->f); + if (m->f - f > MY_EPSILON) + f += 1.0; /* Ensure it moves down */ + m->d += m->f - f; /* width gets wider (more -ve) */ + m->f = f; + /* Adjust top onto pixel boundary */ + f = (float)(int)(m->d); + if (f - m->d > MY_EPSILON) + f -= 1.0; /* Ensure it moves up */ + m->d = f; + } + } + else if (fabsf(m->a) < FLT_EPSILON && fabsf(m->d) < FLT_EPSILON) + { + if (m->b > 0) + { + float f; + /* Adjust left hand side onto pixel boundary */ + f = (float)(int)(m->f); + if (f - m->f > MY_EPSILON) + f -= 1.0; /* Ensure it moves left */ + m->b += m->f - f; /* width gets wider as f <= m->f */ + m->f = f; + /* Adjust right hand side onto pixel boundary */ + f = (float)(int)(m->b); + if (m->b - f > MY_EPSILON) + f += 1.0; /* Ensure it moves right */ + m->b = f; + } + else if (m->b < 0) + { + float f; + /* Adjust right hand side onto pixel boundary */ + f = (float)(int)(m->f); + if (m->f - f > MY_EPSILON) + f += 1.0; /* Ensure it moves right */ + m->b += m->f - f; /* width gets wider (more -ve) */ + m->f = f; + /* Adjust left hand side onto pixel boundary */ + f = (float)(int)(m->b); + if (f - m->b > MY_EPSILON) + f -= 1.0; /* Ensure it moves left */ + m->b = f; + } + if (m->c > 0) + { + float f; + /* Adjust top onto pixel boundary */ + f = (float)(int)(m->e); + if (f - m->e > MY_EPSILON) + f -= 1.0; /* Ensure it moves upwards */ + m->c += m->e - f; /* width gets wider as f <= m->e */ + m->e = f; + /* Adjust bottom onto pixel boundary */ + f = (float)(int)(m->c); + if (m->c - f > MY_EPSILON) + f += 
1.0; /* Ensure it moves down */ + m->c = f; + } + else if (m->c < 0) + { + float f; + /* Adjust bottom onto pixel boundary */ + f = (float)(int)(m->e); + if (m->e - f > MY_EPSILON) + f += 1.0; /* Ensure it moves down */ + m->c += m->e - f; /* width gets wider (more -ve) */ + m->e = f; + /* Adjust top onto pixel boundary */ + f = (float)(int)(m->c); + if (f - m->c > MY_EPSILON) + f -= 1.0; /* Ensure it moves up */ + m->c = f; + } + } +} + /* Draw an image with an affine transform on destination */ static void @@ -469,15 +609,7 @@ fz_paint_image_imp(fz_pixmap *dst, fz_bbox scissor, fz_pixmap *shape, fz_pixmap void (*paintfn)(byte *dp, byte *sp, int sw, int sh, int u, int v, int fa, int fb, int w, int n, int alpha, byte *color, byte *hp); /* grid fit the image */ - if (fz_is_rectilinear(ctm)) - { - ctm.a = roundup(ctm.a); - ctm.b = roundup(ctm.b); - ctm.c = roundup(ctm.c); - ctm.d = roundup(ctm.d); - ctm.e = floorf(ctm.e); - ctm.f = floorf(ctm.f); - } + fz_gridfit_matrix(&ctm); /* turn on interpolation for upscaled and non-rectilinear transforms */ dolerp = 0; @@ -519,6 +651,14 @@ fz_paint_image_imp(fz_pixmap *dst, fz_bbox scissor, fz_pixmap *shape, fz_pixmap u = (fa * x) + (fc * y) + inv.e * 65536 + ((fa + fc) >> 1); v = (fb * x) + (fd * y) + inv.f * 65536 + ((fb + fd) >> 1); + /* RJW: The following is voodoo. No idea why it works, but it gives + * the best match between scaled/unscaled/interpolated/non-interpolated + * that we have found. */ + if (dolerp) { + u -= 32768; + v -= 32768; + } + dp = dst->samples + ((y - dst->y) * dst->w + (x - dst->x)) * dst->n; n = dst->n; sp = img->samples; diff --git a/draw/draw_device.c b/draw/draw_device.c index a8da9e19..5407d618 100644 --- a/draw/draw_device.c +++ b/draw/draw_device.c @@ -861,7 +861,10 @@ fz_transform_pixmap(fz_context *ctx, fz_pixmap *image, fz_matrix *ctm, int x, in if (ctm->a != 0 && ctm->b == 0 && ctm->c == 0 && ctm->d != 0) { /* Unrotated or X-flip or Y-flip or XY-flip */ - scaled = fz_scale_pixmap_gridfit(ctx, image, ctm->e, ctm->f, ctm->a, ctm->d, gridfit); + fz_matrix m = *ctm; + if (gridfit) + fz_gridfit_matrix(&m); + scaled = fz_scale_pixmap(ctx, image, m.e, m.f, m.a, m.d); if (scaled == NULL) return NULL; ctm->a = scaled->w; @@ -874,7 +877,10 @@ fz_transform_pixmap(fz_context *ctx, fz_pixmap *image, fz_matrix *ctm, int x, in if (ctm->a == 0 && ctm->b != 0 && ctm->c != 0 && ctm->d == 0) { /* Other orthogonal flip/rotation cases */ - scaled = fz_scale_pixmap_gridfit(ctx, image, ctm->f, ctm->e, ctm->b, ctm->c, gridfit); + fz_matrix m = *ctm; + if (gridfit) + fz_gridfit_matrix(&m); + scaled = fz_scale_pixmap(ctx, image, m.f, m.e, m.b, m.c); if (scaled == NULL) return NULL; ctm->b = scaled->w; diff --git a/draw/draw_scale.c b/draw/draw_scale.c index 4d1b66c5..fd3c3798 100644 --- a/draw/draw_scale.c +++ b/draw/draw_scale.c @@ -16,6 +16,21 @@ and then positioning it at (frac(x),frac(y)). */ #define SINGLE_PIXEL_SPECIALS +/* If we're compiling as thumb code, then we need to tell the compiler + * to enter and exit ARM mode around our assembly sections. If we move + * the ARM functions to a separate file and arrange for it to be compiled + * without thumb mode, we can save some time on entry. 
+ */ +#ifdef ARCH_ARM +#ifdef ARCH_THUMB +#define ENTER_ARM ".balign 4\nmov r12,pc\nbx r12\n0:.arm\n" +#define ENTER_THUMB "9:.thumb\n" +#else +#define ENTER_ARM +#define ENTER_THUMB +#endif +#endif + #ifdef DEBUG_SCALING #ifdef WIN32 #include <windows.h> @@ -247,10 +262,10 @@ typedef struct fz_weights_s fz_weights; struct fz_weights_s { + int flip; int count; int max_len; int n; - int flip; int new_line; int index[1]; }; @@ -584,6 +599,274 @@ scale_row_to_temp(int *dst, unsigned char *src, fz_weights *weights) } } +#ifdef ARCH_ARM + +static void +scale_row_to_temp1(int *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + +static void +scale_row_to_temp2(int *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + +static void +scale_row_to_temp4(int *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + +static void +scale_row_from_temp(unsigned char *dst, int *src, fz_weights *weights, int width, int row) +__attribute__((naked)); + +static void +scale_row_to_temp1(int *dst, unsigned char *src, fz_weights *weights) +{ + /* possible optimisation in here; unroll inner loops to avoid stall. */ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r5,r9,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#16 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #2 @ dst += count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #0 @ r5 = a = 0 \n" + "add r4, r1, r4 @ r4 = min = &src[r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r12,[r2], #4 @ r12 = *contrib++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "@stall on r14 \n" + "mla r5, r12,r14,r5 @ g += r14 * r12 \n" + "bgt 2b @ } \n" + "3: \n" + "str r5,[r0, #-4]! 
@ *--dst=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r5,r9,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #0 @ r5 = a = 0 \n" + "add r4, r1, r4 @ r4 = min = &src[r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r12,[r2], #4 @ r12 = *contrib++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "@stall on r14 \n" + "mla r5, r12,r14,r5 @ a += r14 * r12 \n" + "bgt 6b @ } \n" + "7: \n" + "str r5, [r0], #4 @ *dst++=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r5,r9,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp2(int *dst, unsigned char *src, fz_weights *weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r6,r9-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#16 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #3 @ dst += 2*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #0 @ r5 = g = 0 \n" + "mov r6, #0 @ r6 = a = 0 \n" + "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r14,[r2], #4 @ r14 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r11,r5 @ g += r11 * r14 \n" + "mla r6, r14,r12,r6 @ a += r12 * r14 \n" + "bgt 2b @ } \n" + "3: \n" + "stmdb r0!,{r5,r6} @ *--dst=a;*--dst=g; \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #0 @ r5 = g = 0 \n" + "mov r6, #0 @ r6 = a = 0 \n" + "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r14,[r2], #4 @ r10 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r11,r5 @ g += r11 * r14 \n" + "mla r6, r14,r12,r6 @ a += r12 * r14 \n" + "bgt 6b @ } \n" + "7: \n" + "stmia r0!,{r5,r6} @ *dst++=r;*dst++=g; \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp4(int *dst, unsigned char *src, fz_weights *weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#16 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #4 @ dst += 4*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #0 @ r5 = r = 0 \n" + "mov r6, #0 @ r6 = g = 0 \n" + "mov r7, #0 @ r7 = b = 0 \n" + "mov r8, #0 @ r8 = a = 0 \n" + "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ 
{ \n" + "2: \n" + "ldr r10,[r2], #4 @ r10 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "mla r5, r10,r11,r5 @ r += r11 * r10 \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "mla r6, r10,r12,r6 @ g += r12 * r10 \n" + "mla r7, r10,r14,r7 @ b += r14 * r10 \n" + "mla r8, r10,r11,r8 @ a += r11 * r10 \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "bgt 2b @ } \n" + "3: \n" + "stmdb r0!,{r5,r6,r7,r8} @ *--dst=a;*--dst=b; \n" + " @ *--dst=g;*--dst=r; \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #0 @ r5 = r = 0 \n" + "mov r6, #0 @ r6 = g = 0 \n" + "mov r7, #0 @ r7 = b = 0 \n" + "mov r8, #0 @ r8 = a = 0 \n" + "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r10,[r2], #4 @ r10 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "mla r5, r10,r11,r5 @ r += r11 * r10 \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "mla r6, r10,r12,r6 @ g += r12 * r10 \n" + "mla r7, r10,r14,r7 @ b += r14 * r10 \n" + "mla r8, r10,r11,r8 @ a += r11 * r10 \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "bgt 6b @ } \n" + "7: \n" + "stmia r0!,{r5,r6,r7,r8} @ *dst++=r;*dst++=g; \n" + " @ *dst++=b;*dst++=a; \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_from_temp(unsigned char *dst, int *src, fz_weights *weights, int width, int row) +{ + asm volatile( + ENTER_ARM + "ldr r12,[r13] @ r12= row \n" + "add r2, r2, #20 @ r2 = weights->index \n" + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = &weights->index[0] \n" + "@ r3 = width \n" + "@ r12= row \n" + "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n" + "add r2, r2, #4 @ r2 = &index[1] \n" + "mov r6, r3 @ r6 = x = width \n" + "ldr r14,[r2, r4, LSL #2]! 
@ r2 = contrib = index[index[row]+1]\n" + " @ r14= len = *contrib \n" + "1: \n" + "mov r5, r1 @ r5 = min = src \n" + "mov r7, #1<<15 @ r7 = val = 1<<15 \n" + "movs r8, r14 @ r8 = len2 = len \n" + "add r9, r2, #4 @ r9 = contrib2 \n" + "ble 3f @ while (len2-- > 0) { \n" + "2: \n" + "ldr r10,[r9], #4 @ r10 = *contrib2++ \n" + "ldr r12,[r5], r3, LSL #2 @ r12 = *min r5 = min += width\n" + "subs r8, r8, #1 @ len2-- \n" + "@ stall r12 \n" + "mla r7, r10,r12,r7 @ val += r12 * r10 \n" + "bgt 2b @ } \n" + "3: \n" + "movs r7, r7, asr #16 @ r7 = val >>= 16 \n" + "movlt r7, #0 @ if (r7 < 0) r7 = 0 \n" + "cmp r7, #255 @ if (r7 > 255) \n" + "add r1, r1, #4 @ src++ \n" + "movgt r7, #255 @ r7 = 255 \n" + "subs r6, r6, #1 @ x-- \n" + "strb r7, [r0], #1 @ *dst++ = val \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +#else + static void scale_row_to_temp1(int *dst, unsigned char *src, fz_weights *weights) { @@ -672,54 +955,13 @@ static void scale_row_to_temp4(int *dst, unsigned char *src, fz_weights *weights) { int *contrib = &weights->index[weights->index[0]]; -#ifndef ARCH_ARM int len, i; unsigned char *min; -#endif assert(weights->n == 4); if (weights->flip) { dst += 4*weights->count; -#ifdef ARCH_ARM - asm volatile( - "1:" - "ldr r4, [%2], #4 @ r4 = *contrib++ \n" - "ldr r9, [%2], #4 @ r9 = len = *contrib++ \n" - "mov r5, #0 @ r5 = r = 0 \n" - "mov r6, #0 @ r6 = g = 0 \n" - "mov r7, #0 @ r7 = b = 0 \n" - "mov r8, #0 @ r8 = a = 0 \n" - "add r4, %1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" - "cmp r9, #0 @ while (len-- > 0) \n" - "beq 3f @ { \n" - "2: \n" - "ldr r10,[%2], #4 @ r10 = *contrib++ \n" - "ldrb r11,[r4], #1 @ r11 = *min++ \n" - "ldrb r12,[r4], #1 @ r12 = *min++ \n" - "ldrb r14,[r4], #1 @ r14 = *min++ \n" - "mla r5, r10,r11,r5 @ r += r11 * r10 \n" - "ldrb r11,[r4], #1 @ r11 = *min++ \n" - "mla r6, r10,r12,r6 @ g += r12 * r10 \n" - "mla r7, r10,r14,r7 @ b += r14 * r10 \n" - "mla r8, r10,r11,r8 @ a += r11 * r10 \n" - "subs r9, r9, #1 @ r9 = len-- \n" - "bgt 2b @ } \n" - "stmdb %0!,{r5,r6,r7,r8} @ *--dst=a;*--dst=b; \n" - "3: @ *--dst=g;*--dst=r; \n" - "subs %3, %3, #1 @ i-- \n" - "bgt 1b @ \n" - : - : - "r" (dst), - "r" (src), - "r" (contrib), - "r" (weights->count) - : - "r4","r5","r6","r7","r8","r9","r10","r11","r12","r14", - "memory","cc" - ); -#else for (i=weights->count; i > 0; i--) { int r = 0; @@ -740,49 +982,9 @@ scale_row_to_temp4(int *dst, unsigned char *src, fz_weights *weights) *--dst = g; *--dst = r; } -#endif } else { -#ifdef ARCH_ARM - asm volatile( - "1:" - "ldr r4, [%2], #4 @ r4 = *contrib++ \n" - "ldr r9, [%2], #4 @ r9 = len = *contrib++ \n" - "mov r5, #0 @ r5 = r = 0 \n" - "mov r6, #0 @ r6 = g = 0 \n" - "mov r7, #0 @ r7 = b = 0 \n" - "mov r8, #0 @ r8 = a = 0 \n" - "add r4, %1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" - "cmp r9, #0 @ while (len-- > 0) \n" - "beq 3f @ { \n" - "2: \n" - "ldr r10,[%2], #4 @ r10 = *contrib++ \n" - "ldrb r11,[r4], #1 @ r11 = *min++ \n" - "ldrb r12,[r4], #1 @ r12 = *min++ \n" - "ldrb r14,[r4], #1 @ r14 = *min++ \n" - "mla r5, r10,r11,r5 @ r += r11 * r10 \n" - "ldrb r11,[r4], #1 @ r11 = *min++ \n" - "mla r6, r10,r12,r6 @ g += r12 * r10 \n" - "mla r7, r10,r14,r7 @ b += r14 * r10 \n" - "mla r8, r10,r11,r8 @ a += r11 * r10 \n" - "subs r9, r9, #1 @ r9 = len-- \n" - "bgt 2b @ } \n" - "stmia %0!,{r5,r6,r7,r8} @ *dst++=r;*dst++=g; \n" - "3: @ *dst++=b;*dst++=a; \n" - "subs %3, %3, #1 @ i-- \n" - "bgt 1b @ \n" - : - : - "r" (dst), - "r" (src), - "r" (contrib), - "r" (weights->count) - : - 
"r4","r5","r6","r7","r8","r9","r10","r11","r12","r14", - "memory","cc" - ); -#else for (i=weights->count; i > 0; i--) { int r = 0; @@ -803,7 +1005,6 @@ scale_row_to_temp4(int *dst, unsigned char *src, fz_weights *weights) *dst++ = b; *dst++ = a; } -#endif } } @@ -836,6 +1037,7 @@ scale_row_from_temp(unsigned char *dst, int *src, fz_weights *weights, int width src++; } } +#endif #ifdef SINGLE_PIXEL_SPECIALS static void @@ -1004,64 +1206,6 @@ scale_single_col(unsigned char *dst, unsigned char *src, fz_weights *weights, in } #endif /* SINGLE_PIXEL_SPECIALS */ -fz_pixmap * -fz_scale_pixmap_gridfit(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, int gridfit) -{ - if (gridfit) { - float n; - if (w > 0) { - /* Adjust the left hand edge, leftwards to a pixel boundary */ - n = (float)(int)x; /* n is now on a pixel boundary */ - if (n > x) /* Ensure it's the pixel boundary BELOW x */ - n -= 1.0f; - w += x-n; /* width gets wider as x >= n */ - x = n; - /* Adjust the right hand edge rightwards to a pixel boundary */ - n = (float)(int)w; /* n is now the integer width <= w */ - if (n != w) /* If w isn't an integer already, bump it */ - w = 1.0f + n;/* up to the next integer. */ - } else { - /* Adjust the right hand edge, rightwards to a pixel boundary */ - n = (float)(int)x; /* n is now on a pixel boundary */ - if (n > x) /* Ensure it's the pixel boundary <= x */ - n -= 1.0f; - if (n != x) /* If x isn't on a pixel boundary already, */ - n += 1.0f; /* make n be the pixel boundary above x. */ - w -= n-x; /* Expand width (more negative!) as n >= x */ - x = n; - /* Adjust the left hand edge leftwards to a pixel boundary */ - n = (float)(int)w; - if (n != w) - w = n - 1.0f; - } - if (h > 0) { - /* Adjust the bottom edge, downwards to a pixel boundary */ - n = (float)(int)y; /* n is now on a pixel boundary */ - if (n > y) /* Ensure it's the pixel boundary BELOW y */ - n -= 1.0f; - h += y-n; /* height gets larger as y >= n */ - y = n; - /* Adjust the top edge upwards to a pixel boundary */ - n = (float)(int)h; /* n is now the integer height <= h */ - if (n != h) /* If h isn't an integer already, bump it */ - h = 1.0f + n;/* up to the next integer. */ - } else { - /* Adjust the top edge, upwards to a pixel boundary */ - n = (float)(int)y; /* n is now on a pixel boundary */ - if (n > y) /* Ensure it's the pixel boundary <= y */ - n -= 1.0f; - if (n != y) /* If y isn't on a pixel boundary already, */ - n += 1.0f; /* make n be the pixel boundary above y. */ - h -= n-y; /* Expand height (more negative!) as n >= y */ - y = n; - /* Adjust the bottom edge downwards to a pixel boundary */ - n = (float)(int)h; - if (n != h) - h = n - 1.0f; - } - } - return fz_scale_pixmap(ctx, src, x, y, w, h); -} fz_pixmap * fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h) diff --git a/draw/draw_simple_scale.c b/draw/draw_simple_scale.c new file mode 100644 index 00000000..dcd5cd68 --- /dev/null +++ b/draw/draw_simple_scale.c @@ -0,0 +1,1353 @@ +/* +This code does smooth scaling of a pixmap. + +This function returns a new pixmap representing the area starting at (0,0) +given by taking the source pixmap src, scaling it to width w, and height h, +and then positioning it at (frac(x),frac(y)). + +This is a cut-down version of draw_scale.c that only copes with filters +that return values strictly in the 0..1 range, and uses bytes for +intermediate results rather than ints. +*/ + +#include "fitz.h" + +/* Do we special case handling of single pixel high/wide images? 
The + * 'purest' handling is given by not special casing them, but certain + * files that use such images 'stack' them to give full images. Not + * special casing them results in then being fainter and giving noticable + * rounding errors. + */ +#define SINGLE_PIXEL_SPECIALS + +/* If we're compiling as thumb code, then we need to tell the compiler + * to enter and exit ARM mode around our assembly sections. If we move + * the ARM functions to a separate file and arrange for it to be compiled + * without thumb mode, we can save some time on entry. + */ +#ifdef ARCH_ARM +#ifdef ARCH_THUMB +#define ENTER_ARM ".balign 4\nmov r12,pc\nbx r12\n0:.arm\n" +#define ENTER_THUMB "9:.thumb\n" +#else +#define ENTER_ARM +#define ENTER_THUMB +#endif +#endif + +#ifdef DEBUG_SCALING +#ifdef WIN32 +#include <windows.h> +static void debug_print(const char *fmt, ...) +{ + va_list args; + char text[256]; + va_start(args, fmt); + vsprintf(text, fmt, args); + va_end(args); + OutputDebugStringA(text); + printf(text); +} +#else +static void debug_print(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); +} +#endif +#endif +#ifdef DEBUG_SCALING +#define DBUG(A) debug_print A +#else +#define DBUG(A) do {} while(0==1) +#endif + +/* +Consider a row of source samples, src, of width src_w, positioned at x, +scaled to width dst_w. + +src[i] is centred at: x + (i + 0.5)*dst_w/src_w + +Therefore the distance between the centre of the jth output pixel and +the centre of the ith source sample is: + +dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w) + +When scaling up, therefore: + +dst[j] = SUM(filter(dist[j,i]) * src[i]) + (for all ints i) + +This can be simplified by noticing that filters are only non zero within +a given filter width (henceforth called W). So: + +dst[j] = SUM(filter(dist[j,i]) * src[i]) + (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W) + +When scaling down, each filtered source sample is stretched to be wider +to avoid aliasing issues. This effectively reduces the distance between +centres. + +dst[j] = SUM(filter(dist[j,i] * F) * F * src[i]) + (where F = dst_w/src_w) + (for ints i, s.t. (j-W)/F < i < (j+W)/F) + +*/ + +typedef struct fz_scale_filter_s fz_scale_filter; + +struct fz_scale_filter_s +{ + int width; + float (*fn)(fz_scale_filter *, float); +}; + +/* Image scale filters */ + +static float +triangle(fz_scale_filter *filter, float f) +{ + if (f >= 1) + return 0; + return 1-f; +} + +static float +box(fz_scale_filter *filter, float f) +{ + if (f >= 0.5f) + return 0; + return 1; +} + +static float +simple(fz_scale_filter *filter, float x) +{ + if (x >= 1) + return 0; + return 1 + (2*x - 3)*x*x; +} + +fz_scale_filter fz_scale_filter_box = { 1, box }; +fz_scale_filter fz_scale_filter_triangle = { 1, triangle }; +fz_scale_filter fz_scale_filter_simple = { 1, simple }; + +/* +We build ourselves a set of tables to contain the precalculated weights +for a given set of scale settings. + +The first dst_w entries in index are the index into index of the +sets of weight for each destination pixel. 
+ +Each of the sets of weights is a set of values consisting of: + the minimum source pixel index used for this destination pixel + the number of weights used for this destination pixel + the weights themselves + +So to calculate dst[i] we do the following: + + weights = &index[index[i]]; + min = *weights++; + len = *weights++; + dst[i] = 0; + while (--len > 0) + dst[i] += src[min++] * *weights++ + +in addition, we guarantee that at the end of this process weights will now +point to the weights value for dst pixel i+1. + +In the simplest version of this algorithm, we would scale the whole image +horizontally first into a temporary buffer, then scale that temporary +buffer again vertically to give us our result. Using such a simple +algorithm would mean that could use the same style of weights for both +horizontal and vertical scaling. + +Unfortunately, this would also require a large temporary buffer, +particularly in the case where we are scaling up. + +We therefore modify the algorithm as follows; we scale scanlines from the +source image horizontally into a temporary buffer, until we have all the +contributors for a given output scanline. We then produce that output +scanline from the temporary buffer. In this way we restrict the height +of the temporary buffer to a small fraction of the final size. + +Unfortunately, this means that the pseudo code for recombining a +scanline of fully scaled pixels is as follows: + + weights = &index[index[y]]; + min = *weights++; + len = *weights++; + for (x=0 to dst_w) + min2 = min + len2 = len + weights2 = weights + dst[x] = 0; + while (--len2 > 0) + dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++ + +i.e. it requires a % operation for every source pixel - this is typically +expensive. + +To avoid this, we alter the order in which vertical weights are stored, +so that they are ordered in the same order as the temporary buffer lines +would appear. This simplifies the algorithm to: + + weights = &index[index[y]]; + min = *weights++; + len = *weights++; + for (x=0 to dst_w) + min2 = 0 + len2 = len + weights2 = weights + dst[x] = 0; + while (--len2 > 0) + dst[x] += temp[i][min2++] * *weights2++ + +This means that len may be larger than it needs to be (due to the +possible inclusion of a zero weight row or two), but in practise this +is only an increase of 1 or 2 at worst. + +We implement this by generating the weights as normal (but ensuring we +leave enough space) and then reordering afterwards. + +*/ + +typedef struct fz_weights_s fz_weights; + +struct fz_weights_s +{ + int flip; + int count; + int max_len; + int n; + int new_line; + int index[1]; +}; + +static fz_weights * +new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int dst_w_i, int n, int flip) +{ + int max_len; + fz_weights *weights; + + if (src_w > dst_w) + { + /* Scaling down, so there will be a maximum of + * 2*filterwidth*src_w/dst_w src pixels + * contributing to each dst pixel. */ + max_len = (int)ceilf((2 * filter->width * src_w)/dst_w); + if (max_len > src_w) + max_len = src_w; + } + else + { + /* Scaling up, so there will be a maximum of + * 2*filterwidth src pixels contributing to each dst pixel. + */ + max_len = 2 * filter->width; + } + /* We need the size of the struct, + * plus dst_w*sizeof(int) for the index + * plus (2+max_len)*sizeof(int) for the weights + * plus room for an extra set of weights for reordering. 
+ */ + weights = fz_malloc(ctx, sizeof(*weights)+(max_len+3)*(dst_w_i+1)*sizeof(int)); + if (weights == NULL) + return NULL; + weights->count = -1; + weights->max_len = max_len; + weights->index[0] = dst_w_i; + weights->n = n; + weights->flip = flip; + return weights; +} + +static void +init_weights(fz_weights *weights, int j) +{ + int index; + + assert(weights->count == j-1); + weights->count++; + weights->new_line = 1; + if (j == 0) + index = weights->index[0]; + else + { + index = weights->index[j-1]; + index += 2 + weights->index[index+1]; + } + weights->index[j] = index; /* row pointer */ + weights->index[index] = 0; /* min */ + weights->index[index+1] = 0; /* len */ +} + +static void +add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter, + float x, float F, float G, int src_w, float dst_w) +{ + float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w); + float f; + int min, len, index, weight; + + dist *= G; + if (dist < 0) + dist = -dist; + f = filter->fn(filter, dist)*F; + weight = (int)(256*f+0.5f); + if (weight == 0) + return; + + /* Ensure i is in range */ + if (i < 0) + { + i = 0; + weight = 0; + } + else if (i >= src_w) + { + i = src_w-1; + weight = 0; + } + if (weight == 0) + return; + + DBUG(("add_weight[%d][%d] = %d(%g) dist=%g\n",j,i,weight,f,dist)); + + if (weights->new_line) + { + /* New line */ + weights->new_line = 0; + index = weights->index[j]; /* row pointer */ + weights->index[index] = i; /* min */ + weights->index[index+1] = 0; /* len */ + } + index = weights->index[j]; + min = weights->index[index++]; + len = weights->index[index++]; + while (i < min) + { + /* This only happens in rare cases, but we need to insert + * one earlier. In exceedingly rare cases we may need to + * insert more than one earlier. */ + int k; + + for (k = len; k > 0; k--) + { + weights->index[index+k] = weights->index[index+k-1]; + } + weights->index[index] = 0; + min--; + len++; + weights->index[index-2] = min; + weights->index[index-1] = len; + } + if (i-min >= len) + { + /* The usual case */ + while (i-min >= ++len) + { + weights->index[index+len-1] = 0; + } + assert(len-1 == i-min); + weights->index[index+i-min] = weight; + weights->index[index-1] = len; + assert(len <= weights->max_len); + } + else + { + /* Infrequent case */ + weights->index[index+i-min] += weight; + } +} + +static void +reorder_weights(fz_weights *weights, int j, int src_w) +{ + int idx = weights->index[j]; + int min = weights->index[idx++]; + int len = weights->index[idx++]; + int max = weights->max_len; + int tmp = idx+max; + int i, off; + + /* Copy into the temporary area */ + memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len); + + /* Pad out if required */ + assert(len <= max); + assert(min+len <= src_w); + off = 0; + if (len < max) + { + memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len)); + len = max; + if (min + len > src_w) + { + off = min + len - src_w; + min = src_w - len; + weights->index[idx-2] = min; + } + weights->index[idx-1] = len; + } + + /* Copy back into the proper places */ + for (i = 0; i < len; i++) + { + weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i]; + } +} + +/* Due to rounding and edge effects, the sums for the weights sometimes don't + * add up to 256. This causes visible rendering effects. Therefore, we take + * pains to ensure that they 1) never exceed 256, and 2) add up to exactly + * 256 for all pixels that are completely covered. See bug #691629. 
*/ +static void +check_weights(fz_weights *weights, int j, int w, float x, float wf) +{ + int idx, len; + int sum = 0; + int max = -256; + int maxidx = 0; + int i; + + idx = weights->index[j]; + idx++; /* min */ + len = weights->index[idx++]; + + for(i=0; i < len; i++) + { + int v = weights->index[idx++]; + sum += v; + if (v > max) + { + max = v; + maxidx = idx; + } + } + /* If we aren't the first or last pixel, OR if the sum is too big + * then adjust it. */ + if (((j != 0) && (j != w-1)) || (sum > 256)) + weights->index[maxidx-1] += 256-sum; + /* Otherwise, if we are the first pixel, and it's fully covered, then + * adjust it. */ + else if ((j == 0) && (x < 0.0001F) && (sum != 256)) + weights->index[maxidx-1] += 256-sum; + /* Finally, if we are the last pixel, and it's fully covered, then + * adjust it. */ + else if ((j == w-1) && ((float)w-wf < 0.0001F) && (sum != 256)) + weights->index[maxidx-1] += 256-sum; + DBUG(("total weight %d = %d\n", j, sum)); +} + +static fz_weights * +make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int n, int flip) +{ + fz_weights *weights; + float F, G; + float window; + int j; + + if (dst_w < src_w) + { + /* Scaling down */ + F = dst_w / src_w; + G = 1; + } + else + { + /* Scaling up */ + F = 1; + G = src_w / dst_w; + } + window = filter->width / F; + DBUG(("make_weights src_w=%d x=%g dst_w=%g dst_w_int=%d F=%g window=%g\n", src_w, x, dst_w, dst_w_int, F, window)); + weights = new_weights(ctx, filter, src_w, dst_w, dst_w_int, n, flip); + if (weights == NULL) + return NULL; + for (j = 0; j < dst_w_int; j++) + { + /* find the position of the centre of dst[j] in src space */ + float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f; + int l, r; + l = ceilf(centre - window); + r = floorf(centre + window); + DBUG(("%d: centre=%g l=%d r=%d\n", j, centre, l, r)); + init_weights(weights, j); + for (; l <= r; l++) + { + add_weight(weights, j, l, filter, x, F, G, src_w, dst_w); + } + check_weights(weights, j, dst_w_int, x, dst_w); + if (vertical) + { + reorder_weights(weights, j, src_w); + } + } + weights->count++; /* weights->count = dst_w_int now */ + return weights; +} + +static void +scale_row_to_temp(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i, j, n; + unsigned char *min; + int tmp[FZ_MAX_COLORS]; + int *t = tmp; + + n = weights->n; + for (j = 0; j < n; j++) + tmp[j] = 128; + if (weights->flip) + { + dst += (weights->count-1)*n; + for (i=weights->count; i > 0; i--) + { + min = &src[n * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + for (j = n; j > 0; j--) + *t++ += *min++ * *contrib; + t -= n; + contrib++; + } + for (j = n; j > 0; j--) + { + *dst++ = (unsigned char)(*t>>8); + *t++ = 128; + } + t -= n; + dst -= n*2; + } + } + else + { + for (i=weights->count; i > 0; i--) + { + min = &src[n * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + for (j = n; j > 0; j--) + *t++ += *min++ * *contrib; + t -= n; + contrib++; + } + for (j = n; j > 0; j--) + { + *dst++ = (unsigned char)(*t>>8); + *t++ = 128; + } + t -= n; + } + } +} + +#ifdef ARCH_ARM + +static void +scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + +static void +scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + +static void +scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + 
+static void +scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row) +__attribute__((naked)); + +static void +scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + /* possible optimisation in here; unroll inner loops to avoid stall. */ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r5,r9,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#16 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3 @ dst += count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = a = 128 \n" + "add r4, r1, r4 @ r4 = min = &src[r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r12,[r2], #4 @ r12 = *contrib++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "@stall on r14 \n" + "mla r5, r12,r14,r5 @ g += r14 * r12 \n" + "bgt 2b @ } \n" + "3: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "strb r5,[r0, #-1]! @ *--dst=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r5,r9,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = a = 128 \n" + "add r4, r1, r4 @ r4 = min = &src[r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r12,[r2], #4 @ r12 = *contrib++ \n" + "ldrb r14,[r4], #1 @ r14 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "@stall on r14 \n" + "mla r5, r12,r14,r5 @ a += r14 * r12 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r5, r5, LSR #8 @ a >>= 8 \n" + "strb r5, [r0], #1 @ *dst++=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r5,r9,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r6,r9-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#16 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #1 @ dst += 2*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = g = 128 \n" + "mov r6, #128 @ r6 = a = 128 \n" + "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r14,[r2], #4 @ r14 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r11,r5 @ g += r11 * r14 \n" + "mla r6, r14,r12,r6 @ a += r12 * r14 \n" + "bgt 2b @ } \n" + "3: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "mov r6, r6, lsr #8 @ a >>= 8 \n" + "strb r5, [r0, #-2]! 
@ *--dst=a \n" + "strb r6, [r0, #1] @ *--dst=g \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = g = 128 \n" + "mov r6, #128 @ r6 = a = 128 \n" + "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r14,[r2], #4 @ r10 = *contrib++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r11,r5 @ g += r11 * r14 \n" + "mla r6, r14,r12,r6 @ a += r12 * r14 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r5, r5, lsr #8 @ g >>= 8 \n" + "mov r6, r6, lsr #8 @ a >>= 8 \n" + "strb r5, [r0], #1 @ *dst++=g \n" + "strb r6, [r0], #1 @ *dst++=a \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#16 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "ldr r5,=0x00800080 @ r5 = rounding \n" + "ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "add r0, r0, r3, LSL #2 @ dst += 4*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r7, r5 @ r7 = b = rounding \n" + "mov r8, r5 @ r8 = a = rounding \n" + "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r11,[r4], #4 @ r11 = *min++ \n" + "ldr r10,[r2], #4 @ r10 = *contrib++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "and r12,r6, r11 @ r12 = __22__00 \n" + "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" + "mla r7, r10,r12,r7 @ b += r14 * r10 \n" + "mla r8, r10,r11,r8 @ a += r11 * r10 \n" + "bgt 2b @ } \n" + "3: \n" + "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" + "bic r8, r8, r6 @ r8 = 33__11__ \n" + "orr r7, r7, r8 @ r7 = 33221100 \n" + "str r7, [r0, #-4]! 
@ *--dst=r \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + "4: \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "5: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r7, r5 @ r7 = b = rounding \n" + "mov r8, r5 @ r8 = a = rounding \n" + "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r11,[r4], #4 @ r11 = *min++ \n" + "ldr r10,[r2], #4 @ r10 = *contrib++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "and r12,r6, r11 @ r12 = __22__00 \n" + "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" + "mla r7, r10,r12,r7 @ b += r14 * r10 \n" + "mla r8, r10,r11,r8 @ a += r11 * r10 \n" + "bgt 6b @ } \n" + "7: \n" + "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" + "bic r8, r8, r6 @ r8 = 33__11__ \n" + "orr r7, r7, r8 @ r7 = 33221100 \n" + "str r7, [r0], #4 @ *dst++=r \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + +static void +scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row) +{ + asm volatile( + ENTER_ARM + "ldr r12,[r13] @ r12= row \n" + "add r2, r2, #20 @ r2 = weights->index \n" + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = &weights->index[0] \n" + "@ r3 = width \n" + "@ r12= row \n" + "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n" + "add r2, r2, #4 @ r2 = &index[1] \n" + "subs r6, r3, #4 @ r6 = x = width-4 \n" + "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n" + " @ r14= len = *contrib \n" + "blt 4f @ while (x >= 0) { \n" +#ifndef ARCH_ARM_CAN_LOAD_UNALIGNED + "tst r3, #3 @ if (r3 & 3) \n" + "blt 4f @ can't do fast code\n" +#endif + "ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n" + "1: \n" + "ldr r5, =0x00800080 @ r5 = val0 = round \n" + "stmfd r13!,{r1,r2} @ stash r1,r2,r14 \n" + " @ r1 = min = src \n" + " @ r2 = contrib2-4 \n" + "movs r8, r14 @ r8 = len2 = len \n" + "mov r7, r5 @ r7 = val1 = round \n" + "ble 3f @ while (len2-- > 0) { \n" + "2: \n" + "ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n" + "ldr r10,[r2, #4]! 
@ r10 = *contrib2++ \n" + "subs r8, r8, #1 @ len2-- \n" + "and r11,r9, r12 @ r11= __22__00 \n" + "and r12,r9, r12,LSR #8 @ r12= __33__11 \n" + "mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n" + "mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n" + "bgt 2b @ } \n" + "3: \n" + "ldmfd r13!,{r1,r2} @ restore r1,r2,r14 \n" + "and r5, r9, r5, LSR #8 @ r5 = __22__00 \n" + "and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n" + "orr r5, r5, r7 @ r5 = 33221100 \n" + "subs r6, r6, #4 @ x-- \n" + "add r1, r1, #4 @ src++ \n" + "str r5, [r0], #4 @ *dst++ = val \n" + "bge 1b @ \n" + "4: @ } (Less than 4 to go) \n" + "adds r6, r6, #4 @ r6 = x += 4 \n" + "beq 8f @ if (x == 0) done \n" + "5: \n" + "mov r5, r1 @ r5 = min = src \n" + "mov r7, #128 @ r7 = val = 128 \n" + "movs r8, r14 @ r8 = len2 = len \n" + "add r9, r2, #4 @ r9 = contrib2 \n" + "ble 7f @ while (len2-- > 0) { \n" + "6: \n" + "ldr r10,[r9], #4 @ r10 = *contrib2++ \n" + "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n" + "subs r8, r8, #1 @ len2-- \n" + "@ stall r12 \n" + "mla r7, r10,r12,r7 @ val += r12 * r10 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r7, r7, asr #8 @ r7 = val >>= 8 \n" + "subs r6, r6, #1 @ x-- \n" + "add r1, r1, #1 @ src++ \n" + "strb r7, [r0], #1 @ *dst++ = val \n" + "bgt 5b @ \n" + "8: \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} +#else + +static void +scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i; + unsigned char *min; + + assert(weights->n == 1); + if (weights->flip) + { + dst += weights->count; + for (i=weights->count; i > 0; i--) + { + int val = 128; + min = &src[*contrib++]; + len = *contrib++; + while (len-- > 0) + { + val += *min++ * *contrib++; + } + *--dst = (unsigned char)(val>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int val = 128; + min = &src[*contrib++]; + len = *contrib++; + while (len-- > 0) + { + val += *min++ * *contrib++; + } + *dst++ = (unsigned char)(val>>8); + } + } +} + +static void +scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i; + unsigned char *min; + + assert(weights->n == 2); + if (weights->flip) + { + dst += 2*weights->count; + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + min = &src[2 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + c1 += *min++ * *contrib; + c2 += *min++ * *contrib++; + } + *--dst = (unsigned char)(c2>>8); + *--dst = (unsigned char)(c1>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + min = &src[2 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + c1 += *min++ * *contrib; + c2 += *min++ * *contrib++; + } + *dst++ = (unsigned char)(c1>>8); + *dst++ = (unsigned char)(c2>>8); + } + } +} + +static void +scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i; + unsigned char *min; + + assert(weights->n == 4); + if (weights->flip) + { + dst += 4*weights->count; + for (i=weights->count; i > 0; i--) + { + int r = 128; + int g = 128; + int b = 128; + int a = 128; + min = &src[4 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + r += *min++ * *contrib; + g += *min++ * *contrib; + b += *min++ * *contrib; + a += *min++ * *contrib++; + } + *--dst = (unsigned char)(a>>8); + *--dst = (unsigned char)(b>>8); + *--dst = (unsigned char)(g>>8); + *--dst = 
(unsigned char)(r>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int r = 128; + int g = 128; + int b = 128; + int a = 128; + min = &src[4 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + r += *min++ * *contrib; + g += *min++ * *contrib; + b += *min++ * *contrib; + a += *min++ * *contrib++; + } + *dst++ = (unsigned char)(r>>8); + *dst++ = (unsigned char)(g>>8); + *dst++ = (unsigned char)(b>>8); + *dst++ = (unsigned char)(a>>8); + } + } +} + +static void +scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row) +{ + int *contrib = &weights->index[weights->index[row]]; + int len, x; + + contrib++; /* Skip min */ + len = *contrib++; + for (x=width; x > 0; x--) + { + unsigned char *min = src; + int val = 128; + int len2 = len; + int *contrib2 = contrib; + + while (len2-- > 0) + { + val += *min * *contrib2++; + min += width; + } + *dst++ = (unsigned char)(val>>8); + src++; + } +} +#endif + +#ifdef SINGLE_PIXEL_SPECIALS +static void +duplicate_single_pixel(unsigned char *dst, unsigned char *src, int n, int w, int h) +{ + int i; + + for (i = n; i > 0; i--) + *dst++ = *src++; + for (i = (w*h-1)*n; i > 0; i--) + { + *dst = dst[-n]; + dst++; + } +} + +static void +scale_single_row(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int h) +{ + int *contrib = &weights->index[weights->index[0]]; + int min, len, i, j, n; + int tmp[FZ_MAX_COLORS]; + + n = weights->n; + /* Scale a single row */ + for (j = 0; j < n; j++) + tmp[j] = 128; + if (weights->flip) + { + dst += (weights->count-1)*n; + for (i=weights->count; i > 0; i--) + { + min = *contrib++; + len = *contrib++; + min *= n; + while (len-- > 0) + { + for (j = 0; j < n; j++) + tmp[j] += src[min++] * *contrib; + contrib++; + } + for (j = 0; j < n; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + dst -= 2*n; + } + dst += n * (weights->count+1); + } + else + { + for (i=weights->count; i > 0; i--) + { + min = *contrib++; + len = *contrib++; + min *= n; + while (len-- > 0) + { + for (j = 0; j < n; j++) + tmp[j] += src[min++] * *contrib; + contrib++; + } + for (j = 0; j < n; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + } + } + /* And then duplicate it h times */ + n *= weights->count; + while (--h > 0) + { + memcpy(dst, dst-n, n); + dst += n; + } +} + +static void +scale_single_col(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int n, int w, int flip_y) +{ + int *contrib = &weights->index[weights->index[0]]; + int min, len, i, j; + int tmp[FZ_MAX_COLORS]; + + for (j = 0; j < n; j++) + tmp[j] = 128; + if (flip_y) + { + src_w = (src_w-1)*n; + w = (w-1)*n; + for (i=weights->count; i > 0; i--) + { + /* Scale the next pixel in the column */ + min = *contrib++; + len = *contrib++; + min = src_w-min*n; + while (len-- > 0) + { + for (j = 0; j < n; j++) + tmp[j] += src[src_w-min+j] * *contrib; + contrib++; + } + for (j = 0; j < n; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + /* And then duplicate it across the row */ + for (j = w; j > 0; j--) + { + *dst = dst[-n]; + dst++; + } + } + } + else + { + w = (w-1)*n; + for (i=weights->count; i > 0; i--) + { + /* Scale the next pixel in the column */ + min = *contrib++; + len = *contrib++; + min *= n; + while (len-- > 0) + { + for (j = 0; j < n; j++) + tmp[j] += src[min++] * *contrib; + contrib++; + } + for (j = 0; j < n; j++) + { + *dst++ = (unsigned char)(tmp[j]>>8); + tmp[j] = 128; + } + /* And then duplicate it across the row */ 
+ for (j = w; j > 0; j--) + { + *dst = dst[-n]; + dst++; + } + } + } +} +#endif /* SINGLE_PIXEL_SPECIALS */ + +fz_pixmap * +fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h) +{ + fz_scale_filter *filter = &fz_scale_filter_simple; + fz_weights *contrib_rows = NULL; + fz_weights *contrib_cols = NULL; + fz_pixmap *output = NULL; + unsigned char *temp = NULL; + int max_row, temp_span, temp_rows, row; + int dst_w_int, dst_h_int, dst_x_int, dst_y_int; + int flip_x, flip_y; + + DBUG(("Scale: (%d,%d) to (%g,%g) at (%g,%g)\n",src->w,src->h,w,h,x,y)); + + /* Find the destination bbox, width/height, and sub pixel offset, + * allowing for whether we're flipping or not. */ + /* Note that the x and y sub pixel offsets here are different. + * The (x,y) position given describes where the bottom left corner + * of the source image should be mapped to (i.e. where (0,h) in image + * space ends up, not the more logical and sane (0,0)). Also there + * are differences in the way we scale horizontally and vertically. + * When scaling rows horizontally, we always read forwards through + * the source, and store either forwards or in reverse as required. + * When scaling vertically, we always store out forwards, but may + * feed source rows in in a different order. + * + * Consider the image rectange 'r' to which the image is mapped, + * and the (possibly) larger rectangle 'R', given by expanding 'r' to + * complete pixels. + * + * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether + * the image is x flipped or not. Whatever happens 0 <= x < 1. + * y is always R.ymax - r.ymax. + */ + /* dst_x_int is calculated to be the left of the scaled image, and + * x (the sub_pixel_offset) is the distance in from either the left + * or right pixel expanded edge. */ + flip_x = (w < 0); + if (flip_x) + { + float tmp; + w = -w; + dst_x_int = floor(x-w); + tmp = ceilf(x); + dst_w_int = (int)tmp; + x = tmp - x; + dst_w_int -= dst_x_int; + } + else + { + dst_x_int = floor(x); + x -= (float)dst_x_int; + dst_w_int = (int)ceilf(x + w); + } + flip_y = (h < 0); + /* dst_y_int is calculated to be the bottom of the scaled image, but + * y (the sub pixel offset) has to end up being the value at the top. + */ + if (flip_y) + { + h = -h; + dst_y_int = floor(y-h); + dst_h_int = (int)ceilf(y) - dst_y_int; + } else { + dst_y_int = floor(y); + y += h; + dst_h_int = (int)ceilf(y) - dst_y_int; + } + /* y is the top edge position in floats. We want it to be the + * distance down from the next pixel boundary. 
*/ + y = ceilf(y) - y; + + DBUG(("Result image: (%d,%d) at (%d,%d) (subpix=%g,%g)\n", dst_w_int, dst_h_int, dst_x_int, dst_y_int, x, y)); + + /* Step 1: Calculate the weights for columns and rows */ +#ifdef SINGLE_PIXEL_SPECIALS + if (src->w == 1) + { + contrib_cols = NULL; + } + else +#endif /* SINGLE_PIXEL_SPECIALS */ + { + contrib_cols = make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, src->n, flip_x); + if (contrib_cols == NULL) + goto cleanup; + } +#ifdef SINGLE_PIXEL_SPECIALS + if (src->h == 1) + { + contrib_rows = NULL; + } + else +#endif /* SINGLE_PIXEL_SPECIALS */ + { + contrib_rows = make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, src->n, flip_y); + if (contrib_rows == NULL) + goto cleanup; + } + + assert(contrib_cols == NULL || contrib_cols->count == dst_w_int); + assert(contrib_rows == NULL || contrib_rows->count == dst_h_int); + output = fz_new_pixmap(ctx, src->colorspace, dst_w_int, dst_h_int); + output->x = dst_x_int; + output->y = dst_y_int; + + /* Step 2: Apply the weights */ +#ifdef SINGLE_PIXEL_SPECIALS + if (contrib_rows == NULL) + { + /* Only 1 source pixel high. */ + if (contrib_cols == NULL) + { + /* Only 1 pixel in the entire image! */ + duplicate_single_pixel(output->samples, src->samples, src->n, dst_w_int, dst_h_int); + } + else + { + /* Scale the row once, then copy it. */ + scale_single_row(output->samples, src->samples, contrib_cols, src->w, dst_h_int); + } + } + else if (contrib_cols == NULL) + { + /* Only 1 source pixel wide. Scale the col and duplicate. */ + scale_single_col(output->samples, src->samples, contrib_rows, src->h, src->n, dst_w_int, flip_y); + } + else +#endif /* SINGLE_PIXEL_SPECIALS */ + { + void (*row_scale)(unsigned char *dst, unsigned char *src, fz_weights *weights); + + temp_span = contrib_cols->count * src->n; + temp_rows = contrib_rows->max_len; + if (temp_span <= 0 || temp_rows > INT_MAX / temp_span) + goto cleanup; + temp = fz_calloc(ctx, temp_span*temp_rows, sizeof(unsigned char)); + if (temp == NULL) + goto cleanup; + switch (src->n) + { + default: + row_scale = scale_row_to_temp; + break; + case 1: /* Image mask case */ + row_scale = scale_row_to_temp1; + break; + case 2: /* Greyscale with alpha case */ + row_scale = scale_row_to_temp2; + break; + case 4: /* RGBA */ + row_scale = scale_row_to_temp4; + break; + } + max_row = 0; + for (row = 0; row < contrib_rows->count; row++) + { + /* + Which source rows do we need to have scaled into the + temporary buffer in order to be able to do the final + scale? + */ + int row_index = contrib_rows->index[row]; + int row_min = contrib_rows->index[row_index++]; + int row_len = contrib_rows->index[row_index++]; + while (max_row < row_min+row_len) + { + /* Scale another row */ + assert(max_row < src->h); + DBUG(("scaling row %d to temp\n", max_row)); + (*row_scale)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->w*src->n], contrib_cols); + max_row++; + } + + DBUG(("scaling row %d from temp\n", row)); + scale_row_from_temp(&output->samples[row*output->w*output->n], temp, contrib_rows, temp_span, row); + } + fz_free(ctx, temp); + } + +cleanup: + fz_free(ctx, contrib_rows); + fz_free(ctx, contrib_cols); + return output; +} |
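For reference, the weights table built by make_weights() is consumed per destination pixel exactly as the comment block in draw_simple_scale.c above describes: index[j] points at a record of (min, len, weights...), each weight is an 8.8 fixed-point contribution summing to 256, and the byte-precision scaler starts each accumulator at 128 and shifts right by 8 at the end. A minimal sketch of that inner loop for a single-component row (the function name scale_one_pixel is illustrative only; the clamp mirrors the int-precision draw_scale.c path and is redundant for the simple path, whose filters stay in 0..1):

```c
/* Illustrative only: compute destination sample j of a 1-component row
 * from a weights table laid out as described in the comment above:
 *   index[j]           -> offset of this pixel's record
 *   record[0]          -> min: first contributing source sample
 *   record[1]          -> len: number of weights that follow
 *   record[2..2+len-1] -> 8.8 fixed-point weights summing to 256
 */
unsigned char
scale_one_pixel(const unsigned char *src, const int *index, int j)
{
	const int *rec = &index[index[j]];
	int min = *rec++;
	int len = *rec++;
	int val = 128;			/* rounding, as in the byte-precision scaler */

	while (len-- > 0)
		val += src[min++] * *rec++;
	val >>= 8;
	if (val < 0) val = 0;		/* clamp, needed only for filters that overshoot */
	if (val > 255) val = 255;
	return (unsigned char)val;
}
```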