From c80baca351290fc353a8fc659f635d395e4e9a91 Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Mon, 23 May 2016 11:44:57 +0100 Subject: Fix ARM code in light of plotter changes. --- source/fitz/draw-scale-simple.c | 149 +++++++++++++++++++++++++++++++++++++++- source/fitz/halftone.c | 143 ++++++++------------------------------ source/fitz/pixmap.c | 63 +++++++++++------ 3 files changed, 218 insertions(+), 137 deletions(-) diff --git a/source/fitz/draw-scale-simple.c b/source/fitz/draw-scale-simple.c index 0f267743..5d4c5b98 100644 --- a/source/fitz/draw-scale-simple.c +++ b/source/fitz/draw-scale-simple.c @@ -559,6 +559,10 @@ static void scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) __attribute__((naked)); +static void +scale_row_to_temp3(unsigned char *dst, unsigned char *src, fz_weights *weights) +__attribute__((naked)); + static void scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) __attribute__((naked)); @@ -703,6 +707,91 @@ scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) ); } +static void +scale_row_to_temp3(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + asm volatile( + ENTER_ARM + "stmfd r13!,{r4-r11,r14} \n" + "@ r0 = dst \n" + "@ r1 = src \n" + "@ r2 = weights \n" + "ldr r12,[r2],#4 @ r12= flip \n" + "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" + "ldr r4, [r2] @ r4 = index[0] \n" + "cmp r12,#0 @ if (flip) \n" + "beq 4f @ { \n" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "@ (index[] entries are 4-byte ints) \n" + "add r0, r0, r3, LSL #1 @ \n" + "add r0, r0, r3 @ dst += 3*count \n" + "1: \n" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = r = 128 \n" + "mov r6, #128 @ r6 = g = 128 \n" + "add r7, r1, r4, LSL #1 @ \n" + "add r4, r7, r4 @ r4 = min = &src[3*r4] \n" + "mov r7, #128 @ r7 = b = 128 \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 3f @ { \n" + "2: \n" + "ldr r14,[r2], #4 @ r14 = 
*contrib++ \n" + "ldrb r8, [r4], #1 @ r8 = *min++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r8, r5 @ r += r8 * r14 \n" + "mla r6, r14,r11,r6 @ g += r11 * r14 \n" + "mla r7, r14,r12,r7 @ b += r12 * r14 \n" + "bgt 2b @ } \n" + "3: \n" + "mov r5, r5, lsr #8 @ r >>= 8 \n" + "mov r6, r6, lsr #8 @ g >>= 8 \n" + "mov r7, r7, lsr #8 @ b >>= 8 \n" + "strb r5, [r0, #-3]! @ dst -= 3; dst[0] = r \n" + "strb r6, [r0, #1] @ dst[1] = g \n" + "strb r7, [r0, #2] @ dst[2] = b \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 1b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + "4:" + "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" + "@ (index[] entries are 4-byte ints) \n" + "5:" + "ldr r4, [r2], #4 @ r4 = *contrib++ \n" + "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" + "mov r5, #128 @ r5 = r = 128 \n" + "mov r6, #128 @ r6 = g = 128 \n" + "add r7, r1, r4, LSL #1 @ r7 = &src[2*r4] \n" + "add r4, r7, r4 @ r4 = min = &src[3*r4] \n" + "mov r7, #128 @ r7 = b = 128 \n" + "cmp r9, #0 @ while (len-- > 0) \n" + "beq 7f @ { \n" + "6: \n" + "ldr r14,[r2], #4 @ r14 = *contrib++ \n" + "ldrb r8, [r4], #1 @ r8 = *min++ \n" + "ldrb r11,[r4], #1 @ r11 = *min++ \n" + "ldrb r12,[r4], #1 @ r12 = *min++ \n" + "subs r9, r9, #1 @ r9 = len-- \n" + "mla r5, r14,r8, r5 @ r += r8 * r14 \n" + "mla r6, r14,r11,r6 @ g += r11 * r14 \n" + "mla r7, r14,r12,r7 @ b += r12 * r14 \n" + "bgt 6b @ } \n" + "7: \n" + "mov r5, r5, lsr #8 @ r >>= 8 \n" + "mov r6, r6, lsr #8 @ g >>= 8 \n" + "mov r7, r7, lsr #8 @ b >>= 8 \n" + "strb r5, [r0], #1 @ *dst++=r \n" + "strb r6, [r0], #1 @ *dst++=g \n" + "strb r7, [r0], #1 @ *dst++=b \n" + "subs r3, r3, #1 @ i-- \n" + "bgt 5b @ \n" + "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" + ENTER_THUMB + ); +} + static void scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) { @@ -942,6 +1031,59 @@ scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights) } } +static void 
+scale_row_to_temp3(unsigned char *dst, unsigned char *src, fz_weights *weights) +{ + int *contrib = &weights->index[weights->index[0]]; + int len, i; + unsigned char *min; + + assert(weights->n == 3); + if (weights->flip) + { + dst += 3*weights->count; + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + int c3 = 128; + min = &src[3 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + int c = *contrib++; + c1 += *min++ * c; + c2 += *min++ * c; + c3 += *min++ * c; + } + *--dst = (unsigned char)(c3>>8); + *--dst = (unsigned char)(c2>>8); + *--dst = (unsigned char)(c1>>8); + } + } + else + { + for (i=weights->count; i > 0; i--) + { + int c1 = 128; + int c2 = 128; + int c3 = 128; + min = &src[3 * *contrib++]; + len = *contrib++; + while (len-- > 0) + { + int c = *contrib++; + c1 += *min++ * c; + c2 += *min++ * c; + c3 += *min++ * c; + } + *dst++ = (unsigned char)(c1>>8); + *dst++ = (unsigned char)(c2>>8); + *dst++ = (unsigned char)(c3>>8); + } + } +} + static void scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights) { @@ -1429,13 +1571,16 @@ fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, default: row_scale = scale_row_to_temp; break; - case 1: /* Image mask case */ + case 1: /* Image mask case or Greyscale case */ row_scale = scale_row_to_temp1; break; case 2: /* Greyscale with alpha case */ row_scale = scale_row_to_temp2; break; - case 4: /* RGBA */ + case 3: /* RGB case */ + row_scale = scale_row_to_temp3; + break; + case 4: /* RGBA or CMYK case */ row_scale = scale_row_to_temp4; break; } diff --git a/source/fitz/halftone.c b/source/fitz/halftone.c index 60e3c182..abe6d026 100644 --- a/source/fitz/halftone.c +++ b/source/fitz/halftone.c @@ -178,40 +178,38 @@ do_threshold_1(const unsigned char * restrict ht_line, const unsigned char * res "1: \n" "ldr r14,[r1], #4 @ r14= pixmap[0..3] \n" "ldr r5, [r1], #4 @ r5 = pixmap[4..7] \n" - "ldr r6, [r1], #4 @ r6 = pixmap[8..11] \n" - 
"ldr r7, [r1], #4 @ r7 = pixmap[12..15] \n" "ldrb r4, [r0], #8 @ r0 = ht_line += 8 \n" - "eors r14,r14,r5, ROR #8 @ if (white) \n" - "teqeq r6, r7, ROR #8 @ \n" + "adds r14,r14,#1 @ set eq iff r14=-1 \n" + "addeqs r5, r5, #1 @ set eq iff r14=r5=-1 \n" "beq 9b @ white \n" - "ldrb r5, [r1, #-16] @ r5 = pixmap[0] \n" + "ldrb r5, [r1, #-8] @ r5 = pixmap[0] \n" "ldrb r6, [r0, #-7] @ r6 = ht_line[1] \n" - "ldrb r7, [r1, #-14] @ r7 = pixmap[2] \n" + "ldrb r7, [r1, #-7] @ r7 = pixmap[1] \n" "mov r14,#0 @ r14= h = 0 \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x80 @ h |= 0x80 \n" "ldrb r4, [r0, #-6] @ r4 = ht_line[2] \n" - "ldrb r5, [r1, #-12] @ r5 = pixmap[4] \n" + "ldrb r5, [r1, #-6] @ r5 = pixmap[2] \n" "cmp r7, r6 @ if (r7 < r6) \n" "orrlt r14,r14,#0x40 @ h |= 0x40 \n" "ldrb r6, [r0, #-5] @ r6 = ht_line[3] \n" - "ldrb r7, [r1, #-10] @ r7 = pixmap[6] \n" + "ldrb r7, [r1, #-5] @ r7 = pixmap[3] \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x20 @ h |= 0x20 \n" "ldrb r4, [r0, #-4] @ r4 = ht_line[4] \n" - "ldrb r5, [r1, #-8] @ r5 = pixmap[8] \n" + "ldrb r5, [r1, #-4] @ r5 = pixmap[4] \n" "cmp r7, r6 @ if (r7 < r6) \n" "orrlt r14,r14,#0x10 @ h |= 0x10 \n" "ldrb r6, [r0, #-3] @ r6 = ht_line[5] \n" - "ldrb r7, [r1, #-6] @ r7 = pixmap[10] \n" + "ldrb r7, [r1, #-3] @ r7 = pixmap[5] \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x08 @ h |= 0x08 \n" "ldrb r4, [r0, #-2] @ r4 = ht_line[6] \n" - "ldrb r5, [r1, #-4] @ r5 = pixmap[12] \n" + "ldrb r5, [r1, #-2] @ r5 = pixmap[6] \n" "cmp r7, r6 @ if (r7 < r6) \n" "orrlt r14,r14,#0x04 @ h |= 0x04 \n" "ldrb r6, [r0, #-1] @ r6 = ht_line[7] \n" - "ldrb r7, [r1, #-2] @ r7 = pixmap[14] \n" + "ldrb r7, [r1, #-1] @ r7 = pixmap[7] \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x02 @ h |= 0x02 \n" "cmp r7, r6 @ if (r7 < r6) \n" @@ -226,43 +224,43 @@ do_threshold_1(const unsigned char * restrict ht_line, const unsigned char * res "adds r3, r3, #7 @ w += 7 \n" "ble 4f @ if (w >= 0) { \n" "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n" 
- "ldrb r5, [r1], #2 @ r5 = pixmap[0] \n" + "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n" "mov r14, #0 @ r14= h = 0 \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x80 @ h |= 0x80 \n" "cmp r3, #1 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[1] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[2] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[1] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x40 @ h |= 0x40 \n" "cmp r3, #2 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[2] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[4] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[2] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x20 @ h |= 0x20 \n" "cmp r3, #3 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[3] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[6] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[3] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x10 @ h |= 0x10 \n" "cmp r3, #4 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[4] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[8] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[4] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x08 @ h |= 0x08 \n" "cmp r3, #5 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[5] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[10] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[5] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x04 @ h |= 0x04 \n" "cmp r3, #6 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[6] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[12] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[6] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x02 @ h |= 0x02 \n" @@ -344,7 +342,6 @@ static void do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len) __attribute__((naked)); -#ifdef ARCH_UNALIGNED_OK static void do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len) { @@ -371,123 +368,40 @@ do_threshold_4(const 
unsigned char * restrict ht_line, const unsigned char * res "beq 2f @ } \n" "blt 3f @ \n" "1: \n" - "ldr r5, [r1], #5 @ r5 = pixmap[0..3] \n" - "ldr r7, [r1], #5 @ r7 = pixmap[5..8] \n" + "ldr r5, [r1], #4 @ r5 = pixmap[0..3] \n" + "ldr r7, [r1], #4 @ r7 = pixmap[4..7] \n" "add r0, r0, #8 @ r0 = ht_line += 8 \n" "mov r14,#0 @ r14= h = 0 \n" "orrs r5, r5, r7 @ if (r5 | r7 == 0) \n" "beq 9b @ white \n" "ldrb r4, [r0, #-8] @ r4 = ht_line[0] \n" - "ldrb r5, [r1, #-10] @ r5 = pixmap[0] \n" + "ldrb r5, [r1, #-8] @ r5 = pixmap[0] \n" "ldrb r6, [r0, #-7] @ r6 = ht_line[1] \n" - "ldrb r7, [r1, #-9] @ r7 = pixmap[1] \n" + "ldrb r7, [r1, #-7] @ r7 = pixmap[1] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x80 @ h |= 0x80 \n" "ldrb r4, [r0, #-6] @ r4 = ht_line[2] \n" - "ldrb r5, [r1, #-8] @ r5 = pixmap[2] \n" + "ldrb r5, [r1, #-6] @ r5 = pixmap[2] \n" "cmp r6, r7 @ if (r6 < r7) \n" "orrle r14,r14,#0x40 @ h |= 0x40 \n" "ldrb r6, [r0, #-5] @ r6 = ht_line[3] \n" - "ldrb r7, [r1, #-7] @ r7 = pixmap[3] \n" + "ldrb r7, [r1, #-5] @ r7 = pixmap[3] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x20 @ h |= 0x20 \n" "ldrb r4, [r0, #-4] @ r4 = ht_line[4] \n" - "ldrb r5, [r1, #-5] @ r5 = pixmap[5] \n" + "ldrb r5, [r1, #-4] @ r5 = pixmap[4] \n" "cmp r6, r7 @ if (r6 < r7) \n" "orrle r14,r14,#0x10 @ h |= 0x10 \n" "ldrb r6, [r0, #-3] @ r6 = ht_line[5] \n" - "ldrb r7, [r1, #-4] @ r7 = pixmap[6] \n" + "ldrb r7, [r1, #-3] @ r7 = pixmap[5] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x08 @ h |= 0x08 \n" "ldrb r4, [r0, #-2] @ r4 = ht_line[6] \n" - "ldrb r5, [r1, #-3] @ r5 = pixmap[7] \n" + "ldrb r5, [r1, #-2] @ r5 = pixmap[6] \n" "cmp r6, r7 @ if (r6 < r7) \n" "orrle r14,r14,#0x04 @ h |= 0x04 \n" "ldrb r6, [r0, #-1] @ r6 = ht_line[7] \n" - "ldrb r7, [r1, #-2] @ r7 = pixmap[8] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x02 @ h |= 0x02 \n" - "cmp r6, r7 @ if (r7 < r6) \n" - "orrle r14,r14,#0x01 @ h |= 0x01 \n" - "subs r12,r12,#2 @ r12 = l -= 2 \n" - "strb r14,[r2], #1 @ 
*out++ = h \n" - "moveq r12,r9 @ if(l==0) l = ht_len \n" - "subeq r0, r0, r9, LSL #2 @ ht_line -= l \n" - "subs r3, r3, #2 @ w -= 2 \n" - "bgt 1b @ } \n" - "blt 3f @ \n" - "2: \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n" - "mov r14, #0 @ r14= h = 0 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n" - "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x80 @ h |= 0x80 \n" - "ldrb r4, [r0], #1 @ r6 = ht_line[2] \n" - "ldrb r5, [r1], #1 @ r7 = pixmap[2] \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x40 @ h |= 0x40 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n" - "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x20 @ h |= 0x20 \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x10 @ h |= 0x10 \n" - "strb r14,[r2] @ *out = h \n" - "3: \n" - "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" - ENTER_THUMB - ); -} -#else -/* Vanilla version, should work on all ARMs */ -static void -do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len) -{ - asm volatile( - ENTER_ARM - // Store one more reg that required to keep double stack alignment - "stmfd r13!,{r4-r7,r9,r14} \n" - "@ r0 = ht_line \n" - "@ r1 = pixmap \n" - "@ r2 = out \n" - "@ r3 = w \n" - "@ <> = ht_len \n" - "ldr r9, [r13,#6*4] @ r9 = ht_len \n" - "subs r3, r3, #1 @ r3 = w -= 1 \n" - "ble 2f @ while (w > 0) { \n" - "mov r12,r9 @ r12= l = ht_len \n" - "1: \n" - "mov r14,#0 @ r14= h = 0 \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n" - "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x80 @ h |= 0x80 \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[2] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[2] \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x40 @ h |= 0x40 \n" - "ldrb r6, 
[r0], #1 @ r6 = ht_line[3] \n" - "ldrb r7, [r1], #2 @ r7 = pixmap[3] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x20 @ h |= 0x20 \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[4] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[4] \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x10 @ h |= 0x10 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[5] \n" - "ldrb r7, [r1], #1 @ r7 = pixmap[6] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x08 @ h |= 0x08 \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[6] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[7] \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x04 @ h |= 0x04 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[7] \n" - "ldrb r7, [r1], #2 @ r7 = pixmap[8] \n" + "ldrb r7, [r1, #-1] @ r7 = pixmap[7] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x02 @ h |= 0x02 \n" "cmp r6, r7 @ if (r7 < r6) \n" @@ -512,7 +426,7 @@ do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * res "cmp r6, r7 @ if (r6 < r7) \n" "orrle r14,r14,#0x40 @ h |= 0x40 \n" "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n" - "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n" + "ldrb r7, [r1], #1 @ r7 = pixmap[3] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x20 @ h |= 0x20 \n" "cmp r6, r7 @ if (r6 < r7) \n" @@ -523,7 +437,6 @@ do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * res ENTER_THUMB ); } -#endif /* UNALIGNED */ #else static void do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char * restrict out, int w, int ht_len) { diff --git a/source/fitz/pixmap.c b/source/fitz/pixmap.c index e0d24265..3385021f 100644 --- a/source/fitz/pixmap.c +++ b/source/fitz/pixmap.c @@ -201,11 +201,11 @@ fz_pixmap_samples(fz_context *ctx, fz_pixmap *pix) */ #ifdef ARCH_ARM static void -clear_cmyk_bitmap(unsigned char *samples, int c, int value) +clear_cmyka_bitmap_ARM(uint32_t *samples, int c, int value) __attribute__((naked)); static void -clear_cmyk_bitmap(unsigned char *samples, int c, int 
value) +clear_cmyka_bitmap_ARM(uint32_t *samples, int c, int value) { asm volatile( ENTER_ARM @@ -251,7 +251,8 @@ clear_cmyk_bitmap(unsigned char *samples, int c, int value) ENTER_THUMB ); } -#else +#endif + static void clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, int alpha) { @@ -264,6 +265,10 @@ clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, i stride -= w*5; if (stride == 0) { +#ifdef ARCH_ARM + clear_cmyka_bitmap_ARM(s, c, alpha); + return; +#else /* We can do it all fast (except for maybe a few stragglers) */ union { @@ -306,6 +311,7 @@ clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, i } } c += 3; +#endif } t = (unsigned char *)s; w = c; @@ -327,27 +333,45 @@ clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, i else { stride -= w*4; - if ((stride & 3)== 0) + if ((stride & 3) == 0) { - /* We can do it all fast */ - union + if (stride == 0) { - uint8_t bytes[4]; - uint32_t word; - } d; - - d.word = 0; - d.bytes[3] = value; - w *= h; - + w *= h; + h = 1; + } + w *= 4; + if (value == 0) { - const uint32_t a0 = d.word; - while (w > 0) + while (h--) { - *s++ = a0; - w--; + memset(s, 0, w); + s += (stride>>2); + } + } + else + { + /* We can do it all fast */ + union + { + uint8_t bytes[4]; + uint32_t word; + } d; + + d.word = 0; + d.bytes[3] = value; + { + const uint32_t a0 = d.word; + while (h--) + { + int ww = w; + while (ww--) + { + *s++ = a0; + } + s += (stride>>2); + } } - s += (stride>>2); } } else @@ -369,7 +393,6 @@ clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, i } } } -#endif void fz_clear_pixmap(fz_context *ctx, fz_pixmap *pix) -- cgit v1.2.3