summaryrefslogtreecommitdiff
path: root/source/fitz
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2016-05-23 11:44:57 +0100
committerRobin Watts <robin.watts@artifex.com>2016-05-24 12:48:41 +0100
commitc80baca351290fc353a8fc659f635d395e4e9a91 (patch)
tree995f7f80cde876618d4e3ed8f88adff5e69399d5 /source/fitz
parentd0b78f4166a1503ce522944002b3aab035724cd9 (diff)
downloadmupdf-c80baca351290fc353a8fc659f635d395e4e9a91.tar.xz
Fix ARM code in light of plotter changes.
Diffstat (limited to 'source/fitz')
-rw-r--r--source/fitz/draw-scale-simple.c149
-rw-r--r--source/fitz/halftone.c143
-rw-r--r--source/fitz/pixmap.c63
3 files changed, 218 insertions, 137 deletions
diff --git a/source/fitz/draw-scale-simple.c b/source/fitz/draw-scale-simple.c
index 0f267743..5d4c5b98 100644
--- a/source/fitz/draw-scale-simple.c
+++ b/source/fitz/draw-scale-simple.c
@@ -560,6 +560,10 @@ scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
__attribute__((naked));
static void
+scale_row_to_temp3(unsigned char *dst, unsigned char *src, fz_weights *weights)
+__attribute__((naked));
+
+static void
scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
__attribute__((naked));
@@ -704,6 +708,91 @@ scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
}
+/*
+ * ARM assembly row scaler for 3 bytes-per-pixel data (see the RGB case in
+ * the row_scale dispatch).
+ *
+ * dst:     output row; 3 bytes are written per output pixel.
+ * src:     input row; 3 bytes are read per contributing input pixel.
+ * weights: weight table. The loads below assume flip is at byte offset 0,
+ *          count at offset 4 and index[] at offset 24, with index[] made
+ *          of 4-byte words.
+ *
+ * Starting at &index[index[0]], each output pixel has a record of the form
+ * { first source pixel, len, len weights }. Each channel accumulates
+ * sample * weight on top of a bias of 128 and is then shifted right by 8.
+ * When flip is set the output pixels are written from the end of dst
+ * backwards (channel order inside a pixel is unchanged).
+ *
+ * Naked function: arguments arrive in r0-r2 and the code does its own
+ * register save/restore and return.
+ */
static void
+scale_row_to_temp3(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+	asm volatile(
+	ENTER_ARM
+	"stmfd r13!,{r4-r11,r14} \n"
+	"@ r0 = dst \n"
+	"@ r1 = src \n"
+	"@ r2 = weights \n"
+	"ldr r12,[r2],#4 @ r12= flip \n"
+	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
+	"ldr r4, [r2] @ r4 = index[0] \n"
+	"cmp r12,#0 @ if (flip) \n"
+	"beq 4f @ { \n"
+	/* index[] entries are 4 byte words, so scale index[0] by 4 (not 3). */
+	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
+	"add r0, r0, r3, LSL #1 @ \n"
+	"add r0, r0, r3 @ dst += 3*count \n"
+	"1: \n"
+	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
+	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
+	"mov r5, #128 @ r5 = r = 128 \n"
+	"mov r6, #128 @ r6 = g = 128 \n"
+	"add r7, r1, r4, LSL #1 @ r7 = &src[2*r4] \n"
+	"add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
+	"mov r7, #128 @ r7 = b = 128 \n"
+	"cmp r9, #0 @ while (len-- > 0) \n"
+	"beq 3f @ { \n"
+	"2: \n"
+	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
+	"ldrb r8, [r4], #1 @ r8 = *min++ \n"
+	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
+	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
+	"subs r9, r9, #1 @ r9 = len-- \n"
+	"mla r5, r14,r8, r5 @ r += r8 * r14 \n"
+	"mla r6, r14,r11,r6 @ g += r11 * r14 \n"
+	"mla r7, r14,r12,r7 @ b += r12 * r14 \n"
+	"bgt 2b @ } \n"
+	"3: \n"
+	"mov r5, r5, lsr #8 @ r >>= 8 \n"
+	"mov r6, r6, lsr #8 @ g >>= 8 \n"
+	"mov r7, r7, lsr #8 @ b >>= 8 \n"
+	"strb r5, [r0, #-3]! @ dst -= 3; dst[0]=r \n"
+	"strb r6, [r0, #1] @ dst[1]=g \n"
+	"strb r7, [r0, #2] @ dst[2]=b \n"
+	"subs r3, r3, #1 @ i-- \n"
+	"bgt 1b @ \n"
+	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
+	"4: \n"
+	/* Same 4 byte scaling of index[0] as on the flipped path. */
+	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
+	"5: \n"
+	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
+	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
+	"mov r5, #128 @ r5 = r = 128 \n"
+	"mov r6, #128 @ r6 = g = 128 \n"
+	"add r7, r1, r4, LSL #1 @ r7 = &src[2*r4] \n"
+	"add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
+	"mov r7, #128 @ r7 = b = 128 \n"
+	"cmp r9, #0 @ while (len-- > 0) \n"
+	"beq 7f @ { \n"
+	"6: \n"
+	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
+	"ldrb r8, [r4], #1 @ r8 = *min++ \n"
+	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
+	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
+	"subs r9, r9, #1 @ r9 = len-- \n"
+	"mla r5, r14,r8, r5 @ r += r8 * r14 \n"
+	"mla r6, r14,r11,r6 @ g += r11 * r14 \n"
+	"mla r7, r14,r12,r7 @ b += r12 * r14 \n"
+	"bgt 6b @ } \n"
+	"7: \n"
+	"mov r5, r5, lsr #8 @ r >>= 8 \n"
+	"mov r6, r6, lsr #8 @ g >>= 8 \n"
+	"mov r7, r7, lsr #8 @ b >>= 8 \n"
+	"strb r5, [r0], #1 @ *dst++=r \n"
+	"strb r6, [r0], #1 @ *dst++=g \n"
+	"strb r7, [r0], #1 @ *dst++=b \n"
+	"subs r3, r3, #1 @ i-- \n"
+	"bgt 5b @ \n"
+	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
+	ENTER_THUMB
+	);
+}
+
+static void
scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
asm volatile(
@@ -943,6 +1032,59 @@ scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
}
+/*
+ * Portable C row scaler for 3 bytes-per-pixel data.
+ *
+ * dst:     output row; 3 bytes are written per output pixel.
+ * src:     input row; 3 bytes are read per contributing input pixel.
+ * weights: weight table; weights->n must be 3 (asserted).
+ *
+ * Starting at &index[index[0]], each of the weights->count output pixels
+ * has a record { first source pixel, tap count, one weight per tap }.
+ * Every channel starts from a bias of 128, accumulates sample * weight and
+ * is shifted right by 8 before being stored. With weights->flip set, the
+ * output pixels are written from the end of dst backwards; the channel
+ * order inside each pixel is unchanged.
+ */
static void
+scale_row_to_temp3(unsigned char *dst, unsigned char *src, fz_weights *weights)
+{
+	int *wp = &weights->index[weights->index[0]];
+	int todo;
+
+	assert(weights->n == 3);
+
+	if (!weights->flip)
+	{
+		/* Forward: emit pixels left to right. */
+		for (todo = weights->count; todo > 0; todo--)
+		{
+			unsigned char *in = &src[3 * wp[0]];
+			int taps = wp[1];
+			int r = 128;
+			int g = 128;
+			int b = 128;
+
+			for (wp += 2; taps > 0; taps--)
+			{
+				int w = *wp++;
+				r += in[0] * w;
+				g += in[1] * w;
+				b += in[2] * w;
+				in += 3;
+			}
+			dst[0] = (unsigned char)(r>>8);
+			dst[1] = (unsigned char)(g>>8);
+			dst[2] = (unsigned char)(b>>8);
+			dst += 3;
+		}
+		return;
+	}
+
+	/* Flipped: start one past the last pixel and walk backwards. */
+	dst += 3*weights->count;
+	for (todo = weights->count; todo > 0; todo--)
+	{
+		unsigned char *in = &src[3 * wp[0]];
+		int taps = wp[1];
+		int r = 128;
+		int g = 128;
+		int b = 128;
+
+		for (wp += 2; taps > 0; taps--)
+		{
+			int w = *wp++;
+			r += in[0] * w;
+			g += in[1] * w;
+			b += in[2] * w;
+			in += 3;
+		}
+		dst -= 3;
+		dst[0] = (unsigned char)(r>>8);
+		dst[1] = (unsigned char)(g>>8);
+		dst[2] = (unsigned char)(b>>8);
+	}
+}
+
+static void
scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
int *contrib = &weights->index[weights->index[0]];
@@ -1429,13 +1571,16 @@ fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y,
default:
row_scale = scale_row_to_temp;
break;
- case 1: /* Image mask case */
+ case 1: /* Image mask case or Greyscale case */
row_scale = scale_row_to_temp1;
break;
case 2: /* Greyscale with alpha case */
row_scale = scale_row_to_temp2;
break;
- case 4: /* RGBA */
+ case 3: /* RGB case */
+ row_scale = scale_row_to_temp3;
+ break;
+ case 4: /* RGBA or CMYK case */
row_scale = scale_row_to_temp4;
break;
}
diff --git a/source/fitz/halftone.c b/source/fitz/halftone.c
index 60e3c182..abe6d026 100644
--- a/source/fitz/halftone.c
+++ b/source/fitz/halftone.c
@@ -178,40 +178,38 @@ do_threshold_1(const unsigned char * restrict ht_line, const unsigned char * res
"1: \n"
"ldr r14,[r1], #4 @ r14= pixmap[0..3] \n"
"ldr r5, [r1], #4 @ r5 = pixmap[4..7] \n"
- "ldr r6, [r1], #4 @ r6 = pixmap[8..11] \n"
- "ldr r7, [r1], #4 @ r7 = pixmap[12..15] \n"
"ldrb r4, [r0], #8 @ r0 = ht_line += 8 \n"
- "eors r14,r14,r5, ROR #8 @ if (white) \n"
- "teqeq r6, r7, ROR #8 @ \n"
+ "adds r14,r14,#1 @ set eq iff r14=-1 \n"
+ "addeqs r5, r5, #1 @ set eq iff r14=r5=-1 \n"
"beq 9b @ white \n"
- "ldrb r5, [r1, #-16] @ r5 = pixmap[0] \n"
+ "ldrb r5, [r1, #-8] @ r5 = pixmap[0] \n"
"ldrb r6, [r0, #-7] @ r6 = ht_line[1] \n"
- "ldrb r7, [r1, #-14] @ r7 = pixmap[2] \n"
+ "ldrb r7, [r1, #-7] @ r7 = pixmap[1] \n"
"mov r14,#0 @ r14= h = 0 \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x80 @ h |= 0x80 \n"
"ldrb r4, [r0, #-6] @ r4 = ht_line[2] \n"
- "ldrb r5, [r1, #-12] @ r5 = pixmap[4] \n"
+ "ldrb r5, [r1, #-6] @ r5 = pixmap[2] \n"
"cmp r7, r6 @ if (r7 < r6) \n"
"orrlt r14,r14,#0x40 @ h |= 0x40 \n"
"ldrb r6, [r0, #-5] @ r6 = ht_line[3] \n"
- "ldrb r7, [r1, #-10] @ r7 = pixmap[6] \n"
+ "ldrb r7, [r1, #-5] @ r7 = pixmap[3] \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x20 @ h |= 0x20 \n"
"ldrb r4, [r0, #-4] @ r4 = ht_line[4] \n"
- "ldrb r5, [r1, #-8] @ r5 = pixmap[8] \n"
+ "ldrb r5, [r1, #-4] @ r5 = pixmap[4] \n"
"cmp r7, r6 @ if (r7 < r6) \n"
"orrlt r14,r14,#0x10 @ h |= 0x10 \n"
"ldrb r6, [r0, #-3] @ r6 = ht_line[5] \n"
- "ldrb r7, [r1, #-6] @ r7 = pixmap[10] \n"
+ "ldrb r7, [r1, #-3] @ r7 = pixmap[5] \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x08 @ h |= 0x08 \n"
"ldrb r4, [r0, #-2] @ r4 = ht_line[6] \n"
- "ldrb r5, [r1, #-4] @ r5 = pixmap[12] \n"
+ "ldrb r5, [r1, #-2] @ r5 = pixmap[6] \n"
"cmp r7, r6 @ if (r7 < r6) \n"
"orrlt r14,r14,#0x04 @ h |= 0x04 \n"
"ldrb r6, [r0, #-1] @ r6 = ht_line[7] \n"
- "ldrb r7, [r1, #-2] @ r7 = pixmap[14] \n"
+ "ldrb r7, [r1, #-1] @ r7 = pixmap[7] \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x02 @ h |= 0x02 \n"
"cmp r7, r6 @ if (r7 < r6) \n"
@@ -226,43 +224,43 @@ do_threshold_1(const unsigned char * restrict ht_line, const unsigned char * res
"adds r3, r3, #7 @ w += 7 \n"
"ble 4f @ if (w >= 0) { \n"
"ldrb r4, [r0], #1 @ r4 = ht_line[0] \n"
- "ldrb r5, [r1], #2 @ r5 = pixmap[0] \n"
+ "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n"
"mov r14, #0 @ r14= h = 0 \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x80 @ h |= 0x80 \n"
"cmp r3, #1 @ \n"
"ldrgtb r4, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrgtb r5, [r1], #2 @ r7 = pixmap[2] \n"
+ "ldrgtb r5, [r1], #1 @ r7 = pixmap[1] \n"
"ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x40 @ h |= 0x40 \n"
"cmp r3, #2 @ \n"
"ldrgtb r4, [r0], #1 @ r6 = ht_line[2] \n"
- "ldrgtb r5, [r1], #2 @ r7 = pixmap[4] \n"
+ "ldrgtb r5, [r1], #1 @ r7 = pixmap[2] \n"
"ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x20 @ h |= 0x20 \n"
"cmp r3, #3 @ \n"
"ldrgtb r4, [r0], #1 @ r6 = ht_line[3] \n"
- "ldrgtb r5, [r1], #2 @ r7 = pixmap[6] \n"
+ "ldrgtb r5, [r1], #1 @ r7 = pixmap[3] \n"
"ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x10 @ h |= 0x10 \n"
"cmp r3, #4 @ \n"
"ldrgtb r4, [r0], #1 @ r6 = ht_line[4] \n"
- "ldrgtb r5, [r1], #2 @ r7 = pixmap[8] \n"
+ "ldrgtb r5, [r1], #1 @ r7 = pixmap[4] \n"
"ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x08 @ h |= 0x08 \n"
"cmp r3, #5 @ \n"
"ldrgtb r4, [r0], #1 @ r6 = ht_line[5] \n"
- "ldrgtb r5, [r1], #2 @ r7 = pixmap[10] \n"
+ "ldrgtb r5, [r1], #1 @ r7 = pixmap[5] \n"
"ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x04 @ h |= 0x04 \n"
"cmp r3, #6 @ \n"
"ldrgtb r4, [r0], #1 @ r6 = ht_line[6] \n"
- "ldrgtb r5, [r1], #2 @ r7 = pixmap[12] \n"
+ "ldrgtb r5, [r1], #1 @ r7 = pixmap[6] \n"
"ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x02 @ h |= 0x02 \n"
@@ -344,7 +342,6 @@ static void
do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len)
__attribute__((naked));
-#ifdef ARCH_UNALIGNED_OK
static void
do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len)
{
@@ -371,123 +368,40 @@ do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * res
"beq 2f @ } \n"
"blt 3f @ \n"
"1: \n"
- "ldr r5, [r1], #5 @ r5 = pixmap[0..3] \n"
- "ldr r7, [r1], #5 @ r7 = pixmap[5..8] \n"
+ "ldr r5, [r1], #4 @ r5 = pixmap[0..3] \n"
+ "ldr r7, [r1], #4 @ r7 = pixmap[4..7] \n"
"add r0, r0, #8 @ r0 = ht_line += 8 \n"
"mov r14,#0 @ r14= h = 0 \n"
"orrs r5, r5, r7 @ if (r5 | r7 == 0) \n"
"beq 9b @ white \n"
"ldrb r4, [r0, #-8] @ r4 = ht_line[0] \n"
- "ldrb r5, [r1, #-10] @ r5 = pixmap[0] \n"
+ "ldrb r5, [r1, #-8] @ r5 = pixmap[0] \n"
"ldrb r6, [r0, #-7] @ r6 = ht_line[1] \n"
- "ldrb r7, [r1, #-9] @ r7 = pixmap[1] \n"
+ "ldrb r7, [r1, #-7] @ r7 = pixmap[1] \n"
"cmp r4, r5 @ if (r4 < r5) \n"
"orrle r14,r14,#0x80 @ h |= 0x80 \n"
"ldrb r4, [r0, #-6] @ r4 = ht_line[2] \n"
- "ldrb r5, [r1, #-8] @ r5 = pixmap[2] \n"
+ "ldrb r5, [r1, #-6] @ r5 = pixmap[2] \n"
"cmp r6, r7 @ if (r6 < r7) \n"
"orrle r14,r14,#0x40 @ h |= 0x40 \n"
"ldrb r6, [r0, #-5] @ r6 = ht_line[3] \n"
- "ldrb r7, [r1, #-7] @ r7 = pixmap[3] \n"
+ "ldrb r7, [r1, #-5] @ r7 = pixmap[3] \n"
"cmp r4, r5 @ if (r4 < r5) \n"
"orrle r14,r14,#0x20 @ h |= 0x20 \n"
"ldrb r4, [r0, #-4] @ r4 = ht_line[4] \n"
- "ldrb r5, [r1, #-5] @ r5 = pixmap[5] \n"
+ "ldrb r5, [r1, #-4] @ r5 = pixmap[4] \n"
"cmp r6, r7 @ if (r6 < r7) \n"
"orrle r14,r14,#0x10 @ h |= 0x10 \n"
"ldrb r6, [r0, #-3] @ r6 = ht_line[5] \n"
- "ldrb r7, [r1, #-4] @ r7 = pixmap[6] \n"
+ "ldrb r7, [r1, #-3] @ r7 = pixmap[5] \n"
"cmp r4, r5 @ if (r4 < r5) \n"
"orrle r14,r14,#0x08 @ h |= 0x08 \n"
"ldrb r4, [r0, #-2] @ r4 = ht_line[6] \n"
- "ldrb r5, [r1, #-3] @ r5 = pixmap[7] \n"
+ "ldrb r5, [r1, #-2] @ r5 = pixmap[6] \n"
"cmp r6, r7 @ if (r6 < r7) \n"
"orrle r14,r14,#0x04 @ h |= 0x04 \n"
"ldrb r6, [r0, #-1] @ r6 = ht_line[7] \n"
- "ldrb r7, [r1, #-2] @ r7 = pixmap[8] \n"
- "cmp r4, r5 @ if (r4 < r5) \n"
- "orrle r14,r14,#0x02 @ h |= 0x02 \n"
- "cmp r6, r7 @ if (r7 < r6) \n"
- "orrle r14,r14,#0x01 @ h |= 0x01 \n"
- "subs r12,r12,#2 @ r12 = l -= 2 \n"
- "strb r14,[r2], #1 @ *out++ = h \n"
- "moveq r12,r9 @ if(l==0) l = ht_len \n"
- "subeq r0, r0, r9, LSL #2 @ ht_line -= l \n"
- "subs r3, r3, #2 @ w -= 2 \n"
- "bgt 1b @ } \n"
- "blt 3f @ \n"
- "2: \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n"
- "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n"
- "mov r14, #0 @ r14= h = 0 \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n"
- "cmp r4, r5 @ if (r4 < r5) \n"
- "orrle r14,r14,#0x80 @ h |= 0x80 \n"
- "ldrb r4, [r0], #1 @ r6 = ht_line[2] \n"
- "ldrb r5, [r1], #1 @ r7 = pixmap[2] \n"
- "cmp r6, r7 @ if (r6 < r7) \n"
- "orrle r14,r14,#0x40 @ h |= 0x40 \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n"
- "cmp r4, r5 @ if (r4 < r5) \n"
- "orrle r14,r14,#0x20 @ h |= 0x20 \n"
- "cmp r6, r7 @ if (r6 < r7) \n"
- "orrle r14,r14,#0x10 @ h |= 0x10 \n"
- "strb r14,[r2] @ *out = h \n"
- "3: \n"
- "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
- ENTER_THUMB
- );
-}
-#else
-/* Vanilla version, should work on all ARMs */
-static void
-do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len)
-{
- asm volatile(
- ENTER_ARM
- // Store one more reg that required to keep double stack alignment
- "stmfd r13!,{r4-r7,r9,r14} \n"
- "@ r0 = ht_line \n"
- "@ r1 = pixmap \n"
- "@ r2 = out \n"
- "@ r3 = w \n"
- "@ <> = ht_len \n"
- "ldr r9, [r13,#6*4] @ r9 = ht_len \n"
- "subs r3, r3, #1 @ r3 = w -= 1 \n"
- "ble 2f @ while (w > 0) { \n"
- "mov r12,r9 @ r12= l = ht_len \n"
- "1: \n"
- "mov r14,#0 @ r14= h = 0 \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n"
- "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n"
- "cmp r4, r5 @ if (r4 < r5) \n"
- "orrle r14,r14,#0x80 @ h |= 0x80 \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[2] \n"
- "ldrb r5, [r1], #1 @ r5 = pixmap[2] \n"
- "cmp r6, r7 @ if (r6 < r7) \n"
- "orrle r14,r14,#0x40 @ h |= 0x40 \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[3] \n"
- "ldrb r7, [r1], #2 @ r7 = pixmap[3] \n"
- "cmp r4, r5 @ if (r4 < r5) \n"
- "orrle r14,r14,#0x20 @ h |= 0x20 \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[4] \n"
- "ldrb r5, [r1], #1 @ r5 = pixmap[4] \n"
- "cmp r6, r7 @ if (r6 < r7) \n"
- "orrle r14,r14,#0x10 @ h |= 0x10 \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[5] \n"
- "ldrb r7, [r1], #1 @ r7 = pixmap[6] \n"
- "cmp r4, r5 @ if (r4 < r5) \n"
- "orrle r14,r14,#0x08 @ h |= 0x08 \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[6] \n"
- "ldrb r5, [r1], #1 @ r5 = pixmap[7] \n"
- "cmp r6, r7 @ if (r6 < r7) \n"
- "orrle r14,r14,#0x04 @ h |= 0x04 \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[7] \n"
- "ldrb r7, [r1], #2 @ r7 = pixmap[8] \n"
+ "ldrb r7, [r1, #-1] @ r7 = pixmap[7] \n"
"cmp r4, r5 @ if (r4 < r5) \n"
"orrle r14,r14,#0x02 @ h |= 0x02 \n"
"cmp r6, r7 @ if (r7 < r6) \n"
@@ -512,7 +426,7 @@ do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * res
"cmp r6, r7 @ if (r6 < r7) \n"
"orrle r14,r14,#0x40 @ h |= 0x40 \n"
"ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n"
+ "ldrb r7, [r1], #1 @ r7 = pixmap[3] \n"
"cmp r4, r5 @ if (r4 < r5) \n"
"orrle r14,r14,#0x20 @ h |= 0x20 \n"
"cmp r6, r7 @ if (r6 < r7) \n"
@@ -523,7 +437,6 @@ do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * res
ENTER_THUMB
);
}
-#endif /* UNALIGNED */
#else
static void do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char * restrict out, int w, int ht_len)
{
diff --git a/source/fitz/pixmap.c b/source/fitz/pixmap.c
index e0d24265..3385021f 100644
--- a/source/fitz/pixmap.c
+++ b/source/fitz/pixmap.c
@@ -201,11 +201,11 @@ fz_pixmap_samples(fz_context *ctx, fz_pixmap *pix)
*/
#ifdef ARCH_ARM
static void
-clear_cmyk_bitmap(unsigned char *samples, int c, int value)
+clear_cmyka_bitmap_ARM(uint32_t *samples, int c, int value)
__attribute__((naked));
static void
-clear_cmyk_bitmap(unsigned char *samples, int c, int value)
+clear_cmyka_bitmap_ARM(uint32_t *samples, int c, int value)
{
asm volatile(
ENTER_ARM
@@ -251,7 +251,8 @@ clear_cmyk_bitmap(unsigned char *samples, int c, int value)
ENTER_THUMB
);
}
-#else
+#endif
+
static void
clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, int alpha)
{
@@ -264,6 +265,10 @@ clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, i
stride -= w*5;
if (stride == 0)
{
+#ifdef ARCH_ARM
+ clear_cmyka_bitmap_ARM(s, c, alpha);
+ return;
+#else
/* We can do it all fast (except for maybe a few stragglers) */
union
{
@@ -306,6 +311,7 @@ clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, i
}
}
c += 3;
+#endif
}
t = (unsigned char *)s;
w = c;
@@ -327,27 +333,45 @@ clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, i
else
{
stride -= w*4;
- if ((stride & 3)== 0)
+ if ((stride & 3) == 0)
{
- /* We can do it all fast */
- union
+ if (stride == 0)
{
- uint8_t bytes[4];
- uint32_t word;
- } d;
-
- d.word = 0;
- d.bytes[3] = value;
- w *= h;
-
+ w *= h;
+ h = 1;
+ }
+ w *= 4;
+ if (value == 0)
{
- const uint32_t a0 = d.word;
- while (w > 0)
+ while (h--)
{
- *s++ = a0;
- w--;
+ memset(s, 0, w);
+ s += (stride>>2);
+ }
+ }
+ else
+ {
+ /* We can do it all fast */
+ union
+ {
+ uint8_t bytes[4];
+ uint32_t word;
+ } d;
+
+ d.word = 0;
+ d.bytes[3] = value;
+ {
+ const uint32_t a0 = d.word;
+ while (h--)
+ {
+ int ww = w;
+ while (ww--)
+ {
+ *s++ = a0;
+ }
+ s += (stride>>2);
+ }
}
- s += (stride>>2);
}
}
else
@@ -369,7 +393,6 @@ clear_cmyk_bitmap(unsigned char *samples, int w, int h, int stride, int value, i
}
}
}
-#endif
void
fz_clear_pixmap(fz_context *ctx, fz_pixmap *pix)