diff options
Diffstat (limited to 'source/fitz/halftone.c')
-rw-r--r-- | source/fitz/halftone.c | 143 |
1 file changed, 28 insertions, 115 deletions
diff --git a/source/fitz/halftone.c b/source/fitz/halftone.c index 60e3c182..abe6d026 100644 --- a/source/fitz/halftone.c +++ b/source/fitz/halftone.c @@ -178,40 +178,38 @@ do_threshold_1(const unsigned char * restrict ht_line, const unsigned char * res "1: \n" "ldr r14,[r1], #4 @ r14= pixmap[0..3] \n" "ldr r5, [r1], #4 @ r5 = pixmap[4..7] \n" - "ldr r6, [r1], #4 @ r6 = pixmap[8..11] \n" - "ldr r7, [r1], #4 @ r7 = pixmap[12..15] \n" "ldrb r4, [r0], #8 @ r0 = ht_line += 8 \n" - "eors r14,r14,r5, ROR #8 @ if (white) \n" - "teqeq r6, r7, ROR #8 @ \n" + "adds r14,r14,#1 @ set eq iff r14=-1 \n" + "addeqs r5, r5, #1 @ set eq iff r14=r5=-1 \n" "beq 9b @ white \n" - "ldrb r5, [r1, #-16] @ r5 = pixmap[0] \n" + "ldrb r5, [r1, #-8] @ r5 = pixmap[0] \n" "ldrb r6, [r0, #-7] @ r6 = ht_line[1] \n" - "ldrb r7, [r1, #-14] @ r7 = pixmap[2] \n" + "ldrb r7, [r1, #-7] @ r7 = pixmap[1] \n" "mov r14,#0 @ r14= h = 0 \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x80 @ h |= 0x80 \n" "ldrb r4, [r0, #-6] @ r4 = ht_line[2] \n" - "ldrb r5, [r1, #-12] @ r5 = pixmap[4] \n" + "ldrb r5, [r1, #-6] @ r5 = pixmap[2] \n" "cmp r7, r6 @ if (r7 < r6) \n" "orrlt r14,r14,#0x40 @ h |= 0x40 \n" "ldrb r6, [r0, #-5] @ r6 = ht_line[3] \n" - "ldrb r7, [r1, #-10] @ r7 = pixmap[6] \n" + "ldrb r7, [r1, #-5] @ r7 = pixmap[3] \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x20 @ h |= 0x20 \n" "ldrb r4, [r0, #-4] @ r4 = ht_line[4] \n" - "ldrb r5, [r1, #-8] @ r5 = pixmap[8] \n" + "ldrb r5, [r1, #-4] @ r5 = pixmap[4] \n" "cmp r7, r6 @ if (r7 < r6) \n" "orrlt r14,r14,#0x10 @ h |= 0x10 \n" "ldrb r6, [r0, #-3] @ r6 = ht_line[5] \n" - "ldrb r7, [r1, #-6] @ r7 = pixmap[10] \n" + "ldrb r7, [r1, #-3] @ r7 = pixmap[5] \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x08 @ h |= 0x08 \n" "ldrb r4, [r0, #-2] @ r4 = ht_line[6] \n" - "ldrb r5, [r1, #-4] @ r5 = pixmap[12] \n" + "ldrb r5, [r1, #-2] @ r5 = pixmap[6] \n" "cmp r7, r6 @ if (r7 < r6) \n" "orrlt r14,r14,#0x04 @ h |= 0x04 \n" "ldrb r6, [r0, #-1] @ r6 = ht_line[7] 
\n" - "ldrb r7, [r1, #-2] @ r7 = pixmap[14] \n" + "ldrb r7, [r1, #-1] @ r7 = pixmap[7] \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x02 @ h |= 0x02 \n" "cmp r7, r6 @ if (r7 < r6) \n" @@ -226,43 +224,43 @@ do_threshold_1(const unsigned char * restrict ht_line, const unsigned char * res "adds r3, r3, #7 @ w += 7 \n" "ble 4f @ if (w >= 0) { \n" "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n" - "ldrb r5, [r1], #2 @ r5 = pixmap[0] \n" + "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n" "mov r14, #0 @ r14= h = 0 \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x80 @ h |= 0x80 \n" "cmp r3, #1 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[1] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[2] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[1] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x40 @ h |= 0x40 \n" "cmp r3, #2 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[2] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[4] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[2] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x20 @ h |= 0x20 \n" "cmp r3, #3 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[3] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[6] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[3] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x10 @ h |= 0x10 \n" "cmp r3, #4 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[4] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[8] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[4] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x08 @ h |= 0x08 \n" "cmp r3, #5 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[5] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[10] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[5] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x04 @ h |= 0x04 \n" "cmp r3, #6 @ \n" "ldrgtb r4, [r0], #1 @ r6 = ht_line[6] \n" - "ldrgtb r5, [r1], #2 @ r7 = pixmap[12] \n" + "ldrgtb r5, [r1], #1 @ r7 = pixmap[6] \n" "ble 3f @ \n" "cmp r5, r4 @ if (r5 < r4) \n" "orrlt r14,r14,#0x02 @ h |= 0x02 \n" @@ -344,7 +342,6 @@ static void 
do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len) __attribute__((naked)); -#ifdef ARCH_UNALIGNED_OK static void do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len) { @@ -371,123 +368,40 @@ do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * res "beq 2f @ } \n" "blt 3f @ \n" "1: \n" - "ldr r5, [r1], #5 @ r5 = pixmap[0..3] \n" - "ldr r7, [r1], #5 @ r7 = pixmap[5..8] \n" + "ldr r5, [r1], #4 @ r5 = pixmap[0..3] \n" + "ldr r7, [r1], #4 @ r7 = pixmap[4..7] \n" "add r0, r0, #8 @ r0 = ht_line += 8 \n" "mov r14,#0 @ r14= h = 0 \n" "orrs r5, r5, r7 @ if (r5 | r7 == 0) \n" "beq 9b @ white \n" "ldrb r4, [r0, #-8] @ r4 = ht_line[0] \n" - "ldrb r5, [r1, #-10] @ r5 = pixmap[0] \n" + "ldrb r5, [r1, #-8] @ r5 = pixmap[0] \n" "ldrb r6, [r0, #-7] @ r6 = ht_line[1] \n" - "ldrb r7, [r1, #-9] @ r7 = pixmap[1] \n" + "ldrb r7, [r1, #-7] @ r7 = pixmap[1] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x80 @ h |= 0x80 \n" "ldrb r4, [r0, #-6] @ r4 = ht_line[2] \n" - "ldrb r5, [r1, #-8] @ r5 = pixmap[2] \n" + "ldrb r5, [r1, #-6] @ r5 = pixmap[2] \n" "cmp r6, r7 @ if (r6 < r7) \n" "orrle r14,r14,#0x40 @ h |= 0x40 \n" "ldrb r6, [r0, #-5] @ r6 = ht_line[3] \n" - "ldrb r7, [r1, #-7] @ r7 = pixmap[3] \n" + "ldrb r7, [r1, #-5] @ r7 = pixmap[3] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x20 @ h |= 0x20 \n" "ldrb r4, [r0, #-4] @ r4 = ht_line[4] \n" - "ldrb r5, [r1, #-5] @ r5 = pixmap[5] \n" + "ldrb r5, [r1, #-4] @ r5 = pixmap[4] \n" "cmp r6, r7 @ if (r6 < r7) \n" "orrle r14,r14,#0x10 @ h |= 0x10 \n" "ldrb r6, [r0, #-3] @ r6 = ht_line[5] \n" - "ldrb r7, [r1, #-4] @ r7 = pixmap[6] \n" + "ldrb r7, [r1, #-3] @ r7 = pixmap[5] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x08 @ h |= 0x08 \n" "ldrb r4, [r0, #-2] @ r4 = ht_line[6] \n" - "ldrb r5, [r1, #-3] @ r5 = pixmap[7] \n" + "ldrb 
r5, [r1, #-2] @ r5 = pixmap[6] \n" "cmp r6, r7 @ if (r6 < r7) \n" "orrle r14,r14,#0x04 @ h |= 0x04 \n" "ldrb r6, [r0, #-1] @ r6 = ht_line[7] \n" - "ldrb r7, [r1, #-2] @ r7 = pixmap[8] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x02 @ h |= 0x02 \n" - "cmp r6, r7 @ if (r7 < r6) \n" - "orrle r14,r14,#0x01 @ h |= 0x01 \n" - "subs r12,r12,#2 @ r12 = l -= 2 \n" - "strb r14,[r2], #1 @ *out++ = h \n" - "moveq r12,r9 @ if(l==0) l = ht_len \n" - "subeq r0, r0, r9, LSL #2 @ ht_line -= l \n" - "subs r3, r3, #2 @ w -= 2 \n" - "bgt 1b @ } \n" - "blt 3f @ \n" - "2: \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n" - "mov r14, #0 @ r14= h = 0 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n" - "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x80 @ h |= 0x80 \n" - "ldrb r4, [r0], #1 @ r6 = ht_line[2] \n" - "ldrb r5, [r1], #1 @ r7 = pixmap[2] \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x40 @ h |= 0x40 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n" - "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x20 @ h |= 0x20 \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x10 @ h |= 0x10 \n" - "strb r14,[r2] @ *out = h \n" - "3: \n" - "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" - ENTER_THUMB - ); -} -#else -/* Vanilla version, should work on all ARMs */ -static void -do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len) -{ - asm volatile( - ENTER_ARM - // Store one more reg that required to keep double stack alignment - "stmfd r13!,{r4-r7,r9,r14} \n" - "@ r0 = ht_line \n" - "@ r1 = pixmap \n" - "@ r2 = out \n" - "@ r3 = w \n" - "@ <> = ht_len \n" - "ldr r9, [r13,#6*4] @ r9 = ht_len \n" - "subs r3, r3, #1 @ r3 = w -= 1 \n" - "ble 2f @ while (w > 0) { \n" - "mov r12,r9 @ r12= l = ht_len \n" - "1: \n" - "mov r14,#0 @ r14= h = 0 \n" - "ldrb r4, [r0], 
#1 @ r4 = ht_line[0] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n" - "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x80 @ h |= 0x80 \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[2] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[2] \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x40 @ h |= 0x40 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[3] \n" - "ldrb r7, [r1], #2 @ r7 = pixmap[3] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x20 @ h |= 0x20 \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[4] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[4] \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x10 @ h |= 0x10 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[5] \n" - "ldrb r7, [r1], #1 @ r7 = pixmap[6] \n" - "cmp r4, r5 @ if (r4 < r5) \n" - "orrle r14,r14,#0x08 @ h |= 0x08 \n" - "ldrb r4, [r0], #1 @ r4 = ht_line[6] \n" - "ldrb r5, [r1], #1 @ r5 = pixmap[7] \n" - "cmp r6, r7 @ if (r6 < r7) \n" - "orrle r14,r14,#0x04 @ h |= 0x04 \n" - "ldrb r6, [r0], #1 @ r6 = ht_line[7] \n" - "ldrb r7, [r1], #2 @ r7 = pixmap[8] \n" + "ldrb r7, [r1, #-1] @ r7 = pixmap[7] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x02 @ h |= 0x02 \n" "cmp r6, r7 @ if (r7 < r6) \n" @@ -512,7 +426,7 @@ do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * res "cmp r6, r7 @ if (r6 < r7) \n" "orrle r14,r14,#0x40 @ h |= 0x40 \n" "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n" - "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n" + "ldrb r7, [r1], #1 @ r7 = pixmap[3] \n" "cmp r4, r5 @ if (r4 < r5) \n" "orrle r14,r14,#0x20 @ h |= 0x20 \n" "cmp r6, r7 @ if (r6 < r7) \n" @@ -523,7 +437,6 @@ do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * res ENTER_THUMB ); } -#endif /* UNALIGNED */ #else static void do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char * restrict out, int w, int ht_len) { |