summaryrefslogtreecommitdiff
path: root/source/fitz/halftone.c
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2016-03-25 00:18:40 +0000
committerRobin Watts <robin.watts@artifex.com>2016-03-25 14:16:45 +0000
commit57e627d1665b738570aa0d5faa8d39eb609ff22a (patch)
tree11b274d731149f25783314867b5266103a5397fe /source/fitz/halftone.c
parent1d589237650633ae5d3f3c71ac71f8d4cbd1d4c9 (diff)
downloadmupdf-57e627d1665b738570aa0d5faa8d39eb609ff22a.tar.xz
More ARM code thresholding work.
Fix do_threshold_1 and implement do_threshold_4. do_threshold_1 checks for white and shortcuts the work if it can. There are 2 ARM variants of do_threshold_4. One for ARMs that support unaligned loads, and one for the (rare configurations) that don't. The former checks for white and shortcuts the work.
Diffstat (limited to 'source/fitz/halftone.c')
-rw-r--r--source/fitz/halftone.c299
1 files changed, 250 insertions, 49 deletions
diff --git a/source/fitz/halftone.c b/source/fitz/halftone.c
index 2df8e3e7..186c0aa6 100644
--- a/source/fitz/halftone.c
+++ b/source/fitz/halftone.c
@@ -147,7 +147,6 @@ static void make_ht_line(unsigned char *buf, fz_halftone *ht, int x, int y, int
typedef void (threshold_fn)(const unsigned char *ht_line, const unsigned char *pixmap, unsigned char *out, int w, int ht_len);
#ifdef ARCH_ARM
-
static void
do_threshold_1(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len)
__attribute__((naked));
@@ -166,95 +165,110 @@ do_threshold_1(const unsigned char * restrict ht_line, const unsigned char * res
"@ <> = ht_len \n"
"ldr r9, [r13,#6*4] @ r9 = ht_len \n"
"subs r3, r3, #7 @ r3 = w -= 7 \n"
- "blt 2f @ while (w > 0) { \n"
+ "ble 2f @ while (w > 0) { \n"
"mov r12,r9 @ r12= l = ht_len \n"
+ "b 1f \n"
+ "9: \n"
+ "strb r14,[r2], #1 @ *out++ = 0 \n"
+ "subs r12,r12,#8 @ r12 = l -= 8 \n"
+ "moveq r12,r9 @ if(l==0) l = ht_len \n"
+ "subeq r0, r0, r9 @ ht_line -= l \n"
+ "subs r3, r3, #8 @ w -= 8 \n"
+ "ble 2f @ } \n"
"1: \n"
+ "ldr r14,[r1], #4 @ r14= pixmap[0..3] \n"
+ "ldr r5, [r1], #4 @ r5 = pixmap[4..7] \n"
+ "ldr r6, [r1], #4 @ r6 = pixmap[8..11] \n"
+ "ldr r7, [r1], #4 @ r7 = pixmap[12..15] \n"
+ "ldrb r4, [r0], #8 @ r0 = ht_line += 8 \n"
+ "eors r14,r14,r5, ROR #8 @ if (white) \n"
+ "teqeq r6, r7, ROR #8 @ \n"
+ "beq 9b @ white \n"
+ "ldrb r5, [r1, #-16] @ r5 = pixmap[0] \n"
+ "ldrb r6, [r0, #-7] @ r6 = ht_line[1] \n"
+ "ldrb r7, [r1, #-14] @ r7 = pixmap[2] \n"
"mov r14,#0 @ r14= h = 0 \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n"
- "ldrb r5, [r1], #2 @ r5 = pixmap[0] \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x80 @ h |= 0x80 \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[2] \n"
- "ldrb r5, [r1], #2 @ r5 = pixmap[4] \n"
+ "ldrb r4, [r0, #-6] @ r4 = ht_line[2] \n"
+ "ldrb r5, [r1, #-12] @ r5 = pixmap[4] \n"
"cmp r7, r6 @ if (r7 < r6) \n"
"orrlt r14,r14,#0x40 @ h |= 0x40 \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[3] \n"
- "ldrb r7, [r1], #2 @ r7 = pixmap[6] \n"
+ "ldrb r6, [r0, #-5] @ r6 = ht_line[3] \n"
+ "ldrb r7, [r1, #-10] @ r7 = pixmap[6] \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x20 @ h |= 0x20 \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[2] \n"
- "ldrb r5, [r1], #2 @ r5 = pixmap[4] \n"
+ "ldrb r4, [r0, #-4] @ r4 = ht_line[4] \n"
+ "ldrb r5, [r1, #-8] @ r5 = pixmap[8] \n"
"cmp r7, r6 @ if (r7 < r6) \n"
"orrlt r14,r14,#0x10 @ h |= 0x10 \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[3] \n"
- "ldrb r7, [r1], #2 @ r7 = pixmap[6] \n"
+ "ldrb r6, [r0, #-3] @ r6 = ht_line[5] \n"
+ "ldrb r7, [r1, #-6] @ r7 = pixmap[10] \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x08 @ h |= 0x08 \n"
- "ldrb r4, [r0], #1 @ r4 = ht_line[2] \n"
- "ldrb r5, [r1], #2 @ r5 = pixmap[4] \n"
+ "ldrb r4, [r0, #-2] @ r4 = ht_line[6] \n"
+ "ldrb r5, [r1, #-4] @ r5 = pixmap[12] \n"
"cmp r7, r6 @ if (r7 < r6) \n"
"orrlt r14,r14,#0x04 @ h |= 0x04 \n"
- "ldrb r6, [r0], #1 @ r6 = ht_line[3] \n"
- "ldrb r7, [r1], #2 @ r7 = pixmap[6] \n"
+ "ldrb r6, [r0, #-1] @ r6 = ht_line[7] \n"
+ "ldrb r7, [r1, #-2] @ r7 = pixmap[14] \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x02 @ h |= 0x02 \n"
"cmp r7, r6 @ if (r7 < r6) \n"
"orrlt r14,r14,#0x01 @ h |= 0x01 \n"
"subs r12,r12,#8 @ r12 = l -= 8 \n"
+ "strb r14,[r2], #1 @ *out++ = h \n"
"moveq r12,r9 @ if(l==0) l = ht_len \n"
- "subeq r6, r6, r12,LSL #2 @ ht_line -= l \n"
+ "subeq r0, r0, r9 @ ht_line -= l \n"
"subs r3, r3, #8 @ w -= 8 \n"
- "strb r14,[r2], #1 @ *out++ = h \n"
"bgt 1b @ } \n"
- "1: \n"
- "adds r3, r3, #6 @ w += 6 \n"
- "blt 3f @ if (w < 0) { \n"
+ "2: \n"
+ "adds r3, r3, #7 @ w += 7 \n"
+ "ble 4f @ if (w >= 0) { \n"
"ldrb r4, [r0], #1 @ r4 = ht_line[0] \n"
"ldrb r5, [r1], #2 @ r5 = pixmap[0] \n"
"mov r14, #0 @ r14= h = 0 \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x80 @ h |= 0x80 \n"
- "teq r3, #0 @ \n"
- "ldrneb r4, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrneb r5, [r1], #2 @ r7 = pixmap[2] \n"
- "beq 2f @ \n"
+ "cmp r3, #1 @ \n"
+ "ldrgtb r4, [r0], #1 @ r6 = ht_line[1] \n"
+ "ldrgtb r5, [r1], #2 @ r7 = pixmap[2] \n"
+ "ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x40 @ h |= 0x40 \n"
- "teq r3, #1 @ \n"
- "ldrneb r4, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrneb r5, [r1], #2 @ r7 = pixmap[2] \n"
- "beq 2f @ \n"
+ "cmp r3, #2 @ \n"
+ "ldrgtb r4, [r0], #1 @ r6 = ht_line[2] \n"
+ "ldrgtb r5, [r1], #2 @ r7 = pixmap[4] \n"
+ "ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x20 @ h |= 0x20 \n"
- "teq r3, #2 @ \n"
- "ldrneb r4, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrneb r5, [r1], #2 @ r7 = pixmap[2] \n"
- "beq 2f @ \n"
+ "cmp r3, #3 @ \n"
+ "ldrgtb r4, [r0], #1 @ r6 = ht_line[3] \n"
+ "ldrgtb r5, [r1], #2 @ r7 = pixmap[6] \n"
+ "ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x10 @ h |= 0x10 \n"
- "teq r3, #3 @ \n"
- "ldrneb r4, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrneb r5, [r1], #2 @ r7 = pixmap[2] \n"
- "beq 2f @ \n"
+ "cmp r3, #4 @ \n"
+ "ldrgtb r4, [r0], #1 @ r6 = ht_line[4] \n"
+ "ldrgtb r5, [r1], #2 @ r7 = pixmap[8] \n"
+ "ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x08 @ h |= 0x08 \n"
- "teq r3, #4 @ \n"
- "ldrneb r4, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrneb r5, [r1], #2 @ r7 = pixmap[2] \n"
- "beq 2f @ \n"
+ "cmp r3, #5 @ \n"
+ "ldrgtb r4, [r0], #1 @ r6 = ht_line[5] \n"
+ "ldrgtb r5, [r1], #2 @ r7 = pixmap[10] \n"
+ "ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x04 @ h |= 0x04 \n"
- "teq r3, #5 @ \n"
- "ldrneb r4, [r0], #1 @ r6 = ht_line[1] \n"
- "ldrneb r5, [r1], #2 @ r7 = pixmap[2] \n"
- "beq 2f @ \n"
+ "cmp r3, #6 @ \n"
+ "ldrgtb r4, [r0], #1 @ r6 = ht_line[6] \n"
+ "ldrgtb r5, [r1], #2 @ r7 = pixmap[12] \n"
+ "ble 3f @ \n"
"cmp r5, r4 @ if (r5 < r4) \n"
"orrlt r14,r14,#0x02 @ h |= 0x02 \n"
- "2: \n"
- "strb r14,[r2] @ *out = h \n"
"3: \n"
+ "strb r14,[r2] @ *out = h \n"
+ "4: \n"
"ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
ENTER_THUMB
);
@@ -325,6 +339,192 @@ static void do_threshold_1(const unsigned char * restrict ht_line, const unsigne
white = 0xFF. Reversing these tests enables us to maintain that
BlackIs1 in bitmaps.
*/
+#ifdef ARCH_ARM
+static void
+do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len)
+__attribute__((naked));
+
+#ifdef ARCH_ARM_CAN_LOAD_UNALIGNED
+static void
+do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len)
+{
+ asm volatile(
+ ENTER_ARM
+ // Store one more reg than required to keep double stack alignment
+ "stmfd r13!,{r4-r7,r9,r14} \n"
+ "@ r0 = ht_line \n"
+ "@ r1 = pixmap \n"
+ "@ r2 = out \n"
+ "@ r3 = w \n"
+ "@ <> = ht_len \n"
+ "ldr r9, [r13,#6*4] @ r9 = ht_len \n"
+ "subs r3, r3, #1 @ r3 = w -= 1 \n"
+ "ble 2f @ while (w > 0) { \n"
+ "mov r12,r9 @ r12= l = ht_len \n"
+ "b 1f @ \n"
+ "9: @ \n"
+ "strb r14,[r2], #1 @ *out++ = h \n"
+ "subs r12,r12,#2 @ r12 = l -= 2 \n"
+ "moveq r12,r9 @ if(l==0) l = ht_len \n"
+ "subeq r0, r0, r9, LSL #2 @ ht_line -= l \n"
+ "subs r3, r3, #2 @ w -= 2 \n"
+ "beq 2f @ } \n"
+ "blt 3f @ \n"
+ "1: \n"
+ "ldr r5, [r1], #5 @ r5 = pixmap[0..3] \n"
+ "ldr r7, [r1], #5 @ r7 = pixmap[5..8] \n"
+ "add r0, r0, #8 @ r0 = ht_line += 8 \n"
+ "mov r14,#0 @ r14= h = 0 \n"
+ "orrs r5, r5, r7 @ if (r5 | r7 == 0) \n"
+ "beq 9b @ white \n"
+ "ldrb r4, [r0, #-8] @ r4 = ht_line[0] \n"
+ "ldrb r5, [r1, #-10] @ r5 = pixmap[0] \n"
+ "ldrb r6, [r0, #-7] @ r6 = ht_line[1] \n"
+ "ldrb r7, [r1, #-9] @ r7 = pixmap[1] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x80 @ h |= 0x80 \n"
+ "ldrb r4, [r0, #-6] @ r4 = ht_line[2] \n"
+ "ldrb r5, [r1, #-8] @ r5 = pixmap[2] \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x40 @ h |= 0x40 \n"
+ "ldrb r6, [r0, #-5] @ r6 = ht_line[3] \n"
+ "ldrb r7, [r1, #-7] @ r7 = pixmap[3] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x20 @ h |= 0x20 \n"
+ "ldrb r4, [r0, #-4] @ r4 = ht_line[4] \n"
+ "ldrb r5, [r1, #-5] @ r5 = pixmap[5] \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x10 @ h |= 0x10 \n"
+ "ldrb r6, [r0, #-3] @ r6 = ht_line[5] \n"
+ "ldrb r7, [r1, #-4] @ r7 = pixmap[6] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x08 @ h |= 0x08 \n"
+ "ldrb r4, [r0, #-2] @ r4 = ht_line[6] \n"
+ "ldrb r5, [r1, #-3] @ r5 = pixmap[7] \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x04 @ h |= 0x04 \n"
+ "ldrb r6, [r0, #-1] @ r6 = ht_line[7] \n"
+ "ldrb r7, [r1, #-2] @ r7 = pixmap[8] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x02 @ h |= 0x02 \n"
+ "cmp r6, r7 @ if (r7 < r6) \n"
+ "orrle r14,r14,#0x01 @ h |= 0x01 \n"
+ "subs r12,r12,#2 @ r12 = l -= 2 \n"
+ "strb r14,[r2], #1 @ *out++ = h \n"
+ "moveq r12,r9 @ if(l==0) l = ht_len \n"
+ "subeq r0, r0, r9, LSL #2 @ ht_line -= l \n"
+ "subs r3, r3, #2 @ w -= 2 \n"
+ "bgt 1b @ } \n"
+ "blt 3f @ \n"
+ "2: \n"
+ "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n"
+ "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n"
+ "mov r14, #0 @ r14= h = 0 \n"
+ "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
+ "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x80 @ h |= 0x80 \n"
+ "ldrb r4, [r0], #1 @ r6 = ht_line[2] \n"
+ "ldrb r5, [r1], #1 @ r7 = pixmap[2] \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x40 @ h |= 0x40 \n"
+ "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
+ "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x20 @ h |= 0x20 \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x10 @ h |= 0x10 \n"
+ "strb r14,[r2] @ *out = h \n"
+ "3: \n"
+ "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
+ ENTER_THUMB
+ );
+}
+#else
+/* Vanilla version, should work on all ARMs */
+static void
+do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char *restrict out, int w, int ht_len)
+{
+ asm volatile(
+ ENTER_ARM
+ // Store one more reg than required to keep double stack alignment
+ "stmfd r13!,{r4-r7,r9,r14} \n"
+ "@ r0 = ht_line \n"
+ "@ r1 = pixmap \n"
+ "@ r2 = out \n"
+ "@ r3 = w \n"
+ "@ <> = ht_len \n"
+ "ldr r9, [r13,#6*4] @ r9 = ht_len \n"
+ "subs r3, r3, #1 @ r3 = w -= 1 \n"
+ "ble 2f @ while (w > 0) { \n"
+ "mov r12,r9 @ r12= l = ht_len \n"
+ "1: \n"
+ "mov r14,#0 @ r14= h = 0 \n"
+ "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n"
+ "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n"
+ "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
+ "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x80 @ h |= 0x80 \n"
+ "ldrb r4, [r0], #1 @ r4 = ht_line[2] \n"
+ "ldrb r5, [r1], #1 @ r5 = pixmap[2] \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x40 @ h |= 0x40 \n"
+ "ldrb r6, [r0], #1 @ r6 = ht_line[3] \n"
+ "ldrb r7, [r1], #2 @ r7 = pixmap[3] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x20 @ h |= 0x20 \n"
+ "ldrb r4, [r0], #1 @ r4 = ht_line[4] \n"
+ "ldrb r5, [r1], #1 @ r5 = pixmap[4] \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x10 @ h |= 0x10 \n"
+ "ldrb r6, [r0], #1 @ r6 = ht_line[5] \n"
+ "ldrb r7, [r1], #1 @ r7 = pixmap[6] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x08 @ h |= 0x08 \n"
+ "ldrb r4, [r0], #1 @ r4 = ht_line[6] \n"
+ "ldrb r5, [r1], #1 @ r5 = pixmap[7] \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x04 @ h |= 0x04 \n"
+ "ldrb r6, [r0], #1 @ r6 = ht_line[7] \n"
+ "ldrb r7, [r1], #2 @ r7 = pixmap[8] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x02 @ h |= 0x02 \n"
+ "cmp r6, r7 @ if (r7 < r6) \n"
+ "orrle r14,r14,#0x01 @ h |= 0x01 \n"
+ "subs r12,r12,#2 @ r12 = l -= 2 \n"
+ "strb r14,[r2], #1 @ *out++ = h \n"
+ "moveq r12,r9 @ if(l==0) l = ht_len \n"
+ "subeq r0, r0, r9, LSL #2 @ ht_line -= l \n"
+ "subs r3, r3, #2 @ w -= 2 \n"
+ "bgt 1b @ } \n"
+ "blt 3f @ \n"
+ "2: \n"
+ "ldrb r4, [r0], #1 @ r4 = ht_line[0] \n"
+ "ldrb r5, [r1], #1 @ r5 = pixmap[0] \n"
+ "mov r14, #0 @ r14= h = 0 \n"
+ "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
+ "ldrb r7, [r1], #1 @ r7 = pixmap[1] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x80 @ h |= 0x80 \n"
+ "ldrb r4, [r0], #1 @ r6 = ht_line[2] \n"
+ "ldrb r5, [r1], #1 @ r7 = pixmap[2] \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x40 @ h |= 0x40 \n"
+ "ldrb r6, [r0], #1 @ r6 = ht_line[1] \n"
+ "ldrb r7, [r1], #2 @ r7 = pixmap[2] \n"
+ "cmp r4, r5 @ if (r4 < r5) \n"
+ "orrle r14,r14,#0x20 @ h |= 0x20 \n"
+ "cmp r6, r7 @ if (r6 < r7) \n"
+ "orrle r14,r14,#0x10 @ h |= 0x10 \n"
+ "strb r14,[r2] @ *out = h \n"
+ "3: \n"
+ "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
+ ENTER_THUMB
+ );
+}
+#endif /* UNALIGNED */
+#else
static void do_threshold_4(const unsigned char * restrict ht_line, const unsigned char * restrict pixmap, unsigned char * restrict out, int w, int ht_len)
{
int l = ht_len;
@@ -374,6 +574,7 @@ static void do_threshold_4(const unsigned char * restrict ht_line, const unsigne
*out = h;
}
}
+#endif
fz_bitmap *fz_new_bitmap_from_pixmap(fz_context *ctx, fz_pixmap *pix, fz_halftone *ht)
{