summaryrefslogtreecommitdiff
path: root/draw
diff options
context:
space:
mode:
authorTor Andersson <tor@ghostscript.com>2010-05-27 15:55:26 +0200
committerTor Andersson <tor@ghostscript.com>2010-05-27 15:55:26 +0200
commit9b79a247b765d4d3344c9c34fb49d47dee2d3ae5 (patch)
tree86c1c2874209d3bbcf3251129077a4b36b27f09c /draw
parentcf71a75f63fd6ba921d074634d32fe467e0f7a52 (diff)
downloadmupdf-9b79a247b765d4d3344c9c34fb49d47dee2d3ae5.tar.xz
Add Robin Watts' arm assembler version of path_w4i1o4.
Diffstat (limited to 'draw')
-rw-r--r--draw/archarm.c106
1 files changed, 106 insertions, 0 deletions
diff --git a/draw/archarm.c b/draw/archarm.c
new file mode 100644
index 00000000..609a1b90
--- /dev/null
+++ b/draw/archarm.c
@@ -0,0 +1,106 @@
+/*
+ * ARM-specific rendering optimizations live here
+ */
+
+#include "fitz.h"
+
+/* always surround cpu specific code with ARCH_XXX */
+#ifdef ARCH_ARM
+
+static void
+path_w4i1o4arm(byte * restrict argb, byte * restrict src, byte cov, int len, byte * restrict dst)
+{
+ /* Hand-coded ARM implementation of the optimized C version of path_w4i1o4 (which is not in this file). For each of len pixels: cov = (cov + *src) & 255, *src is
+ * zeroed, and the 32-bit word at dst is blended towards the word loaded from argb, whose low byte is the alpha. Nothing is read or written when that alpha is 0. */
+ if (len <= 0)
+ return;
+ asm volatile(
+ "ldr %0, [%0] @ %0 = argb \n"
+ "mov r11,#0 \n"
+ "mov r8, #0xFF00 \n"
+ "and r14,%0,#255 @ r14= alpha \n"
+ "orr %0, %0, #255 @ %0 = argb |= 255 \n"
+ "orr r8, r8, r8, LSL #16 @ r8 = 0xFF00FF00 \n"
+ "adds r14,r14,r14,LSR #7 @ r14 = alpha += alpha>>7 \n"
+ "beq 9f @ if (alpha == 0) bail \n"
+ "and r6, %0, r8 @ r6 = rb<<8 \n"
+ "bic %0, %0, r8 @ %0 = ag \n"
+ "mov r6, r6, LSR #8 @ r6 = rb \n"
+ "cmp r14,#256 @ if (alpha == 256) \n"
+ "beq 5f @ no-alpha loop (enter past the len-- at 4) \n"
+ "B 2f @ enter the loop \n"
+ "1: @ Loop used for when coverage*alpha == 0 \n"
+ "subs %3, %3, #1 @ len-- \n"
+ "ble 9f \n"
+ "2: \n"
+ "ldrb r12,[%1] @ r12= *src \n"
+ "ldr r9, [%4], #4 @ r9 = dag = *dst32++ \n"
+ "strb r11,[%1], #1 @ r11= *src++ = 0 \n"
+ "add %2, r12, %2 @ %2 = cov += r12 \n"
+ "ands %2, %2, #255 @ %2 = cov &= 255 \n"
+ "beq 1b @ if coverage == 0 loop back \n"
+ "add r10,%2, %2, LSR #7 @ r10= ca = cov+(cov>>7) \n"
+ "mul r10,r14,r10 @ r10= ca *= alpha \n"
+ "and r7, r8, r9 @ r7 = drb = dag & MASK \n"
+ "mov r10,r10,LSR #8 @ r10= ca >>= 8 \n"
+ "and r9, r8, r9, LSL #8 @ r9 = dag = (dag<<8) & MASK \n"
+ "sub r12,r6, r7, LSR #8 @ r12= crb = rb - (drb>>8) \n"
+ "sub r5, %0, r9, LSR #8 @ r5 = cag = ag - (dag>>8) \n"
+ "mla r7, r12,r10,r7 @ r7 = drb += crb * ca \n"
+ "subs %3, %3, #1 @ len-- \n"
+ "mla r9, r5, r10,r9 @ r9 = dag += cag * ca \n"
+ "and r7, r8, r7 @ r7 = drb &= MASK \n"
+ "and r9, r8, r9 @ r9 = dag &= MASK \n"
+ "orr r9, r7, r9, LSR #8 @ r9 = dag = drb | (dag>>8) \n"
+ "str r9, [%4, #-4] @ dst32[-1] = r9 \n"
+ "bgt 2b \n"
+ "b 9f \n"
+ "@ --- Solid alpha loop --------------------------------------- \n"
+ "3: @ Reached when coverage == 255: store the solid colour \n"
+ "orr r9, %0, r6, LSL #8 @ r9 = argb \n"
+ "str r9, [%4, #-4] @ dst32[-1] = r9 \n"
+ "4: @ Loop used for when coverage == 0 \n"
+ "subs %3, %3, #1 @ len-- \n"
+ "ble 9f \n"
+ "5: \n"
+ "ldrb r12,[%1] @ r12= *src \n"
+ "ldr r9, [%4], #4 @ r9 = dag = *dst32++ \n"
+ "strb r11,[%1], #1 @ r11= *src++ = 0 \n"
+ "add %2, r12, %2 @ %2 = cov += r12 \n"
+ "ands %2, %2, #255 @ %2 = cov &= 255 \n"
+ "beq 4b @ if coverage == 0 loop back \n"
+ "cmp %2, #255 @ if coverage == solid \n"
+ "beq 3b @ loop back \n"
+ "add r10,%2, %2, LSR #7 @ r10= ca = cov+(cov>>7) \n"
+ "and r7, r8, r9 @ r7 = drb = dag & MASK \n"
+ "and r9, r8, r9, LSL #8 @ r9 = dag = (dag<<8) & MASK \n"
+ "sub r12,r6, r7, LSR #8 @ r12= crb = rb - (drb>>8) \n"
+ "sub r5, %0, r9, LSR #8 @ r5 = cag = ag - (dag>>8) \n"
+ "mla r7, r12,r10,r7 @ r7 = drb += crb * ca \n"
+ "subs %3, %3, #1 @ len-- \n"
+ "mla r9, r5, r10,r9 @ r9 = dag += cag * ca \n"
+ "and r7, r8, r7 @ r7 = drb &= MASK \n"
+ "and r9, r8, r9 @ r9 = dag &= MASK \n"
+ "orr r9, r7, r9, LSR #8 @ r9 = dag = drb | (dag>>8) \n"
+ "str r9, [%4, #-4] @ dst32[-1] = r9 \n"
+ "bgt 5b \n"
+ "9: @ End \n"
+ :
+ "+r" (argb),
+ "+r" (src),
+ "+r" (cov),
+ "+r" (len),
+ "+r" (dst)
+ :
+ :
+ "r5","r6","r7","r8","r9","r10","r11","r12","r14","memory","cc"
+ );
+}
+
+/* Point the fz_path_w4i1o4 function pointer at the ARM assembler routine. */
+void fz_accelerate(void)
+{
+ fz_path_w4i1o4 = &path_w4i1o4arm;
+}
+
+#endif