summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--draw/draw_simple_scale.c22
-rw-r--r--fitz/fitz-internal.h24
-rw-r--r--fitz/res_colorspace.c22
-rw-r--r--fitz/res_pixmap.c205
4 files changed, 225 insertions, 48 deletions
diff --git a/draw/draw_simple_scale.c b/draw/draw_simple_scale.c
index 90fd75e0..29050387 100644
--- a/draw/draw_simple_scale.c
+++ b/draw/draw_simple_scale.c
@@ -20,28 +20,6 @@ intermediate results rather than ints.
*/
#define SINGLE_PIXEL_SPECIALS
-#ifdef NDK_PROFILER
-extern void __gnu_mcount_nc(void);
-#define ENTER_PG "push {lr}\nbl __gnu_mcount_nc\n"
-#else
-#define ENTER_PG
-#endif
-
-/* If we're compiling as thumb code, then we need to tell the compiler
- * to enter and exit ARM mode around our assembly sections. If we move
- * the ARM functions to a separate file and arrange for it to be compiled
- * without thumb mode, we can save some time on entry.
- */
-#ifdef ARCH_ARM
-#ifdef ARCH_THUMB
-#define ENTER_ARM ".balign 4\nmov r12,pc\nbx r12\n0:.arm\n" ENTER_PG
-#define ENTER_THUMB "9:.thumb\n" ENTER_PG
-#else
-#define ENTER_ARM
-#define ENTER_THUMB
-#endif
-#endif
-
#ifdef DEBUG_SCALING
#ifdef WIN32
#include <windows.h>
diff --git a/fitz/fitz-internal.h b/fitz/fitz-internal.h
index 2f1d137e..d7bbb5cb 100644
--- a/fitz/fitz-internal.h
+++ b/fitz/fitz-internal.h
@@ -71,6 +71,30 @@ fz_unlock(fz_context *ctx, int lock)
ctx->locks->unlock(ctx->locks->user, lock);
}
+/* ARM assembly specific defines */
+
+#ifdef ARCH_ARM
+#ifdef NDK_PROFILER
+extern void __gnu_mcount_nc(void);
+#define ENTER_PG "push {lr}\nbl __gnu_mcount_nc\n"
+#else
+#define ENTER_PG
+#endif
+
+/* If we're compiling as thumb code, then we need to tell the compiler
+ * to enter and exit ARM mode around our assembly sections. If we move
+ * the ARM functions to a separate file and arrange for it to be compiled
+ * without thumb mode, we can save some time on entry.
+ */
+#ifdef ARCH_THUMB
+#define ENTER_ARM ".balign 4\nmov r12,pc\nbx r12\n0:.arm\n" ENTER_PG
+#define ENTER_THUMB "9:.thumb\n" ENTER_PG
+#else
+#define ENTER_ARM
+#define ENTER_THUMB
+#endif
+#endif
+
/*
* Basic runtime and utility functions
diff --git a/fitz/res_colorspace.c b/fitz/res_colorspace.c
index 826909a0..81b08e54 100644
--- a/fitz/res_colorspace.c
+++ b/fitz/res_colorspace.c
@@ -2,28 +2,6 @@
#define SLOWCMYK
-#ifdef NDK_PROFILER
-extern void __gnu_mcount_nc(void);
-#define ENTER_PG "push {lr}\nbl __gnu_mcount_nc\n"
-#else
-#define ENTER_PG
-#endif
-
-/* If we're compiling as thumb code, then we need to tell the compiler
- * to enter and exit ARM mode around our assembly sections. If we move
- * the ARM functions to a separate file and arrange for it to be compiled
- * without thumb mode, we can save some time on entry.
- */
-#ifdef ARCH_ARM
-#ifdef ARCH_THUMB
-#define ENTER_ARM ".balign 4\nmov r12,pc\nbx r12\n0:.arm\n" ENTER_PG
-#define ENTER_THUMB "9:.thumb\n" ENTER_PG
-#else
-#define ENTER_ARM
-#define ENTER_THUMB
-#endif
-#endif
-
void
fz_free_colorspace_imp(fz_context *ctx, fz_storable *cs_)
{
diff --git a/fitz/res_pixmap.c b/fitz/res_pixmap.c
index a3ae7562..8eb489d1 100644
--- a/fitz/res_pixmap.c
+++ b/fitz/res_pixmap.c
@@ -688,6 +688,187 @@ fz_drop_image(fz_context *ctx, fz_image *image)
fz_drop_storable(ctx, &image->storable);
}
+#ifdef ARCH_ARM
+static void
+fz_subsample_pixmap_ARM(unsigned char *ptr, int w, int h, int f, int factor,
+ int n, int fwd, int back, int back2, int fwd2,
+ int divX, int back4, int fwd4, int fwd3,
+ int divY, int back5, int divXY)
+__attribute__((naked));
+
+static void
+fz_subsample_pixmap_ARM(unsigned char *ptr, int w, int h, int f, int factor,
+ int n, int fwd, int back, int back2, int fwd2,
+ int divX, int back4, int fwd4, int fwd3,
+ int divY, int back5, int divXY)
+{
+ asm volatile(
+ ENTER_ARM
+ "stmfd r13!,{r1,r4-r11,r14} \n"
+ "@STACK:r1,<9>,factor,n,fwd,back,back2,fwd2,divX,back4,fwd4,fwd3,divY,back5,divXY\n"
+ "@ r0 = src = ptr \n"
+ "@ r1 = w \n"
+ "@ r2 = h \n"
+ "@ r3 = f \n"
+ "mov r9, r0 @ r9 = dst = ptr \n"
+ "ldr r6, [r13,#4*12] @ r6 = fwd \n"
+ "ldr r7, [r13,#4*13] @ r7 = back \n"
+ "subs r2, r2, r3 @ r2 = h -= f \n"
+ "blt 11f @ Skip if less than a full row \n"
+ "1: @ for (y = h; y > 0; y--) { \n"
+ "ldr r1, [r13] @ r1 = w \n"
+ "subs r1, r1, r3 @ r1 = w -= f \n"
+ "blt 6f @ Skip if less than a full col \n"
+ "ldr r4, [r13,#4*10] @ r4 = factor \n"
+ "ldr r8, [r13,#4*14] @ r8 = back2 \n"
+ "ldr r12,[r13,#4*15] @ r12= fwd2 \n"
+ "2: @ for (x = w; x > 0; x--) { \n"
+ "ldr r5, [r13,#4*11] @ for (nn = n; nn > 0; n--) { \n"
+ "3: @ \n"
+ "mov r14,#0 @ r14= v = 0 \n"
+ "sub r5, r5, r3, LSL #8 @ for (xx = f; xx > 0; x--) { \n"
+ "4: @ \n"
+ "add r5, r5, r3, LSL #16 @ for (yy = f; yy > 0; y--) { \n"
+ "5: @ \n"
+ "ldrb r11,[r0], r6 @ r11= *src src += fwd \n"
+ "subs r5, r5, #1<<16 @ xx-- \n"
+ "add r14,r14,r11 @ v += r11 \n"
+ "bgt 5b @ } \n"
+ "sub r0, r0, r7 @ src -= back \n"
+ "adds r5, r5, #1<<8 @ yy-- \n"
+ "blt 4b @ } \n"
+ "mov r14,r14,LSR r4 @ r14 = v >>= factor \n"
+ "strb r14,[r9], #1 @ *d++ = r14 \n"
+ "sub r0, r0, r8 @ s -= back2 \n"
+ "subs r5, r5, #1 @ n-- \n"
+ "bgt 3b @ } \n"
+ "add r0, r0, r12 @ s += fwd2 \n"
+ "subs r1, r1, r3 @ x -= f \n"
+ "bge 2b @ } \n"
+ "6: @ Less than a full column left \n"
+ "adds r1, r1, r3 @ x += f \n"
+ "beq 11f @ if (x == 0) next row \n"
+ "@ r0 = src \n"
+ "@ r1 = x \n"
+ "@ r2 = y \n"
+ "@ r3 = f \n"
+ "@ r4 = factor \n"
+ "@ r6 = fwd \n"
+ "@ r7 = back \n"
+ "@STACK:r1,<9>,factor,n,fwd,back,back2,fwd2,divX,back4,fwd4,fwd3,divY,back5,divXY\n"
+ "ldr r5, [r13,#4*11] @ for (nn = n; nn > 0; n--) { \n"
+ "ldr r4, [r13,#4*16] @ r4 = divX \n"
+ "ldr r8, [r13,#4*17] @ r8 = back4 \n"
+ "ldr r12,[r13,#4*18] @ r12= fwd4 \n"
+ "8: @ \n"
+ "mov r14,#0 @ r14= v = 0 \n"
+ "sub r5, r5, r1, LSL #8 @ for (xx = x; xx > 0; x--) { \n"
+ "9: @ \n"
+ "add r5, r5, r3, LSL #16 @ for (yy = f; yy > 0; y--) { \n"
+ "10: @ \n"
+ "ldrb r11,[r0], r6 @ r11= *src src += fwd \n"
+ "subs r5, r5, #1<<16 @ xx-- \n"
+ "add r14,r14,r11 @ v += r11 \n"
+ "bgt 10b @ } \n"
+ "sub r0, r0, r7 @ src -= back \n"
+ "adds r5, r5, #1<<8 @ yy-- \n"
+ "blt 9b @ } \n"
+ "mul r14,r4, r14 @ r14= v *= divX \n"
+ "mov r14,r14,LSR #16 @ r14= v >>= 16 \n"
+ "strb r14,[r9], #1 @ *d++ = r14 \n"
+ "sub r0, r0, r8 @ s -= back4 \n"
+ "subs r5, r5, #1 @ n-- \n"
+ "bgt 8b @ } \n"
+ "add r0, r0, r12 @ s += fwd4 \n"
+ "11: @ \n"
+ "ldr r14,[r13,#4*19] @ r14 = fwd3 \n"
+ "subs r2, r2, r3 @ h -= f \n"
+ "add r0, r0, r14 @ s += fwd3 \n"
+ "bge 1b @ } \n"
+ "adds r2, r2, r3 @ h += f \n"
+ "beq 21f @ if no stray row, end \n"
+ "@ So doing one last (partial) row \n"
+ "@STACK:r1,<9>,factor,n,fwd,back,back2,fwd2,divX,back4,fwd4,fwd3,divY,back5,divXY\n"
+ "@ r0 = src = ptr \n"
+ "@ r1 = w \n"
+ "@ r2 = h \n"
+ "@ r3 = f \n"
+ "@ r4 = factor \n"
+ "@ r5 = n \n"
+ "@ r6 = fwd \n"
+ "12: @ for (y = h; y > 0; y--) { \n"
+ "ldr r1, [r13] @ r1 = w \n"
+ "ldr r7, [r13,#4*21] @ r7 = back5 \n"
+ "ldr r8, [r13,#4*14] @ r8 = back2 \n"
+ "subs r1, r1, r3 @ r1 = w -= f \n"
+ "blt 17f @ Skip if less than a full col \n"
+ "ldr r4, [r13,#4*20] @ r4 = divY \n"
+ "ldr r12,[r13,#4*15] @ r12= fwd2 \n"
+ "13: @ for (x = w; x > 0; x--) { \n"
+ "ldr r5, [r13,#4*11] @ for (nn = n; nn > 0; n--) { \n"
+ "14: @ \n"
+ "mov r14,#0 @ r14= v = 0 \n"
+ "sub r5, r5, r3, LSL #8 @ for (xx = f; xx > 0; x--) { \n"
+ "15: @ \n"
+ "add r5, r5, r2, LSL #16 @ for (yy = y; yy > 0; y--) { \n"
+ "16: @ \n"
+ "ldrb r11,[r0], r6 @ r11= *src src += fwd \n"
+ "subs r5, r5, #1<<16 @ xx-- \n"
+ "add r14,r14,r11 @ v += r11 \n"
+ "bgt 16b @ } \n"
+ "sub r0, r0, r7 @ src -= back5 \n"
+ "adds r5, r5, #1<<8 @ yy-- \n"
+ "blt 15b @ } \n"
+ "mul r14,r4, r14 @ r14 = x *= divY \n"
+ "mov r14,r14,LSR #16 @ r14 = v >>= 16 \n"
+ "strb r14,[r9], #1 @ *d++ = r14 \n"
+ "sub r0, r0, r8 @ s -= back2 \n"
+ "subs r5, r5, #1 @ n-- \n"
+ "bgt 14b @ } \n"
+ "add r0, r0, r12 @ s += fwd2 \n"
+ "subs r1, r1, r3 @ x -= f \n"
+ "bge 13b @ } \n"
+ "17: @ Less than a full column left \n"
+ "adds r1, r1, r3 @ x += f \n"
+ "beq 21f @ if (x == 0) end \n"
+ "@ r0 = src \n"
+ "@ r1 = x \n"
+ "@ r2 = y \n"
+ "@ r3 = f \n"
+ "@ r4 = factor \n"
+ "@ r6 = fwd \n"
+ "@ r7 = back5 \n"
+ "@ r8 = back2 \n"
+ "@STACK:r1,<9>,factor,n,fwd,back,back2,fwd2,divX,back4,fwd4,fwd3,divY,back5,divXY\n"
+ "ldr r4, [r13,#4*22] @ r4 = divXY \n"
+ "ldr r5, [r13,#4*11] @ for (nn = n; nn > 0; n--) { \n"
+ "18: @ \n"
+ "mov r14,#0 @ r14= v = 0 \n"
+ "sub r5, r5, r1, LSL #8 @ for (xx = x; xx > 0; x--) { \n"
+ "19: @ \n"
+ "add r5, r5, r2, LSL #16 @ for (yy = y; yy > 0; y--) { \n"
+ "20: @ \n"
+ "ldrb r11,[r0],r6 @ r11= *src src += fwd \n"
+ "subs r5, r5, #1<<16 @ xx-- \n"
+ "add r14,r14,r11 @ v += r11 \n"
+ "bgt 20b @ } \n"
+ "sub r0, r0, r7 @ src -= back5 \n"
+ "adds r5, r5, #1<<8 @ yy-- \n"
+ "blt 19b @ } \n"
+ "mul r14,r4, r14 @ r14= v *= divX \n"
+ "mov r14,r14,LSR #16 @ r14= v >>= 16 \n"
+ "strb r14,[r9], #1 @ *d++ = r14 \n"
+ "sub r0, r0, r8 @ s -= back2 \n"
+ "subs r5, r5, #1 @ n-- \n"
+ "bgt 18b @ } \n"
+ "21: @ \n"
+ "ldmfd r13!,{r1,r4-r11,PC} @ pop, return to thumb \n"
+ ENTER_THUMB
+ );
+}
+
+#endif
+
void
fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor)
{
@@ -709,6 +890,21 @@ fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor)
fwd2 = (f-1)*n;
fwd3 = (f-1)*fwd;
factor *= 2;
+#ifdef ARCH_ARM
+ {
+ int strayX = w%f;
+ int divX = (strayX ? 65536/(strayX*f) : 0);
+ int fwd4 = (strayX-1) * n;
+ int back4 = strayX*n-1;
+ int strayY = h%f;
+ int divY = (strayY ? 65536/(strayY*f) : 0);
+ int back5 = fwd * strayY - 1;
+ int divXY = (strayY*strayX ? 65536/(strayX*strayY) : 0);
+ fz_subsample_pixmap_ARM(s, w, h, f, factor, n, fwd, back,
+ back2, fwd2, divX, back4, fwd4, fwd3,
+ divY, back5, divXY);
+ }
+#else
for (y = h - f; y >= 0; y -= f)
{
for (x = w - f; x >= 0; x -= f)
@@ -761,7 +957,7 @@ fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor)
if (y > 0)
{
int div = y * f;
- back = fwd * y - 1;
+ int back5 = fwd * y - 1;
for (x = w - f; x >= 0; x -= f)
{
for (nn = n; nn > 0; nn--)
@@ -774,7 +970,7 @@ fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor)
v += *s;
s += fwd;
}
- s -= back;
+ s -= back5;
}
*d++ = v / div;
s -= back2;
@@ -785,7 +981,7 @@ fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor)
x += f;
if (x > 0)
{
- int div = x * y;
+ div = x * y;
for (nn = n; nn > 0; nn--)
{
int v = 0;
@@ -796,13 +992,14 @@ fz_subsample_pixmap(fz_context *ctx, fz_pixmap *tile, int factor)
v += *s;
s += fwd;
}
- s -= back;
+ s -= back5;
}
*d++ = v / div;
s -= back2;
}
}
}
+#endif
tile->w = dst_w;
tile->h = dst_h;
tile->samples = fz_resize_array(ctx, tile->samples, dst_w * n, dst_h);