summaryrefslogtreecommitdiff
path: root/draw
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2010-06-16 14:31:26 +0200
committerRobin Watts <robin.watts@artifex.com>2010-06-16 14:31:26 +0200
commit15f58a6059d75119803a33288bfb304cfd8ba9fb (patch)
treebd3e6707741174116afae733c1920437c730cd94 /draw
parent415f030afeabba60c5eeb25d091b979324101e36 (diff)
downloadmupdf-15f58a6059d75119803a33288bfb304cfd8ba9fb.tar.xz
Accelerate some of the blitting functions using SWAR.
Diffstat (limited to 'draw')
-rw-r--r--draw/archport.c210
1 files changed, 190 insertions, 20 deletions
diff --git a/draw/archport.c b/draw/archport.c
index 2b45b7c0..93e50fe9 100644
--- a/draw/archport.c
+++ b/draw/archport.c
@@ -1,25 +1,24 @@
#include "fitz.h"
-/* This C implementation was a prototype of the algorithm used
- * in the ARM code in draw/archarm.c. It is conceivable that on some
- * architectures/compilers this may be preferable to the vanilla
- * version below. */
+typedef unsigned char byte;
+
+/* These C implementations use SWAR (SIMD-within-a-register) techniques. */
+
+/* Selects the upper byte of each 16-bit half of a 32-bit word; ~MASK
+ * selects the lower bytes. Deliberately defined without a trailing
+ * semicolon so that it can be used inside larger expressions. */
+#define MASK 0xFF00FF00
static void
-path_w4i1o4_32bit(unsigned char * restrict argb,
- unsigned char * restrict src, unsigned char cov, int len,
- unsigned char * restrict dst)
+path_w4i1o4_32bit(byte * restrict argb,
+ byte * restrict src, byte cov, int len,
+ byte * restrict dst)
{
/* COLOR * coverage + DST * (256-coverage) = (COLOR - DST)*coverage + DST*256 */
unsigned int *dst32 = (unsigned int *)(void *)dst;
int alpha = argb[0];
- unsigned int rb = argb[1] | argb[3] << 16;
- unsigned int ag = 255 | argb[2] << 16;
- const int MASK = 0xFF00FF00;
+ unsigned int rb = argb[1] | (argb[3] << 16);
+ unsigned int ag = 255 | (argb[2] << 16);
- /* sanity test */
- if (sizeof(int) != 4 || sizeof(unsigned int) != 4)
- abort();
+ if (alpha == 0)
+ return;
if (alpha != 255)
{
@@ -28,7 +27,7 @@ path_w4i1o4_32bit(unsigned char * restrict argb,
{
unsigned int ca, drb, dag, crb, cag;
cov += *src; *src++ = 0;
- ca = cov+(cov>>7); /* ca is in 0...256 range */
+ ca = cov + (cov>>7); /* ca is in 0...256 range */
ca = (ca*alpha)>>8; /* ca is is in 0...256 range */
dag = *dst32++;
if (ca != 0)
@@ -39,8 +38,8 @@ path_w4i1o4_32bit(unsigned char * restrict argb,
cag = ag - (dag>>8);
drb += crb * ca;
dag += cag * ca;
- drb = drb & MASK;
- dag = dag & MASK;
+ drb &= MASK;
+ dag &= MASK;
dag = drb | (dag>>8);
dst32[-1] = dag;
}
@@ -52,7 +51,7 @@ path_w4i1o4_32bit(unsigned char * restrict argb,
{
unsigned int ca, drb, dag, crb, cag;
cov += *src; *src++ = 0;
- ca = cov+(cov>>7); /* ca is in 0...256 range */
+ ca = cov + (cov>>7); /* ca is in 0...256 range */
dag = *dst32++;
if (ca == 0)
continue;
@@ -68,8 +67,8 @@ path_w4i1o4_32bit(unsigned char * restrict argb,
cag = ag - (dag>>8);
drb += crb * ca;
dag += cag * ca;
- drb = drb & MASK;
- dag = dag & MASK;
+ drb &= MASK;
+ dag &= MASK;
dag = drb | (dag>>8);
}
dst32[-1] = dag;
@@ -77,11 +76,182 @@ path_w4i1o4_32bit(unsigned char * restrict argb,
}
}
+/* Blend a w0 x h block of 32-bit source pixels onto 32-bit destination
+ * pixels. Two byte channels are processed at a time, one in each 16-bit
+ * half of an unsigned int (SWAR).
+ *
+ * sp, dp: first source / destination pixel. Both are accessed through
+ *         unsigned int pointers (assumes suitable alignment -- TODO
+ *         confirm with callers).
+ * sw, dw: row strides; they are shifted right by 2 below, so presumably
+ *         given in bytes.
+ * w0, h:  pixels per row and number of rows.
+ *
+ * The low 8 bits of each source word are used as that pixel's alpha. */
+static void
+duff_4o4_32bit(byte * restrict sp, int sw, byte * restrict dp, int dw, int w0, int h)
+{
+ unsigned int *sp32 = (unsigned int *)(void *)sp;
+ unsigned int *dp32 = (unsigned int *)(void *)dp;
+
+ /* duff_non(sp0, sw, 4, dp0, dw, w0, h); */
+
+ /* Turn each stride into the number of words to skip after the w0
+  * words of a row have been consumed. */
+ sw = (sw>>2)-w0;
+ dw = (dw>>2)-w0;
+ while (h--)
+ {
+ int w = w0;
+ while (w--)
+ {
+ unsigned int sag = *sp32++;
+ unsigned int dag = *dp32++;
+ unsigned int srb, drb;
+ int alpha = sag & 255;
+ /* Zero alpha: leave the destination pixel untouched. */
+ if (alpha == 0)
+ continue;
+ /* Expand 0...255 to 0...256 so that the final >>8 is exact at 255. */
+ alpha += alpha>>7;
+ /* Replace the source's low byte (the one alpha was read from) with 255. */
+ sag |= 0xFF;
+ /* drb: dst bytes 1 and 3, left in place (i.e. already multiplied by 256).
+  * dag: dst bytes 0 and 2, shifted up into the same positions. */
+ drb = dag & MASK;
+ dag = (dag<<8) & MASK;
+ /* srb: src bytes 1 and 3 moved down to bits 0-7 and 16-23.
+  * sag: src bytes 0 and 2, already at bits 0-7 and 16-23. */
+ srb = (sag>>8) & ~MASK;
+ sag = sag & ~MASK;
+ /* Per-lane (src - dst), computed with unsigned wraparound. */
+ srb -= (drb>>8);
+ sag -= (dag>>8);
+ /* Each 16-bit lane becomes dst*256 + (src - dst)*alpha,
+  * i.e. dst*(256 - alpha) + src*alpha. */
+ drb += srb * alpha;
+ dag += sag * alpha;
+ /* Keep the upper byte of each lane (divide by 256), then repack. */
+ drb &= MASK;
+ dag &= MASK;
+ dag = drb | (dag>>8);
+ /* dp32 was post-incremented above, so [-1] is the pixel just read. */
+ dp32[-1] = dag;
+ }
+ sp32 += sw;
+ dp32 += dw;
+ }
+}
+
+/* Blend a w0 x h block of 32-bit source pixels onto 32-bit destination
+ * pixels, with the source alpha additionally scaled by a mask holding
+ * one byte per pixel. Two byte channels are processed at a time, one in
+ * each 16-bit half of an unsigned int (SWAR).
+ *
+ * sp, dp: first source / destination pixel. Both are accessed through
+ *         unsigned int pointers (assumes suitable alignment -- TODO
+ *         confirm with callers).
+ * sw, dw: row strides; they are shifted right by 2 below, so presumably
+ *         given in bytes.
+ * mp, mw: first mask byte and mask row stride (one byte is read per pixel).
+ * w0, h:  pixels per row and number of rows.
+ *
+ * The low 8 bits of each source word are used as that pixel's alpha. */
+static void
+duff_4i1o4_32bit(byte * restrict sp, int sw,
+ byte * restrict mp, int mw,
+ byte * restrict dp, int dw, int w0, int h)
+{
+ unsigned int *sp32 = (unsigned int *)(void *)sp;
+ unsigned int *dp32 = (unsigned int *)(void *)dp;
+
+ /* duff_nimon(sp, sw, 4, mp, mw, 1, dp, dw, w0, h); */
+
+ /* Turn each stride into the amount to skip after the w0 elements of
+  * a row have been consumed (words for sp/dp, bytes for mp). */
+ sw = (sw>>2)-w0;
+ dw = (dw>>2)-w0;
+ mw -= w0;
+ while (h--)
+ {
+ int w = w0;
+ while (w--)
+ {
+ unsigned int sag = *sp32++;
+ unsigned int dag = *dp32++;
+ unsigned int srb, drb, alpha, ma;
+ alpha = sag & 255;
+ /* The mask byte is consumed before any early exit so that mp
+  * stays in step with sp32 and dp32. */
+ ma = *mp++;
+ /* Zero source alpha: leave the destination pixel untouched. */
+ if (alpha == 0)
+ continue;
+ /* Expand the mask value from 0...255 to 0...256. */
+ ma += ma>>7;
+ /* Zero mask: leave the destination pixel untouched. */
+ if (ma == 0)
+ continue;
+ /* Expand alpha to 0...256, then scale it by the mask; the result
+  * is again in 0...256. */
+ alpha += alpha>>7;
+ alpha = (alpha*ma)>>8;
+ /* Replace the source's low byte (the one alpha was read from) with 255. */
+ sag |= 0xFF;
+ /* drb: dst bytes 1 and 3, left in place (i.e. already multiplied by 256).
+  * dag: dst bytes 0 and 2, shifted up into the same positions. */
+ drb = dag & MASK;
+ dag = (dag<<8) & MASK;
+ /* srb: src bytes 1 and 3 moved down to bits 0-7 and 16-23.
+  * sag: src bytes 0 and 2, already at bits 0-7 and 16-23. */
+ srb = (sag>>8) & ~MASK;
+ sag = sag & ~MASK;
+ /* Per-lane (src - dst), computed with unsigned wraparound. */
+ srb -= (drb>>8);
+ sag -= (dag>>8);
+ /* Each 16-bit lane becomes dst*256 + (src - dst)*alpha,
+  * i.e. dst*(256 - alpha) + src*alpha. */
+ drb += srb * alpha;
+ dag += sag * alpha;
+ /* Keep the upper byte of each lane (divide by 256), then repack. */
+ drb &= MASK;
+ dag &= MASK;
+ dag = drb | (dag>>8);
+ /* dp32 was post-incremented above, so [-1] is the pixel just read. */
+ dp32[-1] = dag;
+ }
+ sp32 += sw;
+ mp += mw;
+ dp32 += dw;
+ }
+}
+
+/* Blend a single constant colour onto a w0 x h block of 32-bit
+ * destination pixels, weighting each pixel by one byte read from src
+ * (presumably glyph coverage -- confirm against callers). Two byte
+ * channels are processed at a time, one in each 16-bit half of an
+ * unsigned int (SWAR).
+ *
+ * argb:      four bytes; argb[0] is used as the overall alpha and
+ *            argb[1..3] as the colour channels.
+ * src, srcw: first weight byte and its row stride (one byte per pixel).
+ * dst, dstw: first destination pixel, accessed through an unsigned int
+ *            pointer (assumes suitable alignment -- TODO confirm), and
+ *            its row stride; dstw is shifted right by 2 below, so
+ *            presumably given in bytes.
+ * w0, h:     pixels per row and number of rows.
+ *
+ * Returns immediately, writing nothing, when argb[0] is 0. */
+static void
+text_w4i1o4_32bit(byte * restrict argb,
+ byte * restrict src, int srcw,
+ byte * restrict dst, int dstw, int w0, int h)
+{
+ unsigned int *dst32 = (unsigned int *)(void *)dst;
+ unsigned int alpha = argb[0];
+ /* Colour packed into two lanes per word, matching the dst split used
+  * below: rb pairs with dst bytes 1 and 3, ag with dst bytes 0 and 2.
+  * The low lane of ag is the constant 255. */
+ unsigned int rb = argb[1] | (argb[3] << 16);
+ unsigned int ag = 255 | (argb[2] << 16);
+
+ if (alpha == 0)
+ return;
+
+ /* Turn each stride into the amount to skip after the w0 elements of
+  * a row have been consumed (bytes for src, words for dst32). */
+ srcw -= w0;
+ dstw = (dstw>>2)-w0;
+
+ if (alpha != 255)
+ {
+ /* Partially transparent colour: every weight is scaled by alpha. */
+ /* Expand alpha from 0...255 to 0...256. */
+ alpha += alpha>>7;
+ while (h--)
+ {
+ int w = w0;
+ while (w--)
+ {
+ unsigned int ca, drb, dag, crb, cag;
+ ca = *src++;
+ dag = *dst32++;
+ /* Expand the weight to 0...256, then scale by alpha (still 0...256). */
+ ca += ca>>7;
+ ca = (ca*alpha)>>8;
+ /* Nothing to add: leave the destination pixel untouched. */
+ if (ca == 0)
+ continue;
+ /* drb: dst bytes 1 and 3, left in place (i.e. already multiplied by 256).
+  * dag: dst bytes 0 and 2, shifted up into the same positions. */
+ drb = dag & MASK;
+ dag = (dag<<8) & MASK;
+ /* Per-lane (colour - dst), computed with unsigned wraparound. */
+ crb = rb - (drb>>8);
+ cag = ag - (dag>>8);
+ /* Each 16-bit lane becomes dst*256 + (colour - dst)*ca. */
+ drb += crb * ca;
+ dag += cag * ca;
+ /* Keep the upper byte of each lane (divide by 256), then repack. */
+ drb &= MASK;
+ dag &= MASK;
+ dag = drb | (dag>>8);
+ /* dst32 was post-incremented above, so [-1] is the pixel just read. */
+ dst32[-1] = dag;
+ }
+ src += srcw;
+ dst32 += dstw;
+ }
+ }
+ else
+ {
+ /* Opaque colour (alpha == 255): the weight byte alone drives the blend. */
+ /* NOTE(review): alpha is not read again in this branch, so this
+  * adjustment appears to have no effect. */
+ alpha += alpha>>7;
+ while (h--)
+ {
+ int w = w0;
+ while (w--)
+ {
+ unsigned int ca, drb, dag, crb, cag;
+ ca = *src++;
+ dag = *dst32++;
+ /* Expand the weight from 0...255 to 0...256. */
+ ca += ca>>7;
+ /* Nothing to add: leave the destination pixel untouched. */
+ if (ca == 0)
+ continue;
+ /* Same lane split and blend as in the branch above. */
+ drb = dag & MASK;
+ dag = (dag<<8) & MASK;
+ crb = rb - (drb>>8);
+ cag = ag - (dag>>8);
+ drb += crb * ca;
+ dag += cag * ca;
+ drb &= MASK;
+ dag &= MASK;
+ dag = drb | (dag>>8);
+ dst32[-1] = dag;
+ }
+ src += srcw;
+ dst32 += dstw;
+ }
+ }
+}
+
void fz_accelerate(void)
{
- if (sizeof(int) == 4)
+ if (sizeof(int) == 4 && sizeof(unsigned int) == 4)
{
+ fz_duff_4o4 = duff_4o4_32bit;
+ fz_duff_4i1o4 = duff_4i1o4_32bit;
fz_path_w4i1o4 = path_w4i1o4_32bit;
+ fz_text_w4i1o4 = text_w4i1o4_32bit;
}
if (sizeof(int) == 8)