diff options
author | Robin Watts <robin.watts@artifex.com> | 2010-06-16 14:31:26 +0200 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2010-06-16 14:31:26 +0200 |
commit | 15f58a6059d75119803a33288bfb304cfd8ba9fb (patch) | |
tree | bd3e6707741174116afae733c1920437c730cd94 /draw | |
parent | 415f030afeabba60c5eeb25d091b979324101e36 (diff) | |
download | mupdf-15f58a6059d75119803a33288bfb304cfd8ba9fb.tar.xz |
Accelerate some of the blitting functions using SWAR.
Diffstat (limited to 'draw')
-rw-r--r-- | draw/archport.c | 210 |
1 file changed, 190 insertions, 20 deletions
diff --git a/draw/archport.c b/draw/archport.c index 2b45b7c0..93e50fe9 100644 --- a/draw/archport.c +++ b/draw/archport.c @@ -1,25 +1,24 @@ #include "fitz.h" -/* This C implementation was a prototype of the algorithm used - * in the ARM code in draw/archarm.c. It is conceivable that on some - * architectures/compilers this may be preferable to the vanilla - * version below. */ +typedef unsigned char byte; + +/* These C implementations use SWAR (SIMD-within-a-register) techniques. */ + +#define MASK 0xFF00FF00; static void -path_w4i1o4_32bit(unsigned char * restrict argb, - unsigned char * restrict src, unsigned char cov, int len, - unsigned char * restrict dst) +path_w4i1o4_32bit(byte * restrict argb, + byte * restrict src, byte cov, int len, + byte * restrict dst) { /* COLOR * coverage + DST * (256-coverage) = (COLOR - DST)*coverage + DST*256 */ unsigned int *dst32 = (unsigned int *)(void *)dst; int alpha = argb[0]; - unsigned int rb = argb[1] | argb[3] << 16; - unsigned int ag = 255 | argb[2] << 16; - const int MASK = 0xFF00FF00; + unsigned int rb = argb[1] | (argb[3] << 16); + unsigned int ag = 255 | (argb[2] << 16); - /* sanity test */ - if (sizeof(int) != 4 || sizeof(unsigned int) != 4) - abort(); + if (alpha == 0) + return; if (alpha != 255) { @@ -28,7 +27,7 @@ path_w4i1o4_32bit(unsigned char * restrict argb, { unsigned int ca, drb, dag, crb, cag; cov += *src; *src++ = 0; - ca = cov+(cov>>7); /* ca is in 0...256 range */ + ca = cov + (cov>>7); /* ca is in 0...256 range */ ca = (ca*alpha)>>8; /* ca is is in 0...256 range */ dag = *dst32++; if (ca != 0) @@ -39,8 +38,8 @@ path_w4i1o4_32bit(unsigned char * restrict argb, cag = ag - (dag>>8); drb += crb * ca; dag += cag * ca; - drb = drb & MASK; - dag = dag & MASK; + drb &= MASK; + dag &= MASK; dag = drb | (dag>>8); dst32[-1] = dag; } @@ -52,7 +51,7 @@ path_w4i1o4_32bit(unsigned char * restrict argb, { unsigned int ca, drb, dag, crb, cag; cov += *src; *src++ = 0; - ca = cov+(cov>>7); /* ca is in 0...256 range */ 
+ ca = cov + (cov>>7); /* ca is in 0...256 range */ dag = *dst32++; if (ca == 0) continue; @@ -68,8 +67,8 @@ path_w4i1o4_32bit(unsigned char * restrict argb, cag = ag - (dag>>8); drb += crb * ca; dag += cag * ca; - drb = drb & MASK; - dag = dag & MASK; + drb &= MASK; + dag &= MASK; dag = drb | (dag>>8); } dst32[-1] = dag; @@ -77,11 +76,182 @@ path_w4i1o4_32bit(unsigned char * restrict argb, } } +static void +duff_4o4_32bit(byte * restrict sp, int sw, byte * restrict dp, int dw, int w0, int h) +{ + unsigned int *sp32 = (unsigned int *)(void *)sp; + unsigned int *dp32 = (unsigned int *)(void *)dp; + + /* duff_non(sp0, sw, 4, dp0, dw, w0, h); */ + + sw = (sw>>2)-w0; + dw = (dw>>2)-w0; + while (h--) + { + int w = w0; + while (w--) + { + unsigned int sag = *sp32++; + unsigned int dag = *dp32++; + unsigned int srb, drb; + int alpha = sag & 255; + if (alpha == 0) + continue; + alpha += alpha>>7; + sag |= 0xFF; + drb = dag & MASK; + dag = (dag<<8) & MASK; + srb = (sag>>8) & ~MASK; + sag = sag & ~MASK; + srb -= (drb>>8); + sag -= (dag>>8); + drb += srb * alpha; + dag += sag * alpha; + drb &= MASK; + dag &= MASK; + dag = drb | (dag>>8); + dp32[-1] = dag; + } + sp32 += sw; + dp32 += dw; + } +} + +static void +duff_4i1o4_32bit(byte * restrict sp, int sw, + byte * restrict mp, int mw, + byte * restrict dp, int dw, int w0, int h) +{ + unsigned int *sp32 = (unsigned int *)(void *)sp; + unsigned int *dp32 = (unsigned int *)(void *)dp; + + /* duff_nimon(sp, sw, 4, mp, mw, 1, dp, dw, w0, h); */ + + sw = (sw>>2)-w0; + dw = (dw>>2)-w0; + mw -= w0; + while (h--) + { + int w = w0; + while (w--) + { + unsigned int sag = *sp32++; + unsigned int dag = *dp32++; + unsigned int srb, drb, alpha, ma; + alpha = sag & 255; + ma = *mp++; + if (alpha == 0) + continue; + ma += ma>>7; + if (ma == 0) + continue; + alpha += alpha>>7; + alpha = (alpha*ma)>>8; + sag |= 0xFF; + drb = dag & MASK; + dag = (dag<<8) & MASK; + srb = (sag>>8) & ~MASK; + sag = sag & ~MASK; + srb -= (drb>>8); + sag -= (dag>>8); + 
drb += srb * alpha; + dag += sag * alpha; + drb &= MASK; + dag &= MASK; + dag = drb | (dag>>8); + dp32[-1] = dag; + } + sp32 += sw; + mp += mw; + dp32 += dw; + } +} + +static void +text_w4i1o4_32bit(byte * restrict argb, + byte * restrict src, int srcw, + byte * restrict dst, int dstw, int w0, int h) +{ + unsigned int *dst32 = (unsigned int *)(void *)dst; + unsigned int alpha = argb[0]; + unsigned int rb = argb[1] | (argb[3] << 16); + unsigned int ag = 255 | (argb[2] << 16); + + if (alpha == 0) + return; + + srcw -= w0; + dstw = (dstw>>2)-w0; + + if (alpha != 255) + { + alpha += alpha>>7; + while (h--) + { + int w = w0; + while (w--) + { + unsigned int ca, drb, dag, crb, cag; + ca = *src++; + dag = *dst32++; + ca += ca>>7; + ca = (ca*alpha)>>8; + if (ca == 0) + continue; + drb = dag & MASK; + dag = (dag<<8) & MASK; + crb = rb - (drb>>8); + cag = ag - (dag>>8); + drb += crb * ca; + dag += cag * ca; + drb &= MASK; + dag &= MASK; + dag = drb | (dag>>8); + dst32[-1] = dag; + } + src += srcw; + dst32 += dstw; + } + } + else + { + alpha += alpha>>7; + while (h--) + { + int w = w0; + while (w--) + { + unsigned int ca, drb, dag, crb, cag; + ca = *src++; + dag = *dst32++; + ca += ca>>7; + if (ca == 0) + continue; + drb = dag & MASK; + dag = (dag<<8) & MASK; + crb = rb - (drb>>8); + cag = ag - (dag>>8); + drb += crb * ca; + dag += cag * ca; + drb &= MASK; + dag &= MASK; + dag = drb | (dag>>8); + dst32[-1] = dag; + } + src += srcw; + dst32 += dstw; + } + } +} + void fz_accelerate(void) { - if (sizeof(int) == 4) + if (sizeof(int) == 4 && sizeof(unsigned int) == 4) { + fz_duff_4o4 = duff_4o4_32bit; + fz_duff_4i1o4 = duff_4i1o4_32bit; fz_path_w4i1o4 = path_w4i1o4_32bit; + fz_text_w4i1o4 = text_w4i1o4_32bit; } if (sizeof(int) == 8) |