summary | refs | log | tree | commit | diff
path: root/draw
diff options
context:
space:
mode:
author Robin Watts <robin.watts@artifex.com> 2010-06-18 14:34:30 +0200
committer Robin Watts <robin.watts@artifex.com> 2010-06-18 14:34:30 +0200
commit 22d82e68d48b722731b288a30eab2857f4d0e3d7 (patch)
tree acd7a9e3394afd5298ea8227ad7c72095b113c83 /draw
parent b2e49f75d5668f42260dd058376c2de7be53062f (diff)
download mupdf-22d82e68d48b722731b288a30eab2857f4d0e3d7.tar.xz
SWAR implementations of the image drawing functions, plus some tweaks to the vanilla versions for speed.
Diffstat (limited to 'draw')
-rw-r--r-- draw/archport.c 321
-rw-r--r-- draw/imagedraw.c 92
2 files changed, 390 insertions, 23 deletions
diff --git a/draw/archport.c b/draw/archport.c
index 93e50fe9..7b70e84f 100644
--- a/draw/archport.c
+++ b/draw/archport.c
@@ -197,7 +197,7 @@ text_w4i1o4_32bit(byte * restrict argb,
ca += ca>>7;
ca = (ca*alpha)>>8;
if (ca == 0)
- continue;
+ continue;
drb = dag & MASK;
dag = (dag<<8) & MASK;
crb = rb - (drb>>8);
@@ -226,7 +226,7 @@ text_w4i1o4_32bit(byte * restrict argb,
dag = *dst32++;
ca += ca>>7;
if (ca == 0)
- continue;
+ continue;
drb = dag & MASK;
dag = (dag<<8) & MASK;
crb = rb - (drb>>8);
@@ -244,6 +244,320 @@ text_w4i1o4_32bit(byte * restrict argb,
}
}
+static void /* Blends len bilinearly sampled pixels of a 32-bit-per-pixel image onto dst, using word-wide (SWAR) arithmetic. */
+img_4o4_32bit(byte * restrict src, byte cov, int len, byte * restrict dst,
+ fz_pixmap *image, int u, int v, int fa, int fb)
+{
+ unsigned int *dst32 = (unsigned int *)(void *)dst; /* dst is read and written one whole pixel word at a time */
+ unsigned int *samples = (unsigned int *)(void *)image->samples; /* image samples are likewise read as whole words */
+ int w = image->w;
+ int h = image->h-1; /* note: h is the last valid row index, not the height */
+
+ while (len--)
+ {
+ unsigned int a, a1, d, d1;
+ int sa;
+ cov += *src; *src = 0; src++; /* add this pixel's entry to the running coverage, then zero the entry */
+ /* (a,a1) = sampleargb(samples, w, h, u, v, argb); inlined below, with the result split across a and a1 */
+ {
+ int ui, ui1, vi, vi1, ud, vd;
+ unsigned int b, b1, c, c1;
+ ui1 = 1; /* offset to the right-hand neighbour; zeroed when clamped */
+ ui = u >> 16; /* integer part of u (u is used as 16.16 fixed point) */
+ if (ui < 0)
+ {
+ ui = 0;
+ ui1 = 0;
+ }
+ else if (ui >= w-1)
+ {
+ ui = w-1; /* last column: the right-hand neighbour is the same sample */
+ ui1 = 0;
+ }
+ vi1 = w; /* offset to the sample one row below; zeroed when clamped */
+ vi = v >> 16; /* integer part of v */
+ if (vi < 0)
+ {
+ vi = 0;
+ vi1 = 0;
+ }
+ else if (vi >= h)
+ {
+ vi = h; /* last row: the sample below is the same sample */
+ vi1 = 0;
+ }
+ ui += vi*w; /* ui now indexes the top-left sample of the 2x2 neighbourhood */
+ a = samples[ui]; /* top-left */
+ b = samples[ui + ui1]; /* top-right */
+ c = samples[ui + vi1]; /* bottom-left */
+ d = samples[ui + ui1 + vi1]; /* bottom-right */
+ ud = (u>>8) & 0xFF; /* top 8 bits of the fractional part of u */
+ vd = (v>>8) & 0xFF; /* top 8 bits of the fractional part of v */
+ ud = FZ_EXPAND(ud); /* NOTE(review): FZ_EXPAND is defined outside this view; presumably maps 0..255 to 0..256 - confirm */
+ vd = FZ_EXPAND(vd);
+ /* (a,a1) = blend(a,b,ud): interpolate the top row horizontally, half of the byte lanes in a and the other half in a1 */
+ a1 = a & MASK; /* NOTE(review): MASK is defined outside this view; presumably 0xFF00FF00 (alternate bytes) - confirm */
+ a = (a<<8) & MASK;
+ b1 = (b>>8) & ~MASK;
+ b = b & ~MASK;
+ a = ((b -(a >>8)) * ud + a ) & MASK;
+ a1 = ((b1-(a1>>8)) * ud + a1) & MASK;
+ /* (c,c1) = blend(c,d,ud): the same horizontal interpolation for the bottom row */
+ c1 = c & MASK;
+ c = (c<<8) & MASK;
+ d1 = (d>>8) & ~MASK;
+ d = d & ~MASK;
+ c = ((d -(c >>8)) * ud + c ) & MASK;
+ c1 = ((d1-(c1>>8)) * ud + c1) & MASK;
+ /* (a,a1) = blend((a,a1),(c,c1),vd): interpolate vertically between the two row results */
+ a = (((c >>8)-(a >>8)) * vd + a ) & MASK;
+ a1 = (((c1>>8)-(a1>>8)) * vd + a1) & MASK;
+ }
+ sa = (a>>8) & 0xFF; /* byte that came from the lowest byte of the sample word; presumably the alpha channel - confirm byte order */
+ sa = FZ_COMBINE(FZ_EXPAND(sa), FZ_EXPAND(cov)); /* blend weight from sample byte and coverage; FZ_COMBINE is defined outside this view */
+ a |= 0xFF00; /* force that same byte lane to 255 in the value blended into dst */
+ d = *dst32++; /* d is reused here for the destination pixel */
+ d1 = d & MASK;
+ d = (d<<8) & MASK;
+ a = (((a >>8)-(d >>8)) * sa + d ) & MASK; /* dst + (sample - dst) * sa for one half of the byte lanes */
+ a1 = (((a1>>8)-(d1>>8)) * sa + d1) & MASK; /* and for the other half */
+ dst32[-1] = (a>>8) | a1; /* recombine both halves and store over the pixel just read */
+ u += fa; /* step the sampling position for the next destination pixel */
+ v += fb;
+ }
+}
+
+static void /* Blends the colour in argb[1..3] onto len 32-bit dst pixels, weighted by a bilinearly sampled 1-byte image mask, coverage and argb[0]. */
+img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len,
+ byte * restrict dst, fz_pixmap *image, int u, int v, int fa, int fb)
+{
+ byte *samples = image->samples; /* mask samples are read one byte each */
+ int w = image->w;
+ int h = image->h-1; /* note: h is the last valid row index, not the height */
+ int alpha = FZ_EXPAND(argb[0]); /* NOTE(review): FZ_EXPAND is defined outside this view; the 256 test below suggests a 0..256 range - confirm */
+ unsigned int rb = argb[1] | (argb[3] << 16); /* argb[1] in bits 0-7, argb[3] in bits 16-23 */
+ unsigned int ag = 255 | (argb[2] << 16); /* constant 255 in bits 0-7, argb[2] in bits 16-23 */
+ unsigned int *dst32 = (unsigned int *)(void *)dst; /* dst is read and written one whole pixel word at a time */
+
+ if (alpha == 0)
+ return; /* returns before the loop, so src is left untouched in this case */
+ if (alpha != 256)
+ {
+ while (len--)
+ {
+ unsigned int ca, drb, dag, crb, cag;
+ unsigned int a, b;
+ cov += *src; *src = 0; src++; /* add this pixel's entry to the running coverage, then zero the entry */
+ dag = *dst32++; /* destination pixel; written back through dst32[-1] below */
+ ca = FZ_COMBINE(FZ_EXPAND(cov), alpha); /* coverage combined with the colour's alpha; FZ_COMBINE is defined outside this view */
+ if (ca != 0)
+ {
+ int ui, ui1, vi, vi1, ud, vd;
+ /* a = samplemask(samples, w, h, u, v); inlined below as a bilinear sample of the byte mask */
+ ui1 = 1; /* offset to the right-hand neighbour; zeroed when clamped */
+ ui = u >> 16; /* integer part of u (u is used as 16.16 fixed point) */
+ if (ui < 0)
+ {
+ ui = 0;
+ ui1 = 0;
+ }
+ else if (ui >= w-1)
+ {
+ ui = w-1;
+ ui1 = 0;
+ }
+ vi1 = w; /* offset to the sample one row below; zeroed when clamped */
+ vi = v >> 16; /* integer part of v */
+ if (vi < 0)
+ {
+ vi = 0;
+ vi1 = 0;
+ }
+ else if (vi >= h)
+ {
+ vi = h;
+ vi1 = 0;
+ }
+ ui += vi*w; /* ui now indexes the top-left sample of the 2x2 neighbourhood */
+ a = samples[ui]; /* top-left in bits 0-7 */
+ b = samples[ui + ui1]; /* top-right in bits 0-7 */
+ a |= samples[ui + vi1]<<16; /* bottom-left in bits 16-23 */
+ b |= samples[ui + ui1 + vi1]<<16; /* bottom-right in bits 16-23 */
+ ud = (u>>8) & 0xFF; /* top 8 bits of the fractional part of u */
+ vd = (v>>8) & 0xFF; /* top 8 bits of the fractional part of v */
+ ud = FZ_EXPAND(ud);
+ vd = FZ_EXPAND(vd);
+ /* a = blend(a,b,ud): interpolate both rows horizontally in one multiply */
+ a = ((b-a) * ud + (a<<8)) & MASK; /* NOTE(review): MASK is defined outside this view; presumably 0xFF00FF00 - confirm */
+ /* a = blend(a,a>>16,vd): interpolate vertically between the two row results held in a */
+ a = (((a>>24)-(a>>8)) * vd + a); /* only bits 8-15 are used next; the upper lane in (a>>8) only disturbs bits 16 and above */
+ a = (a>>8) & 0xFF; /* the interpolated mask value as a single byte */
+ ca = FZ_COMBINE(ca, FZ_EXPAND(a)); /* fold the mask value into the blend weight */
+ }
+ if (ca != 0) /* tested again because the mask sample may have reduced ca to 0 */
+ {
+ drb = dag & MASK; /* one half of the dst byte lanes, kept in place */
+ dag = (dag<<8) & MASK; /* the other half, shifted up by 8 */
+ crb = rb - (drb>>8); /* per-lane difference: colour minus dst */
+ cag = ag - (dag>>8);
+ drb += crb * ca; /* dst + (colour - dst) * ca, per lane */
+ dag += cag * ca;
+ drb &= MASK;
+ dag &= MASK;
+ dag = drb | (dag>>8); /* recombine both halves into one pixel */
+ dst32[-1] = dag;
+ }
+ u += fa; /* step the sampling position for the next destination pixel */
+ v += fb;
+ }
+ }
+ else /* alpha == 256: the weight depends on coverage and the mask sample only */
+ {
+ while (len--)
+ {
+ unsigned int ca, drb, dag, crb, cag;
+ unsigned int a, b;
+ cov += *src; *src = 0; src++; /* add this pixel's entry to the running coverage, then zero the entry */
+ dag = *dst32++; /* destination pixel; written back through dst32[-1] below */
+ if (cov != 0)
+ {
+ int ui, ui1, vi, vi1, ud, vd;
+ /* a = samplemask(samples, w, h, u, v); inlined below as a bilinear sample of the byte mask */
+ ui1 = 1; /* offset to the right-hand neighbour; zeroed when clamped */
+ ui = u >> 16; /* integer part of u (u is used as 16.16 fixed point) */
+ if (ui < 0)
+ {
+ ui = 0;
+ ui1 = 0;
+ }
+ else if (ui >= w-1)
+ {
+ ui = w-1;
+ ui1 = 0;
+ }
+ vi1 = w; /* offset to the sample one row below; zeroed when clamped */
+ vi = v >> 16; /* integer part of v */
+ if (vi < 0)
+ {
+ vi = 0;
+ vi1 = 0;
+ }
+ else if (vi >= h)
+ {
+ vi = h;
+ vi1 = 0;
+ }
+ ui += vi*w; /* ui now indexes the top-left sample of the 2x2 neighbourhood */
+ a = samples[ui]; /* top-left in bits 0-7 */
+ b = samples[ui + ui1]; /* top-right in bits 0-7 */
+ a |= samples[ui + vi1]<<16; /* bottom-left in bits 16-23 */
+ b |= samples[ui + ui1 + vi1]<<16; /* bottom-right in bits 16-23 */
+ ud = (u>>8) & 0xFF; /* top 8 bits of the fractional part of u */
+ vd = (v>>8) & 0xFF; /* top 8 bits of the fractional part of v */
+ ud = FZ_EXPAND(ud);
+ vd = FZ_EXPAND(vd);
+ /* a = blend(a,b,ud): interpolate both rows horizontally in one multiply */
+ a = ((b-a) * ud + (a<<8)) & MASK; /* NOTE(review): MASK is defined outside this view; presumably 0xFF00FF00 - confirm */
+ /* a = blend(a,a>>16,vd): interpolate vertically between the two row results held in a */
+ a = (((a>>24)-(a>>8)) * vd + a); /* only bits 8-15 are used next; the upper lane in (a>>8) only disturbs bits 16 and above */
+ a = (a>>8) & 0xFF; /* the interpolated mask value as a single byte */
+ ca = FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(a)); /* blend weight from coverage and mask value */
+ if (ca != 0)
+ {
+ if (ca == 256) /* full weight: store the colour without reading dst lanes */
+ {
+ dag = (rb<<8) | ag; /* rb lanes moved up by 8, ag lanes kept in place */
+ }
+ else
+ {
+ drb = dag & MASK; /* one half of the dst byte lanes, kept in place */
+ dag = (dag<<8) & MASK; /* the other half, shifted up by 8 */
+ crb = rb - (drb>>8); /* per-lane difference: colour minus dst */
+ cag = ag - (dag>>8);
+ drb += crb * ca; /* dst + (colour - dst) * ca, per lane */
+ dag += cag * ca;
+ drb &= MASK;
+ dag &= MASK;
+ dag = drb | (dag>>8); /* recombine both halves into one pixel */
+ }
+ dst32[-1] = dag;
+ }
+ }
+ u += fa; /* step the sampling position for the next destination pixel */
+ v += fb;
+ }
+ }
+}
+
+static void /* Blends len bilinearly sampled values of a 1-byte image mask, weighted by coverage, onto the 1-byte-per-pixel dst. */
+img_1o1_32bit(byte * restrict src, byte cov, int len, byte * restrict dst,
+ fz_pixmap *image, int u, int v, int fa, int fb)
+{
+ byte *samples = image->samples; /* mask samples are read one byte each */
+ int w = image->w;
+ int h = image->h-1; /* note: h is the last valid row index, not the height */
+
+ while (len--)
+ {
+ unsigned int a, b;
+ cov += *src; *src = 0; src++; /* add this pixel's entry to the running coverage, then zero the entry */
+ if (cov != 0) /* with zero coverage dst is left unchanged and no sampling is done */
+ {
+ int ui, ui1, vi, vi1, ud, vd;
+ /* sa = samplemask(samples, w, h, u, v); inlined below, with the result held in a */
+ ui1 = 1; /* offset to the right-hand neighbour; zeroed when clamped */
+ ui = u >> 16; /* integer part of u (u is used as 16.16 fixed point) */
+ if (ui < 0)
+ {
+ ui = 0;
+ ui1 = 0;
+ }
+ else if (ui >= w-1)
+ {
+ ui = w-1; /* last column: the right-hand neighbour is the same sample */
+ ui1 = 0;
+ }
+ vi1 = w; /* offset to the sample one row below; zeroed when clamped */
+ vi = v >> 16; /* integer part of v */
+ if (vi < 0)
+ {
+ vi = 0;
+ vi1 = 0;
+ }
+ else if (vi >= h)
+ {
+ vi = h; /* last row: the sample below is the same sample */
+ vi1 = 0;
+ }
+ ui += vi*w; /* ui now indexes the top-left sample of the 2x2 neighbourhood */
+ a = samples[ui]; /* top-left in bits 0-7 */
+ b = samples[ui + ui1]; /* top-right in bits 0-7 */
+ a |= samples[ui + vi1]<<16; /* bottom-left in bits 16-23 */
+ b |= samples[ui + ui1 + vi1]<<16; /* bottom-right in bits 16-23 */
+ ud = (u>>8) & 0xFF; /* top 8 bits of the fractional part of u */
+ vd = (v>>8) & 0xFF; /* top 8 bits of the fractional part of v */
+ ud = FZ_EXPAND(ud); /* NOTE(review): FZ_EXPAND is defined outside this view; the 256 test below suggests a 0..256 range - confirm */
+ vd = FZ_EXPAND(vd);
+ /* a = blend(a,b,ud): interpolate both rows horizontally in one multiply */
+ a = ((b-a) * ud + (a<<8)) & MASK; /* NOTE(review): MASK is defined outside this view; presumably 0xFF00FF00 - confirm */
+ /* a = blend(a,a>>16,vd): interpolate vertically between the two row results held in a */
+ a = (((a>>24)-(a>>8)) * vd + a); /* only bits 8-15 are used next; the upper lane in (a>>8) only disturbs bits 16 and above */
+ a = (a>>8) & 0xFF; /* the interpolated mask value as a single byte */
+ a = FZ_COMBINE(FZ_EXPAND(a), FZ_EXPAND(cov)); /* blend weight from mask value and coverage; FZ_COMBINE is defined outside this view */
+ if (a != 0)
+ {
+ if (a == 256) /* full weight: store 255 without calling FZ_BLEND */
+ dst[0] = 255;
+ else
+ dst[0] = FZ_BLEND(255, dst[0], a); /* FZ_BLEND is defined outside this view; called with 255, the current dst value and weight a */
+ }
+ }
+ dst++;
+ u += fa; /* step the sampling position for the next destination pixel */
+ v += fb;
+ }
+}
+
void fz_accelerate(void)
{
if (sizeof(int) == 4 && sizeof(unsigned int) == 4)
@@ -252,6 +566,9 @@ void fz_accelerate(void)
fz_duff_4i1o4 = duff_4i1o4_32bit;
fz_path_w4i1o4 = path_w4i1o4_32bit;
fz_text_w4i1o4 = text_w4i1o4_32bit;
+ fz_img_4o4 = img_4o4_32bit;
+ fz_img_w4i1o4 = img_w4i1o4_32bit;
+ fz_img_1o1 = img_1o1_32bit;
}
if (sizeof(int) == 8)
diff --git a/draw/imagedraw.c b/draw/imagedraw.c
index 556724bb..9f6d93bf 100644
--- a/draw/imagedraw.c
+++ b/draw/imagedraw.c
@@ -108,13 +108,20 @@ img_1o1(byte * restrict src, byte cov, int len, byte * restrict dst,
byte *samples = image->samples;
int w = image->w;
int h = image->h;
+
while (len--)
{
int sa;
cov += *src; *src = 0; src++;
- sa = samplemask(samples, w, h, u, v);
- sa = FZ_COMBINE(FZ_EXPAND(sa), FZ_EXPAND(cov));
- dst[0] = FZ_BLEND(255, dst[0], sa);
+ if (cov != 0)
+ {
+ sa = samplemask(samples, w, h, u, v);
+ sa = FZ_COMBINE(FZ_EXPAND(sa), FZ_EXPAND(cov));
+ if (sa != 0)
+ {
+ dst[0] = FZ_BLEND(255, dst[0], sa);
+ }
+ }
dst++;
u += fa;
v += fb;
@@ -129,16 +136,23 @@ img_4o4(byte * restrict src, byte cov, int len, byte * restrict dst,
int w = image->w;
int h = image->h;
byte argb[4];
+
while (len--)
{
int sa;
cov += *src; *src = 0; src++;
- sampleargb(samples, w, h, u, v, argb);
- sa = FZ_COMBINE(FZ_EXPAND(argb[0]), FZ_EXPAND(cov));
- dst[0] = FZ_BLEND(255, dst[0], sa);
- dst[1] = FZ_BLEND(argb[1], dst[1], sa);
- dst[2] = FZ_BLEND(argb[2], dst[2], sa);
- dst[3] = FZ_BLEND(argb[3], dst[3], sa);
+ if (cov != 0)
+ {
+ sampleargb(samples, w, h, u, v, argb);
+ sa = FZ_COMBINE(FZ_EXPAND(argb[0]), FZ_EXPAND(cov));
+ if (sa != 0)
+ {
+ dst[0] = FZ_BLEND(255, dst[0], sa);
+ dst[1] = FZ_BLEND(argb[1], dst[1], sa);
+ dst[2] = FZ_BLEND(argb[2], dst[2], sa);
+ dst[3] = FZ_BLEND(argb[3], dst[3], sa);
+ }
+ }
dst += 4;
u += fa;
v += fb;
@@ -156,19 +170,55 @@ img_w4i1o4(byte *argb, byte * restrict src, byte cov, int len, byte * restrict d
byte r = argb[1];
byte g = argb[2];
byte b = argb[3];
- while (len--)
+
+ if (alpha == 0)
+ return;
+ if (alpha != 256)
{
- int ca;
- cov += *src; *src = 0; src++;
- ca = samplemask(samples, w, h, u, v);
- ca = FZ_COMBINE(FZ_EXPAND(ca), alpha);
- dst[0] = FZ_BLEND(255, dst[0], ca);
- dst[1] = FZ_BLEND(r, dst[1], ca);
- dst[2] = FZ_BLEND(g, dst[2], ca);
- dst[3] = FZ_BLEND(b, dst[3], ca);
- dst += 4;
- u += fa;
- v += fb;
+ while (len--)
+ {
+ int ca;
+ cov += *src; *src = 0; src++;
+ if (cov != 0)
+ {
+ ca = samplemask(samples, w, h, u, v);
+ ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca));
+ ca = FZ_COMBINE(ca, alpha);
+ if (ca != 0)
+ {
+ dst[0] = FZ_BLEND(255, dst[0], ca);
+ dst[1] = FZ_BLEND(r, dst[1], ca);
+ dst[2] = FZ_BLEND(g, dst[2], ca);
+ dst[3] = FZ_BLEND(b, dst[3], ca);
+ }
+ }
+ dst += 4;
+ u += fa;
+ v += fb;
+ }
+ }
+ else
+ {
+ while (len--)
+ {
+ int ca;
+ cov += *src; *src = 0; src++;
+ if (cov != 0)
+ {
+ ca = samplemask(samples, w, h, u, v);
+ ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca));
+ if (ca != 0)
+ {
+ dst[0] = FZ_BLEND(255, dst[0], ca);
+ dst[1] = FZ_BLEND(r, dst[1], ca);
+ dst[2] = FZ_BLEND(g, dst[2], ca);
+ dst[3] = FZ_BLEND(b, dst[3], ca);
+ }
+ }
+ dst += 4;
+ u += fa;
+ v += fb;
+ }
}
}