From aec11fac6a14bc4337dbb9601f070d6010413a84 Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Thu, 6 Oct 2016 18:16:02 +0100 Subject: SWAR optimisation in plotters. --- source/fitz/draw-paint.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'source/fitz/draw-paint.c') diff --git a/source/fitz/draw-paint.c b/source/fitz/draw-paint.c index 33ab20c8..8ac04002 100644 --- a/source/fitz/draw-paint.c +++ b/source/fitz/draw-paint.c @@ -1000,18 +1000,33 @@ template_span_with_mask_3_general(byte * restrict dp, int da, const byte * restr } masa = FZ_REVERSE_COMBINE(sp[3], ma); masa = FZ_EXPAND(masa); - *dp = FZ_COMBINE2(*sp, ma, *dp, masa); - sp++; dp++; - *dp = FZ_COMBINE2(*sp, ma, *dp, masa); - sp++; dp++; - *dp = FZ_COMBINE2(*sp, ma, *dp, masa); - sp++; dp++; if (da) + { + const uint32_t mask = 0x00ff00ff; + uint32_t d0 = *(uint32_t *)dp; + uint32_t d1 = d0>>8; + uint32_t s0 = *(uint32_t *)sp; + uint32_t s1 = s0>>8; + sp += 4; + d0 &= mask; + d1 &= mask; + s0 &= mask; + s1 &= mask; + d0 = (((s0 * ma)>>8) & mask) + (((d0 * masa)>>8) & mask); + d1 = (((s1 * ma)>>8) & mask) + (((d1 * masa)>>8) & mask); + d0 |= d1<<8; + *(uint32_t *)dp = d0; + dp += 4; + } + else { *dp = FZ_COMBINE2(*sp, ma, *dp, masa); - dp++; + sp++; dp++; + *dp = FZ_COMBINE2(*sp, ma, *dp, masa); + sp++; dp++; + *dp = FZ_COMBINE2(*sp, ma, *dp, masa); + sp+=2; dp++; } - sp++; } else { -- cgit v1.2.3