From 7836643e0d6e9cc8e9d0d5c71cbfac5ee772ae21 Mon Sep 17 00:00:00 2001
From: Robin Watts
Date: Thu, 6 Oct 2016 17:45:47 +0100
Subject: Optimise painters: Use SWAR to accelerate plotting.

Use SWAR (SIMD within a register) to accelerate plotting.

Used for the file from bug 697122. Makes a small improvement
overall (31.3s to 31s for a cut-down file).

The equivalent code using 64-bit operations is slower.
---
 source/fitz/draw-paint.c | 45 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 11 deletions(-)

(limited to 'source/fitz')

diff --git a/source/fitz/draw-paint.c b/source/fitz/draw-paint.c
index 061ee630..50112d00 100644
--- a/source/fitz/draw-paint.c
+++ b/source/fitz/draw-paint.c
@@ -950,19 +950,42 @@ template_span_with_mask_3_general(byte * restrict dp, int da, const byte * restr
 			else
 			{
 				masa = FZ_EXPAND(masa);
-				*dp = *sp + FZ_COMBINE(*dp, masa);
-				sp++; dp++;
-				*dp = *sp + FZ_COMBINE(*dp, masa);
-				sp++; dp++;
-				*dp = *sp + FZ_COMBINE(*dp, masa);
-				sp++; dp++;
-				if (da)
+				if (da && sa)
 				{
-					*dp = (sa ? *sp : 255) + FZ_COMBINE(*dp, masa);
-					dp++;
+					const uint32_t mask = 0x00ff00ff;
+					uint32_t d0 = *(uint32_t *)dp;
+					uint32_t d1 = d0>>8;
+					uint32_t s0 = *(uint32_t *)sp;
+					uint32_t s1 = s0>>8;
+					sp += 4;
+					d0 &= mask;
+					d1 &= mask;
+					s0 &= mask;
+					s1 &= mask;
+					s0 += (d0*masa)>>8;
+					s1 += (d1*masa)>>8;
+					s0 &= mask;
+					s1 &= mask;
+					s0 |= s1<<8;
+					*(uint32_t *)dp = s0;
+					dp += 4;
+				}
+				else
+				{
+					*dp = *sp + FZ_COMBINE(*dp, masa);
+					sp++; dp++;
+					*dp = *sp + FZ_COMBINE(*dp, masa);
+					sp++; dp++;
+					*dp = *sp + FZ_COMBINE(*dp, masa);
+					sp++; dp++;
+					if (da)
+					{
+						*dp = (sa ? *sp : 255) + FZ_COMBINE(*dp, masa);
+						dp++;
+					}
+					if (sa)
+						sp++;
 				}
-				if (sa)
-					sp++;
 			}
 		}
 		else
-- 
cgit v1.2.3