summaryrefslogtreecommitdiff
path: root/draw
diff options
context:
space:
mode:
Diffstat (limited to 'draw')
-rw-r--r--draw/archarm.c63
-rw-r--r--draw/archport.c238
-rw-r--r--draw/archx86.c8
-rw-r--r--draw/imagedraw.c124
-rw-r--r--draw/imageunpack.c39
-rw-r--r--draw/meshdraw.c8
-rw-r--r--draw/porterduff.c229
7 files changed, 360 insertions, 349 deletions
diff --git a/draw/archarm.c b/draw/archarm.c
index c532a195..2904ce00 100644
--- a/draw/archarm.c
+++ b/draw/archarm.c
@@ -14,24 +14,25 @@ extern void fz_srow4_arm(byte *src, byte *dst, int w, int denom);
extern void fz_scol4_arm(byte *src, byte *dst, int w, int denom);
static void
-path_w4i1o4_arm(byte * restrict argb, byte * restrict src, byte cov, int len, byte * restrict dst)
+path_w4i1o4_arm(byte * restrict rgba, byte * restrict src, byte cov, int len, byte * restrict dst)
{
/* The ARM code here is a hand coded implementation
* of the optimized C version. */
+
if (len <= 0)
return;
asm volatile(
- "ldr %0, [%0] @ %0 = argb \n"
+ "ldr %0, [%0] @ %0 = rgba \n"
"mov r11,#0 \n"
"mov r8, #0xFF00 \n"
- "and r14,%0,#255 @ r14= alpha \n"
- "orr %0, %0, #255 @ %0 = argb |= 255 \n"
+ "mov r14,%0,lsr #24 @ r14= alpha \n"
+ "orr %0, %0, #0xFF000000 @ %0 = rgba |= 0xFF000000 \n"
"orr r8, r8, r8, LSL #16 @ r8 = 0xFF00FF00 \n"
"adds r14,r14,r14,LSR #7 @ r14 = alpha += alpha>>7 \n"
"beq 9f @ if (alpha == 0) bail \n"
- "and r6, %0, r8 @ r6 = rb<<8 \n"
- "bic %0, %0, r8 @ %0 = ag \n"
- "mov r6, r6, LSR #8 @ r6 = rb \n"
+ "and r6, %0, r8 @ r6 = ga<<8 \n"
+ "bic %0, %0, r8 @ %0 = rb \n"
+ "mov r6, r6, LSR #8 @ r6 = ga \n"
"cmp r14,#256 @ if (alpha == 256) \n"
"beq 4f @ no-alpha loop \n"
"B 2f @ enter the loop \n"
@@ -40,37 +41,37 @@ path_w4i1o4_arm(byte * restrict argb, byte * restrict src, byte cov, int len, by
"ble 9f \n"
"2: \n"
"ldrb r12,[%1] @ r12= *src \n"
- "ldr r9, [%4], #4 @ r9 = dag = *dst32++ \n"
+ "ldr r9, [%4], #4 @ r9 = drb = *dst32++ \n"
"strb r11,[%1], #1 @ r11= *src++ = 0 \n"
"add %2, r12, %2 @ %2 = cov += r12 \n"
"ands %2, %2, #255 @ %2 = cov &= 255 \n"
"beq 1b @ if coverage == 0 loop back \n"
"add r10,%2, %2, LSR #7 @ r10= ca = cov+(cov>>7) \n"
"mul r10,r14,r10 @ r10= ca *= alpha \n"
- "and r7, r8, r9 @ r7 = drb = dag & MASK \n"
+ "and r7, r8, r9 @ r7 = dga = drb & MASK \n"
"mov r10,r10,LSR #8 @ r10= ca >>= 8 \n"
- "and r9, r8, r9, LSL #8 @ r9 = dag = (dag<<8) & MASK \n"
- "sub r12,r6, r7, LSR #8 @ r12= crb = rb - (drb>>8) \n"
- "sub r5, %0, r9, LSR #8 @ r5 = cag = ag - (dag>>8) \n"
- "mla r7, r12,r10,r7 @ r7 = drb += crb * ca \n"
+ "and r9, r8, r9, LSL #8 @ r9 = drb = (drb<<8) & MASK \n"
+ "sub r12,r6, r7, LSR #8 @ r12= cga = ga - (dga>>8) \n"
+ "sub r5, %0, r9, LSR #8 @ r5 = crb = rb - (drb>>8) \n"
+ "mla r7, r12,r10,r7 @ r7 = dga += cga * ca \n"
"subs %3, %3, #1 @ len-- \n"
- "mla r9, r5, r10,r9 @ r9 = dag += cag * ca \n"
- "and r7, r8, r7 @ r7 = drb &= MASK \n"
- "and r9, r8, r9 @ r9 = dag &= MASK \n"
- "orr r9, r7, r9, LSR #8 @ r9 = dag = drb | (dag>>8) \n"
+ "mla r9, r5, r10,r9 @ r9 = drb += crb * ca \n"
+ "and r7, r8, r7 @ r7 = dga &= MASK \n"
+ "and r9, r8, r9 @ r9 = drb &= MASK \n"
+ "orr r9, r7, r9, LSR #8 @ r9 = drb = dga | (drb>>8) \n"
"str r9, [%4, #-4] @ dst32[-1] = r9 \n"
"bgt 2b \n"
"b 9f \n"
"@ --- Solid alpha loop --------------------------------------- \n"
"3: @ Loop used when coverage == 256 \n"
- "orr r9, %0, r6, LSL #8 @ r9 = argb \n"
+ "orr r9, %0, r6, LSL #8 @ r9 = rgba \n"
"str r9, [%4, #-4] @ dst32[-1] = r9 \n"
"4: @ Loop used for when coverage*alpha == 0 \n"
"subs %3, %3, #1 @ len-- \n"
"ble 9f \n"
"5: \n"
"ldrb r12,[%1] @ r12= *src \n"
- "ldr r9, [%4], #4 @ r9 = dag = *dst32++ \n"
+ "ldr r9, [%4], #4 @ r9 = drb = *dst32++ \n"
"strb r11,[%1], #1 @ r11= *src++ = 0 \n"
"add %2, r12, %2 @ %2 = cov += r12 \n"
"ands %2, %2, #255 @ %2 = cov &= 255 \n"
@@ -78,21 +79,21 @@ path_w4i1o4_arm(byte * restrict argb, byte * restrict src, byte cov, int len, by
"cmp %2, #255 @ if coverage == solid \n"
"beq 3b @ loop back \n"
"add r10,%2, %2, LSR #7 @ r10= ca = cov+(cov>>7) \n"
- "and r7, r8, r9 @ r7 = drb = dag & MASK \n"
- "and r9, r8, r9, LSL #8 @ r9 = dag = (dag<<8) & MASK \n"
- "sub r12,r6, r7, LSR #8 @ r12= crb = rb - (drb>>8) \n"
- "sub r5, %0, r9, LSR #8 @ r5 = cag = ag - (dag>>8) \n"
- "mla r7, r12,r10,r7 @ r7 = drb += crb * ca \n"
+ "and r7, r8, r9 @ r7 = dga = drb & MASK \n"
+ "and r9, r8, r9, LSL #8 @ r9 = drb = (drb<<8) & MASK \n"
+ "sub r12,r6, r7, LSR #8 @ r12= cga = ga - (dga>>8) \n"
+ "sub r5, %0, r9, LSR #8 @ r5 = crb = rb - (drb>>8) \n"
+ "mla r7, r12,r10,r7 @ r7 = dga += cga * ca \n"
"subs %3, %3, #1 @ len-- \n"
- "mla r9, r5, r10,r9 @ r9 = dag += cag * ca \n"
- "and r7, r8, r7 @ r7 = drb &= MASK \n"
- "and r9, r8, r9 @ r9 = dag &= MASK \n"
- "orr r9, r7, r9, LSR #8 @ r9 = dag = drb | (dag>>8) \n"
+ "mla r9, r5, r10,r9 @ r9 = drb += crb * ca \n"
+ "and r7, r8, r7 @ r7 = dga &= MASK \n"
+ "and r9, r8, r9 @ r9 = drb &= MASK \n"
+ "orr r9, r7, r9, LSR #8 @ r9 = drb = dga | (drb>>8) \n"
"str r9, [%4, #-4] @ dst32[-1] = r9 \n"
"bgt 5b \n"
"9: @ End \n"
:
- "+r" (argb),
+ "+r" (rgba),
"+r" (src),
"+r" (cov),
"+r" (len),
@@ -129,8 +130,8 @@ static void loadtile8_arm(byte * restrict src, int sw, byte * restrict dst, int
"2: \n"
"LDRB r4, [%[src]], #1 @ r4 = *src++ \n"
"SUBS r5, r5, #1 \n"
- "STRB r11,[%[dst]], #1 @ *dst++ = 255 \n"
"STRB r4, [%[dst]], #1 @ *dst++ = r4 \n"
+ "STRB r11,[%[dst]], #1 @ *dst++ = 255 \n"
"BGT 2b \n"
"ADD %[src],%[src],%[sw] @ src += sw \n"
"ADD %[dst],%[dst],%[dw] @ dst += dw \n"
@@ -161,10 +162,10 @@ static void loadtile8_arm(byte * restrict src, int sw, byte * restrict dst, int
"LDRB r6, [%[src]], #1 @ r6 = *src++ \n"
"LDRB r7, [%[src]], #1 @ r7 = *src++ \n"
"SUBS r5, r5, #3 \n"
- "STRB r11,[r8], #1 @ *dp++ = 255 \n"
"STRB r4, [r8], #1 @ *dp++ = r4 \n"
"STRB r6, [r8], #1 @ *dp++ = r6 \n"
"STRB r7, [r8], #1 @ *dp++ = r7 \n"
+ "STRB r11,[r8], #1 @ *dp++ = 255 \n"
"BGT 2b \n"
"ADD %[src],%[src],%[sw] @ src += sw \n"
"ADD %[dst],%[dst],%[dw] @ dst += dw \n"
diff --git a/draw/archport.c b/draw/archport.c
index f4fea5bc..337ad3c1 100644
--- a/draw/archport.c
+++ b/draw/archport.c
@@ -7,15 +7,15 @@ typedef unsigned char byte;
#define MASK 0xFF00FF00;
static void
-path_w4i1o4_32bit(byte * restrict argb,
+path_w4i1o4_32bit(byte * restrict rgba,
byte * restrict src, byte cov, int len,
byte * restrict dst)
{
/* COLOR * coverage + DST * (256-coverage) = (COLOR - DST)*coverage + DST*256 */
unsigned int *dst32 = (unsigned int *)(void *)dst;
- int alpha = argb[0];
- unsigned int rb = argb[1] | (argb[3] << 16);
- unsigned int ag = 255 | (argb[2] << 16);
+ int alpha = rgba[3];
+ unsigned int rb = rgba[0] | (rgba[2] << 16);
+ unsigned int ga = rgba[1] | 0xFF0000;
if (alpha == 0)
return;
@@ -25,23 +25,23 @@ path_w4i1o4_32bit(byte * restrict argb,
alpha += alpha>>7; /* alpha is now in the 0...256 range */
while (len--)
{
- unsigned int ca, drb, dag, crb, cag;
+ unsigned int ca, drb, dga, crb, cga;
cov += *src; *src++ = 0;
ca = cov + (cov>>7); /* ca is in 0...256 range */
ca = (ca*alpha)>>8; /* ca is in 0...256 range */
- dag = *dst32++;
+ drb = *dst32++;
if (ca != 0)
{
- drb = dag & MASK;
- dag = (dag<<8) & MASK;
+ dga = drb & MASK;
+ drb = (drb<<8) & MASK;
+ cga = ga - (dga>>8);
crb = rb - (drb>>8);
- cag = ag - (dag>>8);
+ dga += cga * ca;
drb += crb * ca;
- dag += cag * ca;
+ dga &= MASK;
drb &= MASK;
- dag &= MASK;
- dag = drb | (dag>>8);
- dst32[-1] = dag;
+ drb = dga | (drb>>8);
+ dst32[-1] = drb;
}
}
}
@@ -49,133 +49,42 @@ path_w4i1o4_32bit(byte * restrict argb,
{
while (len--)
{
- unsigned int ca, drb, dag, crb, cag;
+ unsigned int ca, drb, dga, crb, cga;
cov += *src; *src++ = 0;
ca = cov + (cov>>7); /* ca is in 0...256 range */
- dag = *dst32++;
+ drb = *dst32++;
if (ca == 0)
continue;
if (ca == 255)
{
- dag = (rb<<8) | ag;
+ drb = (ga<<8) | rb;
}
else
{
- drb = dag & MASK;
- dag = (dag<<8) & MASK;
+ dga = drb & MASK;
+ drb = (drb<<8) & MASK;
+ cga = ga - (dga>>8);
crb = rb - (drb>>8);
- cag = ag - (dag>>8);
+ dga += cga * ca;
drb += crb * ca;
- dag += cag * ca;
+ dga &= MASK;
drb &= MASK;
- dag &= MASK;
- dag = drb | (dag>>8);
+ drb = dga |(drb>>8);
}
- dst32[-1] = dag;
+ dst32[-1] = drb;
}
}
}
static void
-duff_4o4_32bit(byte * restrict sp, int sw, byte * restrict dp, int dw, int w0, int h)
-{
- unsigned int *sp32 = (unsigned int *)(void *)sp;
- unsigned int *dp32 = (unsigned int *)(void *)dp;
-
- /* duff_non(sp0, sw, 4, dp0, dw, w0, h); */
-
- sw = (sw>>2)-w0;
- dw = (dw>>2)-w0;
- while (h--)
- {
- int w = w0;
- while (w--)
- {
- unsigned int sag = *sp32++;
- unsigned int dag = *dp32++;
- unsigned int srb, drb;
- int alpha = sag & 255;
- if (alpha == 0)
- continue;
- alpha += alpha>>7;
- sag |= 0xFF;
- drb = dag & MASK;
- dag = (dag<<8) & MASK;
- srb = (sag>>8) & ~MASK;
- sag = sag & ~MASK;
- srb -= (drb>>8);
- sag -= (dag>>8);
- drb += srb * alpha;
- dag += sag * alpha;
- drb &= MASK;
- dag &= MASK;
- dag = drb | (dag>>8);
- dp32[-1] = dag;
- }
- sp32 += sw;
- dp32 += dw;
- }
-}
-
-static void
-duff_4i1o4_32bit(byte * restrict sp, int sw,
- byte * restrict mp, int mw,
- byte * restrict dp, int dw, int w0, int h)
-{
- unsigned int *sp32 = (unsigned int *)(void *)sp;
- unsigned int *dp32 = (unsigned int *)(void *)dp;
-
- /* duff_nimon(sp, sw, 4, mp, mw, 1, dp, dw, w0, h); */
-
- sw = (sw>>2)-w0;
- dw = (dw>>2)-w0;
- mw -= w0;
- while (h--)
- {
- int w = w0;
- while (w--)
- {
- unsigned int sag = *sp32++;
- unsigned int dag = *dp32++;
- unsigned int srb, drb, alpha, ma;
- alpha = sag & 255;
- ma = *mp++;
- if (alpha == 0)
- continue;
- ma += ma>>7;
- if (ma == 0)
- continue;
- alpha += alpha>>7;
- alpha = (alpha*ma)>>8;
- sag |= 0xFF;
- drb = dag & MASK;
- dag = (dag<<8) & MASK;
- srb = (sag>>8) & ~MASK;
- sag = sag & ~MASK;
- srb -= (drb>>8);
- sag -= (dag>>8);
- drb += srb * alpha;
- dag += sag * alpha;
- drb &= MASK;
- dag &= MASK;
- dag = drb | (dag>>8);
- dp32[-1] = dag;
- }
- sp32 += sw;
- mp += mw;
- dp32 += dw;
- }
-}
-
-static void
-text_w4i1o4_32bit(byte * restrict argb,
+text_w4i1o4_32bit(byte * restrict rgba,
byte * restrict src, int srcw,
byte * restrict dst, int dstw, int w0, int h)
{
unsigned int *dst32 = (unsigned int *)(void *)dst;
- unsigned int alpha = argb[0];
- unsigned int rb = argb[1] | (argb[3] << 16);
- unsigned int ag = 255 | (argb[2] << 16);
+ unsigned int alpha = rgba[3];
+ unsigned int rb = rgba[1] | (rgba[2] << 16);
+ unsigned int ga = rgba[2] | 0xFF0000;
if (alpha == 0)
return;
@@ -185,29 +94,29 @@ text_w4i1o4_32bit(byte * restrict argb,
if (alpha != 255)
{
- alpha += alpha>>7;
+ alpha += alpha>>7; /* alpha is now in the 0...256 range */
while (h--)
{
int w = w0;
while (w--)
{
- unsigned int ca, drb, dag, crb, cag;
+ unsigned int ca, drb, dga, crb, cga;
ca = *src++;
- dag = *dst32++;
+ dga = *dst32++;
ca += ca>>7;
ca = (ca*alpha)>>8;
if (ca == 0)
continue;
- drb = dag & MASK;
- dag = (dag<<8) & MASK;
+ dga = drb & MASK;
+ drb = (drb<<8) & MASK;
+ cga = ga - (dga>>8);
crb = rb - (drb>>8);
- cag = ag - (dag>>8);
+ dga += cga * ca;
drb += crb * ca;
- dag += cag * ca;
+ dga &= MASK;
drb &= MASK;
- dag &= MASK;
- dag = drb | (dag>>8);
- dst32[-1] = dag;
+ drb = dga | (drb>>8);
+ dst32[-1] = drb;
}
src += srcw;
dst32 += dstw;
@@ -215,28 +124,27 @@ text_w4i1o4_32bit(byte * restrict argb,
}
else
{
- alpha += alpha>>7;
while (h--)
{
int w = w0;
while (w--)
{
- unsigned int ca, drb, dag, crb, cag;
+ unsigned int ca, drb, dga, crb, cga;
ca = *src++;
- dag = *dst32++;
+ drb = *dst32++;
ca += ca>>7;
if (ca == 0)
continue;
- drb = dag & MASK;
- dag = (dag<<8) & MASK;
+ dga = drb & MASK;
+ drb = (drb<<8) & MASK;
+ cga = ga - (dga>>8);
crb = rb - (drb>>8);
- cag = ag - (dag>>8);
+ dga += cga * ca;
drb += crb * ca;
- dag += cag * ca;
+ dga &= MASK;
drb &= MASK;
- dag &= MASK;
- dag = drb | (dag>>8);
- dst32[-1] = dag;
+ drb = dga | (drb>>8);
+ dst32[-1] = drb;
}
src += srcw;
dst32 += dstw;
@@ -313,9 +221,9 @@ img_4o4_32bit(byte * restrict src, byte cov, int len, byte * restrict dst,
a = (((c >>8)-(a >>8)) * vd + a ) & MASK;
a1 = (((c1>>8)-(a1>>8)) * vd + a1) & MASK;
}
- sa = (a>>8) & 0xFF;
+ sa = (a1>>24);
sa = FZ_COMBINE(FZ_EXPAND(sa), FZ_EXPAND(cov));
- a |= 0xFF00;
+ a1 |= 0xFF000000;
d = *dst32++;
d1 = d & MASK;
d = (d<<8) & MASK;
@@ -328,15 +236,15 @@ img_4o4_32bit(byte * restrict src, byte cov, int len, byte * restrict dst,
}
static void
-img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len,
+img_w4i1o4_32bit(byte *rgba, byte * restrict src, byte cov, int len,
byte * restrict dst, fz_pixmap *image, int u, int v, int fa, int fb)
{
byte *samples = image->samples;
int w = image->w;
int h = image->h-1;
- int alpha = FZ_EXPAND(argb[0]);
- unsigned int rb = argb[1] | (argb[3] << 16);
- unsigned int ag = 255 | (argb[2] << 16);
+ int alpha = FZ_EXPAND(rgba[3]);
+ unsigned int rb = rgba[0] | (rgba[2] << 16);
+ unsigned int ga = rgba[1] | 0xFF0000;
unsigned int *dst32 = (unsigned int *)(void *)dst;
if (alpha == 0)
@@ -345,10 +253,10 @@ img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len,
{
while (len--)
{
- unsigned int ca, drb, dag, crb, cag;
+ unsigned int ca, drb, dga, crb, cga;
unsigned int a, b;
cov += *src; *src = 0; src++;
- dag = *dst32++;
+ drb = *dst32++;
ca = FZ_COMBINE(FZ_EXPAND(cov), alpha);
if (ca != 0)
{
@@ -396,16 +304,16 @@ img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len,
}
if (ca != 0)
{
- drb = dag & MASK;
- dag = (dag<<8) & MASK;
+ dga = drb & MASK;
+ drb = (drb<<8) & MASK;
+ cga = ga - (dga>>8);
crb = rb - (drb>>8);
- cag = ag - (dag>>8);
+ dga += cga * ca;
drb += crb * ca;
- dag += cag * ca;
+ dga &= MASK;
drb &= MASK;
- dag &= MASK;
- dag = drb | (dag>>8);
- dst32[-1] = dag;
+ drb = dga | (drb>>8);
+ dst32[-1] = drb;
}
u += fa;
v += fb;
@@ -415,10 +323,10 @@ img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len,
{
while (len--)
{
- unsigned int ca, drb, dag, crb, cag;
+ unsigned int ca, drb, dga, crb, cga;
unsigned int a, b;
cov += *src; *src = 0; src++;
- dag = *dst32++;
+ drb = *dst32++;
if (cov != 0)
{
int ui, ui1, vi, vi1, ud, vd;
@@ -466,21 +374,21 @@ img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len,
{
if (ca == 256)
{
- dag = (rb<<8) | ag;
+ drb = (ga<<8) | rb;
}
else
{
- drb = dag & MASK;
- dag = (dag<<8) & MASK;
+ dga = drb & MASK;
+ drb = (drb<<8) & MASK;
+ cga = ga - (dga>>8);
crb = rb - (drb>>8);
- cag = ag - (dag>>8);
+ dga += cga * ca;
drb += crb * ca;
- dag += cag * ca;
+ dga &= MASK;
drb &= MASK;
- dag &= MASK;
- dag = drb | (dag>>8);
+ drb = dga | (drb>>8);
}
- dst32[-1] = dag;
+ dst32[-1] = drb;
}
}
u += fa;
@@ -560,10 +468,8 @@ img_1o1_32bit(byte * restrict src, byte cov, int len, byte * restrict dst,
void fz_accelerate(void)
{
- if (sizeof(int) == 4 && sizeof(unsigned int) == 4)
+ if (sizeof(int) == 4 && sizeof(unsigned int) == 4 && !fz_isbigendian())
{
- fz_duff_4o4 = duff_4o4_32bit;
- fz_duff_4i1o4 = duff_4i1o4_32bit;
fz_path_w4i1o4 = path_w4i1o4_32bit;
fz_text_w4i1o4 = text_w4i1o4_32bit;
fz_img_4o4 = img_4o4_32bit;
diff --git a/draw/archx86.c b/draw/archx86.c
index 0c313f33..5418e9f7 100644
--- a/draw/archx86.c
+++ b/draw/archx86.c
@@ -40,7 +40,7 @@ static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int d
{
int ts = *s++;
int ma = *mp++ + 1;
- int sa = ((ts & 0xff) * ma) >> 8;
+ int sa = (((ts>>24) & 0xff) * ma) >> 8;
int ssa = 255 - sa;
__m64 d0 = _mm_cvtsi32_si64(*d);
@@ -50,11 +50,11 @@ static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int d
__m64 mma = _mm_set1_pi16(ma);
__m64 mssa = _mm_set1_pi16(ssa);
- /* unpack 0000argb => a0r0g0b0 */
+ /* unpack 0000rgba => r0g0b0a0 */
__m64 d1 = _mm_unpacklo_pi8(d0, mzero);
__m64 s1 = _mm_unpacklo_pi8(s0, mzero);
- /* s1 * ma => a0r0g0b0 */
+ /* s1 * ma => r0g0b0a0 */
__m64 msma = _mm_mullo_pi16(s1, mma);
/* d1 * mssa */
__m64 mdssa = _mm_mullo_pi16(d1, mssa);
@@ -79,6 +79,8 @@ static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int d
#if 0 /* TODO */
+/* Needs to be rgba, not bgra, as well as needing finishing */
+
static inline unsigned
getargb(unsigned *s, int w, int h, int u, int v)
{
diff --git a/draw/imagedraw.c b/draw/imagedraw.c
index 81d2bb05..dd887f53 100644
--- a/draw/imagedraw.c
+++ b/draw/imagedraw.c
@@ -13,7 +13,7 @@ getmask(byte *s, int w, int h, int u, int v)
}
static inline byte *
-getargb(byte *s, int w, int h, int u, int v)
+getrgba(byte *s, int w, int h, int u, int v)
{
if (u < 0) u = 0;
if (v < 0) v = 0;
@@ -23,7 +23,7 @@ getargb(byte *s, int w, int h, int u, int v)
}
static inline byte *
-getag(byte *s, int w, int h, int u, int v)
+getga(byte *s, int w, int h, int u, int v)
{
if (u < 0) u = 0;
if (v < 0) v = 0;
@@ -49,14 +49,14 @@ lerp(int a, int b, int t)
}
static inline void
-lerpag(byte *dst, byte *a, byte *b, int t)
+lerpga(byte *dst, byte *a, byte *b, int t)
{
dst[0] = lerp(a[0], b[0], t);
dst[1] = lerp(a[1], b[1], t);
}
static inline void
-lerpargb(byte *dst, byte *a, byte *b, int t)
+lerprgba(byte *dst, byte *a, byte *b, int t)
{
dst[0] = lerp(a[0], b[0], t);
dst[1] = lerp(a[1], b[1], t);
@@ -81,7 +81,7 @@ samplemask(byte *s, int w, int h, int u, int v)
}
static inline void
-sampleag(byte *s, int w, int h, int u, int v, byte *out)
+samplega(byte *s, int w, int h, int u, int v, byte *out)
{
byte ab[4];
byte cd[4];
@@ -89,17 +89,17 @@ sampleag(byte *s, int w, int h, int u, int v, byte *out)
int vi = v >> 16;
int ud = u & 0xFFFF;
int vd = v & 0xFFFF;
- byte *a = getag(s, w, h, ui, vi);
- byte *b = getag(s, w, h, ui+1, vi);
- byte *c = getag(s, w, h, ui, vi+1);
- byte *d = getag(s, w, h, ui+1, vi+1);
- lerpag(ab, a, b, ud);
- lerpag(cd, c, d, ud);
- lerpag(out, ab, cd, vd);
+ byte *a = getga(s, w, h, ui, vi);
+ byte *b = getga(s, w, h, ui+1, vi);
+ byte *c = getga(s, w, h, ui, vi+1);
+ byte *d = getga(s, w, h, ui+1, vi+1);
+ lerpga(ab, a, b, ud);
+ lerpga(cd, c, d, ud);
+ lerpga(out, ab, cd, vd);
}
static inline void
-sampleargb(byte *s, int w, int h, int u, int v, byte *out)
+samplergba(byte *s, int w, int h, int u, int v, byte *out)
{
byte ab[4];
byte cd[4];
@@ -107,13 +107,13 @@ sampleargb(byte *s, int w, int h, int u, int v, byte *out)
int vi = v >> 16;
int ud = u & 0xFFFF;
int vd = v & 0xFFFF;
- byte *a = getargb(s, w, h, ui, vi);
- byte *b = getargb(s, w, h, ui+1, vi);
- byte *c = getargb(s, w, h, ui, vi+1);
- byte *d = getargb(s, w, h, ui+1, vi+1);
- lerpargb(ab, a, b, ud);
- lerpargb(cd, c, d, ud);
- lerpargb(out, ab, cd, vd);
+ byte *a = getrgba(s, w, h, ui, vi);
+ byte *b = getrgba(s, w, h, ui+1, vi);
+ byte *c = getrgba(s, w, h, ui, vi+1);
+ byte *d = getrgba(s, w, h, ui+1, vi+1);
+ lerprgba(ab, a, b, ud);
+ lerprgba(cd, c, d, ud);
+ lerprgba(out, ab, cd, vd);
}
static inline void
@@ -170,7 +170,7 @@ img_2o2(byte * restrict src, byte cov, int len, byte * restrict dst,
byte *samples = image->samples;
int w = image->w;
int h = image->h;
- byte ag[2];
+ byte ga[2];
while (len--)
{
@@ -178,12 +178,12 @@ img_2o2(byte * restrict src, byte cov, int len, byte * restrict dst,
cov += *src; *src = 0; src++;
if (cov != 0)
{
- sampleag(samples, w, h, u, v, ag);
- sa = FZ_COMBINE(FZ_EXPAND(ag[0]), FZ_EXPAND(cov));
+ samplega(samples, w, h, u, v, ga);
+ sa = FZ_COMBINE(FZ_EXPAND(ga[1]), FZ_EXPAND(cov));
if (sa != 0)
{
- dst[0] = FZ_BLEND(255, dst[0], sa);
- dst[1] = FZ_BLEND(ag[1], dst[1], sa);
+ dst[0] = FZ_BLEND(ga[0], dst[0], sa);
+ dst[1] = FZ_BLEND(255, dst[1], sa);
}
}
dst += 2;
@@ -199,7 +199,7 @@ img_4o4(byte * restrict src, byte cov, int len, byte * restrict dst,
byte *samples = image->samples;
int w = image->w;
int h = image->h;
- byte argb[4];
+ byte rgba[4];
while (len--)
{
@@ -207,14 +207,14 @@ img_4o4(byte * restrict src, byte cov, int len, byte * restrict dst,
cov += *src; *src = 0; src++;
if (cov != 0)
{
- sampleargb(samples, w, h, u, v, argb);
- sa = FZ_COMBINE(FZ_EXPAND(argb[0]), FZ_EXPAND(cov));
+ samplergba(samples, w, h, u, v, rgba);
+ sa = FZ_COMBINE(FZ_EXPAND(rgba[3]), FZ_EXPAND(cov));
if (sa != 0)
{
- dst[0] = FZ_BLEND(255, dst[0], sa);
- dst[1] = FZ_BLEND(argb[1], dst[1], sa);
- dst[2] = FZ_BLEND(argb[2], dst[2], sa);
- dst[3] = FZ_BLEND(argb[3], dst[3], sa);
+ dst[0] = FZ_BLEND(rgba[0], dst[0], sa);
+ dst[1] = FZ_BLEND(rgba[1], dst[1], sa);
+ dst[2] = FZ_BLEND(rgba[2], dst[2], sa);
+ dst[3] = FZ_BLEND(255, dst[3], sa);
}
}
dst += 4;
@@ -224,18 +224,18 @@ img_4o4(byte * restrict src, byte cov, int len, byte * restrict dst,
}
static void
-img_w2i1o2(byte *ag, byte * restrict src, byte cov, int len, byte * restrict dst,
+img_w2i1o2(byte *ga, byte * restrict src, byte cov, int len, byte * restrict dst,
fz_pixmap *image, int u, int v, int fa, int fb)
{
byte *samples = image->samples;
int w = image->w;
int h = image->h;
- int alpha = FZ_EXPAND(ag[0]);
- byte g = ag[1];
+ byte g = ga[0];
+ byte a = ga[1];
- if (alpha == 0)
+ if (a == 0)
return;
- if (alpha != 256)
+ if (a != 255)
{
while (len--)
{
@@ -244,12 +244,12 @@ img_w2i1o2(byte *ag, byte * restrict src, byte cov, int len, byte * restrict dst
if (cov != 0)
{
ca = samplemask(samples, w, h, u, v);
- ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca));
- ca = FZ_COMBINE(ca, alpha);
+ ca = FZ_COMBINE(FZ_EXPAND(cov), FZ_EXPAND(ca));
+ ca = FZ_COMBINE(ca, FZ_EXPAND(a));
if (ca != 0)
{
- dst[0] = FZ_BLEND(255, dst[0], ca);
- dst[1] = FZ_BLEND(g, dst[1], ca);
+ dst[0] = FZ_BLEND(g, dst[0], ca);
+ dst[1] = FZ_BLEND(255, dst[1], ca);
}
}
dst += 2;
@@ -266,11 +266,11 @@ img_w2i1o2(byte *ag, byte * restrict src, byte cov, int len, byte * restrict dst
if (cov != 0)
{
ca = samplemask(samples, w, h, u, v);
- ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca));
+ ca = FZ_COMBINE(FZ_EXPAND(cov), FZ_EXPAND(ca));
if (ca != 0)
{
- dst[0] = FZ_BLEND(255, dst[0], ca);
- dst[1] = FZ_BLEND(g, dst[1], ca);
+ dst[0] = FZ_BLEND(g, dst[0], ca);
+ dst[1] = FZ_BLEND(255, dst[1], ca);
}
}
dst += 2;
@@ -281,20 +281,20 @@ img_w2i1o2(byte *ag, byte * restrict src, byte cov, int len, byte * restrict dst
}
static void
-img_w4i1o4(byte *argb, byte * restrict src, byte cov, int len, byte * restrict dst,
+img_w4i1o4(byte *rgba, byte * restrict src, byte cov, int len, byte * restrict dst,
fz_pixmap *image, int u, int v, int fa, int fb)
{
byte *samples = image->samples;
int w = image->w;
int h = image->h;
- int alpha = FZ_EXPAND(argb[0]);
- byte r = argb[1];
- byte g = argb[2];
- byte b = argb[3];
+ byte r = rgba[0];
+ byte g = rgba[1];
+ byte b = rgba[2];
+ byte a = rgba[3];
- if (alpha == 0)
+ if (a == 0)
return;
- if (alpha != 256)
+ if (a != 255)
{
while (len--)
{
@@ -303,14 +303,14 @@ img_w4i1o4(byte *argb, byte * restrict src, byte cov, int len, byte * restrict d
if (cov != 0)
{
ca = samplemask(samples, w, h, u, v);
- ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca));
- ca = FZ_COMBINE(ca, alpha);
+ ca = FZ_COMBINE(FZ_EXPAND(cov), FZ_EXPAND(ca));
+ ca = FZ_COMBINE(ca, FZ_EXPAND(a));
if (ca != 0)
{
- dst[0] = FZ_BLEND(255, dst[0], ca);
- dst[1] = FZ_BLEND(r, dst[1], ca);
- dst[2] = FZ_BLEND(g, dst[2], ca);
- dst[3] = FZ_BLEND(b, dst[3], ca);
+ dst[0] = FZ_BLEND(r, dst[0], ca);
+ dst[1] = FZ_BLEND(g, dst[1], ca);
+ dst[2] = FZ_BLEND(b, dst[2], ca);
+ dst[3] = FZ_BLEND(255, dst[3], ca);
}
}
dst += 4;
@@ -327,13 +327,13 @@ img_w4i1o4(byte *argb, byte * restrict src, byte cov, int len, byte * restrict d
if (cov != 0)
{
ca = samplemask(samples, w, h, u, v);
- ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca));
+ ca = FZ_COMBINE(FZ_EXPAND(cov), FZ_EXPAND(ca));
if (ca != 0)
{
- dst[0] = FZ_BLEND(255, dst[0], ca);
- dst[1] = FZ_BLEND(r, dst[1], ca);
- dst[2] = FZ_BLEND(g, dst[2], ca);
- dst[3] = FZ_BLEND(b, dst[3], ca);
+ dst[0] = FZ_BLEND(r, dst[0], ca);
+ dst[1] = FZ_BLEND(g, dst[1], ca);
+ dst[2] = FZ_BLEND(b, dst[2], ca);
+ dst[3] = FZ_BLEND(255, dst[3], ca);
}
}
dst += 4;
diff --git a/draw/imageunpack.c b/draw/imageunpack.c
index 6a1a1628..48c420bf 100644
--- a/draw/imageunpack.c
+++ b/draw/imageunpack.c
@@ -19,19 +19,22 @@ static void decodetile(fz_pixmap *pix, int skip, float *decode)
int justinvert = 1;
unsigned int mask;
- min[0] = 0;
- max[0] = 255;
- sub[0] = 255;
-
- for (i = skip; i < n; i++)
+ for (i = 0; i < n-skip; i++)
{
- min[i] = decode[(i - skip) * 2] * 255;
- max[i] = decode[(i - skip) * 2 + 1] * 255;
+ min[i] = decode[i * 2] * 255;
+ max[i] = decode[i * 2 + 1] * 255;
sub[i] = max[i] - min[i];
needed |= (min[i] != 0) | (max[i] != 255);
justinvert &= min[i] == 255 && max[i] == 0 && sub[i] == -255;
}
+ if (skip)
+ {
+ min[i] = 0;
+ max[i] = 255;
+ sub[i] = 255;
+ }
+
if (fz_isbigendian())
mask = 0x00ff00ff;
else
@@ -115,8 +118,8 @@ static void init1(void)
{
x = tbit(bits, k);
t1pad0[i][k] = x;
- t1pad1[i][k * 2 + 0] = 255;
- t1pad1[i][k * 2 + 1] = x;
+ t1pad1[i][k * 2 + 0] = x;
+ t1pad1[i][k * 2 + 1] = 255;
}
}
@@ -178,7 +181,7 @@ static void loadtile1(byte * restrict src, int sw, byte * restrict dst, int dw,
dp = dst;
for (x = 0; x < w; x++)
{
- if ((x % pad) == 0)
+ if ((x % pad) == pad-1)
*dp++ = 255;
*dp++ = tbit(src, x);
}
@@ -204,14 +207,14 @@ static void loadtile1(byte * restrict src, int sw, byte * restrict dst, int dw,
while (h--) \
{ \
byte *dp = dst; \
- tpad = 0; \
+ tpad = pad; \
for (x = 0; x < w; x++) \
{ \
- if (!tpad--) { \
- tpad = pad-1; \
+ *dp++ = getf(src, x); \
+ if (--tpad == 0) { \
+ tpad = pad; \
*dp++ = 255; \
} \
- *dp++ = getf(src, x); \
} \
src += sw; \
dst += dw; \
@@ -247,8 +250,8 @@ static void loadtile8(byte * restrict src, int sw, byte * restrict dst, int dw,
int x;
for (x = w; x > 0; x --)
{
- *dst++ = 255;
*dst++ = *src++;
+ *dst++ = 255;
}
src += sw;
dst += dw;
@@ -263,10 +266,10 @@ static void loadtile8(byte * restrict src, int sw, byte * restrict dst, int dw,
int x;
for (x = w; x > 0; x -= 3)
{
- *dp++ = 255;
*dp++ = *src++;
*dp++ = *src++;
*dp++ = *src++;
+ *dp++ = 255;
}
src += sw;
dst += dw;
@@ -278,16 +281,16 @@ static void loadtile8(byte * restrict src, int sw, byte * restrict dst, int dw,
while (h--)
{
byte *dp = dst;
- int tpad = 1;
+ int tpad = pad;
int x;
for (x = w; x > 0; x--)
{
+ *dp++ = *src++;
tpad--;
if (tpad == 0) {
tpad = pad;
*dp++ = 255;
}
- *dp++ = *src++;
}
src += sw;
dst += dw;
diff --git a/draw/meshdraw.c b/draw/meshdraw.c
index c0e219c4..72a723e0 100644
--- a/draw/meshdraw.c
+++ b/draw/meshdraw.c
@@ -147,12 +147,12 @@ drawscan(fz_pixmap *pix, int y, int x1, int x2, int *v1, int *v2, int n)
while (w--)
{
- *p++ = 255;
for (k = 0; k < n; k++)
{
*p++ = v[k] >> 16;
v[k] += dv[k];
}
+ *p++ = 255;
}
}
@@ -376,11 +376,11 @@ fz_rendershade(fz_shade *shade, fz_matrix ctm, fz_pixmap *dest, fz_bbox bbox)
d = dest->samples + ((bbox.x0 - dest->x) + (y - dest->y) * dest->w) * dest->n;
for (x = bbox.x0; x < bbox.x1; x++)
{
- sa = s[0];
+ sa = s[1];
ssa = 255 - sa;
- d[0] = s[0] + fz_mul255(d[0], ssa);
for (k = 0; k < dest->colorspace->n; k++)
- d[k+1] = fz_mul255(clut[s[1]][k], sa) + fz_mul255(d[k+1], ssa);
+ d[k] = fz_mul255(clut[s[0]][k], sa) + fz_mul255(d[k+1], ssa);
+ d[k] = s[1] + fz_mul255(d[k], ssa);
s += 2;
d += 1 + dest->colorspace->n;
}
diff --git a/draw/porterduff.c b/draw/porterduff.c
index fd7a9d45..05f00f63 100644
--- a/draw/porterduff.c
+++ b/draw/porterduff.c
@@ -1,5 +1,74 @@
#include "fitz.h"
+/*
+ * The functions in this file implement various flavours of Porter-Duff
+ * blending.
+ *
+ * We take the following as definitions:
+ *
+ * Cx = Color (from plane x)
+ * ax = Alpha (from plane x)
+ * cx = Cx.ax = Premultiplied color (from plane x)
+ *
+ * The general PorterDuff blending equation is:
+ *
+ * Blend Z = X op Y: cz = Fx.cx + Fy.cy, where Fx and Fy depend on op
+ *
+ * The two operations we use in this file are: '(X in Y) over Z' and
+ * 'S over Z'. The definitions of the 'over' and 'in' operations are as
+ * follows:
+ *
+ * For S over Z, Fs = 1, Fz = 1-as
+ * For X in Y, Fx = ay, Fy = 0
+ *
+ * We have 2 choices; we can either work with premultiplied data, or non
+ * premultiplied data. We derive both forms below.
+ *
+ * First the premultiplied case:
+ *
+ * Let S = (X in Y)
+ * Let R = (X in Y) over Z = S over Z
+ *
+ * cs = cx.Fx + cy.Fy (where Fx = ay, Fy = 0)
+ * = cx.ay
+ * as = ax.Fx + ay.Fy
+ * = ax.ay
+ *
+ * cr = cs.Fs + cz.Fz (where Fs = 1, Fz = 1-as)
+ * = cs + cz.(1-as)
+ * = cx.ay + cz.(1-ax.ay)
+ * ar = as.Fs + az.Fz
+ * = as + az.(1-as)
+ * = ax.ay + az.(1-ax.ay)
+ *
+ * This has various nice properties, like not needing any divisions, and
+ * being symmetric in color and alpha, so this is what we use. Because we
+ * went through the pain of deriving the non premultiplied forms, we list
+ * them here too, though they are not used.
+ *
+ * Non Pre-multiplied case:
+ *
+ * Cs.as = Fx.Cx.ax + Fy.Cy.ay (where Fx = ay, Fy = 0)
+ * = Cx.ay.ax
+ * Cs = (Cx.ay.ax)/(ay.ax)
+ * = Cx
+ * Cr.ar = Fs.Cs.as + Fz.Cz.az (where Fs = 1, Fz = 1-as)
+ * = Cs.as + (1-as).Cz.az
+ * = Cx.ax.ay + Cz.az.(1-ax.ay)
+ * Cr = (Cx.ax.ay + Cz.az.(1-ax.ay))/(ax.ay + az.(1-ax.ay))
+ *
+ * Much more complex, it seems. However, if we could restrict ourselves to
+ * the case where we were always plotting onto an opaque background (i.e.
+ * az = 1), then:
+ *
+ * Cr = Cx.(ax.ay) + Cz.(1-ax.ay)
+ * = (Cx-Cz)*(ax.ay) + Cz (a single MLA operation)
+ * ar = 1
+ *
+ * Sadly, this is not true in the general case, so we abandon this effort
+ * and stick to using the premultiplied form.
+ */
+
typedef unsigned char byte;
/*
@@ -11,6 +80,7 @@ static void
duff_non(byte * restrict sp, int sw, int sn, byte * restrict dp, int dw, int w0, int h)
{
int k;
+
sw -= w0*sn;
dw -= w0*sn;
while (h--)
@@ -18,12 +88,10 @@ duff_non(byte * restrict sp, int sw, int sn, byte * restrict dp, int dw, int w0,
int w = w0;
while (w--)
{
- /* RJW: Alpha handling suspicious here; sp[0] counts twice */
- int sa = FZ_EXPAND(sp[0]);
- dp[0] = FZ_BLEND(255, dp[0], sa);
- for (k = 1; k < sn; k++)
+ int ssa = 255 - sp[sn-1];
+ for (k = 0; k < sn; k++)
{
- dp[k] = FZ_BLEND(sp[k], dp[k], sa);
+ dp[k] = sp[k] + fz_mul255(dp[k], ssa);
}
sp += sn;
dp += sn;
@@ -38,6 +106,7 @@ static void
duff_nimon(byte * restrict sp, int sw, int sn, byte * restrict mp, int mw, int mn, byte * restrict dp, int dw, int w0, int h)
{
int k;
+
sw -= w0*sn;
mw -= w0*mn;
dw -= w0*sn;
@@ -46,12 +115,11 @@ duff_nimon(byte * restrict sp, int sw, int sn, byte * restrict mp, int mw, int m
int w = w0;
while (w--)
{
- /* TODO: validate this */
- int ma = FZ_COMBINE(FZ_EXPAND(mp[0]), FZ_EXPAND(sp[0]));
- dp[0] = FZ_BLEND(255, dp[0], ma);
- for (k = 1; k < sn; k++)
+ int ma = mp[0];
+ int ssa = 255-fz_mul255(sp[sn-1], ma);
+ for (k = 0; k < sn; k++)
{
- dp[k] = FZ_BLEND(sp[k], dp[k], ma);
+ dp[k] = fz_mul255(sp[k], ma) + fz_mul255(dp[k], ssa);
}
sp += sn;
mp += mn;
@@ -64,22 +132,22 @@ duff_nimon(byte * restrict sp, int sw, int sn, byte * restrict mp, int mw, int m
}
static void
-duff_1o1(byte * restrict sp, int sw, byte * restrict dp, int dw, int w0, int h)
+duff_1o1(byte * restrict sp0, int sw, byte * restrict dp0, int dw, int w0, int h)
{
/* duff_non(sp0, sw, 1, dp0, dw, w0, h); */
- sw -= w0;
- dw -= w0;
while (h--)
{
+ byte *sp = sp0;
+ byte *dp = dp0;
int w = w0;
while (w--)
{
- dp[0] = FZ_BLEND(255, dp[0], FZ_EXPAND(sp[0]));
+ dp[0] = sp[0] + fz_mul255(dp[0], 255 - sp[0]);
sp ++;
dp ++;
}
- sp += sw;
- dp += dw;
+ sp0 += sw;
+ dp0 += dw;
}
}
@@ -87,6 +155,7 @@ static void
duff_4o4(byte *sp, int sw, byte *dp, int dw, int w0, int h)
{
/* duff_non(sp0, sw, 4, dp0, dw, w0, h); */
+
sw -= w0<<2;
dw -= w0<<2;
while (h--)
@@ -94,11 +163,11 @@ duff_4o4(byte *sp, int sw, byte *dp, int dw, int w0, int h)
int w = w0;
while (w--)
{
- int alpha = FZ_EXPAND(sp[0]);
- dp[0] = FZ_BLEND(255, dp[0], alpha);
- dp[1] = FZ_BLEND(sp[1], dp[1], alpha);
- dp[2] = FZ_BLEND(sp[2], dp[2], alpha);
- dp[3] = FZ_BLEND(sp[3], dp[3], alpha);
+ byte ssa = 255 - sp[3];
+ dp[0] = sp[0] + fz_mul255(dp[0], ssa);
+ dp[1] = sp[1] + fz_mul255(dp[1], ssa);
+ dp[2] = sp[2] + fz_mul255(dp[2], ssa);
+ dp[3] = sp[3] + fz_mul255(dp[3], ssa);
sp += 4;
dp += 4;
}
@@ -111,16 +180,16 @@ static void
duff_1i1o1(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restrict dp, int dw, int w0, int h)
{
/* duff_nimon(sp0, sw, 1, mp0, mw, 1, dp0, dw, w0, h); */
- sw -= w0;
- mw -= w0;
- dw -= w0;
+
while (h--)
{
int w = w0;
while (w--)
{
- int ma = FZ_COMBINE(FZ_EXPAND(mp[0]), FZ_EXPAND(sp[0]));
- dp[0] = FZ_BLEND(255, dp[0], ma);
+ byte ma = mp[0];
+ byte sa = fz_mul255(sp[0], ma);
+ byte ssa = 255 - sa;
+ dp[0] = sa + fz_mul255(dp[0], ssa);
sp ++;
mp ++;
dp ++;
@@ -132,9 +201,37 @@ duff_1i1o1(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restri
}
static void
+duff_2i1o2(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restrict dp, int dw, int w0, int h)
+{
+
+ /* duff_nimon(sp, sw, 2, mp, mw, 1, dp, dw, w0, h); */
+ sw -= w0<<1;
+ dw -= w0<<1;
+ mw -= w0;
+ while (h--)
+ {
+ int w = w0;
+ while (w--)
+ {
+ byte ma = mp[0];
+ byte ssa = 255 - fz_mul255(sp[1], ma);
+ dp[0] = fz_mul255(sp[0], ma) + fz_mul255(dp[0], ssa);
+ dp[1] = fz_mul255(sp[1], ma) + fz_mul255(dp[1], ssa);
+ sp += 2;
+ mp += 1;
+ dp += 2;
+ }
+ sp += sw;
+ mp += mw;
+ dp += dw;
+ }
+}
+
+static void
duff_4i1o4(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restrict dp, int dw, int w0, int h)
{
/* duff_nimon(sp, sw, 4, mp, mw, 1, dp, dw, w0, h); */
+
sw -= w0<<2;
dw -= w0<<2;
mw -= w0;
@@ -143,11 +240,12 @@ duff_4i1o4(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restri
int w = w0;
while (w--)
{
- int ma = FZ_COMBINE(FZ_EXPAND(mp[0]), FZ_EXPAND(sp[0]));
- dp[0] = FZ_BLEND(255, dp[0], ma);
- dp[1] = FZ_BLEND(sp[1], dp[1], ma);
- dp[2] = FZ_BLEND(sp[2], dp[2], ma);
- dp[3] = FZ_BLEND(sp[3], dp[3], ma);
+ byte ma = mp[0];
+ byte ssa = 255 - fz_mul255(sp[3], ma);
+ dp[0] = fz_mul255(sp[0], ma) + fz_mul255(dp[0], ssa);
+ dp[1] = fz_mul255(sp[1], ma) + fz_mul255(dp[1], ssa);
+ dp[2] = fz_mul255(sp[2], ma) + fz_mul255(dp[2], ssa);
+ dp[3] = fz_mul255(sp[3], ma) + fz_mul255(dp[3], ssa);
sp += 4;
mp += 1;
dp += 4;
@@ -176,39 +274,39 @@ path_1o1(byte * restrict src, byte cov, int len, byte * restrict dst)
}
static void
-path_w2i1o2(byte * restrict ag, byte * restrict src, byte cov, int len, byte * restrict dst)
+path_w2i1o2(byte * restrict ga, byte * restrict src, byte cov, int len, byte * restrict dst)
{
- int alpha = FZ_EXPAND(ag[0]);
- byte g = ag[1];
+ byte g = ga[0];
+ int a = FZ_EXPAND(ga[1]);
while (len--)
{
int ca;
cov += *src; *src = 0; src++;
- ca = FZ_COMBINE(FZ_EXPAND(cov), alpha);
- dst[0] = FZ_BLEND(255, dst[0], ca);
- dst[1] = FZ_BLEND(g, dst[1], ca);
+ ca = FZ_COMBINE(FZ_EXPAND(cov), a);
+ dst[0] = FZ_BLEND(g, dst[0], ca);
+ dst[1] = FZ_BLEND(255, dst[1], ca);
dst += 2;
}
}
static void
-path_w4i1o4(byte * restrict argb, byte * restrict src, byte cov, int len, byte * restrict dst)
+path_w4i1o4(byte * restrict rgba, byte * restrict src, byte cov, int len, byte * restrict dst)
{
- int alpha = FZ_EXPAND(argb[0]);
- byte r = argb[1];
- byte g = argb[2];
- byte b = argb[3];
+ byte r = rgba[0];
+ byte g = rgba[1];
+ byte b = rgba[2];
+ int a = FZ_EXPAND(rgba[3]);
while (len--)
{
int ca;
cov += *src; *src = 0; src++;
- ca = FZ_COMBINE(FZ_EXPAND(cov), alpha);
- dst[0] = FZ_BLEND(255, dst[0], ca);
- dst[1] = FZ_BLEND(r, dst[1], ca);
- dst[2] = FZ_BLEND(g, dst[2], ca);
- dst[3] = FZ_BLEND(b, dst[3], ca);
+ ca = FZ_COMBINE(FZ_EXPAND(cov), a);
+ dst[0] = FZ_BLEND(r, dst[0], ca);
+ dst[1] = FZ_BLEND(g, dst[1], ca);
+ dst[2] = FZ_BLEND(b, dst[2], ca);
+ dst[3] = FZ_BLEND(255, dst[3], ca);
dst += 4;
}
}
@@ -220,6 +318,7 @@ path_w4i1o4(byte * restrict argb, byte * restrict src, byte cov, int len, byte *
static void
text_1o1(byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h)
{
+
srcw -= w0;
dstw -= w0;
while (h--)
@@ -227,8 +326,7 @@ text_1o1(byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, i
int w = w0;
while (w--)
{
- int c = FZ_EXPAND(src[0]);
- dst[0] = FZ_BLEND(255, dst[0], c);
+ dst[0] = src[0] + fz_mul255(dst[0], 255 - src[0]);
src++;
dst++;
}
@@ -238,10 +336,10 @@ text_1o1(byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, i
}
static void
-text_w2i1o2(byte * restrict ag, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h)
+text_w2i1o2(byte * restrict ga, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h)
{
- int alpha = FZ_EXPAND(ag[0]);
- byte g = ag[1];
+ byte g = ga[0];
+ int a = FZ_EXPAND(ga[1]);
srcw -= w0;
dstw -= w0<<1;
@@ -250,9 +348,9 @@ text_w2i1o2(byte * restrict ag, byte * restrict src, int srcw, byte * restrict d
int w = w0;
while (w--)
{
- int c = FZ_COMBINE(FZ_EXPAND(src[0]), alpha);
- dst[0] = FZ_BLEND(255, dst[0], c);
- dst[1] = FZ_BLEND(g, dst[1], c);
+ int c = FZ_COMBINE(FZ_EXPAND(src[0]), a);
+ dst[0] = FZ_BLEND(g, dst[0], c);
+ dst[1] = FZ_BLEND(255, dst[1], c);
src ++;
dst += 2;
}
@@ -262,12 +360,12 @@ text_w2i1o2(byte * restrict ag, byte * restrict src, int srcw, byte * restrict d
}
static void
-text_w4i1o4(byte * restrict argb, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h)
+text_w4i1o4(byte * restrict rgba, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h)
{
- int alpha = FZ_EXPAND(argb[0]);
- byte r = argb[1];
- byte g = argb[2];
- byte b = argb[3];
+ byte r = rgba[0];
+ byte g = rgba[1];
+ byte b = rgba[2];
+ int a = FZ_EXPAND(rgba[3]);
srcw -= w0;
dstw -= w0<<2;
@@ -276,11 +374,11 @@ text_w4i1o4(byte * restrict argb, byte * restrict src, int srcw, byte * restrict
int w = w0;
while (w--)
{
- int c = FZ_COMBINE(FZ_EXPAND(src[0]), alpha);
- dst[0] = FZ_BLEND(255, dst[0], c);
- dst[1] = FZ_BLEND(r, dst[1], c);
- dst[2] = FZ_BLEND(g, dst[2], c);
- dst[3] = FZ_BLEND(b, dst[3], c);
+ int c = FZ_COMBINE(FZ_EXPAND(src[0]), a);
+ dst[0] = FZ_BLEND(r, dst[0], c);
+ dst[1] = FZ_BLEND(g, dst[1], c);
+ dst[2] = FZ_BLEND(b, dst[2], c);
+ dst[3] = FZ_BLEND(255, dst[3], c);
src ++;
dst += 4;
}
@@ -298,6 +396,7 @@ void (*fz_duff_nimon)(byte*,int,int,byte*,int,int,byte*,int,int,int) = duff_nimo
void (*fz_duff_1o1)(byte*,int,byte*,int,int,int) = duff_1o1;
void (*fz_duff_4o4)(byte*,int,byte*,int,int,int) = duff_4o4;
void (*fz_duff_1i1o1)(byte*,int,byte*,int,byte*,int,int,int) = duff_1i1o1;
+void (*fz_duff_2i1o2)(byte*,int,byte*,int,byte*,int,int,int) = duff_2i1o2;
void (*fz_duff_4i1o4)(byte*,int,byte*,int,byte*,int,int,int) = duff_4i1o4;
void (*fz_path_1o1)(byte*,byte,int,byte*) = path_1o1;