diff options
Diffstat (limited to 'draw')
-rw-r--r-- | draw/archarm.c | 63 | ||||
-rw-r--r-- | draw/archport.c | 238 | ||||
-rw-r--r-- | draw/archx86.c | 8 | ||||
-rw-r--r-- | draw/imagedraw.c | 124 | ||||
-rw-r--r-- | draw/imageunpack.c | 39 | ||||
-rw-r--r-- | draw/meshdraw.c | 8 | ||||
-rw-r--r-- | draw/porterduff.c | 229 |
7 files changed, 360 insertions, 349 deletions
diff --git a/draw/archarm.c b/draw/archarm.c index c532a195..2904ce00 100644 --- a/draw/archarm.c +++ b/draw/archarm.c @@ -14,24 +14,25 @@ extern void fz_srow4_arm(byte *src, byte *dst, int w, int denom); extern void fz_scol4_arm(byte *src, byte *dst, int w, int denom); static void -path_w4i1o4_arm(byte * restrict argb, byte * restrict src, byte cov, int len, byte * restrict dst) +path_w4i1o4_arm(byte * restrict rgba, byte * restrict src, byte cov, int len, byte * restrict dst) { /* The ARM code here is a hand coded implementation * of the optimized C version. */ + if (len <= 0) return; asm volatile( - "ldr %0, [%0] @ %0 = argb \n" + "ldr %0, [%0] @ %0 = rgba \n" "mov r11,#0 \n" "mov r8, #0xFF00 \n" - "and r14,%0,#255 @ r14= alpha \n" - "orr %0, %0, #255 @ %0 = argb |= 255 \n" + "mov r14,%0,lsr #24 @ r14= alpha \n" + "orr %0, %0, #0xFF000000 @ %0 = rgba |= 0xFF000000 \n" "orr r8, r8, r8, LSL #16 @ r8 = 0xFF00FF00 \n" "adds r14,r14,r14,LSR #7 @ r14 = alpha += alpha>>7 \n" "beq 9f @ if (alpha == 0) bale \n" - "and r6, %0, r8 @ r6 = rb<<8 \n" - "bic %0, %0, r8 @ %0 = ag \n" - "mov r6, r6, LSR #8 @ r6 = rb \n" + "and r6, %0, r8 @ r6 = ga<<8 \n" + "bic %0, %0, r8 @ %0 = rb \n" + "mov r6, r6, LSR #8 @ r6 = ga \n" "cmp r14,#256 @ if (alpha == 256) \n" "beq 4f @ no-alpha loop \n" "B 2f @ enter the loop \n" @@ -40,37 +41,37 @@ path_w4i1o4_arm(byte * restrict argb, byte * restrict src, byte cov, int len, by "ble 9f \n" "2: \n" "ldrb r12,[%1] @ r12= *src \n" - "ldr r9, [%4], #4 @ r9 = dag = *dst32++ \n" + "ldr r9, [%4], #4 @ r9 = drb = *dst32++ \n" "strb r11,[%1], #1 @ r11= *src++ = 0 \n" "add %2, r12, %2 @ %2 = cov += r12 \n" "ands %2, %2, #255 @ %2 = cov &= 255 \n" "beq 1b @ if coverage == 0 loop back \n" "add r10,%2, %2, LSR #7 @ r10= ca = cov+(cov>>7) \n" "mul r10,r14,r10 @ r10= ca *= alpha \n" - "and r7, r8, r9 @ r7 = drb = dag & MASK \n" + "and r7, r8, r9 @ r7 = dga = drb & MASK \n" "mov r10,r10,LSR #8 @ r10= ca >>= 8 \n" - "and r9, r8, r9, LSL #8 @ r9 = dag = (dag<<8) 
& MASK \n" - "sub r12,r6, r7, LSR #8 @ r12= crb = rb - (drb>>8) \n" - "sub r5, %0, r9, LSR #8 @ r5 = cag = ag - (dag>>8) \n" - "mla r7, r12,r10,r7 @ r7 = drb += crb * ca \n" + "and r9, r8, r9, LSL #8 @ r9 = drb = (drb<<8) & MASK \n" + "sub r12,r6, r7, LSR #8 @ r12= cga = ga - (dga>>8) \n" + "sub r5, %0, r9, LSR #8 @ r5 = crb = rb - (drb>>8) \n" + "mla r7, r12,r10,r7 @ r7 = dga += cga * ca \n" "subs %3, %3, #1 @ len-- \n" - "mla r9, r5, r10,r9 @ r9 = dag += cag * ca \n" - "and r7, r8, r7 @ r7 = drb &= MASK \n" - "and r9, r8, r9 @ r9 = dag &= MASK \n" - "orr r9, r7, r9, LSR #8 @ r9 = dag = drb | (dag>>8) \n" + "mla r9, r5, r10,r9 @ r9 = drb += crb * ca \n" + "and r7, r8, r7 @ r7 = dga &= MASK \n" + "and r9, r8, r9 @ r9 = drb &= MASK \n" + "orr r9, r7, r9, LSR #8 @ r9 = drb = dga | (drb>>8) \n" "str r9, [%4, #-4] @ dst32[-1] = r9 \n" "bgt 2b \n" "b 9f \n" "@ --- Solid alpha loop --------------------------------------- \n" "3: @ Loop used when coverage == 256 \n" - "orr r9, %0, r6, LSL #8 @ r9 = argb \n" + "orr r9, %0, r6, LSL #8 @ r9 = rgba \n" "str r9, [%4, #-4] @ dst32[-1] = r9 \n" "4: @ Loop used for when coverage*alpha == 0 \n" "subs %3, %3, #1 @ len-- \n" "ble 9f \n" "5: \n" "ldrb r12,[%1] @ r12= *src \n" - "ldr r9, [%4], #4 @ r9 = dag = *dst32++ \n" + "ldr r9, [%4], #4 @ r9 = drb = *dst32++ \n" "strb r11,[%1], #1 @ r11= *src++ = 0 \n" "add %2, r12, %2 @ %2 = cov += r12 \n" "ands %2, %2, #255 @ %2 = cov &= 255 \n" @@ -78,21 +79,21 @@ path_w4i1o4_arm(byte * restrict argb, byte * restrict src, byte cov, int len, by "cmp %2, #255 @ if coverage == solid \n" "beq 3b @ loop back \n" "add r10,%2, %2, LSR #7 @ r10= ca = cov+(cov>>7) \n" - "and r7, r8, r9 @ r7 = drb = dag & MASK \n" - "and r9, r8, r9, LSL #8 @ r9 = dag = (dag<<8) & MASK \n" - "sub r12,r6, r7, LSR #8 @ r12= crb = rb - (drb>>8) \n" - "sub r5, %0, r9, LSR #8 @ r5 = cag = ag - (dag>>8) \n" - "mla r7, r12,r10,r7 @ r7 = drb += crb * ca \n" + "and r7, r8, r9 @ r7 = dga = drb & MASK \n" + "and r9, r8, r9, LSL #8 
@ r9 = dga = (drb<<8) & MASK \n" + "sub r12,r6, r7, LSR #8 @ r12= cga = ga - (dga>>8) \n" + "sub r5, %0, r9, LSR #8 @ r5 = crb = rb - (drb>>8) \n" + "mla r7, r12,r10,r7 @ r7 = dga += cga * ca \n" "subs %3, %3, #1 @ len-- \n" - "mla r9, r5, r10,r9 @ r9 = dag += cag * ca \n" - "and r7, r8, r7 @ r7 = drb &= MASK \n" - "and r9, r8, r9 @ r9 = dag &= MASK \n" - "orr r9, r7, r9, LSR #8 @ r9 = dag = drb | (dag>>8) \n" + "mla r9, r5, r10,r9 @ r9 = drb += crb * ca \n" + "and r7, r8, r7 @ r7 = dga &= MASK \n" + "and r9, r8, r9 @ r9 = drb &= MASK \n" + "orr r9, r7, r9, LSR #8 @ r9 = drb = dga | (drb>>8) \n" "str r9, [%4, #-4] @ dst32[-1] = r9 \n" "bgt 5b \n" "9: @ End \n" : - "+r" (argb), + "+r" (rgba), "+r" (src), "+r" (cov), "+r" (len), @@ -129,8 +130,8 @@ static void loadtile8_arm(byte * restrict src, int sw, byte * restrict dst, int "2: \n" "LDRB r4, [%[src]], #1 @ r4 = *src++ \n" "SUBS r5, r5, #1 \n" - "STRB r11,[%[dst]], #1 @ *dst++ = 255 \n" "STRB r4, [%[dst]], #1 @ *dst++ = r4 \n" + "STRB r11,[%[dst]], #1 @ *dst++ = 255 \n" "BGT 2b \n" "ADD %[src],%[src],%[sw] @ src += sw \n" "ADD %[dst],%[dst],%[dw] @ dst += dw \n" @@ -161,10 +162,10 @@ static void loadtile8_arm(byte * restrict src, int sw, byte * restrict dst, int "LDRB r6, [%[src]], #1 @ r6 = *src++ \n" "LDRB r7, [%[src]], #1 @ r7 = *src++ \n" "SUBS r5, r5, #3 \n" - "STRB r11,[r8], #1 @ *dp++ = 255 \n" "STRB r4, [r8], #1 @ *dp++ = r4 \n" "STRB r6, [r8], #1 @ *dp++ = r6 \n" "STRB r7, [r8], #1 @ *dp++ = r7 \n" + "STRB r11,[r8], #1 @ *dp++ = 255 \n" "BGT 2b \n" "ADD %[src],%[src],%[sw] @ src += sw \n" "ADD %[dst],%[dst],%[dw] @ dst += dw \n" diff --git a/draw/archport.c b/draw/archport.c index f4fea5bc..337ad3c1 100644 --- a/draw/archport.c +++ b/draw/archport.c @@ -7,15 +7,15 @@ typedef unsigned char byte; #define MASK 0xFF00FF00; static void -path_w4i1o4_32bit(byte * restrict argb, +path_w4i1o4_32bit(byte * restrict rgba, byte * restrict src, byte cov, int len, byte * restrict dst) { /* COLOR * coverage + DST * 
(256-coverage) = (COLOR - DST)*coverage + DST*256 */ unsigned int *dst32 = (unsigned int *)(void *)dst; - int alpha = argb[0]; - unsigned int rb = argb[1] | (argb[3] << 16); - unsigned int ag = 255 | (argb[2] << 16); + int alpha = rgba[3]; + unsigned int rb = rgba[0] | (rgba[2] << 16); + unsigned int ga = rgba[1] | 0xFF0000; if (alpha == 0) return; @@ -25,23 +25,23 @@ path_w4i1o4_32bit(byte * restrict argb, alpha += alpha>>7; /* alpha is now in the 0...256 range */ while (len--) { - unsigned int ca, drb, dag, crb, cag; + unsigned int ca, drb, dga, crb, cga; cov += *src; *src++ = 0; ca = cov + (cov>>7); /* ca is in 0...256 range */ ca = (ca*alpha)>>8; /* ca is is in 0...256 range */ - dag = *dst32++; + drb = *dst32++; if (ca != 0) { - drb = dag & MASK; - dag = (dag<<8) & MASK; + dga = drb & MASK; + drb = (drb<<8) & MASK; + cga = ga - (dga>>8); crb = rb - (drb>>8); - cag = ag - (dag>>8); + dga += cga * ca; drb += crb * ca; - dag += cag * ca; + dga &= MASK; drb &= MASK; - dag &= MASK; - dag = drb | (dag>>8); - dst32[-1] = dag; + drb = dga | (drb>>8); + dst32[-1] = drb; } } } @@ -49,133 +49,42 @@ path_w4i1o4_32bit(byte * restrict argb, { while (len--) { - unsigned int ca, drb, dag, crb, cag; + unsigned int ca, drb, dga, crb, cga; cov += *src; *src++ = 0; ca = cov + (cov>>7); /* ca is in 0...256 range */ - dag = *dst32++; + drb = *dst32++; if (ca == 0) continue; if (ca == 255) { - dag = (rb<<8) | ag; + drb = (ga<<8) | rb; } else { - drb = dag & MASK; - dag = (dag<<8) & MASK; + dga = drb & MASK; + drb = (drb<<8) & MASK; + cga = ga - (dga>>8); crb = rb - (drb>>8); - cag = ag - (dag>>8); + dga += cga * ca; drb += crb * ca; - dag += cag * ca; + dga &= MASK; drb &= MASK; - dag &= MASK; - dag = drb | (dag>>8); + drb = dga |(drb>>8); } - dst32[-1] = dag; + dst32[-1] = drb; } } } static void -duff_4o4_32bit(byte * restrict sp, int sw, byte * restrict dp, int dw, int w0, int h) -{ - unsigned int *sp32 = (unsigned int *)(void *)sp; - unsigned int *dp32 = (unsigned int *)(void 
*)dp; - - /* duff_non(sp0, sw, 4, dp0, dw, w0, h); */ - - sw = (sw>>2)-w0; - dw = (dw>>2)-w0; - while (h--) - { - int w = w0; - while (w--) - { - unsigned int sag = *sp32++; - unsigned int dag = *dp32++; - unsigned int srb, drb; - int alpha = sag & 255; - if (alpha == 0) - continue; - alpha += alpha>>7; - sag |= 0xFF; - drb = dag & MASK; - dag = (dag<<8) & MASK; - srb = (sag>>8) & ~MASK; - sag = sag & ~MASK; - srb -= (drb>>8); - sag -= (dag>>8); - drb += srb * alpha; - dag += sag * alpha; - drb &= MASK; - dag &= MASK; - dag = drb | (dag>>8); - dp32[-1] = dag; - } - sp32 += sw; - dp32 += dw; - } -} - -static void -duff_4i1o4_32bit(byte * restrict sp, int sw, - byte * restrict mp, int mw, - byte * restrict dp, int dw, int w0, int h) -{ - unsigned int *sp32 = (unsigned int *)(void *)sp; - unsigned int *dp32 = (unsigned int *)(void *)dp; - - /* duff_nimon(sp, sw, 4, mp, mw, 1, dp, dw, w0, h); */ - - sw = (sw>>2)-w0; - dw = (dw>>2)-w0; - mw -= w0; - while (h--) - { - int w = w0; - while (w--) - { - unsigned int sag = *sp32++; - unsigned int dag = *dp32++; - unsigned int srb, drb, alpha, ma; - alpha = sag & 255; - ma = *mp++; - if (alpha == 0) - continue; - ma += ma>>7; - if (ma == 0) - continue; - alpha += alpha>>7; - alpha = (alpha*ma)>>8; - sag |= 0xFF; - drb = dag & MASK; - dag = (dag<<8) & MASK; - srb = (sag>>8) & ~MASK; - sag = sag & ~MASK; - srb -= (drb>>8); - sag -= (dag>>8); - drb += srb * alpha; - dag += sag * alpha; - drb &= MASK; - dag &= MASK; - dag = drb | (dag>>8); - dp32[-1] = dag; - } - sp32 += sw; - mp += mw; - dp32 += dw; - } -} - -static void -text_w4i1o4_32bit(byte * restrict argb, +text_w4i1o4_32bit(byte * restrict rgba, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h) { unsigned int *dst32 = (unsigned int *)(void *)dst; - unsigned int alpha = argb[0]; - unsigned int rb = argb[1] | (argb[3] << 16); - unsigned int ag = 255 | (argb[2] << 16); + unsigned int alpha = rgba[3]; + unsigned int rb = rgba[1] | (rgba[2] << 16); + 
unsigned int ga = rgba[2] | 0xFF0000; if (alpha == 0) return; @@ -185,29 +94,29 @@ text_w4i1o4_32bit(byte * restrict argb, if (alpha != 255) { - alpha += alpha>>7; + alpha += alpha>>7; /* alpha is now in the 0...256 range */ while (h--) { int w = w0; while (w--) { - unsigned int ca, drb, dag, crb, cag; + unsigned int ca, drb, dga, crb, cga; ca = *src++; - dag = *dst32++; + dga = *dst32++; ca += ca>>7; ca = (ca*alpha)>>8; if (ca == 0) continue; - drb = dag & MASK; - dag = (dag<<8) & MASK; + dga = drb & MASK; + drb = (drb<<8) & MASK; + cga = ga - (dga>>8); crb = rb - (drb>>8); - cag = ag - (dag>>8); + dga += cga * ca; drb += crb * ca; - dag += cag * ca; + dga &= MASK; drb &= MASK; - dag &= MASK; - dag = drb | (dag>>8); - dst32[-1] = dag; + drb = dga | (drb>>8); + dst32[-1] = drb; } src += srcw; dst32 += dstw; @@ -215,28 +124,27 @@ text_w4i1o4_32bit(byte * restrict argb, } else { - alpha += alpha>>7; while (h--) { int w = w0; while (w--) { - unsigned int ca, drb, dag, crb, cag; + unsigned int ca, drb, dga, crb, cga; ca = *src++; - dag = *dst32++; + drb = *dst32++; ca += ca>>7; if (ca == 0) continue; - drb = dag & MASK; - dag = (dag<<8) & MASK; + dga = drb & MASK; + drb = (drb<<8) & MASK; + cga = ga - (dga>>8); crb = rb - (drb>>8); - cag = ag - (dag>>8); + dga += cga * ca; drb += crb * ca; - dag += cag * ca; + dga &= MASK; drb &= MASK; - dag &= MASK; - dag = drb | (dag>>8); - dst32[-1] = dag; + drb = dga | (drb>>8); + dst32[-1] = drb; } src += srcw; dst32 += dstw; @@ -313,9 +221,9 @@ img_4o4_32bit(byte * restrict src, byte cov, int len, byte * restrict dst, a = (((c >>8)-(a >>8)) * vd + a ) & MASK; a1 = (((c1>>8)-(a1>>8)) * vd + a1) & MASK; } - sa = (a>>8) & 0xFF; + sa = (a1>>24); sa = FZ_COMBINE(FZ_EXPAND(sa), FZ_EXPAND(cov)); - a |= 0xFF00; + a1 |= 0xFF000000; d = *dst32++; d1 = d & MASK; d = (d<<8) & MASK; @@ -328,15 +236,15 @@ img_4o4_32bit(byte * restrict src, byte cov, int len, byte * restrict dst, } static void -img_w4i1o4_32bit(byte *argb, byte * restrict src, 
byte cov, int len, +img_w4i1o4_32bit(byte *rgba, byte * restrict src, byte cov, int len, byte * restrict dst, fz_pixmap *image, int u, int v, int fa, int fb) { byte *samples = image->samples; int w = image->w; int h = image->h-1; - int alpha = FZ_EXPAND(argb[0]); - unsigned int rb = argb[1] | (argb[3] << 16); - unsigned int ag = 255 | (argb[2] << 16); + int alpha = FZ_EXPAND(rgba[3]); + unsigned int rb = rgba[0] | (rgba[2] << 16); + unsigned int ga = rgba[1] | 0xFF0000; unsigned int *dst32 = (unsigned int *)(void *)dst; if (alpha == 0) @@ -345,10 +253,10 @@ img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len, { while (len--) { - unsigned int ca, drb, dag, crb, cag; + unsigned int ca, drb, dga, crb, cga; unsigned int a, b; cov += *src; *src = 0; src++; - dag = *dst32++; + drb = *dst32++; ca = FZ_COMBINE(FZ_EXPAND(cov), alpha); if (ca != 0) { @@ -396,16 +304,16 @@ img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len, } if (ca != 0) { - drb = dag & MASK; - dag = (dag<<8) & MASK; + dga = drb & MASK; + drb = (drb<<8) & MASK; + cga = ga - (dga>>8); crb = rb - (drb>>8); - cag = ag - (dag>>8); + dga += cga * ca; drb += crb * ca; - dag += cag * ca; + dga &= MASK; drb &= MASK; - dag &= MASK; - dag = drb | (dag>>8); - dst32[-1] = dag; + drb = dga | (drb>>8); + dst32[-1] = drb; } u += fa; v += fb; @@ -415,10 +323,10 @@ img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len, { while (len--) { - unsigned int ca, drb, dag, crb, cag; + unsigned int ca, drb, dga, crb, cga; unsigned int a, b; cov += *src; *src = 0; src++; - dag = *dst32++; + drb = *dst32++; if (cov != 0) { int ui, ui1, vi, vi1, ud, vd; @@ -466,21 +374,21 @@ img_w4i1o4_32bit(byte *argb, byte * restrict src, byte cov, int len, { if (ca == 256) { - dag = (rb<<8) | ag; + drb = (ga<<8) | rb; } else { - drb = dag & MASK; - dag = (dag<<8) & MASK; + dga = drb & MASK; + drb = (drb<<8) & MASK; + cga = ga - (dga>>8); crb = rb - (drb>>8); - cag = ag - (dag>>8); + dga += cga * ca; drb 
+= crb * ca; - dag += cag * ca; + dga &= MASK; drb &= MASK; - dag &= MASK; - dag = drb | (dag>>8); + drb = dga | (drb>>8); } - dst32[-1] = dag; + dst32[-1] = drb; } } u += fa; @@ -560,10 +468,8 @@ img_1o1_32bit(byte * restrict src, byte cov, int len, byte * restrict dst, void fz_accelerate(void) { - if (sizeof(int) == 4 && sizeof(unsigned int) == 4) + if (sizeof(int) == 4 && sizeof(unsigned int) == 4 && !fz_isbigendian()) { - fz_duff_4o4 = duff_4o4_32bit; - fz_duff_4i1o4 = duff_4i1o4_32bit; fz_path_w4i1o4 = path_w4i1o4_32bit; fz_text_w4i1o4 = text_w4i1o4_32bit; fz_img_4o4 = img_4o4_32bit; diff --git a/draw/archx86.c b/draw/archx86.c index 0c313f33..5418e9f7 100644 --- a/draw/archx86.c +++ b/draw/archx86.c @@ -40,7 +40,7 @@ static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int d { int ts = *s++; int ma = *mp++ + 1; - int sa = ((ts & 0xff) * ma) >> 8; + int sa = (((ts>>24) & 0xff) * ma) >> 8; int ssa = 255 - sa; __m64 d0 = _mm_cvtsi32_si64(*d); @@ -50,11 +50,11 @@ static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int d __m64 mma = _mm_set1_pi16(ma); __m64 mssa = _mm_set1_pi16(ssa); - /* unpack 0000argb => a0r0g0b0 */ + /* unpack 0000rgba => r0g0b0a0 */ __m64 d1 = _mm_unpacklo_pi8(d0, mzero); __m64 s1 = _mm_unpacklo_pi8(s0, mzero); - /* s1 * ma => a0r0g0b0 */ + /* s1 * ma => r0g0b0a0 */ __m64 msma = _mm_mullo_pi16(s1, mma); /* d1 * mssa */ __m64 mdssa = _mm_mullo_pi16(d1, mssa); @@ -79,6 +79,8 @@ static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int d #if 0 /* TODO */ +/* Needs to be rgba, not bgra, as well as needing finishing */ + static inline unsigned getargb(unsigned *s, int w, int h, int u, int v) { diff --git a/draw/imagedraw.c b/draw/imagedraw.c index 81d2bb05..dd887f53 100644 --- a/draw/imagedraw.c +++ b/draw/imagedraw.c @@ -13,7 +13,7 @@ getmask(byte *s, int w, int h, int u, int v) } static inline byte * -getargb(byte *s, int w, int h, int u, int v) +getrgba(byte *s, int w, int h, int 
u, int v) { if (u < 0) u = 0; if (v < 0) v = 0; @@ -23,7 +23,7 @@ getargb(byte *s, int w, int h, int u, int v) } static inline byte * -getag(byte *s, int w, int h, int u, int v) +getga(byte *s, int w, int h, int u, int v) { if (u < 0) u = 0; if (v < 0) v = 0; @@ -49,14 +49,14 @@ lerp(int a, int b, int t) } static inline void -lerpag(byte *dst, byte *a, byte *b, int t) +lerpga(byte *dst, byte *a, byte *b, int t) { dst[0] = lerp(a[0], b[0], t); dst[1] = lerp(a[1], b[1], t); } static inline void -lerpargb(byte *dst, byte *a, byte *b, int t) +lerprgba(byte *dst, byte *a, byte *b, int t) { dst[0] = lerp(a[0], b[0], t); dst[1] = lerp(a[1], b[1], t); @@ -81,7 +81,7 @@ samplemask(byte *s, int w, int h, int u, int v) } static inline void -sampleag(byte *s, int w, int h, int u, int v, byte *out) +samplega(byte *s, int w, int h, int u, int v, byte *out) { byte ab[4]; byte cd[4]; @@ -89,17 +89,17 @@ sampleag(byte *s, int w, int h, int u, int v, byte *out) int vi = v >> 16; int ud = u & 0xFFFF; int vd = v & 0xFFFF; - byte *a = getag(s, w, h, ui, vi); - byte *b = getag(s, w, h, ui+1, vi); - byte *c = getag(s, w, h, ui, vi+1); - byte *d = getag(s, w, h, ui+1, vi+1); - lerpag(ab, a, b, ud); - lerpag(cd, c, d, ud); - lerpag(out, ab, cd, vd); + byte *a = getga(s, w, h, ui, vi); + byte *b = getga(s, w, h, ui+1, vi); + byte *c = getga(s, w, h, ui, vi+1); + byte *d = getga(s, w, h, ui+1, vi+1); + lerpga(ab, a, b, ud); + lerpga(cd, c, d, ud); + lerpga(out, ab, cd, vd); } static inline void -sampleargb(byte *s, int w, int h, int u, int v, byte *out) +samplergba(byte *s, int w, int h, int u, int v, byte *out) { byte ab[4]; byte cd[4]; @@ -107,13 +107,13 @@ sampleargb(byte *s, int w, int h, int u, int v, byte *out) int vi = v >> 16; int ud = u & 0xFFFF; int vd = v & 0xFFFF; - byte *a = getargb(s, w, h, ui, vi); - byte *b = getargb(s, w, h, ui+1, vi); - byte *c = getargb(s, w, h, ui, vi+1); - byte *d = getargb(s, w, h, ui+1, vi+1); - lerpargb(ab, a, b, ud); - lerpargb(cd, c, d, ud); - 
lerpargb(out, ab, cd, vd); + byte *a = getrgba(s, w, h, ui, vi); + byte *b = getrgba(s, w, h, ui+1, vi); + byte *c = getrgba(s, w, h, ui, vi+1); + byte *d = getrgba(s, w, h, ui+1, vi+1); + lerprgba(ab, a, b, ud); + lerprgba(cd, c, d, ud); + lerprgba(out, ab, cd, vd); } static inline void @@ -170,7 +170,7 @@ img_2o2(byte * restrict src, byte cov, int len, byte * restrict dst, byte *samples = image->samples; int w = image->w; int h = image->h; - byte ag[2]; + byte ga[2]; while (len--) { @@ -178,12 +178,12 @@ img_2o2(byte * restrict src, byte cov, int len, byte * restrict dst, cov += *src; *src = 0; src++; if (cov != 0) { - sampleag(samples, w, h, u, v, ag); - sa = FZ_COMBINE(FZ_EXPAND(ag[0]), FZ_EXPAND(cov)); + samplega(samples, w, h, u, v, ga); + sa = FZ_COMBINE(FZ_EXPAND(ga[1]), FZ_EXPAND(cov)); if (sa != 0) { - dst[0] = FZ_BLEND(255, dst[0], sa); - dst[1] = FZ_BLEND(ag[1], dst[1], sa); + dst[0] = FZ_BLEND(ga[0], dst[0], sa); + dst[1] = FZ_BLEND(255, dst[1], sa); } } dst += 2; @@ -199,7 +199,7 @@ img_4o4(byte * restrict src, byte cov, int len, byte * restrict dst, byte *samples = image->samples; int w = image->w; int h = image->h; - byte argb[4]; + byte rgba[4]; while (len--) { @@ -207,14 +207,14 @@ img_4o4(byte * restrict src, byte cov, int len, byte * restrict dst, cov += *src; *src = 0; src++; if (cov != 0) { - sampleargb(samples, w, h, u, v, argb); - sa = FZ_COMBINE(FZ_EXPAND(argb[0]), FZ_EXPAND(cov)); + samplergba(samples, w, h, u, v, rgba); + sa = FZ_COMBINE(FZ_EXPAND(rgba[3]), FZ_EXPAND(cov)); if (sa != 0) { - dst[0] = FZ_BLEND(255, dst[0], sa); - dst[1] = FZ_BLEND(argb[1], dst[1], sa); - dst[2] = FZ_BLEND(argb[2], dst[2], sa); - dst[3] = FZ_BLEND(argb[3], dst[3], sa); + dst[0] = FZ_BLEND(rgba[0], dst[0], sa); + dst[1] = FZ_BLEND(rgba[1], dst[1], sa); + dst[2] = FZ_BLEND(rgba[2], dst[2], sa); + dst[3] = FZ_BLEND(255, dst[3], sa); } } dst += 4; @@ -224,18 +224,18 @@ img_4o4(byte * restrict src, byte cov, int len, byte * restrict dst, } static void 
-img_w2i1o2(byte *ag, byte * restrict src, byte cov, int len, byte * restrict dst, +img_w2i1o2(byte *ga, byte * restrict src, byte cov, int len, byte * restrict dst, fz_pixmap *image, int u, int v, int fa, int fb) { byte *samples = image->samples; int w = image->w; int h = image->h; - int alpha = FZ_EXPAND(ag[0]); - byte g = ag[1]; + byte g = ga[0]; + byte a = ga[1]; - if (alpha == 0) + if (a == 0) return; - if (alpha != 256) + if (a != 255) { while (len--) { @@ -244,12 +244,12 @@ img_w2i1o2(byte *ag, byte * restrict src, byte cov, int len, byte * restrict dst if (cov != 0) { ca = samplemask(samples, w, h, u, v); - ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca)); - ca = FZ_COMBINE(ca, alpha); + ca = FZ_COMBINE(FZ_EXPAND(cov), FZ_EXPAND(ca)); + ca = FZ_COMBINE(ca, FZ_EXPAND(a)); if (ca != 0) { - dst[0] = FZ_BLEND(255, dst[0], ca); - dst[1] = FZ_BLEND(g, dst[1], ca); + dst[0] = FZ_BLEND(g, dst[0], ca); + dst[1] = FZ_BLEND(255, dst[1], ca); } } dst += 2; @@ -266,11 +266,11 @@ img_w2i1o2(byte *ag, byte * restrict src, byte cov, int len, byte * restrict dst if (cov != 0) { ca = samplemask(samples, w, h, u, v); - ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca)); + ca = FZ_COMBINE(FZ_EXPAND(cov), FZ_EXPAND(ca)); if (ca != 0) { - dst[0] = FZ_BLEND(255, dst[0], ca); - dst[1] = FZ_BLEND(g, dst[1], ca); + dst[0] = FZ_BLEND(g, dst[0], ca); + dst[1] = FZ_BLEND(255, dst[1], ca); } } dst += 2; @@ -281,20 +281,20 @@ img_w2i1o2(byte *ag, byte * restrict src, byte cov, int len, byte * restrict dst } static void -img_w4i1o4(byte *argb, byte * restrict src, byte cov, int len, byte * restrict dst, +img_w4i1o4(byte *rgba, byte * restrict src, byte cov, int len, byte * restrict dst, fz_pixmap *image, int u, int v, int fa, int fb) { byte *samples = image->samples; int w = image->w; int h = image->h; - int alpha = FZ_EXPAND(argb[0]); - byte r = argb[1]; - byte g = argb[2]; - byte b = argb[3]; + byte r = rgba[0]; + byte g = rgba[1]; + byte b = rgba[2]; + byte a = rgba[3]; - if (alpha == 0) + if (a 
== 0) return; - if (alpha != 256) + if (a != 255) { while (len--) { @@ -303,14 +303,14 @@ img_w4i1o4(byte *argb, byte * restrict src, byte cov, int len, byte * restrict d if (cov != 0) { ca = samplemask(samples, w, h, u, v); - ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca)); - ca = FZ_COMBINE(ca, alpha); + ca = FZ_COMBINE(FZ_EXPAND(cov), FZ_EXPAND(ca)); + ca = FZ_COMBINE(ca, FZ_EXPAND(a)); if (ca != 0) { - dst[0] = FZ_BLEND(255, dst[0], ca); - dst[1] = FZ_BLEND(r, dst[1], ca); - dst[2] = FZ_BLEND(g, dst[2], ca); - dst[3] = FZ_BLEND(b, dst[3], ca); + dst[0] = FZ_BLEND(r, dst[0], ca); + dst[1] = FZ_BLEND(g, dst[1], ca); + dst[2] = FZ_BLEND(b, dst[2], ca); + dst[3] = FZ_BLEND(255, dst[3], ca); } } dst += 4; @@ -327,13 +327,13 @@ img_w4i1o4(byte *argb, byte * restrict src, byte cov, int len, byte * restrict d if (cov != 0) { ca = samplemask(samples, w, h, u, v); - ca =FZ_COMBINE(FZ_EXPAND(cov),FZ_EXPAND(ca)); + ca = FZ_COMBINE(FZ_EXPAND(cov), FZ_EXPAND(ca)); if (ca != 0) { - dst[0] = FZ_BLEND(255, dst[0], ca); - dst[1] = FZ_BLEND(r, dst[1], ca); - dst[2] = FZ_BLEND(g, dst[2], ca); - dst[3] = FZ_BLEND(b, dst[3], ca); + dst[0] = FZ_BLEND(r, dst[0], ca); + dst[1] = FZ_BLEND(g, dst[1], ca); + dst[2] = FZ_BLEND(b, dst[2], ca); + dst[3] = FZ_BLEND(255, dst[3], ca); } } dst += 4; diff --git a/draw/imageunpack.c b/draw/imageunpack.c index 6a1a1628..48c420bf 100644 --- a/draw/imageunpack.c +++ b/draw/imageunpack.c @@ -19,19 +19,22 @@ static void decodetile(fz_pixmap *pix, int skip, float *decode) int justinvert = 1; unsigned int mask; - min[0] = 0; - max[0] = 255; - sub[0] = 255; - - for (i = skip; i < n; i++) + for (i = 0; i < n-skip; i++) { - min[i] = decode[(i - skip) * 2] * 255; - max[i] = decode[(i - skip) * 2 + 1] * 255; + min[i] = decode[i * 2] * 255; + max[i] = decode[i * 2 + 1] * 255; sub[i] = max[i] - min[i]; needed |= (min[i] != 0) | (max[i] != 255); justinvert &= min[i] == 255 && max[i] == 0 && sub[i] == -255; } + if (skip) + { + min[i] = 0; + max[i] = 255; + sub[i] = 
255; + } + if (fz_isbigendian()) mask = 0x00ff00ff; else @@ -115,8 +118,8 @@ static void init1(void) { x = tbit(bits, k); t1pad0[i][k] = x; - t1pad1[i][k * 2 + 0] = 255; - t1pad1[i][k * 2 + 1] = x; + t1pad1[i][k * 2 + 0] = x; + t1pad1[i][k * 2 + 1] = 255; } } @@ -178,7 +181,7 @@ static void loadtile1(byte * restrict src, int sw, byte * restrict dst, int dw, dp = dst; for (x = 0; x < w; x++) { - if ((x % pad) == 0) + if ((x % pad) == pad-1) *dp++ = 255; *dp++ = tbit(src, x); } @@ -204,14 +207,14 @@ static void loadtile1(byte * restrict src, int sw, byte * restrict dst, int dw, while (h--) \ { \ byte *dp = dst; \ - tpad = 0; \ + tpad = pad; \ for (x = 0; x < w; x++) \ { \ - if (!tpad--) { \ - tpad = pad-1; \ + *dp++ = getf(src, x); \ + if (--tpad == 0) { \ + tpad = pad; \ *dp++ = 255; \ } \ - *dp++ = getf(src, x); \ } \ src += sw; \ dst += dw; \ @@ -247,8 +250,8 @@ static void loadtile8(byte * restrict src, int sw, byte * restrict dst, int dw, int x; for (x = w; x > 0; x --) { - *dst++ = 255; *dst++ = *src++; + *dst++ = 255; } src += sw; dst += dw; @@ -263,10 +266,10 @@ static void loadtile8(byte * restrict src, int sw, byte * restrict dst, int dw, int x; for (x = w; x > 0; x -= 3) { - *dp++ = 255; *dp++ = *src++; *dp++ = *src++; *dp++ = *src++; + *dp++ = 255; } src += sw; dst += dw; @@ -278,16 +281,16 @@ static void loadtile8(byte * restrict src, int sw, byte * restrict dst, int dw, while (h--) { byte *dp = dst; - int tpad = 1; + int tpad = pad; int x; for (x = w; x > 0; x--) { + *dp++ = *src++; tpad--; if (tpad == 0) { tpad = pad; *dp++ = 255; } - *dp++ = *src++; } src += sw; dst += dw; diff --git a/draw/meshdraw.c b/draw/meshdraw.c index c0e219c4..72a723e0 100644 --- a/draw/meshdraw.c +++ b/draw/meshdraw.c @@ -147,12 +147,12 @@ drawscan(fz_pixmap *pix, int y, int x1, int x2, int *v1, int *v2, int n) while (w--) { - *p++ = 255; for (k = 0; k < n; k++) { *p++ = v[k] >> 16; v[k] += dv[k]; } + *p++ = 255; } } @@ -376,11 +376,11 @@ fz_rendershade(fz_shade *shade, 
fz_matrix ctm, fz_pixmap *dest, fz_bbox bbox) d = dest->samples + ((bbox.x0 - dest->x) + (y - dest->y) * dest->w) * dest->n; for (x = bbox.x0; x < bbox.x1; x++) { - sa = s[0]; + sa = s[1]; ssa = 255 - sa; - d[0] = s[0] + fz_mul255(d[0], ssa); for (k = 0; k < dest->colorspace->n; k++) - d[k+1] = fz_mul255(clut[s[1]][k], sa) + fz_mul255(d[k+1], ssa); + d[k] = fz_mul255(clut[s[0]][k], sa) + fz_mul255(d[k+1], ssa); + d[k] = s[1] + fz_mul255(d[k], ssa); s += 2; d += 1 + dest->colorspace->n; } diff --git a/draw/porterduff.c b/draw/porterduff.c index fd7a9d45..05f00f63 100644 --- a/draw/porterduff.c +++ b/draw/porterduff.c @@ -1,5 +1,74 @@ #include "fitz.h" +/* + * The functions in this file implement various flavours of Porter-Duff + * blending. + * + * We take the following as definitions: + * + * Cx = Color (from plane x) + * ax = Alpha (from plane x) + * cx = Cx.ax = Premultiplied color (from plane x) + * + * The general PorterDuff blending equation is: + * + * Blend Z = X op Y cz = Fx.cx + Fy. cy where Fx and Fy depend on op + * + * The two operations we use in this file are: '(X in Y) over Z' and + * 'S over Z'. The definitions of the 'over' and 'in' operations are as + * follows: + * + * For S over Z, Fs = 1, Fz = 1-as + * For X in Y, Fx = ay, Fy = 0 + * + * We have 2 choices; we can either work with premultiplied data, or non + * premultiplied data. Our choice is the premultiplied form. + * + * First the premultiplied case: + * + * Let S = (X in Y) + * Let R = (X in Y) over Z = S over Z + * + * cs = cx.Fx + cy.Fy (where Fx = ay, Fy = 0) + * = cx.ay + * as = ax.Fx + ay.Fy + * = ax.ay + * + * cr = cs.Fs + cz.Fz (where Fs = 1, Fz = 1-as) + * = cs + cz.(1-as) + * = cx.ay + cz.(1-ax.ay) + * ar = as.Fs + az.Fz + * = as + az.(1-as) + * = ax.ay + az.(1-ax.ay) + * + * This has various nice properties, like not needing any divisions, and + * being symmetric in color and alpha, so this is what we use. 
Because we + * went through the pain of deriving the non premultiplied forms, we list + * them here too, though they are not used. + * + * Non Pre-multiplied case: + * + * Cs.as = Fx.Cx.ax + Fy.Cy.ay (where Fx = ay, Fy = 0) + * = Cx.ay.ax + * Cs = (Cx.ay.ax)/(ay.ax) + * = Cx + * Cr.ar = Fs.Cs.as + Fz.Cz.az (where Fs = 1, Fz = 1-as) + * = Cs.as + (1-as).Cz.az + * = Cx.ax.ay + Cz.az.(1-ax.ay) + * Cr = (Cx.ax.ay + Cz.az.(1-ax.ay))/(ax.ay + az.(1-ax.ay)) + * + * Much more complex, it seems. However, if we could restrict ourselves to + * the case where we were always plotting onto an opaque background (i.e. + * az = 1), then: + * + * Cr = Cx.(ax.ay) + Cz.(1-ax.ay) + * = (Cx-Cz)*(ax.ay) + Cz (a single MLA operation) + * ar = 1 + * + * Sadly, this is not true in the general case, so we abandon this effort + * and stick to using the premultiplied form. + */ + typedef unsigned char byte; /* @@ -11,6 +80,7 @@ static void duff_non(byte * restrict sp, int sw, int sn, byte * restrict dp, int dw, int w0, int h) { int k; + sw -= w0*sn; dw -= w0*sn; while (h--) @@ -18,12 +88,10 @@ duff_non(byte * restrict sp, int sw, int sn, byte * restrict dp, int dw, int w0, int w = w0; while (w--) { - /* RJW: Alpha handling suspicious here; sp[0] counts twice */ - int sa = FZ_EXPAND(sp[0]); - dp[0] = FZ_BLEND(255, dp[0], sa); - for (k = 1; k < sn; k++) + int ssa = 255 - sp[sn-1]; + for (k = 0; k < sn; k++) { - dp[k] = FZ_BLEND(sp[k], dp[k], sa); + dp[k] = sp[k] + fz_mul255(dp[k], ssa); } sp += sn; dp += sn; @@ -38,6 +106,7 @@ static void duff_nimon(byte * restrict sp, int sw, int sn, byte * restrict mp, int mw, int mn, byte * restrict dp, int dw, int w0, int h) { int k; + sw -= w0*sn; mw -= w0*mn; dw -= w0*sn; @@ -46,12 +115,11 @@ duff_nimon(byte * restrict sp, int sw, int sn, byte * restrict mp, int mw, int m int w = w0; while (w--) { - /* TODO: validate this */ - int ma = FZ_COMBINE(FZ_EXPAND(mp[0]), FZ_EXPAND(sp[0])); - dp[0] = FZ_BLEND(255, dp[0], ma); - for (k = 1; k < sn; k++) + int ma 
= mp[0]; + int ssa = 255-fz_mul255(sp[sn-1], ma); + for (k = 0; k < sn; k++) { - dp[k] = FZ_BLEND(sp[k], dp[k], ma); + dp[k] = fz_mul255(sp[k], ma) + fz_mul255(dp[k], ssa); } sp += sn; mp += mn; @@ -64,22 +132,22 @@ duff_nimon(byte * restrict sp, int sw, int sn, byte * restrict mp, int mw, int m } static void -duff_1o1(byte * restrict sp, int sw, byte * restrict dp, int dw, int w0, int h) +duff_1o1(byte * restrict sp0, int sw, byte * restrict dp0, int dw, int w0, int h) { /* duff_non(sp0, sw, 1, dp0, dw, w0, h); */ - sw -= w0; - dw -= w0; while (h--) { + byte *sp = sp0; + byte *dp = dp0; int w = w0; while (w--) { - dp[0] = FZ_BLEND(255, dp[0], FZ_EXPAND(sp[0])); + dp[0] = sp[0] + fz_mul255(dp[0], 255 - sp[0]); sp ++; dp ++; } - sp += sw; - dp += dw; + sp0 += sw; + dp0 += dw; } } @@ -87,6 +155,7 @@ static void duff_4o4(byte *sp, int sw, byte *dp, int dw, int w0, int h) { /* duff_non(sp0, sw, 4, dp0, dw, w0, h); */ + sw -= w0<<2; dw -= w0<<2; while (h--) @@ -94,11 +163,11 @@ duff_4o4(byte *sp, int sw, byte *dp, int dw, int w0, int h) int w = w0; while (w--) { - int alpha = FZ_EXPAND(sp[0]); - dp[0] = FZ_BLEND(255, dp[0], alpha); - dp[1] = FZ_BLEND(sp[1], dp[1], alpha); - dp[2] = FZ_BLEND(sp[2], dp[2], alpha); - dp[3] = FZ_BLEND(sp[3], dp[3], alpha); + byte ssa = 255 - sp[3]; + dp[0] = sp[0] + fz_mul255(dp[0], ssa); + dp[1] = sp[1] + fz_mul255(dp[1], ssa); + dp[2] = sp[2] + fz_mul255(dp[2], ssa); + dp[3] = sp[3] + fz_mul255(dp[3], ssa); sp += 4; dp += 4; } @@ -111,16 +180,16 @@ static void duff_1i1o1(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restrict dp, int dw, int w0, int h) { /* duff_nimon(sp0, sw, 1, mp0, mw, 1, dp0, dw, w0, h); */ - sw -= w0; - mw -= w0; - dw -= w0; + while (h--) { int w = w0; while (w--) { - int ma = FZ_COMBINE(FZ_EXPAND(mp[0]), FZ_EXPAND(sp[0])); - dp[0] = FZ_BLEND(255, dp[0], ma); + byte ma = mp[0]; + byte sa = fz_mul255(sp[0], ma); + byte ssa = 255 - sa; + dp[0] = sa + fz_mul255(dp[0], ssa); sp ++; mp ++; dp ++; @@ -132,9 
+201,37 @@ duff_1i1o1(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restri } static void +duff_2i1o2(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restrict dp, int dw, int w0, int h) +{ + + /* duff_nimon(sp, sw, 2, mp, mw, 1, dp, dw, w0, h); */ + sw -= w0<<1; + dw -= w0<<1; + mw -= w0; + while (h--) + { + int w = w0; + while (w--) + { + byte ma = mp[0]; + byte ssa = 255 - fz_mul255(sp[1], ma); + dp[0] = fz_mul255(sp[0], ma) + fz_mul255(dp[0], ssa); + dp[1] = fz_mul255(sp[1], ma) + fz_mul255(dp[1], ssa); + sp += 2; + mp += 1; + dp += 2; + } + sp += sw; + mp += mw; + dp += dw; + } +} + +static void duff_4i1o4(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restrict dp, int dw, int w0, int h) { /* duff_nimon(sp, sw, 4, mp, mw, 1, dp, dw, w0, h); */ + sw -= w0<<2; dw -= w0<<2; mw -= w0; @@ -143,11 +240,12 @@ duff_4i1o4(byte * restrict sp, int sw, byte * restrict mp, int mw, byte * restri int w = w0; while (w--) { - int ma = FZ_COMBINE(FZ_EXPAND(mp[0]), FZ_EXPAND(sp[0])); - dp[0] = FZ_BLEND(255, dp[0], ma); - dp[1] = FZ_BLEND(sp[1], dp[1], ma); - dp[2] = FZ_BLEND(sp[2], dp[2], ma); - dp[3] = FZ_BLEND(sp[3], dp[3], ma); + byte ma = mp[0]; + byte ssa = 255 - fz_mul255(sp[3], ma); + dp[0] = fz_mul255(sp[0], ma) + fz_mul255(dp[0], ssa); + dp[1] = fz_mul255(sp[1], ma) + fz_mul255(dp[1], ssa); + dp[2] = fz_mul255(sp[2], ma) + fz_mul255(dp[2], ssa); + dp[3] = fz_mul255(sp[3], ma) + fz_mul255(dp[3], ssa); sp += 4; mp += 1; dp += 4; @@ -176,39 +274,39 @@ path_1o1(byte * restrict src, byte cov, int len, byte * restrict dst) } static void -path_w2i1o2(byte * restrict ag, byte * restrict src, byte cov, int len, byte * restrict dst) +path_w2i1o2(byte * restrict ga, byte * restrict src, byte cov, int len, byte * restrict dst) { - int alpha = FZ_EXPAND(ag[0]); - byte g = ag[1]; + byte g = ga[0]; + int a = FZ_EXPAND(ga[1]); while (len--) { int ca; cov += *src; *src = 0; src++; - ca = FZ_COMBINE(FZ_EXPAND(cov), alpha); - dst[0] = 
FZ_BLEND(255, dst[0], ca); - dst[1] = FZ_BLEND(g, dst[1], ca); + ca = FZ_COMBINE(FZ_EXPAND(cov), a); + dst[0] = FZ_BLEND(g, dst[0], ca); + dst[1] = FZ_BLEND(255, dst[1], ca); dst += 2; } } static void -path_w4i1o4(byte * restrict argb, byte * restrict src, byte cov, int len, byte * restrict dst) +path_w4i1o4(byte * restrict rgba, byte * restrict src, byte cov, int len, byte * restrict dst) { - int alpha = FZ_EXPAND(argb[0]); - byte r = argb[1]; - byte g = argb[2]; - byte b = argb[3]; + byte r = rgba[0]; + byte g = rgba[1]; + byte b = rgba[2]; + int a = FZ_EXPAND(rgba[3]); while (len--) { int ca; cov += *src; *src = 0; src++; - ca = FZ_COMBINE(FZ_EXPAND(cov), alpha); - dst[0] = FZ_BLEND(255, dst[0], ca); - dst[1] = FZ_BLEND(r, dst[1], ca); - dst[2] = FZ_BLEND(g, dst[2], ca); - dst[3] = FZ_BLEND(b, dst[3], ca); + ca = FZ_COMBINE(FZ_EXPAND(cov), a); + dst[0] = FZ_BLEND(r, dst[0], ca); + dst[1] = FZ_BLEND(g, dst[1], ca); + dst[2] = FZ_BLEND(b, dst[2], ca); + dst[3] = FZ_BLEND(255, dst[3], ca); dst += 4; } } @@ -220,6 +318,7 @@ path_w4i1o4(byte * restrict argb, byte * restrict src, byte cov, int len, byte * static void text_1o1(byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h) { + srcw -= w0; dstw -= w0; while (h--) @@ -227,8 +326,7 @@ text_1o1(byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, i int w = w0; while (w--) { - int c = FZ_EXPAND(src[0]); - dst[0] = FZ_BLEND(255, dst[0], c); + dst[0] = src[0] + fz_mul255(dst[0], 255 - src[0]); src++; dst++; } @@ -238,10 +336,10 @@ text_1o1(byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, i } static void -text_w2i1o2(byte * restrict ag, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h) +text_w2i1o2(byte * restrict ga, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h) { - int alpha = FZ_EXPAND(ag[0]); - byte g = ag[1]; + byte g = ga[0]; + int a = FZ_EXPAND(ga[1]); srcw -= w0; dstw -= w0<<1; @@ -250,9 +348,9 
@@ text_w2i1o2(byte * restrict ag, byte * restrict src, int srcw, byte * restrict d int w = w0; while (w--) { - int c = FZ_COMBINE(FZ_EXPAND(src[0]), alpha); - dst[0] = FZ_BLEND(255, dst[0], c); - dst[1] = FZ_BLEND(g, dst[1], c); + int c = FZ_COMBINE(FZ_EXPAND(src[0]), a); + dst[0] = FZ_BLEND(g, dst[0], c); + dst[1] = FZ_BLEND(255, dst[1], c); src ++; dst += 2; } @@ -262,12 +360,12 @@ text_w2i1o2(byte * restrict ag, byte * restrict src, int srcw, byte * restrict d } static void -text_w4i1o4(byte * restrict argb, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h) +text_w4i1o4(byte * restrict rgba, byte * restrict src, int srcw, byte * restrict dst, int dstw, int w0, int h) { - int alpha = FZ_EXPAND(argb[0]); - byte r = argb[1]; - byte g = argb[2]; - byte b = argb[3]; + byte r = rgba[0]; + byte g = rgba[1]; + byte b = rgba[2]; + int a = FZ_EXPAND(rgba[3]); srcw -= w0; dstw -= w0<<2; @@ -276,11 +374,11 @@ text_w4i1o4(byte * restrict argb, byte * restrict src, int srcw, byte * restrict int w = w0; while (w--) { - int c = FZ_COMBINE(FZ_EXPAND(src[0]), alpha); - dst[0] = FZ_BLEND(255, dst[0], c); - dst[1] = FZ_BLEND(r, dst[1], c); - dst[2] = FZ_BLEND(g, dst[2], c); - dst[3] = FZ_BLEND(b, dst[3], c); + int c = FZ_COMBINE(FZ_EXPAND(src[0]), a); + dst[0] = FZ_BLEND(r, dst[0], c); + dst[1] = FZ_BLEND(g, dst[1], c); + dst[2] = FZ_BLEND(b, dst[2], c); + dst[3] = FZ_BLEND(255, dst[3], c); src ++; dst += 4; } @@ -298,6 +396,7 @@ void (*fz_duff_nimon)(byte*,int,int,byte*,int,int,byte*,int,int,int) = duff_nimo void (*fz_duff_1o1)(byte*,int,byte*,int,int,int) = duff_1o1; void (*fz_duff_4o4)(byte*,int,byte*,int,int,int) = duff_4o4; void (*fz_duff_1i1o1)(byte*,int,byte*,int,byte*,int,int,int) = duff_1i1o1; +void (*fz_duff_2i1o2)(byte*,int,byte*,int,byte*,int,int,int) = duff_2i1o2; void (*fz_duff_4i1o4)(byte*,int,byte*,int,byte*,int,int,int) = duff_4i1o4; void (*fz_path_1o1)(byte*,byte,int,byte*) = path_1o1; |