author    Tor Andersson <tor@ghostscript.com>    2010-07-08 22:31:11 +0200
committer Tor Andersson <tor@ghostscript.com>    2010-07-08 22:31:11 +0200
commit    839eda4994874c02cbaf546a40d374a4edb16539 (patch)
tree      34ac89eef9fdb771f3d3fd96e32b42de5c5046a5 /draw
parent    7f662f1e98e0b4717d06f1b667fb969d35e60274 (diff)
download  mupdf-839eda4994874c02cbaf546a40d374a4edb16539.tar.xz
Remove unmaintained MMX optimized function and runtime detection.
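The "runtime detection" in question is the fz_acceleratearch hook at the bottom of the diff, which swapped the portable fz_duff_4i1o4 function pointer for the MMX variant when fz_cpuflags reported MMX support. As a rough sketch of how such a flag is typically populated (illustrative only, not code from the MuPDF tree; it assumes GCC/clang's <cpuid.h>):

#include <cpuid.h>

/* Hypothetical probe: returns nonzero when CPUID leaf 1 reports MMX
 * (EDX bit 23). A real build would run this once at startup and cache
 * the result in something like fz_cpuflags. */
static int cpu_has_mmx(void)
{
	unsigned int eax, ebx, ecx, edx;
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 0;
	return (edx >> 23) & 1;
}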
Diffstat (limited to 'draw')
-rw-r--r--  draw/archx86.c  |  228
1 file changed, 0 insertions(+), 228 deletions(-)
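For reference, the removed duff_4i1o4mmx below blends a premultiplied RGBA source through an 8-bit coverage mask into an RGBA destination. A minimal scalar sketch of the same per-pixel arithmetic (the function name is illustrative; it assumes a little-endian pixel with alpha in the high byte, matching the ts>>24 extraction in the MMX code):

typedef unsigned char byte;

/* Scalar equivalent of the removed blend: for each channel,
 * dst = (src * ma + dst * (255 - sa)) / 256, where ma is the mask
 * alpha biased to 1..256 and sa is the effective source alpha. */
static void duff_4i1o4_scalar(byte *sp0, int sw, byte *mp0, int mw,
	byte *dp0, int dw, int w0, int h)
{
	while (h--)
	{
		byte *sp = sp0;
		byte *mp = mp0;
		byte *dp = dp0;
		int w = w0;
		while (w--)
		{
			int ma = *mp++ + 1;		/* bias so >> 8 divides by 256 */
			int sa = (sp[3] * ma) >> 8;	/* effective source alpha */
			int ssa = 255 - sa;
			int k;
			for (k = 0; k < 4; k++)
				dp[k] = (sp[k] * ma + dp[k] * ssa) >> 8;
			sp += 4;
			dp += 4;
		}
		sp0 += sw;
		mp0 += mw;
		dp0 += dw;
	}
}

The MMX version computes exactly this, one pixel (four 16-bit lanes) per iteration.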
diff --git a/draw/archx86.c b/draw/archx86.c
deleted file mode 100644
index 6bdb1f6b..00000000
--- a/draw/archx86.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * x86 specific render optims live here
- */
-
-#include "fitz.h"
-
-typedef unsigned char byte;
-
-/* always surround cpu specific code with HAVE_XXX */
-#ifdef HAVE_MMX
-
-/* gcc >= 3.4 needs -mmmx to enable the MMX intrinsics; icc and MSVC
-shouldn't require anything extra */
-#include <mmintrin.h>
-
-static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int dw, int w0, int h)
-{
- __m64 mzero = _mm_setzero_si64();
- while (h--)
- {
- byte *sp = sp0;
- byte *mp = mp0;
- byte *dp = dp0;
-
- unsigned *s = (unsigned *)sp;
- unsigned *d = (unsigned *)dp;
-
- int w = w0;
-
- /* TODO: unroll and process two pixels/iteration */
- while (w--)
- {
- int ts = *s++;
- int ma = *mp++ + 1;
- int sa = (((ts>>24) & 0xff) * ma) >> 8;
- int ssa = 255 - sa;
-
- __m64 d0 = _mm_cvtsi32_si64(*d);
- __m64 s0 = _mm_cvtsi32_si64(ts);
-
- /* 4 x 9 bit alpha value */
- __m64 mma = _mm_set1_pi16(ma);
- __m64 mssa = _mm_set1_pi16(ssa);
-
- /* unpack 0000rgba => r0g0b0a0 */
- __m64 d1 = _mm_unpacklo_pi8(d0, mzero);
- __m64 s1 = _mm_unpacklo_pi8(s0, mzero);
-
- /* s1 * ma => r0g0b0a0 */
- __m64 msma = _mm_mullo_pi16(s1, mma);
- /* d1 * mssa */
- __m64 mdssa = _mm_mullo_pi16(d1, mssa);
-
- __m64 res0 = _mm_add_pi16(msma, mdssa);
- /* TODO: is it possible to get rid of the shift? */
- __m64 res1 = _mm_srli_pi16(res0, 8);
-
- /* pack */
- __m64 res2 = _mm_packs_pu16(res1, mzero);
-
- *d++ = _mm_cvtsi64_si32(res2);
- }
-
- sp0 += sw;
- mp0 += mw;
- dp0 += dw;
- }
-
- _mm_empty();
-}
-
-#if 0 /* TODO */
-
-/* Needs to be rgba, not bgra, and still needs finishing */
-
-static inline unsigned
-getargb(unsigned *s, int w, int h, int u, int v)
-{
- if ((u < 0) | (u >= w) | (v < 0) | (v >= h)) return 0;
- return s[w * v + u];
-}
-
-static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM)
-{
- /* since mmx does not have an unsigned multiply instruction we use
- 17.15 fixed point */
- u0 >>= 1; v0 >>= 1;
- fa >>= 1; fb >>= 1;
- fc >>= 1; fd >>= 1;
-
- while (h--)
- {
- unsigned *s = (unsigned *)src;
- unsigned *d = (unsigned *)dst0;
- int u = u0;
- int v = v0;
- int w = w0;
-
- __m64 mzero = _mm_setzero_si64();
- __m64 m256 = _mm_set1_pi16(256);
- __m64 malphamask = _mm_cvtsi32_si64(0xff);
-
- while (w--)
- {
- int iu = u >> 15;
- int iv = v >> 15;
-
- int fu = u & 0x7fff;
- int fv = v & 0x7fff;
-
- int atedge =
- (iu < 0) | (iu >= (srcw - 1)) |
- (iv < 0) | (iv >= (srch - 1));
-
- __m64 ms0s1;
- __m64 ms2s3;
-
- if (atedge)
- {
- unsigned s0, s1, s2, s3;
-
- /* edge cases use scalar loads */
- s0 = getargb(s, srcw, srch, iu + 0, iv + 0);
- s1 = getargb(s, srcw, srch, iu + 1, iv + 0);
- s2 = getargb(s, srcw, srch, iu + 0, iv + 1);
- s3 = getargb(s, srcw, srch, iu + 1, iv + 1);
-
- /* move to mmx registers */
- ms0s1 = _mm_set_pi32(s1, s0);
- ms2s3 = _mm_set_pi32(s3, s2);
- }
- else
- {
- __m64 *m0s = (__m64*)(s + srcw * (iv + 0) + iu);
- __m64 *m2s = (__m64*)(s + srcw * (iv + 1) + iu);
-
- /* faster vector loads for interior */
- ms0s1 = *m0s;
- ms2s3 = *m2s;
- }
-
- /* unpack src into 4x16bit vectors */
- __m64 ms0 = _mm_unpacklo_pi8(ms0s1, mzero);
- __m64 ms1 = _mm_unpackhi_pi8(ms0s1, mzero);
- __m64 ms2 = _mm_unpacklo_pi8(ms2s3, mzero);
- __m64 ms3 = _mm_unpackhi_pi8(ms2s3, mzero);
-
- /* lerp fu */
-
- __m64 mfu = _mm_set1_pi16(fu);
-
- /* t2 = (s1 - s0) * fu + s0 */
- __m64 t0 = _mm_sub_pi16(ms1, ms0);
- __m64 t1 = _mm_mulhi_pi16(t0, mfu);
- t1 = _mm_adds_pi16(t1, t1);
- __m64 t2 = _mm_add_pi16(t1, ms0);
-
- /* t3 = (s3 - s2) * fu + s2 */
- __m64 t3 = _mm_sub_pi16(ms3, ms2);
- __m64 t4 = _mm_mulhi_pi16(t3, mfu);
- t4 = _mm_adds_pi16(t4, t4);
- __m64 t5 = _mm_add_pi16(t4, ms2);
-
- /* lerp fv */
-
- __m64 mfv = _mm_set1_pi16(fv);
-
- /* t8 = (t5 - t2) * fv + t2 */
- __m64 t6 = _mm_sub_pi16(t5, t2);
- __m64 t7 = _mm_mulhi_pi16(t6, mfv);
- t7 = _mm_adds_pi16(t7, t7);
- __m64 t8 = _mm_add_pi16(t7, t2);
-
- /* load and prepare dst */
- __m64 d0 = _mm_cvtsi32_si64(*d);
-
- __m64 d1 = _mm_unpacklo_pi8(d0, mzero);
-
- /* get src alpha */
-
- /* splat alpha */
- __m64 a0001 = _mm_and_si64(malphamask, t8);
- __m64 a0011 = _mm_unpacklo_pi16(a0001, a0001);
- __m64 a1111 = _mm_unpacklo_pi16(a0011, a0011);
-
- /* 255+1 - sa */
- __m64 sna = _mm_sub_pi16(m256, a1111);
-
- /* blend src with dst */
- __m64 d2 = _mm_mullo_pi16(d1, sna);
- __m64 d3 = _mm_srli_pi16(d2, 8);
- __m64 d4 = _mm_add_pi16(t8, d3);
-
- /* pack and store new dst */
- __m64 d5 = _mm_packs_pu16(d4, mzero);
-
- *d++ = _mm_cvtsi64_si32(d5);
-
- u += fa;
- v += fb;
- }
-
- dst0 += dstw;
- u0 += fc;
- v0 += fd;
- }
-
- _mm_empty();
-}
-
-#endif
-
-#endif /* HAVE_MMX */
-
-#if defined (ARCH_X86) || defined(ARCH_X86_64)
-void
-fz_acceleratearch(void)
-{
-#ifdef HAVE_MMX
- if (fz_cpuflags & HAVE_MMX)
- {
- fz_duff_4i1o4 = duff_4i1o4mmx;
-// TODO fz_img_4o4 = img_4o4mmx;
- }
-#endif
-}
-#endif
-
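The #if 0 block above sketches an unfinished bilinear image sampler: u and v walk the source in 17.15 fixed point (halved from 16.16 because, as the comment notes, MMX has no unsigned multiply, so the fraction must fit the signed _mm_mulhi_pi16), the four neighbouring texels are lerped by the fractional bits, and the result is blended over the destination. A scalar sketch of one sample under the same conventions (helper names are illustrative; alpha sits in the low byte, as the 0xff alpha mask in the MMX code implies):

/* Clamp-to-zero texel fetch, as in the removed getargb */
static unsigned getargb(unsigned *s, int w, int h, int u, int v)
{
	if (u < 0 || u >= w || v < 0 || v >= h)
		return 0;
	return s[w * v + u];
}

/* Linear interpolation with a 15-bit fraction: a + (b - a) * f / 32768,
 * mirroring the mulhi+adds+add sequence in the MMX code */
static int lerp15(int a, int b, int f)
{
	return a + (((b - a) * f) >> 15);
}

/* Bilinearly sample the source at 17.15 fixed-point (u,v), then blend
 * the result over dst: dst = src + dst * (256 - sa) / 256 */
static unsigned sample_over(unsigned *s, int srcw, int srch, int u, int v, unsigned dst)
{
	int iu = u >> 15, iv = v >> 15;
	int fu = u & 0x7fff, fv = v & 0x7fff;
	unsigned s0 = getargb(s, srcw, srch, iu + 0, iv + 0);
	unsigned s1 = getargb(s, srcw, srch, iu + 1, iv + 0);
	unsigned s2 = getargb(s, srcw, srch, iu + 0, iv + 1);
	unsigned s3 = getargb(s, srcw, srch, iu + 1, iv + 1);
	unsigned out = 0, res = 0;
	int k, sa;

	for (k = 0; k < 32; k += 8)
	{
		int top = lerp15((s0 >> k) & 0xff, (s1 >> k) & 0xff, fu);
		int bot = lerp15((s2 >> k) & 0xff, (s3 >> k) & 0xff, fu);
		out |= (unsigned)lerp15(top, bot, fv) << k;
	}

	sa = out & 0xff;	/* source alpha from the low byte */
	for (k = 0; k < 32; k += 8)
	{
		int sc = (out >> k) & 0xff;
		int dc = (dst >> k) & 0xff;
		int r = sc + ((dc * (256 - sa)) >> 8);
		res |= (unsigned)(r > 255 ? 255 : r) << k;	/* saturate like _mm_packs_pu16 */
	}
	return res;
}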