diff options
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | Makerules | 16 | ||||
-rw-r--r-- | apps/pdfdraw.c | 1 | ||||
-rw-r--r-- | apps/win_main.c | 1 | ||||
-rw-r--r-- | apps/x11_main.c | 1 | ||||
-rw-r--r-- | draw/archx86.c | 228 | ||||
-rw-r--r-- | fitz/base_cpudep.c | 283 | ||||
-rw-r--r-- | fitz/base_string.c | 6 | ||||
-rw-r--r-- | fitz/fitz_base.h | 28 | ||||
-rw-r--r-- | win32/mupdf/mupdf.vcproj | 8 |
10 files changed, 9 insertions, 564 deletions
@@ -74,7 +74,6 @@ $(CMAPDUMP_EXE): $(OBJDIR)/cmapdump.o FITZ_HDR := fitz/fitz.h fitz/fitz_base.h fitz/fitz_draw.h fitz/fitz_stream.h FITZ_SRC := $(addprefix fitz/, \ - base_cpudep.c \ base_error.c base_memory.c base_string.c base_unicode.c \ base_hash.c base_matrix.c base_rect.c \ crypt_aes.c crypt_arc4.c crypt_md5.c \ @@ -24,13 +24,6 @@ SYS_FREETYPE_LIB := `pkg-config --libs freetype2` X11LIBS := -lX11 -lXext PDFVIEW_EXE = $(X11VIEW_EXE) -ifeq "$(build)" "release" -ifeq "$(shell uname -m)" "i686" -CFLAGS += -ffast-math -mmmx -msse -msse2 -march=k8 -DARCH_X86 -DRAW_ARCH_SRC := archx86.c -endif -endif - endif ifeq "$(OS)" "Darwin" @@ -49,15 +42,6 @@ CFLAGS += -m32 LDFLAGS += -m32 endif -ifeq "$(build)" "release" -ifeq "$(arch)" "amd64" -CFLAGS += -ffast-math -mmmx -msse -msse2 -DARCH_X86_64 -else -CFLAGS += -ffast-math -mmmx -msse -msse2 -DARCH_X86 -endif -DRAW_ARCH_SRC := archx86.c -endif - endif # MinGW build depends on thirdparty diff --git a/apps/pdfdraw.c b/apps/pdfdraw.c index 45079ffb..c3b93f6e 100644 --- a/apps/pdfdraw.c +++ b/apps/pdfdraw.c @@ -466,7 +466,6 @@ int main(int argc, char **argv) int c; enum { NO_FILE_OPENED, NO_PAGES_DRAWN, DREW_PAGES } state; - fz_cpudetect(); fz_accelerate(); while ((c = fz_getopt(argc, argv, "b:p:o:r:gtxms")) != -1) diff --git a/apps/win_main.c b/apps/win_main.c index 90aef1d2..8ab8b371 100644 --- a/apps/win_main.c +++ b/apps/win_main.c @@ -773,7 +773,6 @@ WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShow int fd; int code; - fz_cpudetect(); fz_accelerate(); pdfapp_init(&gapp); diff --git a/apps/x11_main.c b/apps/x11_main.c index 5dd9efb5..8172b651 100644 --- a/apps/x11_main.c +++ b/apps/x11_main.c @@ -590,7 +590,6 @@ int main(int argc, char **argv) if (argc - fz_optind == 1) pageno = atoi(argv[fz_optind++]); - fz_cpudetect(); fz_accelerate(); winopen(); diff --git a/draw/archx86.c b/draw/archx86.c deleted file mode 100644 index 6bdb1f6b..00000000 --- a/draw/archx86.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * x86 specific render optims live here - */ - -#include "fitz.h" - -typedef unsigned char byte; - -/* always surround cpu specific code with HAVE_XXX */ -#ifdef HAVE_MMX - -/* -mmmx for gcc >= 3.4 enables the mmx intrinsic functions, icc and VC -shouldn't require anything */ -#include <mmintrin.h> - -static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int dw, int w0, int h) -{ - __m64 mzero = _mm_setzero_si64(); - while (h--) - { - byte *sp = sp0; - byte *mp = mp0; - byte *dp = dp0; - - unsigned *s = (unsigned *)sp; - unsigned *d = (unsigned *)dp; - - int w = w0; - - /* TODO: unroll and process two pixels/iteration */ - while (w--) - { - int ts = *s++; - int ma = *mp++ + 1; - int sa = (((ts>>24) & 0xff) * ma) >> 8; - int ssa = 255 - sa; - - __m64 d0 = _mm_cvtsi32_si64(*d); - __m64 s0 = _mm_cvtsi32_si64(ts); - - /* 4 x 9 bit alpha value */ - __m64 mma = _mm_set1_pi16(ma); - __m64 mssa = _mm_set1_pi16(ssa); - - /* unpack 0000rgba => r0g0b0a0 */ - __m64 d1 = _mm_unpacklo_pi8(d0, mzero); - __m64 s1 = _mm_unpacklo_pi8(s0, mzero); - - /* s1 * ma => r0g0b0a0 */ - __m64 msma = _mm_mullo_pi16(s1, mma); - /* d1 * mssa */ - __m64 mdssa = _mm_mullo_pi16(d1, mssa); - - __m64 res0 = _mm_add_pi16(msma, mdssa); - /* TODO: is it possible to get rid of the shift? */ - __m64 res1 = _mm_srli_pi16(res0, 8); - - /* pack */ - __m64 res2 = _mm_packs_pu16(res1, mzero); - - *d++ = _mm_cvtsi64_si32(res2); - } - - sp0 += sw; - mp0 += mw; - dp0 += dw; - } - - _mm_empty(); -} - -#if 0 /* TODO */ - -/* Needs to be rgba, not bgra, as well as needing finishing */ - -static inline unsigned -getargb(unsigned *s, int w, int h, int u, int v) -{ - if ((u < 0) | (u >= w) | (v < 0) | (v >= h)) return 0; - return s[w * v + u]; -} - -static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM) -{ - /* since mmx does not have an unsigned multiply instruction we use - 17.15 fixed point */ - u0 >>= 1; v0 >>= 1; - fa >>= 1; fb >>= 1; - fc >>= 1; fd >>= 1; - - while (h--) - { - unsigned *s = (unsigned *)src; - unsigned *d = (unsigned *)dst0; - int u = u0; - int v = v0; - int w = w0; - - __m64 mzero = _mm_setzero_si64(); - __m64 m256 = _mm_set1_pi16(256); - __m64 malphamask = _mm_cvtsi32_si64(0xff); - - while (w--) - { - int iu = u >> 15; - int iv = v >> 15; - - int fu = u & 0x7fff; - int fv = v & 0x7fff; - - int atedge = - (iu < 0) | (iu >= (srcw - 1)) | - (iv < 0) | (iv >= (srch - 1)); - - __m64 ms0s1; - __m64 ms2s3; - - if (atedge) - { - unsigned s0, s1, s2, s3; - - /* edge cases use scalar loads */ - s0 = getargb(s, srcw, srch, iu + 0, iv + 0); - s1 = getargb(s, srcw, srch, iu + 1, iv + 0); - s2 = getargb(s, srcw, srch, iu + 0, iv + 1); - s3 = getargb(s, srcw, srch, iu + 1, iv + 1); - - /* move to mmx registers */ - ms0s1 = _mm_set_pi32(s1, s0); - ms2s3 = _mm_set_pi32(s3, s2); - } - else - { - __m64 *m0s = (__m64*)(s + srcw * (iv + 0) + iu); - __m64 *m2s = (__m64*)(s + srcw * (iv + 1) + iu); - - /* faster vector loads for interior */ - ms0s1 = *m0s; - ms2s3 = *m2s; - } - - /* unpack src into 4x16bit vectors */ - __m64 ms0 = _mm_unpacklo_pi8(ms0s1, mzero); - __m64 ms1 = _mm_unpackhi_pi8(ms0s1, mzero); - __m64 ms2 = _mm_unpacklo_pi8(ms2s3, mzero); - __m64 ms3 = _mm_unpackhi_pi8(ms2s3, mzero); - - /* lerp fu */ - - __m64 mfu = _mm_set1_pi16(fu); - - /* t2 = (s1 - s0) * fu + s0 */ - __m64 t0 = _mm_sub_pi16(ms1, ms0); - __m64 t1 = _mm_mulhi_pi16(t0, mfu); - t1 = _mm_adds_pi16(t1, t1); - __m64 t2 = _mm_add_pi16(t1, ms0); - - /* t3 = (s3 - s2) * fu + s2 */ - __m64 t3 = _mm_sub_pi16(ms3, ms2); - __m64 t4 = _mm_mulhi_pi16(t3, mfu); - t4 = _mm_adds_pi16(t4, t4); - __m64 t5 = _mm_add_pi16(t4, ms2); - - /* lerp fv */ - - __m64 mfv = _mm_set1_pi16(fv); - - /* t8 = (t5 - t2) * fv + t2 */ - __m64 t6 = _mm_sub_pi16(t5, t2); - __m64 t7 = _mm_mulhi_pi16(t6, mfv); - t7 = _mm_adds_pi16(t7, t7); - __m64 t8 = _mm_add_pi16(t7, t2); - - /* load and prepare dst */ - __m64 d0 = _mm_cvtsi32_si64(*d); - - __m64 d1 = _mm_unpacklo_pi8(d0, mzero); - - /* get src alpha */ - - /* splat alpha */ - __m64 a0001 = _mm_and_si64(malphamask, t8); - __m64 a0011 = _mm_unpacklo_pi16(a0001, a0001); - __m64 a1111 = _mm_unpacklo_pi16(a0011, a0011); - - /* 255+1 - sa */ - __m64 sna = _mm_sub_pi16(m256, a1111); - - /* blend src with dst */ - __m64 d2 = _mm_mullo_pi16(d1, sna); - __m64 d3 = _mm_srli_pi16(d2, 8); - __m64 d4 = _mm_add_pi16(t8, d3); - - /* pack and store new dst */ - __m64 d5 = _mm_packs_pu16(d4, mzero); - - *d++ = _mm_cvtsi64_si32(d5); - - u += fa; - v += fb; - } - - dst0 += dstw; - u0 += fc; - v0 += fd; - } - - _mm_empty(); -} - -#endif - -#endif /* HAVE_MMX */ - -#if defined (ARCH_X86) || defined(ARCH_X86_64) -void -fz_acceleratearch(void) -{ -#ifdef HAVE_MMX - if (fz_cpuflags & HAVE_MMX) - { - fz_duff_4i1o4 = duff_4i1o4mmx; -// TODO fz_img_4o4 = img_4o4mmx; - } -#endif -} -#endif - diff --git a/fitz/base_cpudep.c b/fitz/base_cpudep.c deleted file mode 100644 index 586a478a..00000000 --- a/fitz/base_cpudep.c +++ /dev/null @@ -1,283 +0,0 @@ -/* -run-time cpu feature detection code -mm, alphabet soup... - -Glenn Kennard <d98gk@efd.lth.se> -*/ - -#include "fitz.h" - -/* global run-time constant */ -unsigned fz_cpuflags = 0; - -int fz_isbigendian(void) -{ - static const int one = 1; - return *(char*)&one == 0; -} - -#ifndef HAVE_CPUDEP - -void fz_cpudetect(void) -{ -} - -#else - -#ifndef _WIN32 -#include <signal.h> /* signal/sigaction */ -#include <setjmp.h> /* sigsetjmp/siglongjmp */ -#endif - -/* -#ifdef _WIN32 -#define sigjmp_buf jmp_buf -#define sigsetjmp(a,b) setjmp(a) -#define siglongjmp longjmp -#endif -*/ - -typedef struct { - void (*test)(void); - const unsigned flag; - const char *name; -} featuretest; - - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - -#ifdef __GNUC__ -static void mmx(void) -{ __asm__ ("pand %mm0, %mm0\n\t"); } - -static void m3dnow(void) -{ __asm__ ("pavgusb %mm0, %mm0\n\t"); } - -static void mmxext(void) /* aka Extended 3DNow! */ -{ __asm__ ("pmaxsw %mm0, %mm0\n\t"); } - -static void sse(void) -{ __asm__ ("andps %xmm0, %xmm0\n\t"); } - -static void sse2(void) -{ __asm__ ("andpd %xmm0, %xmm0\n\t"); } - -/* -static void sse3(void) -{ __asm__ ("haddps %%xmm0, %%xmm0\n\t" : : : "%xmm0"); } -*/ -#else -static void mmx(void) -{ __asm pand mm0, mm0; } - -static void m3dnow(void) -{ __asm pavgusb mm0, mm0; } - -static void mmxext(void) /* aka Extended 3DNow! */ -{ __asm pmaxsw mm0, mm0; } - -static void sse(void) -{ __asm andps xmm0, xmm0; } - -static void sse2(void) -{ __asm andpd xmm0, xmm0; } -#endif - - -#ifdef ARCH_X86_64 -static void amd64(void) -#ifdef __GNUC__ -{ __asm__ ("and %rax, %rax\n\t"); } -#else -{ __asm and rax, rax; } -#endif -#endif - - -static const featuretest features[] = { - { mmx, HAVE_MMX, "mmx" }, - { m3dnow, HAVE_3DNOW, "3dnow" }, - { mmxext, HAVE_MMXEXT, "mmxext" }, - { sse, HAVE_SSE, "sse" }, - { sse2, HAVE_SSE2, "sse2" }, - /* { sse3, HAVE_SSE3, "sse3" }, */ -#ifdef ARCH_X86_64 - { amd64, HAVE_AMD64, "amd64" }, -#endif -}; - -#endif - -static int -enabled(char *env, const char *ext) -{ - int len; - char *s; - if (!env) - return 1; - len = strlen(ext); - s = env; - while ((s = strstr(s, ext))) - { - int atstart = s == env || *(s-1) == ',' || *(s-1) == ' '; - s += len; - if (atstart && (*s == ' ' || *s == ',' || *s == '\0')) { - return 1; - } - } - return 0; -} - -static void -dumpflags(void) -{ - unsigned f = fz_cpuflags; - int i, n; - - fputs("detected cpu features:", stdout); - n = 0; - for (i = 0; i < sizeof(features) / sizeof(featuretest); i++) - { - if (f & features[i].flag) - { - fputc(' ', stdout); - fputs(features[i].name, stdout); - n ++; - } - } - if (!n) - fputs(" none", stdout); - fputc('\n', stdout); -} - -#ifndef _WIN32 - -static sigjmp_buf jmpbuf; -static volatile sig_atomic_t canjump; - -static void -sigillhandler(int sig) -{ - if (!canjump) { - signal(sig, SIG_DFL); - raise(sig); - } - - canjump = 0; - siglongjmp(jmpbuf, 1); -} - -void fz_cpudetect(void) -{ - static int hasrun = 0; - - unsigned flags = 0; - int i; - void (*oldhandler)(int) = NULL; - void (*tmphandler)(int); - char *env; - - if (hasrun) - return; - hasrun = 1; - - env = getenv("CPUACCEL"); - - for (i = 0; i < sizeof(features) / sizeof(featuretest); i++) - { - canjump = 0; - - tmphandler = signal(SIGILL, sigillhandler); - if (!oldhandler) - oldhandler = tmphandler; - - if (sigsetjmp(jmpbuf, 1)) - { - /* test failed - disable feature */ - flags &= ~features[i].flag; - continue; - } - - canjump = 1; - - features[i].test(); - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - /* reset mmx/x87 pipeline state */ - if (features[i].flag & (HAVE_MMX | HAVE_3DNOW | HAVE_MMXEXT)) { - __asm__ __volatile__ ("emms\n\t"); - } -#endif - - /* if we got here the test succeeded */ - if (enabled(env, features[i].name)) - flags |= features[i].flag; - else - flags &= ~features[i].flag; - } - - /* restore previous signal handler */ - signal(SIGILL, oldhandler); - - fz_cpuflags = flags; - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - __asm__ __volatile__ ("emms\n\t"); -#endif - - dumpflags(); -} - -#else /* _WIN32 */ - -void fz_cpudetect(void) -{ - static int hasrun = 0; - - unsigned flags = 0; - int i; - char *env; - - if (hasrun) - return; - hasrun = 1; - - env = getenv("CPUACCEL"); - - for (i = 0; i < sizeof(features) / sizeof(featuretest); i++) - { - __try - { - features[i].test(); - } - __except(EXCEPTION_EXECUTE_HANDLER) - { - /* test failed - disable feature */ - flags &= ~features[i].flag; - continue; - } - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - if (features[i].flag & (HAVE_MMX | HAVE_3DNOW | HAVE_MMXEXT)) { - /* reset mmx/x87 pipeline state */ - __asm emms; - } -#endif - - /* if we got here the test succeeded */ - if (enabled(env, features[i].name)) - flags |= features[i].flag; - else - flags &= ~features[i].flag; - } - - fz_cpuflags = flags; - - dumpflags(); -} - - -#endif - -#endif - diff --git a/fitz/base_string.c b/fitz/base_string.c index e18d8720..b6b85865 100644 --- a/fitz/base_string.c +++ b/fitz/base_string.c @@ -2,6 +2,12 @@ #include <string.h> +int fz_isbigendian(void) +{ + static const int one = 1; + return *(char*)&one == 0; +} + char *fz_strsep(char **stringp, const char *delim) { char *ret = *stringp; diff --git a/fitz/fitz_base.h b/fitz/fitz_base.h index ec1eb9fc..9563b0c9 100644 --- a/fitz/fitz_base.h +++ b/fitz/fitz_base.h @@ -69,33 +69,8 @@ extern int gettimeofday(struct timeval *tv, struct timezone *tz); #endif #endif -/* - * CPU detection and flags - */ - -#if defined(ARCH_X86) || defined(ARCH_X86_64) -#define HAVE_CPUDEP -#define HAVE_MMX (1<<0) -#define HAVE_MMXEXT (1<<1) -#define HAVE_SSE (1<<2) -#define HAVE_SSE2 (1<<3) -#define HAVE_SSE3 (1<<4) -#define HAVE_3DNOW (1<<5) -#define HAVE_AMD64 (1<<6) #endif -#ifdef ARCH_ARM -#define HAVE_CPUDEP -#endif - -/* call this before using fitz */ -extern void fz_cpudetect(void); - -/* treat as constant! */ -extern unsigned fz_cpuflags; - -int fz_isbigendian(void); - /* * Base Fitz runtime. */ @@ -137,6 +112,9 @@ int fz_isbigendian(void); #define CLAMP(x,a,b) ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) ) #endif +/* runtime (hah!) test for endian-ness */ +int fz_isbigendian(void); + /* utf-8 encoding and decoding */ int chartorune(int *rune, char *str); int runetochar(char *str, int *rune); diff --git a/win32/mupdf/mupdf.vcproj b/win32/mupdf/mupdf.vcproj index 2816ca8b..4d7ac6c0 100644 --- a/win32/mupdf/mupdf.vcproj +++ b/win32/mupdf/mupdf.vcproj @@ -289,10 +289,6 @@ Name="fitz"
>
<File
- RelativePath="..\..\fitz\base_cpudep.c"
- >
- </File>
- <File
RelativePath="..\..\fitz\base_error.c"
>
</File>
@@ -505,10 +501,6 @@ >
</File>
<File
- RelativePath="..\..\draw\archx86.c"
- >
- </File>
- <File
RelativePath="..\..\draw\blendmodes.c"
>
</File>
|