diff options
author | Tor Andersson <tor@ghostscript.com> | 2004-11-28 17:41:15 +0100 |
---|---|---|
committer | Tor Andersson <tor@ghostscript.com> | 2004-11-28 17:41:15 +0100 |
commit | 85792218b05cb41d7dd4696443a4fdd6c16e1817 (patch) | |
tree | 8da4dab2e44204e3ebba47ed01ff029ab00df35d | |
parent | 2235b780ce692e1393fdd925eea0cdd9e1a422a1 (diff) | |
download | mupdf-85792218b05cb41d7dd4696443a4fdd6c16e1817.tar.xz |
gka fixes. use truetypes in fontfile.
-rw-r--r-- | TODO | 11 |
-rw-r--r-- | base/cpudep.c | 44 |
-rw-r--r-- | mupdf/fontfile.c | 16 |
-rw-r--r-- | render/rastport.c | 18 |
-rw-r--r-- | render/rastppc.c | 6 |
-rw-r--r-- | render/rastx86.c | 89 |
-rw-r--r-- | test/ximage.c | 38 |
7 files changed, 155 insertions, 67 deletions
@@ -1,3 +1,14 @@ +immediate plan: + 1 - pdf_resolve remake + 2 - refcount resources + 3 - put image load/scale into rastfuncs + 4 - altivec optimize + 5 - gtk+pdf + 6 - page labels + dests + outline + annots + 7 - global font/cmap cache + +--- + colorspace conversions (v2) - cal* - iccbased diff --git a/base/cpudep.c b/base/cpudep.c index 0301213d..c7af38b9 100644 --- a/base/cpudep.c +++ b/base/cpudep.c @@ -32,8 +32,8 @@ static void sse(void) static void sse2(void) { __asm__ ("andpd %xmm0, %xmm0\n\t"); } -static void sse3(void) -{ __asm__ ("haddps %%xmm0, %%xmm0\n\t" : : : "%xmm0"); } +/* static void sse3(void) */ +/* { __asm__ ("haddps %%xmm0, %%xmm0\n\t" : : : "%xmm0"); } */ #ifdef ARCH_X86_64 static void amd64(void) @@ -47,7 +47,7 @@ static const featuretest features[] = { { mmxext, HAVE_MMXEXT, "mmxext" }, { sse, HAVE_SSE, "sse" }, { sse2, HAVE_SSE2, "sse2" }, - { sse3, HAVE_SSE3, "sse3" }, +/* { sse3, HAVE_SSE3, "sse3" }, */ #ifdef ARCH_X86_64 { amd64, HAVE_AMD64, "amd64" } #endif @@ -105,33 +105,45 @@ sigillhandler(int sig) siglongjmp(jmpbuf, 1); } +static int +enabled(char *env, const char *ext) +{ + int len; + char *s; + if (!env) + return 1; + len = strlen(ext); + while ((s = strstr(env, ext))) + { + s += len; + if (*s == ' ' || *s == ',' || *s == '\0') + return 1; + } + return 0; +} + static void dumpflags(void) { unsigned f = fz_cpuflags; int i, n; - fputs("detected cpu features: ", stdout); + fputs("detected cpu features:", stdout); n = 0; for (i = 0; i < sizeof(features) / sizeof(featuretest); i++) { if (f & features[i].flag) { + fputc(' ', stdout); fputs(features[i].name, stdout); n ++; } } if (!n) - fputs("none", stdout); + fputs(" none", stdout); fputc('\n', stdout); } -/* called by runtime before main()... 
- * TODO: - * CPUACCEL=0 ./x11pdf disables detection - * CPUACCEL='mmx sse' enables only mmx and sse - * not set enables everything - */ void fz_cpudetect(void) { static int hasrun = 0; @@ -140,11 +152,14 @@ void fz_cpudetect(void) int i; void (*oldhandler)(int) = NULL; void (*tmphandler)(int); + char *env; if (hasrun) return; hasrun = 1; + env = getenv("CPUACCEL"); + for (i = 0; i < sizeof(features) / sizeof(featuretest); i++) { canjump = 0; @@ -165,7 +180,10 @@ void fz_cpudetect(void) features[i].test(); /* if we got here the test succeeded */ - flags |= features[i].flag; + if (enabled(env, features[i].name)) + flags |= features[i].flag; + else + flags &= ~features[i].flag; } /* restore previous signal handler */ @@ -176,7 +194,7 @@ void fz_cpudetect(void) dumpflags(); } -static __attribute__((constructor)) void fzcpudetect(void) +static __attribute__((constructor, used)) void fzcpudetect(void) { fz_cpudetect(); } diff --git a/mupdf/fontfile.c b/mupdf/fontfile.c index ff46c8f1..cdce1a86 100644 --- a/mupdf/fontfile.c +++ b/mupdf/fontfile.c @@ -42,11 +42,11 @@ static char *basenames[15] = static struct { char *collection; char *serif; char *gothic; } cidfonts[5] = { - { "Adobe-CNS1", "MOESung-Regular", "MOEKai-Regular" }, - { "Adobe-GB1", "gkai00mp", "gbsn00lp" }, - { "Adobe-Japan1", "WadaMin-Regular", "WadaMaruGo-Regular" }, - { "Adobe-Japan2", "WadaMin-RegularH", "WadaMaruGo-RegularH" }, - { "Adobe-Korea1", "Munhwa-Regular", "MunhwaGothic-Regular" }, + { "Adobe-CNS1", "bkai00mp.ttf", "bsmi00lp.ttf" }, + { "Adobe-GB1", "gkai00mp.ttf", "gbsn00lp.ttf" }, + { "Adobe-Japan1", "kochi-mincho.ttf", "kochi-gothic.ttf" }, + { "Adobe-Japan2", "kochi-mincho.ttf", "kochi-gothic.ttf" }, + { "Adobe-Korea1", "batang.ttf", "dotum.ttf" }, }; static void loadfontdata(int i, unsigned char **d, unsigned int *l) @@ -130,12 +130,14 @@ printf(" load system cid font '%s'\n", filename); fontdir = getenv("FONTDIR"); if (!fontdir) - return fz_throw("ioerror: FONTDIR environment not set"); + 
{ + fontdir = "/usr/local/share/font"; + fz_warn("FONTDIR environment not set"); + } strlcpy(path, fontdir, sizeof path); strlcat(path, "/", sizeof path); strlcat(path, filename, sizeof path); - strlcat(path, ".cid.cff", sizeof path); if (access(path, R_OK)) return fz_throw("ioerror: could not access file '%s'", path); diff --git a/render/rastport.c b/render/rastport.c index 9cae06d5..3eb89e43 100644 --- a/render/rastport.c +++ b/render/rastport.c @@ -264,15 +264,18 @@ static void msk_1o1(byte *src, byte *dst, int w) static void msk_w3i1o4(byte *rgb, byte *src, byte *dst, int n) { + byte rgb0 = rgb[0]; + byte rgb1 = rgb[1]; + byte rgb2 = rgb[2]; byte sa, ssa; while (n--) { sa = src[0]; ssa = 255 - sa; dst[0] = sa + fz_mul255(dst[0], ssa); - dst[1] = rgb[0] + fz_mul255((short)dst[1] - rgb[0], ssa); - dst[2] = rgb[1] + fz_mul255((short)dst[2] - rgb[1], ssa); - dst[3] = rgb[2] + fz_mul255((short)dst[3] - rgb[2], ssa); + dst[1] = rgb0 + fz_mul255((short)dst[1] - rgb0, ssa); + dst[2] = rgb1 + fz_mul255((short)dst[2] - rgb1, ssa); + dst[3] = rgb2 + fz_mul255((short)dst[3] - rgb2, ssa); src ++; dst += 4; } @@ -496,6 +499,9 @@ static void img_4o4(FZ_PSRC, FZ_PDST, FZ_PCTM) static void img_w3i1o4(byte *rgb, FZ_PSRC, FZ_PDST, FZ_PCTM) { + byte rgb0 = rgb[0]; + byte rgb1 = rgb[1]; + byte rgb2 = rgb[2]; byte sa, ssa; while (h--) { @@ -508,9 +514,9 @@ static void img_w3i1o4(byte *rgb, FZ_PSRC, FZ_PDST, FZ_PCTM) sa = samplemask(src, srcw, srch, u, v); ssa = 255 - sa; dstp[0] = sa + fz_mul255(dstp[0], ssa); - dstp[1] = rgb[0] + fz_mul255((short)dstp[1] - rgb[0], ssa); - dstp[2] = rgb[1] + fz_mul255((short)dstp[2] - rgb[1], ssa); - dstp[3] = rgb[2] + fz_mul255((short)dstp[3] - rgb[2], ssa); + dstp[1] = rgb0 + fz_mul255((short)dstp[1] - rgb0, ssa); + dstp[2] = rgb1 + fz_mul255((short)dstp[2] - rgb1, ssa); + dstp[3] = rgb2 + fz_mul255((short)dstp[3] - rgb2, ssa); dstp += 4; u += fa; v += fb; diff --git a/render/rastppc.c b/render/rastppc.c index f26e5b66..276ee2d7 100644 --- 
a/render/rastppc.c +++ b/render/rastppc.c @@ -1,6 +1,7 @@ /* -PowerPC specific render optims live here -*/ + * PowerPC specific render optims live here + */ + #include <fitz.h> #ifdef HAVE_ALTIVEC @@ -14,6 +15,7 @@ fz_accelrastfuncs(fz_rastfuncs *tab) # ifdef HAVE_ALTIVEC if (fz_cpuflags & HAVE_ALTIVEC) { + puts("installed altivec rastfuncs"); } # endif } diff --git a/render/rastx86.c b/render/rastx86.c index 79020fb1..9360b5e2 100644 --- a/render/rastx86.c +++ b/render/rastx86.c @@ -3,6 +3,8 @@ x86 specific render optims live here */ #include <fitz.h> +typedef unsigned char byte; + /* always surround cpu specific code with HAVE_XXX */ #ifdef HAVE_MMX @@ -10,15 +12,77 @@ x86 specific render optims live here shouldn't require anything */ #include <mmintrin.h> +static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int dw, int w0, int h) +{ + /* + rendering all pages of + x11pdf ~/doc/OpenGL/Presentations/CEDEC2003_Venus_and_Vulcan.pdf + % cumulative self self total + time seconds seconds calls ms/call ms/call name + 30.50 20.04 20.04 261 76.76 76.76 duff_4i1o4 + 21.67 22.02 10.95 221 49.55 49.55 duff_4i1o4mmx + */ + __m64 mzero = _mm_setzero_si64(); + while (h--) + { + byte *sp = sp0; + byte *mp = mp0; + byte *dp = dp0; + + unsigned *s = (unsigned *)sp; + unsigned *d = (unsigned *)dp; + + int w = w0; + + /* TODO: unroll and process two pixels/iteration */ + while (w--) + { + int ts = *s++; + int ma = *mp++ + 1; + int sa = ((ts & 0xff) * ma) >> 8; + int ssa = 254 - sa; + + __m64 d0 = _mm_cvtsi32_si64(*d); + __m64 s0 = _mm_cvtsi32_si64(ts); + + /* 4 x 9 bit alpha value */ + __m64 mma = _mm_set1_pi16(ma); + __m64 mssa = _mm_set1_pi16(ssa); + + /* unpack 0000argb => a0r0g0b0 */ + __m64 d1 = _mm_unpacklo_pi8(d0, mzero); + __m64 s1 = _mm_unpacklo_pi8(s0, mzero); + + /* s1 * ma => a0r0g0b0 */ + __m64 msma = _mm_mullo_pi16(s1, mma); + /* d1 * mssa */ + __m64 mdssa = _mm_mullo_pi16(d1, mssa); + + __m64 res0 = _mm_add_pi16(msma, mdssa); + /* TODO: is it 
possible to get rid of the shift? */ + __m64 res1 = _mm_srli_pi16(res0, 8); + + /* pack */ + __m64 res2 = _mm_packs_pu16(res1, mzero); + + *d++ = _mm_cvtsi64_si32(res2); + } + + sp0 += sw; + mp0 += mw; + dp0 += dw; + } + + _mm_empty(); +} + static inline unsigned getargb(unsigned *s, int w, int h, int u, int v) { - if (u < 0 || u >= w) return 0; - if (v < 0 || v >= h) return 0; + if (u < 0 | u >= w | v < 0 | v >= h) return 0; return s[w * v + u]; } -/* this code has not been tested since refactoring */ static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM) { /* since mmx does not have an unsigned multiply instruction we use @@ -33,12 +97,16 @@ static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM) unsigned *d = (unsigned *)dst0; int u = u0; int v = v0; - int w = w0; + int w = w0; + + __m64 mzero = _mm_setzero_si64(); + __m64 m256 = _mm_set1_pi16(256); + __m64 malphamask = _mm_cvtsi32_si64(0xff); while (w--) { int iu = u >> 17; - int iv = u >> 17; + int iv = v >> 17; int fu = u & 0x7fff; int fv = v & 0x7fff; @@ -75,7 +143,6 @@ static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM) } /* unpack src into 4x16bit vectors */ - __m64 mzero = _mm_setzero_si64(); __m64 ms0 = _mm_unpackhi_pi8(ms0s1, mzero); __m64 ms1 = _mm_unpacklo_pi8(ms0s1, mzero); __m64 ms2 = _mm_unpackhi_pi8(ms2s3, mzero); @@ -110,14 +177,12 @@ static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM) __m64 d1 = _mm_unpacklo_pi8(d0, mzero); /* get src alpha */ - __m64 m256 = _mm_set1_pi16(256); - __m64 malphamask = _mm_cvtsi32_si64(0xff); - /* splat alpha TODO: better way? 
*/ + /* splat alpha */ __m64 a0001 = _mm_and_si64(malphamask, t8); - __m64 a0010 = _mm_slli_si64(a0001, 16); - __m64 a0011 = _mm_or_si64(a0001, a0010); + __m64 a0011 = _mm_unpacklo_pi16(a0001, a0001); __m64 a1111 = _mm_unpacklo_pi16(a0011, a0011); + /* 255+1 - sa */ __m64 sna = _mm_sub_pi16(m256, a1111); @@ -152,9 +217,9 @@ fz_accelrastfuncs(fz_rastfuncs *tab) # ifdef HAVE_MMX if (fz_cpuflags & HAVE_MMX) { + tab->duff_4i1o4 = duff_4i1o4mmx; tab->img_4o4 = img_4o4mmx; } # endif } #endif - diff --git a/test/ximage.c b/test/ximage.c index 1c16bfda..3b4a9b8b 100644 --- a/test/ximage.c +++ b/test/ximage.c @@ -246,7 +246,7 @@ select_mode(void) info.mode = byteorder == MSBFirst ? RGBA8888 : ABGR8888; } - printf("argb:8888 -> %s\n", modename[info.mode]); + printf("convert ARGB8888 to %s\n", modename[info.mode]); /* select conversion function */ info.convert_func = ximage_convert_funcs[info.mode]; @@ -414,7 +414,6 @@ ximage_blit(Drawable d, GC gc, /* * */ - #ifndef _C99 #ifdef __GNUC__ #define restrict __restrict__ @@ -456,20 +455,16 @@ ximage_convert_bgra8888(PARAMS) int x, y; unsigned *s = (unsigned *)src; unsigned *d = (unsigned *)dst; + unsigned val; for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { - unsigned val = s[x]; - unsigned a0g0 = val & 0xff00ff00; - unsigned gb00 = val << 16; - unsigned zzar = val >> 16; - unsigned gbar = gb00 | zzar; - d[x] = (gbar & 0x00ff00ff) | a0g0; -/* + val = s[x]; d[x] = (val >> 24) | - ((val >> 8) & 0xff) | - ((val << 8) & 0xff0000) | - (val << 24); + ((val >> 8) & 0xff00) | + (val << 24) | + ((val << 8) & 0xff0000); +/* d[x] = (((val >> 24) & 0xff) << 0) | (((val >> 16) & 0xff) << 8) | @@ -487,7 +482,6 @@ ximage_convert_bgra8888(PARAMS) static void ximage_convert_abgr8888(PARAMS) { -#if 1 int x, y; unsigned *s = (unsigned *)src; unsigned *d = (unsigned *)dst; @@ -496,26 +490,16 @@ ximage_convert_abgr8888(PARAMS) for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { val = s[x]; - /* bigendian... 
*/ +#if 1 /* FZ_MSB */ d[x] = (val & 0xff00ff00) | (((val << 16) | (val >> 16)) & 0x00ff00ff); +#else /* FZ_LSB */ + d[x] = (val << 24) | ((val >> 8) & 0xff); +#endif } d += dststride>>2; s += srcstride>>2; } -#else - int x, y; - for (y = 0; y < h; y++) { - for (x = 0; x < w; x++) { - dst[x * 4 + 0] = src[x * 4 + 0]; - dst[x * 4 + 1] = src[x * 4 + 3]; - dst[x * 4 + 2] = src[x * 4 + 2]; - dst[x * 4 + 3] = src[x * 4 + 1]; - } - dst += dststride; - src += srcstride; - } -#endif } static void |