summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTor Andersson <tor@ghostscript.com>2004-11-28 17:41:15 +0100
committerTor Andersson <tor@ghostscript.com>2004-11-28 17:41:15 +0100
commit85792218b05cb41d7dd4696443a4fdd6c16e1817 (patch)
tree8da4dab2e44204e3ebba47ed01ff029ab00df35d
parent2235b780ce692e1393fdd925eea0cdd9e1a422a1 (diff)
downloadmupdf-85792218b05cb41d7dd4696443a4fdd6c16e1817.tar.xz
gka fixes. use truetypes in fontfile.
-rw-r--r--TODO11
-rw-r--r--base/cpudep.c44
-rw-r--r--mupdf/fontfile.c16
-rw-r--r--render/rastport.c18
-rw-r--r--render/rastppc.c6
-rw-r--r--render/rastx86.c89
-rw-r--r--test/ximage.c38
7 files changed, 155 insertions, 67 deletions
diff --git a/TODO b/TODO
index 3e324907..2cf21011 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,14 @@
+immediate plan:
+ 1 - pdf_resolve remake
+ 2 - refcount resources
+ 3 - put image load/scale into rastfuncs
+ 4 - altivec optimize
+ 5 - gtk+pdf
+ 6 - page labels + dests + outline + annots
+ 7 - global font/cmap cache
+
+---
+
colorspace conversions (v2)
- cal*
- iccbased
diff --git a/base/cpudep.c b/base/cpudep.c
index 0301213d..c7af38b9 100644
--- a/base/cpudep.c
+++ b/base/cpudep.c
@@ -32,8 +32,8 @@ static void sse(void)
static void sse2(void)
{ __asm__ ("andpd %xmm0, %xmm0\n\t"); }
-static void sse3(void)
-{ __asm__ ("haddps %%xmm0, %%xmm0\n\t" : : : "%xmm0"); }
+/* static void sse3(void) */
+/* { __asm__ ("haddps %%xmm0, %%xmm0\n\t" : : : "%xmm0"); } */
#ifdef ARCH_X86_64
static void amd64(void)
@@ -47,7 +47,7 @@ static const featuretest features[] = {
{ mmxext, HAVE_MMXEXT, "mmxext" },
{ sse, HAVE_SSE, "sse" },
{ sse2, HAVE_SSE2, "sse2" },
- { sse3, HAVE_SSE3, "sse3" },
+/* { sse3, HAVE_SSE3, "sse3" }, */
#ifdef ARCH_X86_64
{ amd64, HAVE_AMD64, "amd64" }
#endif
@@ -105,33 +105,45 @@ sigillhandler(int sig)
siglongjmp(jmpbuf, 1);
}
/*
 * Decide whether the cpu feature named 'ext' may be used, given the
 * value of the CPUACCEL environment variable passed in 'env'.
 *
 *   env == NULL       variable not set: every feature is enabled
 *   env == "mmx sse"  only the listed features are enabled
 *   env == "0"        no feature name matches: everything is disabled
 *
 * 'ext' must occur in 'env' as a whole word, delimited by spaces, commas
 * or the ends of the string, so "sse" is not enabled by "sse2".
 *
 * Returns 1 if the feature is enabled, 0 otherwise.
 */
static int
enabled(char *env, const char *ext)
{
	size_t len;
	char *s;

	if (!env)
		return 1;

	len = strlen(ext);
	if (len == 0)
		return 0;

	/*
	 * Resume the search one character past each rejected hit. Searching
	 * from 'env' again would find the same hit forever.
	 */
	for (s = strstr(env, ext); s != NULL; s = strstr(s + 1, ext))
	{
		int startok = (s == env || s[-1] == ' ' || s[-1] == ',');
		char after = s[len];

		if (startok && (after == ' ' || after == ',' || after == '\0'))
			return 1;
	}

	return 0;
}
+
/*
 * Print one line to stdout naming every entry of features[] whose flag
 * bit is set in fz_cpuflags, or "none" when no such bit is set.
 */
static void
dumpflags(void)
{
unsigned f = fz_cpuflags;
int i, n;
- fputs("detected cpu features: ", stdout);
+ fputs("detected cpu features:", stdout);
n = 0; /* number of feature names printed so far */
for (i = 0; i < sizeof(features) / sizeof(featuretest); i++)
{
if (f & features[i].flag)
{
+ fputc(' ', stdout);
fputs(features[i].name, stdout);
n ++;
}
}
if (!n)
- fputs("none", stdout);
+ fputs(" none", stdout);
fputc('\n', stdout);
}
-/* called by runtime before main()...
- * TODO:
- * CPUACCEL=0 ./x11pdf disables detection
- * CPUACCEL='mmx sse' enables only mmx and sse
- * not set enables everything
- */
void fz_cpudetect(void)
{
static int hasrun = 0;
@@ -140,11 +152,14 @@ void fz_cpudetect(void)
int i;
void (*oldhandler)(int) = NULL;
void (*tmphandler)(int);
+ char *env;
if (hasrun)
return;
hasrun = 1;
+ env = getenv("CPUACCEL");
+
for (i = 0; i < sizeof(features) / sizeof(featuretest); i++)
{
canjump = 0;
@@ -165,7 +180,10 @@ void fz_cpudetect(void)
features[i].test();
/* if we got here the test succeeded */
- flags |= features[i].flag;
+ if (enabled(env, features[i].name))
+ flags |= features[i].flag;
+ else
+ flags &= ~features[i].flag;
}
/* restore previous signal handler */
@@ -176,7 +194,7 @@ void fz_cpudetect(void)
dumpflags();
}
/*
 * GCC constructor hook: runs fz_cpudetect() automatically before main().
 * The 'used' attribute keeps this otherwise unreferenced static function
 * from being discarded by the compiler.
 */
-static __attribute__((constructor)) void fzcpudetect(void)
+static __attribute__((constructor, used)) void fzcpudetect(void)
{
fz_cpudetect();
}
diff --git a/mupdf/fontfile.c b/mupdf/fontfile.c
index ff46c8f1..cdce1a86 100644
--- a/mupdf/fontfile.c
+++ b/mupdf/fontfile.c
@@ -42,11 +42,11 @@ static char *basenames[15] =
/*
 * Per character collection, the name of a serif and of a gothic font.
 * NOTE(review): presumably these names are appended to the font directory
 * by the system cid font loader in this file -- confirm against the caller.
 */
static struct { char *collection; char *serif; char *gothic; } cidfonts[5] =
{
- { "Adobe-CNS1", "MOESung-Regular", "MOEKai-Regular" },
- { "Adobe-GB1", "gkai00mp", "gbsn00lp" },
- { "Adobe-Japan1", "WadaMin-Regular", "WadaMaruGo-Regular" },
- { "Adobe-Japan2", "WadaMin-RegularH", "WadaMaruGo-RegularH" },
- { "Adobe-Korea1", "Munhwa-Regular", "MunhwaGothic-Regular" },
+ { "Adobe-CNS1", "bkai00mp.ttf", "bsmi00lp.ttf" },
+ { "Adobe-GB1", "gkai00mp.ttf", "gbsn00lp.ttf" },
+ { "Adobe-Japan1", "kochi-mincho.ttf", "kochi-gothic.ttf" },
+ { "Adobe-Japan2", "kochi-mincho.ttf", "kochi-gothic.ttf" },
+ { "Adobe-Korea1", "batang.ttf", "dotum.ttf" },
};
static void loadfontdata(int i, unsigned char **d, unsigned int *l)
@@ -130,12 +130,14 @@ printf(" load system cid font '%s'\n", filename);
fontdir = getenv("FONTDIR");
if (!fontdir)
- return fz_throw("ioerror: FONTDIR environment not set");
+ {
+ fontdir = "/usr/local/share/font";
+ fz_warn("FONTDIR environment not set");
+ }
strlcpy(path, fontdir, sizeof path);
strlcat(path, "/", sizeof path);
strlcat(path, filename, sizeof path);
- strlcat(path, ".cid.cff", sizeof path);
if (access(path, R_OK))
return fz_throw("ioerror: could not access file '%s'", path);
diff --git a/render/rastport.c b/render/rastport.c
index 9cae06d5..3eb89e43 100644
--- a/render/rastport.c
+++ b/render/rastport.c
@@ -264,15 +264,18 @@ static void msk_1o1(byte *src, byte *dst, int w)
static void msk_w3i1o4(byte *rgb, byte *src, byte *dst, int n)
{
+ byte rgb0 = rgb[0];
+ byte rgb1 = rgb[1];
+ byte rgb2 = rgb[2];
byte sa, ssa;
while (n--)
{
sa = src[0];
ssa = 255 - sa;
dst[0] = sa + fz_mul255(dst[0], ssa);
- dst[1] = rgb[0] + fz_mul255((short)dst[1] - rgb[0], ssa);
- dst[2] = rgb[1] + fz_mul255((short)dst[2] - rgb[1], ssa);
- dst[3] = rgb[2] + fz_mul255((short)dst[3] - rgb[2], ssa);
+ dst[1] = rgb0 + fz_mul255((short)dst[1] - rgb0, ssa);
+ dst[2] = rgb1 + fz_mul255((short)dst[2] - rgb1, ssa);
+ dst[3] = rgb2 + fz_mul255((short)dst[3] - rgb2, ssa);
src ++;
dst += 4;
}
@@ -496,6 +499,9 @@ static void img_4o4(FZ_PSRC, FZ_PDST, FZ_PCTM)
static void img_w3i1o4(byte *rgb, FZ_PSRC, FZ_PDST, FZ_PCTM)
{
+ byte rgb0 = rgb[0];
+ byte rgb1 = rgb[1];
+ byte rgb2 = rgb[2];
byte sa, ssa;
while (h--)
{
@@ -508,9 +514,9 @@ static void img_w3i1o4(byte *rgb, FZ_PSRC, FZ_PDST, FZ_PCTM)
sa = samplemask(src, srcw, srch, u, v);
ssa = 255 - sa;
dstp[0] = sa + fz_mul255(dstp[0], ssa);
- dstp[1] = rgb[0] + fz_mul255((short)dstp[1] - rgb[0], ssa);
- dstp[2] = rgb[1] + fz_mul255((short)dstp[2] - rgb[1], ssa);
- dstp[3] = rgb[2] + fz_mul255((short)dstp[3] - rgb[2], ssa);
+ dstp[1] = rgb0 + fz_mul255((short)dstp[1] - rgb0, ssa);
+ dstp[2] = rgb1 + fz_mul255((short)dstp[2] - rgb1, ssa);
+ dstp[3] = rgb2 + fz_mul255((short)dstp[3] - rgb2, ssa);
dstp += 4;
u += fa;
v += fb;
diff --git a/render/rastppc.c b/render/rastppc.c
index f26e5b66..276ee2d7 100644
--- a/render/rastppc.c
+++ b/render/rastppc.c
@@ -1,6 +1,7 @@
/*
-PowerPC specific render optims live here
-*/
+ * PowerPC specific render optims live here
+ */
+
#include <fitz.h>
#ifdef HAVE_ALTIVEC
@@ -14,6 +15,7 @@ fz_accelrastfuncs(fz_rastfuncs *tab)
# ifdef HAVE_ALTIVEC
if (fz_cpuflags & HAVE_ALTIVEC)
{
+ puts("installed altivec rastfuncs");
}
# endif
}
diff --git a/render/rastx86.c b/render/rastx86.c
index 79020fb1..9360b5e2 100644
--- a/render/rastx86.c
+++ b/render/rastx86.c
@@ -3,6 +3,8 @@ x86 specific render optims live here
*/
#include <fitz.h>
+typedef unsigned char byte;
+
/* always surround cpu specific code with HAVE_XXX */
#ifdef HAVE_MMX
@@ -10,15 +12,77 @@ x86 specific render optims live here
shouldn't require anything */
#include <mmintrin.h>
+static void duff_4i1o4mmx(byte *sp0, int sw, byte *mp0, int mw, byte *dp0, int dw, int w0, int h)
+{
+ /*
+ MMX blend of a source row over a destination row through a mask.
+ Source and destination are read as one unsigned word per pixel,
+ the mask as one byte per pixel; w0 pixels per row, h rows.
+ sw, mw and dw are added to the source, mask and destination row
+ pointers after every row.
+ Per pixel: ma = mask + 1, sa = ((low byte of source word) * ma) >> 8,
+ ssa = 254 - sa, and every 16 bit lane of the result is
+ (src * ma + dst * ssa) >> 8, packed back to bytes with unsigned
+ saturation.
+
+ rendering all pages of
+ x11pdf ~/doc/OpenGL/Presentations/CEDEC2003_Venus_and_Vulcan.pdf
+ % cumulative self self total
+ time seconds seconds calls ms/call ms/call name
+ 30.50 20.04 20.04 261 76.76 76.76 duff_4i1o4
+ 21.67 22.02 10.95 221 49.55 49.55 duff_4i1o4mmx
+ */
+ __m64 mzero = _mm_setzero_si64();
+ while (h--)
+ {
+ byte *sp = sp0;
+ byte *mp = mp0;
+ byte *dp = dp0;
+
+ unsigned *s = (unsigned *)sp;
+ unsigned *d = (unsigned *)dp;
+
+ int w = w0;
+
+ /* TODO: unroll and process two pixels/iteration */
+ while (w--)
+ {
+ int ts = *s++;
+ int ma = *mp++ + 1; /* mask scaled to 1..256 so that >> 8 can stand in for / 255 */
+ int sa = ((ts & 0xff) * ma) >> 8; /* low byte of the source word is taken as its alpha -- presumably the 'a' of the argb comments below; confirm byte order */
+ int ssa = 254 - sa; /* NOTE(review): becomes -1 when sa == 255; confirm that 254 (rather than 255) is intended */
+
+ __m64 d0 = _mm_cvtsi32_si64(*d);
+ __m64 s0 = _mm_cvtsi32_si64(ts);
+
+ /* 4 x 9 bit alpha value */
+ __m64 mma = _mm_set1_pi16(ma);
+ __m64 mssa = _mm_set1_pi16(ssa);
+
+ /* unpack 0000argb => a0r0g0b0 */
+ __m64 d1 = _mm_unpacklo_pi8(d0, mzero);
+ __m64 s1 = _mm_unpacklo_pi8(s0, mzero);
+
+ /* s1 * ma => a0r0g0b0 */
+ __m64 msma = _mm_mullo_pi16(s1, mma);
+ /* d1 * mssa */
+ __m64 mdssa = _mm_mullo_pi16(d1, mssa);
+
+ __m64 res0 = _mm_add_pi16(msma, mdssa);
+ /* TODO: is it possible to get rid of the shift? */
+ __m64 res1 = _mm_srli_pi16(res0, 8);
+
+ /* pack */
+ __m64 res2 = _mm_packs_pu16(res1, mzero);
+
+ *d++ = _mm_cvtsi64_si32(res2);
+ }
+
+ sp0 += sw;
+ mp0 += mw;
+ dp0 += dw;
+ }
+
+ _mm_empty(); /* clear the MMX state so that x87 floating point code can run afterwards */
+}
+
/*
 * Fetch the word at column u, row v of the w by h array s.
 * Coordinates outside the array yield 0.
 */
static inline unsigned
getargb(unsigned *s, int w, int h, int u, int v)
{
- if (u < 0 || u >= w) return 0;
- if (v < 0 || v >= h) return 0;
+ if (u < 0 | u >= w | v < 0 | v >= h) return 0; /* bitwise | is not short-circuit: all four range tests are always evaluated */
return s[w * v + u];
}
-/* this code has not been tested since refactoring */
static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM)
{
/* since mmx does not have an unsigned multiply instruction we use
@@ -33,12 +97,16 @@ static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM)
unsigned *d = (unsigned *)dst0;
int u = u0;
int v = v0;
- int w = w0;
+ int w = w0;
+
+ __m64 mzero = _mm_setzero_si64();
+ __m64 m256 = _mm_set1_pi16(256);
+ __m64 malphamask = _mm_cvtsi32_si64(0xff);
while (w--)
{
int iu = u >> 17;
- int iv = u >> 17;
+ int iv = v >> 17;
int fu = u & 0x7fff;
int fv = v & 0x7fff;
@@ -75,7 +143,6 @@ static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM)
}
/* unpack src into 4x16bit vectors */
- __m64 mzero = _mm_setzero_si64();
__m64 ms0 = _mm_unpackhi_pi8(ms0s1, mzero);
__m64 ms1 = _mm_unpacklo_pi8(ms0s1, mzero);
__m64 ms2 = _mm_unpackhi_pi8(ms2s3, mzero);
@@ -110,14 +177,12 @@ static void img_4o4mmx(FZ_PSRC, FZ_PDST, FZ_PCTM)
__m64 d1 = _mm_unpacklo_pi8(d0, mzero);
/* get src alpha */
- __m64 m256 = _mm_set1_pi16(256);
- __m64 malphamask = _mm_cvtsi32_si64(0xff);
- /* splat alpha TODO: better way? */
+ /* splat alpha */
__m64 a0001 = _mm_and_si64(malphamask, t8);
- __m64 a0010 = _mm_slli_si64(a0001, 16);
- __m64 a0011 = _mm_or_si64(a0001, a0010);
+ __m64 a0011 = _mm_unpacklo_pi16(a0001, a0001);
__m64 a1111 = _mm_unpacklo_pi16(a0011, a0011);
+
/* 255+1 - sa */
__m64 sna = _mm_sub_pi16(m256, a1111);
@@ -152,9 +217,9 @@ fz_accelrastfuncs(fz_rastfuncs *tab)
# ifdef HAVE_MMX
if (fz_cpuflags & HAVE_MMX)
{
+ tab->duff_4i1o4 = duff_4i1o4mmx;
tab->img_4o4 = img_4o4mmx;
}
# endif
}
#endif
-
diff --git a/test/ximage.c b/test/ximage.c
index 1c16bfda..3b4a9b8b 100644
--- a/test/ximage.c
+++ b/test/ximage.c
@@ -246,7 +246,7 @@ select_mode(void)
info.mode = byteorder == MSBFirst ? RGBA8888 : ABGR8888;
}
- printf("argb:8888 -> %s\n", modename[info.mode]);
+ printf("convert ARGB8888 to %s\n", modename[info.mode]);
/* select conversion function */
info.convert_func = ximage_convert_funcs[info.mode];
@@ -414,7 +414,6 @@ ximage_blit(Drawable d, GC gc,
/*
*
*/
-
#ifndef _C99
#ifdef __GNUC__
#define restrict __restrict__
@@ -456,20 +455,16 @@ ximage_convert_bgra8888(PARAMS)
int x, y;
unsigned *s = (unsigned *)src;
unsigned *d = (unsigned *)dst;
+ unsigned val;
for (y = 0; y < h; y++) {
for (x = 0; x < w; x++) {
- unsigned val = s[x];
- unsigned a0g0 = val & 0xff00ff00;
- unsigned gb00 = val << 16;
- unsigned zzar = val >> 16;
- unsigned gbar = gb00 | zzar;
- d[x] = (gbar & 0x00ff00ff) | a0g0;
-/*
+ val = s[x];
d[x] =
(val >> 24) |
- ((val >> 8) & 0xff) |
- ((val << 8) & 0xff0000) |
- (val << 24);
+ ((val >> 8) & 0xff00) |
+ (val << 24) |
+ ((val << 8) & 0xff0000);
+/*
d[x] =
(((val >> 24) & 0xff) << 0) |
(((val >> 16) & 0xff) << 8) |
@@ -487,7 +482,6 @@ ximage_convert_bgra8888(PARAMS)
static void
ximage_convert_abgr8888(PARAMS)
{
-#if 1
int x, y;
unsigned *s = (unsigned *)src;
unsigned *d = (unsigned *)dst;
@@ -496,26 +490,16 @@ ximage_convert_abgr8888(PARAMS)
for (y = 0; y < h; y++) {
for (x = 0; x < w; x++) {
val = s[x];
- /* bigendian... */
+#if 1 /* FZ_MSB */
d[x] = (val & 0xff00ff00) |
(((val << 16) | (val >> 16)) & 0x00ff00ff);
+#else /* FZ_LSB */
+ d[x] = (val << 24) | ((val >> 8) & 0xff);
+#endif
}
d += dststride>>2;
s += srcstride>>2;
}
-#else
- int x, y;
- for (y = 0; y < h; y++) {
- for (x = 0; x < w; x++) {
- dst[x * 4 + 0] = src[x * 4 + 0];
- dst[x * 4 + 1] = src[x * 4 + 3];
- dst[x * 4 + 2] = src[x * 4 + 2];
- dst[x * 4 + 3] = src[x * 4 + 1];
- }
- dst += dststride;
- src += srcstride;
- }
-#endif
}
static void