From dbadb1dd634c8c9419215ade0666a7fb69a4447b Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Thu, 12 Jun 2014 10:28:57 -0700 Subject: libpayload: Reorder default memcpy, speed up memset and memcmp The current default memcpy first copies single bytes to align the amount, then copies the rest as full words. In practice, the start of a buffer is much more likely to be word-aligned than the end, and aligned word accesses are usually more efficient. This patch reorders those accesses to first copy as many full words as possible and then finish the rest with byte accesses to optimize this common case. This fixes a data abort when using USB on ARM without CONFIG_GPL. Due to some limitations of how DMA memory is set up in coreboot on ARM, it currently does not support unaligned accesses. (This could be fixed with a more complicated patch, but it's usually not an issue... unless, of course, your memcpy happens to be braindead). Also add word-aligned accesses to memset and memcmp while I'm at it, and make memcmp's return value standards-compliant. 
BUG=chrome-os-partner:24957 TEST=Manual Original-Change-Id: I2a7bcb35626a05a9a43fcfd99eb958b485d7622a Original-Signed-off-by: Julius Werner Original-Reviewed-on: https://chromium-review.googlesource.com/203547 Original-Reviewed-by: Stefan Reinauer Original-Reviewed-by: David Hendricks (cherry picked from commit 05a64d2e107e1675cc3442e6dabe14a341e55673) Signed-off-by: Marc Jones Change-Id: I0030ca8a203c97587b0da31a0a5e9e11b0be050f Reviewed-on: http://review.coreboot.org/8126 Tested-by: build bot (Jenkins) Reviewed-by: Stefan Reinauer --- payloads/libpayload/libc/memory.c | 63 +++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/payloads/libpayload/libc/memory.c b/payloads/libpayload/libc/memory.c index 12d7e33dfd..aec60e4eb8 100644 --- a/payloads/libpayload/libc/memory.c +++ b/payloads/libpayload/libc/memory.c @@ -35,12 +35,22 @@ static void *default_memset(void *s, int c, size_t n) { - char *os = s; + size_t i; + void *ret = s; + unsigned long w = c & 0xff; - while (n--) - *(os++) = c; + for (i = 1; i < sizeof(unsigned long); i <<= 1) + w = (w << (i * 8)) | w; - return s; + for (i = 0; i < n / sizeof(unsigned long); i++) + ((unsigned long *)s)[i] = w; + + s += i * sizeof(unsigned long); + + for (i = 0; i < n % sizeof(unsigned long); i++) + ((u8 *)s)[i] = (u8)c; + + return ret; } void *memset(void *s, int c, size_t n) @@ -48,18 +58,17 @@ void *memset(void *s, int c, size_t n) static void *default_memcpy(void *dst, const void *src, size_t n) { - int i; + size_t i; void *ret = dst; - for(i = 0; i < n % sizeof(unsigned long); i++) - ((unsigned char *) dst)[i] = ((unsigned char *) src)[i]; + for(i = 0; i < n / sizeof(unsigned long); i++) + ((unsigned long *)dst)[i] = ((unsigned long *)src)[i]; - n -= i; - src += i; - dst += i; + src += i * sizeof(unsigned long); + dst += i * sizeof(unsigned long); - for(i = 0; i < n / sizeof(unsigned long); i++) - ((unsigned long *) dst)[i] = ((unsigned long *) src)[i]; + for(i = 0; i < 
n % sizeof(unsigned long); i++) + ((u8 *)dst)[i] = ((u8 *)src)[i]; return ret; } @@ -69,8 +78,7 @@ void *memcpy(void *dst, const void *src, size_t n) static void *default_memmove(void *dst, const void *src, size_t n) { - int i; - unsigned long offs; + size_t i, offs; if (src > dst) return memcpy(dst, src, n); @@ -78,8 +86,7 @@ static void *default_memmove(void *dst, const void *src, size_t n) offs = n - (n % sizeof(unsigned long)); for (i = (n % sizeof(unsigned long)) - 1; i >= 0; i--) - ((unsigned char *)dst)[i + offs] = - ((unsigned char *)src)[i + offs]; + ((u8 *)dst)[i + offs] = ((u8 *)src)[i + offs]; for (i = n / sizeof(unsigned long) - 1; i >= 0; i--) ((unsigned long *)dst)[i] = ((unsigned long *)src)[i]; @@ -95,17 +102,27 @@ void *memmove(void *dst, const void *src, size_t n) * * @param s1 Pointer to the first area to compare. * @param s2 Pointer to the second area to compare. - * @param len Size of the first area in bytes (both must have the same length). - * @return If len is 0, return zero. If the areas match, return zero. - * Otherwise return non-zero. + * @param n Size of the first area in bytes (both must have the same length). + * @return If n is 0, return zero. Otherwise, return a value less than, equal + * to, or greater than zero if s1 is found less than, equal to, or + * greater than s2 respectively. 
*/ -static int default_memcmp(const void *s1, const void *s2, size_t len) +static int default_memcmp(const void *s1, const void *s2, size_t n) { - for (; len && *(char *)s1++ == *(char *)s2++; len--) ; - return len; + size_t i; + + for (i = 0; i < n / sizeof(unsigned long); i++) + if (((unsigned long *)s1)[i] != ((unsigned long *)s2)[i]) + break; /* fall through to find differing byte */ + + for (i *= sizeof(unsigned long); i < n; i++) + if (((u8 *)s1)[i] != ((u8 *)s2)[i]) + return ((u8 *)s1)[i] - ((u8 *)s2)[i]; + + return 0; } -int memcmp(const void *s1, const void *s2, size_t len) +int memcmp(const void *s1, const void *s2, size_t n) __attribute__((weak, alias("default_memcmp"))); -- cgit v1.2.3