summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vladimir Serbinenko <phcoder@gmail.com> 2014-02-05 17:00:40 +0100
committer Vladimir Serbinenko <phcoder@gmail.com> 2014-02-05 23:04:53 +0100
commit 3d6ffe76f8a505c2dff5d5c6146da3d63dad6e82 (patch)
tree a6a9aafffb24fb903df8ef5f9fbaee94b8df748f
parent 79c712cb9e35ea9c0f383c047b4aa9590b64496d (diff)
download coreboot-3d6ffe76f8a505c2dff5d5c6146da3d63dad6e82.tar.xz
load_payload: Use 32-bit accesses to speed up decompression.
Flash prefers 32-bit sequential access. On some platforms the ROM is not cached due to, among other things, a shortage of MTRRs. Moreover, ROM caching is not currently enabled by default. With this patch, payload decompression is sped up by a theoretical factor of 4.

Test on X201, with caching disabled:

Before:
90:load payload 4,470,841 (24,505)
99:selfboot jump 6,073,812 (1,602,971)

After:
90:load payload 4,530,979 (17,728)
99:selfboot jump 5,103,408 (572,429)

Change-Id: Id17e61316dbbf73f4a837bf173f88bf26c01c62b
Signed-off-by: Vladimir Serbinenko <phcoder@gmail.com>
Reviewed-on: http://review.coreboot.org/5144
Reviewed-by: Aaron Durbin <adurbin@google.com>
Reviewed-by: Paul Menzel <paulepanter@users.sourceforge.net>
Tested-by: build bot (Jenkins)
-rw-r--r-- src/lib/cbfs_core.c 6
-rw-r--r-- src/lib/lzmadecode.c 11
2 files changed, 16 insertions, 1 deletions
diff --git a/src/lib/cbfs_core.c b/src/lib/cbfs_core.c
index 839b994cf3..50c037e573 100644
--- a/src/lib/cbfs_core.c
+++ b/src/lib/cbfs_core.c
@@ -202,6 +202,12 @@ int cbfs_decompress(int algo, void *src, void *dst, int len)
{
switch (algo) {
case CBFS_COMPRESS_NONE:
+ /* Reads need to be aligned at 4 bytes to avoid
+ poor flash performance. */
+ while (len && ((u32)src & 3)) {
+ *(u8*)dst++ = *(u8*)src++;
+ len--;
+ }
memmove(dst, src, len);
return len;
#ifdef CBFS_CORE_WITH_LZMA
diff --git a/src/lib/lzmadecode.c b/src/lib/lzmadecode.c
index 1cf647d27b..fb57f4fd4d 100644
--- a/src/lib/lzmadecode.c
+++ b/src/lib/lzmadecode.c
@@ -28,7 +28,10 @@
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
-#define RC_READ_BYTE (*Buffer++)
+/* Use 32-bit reads whenever possible to avoid bad flash performance. */
+#define RC_READ_BYTE (look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
+ : ((((UInt32) Buffer & 3) || ((SizeT) (BufferLim - Buffer) < 4)) ? (*Buffer++) \
+ : ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), (look_ahead_ptr = 1), look_ahead.raw[0])))
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
{ int i; for(i = 0; i < 5; i++) { RC_TEST; Code = (Code << 8) | RC_READ_BYTE; }}
@@ -149,6 +152,12 @@ int LzmaDecode(CLzmaDecoderState *vs,
int len = 0;
const Byte *Buffer;
const Byte *BufferLim;
+ int look_ahead_ptr = 4;
+ union
+ {
+ Byte raw[4];
+ UInt32 dw;
+ } look_ahead;
UInt32 Range;
UInt32 Code;