From fd9defc0cac3d3a89b3f1d9f973efbb2233f1ac6 Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 21 Jan 2014 20:11:22 -0800 Subject: arm: Redesign, clarify and clean up cache related code This patch changes several cache-related pieces to be cleaner, faster or more correct. The largest point is removing the old arm_invalidate_caches() function and surrounding bootblock code to initialize SCTLR and replace it with an all-assembly function that takes care of cache and SCTLR initialization to bring the system to a known state. It runs without stack and before coreboot makes any write accesses to be as compatible as possible with whatever state the system was left in by preceeding code. This also finally fixes the dreaded icache bug that wasted hundreds of milliseconds during boot. Old-Change-Id: I7bb4995af8184f6383f8e3b1b870b0662bde8bd4 Signed-off-by: Julius Werner Reviewed-on: https://chromium-review.googlesource.com/183890 (cherry picked from commit 07a35925dc957919bf88dfc90515971a36e81b97) nyan_big: apply cache-related changes from nyan This applies the same changes from 07a3592 that were applied to nyan. Old-Change-Id: Idcbe85436d7a2f65fcd751954012eb5f4bec0b6c Reviewed-on: https://chromium-review.googlesource.com/184551 Commit-Queue: David Hendricks Tested-by: David Hendricks Reviewed-by: David Hendricks (cherry picked from commit 4af27f02614da41c611aee2c6d175b1b948428ea) Squashed the followup patch for nyan_big into the original patch. Change-Id: Id14aef7846355ea2da496e55da227b635aca409e Signed-off-by: Isaac Christensen (cherry picked from commit 4cbf25f8eca3a12bbfec5b015953c0fc2b69c877) Signed-off-by: Marc Jones Reviewed-on: http://review.coreboot.org/6993 Tested-by: build bot (Jenkins) Reviewed-by: Stefan Reinauer --- payloads/libpayload/arch/arm/cache.c | 78 ++++----------------------- payloads/libpayload/include/arm/arch/cache.h | 19 +------ payloads/libpayload/include/x86/arch/cache.h | 2 +- src/arch/arm/armv4/cache.c | 7 +-- src/arch/arm/armv7/bootblock.S | 2 + src/arch/arm/armv7/bootblock_simple.c | 17 ------ src/arch/arm/armv7/cache.c | 79 ++++------------------------ src/arch/arm/armv7/cpu.S | 31 +++++++++++ src/arch/arm/armv7/mmu.c | 13 +++-- src/arch/arm/include/armv4/arch/cache.h | 7 +-- src/arch/arm/include/armv7/arch/cache.h | 19 +------ src/arch/arm/stages.c | 18 ++----- src/mainboard/google/daisy/mainboard.c | 1 - src/mainboard/google/nyan/romstage.c | 23 +++----- src/mainboard/google/nyan_big/romstage.c | 23 +++----- src/mainboard/google/peach_pit/mainboard.c | 1 - src/soc/samsung/exynos5420/wakeup.c | 1 - 17 files changed, 94 insertions(+), 247 deletions(-) diff --git a/payloads/libpayload/arch/arm/cache.c b/payloads/libpayload/arch/arm/cache.c index defe640654..63715bc5a6 100644 --- a/payloads/libpayload/arch/arm/cache.c +++ b/payloads/libpayload/arch/arm/cache.c @@ -38,26 +38,9 @@ void tlb_invalidate_all(void) { - /* - * FIXME: ARMv7 Architecture Ref. Manual claims that the distinction - * instruction vs. data TLBs is deprecated in ARMv7, however this does - * not seem to be the case as of Cortex-A15. - */ + /* TLBIALL includes dTLB and iTLB on systems that have them. */ tlbiall(); - dtlbiall(); - itlbiall(); - isb(); dsb(); -} - -void icache_invalidate_all(void) -{ - /* - * icache can be entirely invalidated with one operation. - * Note: If branch predictors are architecturally-visible, ICIALLU - * also performs a BPIALL operation (B2-1283 in arch manual) - */ - iciallu(); isb(); } @@ -135,6 +118,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len) dcache_op_mva(addr, len, OP_DCIMVAC); } +/* + * CAUTION: This implementation assumes that coreboot never uses non-identity + * page tables for pages containing executed code. If you ever want to violate + * this assumption, have fun figuring out the associated problems on your own. + */ void dcache_mmu_disable(void) { uint32_t sctlr; @@ -150,58 +138,14 @@ void dcache_mmu_enable(void) uint32_t sctlr; sctlr = read_sctlr(); - dcache_clean_invalidate_all(); sctlr |= SCTLR_C | SCTLR_M; write_sctlr(sctlr); } -void arm_invalidate_caches(void) +void cache_sync_instructions(void) { - uint32_t clidr; - int level; - - /* Invalidate branch predictor */ - bpiall(); - - /* Iterate thru each cache identified in CLIDR and invalidate */ - clidr = read_clidr(); - for (level = 0; level < 7; level++) { - unsigned int ctype = (clidr >> (level * 3)) & 0x7; - uint32_t csselr; - - switch(ctype) { - case 0x0: - /* no cache */ - break; - case 0x1: - /* icache only */ - csselr = (level << 1) | 1; - write_csselr(csselr); - icache_invalidate_all(); - break; - case 0x2: - case 0x4: - /* dcache only or unified cache */ - csselr = level << 1; - write_csselr(csselr); - dcache_invalidate_all(); - break; - case 0x3: - /* separate icache and dcache */ - csselr = (level << 1) | 1; - write_csselr(csselr); - icache_invalidate_all(); - - csselr = level << 1; - write_csselr(csselr); - dcache_invalidate_all(); - break; - default: - /* reserved */ - break; - } - } - - /* Invalidate TLB */ - tlb_invalidate_all(); + dcache_clean_all(); /* includes trailing DSB (in assembly) */ + iciallu(); /* includes BPIALLU (architecturally) */ + dsb(); + isb(); } diff --git a/payloads/libpayload/include/arm/arch/cache.h b/payloads/libpayload/include/arm/arch/cache.h index 5210dfe6a8..470eb55108 100644 --- a/payloads/libpayload/include/arm/arch/cache.h +++ b/payloads/libpayload/include/arm/arch/cache.h @@ -93,18 +93,6 @@ static inline void isb(void) * Low-level TLB maintenance operations */ -/* invalidate entire data TLB */ -static inline void dtlbiall(void) -{ - asm volatile ("mcr p15, 0, %0, c8, c6, 0" : : "r" (0) : "memory"); -} - -/* invalidate entire instruction TLB */ -static inline void itlbiall(void) -{ - asm volatile ("mcr p15, 0, %0, c8, c5, 0" : : "r" (0)); -} - /* invalidate entire unified TLB */ static inline void tlbiall(void) { @@ -313,8 +301,8 @@ void dcache_mmu_disable(void); /* dcache and MMU enable */ void dcache_mmu_enable(void); -/* icache invalidate all (on current level given by CSSELR) */ -void icache_invalidate_all(void); +/* perform all icache/dcache maintenance needed after loading new code */ +void cache_sync_instructions(void); /* tlb invalidate all */ void tlb_invalidate_all(void); @@ -323,9 +311,6 @@ void tlb_invalidate_all(void); * Generalized setup/init functions */ -/* invalidate all caches on ARM */ -void arm_invalidate_caches(void); - /* mmu initialization (set page table address, set permissions, etc) */ void mmu_init(void); diff --git a/payloads/libpayload/include/x86/arch/cache.h b/payloads/libpayload/include/x86/arch/cache.h index 396048802f..ffefcdbe19 100644 --- a/payloads/libpayload/include/x86/arch/cache.h +++ b/payloads/libpayload/include/x86/arch/cache.h @@ -41,6 +41,6 @@ #define dcache_invalidate_by_mva(addr, len) #define dcache_clean_invalidate_all() #define dcache_clean_invalidate_by_mva(addr, len) -#define icache_invalidate_all() +#define cache_sync_instructions() #endif diff --git a/src/arch/arm/armv4/cache.c b/src/arch/arm/armv4/cache.c index e5cf293dce..140beee060 100644 --- a/src/arch/arm/armv4/cache.c +++ b/src/arch/arm/armv4/cache.c @@ -39,10 +39,6 @@ void tlb_invalidate_all(void) { } -void icache_invalidate_all(void) -{ -} - void dcache_clean_all(void) { } @@ -82,11 +78,10 @@ void dcache_mmu_disable(void) { } - void dcache_mmu_enable(void) { } -void arm_invalidate_caches(void) +void cache_sync_instructions(void) { } diff --git a/src/arch/arm/armv7/bootblock.S b/src/arch/arm/armv7/bootblock.S index 4258caf439..9ed5d543aa 100644 --- a/src/arch/arm/armv7/bootblock.S +++ b/src/arch/arm/armv7/bootblock.S @@ -46,6 +46,8 @@ ENDPROC(_start) .thumb ENTRY(_thumb_start) + bl arm_init_caches + /* * From Cortex-A Series Programmer's Guide: * Only CPU 0 performs initialization. Other CPUs go into WFI diff --git a/src/arch/arm/armv7/bootblock_simple.c b/src/arch/arm/armv7/bootblock_simple.c index 5cd59704d2..248ea4e2b5 100644 --- a/src/arch/arm/armv7/bootblock_simple.c +++ b/src/arch/arm/armv7/bootblock_simple.c @@ -32,23 +32,6 @@ void main(void) { const char *stage_name = "fallback/romstage"; void *entry; - uint32_t sctlr; - - /* Globally disable MMU, caches, and branch prediction (these should - * be disabled by default on reset) */ - sctlr = read_sctlr(); - sctlr &= ~(SCTLR_M | SCTLR_C | SCTLR_Z | SCTLR_I); - write_sctlr(sctlr); - - arm_invalidate_caches(); - - /* - * Re-enable icache and branch prediction. MMU and dcache will be - * set up later. - */ - sctlr = read_sctlr(); - sctlr |= SCTLR_Z | SCTLR_I; - write_sctlr(sctlr); bootblock_cpu_init(); bootblock_mainboard_init(); diff --git a/src/arch/arm/armv7/cache.c b/src/arch/arm/armv7/cache.c index 7db86c81f1..31819f7f48 100644 --- a/src/arch/arm/armv7/cache.c +++ b/src/arch/arm/armv7/cache.c @@ -37,26 +37,9 @@ void tlb_invalidate_all(void) { - /* - * FIXME: ARMv7 Architecture Ref. Manual claims that the distinction - * instruction vs. data TLBs is deprecated in ARMv7, however this does - * not seem to be the case as of Cortex-A15. - */ + /* TLBIALL includes dTLB and iTLB on systems that have them. */ tlbiall(); - dtlbiall(); - itlbiall(); - isb(); dsb(); -} - -void icache_invalidate_all(void) -{ - /* - * icache can be entirely invalidated with one operation. - * Note: If branch predictors are architecturally-visible, ICIALLU - * also performs a BPIALL operation (B2-1283 in arch manual) - */ - iciallu(); isb(); } @@ -133,6 +116,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len) dcache_op_mva(addr, len, OP_DCIMVAC); } +/* + * CAUTION: This implementation assumes that coreboot never uses non-identity + * page tables for pages containing executed code. If you ever want to violate + * this assumption, have fun figuring out the associated problems on your own. + */ void dcache_mmu_disable(void) { uint32_t sctlr; @@ -143,64 +131,19 @@ void dcache_mmu_disable(void) write_sctlr(sctlr); } - void dcache_mmu_enable(void) { uint32_t sctlr; sctlr = read_sctlr(); - dcache_clean_invalidate_all(); sctlr |= SCTLR_C | SCTLR_M; write_sctlr(sctlr); } -void arm_invalidate_caches(void) +void cache_sync_instructions(void) { - uint32_t clidr; - int level; - - /* Invalidate branch predictor */ - bpiall(); - - /* Iterate thru each cache identified in CLIDR and invalidate */ - clidr = read_clidr(); - for (level = 0; level < 7; level++) { - unsigned int ctype = (clidr >> (level * 3)) & 0x7; - uint32_t csselr; - - switch(ctype) { - case 0x0: - /* no cache */ - break; - case 0x1: - /* icache only */ - csselr = (level << 1) | 1; - write_csselr(csselr); - icache_invalidate_all(); - break; - case 0x2: - case 0x4: - /* dcache only or unified cache */ - csselr = level << 1; - write_csselr(csselr); - dcache_invalidate_all(); - break; - case 0x3: - /* separate icache and dcache */ - csselr = (level << 1) | 1; - write_csselr(csselr); - icache_invalidate_all(); - - csselr = level << 1; - write_csselr(csselr); - dcache_invalidate_all(); - break; - default: - /* reserved */ - break; - } - } - - /* Invalidate TLB */ - tlb_invalidate_all(); + dcache_clean_all(); /* includes trailing DSB (in assembly) */ + iciallu(); /* includes BPIALLU (architecturally) */ + dsb(); + isb(); } diff --git a/src/arch/arm/armv7/cpu.S b/src/arch/arm/armv7/cpu.S index 29a19e76df..5738116c21 100644 --- a/src/arch/arm/armv7/cpu.S +++ b/src/arch/arm/armv7/cpu.S @@ -104,6 +104,37 @@ bx lr .endm +/* + * Bring an ARM processor we just gained control of (e.g. from IROM) into a + * known state regarding caches/SCTLR. Completely cleans and invalidates + * icache/dcache, disables MMU and dcache (if active), and enables unaligned + * accesses, icache and branch prediction (if inactive). Clobbers r4 and r5. + */ +ENTRY(arm_init_caches) + /* r4: SCTLR, return address: r5 (stay valid for the whole function) */ + mov r5, lr + mrc p15, 0, r4, c1, c0, 0 + + /* Activate ICache (12) and Branch Prediction (11) already for speed */ + orr r4, # (1 << 11) | (1 << 12) + mcr p15, 0, r4, c1, c0, 0 + + /* Flush and invalidate dcache in ascending order */ + bl dcache_clean_invalidate_all + + /* Deactivate MMU (0), Alignment Check (1) and DCache (2) */ + and r4, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2) + mcr p15, 0, r4, c1, c0, 0 + + /* Invalidate icache and TLB for good measure */ + mcr p15, 0, r0, c7, c5, 0 + mcr p15, 0, r0, c8, c7, 0 + dsb + isb + + bx r5 +ENDPROC(arm_init_caches) + ENTRY(dcache_invalidate_all) dcache_apply_all crm=c6 ENDPROC(dcache_invalidate_all) diff --git a/src/arch/arm/armv7/mmu.c b/src/arch/arm/armv7/mmu.c index cc915a6a33..d71003057a 100644 --- a/src/arch/arm/armv7/mmu.c +++ b/src/arch/arm/armv7/mmu.c @@ -106,11 +106,18 @@ void mmu_config_range(unsigned long start_mb, unsigned long size_mb, for (i = start_mb; i < start_mb + size_mb; i++) writel((i << 20) | attr, &ttb_entry[i]); - /* Flush the page table entries, and old translations from the TLB. */ - for (i = start_mb; i < start_mb + size_mb; i++) { + /* Flush the page table entries from the dcache. */ + for (i = start_mb; i < start_mb + size_mb; i++) dccmvac((uintptr_t)&ttb_entry[i]); + + dsb(); + + /* Invalidate the TLB entries. */ + for (i = start_mb; i < start_mb + size_mb; i++) tlbimvaa(i*MiB); - } + + dsb(); + isb(); } void mmu_init(void) diff --git a/src/arch/arm/include/armv4/arch/cache.h b/src/arch/arm/include/armv4/arch/cache.h index 6a3f593f28..e41ff9a49f 100644 --- a/src/arch/arm/include/armv4/arch/cache.h +++ b/src/arch/arm/include/armv4/arch/cache.h @@ -66,8 +66,8 @@ void dcache_mmu_disable(void); /* dcache and MMU enable */ void dcache_mmu_enable(void); -/* icache invalidate all (on current level given by CSSELR) */ -void icache_invalidate_all(void); +/* perform all icache/dcache maintenance needed after loading new code */ +void cache_sync_instructions(void); /* tlb invalidate all */ void tlb_invalidate_all(void); @@ -76,9 +76,6 @@ void tlb_invalidate_all(void); * Generalized setup/init functions */ -/* invalidate all caches on ARM */ -void arm_invalidate_caches(void); - /* mmu initialization (set page table address, set permissions, etc) */ void mmu_init(void); diff --git a/src/arch/arm/include/armv7/arch/cache.h b/src/arch/arm/include/armv7/arch/cache.h index 5210dfe6a8..470eb55108 100644 --- a/src/arch/arm/include/armv7/arch/cache.h +++ b/src/arch/arm/include/armv7/arch/cache.h @@ -93,18 +93,6 @@ static inline void isb(void) * Low-level TLB maintenance operations */ -/* invalidate entire data TLB */ -static inline void dtlbiall(void) -{ - asm volatile ("mcr p15, 0, %0, c8, c6, 0" : : "r" (0) : "memory"); -} - -/* invalidate entire instruction TLB */ -static inline void itlbiall(void) -{ - asm volatile ("mcr p15, 0, %0, c8, c5, 0" : : "r" (0)); -} - /* invalidate entire unified TLB */ static inline void tlbiall(void) { @@ -313,8 +301,8 @@ void dcache_mmu_disable(void); /* dcache and MMU enable */ void dcache_mmu_enable(void); -/* icache invalidate all (on current level given by CSSELR) */ -void icache_invalidate_all(void); +/* perform all icache/dcache maintenance needed after loading new code */ +void cache_sync_instructions(void); /* tlb invalidate all */ void tlb_invalidate_all(void); @@ -323,9 +311,6 @@ void tlb_invalidate_all(void); * Generalized setup/init functions */ -/* invalidate all caches on ARM */ -void arm_invalidate_caches(void); - /* mmu initialization (set page table address, set permissions, etc) */ void mmu_init(void); diff --git a/src/arch/arm/stages.c b/src/arch/arm/stages.c index 38d1b1928a..47f13fa71d 100644 --- a/src/arch/arm/stages.c +++ b/src/arch/arm/stages.c @@ -20,12 +20,7 @@ /* * This file contains entry/exit functions for each stage during coreboot * execution (bootblock entry and ramstage exit will depend on external - * loading. - * - * Unlike other files, this one should be compiled with a -m option to - * specify a pre-determined instruction set. This is to ensure consistency - * in the CPU operating mode (ARM or Thumb) when hand-off between stages - * occurs. + * loading). * * Entry points must be placed at the location the previous stage jumps * to (the lowest address in the stage image). This is done by giving @@ -49,13 +44,10 @@ void stage_entry(void) void stage_exit(void *addr) { void (*doit)(void) = addr; - /* make sure any code we installed is written to memory. Not all ARM have - * unified caches. - */ - dcache_clean_all(); - /* Because most stages copy code to memory, it's a safe and hygienic thing - * to flush the icache here. + /* + * Most stages load code so we need to sync caches here. Should maybe + * go into cbfs_load_stage() instead... */ - icache_invalidate_all(); + cache_sync_instructions(); doit(); } diff --git a/src/mainboard/google/daisy/mainboard.c b/src/mainboard/google/daisy/mainboard.c index 0adadb6649..8a252b1a19 100644 --- a/src/mainboard/google/daisy/mainboard.c +++ b/src/mainboard/google/daisy/mainboard.c @@ -334,7 +334,6 @@ static void mainboard_enable(device_t dev) mmu_config_range(DRAM_START, DRAM_SIZE, DCACHE_WRITEBACK); mmu_config_range(DMA_START >> 20, DMA_SIZE >> 20, DCACHE_OFF); mmu_config_range(DRAM_END, 4096 - DRAM_END, DCACHE_OFF); - dcache_invalidate_all(); dcache_mmu_enable(); const unsigned epll_hz = 192000000; diff --git a/src/mainboard/google/nyan/romstage.c b/src/mainboard/google/nyan/romstage.c index 3eaf38cf34..d7daea8291 100644 --- a/src/mainboard/google/nyan/romstage.c +++ b/src/mainboard/google/nyan/romstage.c @@ -69,26 +69,13 @@ static void configure_l2actlr(void) write_l2actlr(val); } -void main(void) +static void __attribute__((noinline)) romstage(void) { int dram_size_mb; #if CONFIG_COLLECT_TIMESTAMPS uint64_t romstage_start_time = timestamp_get(); #endif - // Globally disable MMU, caches and branch prediction (these should - // already be disabled by default on reset). - uint32_t sctlr = read_sctlr(); - sctlr &= ~(SCTLR_M | SCTLR_C | SCTLR_Z | SCTLR_I); - write_sctlr(sctlr); - - arm_invalidate_caches(); - - // Renable icache and branch prediction. - sctlr = read_sctlr(); - sctlr |= SCTLR_Z | SCTLR_I; - write_sctlr(sctlr); - configure_l2ctlr(); configure_l2actlr(); @@ -110,7 +97,6 @@ void main(void) CONFIG_DRAM_DMA_SIZE >> 20, DCACHE_OFF); mmu_config_range(dram_end, 4096 - dram_end, DCACHE_OFF); mmu_disable_range(0, 1); - dcache_invalidate_all(); dcache_mmu_enable(); /* For quality of the user experience, it's important to get @@ -146,3 +132,10 @@ void main(void) #endif stage_exit(entry); } + +/* Stub to force arm_init_caches to the top, before any stack/memory accesses */ +void main(void) +{ + asm ("bl arm_init_caches" ::: "r0","r1","r2","r3","r4","r5","ip"); + romstage(); +} diff --git a/src/mainboard/google/nyan_big/romstage.c b/src/mainboard/google/nyan_big/romstage.c index 80eea776af..5fd255e104 100644 --- a/src/mainboard/google/nyan_big/romstage.c +++ b/src/mainboard/google/nyan_big/romstage.c @@ -69,26 +69,13 @@ static void configure_l2actlr(void) write_l2actlr(val); } -void main(void) +static void __attribute__((noinline)) romstage(void) { int dram_size_mb; #if CONFIG_COLLECT_TIMESTAMPS uint64_t romstage_start_time = timestamp_get(); #endif - // Globally disable MMU, caches and branch prediction (these should - // already be disabled by default on reset). - uint32_t sctlr = read_sctlr(); - sctlr &= ~(SCTLR_M | SCTLR_C | SCTLR_Z | SCTLR_I); - write_sctlr(sctlr); - - arm_invalidate_caches(); - - // Renable icache and branch prediction. - sctlr = read_sctlr(); - sctlr |= SCTLR_Z | SCTLR_I; - write_sctlr(sctlr); - configure_l2ctlr(); configure_l2actlr(); @@ -110,7 +97,6 @@ void main(void) CONFIG_DRAM_DMA_SIZE >> 20, DCACHE_OFF); mmu_config_range(dram_end, 4096 - dram_end, DCACHE_OFF); mmu_disable_range(0, 1); - dcache_invalidate_all(); dcache_mmu_enable(); /* For quality of the user experience, it's important to get @@ -146,3 +132,10 @@ void main(void) #endif stage_exit(entry); } + +/* Stub to force arm_init_caches to the top, before any stack/memory accesses */ +void main(void) +{ + asm ("bl arm_init_caches" ::: "r0","r1","r2","r3","r4","r5","ip"); + romstage(); +} diff --git a/src/mainboard/google/peach_pit/mainboard.c b/src/mainboard/google/peach_pit/mainboard.c index 1fb441d1ca..706447ae92 100644 --- a/src/mainboard/google/peach_pit/mainboard.c +++ b/src/mainboard/google/peach_pit/mainboard.c @@ -469,7 +469,6 @@ static void mainboard_enable(device_t dev) /* set up caching for the DRAM */ mmu_config_range(DRAM_START, DRAM_SIZE, DCACHE_WRITEBACK); mmu_config_range(DMA_START >> 20, DMA_SIZE >> 20, DCACHE_OFF); - tlb_invalidate_all(); const unsigned epll_hz = 192000000; const unsigned sample_rate = 48000; diff --git a/src/soc/samsung/exynos5420/wakeup.c b/src/soc/samsung/exynos5420/wakeup.c index 753afd9591..a2407175f9 100644 --- a/src/soc/samsung/exynos5420/wakeup.c +++ b/src/soc/samsung/exynos5420/wakeup.c @@ -29,7 +29,6 @@ void wakeup(void) power_init(); /* Ensure ps_hold_setup() for early wakeup. */ dcache_mmu_disable(); - icache_invalidate_all(); power_exit_wakeup(); /* Should never return. If we do, reset. */ power_reset(); -- cgit v1.2.3