Diffstat (limited to 'src/arch/arm64/armv8/cpu.S')
 src/arch/arm64/armv8/cpu.S | 81 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 73 insertions(+), 8 deletions(-)
diff --git a/src/arch/arm64/armv8/cpu.S b/src/arch/arm64/armv8/cpu.S
index 711c338685..1bb8c838ad 100644
--- a/src/arch/arm64/armv8/cpu.S
+++ b/src/arch/arm64/armv8/cpu.S
@@ -1,8 +1,8 @@
 /*
- * Based on arch/arm/include/asm/cacheflush.h
+ * Optimized assembly for low-level CPU operations on ARM64 processors.
  *
- * Copyright (C) 1999-2002 Russell King.
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se>
+ * Copyright (c) 2014 Google Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -15,11 +15,77 @@
  */
 
 #include <arch/asm.h>
-#include <arch/cache_helpers.h>
+
+.macro dcache_apply_all crm
+	dsb	sy
+	mrs	x0, clidr_el1			// read CLIDR
+	and	w3, w0, #0x07000000		// narrow to LoC
+	lsr	w3, w3, #23			// left align LoC (low 4 bits)
+	cbz	w3, 5f				// done
+
+	mov	w10, #0				// w10 = 2 * cache level
+	mov	w8, #1				// w8 = constant 0b1
+
+1:	// next_level
+	add	w2, w10, w10, lsr #1		// calculate 3 * cache level
+	lsr	w1, w0, w2			// extract 3-bit cache type for this level
+	and	w1, w1, #0x7			// w1 = cache type
+	cmp	w1, #2				// is it data or i&d?
+	b.lt	4f				// skip
+	msr	csselr_el1, x10			// select current cache level
+	isb					// sync change of csselr
+	mrs	x1, ccsidr_el1			// w1 = read ccsidr
+	and	w2, w1, #7			// w2 = log2(linelen_bytes) - 4
+	add	w2, w2, #4			// w2 = log2(linelen_bytes)
+	ubfx	w4, w1, #3, #10			// w4 = associativity - 1 (also
+						// max way number)
+	clz	w5, w4				// w5 = 32 - log2(ways)
+						// (bit position of way in DC)
+	lsl	w9, w4, w5			// w9 = max way number
+						// (aligned for DC)
+	lsl	w16, w8, w5			// w16 = amount to decrement (way
+						// number per iteration)
+2:	// next_way
+	ubfx	w7, w1, #13, #15		// w7 = max set #, right aligned
+	lsl	w7, w7, w2			// w7 = max set #, DC aligned
+	lsl	w17, w8, w2			// w17 = amount to decrement (set
+						// number per iteration)
+
+3:	// next_set
+	orr	w11, w10, w9			// w11 = combine way # & cache #
+	orr	w11, w11, w7			// ... and set #
+	dc	\crm, x11			// clean and/or invalidate line
+	subs	w7, w7, w17			// decrement set number
+	b.ge	3b				// next_set
+	subs	x9, x9, x16			// decrement way number
+	b.ge	2b				// next_way
+
+4:	// skip
+	add	w10, w10, #2			// increment 2 * cache level
+	cmp	w3, w10				// went beyond LoC?
+	b.gt	1b				// next_level
+
+5:	// done
+	dsb	sy
+	isb
+	ret
+.endm
+
+ENTRY(dcache_invalidate_all)
+	dcache_apply_all crm=isw
+ENDPROC(dcache_invalidate_all)
+
+ENTRY(dcache_clean_all)
+	dcache_apply_all crm=csw
+ENDPROC(dcache_clean_all)
+
+ENTRY(dcache_clean_invalidate_all)
+	dcache_apply_all crm=cisw
+ENDPROC(dcache_clean_invalidate_all)
 
 /*
  * Bring an ARMv8 processor we just gained control of (e.g. from IROM) into a
- * known state regarding caches/SCTLR/PSTATE. Completely cleans and invalidates
+ * known state regarding caches/SCTLR/PSTATE. Completely invalidates
  * icache/dcache, disables MMU and dcache (if active), and enables unaligned
  * accesses, icache and branch prediction (if inactive). Seeds the stack and
  * initializes SP_EL0. Clobbers R22 and R23.
@@ -41,9 +107,8 @@ ENTRY(arm64_init_cpu)
 	msr	sctlr_el3, x22
 	isb
 
-	/* Flush and invalidate dcache */
-	mov	x0, #DCCISW
-	bl	flush_dcache_all
+	/* Invalidate dcache */
+	bl	dcache_invalidate_all
 
 	/* Deactivate MMU (0), Alignment Check (1) and DCache (2) */
 	and	x22, x22, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2)
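To make the set/way arithmetic in dcache_apply_all easier to follow, here is a rough C model of the same walk. It is illustrative only and not part of the commit: read_clidr_el1(), read_ccsidr_el1(), select_cache_level() and dc_cisw() are hypothetical stand-ins for the MRS/MSR/DC instructions the assembly issues directly, not real coreboot APIs.

/*
 * Sketch of the set/way walk performed by dcache_apply_all, modeled in C.
 * The helper functions below are assumed, not real coreboot interfaces.
 */
#include <stdint.h>

extern uint64_t read_clidr_el1(void);			/* MRS Xt, CLIDR_EL1 */
extern uint64_t read_ccsidr_el1(void);			/* MRS Xt, CCSIDR_EL1 */
extern void select_cache_level(uint64_t csselr);	/* MSR CSSELR_EL1, Xt + ISB */
extern void dc_cisw(uint64_t setway);			/* DC CISW, Xt */

static void dcache_apply_all_model(void)
{
	uint64_t clidr = read_clidr_el1();
	unsigned int loc = (clidr >> 24) & 0x7;		/* Level of Coherency */

	for (unsigned int level = 0; level < loc; level++) {
		unsigned int type = (clidr >> (level * 3)) & 0x7;

		if (type < 2)		/* 0 = no cache, 1 = icache only: skip */
			continue;

		select_cache_level(level << 1);		/* level in CSSELR[3:1] */

		uint64_t ccsidr = read_ccsidr_el1();
		unsigned int line_shift = (ccsidr & 0x7) + 4;	/* log2(linelen_bytes) */
		unsigned int max_way = (ccsidr >> 3) & 0x3ff;	/* associativity - 1 */
		unsigned int max_set = (ccsidr >> 13) & 0x7fff;	/* num_sets - 1 */
		/* Way index is left-aligned in the DC operand: bits [31:32-log2(ways)] */
		unsigned int way_shift = max_way ? __builtin_clz(max_way) : 32;

		for (unsigned int way = 0; way <= max_way; way++)
			for (unsigned int set = 0; set <= max_set; set++)
				dc_cisw(((uint64_t)way << way_shift) |
					((uint64_t)set << line_shift) |
					(level << 1));
	}
}

The model uses ordinary nested counters; the assembly instead pre-shifts the maximum way and set numbers into their operand bit positions (w9, w7) and subtracts one field unit per iteration (w16, w17), counting down with subs/b.ge. The clz on the way count is what computes the left-aligned bit position of the way index in the DC operand, and encoding the level in bits [3:1] is why the macro advances w10 in steps of 2.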