summary refs log tree commit diff
path: root/src/arch/arm64/armv8/cpu.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/arch/arm64/armv8/cpu.S')
-rw-r--r-- src/arch/arm64/armv8/cpu.S 81
1 files changed, 73 insertions, 8 deletions
diff --git a/src/arch/arm64/armv8/cpu.S b/src/arch/arm64/armv8/cpu.S
index 711c338685..1bb8c838ad 100644
--- a/src/arch/arm64/armv8/cpu.S
+++ b/src/arch/arm64/armv8/cpu.S
@@ -1,8 +1,8 @@
/*
- * Based on arch/arm/include/asm/cacheflush.h
+ * Optimized assembly for low-level CPU operations on ARM64 processors.
*
- * Copyright (C) 1999-2002 Russell King.
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se>
+ * Copyright (c) 2014 Google Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -15,11 +15,77 @@
*/
#include <arch/asm.h>
-#include <arch/cache_helpers.h>
+
+.macro dcache_apply_all crm // crm = DC set/way operation to issue (users in this file pass isw, csw, cisw)
+ dsb sy // barrier before starting the set/way walk
+ mrs x0, clidr_el1 // x0 = CLIDR_EL1, kept for the whole walk
+ and w3, w0, #0x07000000 // keep only bits [26:24] (the LoC field)
+ lsr w3, w3, #23 // w3 = 2 * LoC, i.e. same scale as w10
+ cbz w3, 5f // LoC == 0: no levels to walk, done
+
+ mov w10, #0 // w10 = 2 * cache level (zero-based); also the CSSELR value
+ mov w8, #1 // w8 = constant 1, shifted below to build step sizes
+
+1: // next_level: examine the cache level selected by w10
+ add w2, w10, w10, lsr #1 // w2 = w10 + w10/2 = 3 * cache level
+ lsr w1, w0, w2 // move this level's 3-bit cache type to bit 0
+ and w1, w1, #0x7 // w1 = cache type
+ cmp w1, #2 // is it data or i&d?
+ b.lt 4f // type < 2: skip this level
+ msr csselr_el1, x10 // select current cache level
+ isb // sync change of csselr before reading ccsidr
+ mrs x1, ccsidr_el1 // x1 = CCSIDR of the selected cache
+ and w2, w1, #7 // w2 = log2(linelen_bytes) - 4
+ add w2, w2, #4 // w2 = log2(linelen_bytes) = shift of the set field
+ ubfx w4, w1, #3, #10 // w4 = associativity - 1 (also
+ // max way number)
+ clz w5, w4 // w5 = leading zero count of max way number
+ // (used as the shift of the way field in the DC operand)
+ lsl w9, w4, w5 // w9 = max way number
+ // (shifted into place for DC)
+ lsl w16, w8, w5 // w16 = 1 << w5: amount to decrement (way
+ // number per iteration)
+2: // next_way: restart the set counter for the way held in w9
+ ubfx w7, w1, #13, #15 // w7 = max set #, right aligned
+ lsl w7, w7, w2 // w7 = max set #, shifted into place for DC
+ lsl w17, w8, w2 // w17 = 1 << w2: amount to decrement (set
+ // number per iteration)
+
+3: // next_set: issue one DC operation for (level, way, set)
+ orr w11, w10, w9 // w11 = combine way # & cache #
+ orr w11, w11, w7 // ... and set #
+ dc \crm, x11 // clean and/or invalidate line
+ subs w7, w7, w17 // decrement set number
+ b.ge 3b // repeat until the set number drops below zero
+ subs x9, x9, x16 // decrement way number; done in 64 bits because the shifted way value can have bit 31 set
+ b.ge 2b // repeat until the way number drops below zero
+
+4: // skip: advance to the next cache level
+ add w10, w10, #2 // increment 2 * cache level
+ cmp w3, w10 // compare 2 * LoC with 2 * next level
+ b.gt 1b // still below LoC: process next_level
+
+5: // done
+ dsb sy // barriers after the set/way walk
+ isb
+ ret // the macro body itself returns; x0-x5, x7-x11, x16, x17 and flags were overwritten
+.endm
+
+ENTRY(dcache_invalidate_all)
+ dcache_apply_all isw // walk all sets/ways issuing DC ISW (invalidate); macro supplies the ret
+ENDPROC(dcache_invalidate_all)
+
+ENTRY(dcache_clean_all)
+ dcache_apply_all csw // walk all sets/ways issuing DC CSW (clean); macro supplies the ret
+ENDPROC(dcache_clean_all)
+
+ENTRY(dcache_clean_invalidate_all)
+ dcache_apply_all cisw // walk all sets/ways issuing DC CISW (clean + invalidate); macro supplies the ret
+ENDPROC(dcache_clean_invalidate_all)
/*
* Bring an ARMv8 processor we just gained control of (e.g. from IROM) into a
- * known state regarding caches/SCTLR/PSTATE. Completely cleans and invalidates
+ * known state regarding caches/SCTLR/PSTATE. Completely invalidates
* icache/dcache, disables MMU and dcache (if active), and enables unaligned
* accesses, icache and branch prediction (if inactive). Seeds the stack and
* initializes SP_EL0. Clobbers R22 and R23.
@@ -41,9 +107,8 @@ ENTRY(arm64_init_cpu)
msr sctlr_el3, x22
isb
- /* Flush and invalidate dcache */
- mov x0, #DCCISW
- bl flush_dcache_all
+ /* Invalidate dcache */
+ bl dcache_invalidate_all
/* Deactivate MMU (0), Alignment Check (1) and DCache (2) */
and x22, x22, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2)