From 57afc5e0f2309ba9f7fbd171642f04c6da9d9976 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Mon, 5 Mar 2018 09:53:47 +0100 Subject: arch/arm64/armv8/mmu: Add support for 48bit VA The VA space needs to be extended to support 48bit, as on Cavium SoCs the MMIO starts at 1 << 47. The following changes were done to coreboot and libpayload: * Use page table lvl 0 * Increase VA bits to 48 * Enable 256TB in MMU controller * Add additional asserts Tested on Cavium SoC and two ARM64 Chromebooks. Change-Id: I89e6a4809b6b725c3945bad7fce82b0dfee7c262 Signed-off-by: Patrick Rudolph Reviewed-on: https://review.coreboot.org/24970 Tested-by: build bot (Jenkins) Reviewed-by: Julius Werner --- payloads/libpayload/arch/arm64/mmu.c | 25 +++++++++++----------- payloads/libpayload/include/arm64/arch/mmu.h | 9 ++++---- src/arch/arm64/armv8/mmu.c | 31 ++++++++++++++-------------- src/arch/arm64/include/armv8/arch/mmu.h | 9 ++++---- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/payloads/libpayload/arch/arm64/mmu.c b/payloads/libpayload/arch/arm64/mmu.c index d84f969dc6..c860ee0fc8 100644 --- a/payloads/libpayload/arch/arm64/mmu.c +++ b/payloads/libpayload/arch/arm64/mmu.c @@ -172,6 +172,7 @@ static uint64_t init_xlat_table(uint64_t base_addr, uint64_t size, uint64_t tag) { + uint64_t l0_index = (base_addr & L0_ADDR_MASK) >> L0_ADDR_SHIFT; uint64_t l1_index = (base_addr & L1_ADDR_MASK) >> L1_ADDR_SHIFT; uint64_t l2_index = (base_addr & L2_ADDR_MASK) >> L2_ADDR_SHIFT; uint64_t l3_index = (base_addr & L3_ADDR_MASK) >> L3_ADDR_SHIFT; @@ -179,12 +180,12 @@ static uint64_t init_xlat_table(uint64_t base_addr, uint64_t desc; uint64_t attr = get_block_attr(tag); - /* L1 table lookup - * If VA has bits more than L2 can resolve, lookup starts at L1 - * Assumption: we don't need L0 table in coreboot */ - if (BITS_PER_VA > L1_ADDR_SHIFT) { - if ((size >= L1_XLAT_SIZE) && - IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) { + /* L0 entry stores a table descriptor (doesn't support blocks) */ + table = get_next_level_table(&table[l0_index], L1_XLAT_SIZE); + + /* L1 table lookup */ + if ((size >= L1_XLAT_SIZE) && + IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) { /* If block address is aligned and size is greater than * or equal to size addressed by each L1 entry, we can * directly store a block desc */ @@ -192,13 +193,12 @@ static uint64_t init_xlat_table(uint64_t base_addr, table[l1_index] = desc; /* L2 lookup is not required */ return L1_XLAT_SIZE; - } - table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE); } - /* L2 table lookup - * If lookup was performed at L1, L2 table addr is obtained from L1 desc - * else, lookup starts at ttbr address */ + /* L1 entry stores a table descriptor */ + table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE); + + /* L2 table lookup */ if ((size >= L2_XLAT_SIZE) && IS_ALIGNED(base_addr, (1UL << L2_ADDR_SHIFT))) { /* If block address is aligned and size is greater than @@ -226,6 +226,7 @@ static void sanity_check(uint64_t addr, uint64_t size) { assert(!(addr & GRANULE_SIZE_MASK) && !(size & GRANULE_SIZE_MASK) && + (addr + size < (1UL << BITS_PER_VA)) && size >= GRANULE_SIZE); } @@ -344,7 +345,7 @@ void mmu_enable(void) /* Initialize TCR flags */ raw_write_tcr_current(TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC | - TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_64GB | + TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_256TB | TCR_TBI_USED); /* Initialize TTBR */ diff --git a/payloads/libpayload/include/arm64/arch/mmu.h b/payloads/libpayload/include/arm64/arch/mmu.h index 2f87d09681..3cea696f64 100644 --- a/payloads/libpayload/include/arm64/arch/mmu.h +++ b/payloads/libpayload/include/arm64/arch/mmu.h @@ -83,7 +83,7 @@ extern char _start[], _end[]; /* XLAT Table Init Attributes */ #define VA_START 0x0 -#define BITS_PER_VA 33 +#define BITS_PER_VA 48 #define MIN_64_BIT_ADDR (1UL << 32) /* Granule size of 4KB is being used */ #define GRANULE_SIZE_SHIFT 12 @@ -92,14 +92,12 @@ extern char _start[], _end[]; #define GRANULE_SIZE_MASK ((1 << GRANULE_SIZE_SHIFT) - 1) #define BITS_RESOLVED_PER_LVL (GRANULE_SIZE_SHIFT - 3) +#define L0_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 3) #define L1_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2) #define L2_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1) #define L3_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0) -#if BITS_PER_VA > L1_ADDR_SHIFT + BITS_RESOLVED_PER_LVL - #error "BITS_PER_VA too large (we don't have L0 table support)" -#endif - +#define L0_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L0_ADDR_SHIFT) #define L1_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT) #define L2_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT) #define L3_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT) @@ -109,6 +107,7 @@ extern char _start[], _end[]; #define L3_XLAT_SIZE (1UL << L3_ADDR_SHIFT) #define L2_XLAT_SIZE (1UL << L2_ADDR_SHIFT) #define L1_XLAT_SIZE (1UL << L1_ADDR_SHIFT) +#define L0_XLAT_SIZE (1UL << L0_ADDR_SHIFT) /* Block indices required for MAIR */ #define BLOCK_INDEX_MEM_DEV_NGNRNE 0 diff --git a/src/arch/arm64/armv8/mmu.c b/src/arch/arm64/armv8/mmu.c index 55bd703d44..a24e7c6fdd 100644 --- a/src/arch/arm64/armv8/mmu.c +++ b/src/arch/arm64/armv8/mmu.c @@ -141,6 +141,7 @@ static uint64_t init_xlat_table(uint64_t base_addr, uint64_t size, uint64_t tag) { + uint64_t l0_index = (base_addr & L0_ADDR_MASK) >> L0_ADDR_SHIFT; uint64_t l1_index = (base_addr & L1_ADDR_MASK) >> L1_ADDR_SHIFT; uint64_t l2_index = (base_addr & L2_ADDR_MASK) >> L2_ADDR_SHIFT; uint64_t l3_index = (base_addr & L3_ADDR_MASK) >> L3_ADDR_SHIFT; @@ -148,12 +149,12 @@ static uint64_t init_xlat_table(uint64_t base_addr, uint64_t desc; uint64_t attr = get_block_attr(tag); - /* L1 table lookup - * If VA has bits more than L2 can resolve, lookup starts at L1 - * Assumption: we don't need L0 table in coreboot */ - if (BITS_PER_VA > L1_ADDR_SHIFT) { - if ((size >= L1_XLAT_SIZE) && - IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) { + /* L0 entry stores a table descriptor (doesn't support blocks) */ + table = get_next_level_table(&table[l0_index], L1_XLAT_SIZE); + + /* L1 table lookup */ + if ((size >= L1_XLAT_SIZE) && + IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) { /* If block address is aligned and size is greater than * or equal to size addressed by each L1 entry, we can * directly store a block desc */ @@ -161,13 +162,12 @@ static uint64_t init_xlat_table(uint64_t base_addr, table[l1_index] = desc; /* L2 lookup is not required */ return L1_XLAT_SIZE; - } - table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE); } - /* L2 table lookup - * If lookup was performed at L1, L2 table addr is obtained from L1 desc - * else, lookup starts at ttbr address */ + /* L1 entry stores a table descriptor */ + table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE); + + /* L2 table lookup */ if ((size >= L2_XLAT_SIZE) && IS_ALIGNED(base_addr, (1UL << L2_ADDR_SHIFT))) { /* If block address is aligned and size is greater than @@ -195,6 +195,7 @@ static void sanity_check(uint64_t addr, uint64_t size) { assert(!(addr & GRANULE_SIZE_MASK) && !(size & GRANULE_SIZE_MASK) && + (addr + size < (1UL << BITS_PER_VA)) && size >= GRANULE_SIZE); } @@ -202,7 +203,7 @@ static void sanity_check(uint64_t addr, uint64_t size) * Desc : Returns the page table entry governing a specific address. */ static uint64_t get_pte(void *addr) { - int shift = BITS_PER_VA > L1_ADDR_SHIFT ? L1_ADDR_SHIFT : L2_ADDR_SHIFT; + int shift = L0_ADDR_SHIFT; uint64_t *pte = (uint64_t *)_ttb; while (1) { @@ -257,8 +258,8 @@ void mmu_init(void) for (; _ettb - (u8 *)table > 0; table += GRANULE_SIZE/sizeof(*table)) table[0] = UNUSED_DESC; - /* Initialize the root table (L1) to be completely unmapped. */ - uint64_t *root = setup_new_table(INVALID_DESC, L1_XLAT_SIZE); + /* Initialize the root table (L0) to be completely unmapped. */ + uint64_t *root = setup_new_table(INVALID_DESC, L0_XLAT_SIZE); assert((u8 *)root == _ttb); /* Initialize TTBR */ @@ -269,7 +270,7 @@ void mmu_init(void) /* Initialize TCR flags */ raw_write_tcr_el3(TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC | - TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_64GB | + TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_256TB | TCR_TBI_USED); } diff --git a/src/arch/arm64/include/armv8/arch/mmu.h b/src/arch/arm64/include/armv8/arch/mmu.h index a8120736fa..f0e551e52d 100644 --- a/src/arch/arm64/include/armv8/arch/mmu.h +++ b/src/arch/arm64/include/armv8/arch/mmu.h @@ -69,7 +69,7 @@ /* XLAT Table Init Attributes */ #define VA_START 0x0 -#define BITS_PER_VA 33 +#define BITS_PER_VA 48 /* Granule size of 4KB is being used */ #define GRANULE_SIZE_SHIFT 12 #define GRANULE_SIZE (1 << GRANULE_SIZE_SHIFT) @@ -77,14 +77,12 @@ #define GRANULE_SIZE_MASK ((1 << GRANULE_SIZE_SHIFT) - 1) #define BITS_RESOLVED_PER_LVL (GRANULE_SIZE_SHIFT - 3) +#define L0_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 3) #define L1_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2) #define L2_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1) #define L3_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0) -#if BITS_PER_VA > L1_ADDR_SHIFT + BITS_RESOLVED_PER_LVL - #error "BITS_PER_VA too large (we don't have L0 table support)" -#endif - +#define L0_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L0_ADDR_SHIFT) #define L1_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT) #define L2_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT) #define L3_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT) @@ -94,6 +92,7 @@ #define L3_XLAT_SIZE (1UL << L3_ADDR_SHIFT) #define L2_XLAT_SIZE (1UL << L2_ADDR_SHIFT) #define L1_XLAT_SIZE (1UL << L1_ADDR_SHIFT) +#define L0_XLAT_SIZE (1UL << L0_ADDR_SHIFT) /* Block indices required for MAIR */ #define BLOCK_INDEX_MEM_DEV_NGNRNE 0 -- cgit v1.2.3