summaryrefslogtreecommitdiff
path: root/src/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu')
-rw-r--r--src/cpu/i386/entry16.inc112
-rw-r--r--src/cpu/i386/entry16.lds2
-rw-r--r--src/cpu/i386/entry32.inc55
-rw-r--r--src/cpu/i386/entry32.lds14
-rw-r--r--src/cpu/i386/reset16.inc27
-rw-r--r--src/cpu/i386/reset16.lds14
-rw-r--r--src/cpu/i386/reset32.inc10
-rw-r--r--src/cpu/i386/reset32.lds14
-rw-r--r--src/cpu/k8/cpufixup.c59
-rw-r--r--src/cpu/k8/earlymtrr.inc99
-rw-r--r--src/cpu/p5/cpuid.c222
-rw-r--r--src/cpu/p6/mtrr.c356
12 files changed, 984 insertions, 0 deletions
diff --git a/src/cpu/i386/entry16.inc b/src/cpu/i386/entry16.inc
new file mode 100644
index 0000000000..c357504735
--- /dev/null
+++ b/src/cpu/i386/entry16.inc
@@ -0,0 +1,112 @@
+/*
+This software and ancillary information (herein called SOFTWARE )
+called LinuxBIOS is made available under the terms described
+here. The SOFTWARE has been approved for release with associated
+LA-CC Number 00-34 . Unless otherwise indicated, this SOFTWARE has
+been authored by an employee or employees of the University of
+California, operator of the Los Alamos National Laboratory under
+Contract No. W-7405-ENG-36 with the U.S. Department of Energy. The
+U.S. Government has rights to use, reproduce, and distribute this
+SOFTWARE. The public may copy, distribute, prepare derivative works
+and publicly display this SOFTWARE without charge, provided that this
+Notice and any statement of authorship are reproduced on all copies.
+Neither the Government nor the University makes any warranty, express
+or implied, or assumes any liability or responsibility for the use of
+this SOFTWARE. If SOFTWARE is modified to produce derivative works,
+such modified SOFTWARE should be clearly marked, so as not to confuse
+it with the version available from LANL.
+ */
+/* Copyright 2000, Ron Minnich, Advanced Computing Lab, LANL
+ * rminnich@lanl.gov
+ */
+
+
+/** Start code to put an i386 or later processor into 32-bit
+ * protected mode.
+ */
+
+/* .section ".rom.text" */
+#include <arch/rom_segs.h>
+.code16
+.globl EXT(_start)
+.type EXT(_start), @function
+
+EXT(_start):
+ cli
+
+/* thanks to kmliu@sis.tw.com for this TBL fix ... */
+/**/
+/* IMMEDIATELY invalidate the translation lookaside buffer before executing*/
+/* any further code. Even though paging is disabled we could still get*/
+/*false address translations due to the TLB if we didn't invalidate it.*/
+/**/
+ xorl %eax, %eax
+ movl %eax, %cr3 /* Invalidate TLB*/
+
+ /* invalidate the cache */
+ invd
+
+ /* Note: gas handles memory addresses in 16 bit code very poorly.
+ * In particular it doesn't appear to have a directive allowing you
+ * associate a section or even an absolute offset with a segment register.
+ *
+ * This means that anything except cs:ip relative offsets are
+ * a real pain in 16 bit mode. And explains why it is almost
+ * imposible to get gas to do lgdt correctly.
+ *
+ * One way to work around this is to have the linker do the
+ * math instead of the assembler. This solves the very
+ * pratical problem of being able to write code that can
+ * be relocated.
+ *
+ * An lgdt call before we have memory enabled cannot be
+ * position independent, as we cannot execute a call
+ * instruction to get our current instruction pointer.
+ * So while this code is relocateable it isn't arbitrarily
+ * relocatable.
+ *
+ * The criteria for relocation have been relaxed to their
+ * utmost, so that we can use the same code for both
+ * our initial entry point and startup of the second cpu.
+ * The code assumes when executing at _start that:
+ * (((cs & 0xfff) == 0) and (ip == _start & 0xffff))
+ * or
+ * ((cs == anything) and (ip == 0)).
+ *
+ * The restrictions in reset16.inc mean that _start initially
+ * must be loaded at or above 0xffff0000 or below 0x100000.
+ *
+ * The linker scripts computs gdtptr16_offset by simply returning
+ * the low 16 bits. This means that the intial segment used
+ * when start is called must be 64K aligned. This should not
+ * restrict the address as the ip address can be anything.
+ */
+
+ movw %cs, %ax
+ shlw $4, %ax
+ movw $EXT(gdtptr16_offset), %bx
+ subw %ax, %bx
+ data32 lgdt %cs:(%bx)
+
+ movl %cr0, %eax
+ andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
+ orl $0x60000001, %eax /* CD, NW, PE = 1 */
+ movl %eax, %cr0
+
+ /* Now that we are in protected mode jump to a 32 bit code segment. */
+ data32 ljmp $ROM_CODE_SEG, $__protected_start
+
+/** The gdt has a 4 Gb code segment at 0x10, and a 4 GB data segment
+ * at 0x18; these are Linux-compatible.
+ */
+
+.align 4
+.globl EXT(gdtptr16)
+EXT(gdtptr16):
+ .word gdt_end - gdt -1 /* compute the table limit */
+ .long gdt /* we know the offset */
+
+.globl EXT(_estart)
+EXT(_estart):
+ .code32
+
diff --git a/src/cpu/i386/entry16.lds b/src/cpu/i386/entry16.lds
new file mode 100644
index 0000000000..db37e66302
--- /dev/null
+++ b/src/cpu/i386/entry16.lds
@@ -0,0 +1,2 @@
+ gdtptr16_offset = gdtptr16 & 0xffff;
+ _start_offset = _start & 0xffff;
diff --git a/src/cpu/i386/entry32.inc b/src/cpu/i386/entry32.inc
new file mode 100644
index 0000000000..8ccd638e95
--- /dev/null
+++ b/src/cpu/i386/entry32.inc
@@ -0,0 +1,55 @@
+/* For starting linuxBIOS in protected mode */
+
+#include <arch/rom_segs.h>
+
+/* .section ".rom.text" */
+ .code32
+
+ .align 4
+.globl EXT(gdtptr)
+
+gdt:
+EXT(gdtptr):
+ .word gdt_end - gdt -1 /* compute the table limit */
+ .long gdt /* we know the offset */
+ .word 0
+
+/* flat code segment */
+ .word 0xffff, 0x0000
+ .byte 0x00, 0x9b, 0xcf, 0x00
+
+/* flat data segment */
+ .word 0xffff, 0x0000
+ .byte 0x00, 0x93, 0xcf, 0x00
+
+gdt_end:
+
+
+/*
+ * When we come here we are in protected mode. We expand
+ * the stack and copies the data segment from ROM to the
+ * memory.
+ *
+ * After that, we call the chipset bootstrap routine that
+ * does what is left of the chipset initialization.
+ *
+ * NOTE aligned to 4 so that we are sure that the prefetch
+ * cache will be reloaded.
+ */
+ .align 4
+.globl EXT(protected_start)
+EXT(protected_start):
+
+ lgdt %cs:gdtptr
+ ljmp $ROM_CODE_SEG, $__protected_start
+
+__protected_start:
+ intel_chip_post_macro(0x10) /* post 10 */
+
+ movw $ROM_DATA_SEG, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ movw %ax, %fs
+ movw %ax, %gs
+
diff --git a/src/cpu/i386/entry32.lds b/src/cpu/i386/entry32.lds
new file mode 100644
index 0000000000..37a75ba6ae
--- /dev/null
+++ b/src/cpu/i386/entry32.lds
@@ -0,0 +1,14 @@
+/*
+ _cache_ram_seg_base = DEFINED(CACHE_RAM_BASE)? CACHE_RAM_BASE - _rodata : 0;
+ _cache_ram_seg_base_low = (_cache_ram_seg_base) & 0xffff;
+ _cache_ram_seg_base_middle = (_cache_ram_seg_base >> 16) & 0xff;
+ _cache_ram_seg_base_high = (_cache_ram_seg_base >> 24) & 0xff;
+
+ _rom_code_seg_base = _ltext - _text;
+ _rom_code_seg_base_low = (_rom_code_seg_base) & 0xffff;
+ _rom_code_seg_base_middle = (_rom_code_seg_base >> 16) & 0xff;
+ _rom_code_seg_base_high = (_rom_code_seg_base >> 24) & 0xff;
+*/
+
+
+
diff --git a/src/cpu/i386/reset16.inc b/src/cpu/i386/reset16.inc
new file mode 100644
index 0000000000..7c911d9ff2
--- /dev/null
+++ b/src/cpu/i386/reset16.inc
@@ -0,0 +1,27 @@
+ .section ".reset"
+ .code16
+.globl EXT(reset_vector)
+EXT(reset_vector):
+#if _ROMBASE >= 0xffff0000
+ /* Hmm.
+ * _start_offset is the low 16 bits of _start.
+ * Theoretically we should have problems but it compiles
+ * and links properly with binutils 2.9.5 & 2.10.90
+ * This is probably a case that needs fixing in binutils.
+ * And then we can just use _start.
+ * We also need something like the assume directive in
+ * other assemblers to tell it where the segment registers
+ * are pointing in memory right now.
+ */
+ jmp EXT(_start_offset)
+#elif (_ROMBASE < 0x100000)
+ ljmp $((_ROMBASE & 0xf0000)>>4),$EXT(_start_offset);
+#else
+#error _ROMBASE is an unsupported value
+#endif
+
+ . = 0x8;
+ .code32
+ jmp EXT(protected_start)
+
+ .previous
diff --git a/src/cpu/i386/reset16.lds b/src/cpu/i386/reset16.lds
new file mode 100644
index 0000000000..80f2fc0c6f
--- /dev/null
+++ b/src/cpu/i386/reset16.lds
@@ -0,0 +1,14 @@
+/*
+ * _ROMTOP : The top of the rom used where we
+ * need to put the reset vector.
+ */
+
+SECTIONS {
+ _ROMTOP = (_ROMBASE >= 0xffff0000)? 0xfffffff0 : 0xffff0;
+ . = _ROMTOP;
+ .reset . : {
+ *(.reset)
+ . = 15 ;
+ BYTE(0x00);
+ }
+}
diff --git a/src/cpu/i386/reset32.inc b/src/cpu/i386/reset32.inc
new file mode 100644
index 0000000000..ec743b70cc
--- /dev/null
+++ b/src/cpu/i386/reset32.inc
@@ -0,0 +1,10 @@
+ .section ".reset"
+ .code16
+.globl EXT(reset_vector)
+EXT(reset_vector):
+
+ . = 0x8;
+ .code32
+ jmp EXT(protected_start)
+
+ .previous
diff --git a/src/cpu/i386/reset32.lds b/src/cpu/i386/reset32.lds
new file mode 100644
index 0000000000..fa6db86b1a
--- /dev/null
+++ b/src/cpu/i386/reset32.lds
@@ -0,0 +1,14 @@
+/*
+ * _ROMTOP : The top of the rom used where we
+ * need to put the reset vector.
+ */
+
+SECTIONS {
+ _ROMTOP = _ROMBASE + ROM_IMAGE_SIZE - 0x10;
+ . = _ROMTOP;
+ .reset (.): {
+ *(.reset)
+ . = 15 ;
+ BYTE(0x00);
+ }
+}
diff --git a/src/cpu/k8/cpufixup.c b/src/cpu/k8/cpufixup.c
new file mode 100644
index 0000000000..9f306d1156
--- /dev/null
+++ b/src/cpu/k8/cpufixup.c
@@ -0,0 +1,59 @@
+/* Needed so the AMD K8 runs correctly. */
+#include <console/console.h>
+#include <mem.h>
+#include <cpu/p6/msr.h>
+
+#define TOP_MEM 0xc001001A
+#define TOP_MEM2 0xc001001D
+#define IORR_FIRST 0xC0010016
+#define IORR_LAST 0xC0010019
+#define SYSCFG 0xC0010010
+
+#define MTRRVARDRAMEN (1 << 20)
+
+void k8_cpufixup(struct mem_range *mem)
+{
+ unsigned long lo = 0, hi = 0, i;
+ unsigned long ram_megabytes;
+
+ /* For now no Athlon board has significant holes in it's
+ * address space so just find the last memory region
+ * and compute the end of memory from that.
+ */
+ for(i = 0; mem[i].sizek; i++)
+ ;
+ if (i == 0)
+ return;
+ ram_megabytes = (mem[i-1].basek + mem[i-1].sizek) *1024;
+
+
+ // 8 MB alignment please
+ ram_megabytes += 0x7fffff;
+ ram_megabytes &= (~0x7fffff);
+
+ // set top_mem registers to ram size
+ printk_spew("Setting top_mem to 0x%x\n", ram_megabytes);
+ rdmsr(TOP_MEM, lo, hi);
+ printk_spew("TOPMEM was 0x%02x:0x%02x\n", hi, lo);
+ hi = 0;
+ lo = ram_megabytes;
+ wrmsr(TOP_MEM, lo, hi);
+
+ // I am setting this even though I won't enable it
+ wrmsr(TOP_MEM2, lo, hi);
+
+ /* zero the IORR's before we enable to prevent
+ * undefined side effects
+ */
+ lo = hi = 0;
+ for (i = IORR_FIRST; i <= IORR_LAST; i++)
+ wrmsr(i, lo, hi);
+
+ rdmsr(SYSCFG, lo, hi);
+ printk_spew("SYSCFG was 0x%x:0x%x\n", hi, lo);
+ lo |= MTRRVARDRAMEN;
+ wrmsr(SYSCFG, lo, hi);
+ rdmsr(SYSCFG, lo, hi);
+ printk_spew("SYSCFG IS NOW 0x%x:0x%x\n", hi, lo);
+}
+
diff --git a/src/cpu/k8/earlymtrr.inc b/src/cpu/k8/earlymtrr.inc
new file mode 100644
index 0000000000..7cd8443618
--- /dev/null
+++ b/src/cpu/k8/earlymtrr.inc
@@ -0,0 +1,99 @@
+#include <cpu/k8/mtrr.h>
+
+/* The fixed and variable MTRRs are powered-up with random values, clear them to
+ * MTRR_TYPE_UNCACHABLE for safty reason
+ */
+
+earlymtrr_start:
+ xorl %eax, %eax # clear %eax and %edx
+ xorl %edx, %edx #
+ movl $fixed_mtrr_msr, %esi
+
+clear_fixed_var_mtrr:
+ lodsl (%esi), %eax
+ testl %eax, %eax
+ jz clear_fixed_var_mtrr_out
+
+ movl %eax, %ecx
+ xorl %eax, %eax
+ wrmsr
+
+ jmp clear_fixed_var_mtrr
+clear_fixed_var_mtrr_out:
+
+/* enable memory access for 0 - 8MB using top_mem */
+ movl $TOP_MEM, %ecx
+ xorl %edx, %edx
+ movl $0x0800000, %eax
+ wrmsr
+
+set_var_mtrr:
+ /* enable caching for 0 - 128MB using variable mtrr */
+ movl $0x200, %ecx
+ rdmsr
+ andl $0xfffffff0, %edx
+ orl $0x00000000, %edx
+ andl $0x00000f00, %eax
+ orl $0x00000006, %eax
+ wrmsr
+
+ movl $0x201, %ecx
+ rdmsr
+ andl $0xfffffff0, %edx
+ orl $0x0000000f, %edx
+ andl $0x000007ff, %eax
+ orl $0xf0000800, %eax
+ wrmsr
+
+#if defined(XIP_ROM_SIZE) && defined(XIP_ROM_BASE)
+ /* enable write protect caching so we can do execute in place
+ * on the flash rom.
+ */
+ movl $0x202, %ecx
+ xorl %edx, %edx
+ movl $(XIP_ROM_BASE | 0x005), %eax
+ wrmsr
+
+ movl $0x203, %ecx
+ movl $0x0000000f, %edx
+ movl $(~(XIP_ROM_SIZE - 1) | 0x800), %eax
+ wrmsr
+#endif /* XIP_ROM_SIZE && XIP_ROM_BASE */
+
+enable_mtrr:
+ /* Set the default memory type and enable fixed and variable MTRRs */
+ movl $0x2ff, %ecx
+ xorl %edx, %edx
+ /* Enable Variable MTRRs */
+ movl $0x00000800, %eax
+ wrmsr
+
+ /* Enable the MTRRs in SYSCFG */
+ movl $SYSCFG_MSR, %ecx
+ rdmsr
+ orl $(SYSCFG_MSR_MtrrVarDramEn), %eax
+ wrmsr
+
+ /* enable cache */
+ movl %cr0, %eax
+ andl $0x9fffffff,%eax
+ movl %eax, %cr0
+
+ jmp earlymtrr_end
+
+fixed_mtrr_msr:
+ .long 0x250, 0x258, 0x259
+ .long 0x268, 0x269, 0x26A
+ .long 0x26B, 0x26C, 0x26D
+ .long 0x26E, 0x26F
+var_mtrr_msr:
+ .long 0x200, 0x201, 0x202, 0x203
+ .long 0x204, 0x205, 0x206, 0x207
+ .long 0x208, 0x209, 0x20A, 0x20B
+ .long 0x20C, 0x20D, 0x20E, 0x20F
+var_iorr_msr:
+ .long 0xC0010016, 0xC0010017, 0xC0010018, 0xC0010019
+mem_top:
+ .long 0xC001001A, 0xC001001D
+ .long 0x000 /* NULL, end of table */
+earlymtrr_end:
diff --git a/src/cpu/p5/cpuid.c b/src/cpu/p5/cpuid.c
new file mode 100644
index 0000000000..d90cc2c124
--- /dev/null
+++ b/src/cpu/p5/cpuid.c
@@ -0,0 +1,222 @@
+#ifndef lint
+static char rcsid[] = "$Id$";
+#endif
+
+#include <console/console.h>
+#include <cpu/p5/cpuid.h>
+#ifdef i586
+#include <cpu/p6/msr.h>
+#endif
+
+
+int mtrr_check(void)
+{
+#ifdef i686
+ /* Only Pentium Pro and later have MTRR */
+ unsigned long low, high;
+
+ printk_debug("\nMTRR check\n");
+
+ rdmsr(0x2ff, low, high);
+ low = low >> 10;
+
+ printk_debug("Fixed MTRRs : ");
+ if (low & 0x01)
+ printk_debug("Enabled\n");
+ else
+ printk_debug("Disabled\n");
+
+ printk_debug("Variable MTRRs: ");
+ if (low & 0x02)
+ printk_debug("Enabled\n");
+ else
+ printk_debug("Disabled\n");
+
+ printk_debug("\n");
+
+ post_code(0x93);
+ return ((int) low);
+#else /* !i686 */
+ return 0;
+#endif /* i686 */
+}
+
+void display_cpuid(void)
+{
+ int op, eax, ebx, ecx, edx;
+ int max_op;
+
+ max_op = 0;
+
+ printk_debug("\n");
+
+ for (op = 0; op <= max_op; op++) {
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ if (0 == op) {
+ max_op = eax;
+ printk_debug("Max cpuid index : %d\n", eax);
+ printk_debug("Vendor ID : "
+ "%c%c%c%c%c%c%c%c%c%c%c%c\n",
+ ebx, ebx >> 8, ebx >> 16, ebx >> 24, edx,
+ edx >> 8, edx >> 16, edx >> 24, ecx, ecx >> 8,
+ ecx >> 16, ecx >> 24);
+ } else if (1 == op) {
+ printk_debug("Processor Type : 0x%02x\n",
+ (eax >> 12) & 0x03);
+ printk_debug("Processor Family : 0x%02x\n",
+ (eax >> 8) & 0x0f);
+ printk_debug("Processor Model : 0x%02x\n",
+ (eax >> 4) & 0x0f);
+ printk_debug("Processor Mask : 0x%02x\n",
+ (ecx >> 0) & 0x0f);
+ printk_debug("Processor Stepping : 0x%02x\n",
+ (eax >> 0) & 0x0f);
+ printk_debug("Feature flags : 0x%08x\n", edx);
+ } else if (2 == op) {
+ int desc[4];
+ int ii;
+ int _desc;
+
+ printk_debug("\n");
+
+ printk_debug("Cache/TLB descriptor values: %d "
+ "reads required\n", eax & 0xff);
+
+ desc[0] = eax;
+ desc[1] = ebx;
+ desc[2] = ecx;
+ desc[3] = edx;
+
+ for (ii = 1; ii < 16; ii++) {
+ if (desc[ii >> 2] & 0x80000000) {
+ printk_debug("reserved descriptor\n");
+ continue;
+ }
+
+ _desc =
+ ((desc[ii >> 2]) >> ((ii & 0x3) << 3))
+ & 0xff;
+ printk_debug("Desc 0x%02x : ", _desc);
+
+ switch (_desc) {
+ case 0x00:
+ printk_debug("null\n");
+ break;
+
+ case 0x01:
+ printk_debug("Instr TLB: "
+ "4KB pages, "
+ "4-way set assoc, "
+ "32 entries\n");
+ break;
+
+ case 0x02:
+ printk_debug("Instr TLB: "
+ "4MB pages, "
+ "fully assoc, " "2 entries\n");
+ break;
+
+ case 0x03:
+ printk_debug("Data TLB: "
+ "4KB pages, "
+ "4-way set assoc, "
+ "64 entries\n");
+ break;
+
+ case 0x04:
+ printk_debug("Data TLB: "
+ "4MB pages, "
+ "4-way set assoc, "
+ "8 entries\n");
+ break;
+
+ case 0x06:
+ printk_debug("Inst cache: "
+ "8K bytes, "
+ "4-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x08:
+ printk_debug("Inst cache: "
+ "16K bytes, "
+ "4-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x0a:
+ printk_debug("Data cache: "
+ "8K bytes, "
+ "2-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x0c:
+ printk_debug("Data cache: "
+ "16K bytes, "
+ "2-way or 4-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x40:
+ printk_debug("No L2 cache\n");
+ break;
+
+ case 0x41:
+ printk_debug("L2 Unified cache: "
+ "128K bytes, "
+ "4-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x42:
+ printk_debug("L2 Unified cache: "
+ "256K bytes, "
+ "4-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x43:
+ printk_debug("L2 Unified cache: "
+ "512K bytes, "
+ "4-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x44:
+ printk_debug("L2 Unified cache: "
+ "1M byte, "
+ "4-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x45:
+ printk_debug("L2 Unified cache: "
+ "2M byte, "
+ "4-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ case 0x82:
+ printk_debug("L2 Unified cache: "
+ "256K bytes, "
+ "8-way set assoc, "
+ "32 byte line size\n");
+ break;
+
+ default:
+ printk_debug("UNKNOWN\n");
+ }
+ }
+ printk_debug("\n");
+ } else {
+ printk_debug("op: 0x%02x eax:0x%08x "
+ "ebx:0x%08x ecx:0x%08x edx:0x%08x\n",
+ op, eax, ebx, ecx, edx);
+ }
+ }
+
+ printk_debug("\n");
+ post_code(0x92);
+}
diff --git a/src/cpu/p6/mtrr.c b/src/cpu/p6/mtrr.c
new file mode 100644
index 0000000000..b88e174869
--- /dev/null
+++ b/src/cpu/p6/mtrr.c
@@ -0,0 +1,356 @@
+/*
+ * intel_mtrr.c: setting MTRR to decent values for cache initialization on P6
+ *
+ * Derived from intel_set_mtrr in intel_subr.c and mtrr.c in linux kernel
+ *
+ * Copyright 2000 Silicon Integrated System Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * Reference: Intel Architecture Software Developer's Manual, Volume 3: System Programming
+ *
+ * $Id$
+ */
+
+#ifndef lint
+static char rcsid[] = "$Id$";
+#endif
+
+#include <console/console.h>
+#include <mem.h>
+#include <cpu/p6/msr.h>
+#include <cpu/p6/mtrr.h>
+#include <cpu/k7/mtrr.h>
+
+#define arraysize(x) (sizeof(x)/sizeof((x)[0]))
+
+static unsigned int mtrr_msr[] = {
+ MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR,
+ MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR, MTRRfix4K_D8000_MSR,
+ MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR, MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR,
+};
+
+
+static void intel_enable_fixed_mtrr(void)
+{
+ unsigned long low, high;
+
+ rdmsr(MTRRdefType_MSR, low, high);
+ low |= 0xc00;
+ wrmsr(MTRRdefType_MSR, low, high);
+}
+
+static void intel_enable_var_mtrr(void)
+{
+ unsigned long low, high;
+
+ rdmsr(MTRRdefType_MSR, low, high);
+ low |= 0x800;
+ wrmsr(MTRRdefType_MSR, low, high);
+}
+
+static inline void disable_cache(void)
+{
+ unsigned int tmp;
+ /* Disable cache */
+ /* Write back the cache and flush TLB */
+ asm volatile (
+ "movl %%cr0, %0\n\t"
+ "orl $0x40000000, %0\n\t"
+ "wbinvd\n\t"
+ "movl %0, %%cr0\n\t"
+ "wbinvd\n\t"
+ :"=r" (tmp)
+ ::"memory");
+}
+
+static inline void enable_cache(void)
+{
+ unsigned int tmp;
+ // turn cache back on.
+ asm volatile (
+ "movl %%cr0, %0\n\t"
+ "andl $0x9fffffff, %0\n\t"
+ "movl %0, %%cr0\n\t"
+ :"=r" (tmp)
+ ::"memory");
+}
+
+/* setting variable mtrr, comes from linux kernel source */
+static void intel_set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, unsigned char type)
+{
+ unsigned long base_high, base_low;
+ unsigned long mask_high, mask_low;
+
+ base_high = basek >> 22;
+ base_low = basek << 10;
+
+ if (sizek < 4*1024*1024) {
+ mask_high = 0x0F;
+ mask_low = ~((sizek << 10) -1);
+ }
+ else {
+ mask_high = 0x0F & (~((sizek >> 22) -1));
+ mask_low = 0;
+ }
+
+ if (reg >= 8)
+ return;
+
+ // it is recommended that we disable and enable cache when we
+ // do this.
+ disable_cache();
+ if (sizek == 0) {
+ /* The invalid bit is kept in the mask, so we simply clear the
+ relevant mask register to disable a range. */
+ wrmsr (MTRRphysMask_MSR (reg), 0, 0);
+ } else {
+ /* Bit 32-35 of MTRRphysMask should be set to 1 */
+ wrmsr (MTRRphysBase_MSR(reg), base_low | type, base_high);
+ wrmsr (MTRRphysMask_MSR(reg), mask_low | 0x800, mask_high);
+ }
+ enable_cache();
+}
+
+/* setting variable mtrr, comes from linux kernel source */
+void set_var_mtrr(unsigned int reg, unsigned long base, unsigned long size, unsigned char type)
+{
+ unsigned int tmp;
+
+ if (reg >= 8)
+ return;
+
+ // it is recommended that we disable and enable cache when we
+ // do this.
+ disable_cache();
+ if (size == 0) {
+ /* The invalid bit is kept in the mask, so we simply clear the
+ relevant mask register to disable a range. */
+ wrmsr (MTRRphysMask_MSR (reg), 0, 0);
+ } else {
+ /* Bit 32-35 of MTRRphysMask should be set to 1 */
+ wrmsr (MTRRphysBase_MSR (reg), base | type, 0);
+ wrmsr (MTRRphysMask_MSR (reg), ~(size - 1) | 0x800, 0x0F);
+ }
+
+ // turn cache back on.
+ enable_cache();
+}
+
+/* fms: find most sigificant bit set, stolen from Linux Kernel Source. */
+static inline unsigned int fms(unsigned int x)
+{
+ int r;
+
+ __asm__("bsrl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $0,%0\n"
+ "1:" : "=r" (r) : "g" (x));
+ return r;
+}
+
+/* fms: find least sigificant bit set */
+static inline unsigned int fls(unsigned int x)
+{
+ int r;
+
+ __asm__("bsfl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $32,%0\n"
+ "1:" : "=r" (r) : "g" (x));
+ return r;
+}
+
+/* setting up variable and fixed mtrr
+ *
+ * From Intel Vol. III Section 9.12.4, the Range Size and Base Alignment has some kind of requirement:
+ * 1. The range size must be 2^N byte for N >= 12 (i.e 4KB minimum).
+ * 2. The base address must be 2^N aligned, where the N here is equal to the N in previous
+ * requirement. So a 8K range must be 8K aligned not 4K aligned.
+ *
+ * These requirement is meet by "decompositing" the ramsize into Sum(Cn * 2^n, n = [0..N], Cn = [0, 1]).
+ * For Cm = 1, there is a WB range of 2^m size at base address Sum(Cm * 2^m, m = [N..n]).
+ * A 124MB (128MB - 4MB SMA) example:
+ * ramsize = 124MB == 64MB (at 0MB) + 32MB (at 64MB) + 16MB (at 96MB ) + 8MB (at 112MB) + 4MB (120MB).
+ * But this wastes a lot of MTRR registers so we use another more "aggresive" way with Uncacheable Regions.
+ *
+ * In the Uncacheable Region scheme, we try to cover the whole ramsize by one WB region as possible,
+ * If (an only if) this can not be done we will try to decomposite the ramesize, the mathematical formula
+ * whould be ramsize = Sum(Cn * 2^n, n = [0..N], Cn = [-1, 0, 1]). For Cn = -1, a Uncachable Region is used.
+ * The same 124MB example:
+ * ramsize = 124MB == 128MB WB (at 0MB) + 4MB UC (at 124MB)
+ * or a 156MB (128MB + 32MB - 4MB SMA) example:
+ * ramsize = 156MB == 128MB WB (at 0MB) + 32MB WB (at 128MB) + 4MB UC (at 156MB)
+ */
+/* 2 MTRRS are reserved for the operating system */
+#define BIOS_MTRRS 6
+#define OS_MTRRS 2
+#define MTRRS (BIOS_MTRRS + OS_MTRRS)
+
+
+static void set_fixed_mtrrs(unsigned int first, unsigned int last, unsigned char type)
+{
+ unsigned int i;
+ unsigned int fixed_msr = NUM_FIXED_RANGES >> 3;
+ unsigned long low, high;
+ low = high = 0; /* Shut up gcc */
+ for(i = first; i < last; i++) {
+ /* When I switch to a new msr read it in */
+ if (fixed_msr != i >> 3) {
+ /* But first write out the old msr */
+ if (fixed_msr < (NUM_FIXED_RANGES >> 3)) {
+ disable_cache();
+ wrmsr(mtrr_msr[fixed_msr], low, high);
+ enable_cache();
+ }
+ fixed_msr = i>>3;
+ rdmsr(mtrr_msr[fixed_msr], low, high);
+ }
+ if ((i & 7) < 4) {
+ low &= ~(0xff << ((i&3)*8));
+ low |= type << ((i&3)*8);
+ } else {
+ high &= ~(0xff << ((i&3)*8));
+ high |= type << ((i&3)*8);
+ }
+ }
+ /* Write out the final msr */
+ if (fixed_msr < (NUM_FIXED_RANGES >> 3)) {
+ disable_cache();
+ wrmsr(mtrr_msr[fixed_msr], low, high);
+ enable_cache();
+ }
+}
+
+static unsigned fixed_mtrr_index(unsigned long addrk)
+{
+ unsigned index;
+ index = (addrk - 0) >> 6;
+ if (index >= 8) {
+ index = ((addrk - 8*64) >> 4) + 8;
+ }
+ if (index >= 24) {
+ index = ((addrk - (8*64 + 16*16)) >> 2) + 24;
+ }
+ if (index > NUM_FIXED_RANGES) {
+ index = NUM_FIXED_RANGES;
+ }
+ return index;
+}
+
+static unsigned int range_to_mtrr(unsigned int reg,
+ unsigned long range_startk, unsigned long range_sizek,
+ unsigned long next_range_startk)
+{
+ if (!range_sizek || (reg >= BIOS_MTRRS)) {
+ return reg;
+ }
+ while(range_sizek) {
+ unsigned long max_align, align;
+ unsigned long sizek;
+ /* Compute the maximum size I can make a range */
+ max_align = fls(range_startk);
+ align = fms(range_sizek);
+ if (align > max_align) {
+ align = max_align;
+ }
+ sizek = 1 << align;
+ printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type WB\n",
+ reg, range_startk >>10, sizek >> 10);
+ intel_set_var_mtrr(reg++, range_startk, sizek, MTRR_TYPE_WRBACK);
+ range_startk += sizek;
+ range_sizek -= sizek;
+ if (reg >= BIOS_MTRRS)
+ break;
+ }
+ return reg;
+}
+
+void setup_mtrrs(struct mem_range *mem)
+{
+ /* Try this the simple way of incrementally adding together
+ * mtrrs. If this doesn't work out we can get smart again
+ * and clear out the mtrrs.
+ */
+ struct mem_range *memp;
+ unsigned long range_startk, range_sizek;
+ unsigned int reg;
+
+ printk_debug("\n");
+ /* Initialized the fixed_mtrrs to uncached */
+ printk_debug("Setting fixed MTRRs(%d-%d) type: UC\n",
+ 0, NUM_FIXED_RANGES);
+ set_fixed_mtrrs(0, NUM_FIXED_RANGES, MTRR_TYPE_UNCACHABLE);
+
+ /* Now see which of the fixed mtrrs cover ram.
+ */
+ for(memp = mem; memp->sizek; memp++) {
+ unsigned int start_mtrr;
+ unsigned int last_mtrr;
+ start_mtrr = fixed_mtrr_index(memp->basek);
+ last_mtrr = fixed_mtrr_index(memp->basek + memp->sizek);
+ if (start_mtrr >= NUM_FIXED_RANGES) {
+ break;
+ }
+ printk_debug("Setting fixed MTRRs(%d-%d) type: WB\n",
+ start_mtrr, last_mtrr);
+ set_fixed_mtrrs(start_mtrr, last_mtrr, MTRR_TYPE_WRBACK);
+ }
+ printk_debug("DONE fixed MTRRs\n");
+ /* Cache as many memory areas as possible */
+ /* FIXME is there an algorithm for computing the optimal set of mtrrs?
+ * In some cases it is definitely possible to do better.
+ */
+ range_startk = 0;
+ range_sizek = 0;
+ reg = 0;
+ for (memp = mem; memp->sizek; memp++) {
+ /* See if I can merge with the last range
+ * Either I am below 1M and the fixed mtrrs handle it, or
+ * the ranges touch.
+ */
+ if ((memp->basek <= 1024) || (range_startk + range_sizek == memp->basek)) {
+ unsigned long endk = memp->basek + memp->sizek;
+ range_sizek = endk - range_startk;
+ continue;
+ }
+ /* Write the range mtrrs */
+ if (range_sizek != 0) {
+ reg = range_to_mtrr(reg, range_startk, range_sizek, memp->basek);
+ range_startk = 0;
+ range_sizek = 0;
+ if (reg >= BIOS_MTRRS)
+ break;
+ }
+ /* Allocate an msr */
+ range_startk = memp->basek;
+ range_sizek = memp->sizek;
+ }
+ /* Write the last range */
+ reg = range_to_mtrr(reg, range_startk, range_sizek, 0);
+ printk_debug("DONE variable MTRRs\n");
+ printk_debug("Clear out the extra MTRR's\n");
+ /* Clear out the extra MTRR's */
+ while(reg < MTRRS) {
+ intel_set_var_mtrr(reg++, 0, 0, 0);
+ }
+ /* enable fixed MTRR */
+ printk_debug("call intel_enable_fixed_mtrr()\n");
+ intel_enable_fixed_mtrr();
+ printk_debug("call intel_enable_var_mtrr()\n");
+ intel_enable_var_mtrr();
+ printk_debug("Leave %s\n", __FUNCTION__);
+}