From 067302968967cf74a9c9ed8fb81e1f53c6ead176 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 8 Nov 2007 16:11:09 -0500 Subject: Compiling: Fix for 64bit compile on Darwin/OSX 10.5. --HG-- extra : convert_revision : 1f23f7a3952f55cca8293fb43ae15db42005aeac --- src/base/hashmap.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/base/hashmap.hh b/src/base/hashmap.hh index b78cc02e8..179191c7c 100644 --- a/src/base/hashmap.hh +++ b/src/base/hashmap.hh @@ -59,7 +59,7 @@ namespace m5 { // namespace __hash_namespace { -#if !defined(__LP64__) && !defined(__alpha__) && !defined(__SUNPRO_CC) +#if (__APPLE__) || !defined(__LP64__) && !defined(__alpha__) && !defined(__SUNPRO_CC) template<> struct hash { size_t operator()(uint64_t r) const { -- cgit v1.2.3 From 46505821ec00cead429af990358d2a4dd28e87b6 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 8 Nov 2007 18:51:50 -0800 Subject: ISA parser: Make the isa parser generate MaxInstSrcRegs and MaxInstDestRegs. --HG-- extra : convert_revision : 8c35891945c6b4ebc320f0c88a7a0449f3c4b4d5 --- src/arch/SConscript | 2 +- src/arch/alpha/isa_traits.hh | 7 +++---- src/arch/isa_parser.py | 34 ++++++++++++++++++++++++++++++++++ src/arch/mips/isa_traits.hh | 7 +++---- src/arch/sparc/isa_traits.hh | 7 +++---- src/arch/x86/isa_traits.hh | 7 +++---- 6 files changed, 47 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/arch/SConscript b/src/arch/SConscript index e051c44af..66f93870e 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -97,7 +97,7 @@ execfile(cpu_models_file.srcnode().abspath) # Several files are generated from the ISA description. # We always get the basic decoder and header file. -isa_desc_gen_files = [ 'decoder.cc', 'decoder.hh' ] +isa_desc_gen_files = [ 'decoder.cc', 'decoder.hh', 'max_inst_regs.hh' ] # We also get an execute file for each selected CPU model. 
isa_desc_gen_files += [CpuModel.dict[cpu].filename for cpu in env['CPU_MODELS']] diff --git a/src/arch/alpha/isa_traits.hh b/src/arch/alpha/isa_traits.hh index 53eea5f69..be1d1b8bb 100644 --- a/src/arch/alpha/isa_traits.hh +++ b/src/arch/alpha/isa_traits.hh @@ -35,6 +35,7 @@ namespace LittleEndianGuest {} #include "arch/alpha/ipr.hh" +#include "arch/alpha/max_inst_regs.hh" #include "arch/alpha/types.hh" #include "config/full_system.hh" #include "sim/host.hh" @@ -44,6 +45,8 @@ class StaticInstPtr; namespace AlphaISA { using namespace LittleEndianGuest; + using AlphaISAInst::MaxInstSrcRegs; + using AlphaISAInst::MaxInstDestRegs; // These enumerate all the registers for dependence tracking. enum DependenceTags { @@ -144,10 +147,6 @@ namespace AlphaISA const int TotalDataRegs = NumIntRegs + NumFloatRegs; - // Static instruction parameters - const int MaxInstSrcRegs = 3; - const int MaxInstDestRegs = 2; - // semantically meaningful register indices const int ZeroReg = 31; // architecturally meaningful // the rest of these depend on the ABI diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index fb398d152..25908e986 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -1573,6 +1573,8 @@ def buildOperandNameMap(userDict, lineno): global operandsWithExtRE operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE) +maxInstSrcRegs = 0 +maxInstDestRegs = 0 class OperandList: @@ -1636,6 +1638,12 @@ class OperandList: if self.memOperand: error(0, "Code block has more than one memory operand.") self.memOperand = op_desc + global maxInstSrcRegs + global maxInstDestRegs + if maxInstSrcRegs < self.numSrcRegs: + maxInstSrcRegs = self.numSrcRegs + if maxInstDestRegs < self.numDestRegs: + maxInstDestRegs = self.numDestRegs # now make a final pass to finalize op_desc fields that may depend # on the register enumeration for op_desc in self.items: @@ -1855,6 +1863,22 @@ namespace %(namespace)s { %(decode_function)s ''' +max_inst_regs_template = ''' 
+/* + * DO NOT EDIT THIS FILE!!! + * + * It was automatically generated from the ISA description in %(filename)s + */ + +namespace %(namespace)s { + + const int MaxInstSrcRegs = %(MaxInstSrcRegs)d; + const int MaxInstDestRegs = %(MaxInstDestRegs)d; + +} // namespace %(namespace)s + +''' + # Update the output file only if the new contents are different from # the current contents. Minimizes the files that need to be rebuilt @@ -1954,6 +1978,16 @@ def parse_isa_desc(isa_desc_file, output_dir): update_if_needed(output_dir + '/' + cpu.filename, file_template % vars()) + # The variable names here are hacky, but this will creat local variables + # which will be referenced in vars() which have the value of the globals. + global maxInstSrcRegs + MaxInstSrcRegs = maxInstSrcRegs + global maxInstDestRegs + MaxInstDestRegs = maxInstDestRegs + # max_inst_regs.hh + update_if_needed(output_dir + '/max_inst_regs.hh', \ + max_inst_regs_template % vars()) + # global list of CpuModel objects (see cpu_models.py) cpu_models = [] diff --git a/src/arch/mips/isa_traits.hh b/src/arch/mips/isa_traits.hh index 5d4403553..cc584faa8 100644 --- a/src/arch/mips/isa_traits.hh +++ b/src/arch/mips/isa_traits.hh @@ -32,6 +32,7 @@ #ifndef __ARCH_MIPS_ISA_TRAITS_HH__ #define __ARCH_MIPS_ISA_TRAITS_HH__ +#include "arch/mips/max_inst_regs.hh" #include "arch/mips/types.hh" #include "sim/host.hh" @@ -44,6 +45,8 @@ class StaticInstPtr; namespace MipsISA { using namespace LittleEndianGuest; + using MipsISAInst::MaxInstSrcRegs; + using MipsISAInst::MaxInstDestRegs; StaticInstPtr decodeInst(ExtMachInst); @@ -64,10 +67,6 @@ namespace MipsISA const int NumFloatArchRegs = 32; const int NumFloatSpecialRegs = 5; - // Static instruction parameters - const int MaxInstSrcRegs = 5; - const int MaxInstDestRegs = 4; - // semantically meaningful register indices const int ZeroReg = 0; const int AssemblerReg = 1; diff --git a/src/arch/sparc/isa_traits.hh b/src/arch/sparc/isa_traits.hh index 4f3d20606..133817eb5 100644 --- 
a/src/arch/sparc/isa_traits.hh +++ b/src/arch/sparc/isa_traits.hh @@ -33,6 +33,7 @@ #define __ARCH_SPARC_ISA_TRAITS_HH__ #include "arch/sparc/types.hh" +#include "arch/sparc/max_inst_regs.hh" #include "arch/sparc/sparc_traits.hh" #include "config/full_system.hh" #include "sim/host.hh" @@ -49,6 +50,8 @@ namespace SparcISA //This makes sure the big endian versions of certain functions are used. using namespace BigEndianGuest; + using SparcISAInst::MaxInstSrcRegs; + using SparcISAInst::MaxInstDestRegs; // SPARC has a delay slot #define ISA_HAS_DELAY_SLOT 1 @@ -76,10 +79,6 @@ namespace SparcISA // Some OS syscall use a second register (o1) to return a second value const int SyscallPseudoReturnReg = ArgumentReg[1]; - //XXX These numbers are bogus - const int MaxInstSrcRegs = 8; - const int MaxInstDestRegs = 9; - //8K. This value is implmentation specific; and should probably //be somewhere else. const int LogVMPageSize = 13; diff --git a/src/arch/x86/isa_traits.hh b/src/arch/x86/isa_traits.hh index 762f9b172..abb7694ed 100644 --- a/src/arch/x86/isa_traits.hh +++ b/src/arch/x86/isa_traits.hh @@ -59,6 +59,7 @@ #define __ARCH_X86_ISATRAITS_HH__ #include "arch/x86/intregs.hh" +#include "arch/x86/max_inst_regs.hh" #include "arch/x86/types.hh" #include "arch/x86/x86_traits.hh" #include "sim/host.hh" @@ -72,6 +73,8 @@ namespace X86ISA //This makes sure the little endian version of certain functions //are used. using namespace LittleEndianGuest; + using X86ISAInst::MaxInstSrcRegs; + using X86ISAInst::MaxInstDestRegs; // X86 does not have a delay slot #define ISA_HAS_DELAY_SLOT 0 @@ -121,10 +124,6 @@ namespace X86ISA // value const int SyscallPseudoReturnReg = INTREG_RDX; - //XXX These numbers are bogus - const int MaxInstSrcRegs = 10; - const int MaxInstDestRegs = 10; - //4k. This value is not constant on x86. 
const int LogVMPageSize = 12; const int VMPageSize = (1 << LogVMPageSize); -- cgit v1.2.3 From 7c0076d5f3492d181b5b60fa13ac2c4f80821916 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 8 Nov 2007 23:42:44 -0800 Subject: Make non Apple compilation work again. Ali may have to refix this. --HG-- extra : convert_revision : 0f9455643eec14034314908ee26a6d693c54a864 --- src/base/hashmap.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/base/hashmap.hh b/src/base/hashmap.hh index 179191c7c..f8d799780 100644 --- a/src/base/hashmap.hh +++ b/src/base/hashmap.hh @@ -59,7 +59,7 @@ namespace m5 { // namespace __hash_namespace { -#if (__APPLE__) || !defined(__LP64__) && !defined(__alpha__) && !defined(__SUNPRO_CC) +#if defined(__APPLE__) || !defined(__LP64__) && !defined(__alpha__) && !defined(__SUNPRO_CC) template<> struct hash { size_t operator()(uint64_t r) const { -- cgit v1.2.3 From 6cfe4176f506ba0bdcb54a016da0d8063913e9c4 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 8 Nov 2007 23:50:10 -0800 Subject: Alpha: Fix a long standing bug where all code ran as PAL code in FS. --HG-- extra : convert_revision : 654a2376a601ddf91665ca627403518911b32532 --- src/arch/alpha/predecoder.hh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/arch/alpha/predecoder.hh b/src/arch/alpha/predecoder.hh index 7a6bb3c02..725b35b9d 100644 --- a/src/arch/alpha/predecoder.hh +++ b/src/arch/alpha/predecoder.hh @@ -74,8 +74,7 @@ namespace AlphaISA { ext_inst = inst; #if FULL_SYSTEM - if (pc && 0x1) - ext_inst|=(static_cast(pc & 0x1) << 32); + ext_inst|=(static_cast(pc & 0x1) << 32); #endif } -- cgit v1.2.3 From ada071db53208bf02afee79390d1169130ce99ed Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 11 Nov 2007 17:23:22 -0800 Subject: SPARC: Force %g1 to be zero on process startup even though it normally already should be. 
--HG-- extra : convert_revision : 9feb63109e8c955b49c7e96acad1ad7c29a4349f --- src/arch/sparc/process.cc | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/arch/sparc/process.cc b/src/arch/sparc/process.cc index 1f5d0b077..0c40fe58d 100644 --- a/src/arch/sparc/process.cc +++ b/src/arch/sparc/process.cc @@ -429,6 +429,10 @@ Sparc64LiveProcess::argsInit(int intSize, int pageSize) threadContexts[0]->setIntReg(ArgumentReg[1], argv_array_base); threadContexts[0]->setIntReg(StackPointerReg, stack_min - StackBias); + // %g1 is a pointer to a function that should be run at exit. Since we + // don't have anything like that, it should be set to 0. + threadContexts[0]->setIntReg(1, 0); + Addr prog_entry = objFile->entryPoint(); threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); @@ -658,6 +662,10 @@ Sparc32LiveProcess::argsInit(int intSize, int pageSize) //threadContexts[0]->setIntReg(ArgumentReg[1], argv_array_base); threadContexts[0]->setIntReg(StackPointerReg, stack_min); + // %g1 is a pointer to a function that should be run at exit. Since we + // don't have anything like that, it should be set to 0. 
+ threadContexts[0]->setIntReg(1, 0); + uint32_t prog_entry = objFile->entryPoint(); threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); -- cgit v1.2.3 From aaa30714b3808a9283cda41bf29e167bf6b8edb0 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:37:54 -0800 Subject: X86: Various fixes to indexing segmentation related registers --HG-- extra : convert_revision : 3d45da3a3fb38327582cfdfb72cfc4ce1b1d31af --- src/arch/x86/insts/static_inst.cc | 16 ++++++++++------ src/arch/x86/isa/microasm.isa | 2 +- src/arch/x86/isa/microops/ldstop.isa | 8 ++++---- src/arch/x86/miscregs.hh | 4 ++++ src/arch/x86/segmentregs.hh | 1 + src/arch/x86/tlb.cc | 2 +- 6 files changed, 21 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/arch/x86/insts/static_inst.cc b/src/arch/x86/insts/static_inst.cc index d2ec8878c..183700fa9 100644 --- a/src/arch/x86/insts/static_inst.cc +++ b/src/arch/x86/insts/static_inst.cc @@ -56,6 +56,7 @@ */ #include "arch/x86/insts/static_inst.hh" +#include "arch/x86/segmentregs.hh" namespace X86ISA { @@ -75,24 +76,27 @@ namespace X86ISA { switch (segment) { - case 0: + case SEGMENT_REG_ES: ccprintf(os, "ES"); break; - case 1: + case SEGMENT_REG_CS: ccprintf(os, "CS"); break; - case 2: + case SEGMENT_REG_SS: ccprintf(os, "SS"); break; - case 3: + case SEGMENT_REG_DS: ccprintf(os, "DS"); break; - case 4: + case SEGMENT_REG_FS: ccprintf(os, "FS"); break; - case 5: + case SEGMENT_REG_GS: ccprintf(os, "GS"); break; + case SEGMENT_REG_INT: + ccprintf(os, "INT"); + break; default: panic("Unrecognized segment %d\n", segment); } diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index 0c43d4c13..e05582e37 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -108,7 +108,7 @@ let {{ # This segment selects an internal address space mapped to MSRs, # CPUID info, etc. 
- assembler.symbols["intseg"] = "NUM_SEGMENTREGS" + assembler.symbols["intseg"] = "SEGMENT_REG_INT" for reg in ('ax', 'bx', 'cx', 'dx', 'sp', 'bp', 'si', 'di'): assembler.symbols["r%s" % reg] = "INTREG_R%s" % reg.upper() diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 61adde8d1..77152a190 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -123,7 +123,7 @@ def template MicroLoadExecute {{ %(ea_code)s; DPRINTF(X86, "%s : %s: The address is %#x\n", instMnem, mnemonic, EA); - fault = read(xc, EA, Mem, (%(mem_flags)s) | (1 << segment)); + fault = read(xc, EA, Mem, (%(mem_flags)s) | segment); if(fault == NoFault) { @@ -150,7 +150,7 @@ def template MicroLoadInitiateAcc {{ %(ea_code)s; DPRINTF(X86, "%s : %s: The address is %#x\n", instMnem, mnemonic, EA); - fault = read(xc, EA, Mem, (%(mem_flags)s) | (1 << segment)); + fault = read(xc, EA, Mem, (%(mem_flags)s) | segment); return fault; } @@ -197,7 +197,7 @@ def template MicroStoreExecute {{ if(fault == NoFault) { - fault = write(xc, Mem, EA, (%(mem_flags)s) | (1 << segment)); + fault = write(xc, Mem, EA, (%(mem_flags)s) | segment); if(fault == NoFault) { %(op_wb)s; @@ -224,7 +224,7 @@ def template MicroStoreInitiateAcc {{ if(fault == NoFault) { - fault = write(xc, Mem, EA, (%(mem_flags)s) | (1 << segment)); + fault = write(xc, Mem, EA, (%(mem_flags)s) | segment); if(fault == NoFault) { %(op_wb)s; diff --git a/src/arch/x86/miscregs.hh b/src/arch/x86/miscregs.hh index a516a2018..3a30b9800 100644 --- a/src/arch/x86/miscregs.hh +++ b/src/arch/x86/miscregs.hh @@ -258,6 +258,7 @@ namespace X86ISA MISCREG_DS, MISCREG_FS, MISCREG_GS, + MISCREG_INT, // This isn't actually used. 
// Hidden segment base field MISCREG_SEG_BASE_BASE = MISCREG_SEG_SEL_BASE + NumSegments, @@ -267,6 +268,7 @@ namespace X86ISA MISCREG_DS_BASE, MISCREG_FS_BASE, MISCREG_GS_BASE, + MISCREG_INT_BASE, // Hidden segment limit field MISCREG_SEG_LIMIT_BASE = MISCREG_SEG_BASE_BASE + NumSegments, @@ -276,6 +278,7 @@ namespace X86ISA MISCREG_DS_LIMIT, MISCREG_FS_LIMIT, MISCREG_GS_LIMIT, + MISCREG_INT_LIMIT, // This isn't actually used. // Hidden segment limit attributes MISCREG_SEG_ATTR_BASE = MISCREG_SEG_LIMIT_BASE + NumSegments, @@ -285,6 +288,7 @@ namespace X86ISA MISCREG_DS_ATTR, MISCREG_FS_ATTR, MISCREG_GS_ATTR, + MISCREG_INT_ATTR, // This isn't actually used. // System segment selectors MISCREG_SYSSEG_SEL_BASE = MISCREG_SEG_ATTR_BASE + NumSegments, diff --git a/src/arch/x86/segmentregs.hh b/src/arch/x86/segmentregs.hh index 9fd9bcb0e..524b756d6 100644 --- a/src/arch/x86/segmentregs.hh +++ b/src/arch/x86/segmentregs.hh @@ -68,6 +68,7 @@ namespace X86ISA SEGMENT_REG_DS, SEGMENT_REG_FS, SEGMENT_REG_GS, + SEGMENT_REG_INT, NUM_SEGMENTREGS }; diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index 6afee6d72..bf5a8434b 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -150,7 +150,7 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) // If this is true, we're dealing with a request to read an internal // value. - if (seg == NUM_SEGMENTREGS) { + if (seg == SEGMENT_REG_INT) { Addr prefix = vaddr & IntAddrPrefixMask; if (prefix == IntAddrPrefixCPUID) { panic("CPUID memory space not yet implemented!\n"); -- cgit v1.2.3 From d89d80a5d0bea8a17149abc490ba0d46bff5b11a Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:02 -0800 Subject: X86: Make some of the bits of CR0 do what they're supposed to. 
--HG-- extra : convert_revision : 13e79ef1ef09bd842d5e075e31f98ab2a4357901 --- src/arch/x86/miscregfile.cc | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/arch/x86/miscregfile.cc b/src/arch/x86/miscregfile.cc index e2c39c7cd..cd76e96aa 100644 --- a/src/arch/x86/miscregfile.cc +++ b/src/arch/x86/miscregfile.cc @@ -154,7 +154,30 @@ void MiscRegFile::setRegNoEffect(int miscReg, const MiscReg &val) void MiscRegFile::setReg(int miscReg, const MiscReg &val, ThreadContext * tc) { - setRegNoEffect(miscReg, val); + MiscReg newVal = val; + switch(miscReg) + { + case MISCREG_CR0: + CR0 toggled = regVal[miscReg] ^ val; + CR0 newCR0 = val; + Efer efer = regVal[MISCREG_EFER]; + if (toggled.pg && efer.lme) { + if (newCR0.pg) { + //Turning on long mode + efer.lma = 1; + regVal[MISCREG_EFER] = efer; + } else { + //Turning off long mode + efer.lma = 0; + regVal[MISCREG_EFER] = efer; + } + } + //This must always be 1. + newCR0.et = 1; + newVal = newCR0; + break; + } + setRegNoEffect(miscReg, newVal); } void MiscRegFile::serialize(std::ostream & os) -- cgit v1.2.3 From 53cb6cbcc15a2c38e38c22da60db3e025e6ddc17 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:10 -0800 Subject: X86: Implement the startupCPU function. 
--HG-- extra : convert_revision : d2331a0e0bd14863e82004508558f657c5b900a2 --- src/arch/x86/utility.cc | 164 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/arch/x86/utility.cc b/src/arch/x86/utility.cc index 0eee0c93e..b2a6ea040 100644 --- a/src/arch/x86/utility.cc +++ b/src/arch/x86/utility.cc @@ -60,6 +60,7 @@ #include "arch/x86/segmentregs.hh" #include "arch/x86/utility.hh" #include "arch/x86/x86_traits.hh" +#include "sim/system.hh" namespace X86ISA { @@ -253,20 +254,177 @@ void initCPU(ThreadContext *tc, int cpuId) #endif +#if FULL_SYSTEM void startupCPU(ThreadContext *tc, int cpuId) { if (cpuId == 0) { // This is the boot strap processor (BSP). Initialize it to look like - // the boot loader has just turned control over to the 64 bit OS. - - // Enable paging, turn on long mode, etc. + // the boot loader has just turned control over to the 64 bit OS. We + // won't actually set up real mode or legacy protected mode descriptor + // tables because we aren't executing any code that would require + // them. We do, however toggle the control bits in the correct order + // while allowing consistency checks and the underlying mechanisms + // just to be safe. + + const int NumPDTs = 4; + + const Addr PageMapLevel4 = 0x70000; + const Addr PageDirPtrTable = 0x71000; + const Addr PageDirTable[NumPDTs] = + {0x72000, 0x73000, 0x74000, 0x75000}; + const Addr GDTBase = 0x76000; + + const int PML4Bits = 9; + const int PDPTBits = 9; + const int PDTBits = 9; + + // Get a port to write the page tables and descriptor tables. + FunctionalPort * physPort = tc->getPhysPort(); + + /* + * Set up the gdt. 
+ */ + // Place holder at selector 0 + uint64_t nullDescriptor = 0; + physPort->writeBlob(GDTBase, (uint8_t *)(&nullDescriptor), 8); + + //64 bit code segment + SegDescriptor csDesc = 0; + csDesc.type.c = 0; // Not conforming + csDesc.dpl = 0; // Privilege level 0 + csDesc.p = 1; // Present + csDesc.l = 1; // 64 bit + csDesc.d = 0; // default operand size + //Because we're dealing with a pointer and I don't think it's + //guaranteed that there isn't anything in a nonvirtual class between + //its beginning in memory and its actual data, we'll use an + //intermediary. + uint64_t csDescVal = csDesc; + physPort->writeBlob(GDTBase, (uint8_t *)(&csDescVal), 8); + + tc->setMiscReg(MISCREG_GDTR_BASE, GDTBase); + tc->setMiscReg(MISCREG_GDTR_LIMIT, 0xF); + + /* + * Identity map the first 4GB of memory. In order to map this region + * of memory in long mode, there needs to be one actual page map level + * 4 entry which points to one page directory pointer table which + * points to 4 different page directory tables which are full of two + * megabyte pages. All of the other entries in valid tables are set + * to indicate that they don't pertain to anything valid and will + * cause a fault if used. + */ + + // Put valid values in all of the various table entries which indicate + // that those entries don't point to further tables or pages. Then + // set the values of those entries which are needed. 
+ + // Page Map Level 4 + + // read/write, user, not present + uint64_t pml4e = X86ISA::htog(0x6); + for (int offset = 0; offset < (1 << PML4Bits) * 8; offset += 8) { + physPort->writeBlob(PageMapLevel4 + offset, (uint8_t *)(&pml4e), 8); + } + // Point to the only PDPT + pml4e = X86ISA::htog(0x7 | PageDirPtrTable); + physPort->writeBlob(PageMapLevel4, (uint8_t *)(&pml4e), 8); + + // Page Directory Pointer Table + + // read/write, user, not present + uint64_t pdpe = X86ISA::htog(0x6); + for (int offset = 0; offset < (1 << PDPTBits) * 8; offset += 8) { + physPort->writeBlob(PageDirPtrTable + offset, + (uint8_t *)(&pdpe), 8); + } + // Point to the PDTs + for (int table = 0; table < NumPDTs; table++) { + pdpe = X86ISA::htog(0x7 | PageDirTable[table]); + physPort->writeBlob(PageDirPtrTable + table * 8, + (uint8_t *)(&pdpe), 8); + } + + // Page Directory Tables + + Addr base = 0; + const Addr pageSize = 2 << 20; + for (int table = 0; table < NumPDTs; table++) { + for (int offset = 0; offset < (1 << PDTBits) * 8; offset += 8) { + // read/write, user, present, 2MB + uint64_t pdte = X86ISA::htog(0x87 | base); + physPort->writeBlob(PageDirTable[table] + offset, + (uint8_t *)(&pdte), 8); + base += pageSize; + } + } + + /* + * Transition from real mode all the way up to Long mode + */ + CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); + //Turn off paging. + cr0.pg = 0; + tc->setMiscReg(MISCREG_CR0, cr0); + //Turn on protected mode. + cr0.pe = 1; + tc->setMiscReg(MISCREG_CR0, cr0); + + CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); + //Turn on pae. + cr4.pae = 1; + tc->setMiscReg(MISCREG_CR4, cr4); + + //Point to the page tables. + tc->setMiscReg(MISCREG_CR3, PageMapLevel4); + + Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); + //Enable long mode. + efer.lme = 1; + tc->setMiscReg(MISCREG_EFER, efer); + + //Activate long mode. + cr0.pg = 1; + tc->setMiscReg(MISCREG_CR0, cr0); + + /* + * Far jump into 64 bit mode. 
+ */ + // Set the selector + tc->setMiscReg(MISCREG_CS, 1); + // Manually set up the segment attributes. In the future when there's + // other existing functionality to do this, that could be used + // instead. + SegAttr csAttr = 0; + csAttr.writable = 0; + csAttr.readable = 1; + csAttr.expandDown = 0; + csAttr.dpl = 0; + csAttr.defaultSize = 0; + csAttr.longMode = 1; + tc->setMiscReg(MISCREG_CS_ATTR, csAttr); + + tc->setPC(tc->getSystemPtr()->kernelEntry); + tc->setNextPC(tc->readPC()); + + // We should now be in long mode. Yay! tc->activate(0); } else { // This is an application processor (AP). It should be initialized to // look like only the BIOS POST has run on it and put then put it into // a halted state. + tc->suspend(); } } +#else + +void startupCPU(ThreadContext *tc, int cpuId) +{ + tc->activate(0); +} + +#endif + } //namespace X86_ISA -- cgit v1.2.3 From 7a39457d7ff5fd80484061a4ff7006921899b229 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:17 -0800 Subject: X86: Make the micropc available through the thread context objects. This is necessary for fault handlers that branch to non-zero micro PCs. --HG-- extra : convert_revision : c1cb4863d779a9f4a508d0b450e64fb7a985f264 --- src/cpu/o3/thread_context.hh | 10 ++++++++++ src/cpu/o3/thread_context_impl.hh | 30 +++++++++++++++++++++++++++++- src/cpu/thread_context.hh | 16 ++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index 31e08db4c..55b385d11 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -203,6 +203,16 @@ class O3ThreadContext : public ThreadContext /** Sets this thread's next PC. 
*/ virtual void setNextPC(uint64_t val); + virtual uint64_t readMicroPC() + { return cpu->readMicroPC(thread->readTid()); } + + virtual void setMicroPC(uint64_t val); + + virtual uint64_t readNextMicroPC() + { return cpu->readNextMicroPC(thread->readTid()); } + + virtual void setNextMicroPC(uint64_t val); + /** Reads a miscellaneous register. */ virtual MiscReg readMiscRegNoEffect(int misc_reg) { return cpu->readMiscRegNoEffect(misc_reg, thread->readTid()); } diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index efbbc2329..55584629e 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -289,9 +289,13 @@ O3ThreadContext::copyArchRegs(ThreadContext *tc) // Copy the misc regs. TheISA::copyMiscRegs(tc, this); - // Then finally set the PC and the next PC. + // Then finally set the PC, the next PC, the nextNPC, the micropc, and the + // next micropc. cpu->setPC(tc->readPC(), tid); cpu->setNextPC(tc->readNextPC(), tid); + cpu->setNextNPC(tc->readNextNPC(), tid); + cpu->setMicroPC(tc->readMicroPC(), tid); + cpu->setNextMicroPC(tc->readNextMicroPC(), tid); #if !FULL_SYSTEM this->thread->funcExeInst = tc->readFuncExeInst(); #endif @@ -448,6 +452,30 @@ O3ThreadContext::setNextPC(uint64_t val) } } +template +void +O3ThreadContext::setMicroPC(uint64_t val) +{ + cpu->setMicroPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setNextMicroPC(uint64_t val) +{ + cpu->setNextMicroPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. 
+ if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + template void O3ThreadContext::setMiscRegNoEffect(int misc_reg, const MiscReg &val) diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 31fdb42c2..0d09492ee 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -226,6 +226,14 @@ class ThreadContext virtual void setNextNPC(uint64_t val) = 0; + virtual uint64_t readMicroPC() = 0; + + virtual void setMicroPC(uint64_t val) = 0; + + virtual uint64_t readNextMicroPC() = 0; + + virtual void setNextMicroPC(uint64_t val) = 0; + virtual MiscReg readMiscRegNoEffect(int misc_reg) = 0; virtual MiscReg readMiscReg(int misc_reg) = 0; @@ -419,6 +427,14 @@ class ProxyThreadContext : public ThreadContext void setNextNPC(uint64_t val) { actualTC->setNextNPC(val); } + uint64_t readMicroPC() { return actualTC->readMicroPC(); } + + void setMicroPC(uint64_t val) { actualTC->setMicroPC(val); } + + uint64_t readNextMicroPC() { return actualTC->readMicroPC(); } + + void setNextMicroPC(uint64_t val) { actualTC->setMicroPC(val); } + MiscReg readMiscRegNoEffect(int misc_reg) { return actualTC->readMiscRegNoEffect(misc_reg); } -- cgit v1.2.3 From f17f3d20be08d25f176138691a29897df54e5cc0 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:24 -0800 Subject: X86: Implement a page table walker. 
--HG-- extra : convert_revision : 36bab5750100318faa9ba7178dc2e38590053aec --- src/arch/x86/X86TLB.py | 7 +- src/arch/x86/tlb.cc | 136 ++++++++++++++++++++++++++++++++++- src/arch/x86/tlb.hh | 144 +++++++++++++++++++++++++++++++++++++- src/cpu/BaseCPU.py | 9 ++- src/cpu/o3/O3CPU.py | 2 +- src/cpu/simple/AtomicSimpleCPU.py | 3 +- src/cpu/simple/TimingSimpleCPU.py | 2 +- 7 files changed, 293 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/arch/x86/X86TLB.py b/src/arch/x86/X86TLB.py index ce4db4f4c..2d562ba9a 100644 --- a/src/arch/x86/X86TLB.py +++ b/src/arch/x86/X86TLB.py @@ -53,12 +53,15 @@ # # Authors: Gabe Black -from m5.SimObject import SimObject +from MemObject import MemObject from m5.params import * -class X86TLB(SimObject): + +class X86TLB(MemObject): type = 'X86TLB' abstract = True size = Param.Int("TLB size") + walker_port = Port("Port for the hardware table walker") + system = Param.System(Parent.any, "system object") class X86DTB(X86TLB): type = 'X86DTB' diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index bf5a8434b..e30e820b4 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -72,7 +72,7 @@ namespace X86ISA { -TLB::TLB(const Params *p) : SimObject(p), size(p->size) +TLB::TLB(const Params *p) : MemObject(p), walker(name(), this), size(p->size) { tlb = new TlbEntry[size]; std::memset(tlb, 0, sizeof(TlbEntry) * size); @@ -81,6 +81,140 @@ TLB::TLB(const Params *p) : SimObject(p), size(p->size) freeList.push_back(&tlb[x]); } +bool +TLB::Walker::doNext(uint64_t data, PacketPtr &write) +{ + assert(state != Ready && state != Waiting); + write = NULL; + switch(state) { + case LongPML4: + nextState = LongPDP; + break; + case LongPDP: + nextState = LongPD; + break; + case LongPD: + nextState = LongPTE; + break; + case LongPTE: + nextState = Ready; + return false; + case PAEPDP: + nextState = PAEPD; + break; + case PAEPD: + break; + case PAEPTE: + nextState = Ready; + return false; + case PSEPD: + break; + case PD: + nextState = 
PTE; + break; + case PTE: + nextState = Ready; + return false; + default: + panic("Unknown page table walker state %d!\n"); + } + return true; +} + +void +TLB::Walker::buildReadPacket(Addr addr) +{ + readRequest.setPhys(addr, size, PHYSICAL | uncachable ? UNCACHEABLE : 0); + readPacket.reinitFromRequest(); +} + +TLB::walker::buildWritePacket(Addr addr) +{ + writeRequest.setPhys(addr, size, PHYSICAL | uncachable ? UNCACHEABLE : 0); + writePacket.reinitFromRequest(); + +bool +TLB::Walker::WalkerPort::recvTiming(PacketPtr pkt) +{ + if (pkt->isResponse() && !pkt->wasNacked()) { + if (pkt->isRead()) { + assert(packet); + assert(walker->state == Waiting); + packet = NULL; + walker->state = walker->nextState; + walker->nextState = Ready; + PacketPtr write; + if (walker->doNext(pkt, write)) { + packet = &walker->packet; + port->sendTiming(packet); + } + if (write) { + writes.push_back(write); + } + while (!port->blocked() && writes.size()) { + if (port->sendTiming(writes.front())) { + writes.pop_front(); + outstandingWrites++; + } + } + } else { + outstandingWrites--; + } + } else if (pkt->wasNacked()) { + pkt->reinitNacked(); + if (!sendTiming(pkt)) { + if (pkt->isWrite()) { + writes.push_front(pkt); + } + } + } + return true; +} + +Tick +TLB::Walker::WalkerPort::recvAtomic(PacketPtr pkt) +{ + return 0; +} + +void +TLB::Walker::WalkerPort::recvFunctional(PacketPtr pkt) +{ + return; +} + +void +TLB::Walker::WalkerPort::recvStatusChange(Status status) +{ + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } + return; + } + + panic("Unexpected recvStatusChange.\n"); +} + +void +TLB::Walker::WalkerPort::recvRetry() +{ + retrying = false; + if (!sendTiming(packet)) { + retrying = true; + } +} + +Port * +TLB::getPort(const std::string &if_name, int idx) +{ + if (if_name == "walker_port") + return &walker.port; + else + panic("No tlb port named %s!\n", if_name); +} + void TLB::insert(Addr vpn, TlbEntry &entry) 
{ diff --git a/src/arch/x86/tlb.hh b/src/arch/x86/tlb.hh index 12739379c..726c25374 100644 --- a/src/arch/x86/tlb.hh +++ b/src/arch/x86/tlb.hh @@ -59,10 +59,12 @@ #define __ARCH_X86_TLB_HH__ #include +#include #include "arch/x86/pagetable.hh" #include "arch/x86/segmentregs.hh" #include "config/full_system.hh" +#include "mem/mem_object.hh" #include "mem/request.hh" #include "params/X86DTB.hh" #include "params/X86ITB.hh" @@ -76,13 +78,16 @@ namespace X86ISA { static const unsigned StoreCheck = 1 << NUM_SEGMENTREGS; - class TLB : public SimObject + class TLB; + + class TLB : public MemObject { -#if !FULL_SYSTEM protected: friend class FakeITLBFault; friend class FakeDTLBFault; -#endif + + System * sys; + public: typedef X86TLBParams Params; TLB(const Params *p); @@ -91,6 +96,137 @@ namespace X86ISA TlbEntry *lookup(Addr va, bool update_lru = true); +#if FULL_SYSTEM + protected: + class Walker + { + public: + enum State { + Ready, + Waiting, + LongPML4, + LongPDP, + LongPD, + LongPTE, + PAEPDP, + PAEPD, + PAEPTE, + PSEPD, + PD, + PTE + }; + + // Act on the current state and determine what to do next. If the + // walker has finished updating the TLB, this will return false. + bool doNext(PacketPtr read, PacketPtr &write); + + // This does an actual load to feed the walker. If we're in + // atomic mode, this will drive the state machine itself until + // the TLB is filled. If we're in timing mode, the port getting + // a reply will drive the machine using this function which will + // return after starting the memory operation. + void doMemory(Addr addr); + + // Kick off the state machine. + void start(bool _uncachable, Addr _vaddr, Addr cr3, State next) + { + assert(state == Ready); + state = Waiting; + nextState = next; + // If PAE isn't being used, entries are 4 bytes. Otherwise + // they're 8. 
+ if (next == PSEPD || next == PD || next == PTE) + size = 4; + else + size = 8; + vaddr = _vaddr; + uncachable = _uncacheable; + buildPacket(cr3); + if (state == Enums::timing) { + port->sendTiming(&packet); + } else if (state == Enums::atomic) { + port->sendAtomic(&packet); + Addr addr; + while(doNext(packet.get(), addr)) { + buildPacket(addr); + port->sendAtomic(&packet); + } + } else { + panic("Unrecognized memory system mode.\n"); + } + }; + + protected: + friend class TLB; + + class WalkerPort : public Port + { + public: + WalkerPort(const std::string &_name, Walker * _walker) : + Port(_name, _walker->tlb), walker(_walker), + packet(NULL), snoopRangeSent(false), retrying(false) + {} + + protected: + Walker * walker; + + PacketPtr packet; + vector writes; + + bool snoopRangeSent; + bool retrying; + + bool recvTiming(PacketPtr pkt); + Tick recvAtomic(PacketPtr pkt); + void recvFunctional(PacketPtr pkt); + void recvStatusChange(Status status); + void recvRetry(); + void getDeviceAddressRanges(AddrRangeList &resp, + bool &snoop) + { + resp.clear(); + snoop = true; + } + + public: + bool sendTiming(PacketPtr pkt) + { + retrying = !Port::sendTiming(pkt); + return !retrying; + } + + bool blocked() { return retrying; } + }; + + friend class WalkerPort; + + WalkerPort port; + + Packet packet; + Request request; + + TLB * tlb; + + State state; + State nextState; + int size; + + Addr vaddr; + + public: + Walker(const std::string &_name, TLB * _tlb) : + port(_name + "-walker_port", this), + packet(&request, ReadExReq, Broadcast), + tlb(_tlb), state(Ready), nextState(Ready) + { + } + + + }; + + Walker walker; +#endif + protected: int size; @@ -100,6 +236,8 @@ namespace X86ISA EntryList freeList; EntryList entryList; + Port *getPort(const std::string &if_name, int idx = -1); + void insert(Addr vpn, TlbEntry &entry); void invalidateAll(); diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index 9b2b99c58..1af30a532 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py 
@@ -100,18 +100,25 @@ class BaseCPU(SimObject): _mem_ports = [] + if build_env['TARGET_ISA'] == 'x86': + itb.walker_port = Port("ITB page table walker port") + dtb.walker_port = Port("ITB page table walker port") + _mem_ports = ["itb.walker_port", "dtb.walker_port"] + def connectMemPorts(self, bus): for p in self._mem_ports: if p != 'physmem_port': exec('self.%s = bus.port' % p) def addPrivateSplitL1Caches(self, ic, dc): - assert(len(self._mem_ports) == 2 or len(self._mem_ports) == 3) + assert(len(self._mem_ports) < 6) self.icache = ic self.dcache = dc self.icache_port = ic.cpu_side self.dcache_port = dc.cpu_side self._mem_ports = ['icache.mem_side', 'dcache.mem_side'] + if build_env['TARGET_ISA'] == 'x86': + self._mem_ports += ["itb.walker_port", "dtb.walker_port"] def addTwoLevelCacheHierarchy(self, ic, dc, l2c): self.addPrivateSplitL1Caches(ic, dc) diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 27ca8ce1e..f0284b2cf 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -58,7 +58,7 @@ class DerivO3CPU(BaseCPU): cachePorts = Param.Unsigned(200, "Cache Ports") icache_port = Port("Instruction Port") dcache_port = Port("Data Port") - _mem_ports = ['icache_port', 'dcache_port'] + _mem_ports = BaseCPU._mem_ports + ['icache_port', 'dcache_port'] decodeToFetchDelay = Param.Unsigned(1, "Decode to fetch delay") renameToFetchDelay = Param.Unsigned(1 ,"Rename to fetch delay") diff --git a/src/cpu/simple/AtomicSimpleCPU.py b/src/cpu/simple/AtomicSimpleCPU.py index bfd1825c2..28c2aa9c9 100644 --- a/src/cpu/simple/AtomicSimpleCPU.py +++ b/src/cpu/simple/AtomicSimpleCPU.py @@ -41,4 +41,5 @@ class AtomicSimpleCPU(BaseCPU): icache_port = Port("Instruction Port") dcache_port = Port("Data Port") physmem_port = Port("Physical Memory Port") - _mem_ports = ['icache_port', 'dcache_port', 'physmem_port'] + _mem_ports = BaseCPU._mem_ports + \ + ['icache_port', 'dcache_port', 'physmem_port'] diff --git a/src/cpu/simple/TimingSimpleCPU.py 
b/src/cpu/simple/TimingSimpleCPU.py index 2fcde175c..7e777e813 100644 --- a/src/cpu/simple/TimingSimpleCPU.py +++ b/src/cpu/simple/TimingSimpleCPU.py @@ -38,4 +38,4 @@ class TimingSimpleCPU(BaseCPU): profile = Param.Latency('0ns', "trace the kernel stack") icache_port = Port("Instruction Port") dcache_port = Port("Data Port") - _mem_ports = ['icache_port', 'dcache_port'] + _mem_ports = BaseCPU._mem_ports + ['icache_port', 'dcache_port'] -- cgit v1.2.3 From fce45baf178b43c2ea1476967fba3766e9b2ea9d Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:31 -0800 Subject: X86: Work on the page table walker, TLB, and related faults. --HG-- extra : convert_revision : 9edde958b7e571c07072785f18f9109f73b8059f --- src/arch/x86/X86TLB.py | 1 + src/arch/x86/faults.cc | 15 ++ src/arch/x86/faults.hh | 16 -- src/arch/x86/pagetable.hh | 18 +- src/arch/x86/tlb.cc | 431 +++++++++++++++++++++++++++++++++++++++++----- src/arch/x86/tlb.hh | 114 ++++++------ src/cpu/BaseCPU.py | 4 +- src/cpu/simple/base.cc | 2 +- 8 files changed, 473 insertions(+), 128 deletions(-) (limited to 'src') diff --git a/src/arch/x86/X86TLB.py b/src/arch/x86/X86TLB.py index 2d562ba9a..5c174be59 100644 --- a/src/arch/x86/X86TLB.py +++ b/src/arch/x86/X86TLB.py @@ -55,6 +55,7 @@ from MemObject import MemObject from m5.params import * +from m5.proxy import * class X86TLB(MemObject): type = 'X86TLB' diff --git a/src/arch/x86/faults.cc b/src/arch/x86/faults.cc index 13341f1de..abb5d98d7 100644 --- a/src/arch/x86/faults.cc +++ b/src/arch/x86/faults.cc @@ -93,6 +93,8 @@ #include "arch/x86/isa_traits.hh" #include "mem/page_table.hh" #include "sim/process.hh" +#else +#include "arch/x86/tlb.hh" #endif namespace X86ISA @@ -112,6 +114,19 @@ namespace X86ISA { panic("X86 faults are not implemented!"); } + + void FakeITLBFault::invoke(ThreadContext * tc) + { + // Start the page table walker. 
+ tc->getITBPtr()->walker.start(tc, vaddr); + } + + void FakeDTLBFault::invoke(ThreadContext * tc) + { + // Start the page table walker. + tc->getDTBPtr()->walker.start(tc, vaddr); + } + #else // !FULL_SYSTEM void FakeITLBFault::invoke(ThreadContext * tc) { diff --git a/src/arch/x86/faults.hh b/src/arch/x86/faults.hh index 5a573754a..78a55d0e1 100644 --- a/src/arch/x86/faults.hh +++ b/src/arch/x86/faults.hh @@ -369,44 +369,28 @@ namespace X86ISA // the tlb on a miss and are to take the place of a hardware table walker. class FakeITLBFault : public X86Fault { -#if !FULL_SYSTEM protected: Addr vaddr; public: FakeITLBFault(Addr _vaddr) : X86Fault("fake instruction tlb fault", "itlb"), vaddr(_vaddr) -#else - public: - FakeITLBFault() : - X86Fault("fake instruction tlb fault", "itlb") -#endif {} -#if !FULL_SYSTEM void invoke(ThreadContext * tc); -#endif }; class FakeDTLBFault : public X86Fault { -#if !FULL_SYSTEM protected: Addr vaddr; public: FakeDTLBFault(Addr _vaddr) : X86Fault("fake data tlb fault", "dtlb"), vaddr(_vaddr) -#else - public: - FakeDTLBFault() : - X86Fault("fake data tlb fault", "dtlb") -#endif {} -#if !FULL_SYSTEM void invoke(ThreadContext * tc); -#endif }; }; diff --git a/src/arch/x86/pagetable.hh b/src/arch/x86/pagetable.hh index cc614168c..e42693c03 100644 --- a/src/arch/x86/pagetable.hh +++ b/src/arch/x86/pagetable.hh @@ -62,16 +62,26 @@ #include #include "sim/host.hh" +#include "base/bitunion.hh" #include "base/misc.hh" class Checkpoint; namespace X86ISA { - struct VAddr - { - VAddr(Addr a) { panic("not implemented yet."); } - }; + BitUnion64(VAddr) + Bitfield<20, 12> longl1; + Bitfield<29, 21> longl2; + Bitfield<38, 30> longl3; + Bitfield<47, 39> longl4; + + Bitfield<20, 12> pael1; + Bitfield<29, 21> pael2; + Bitfield<31, 30> pael3; + + Bitfield<21, 12> norml1; + Bitfield<31, 22> norml2; + EndBitUnion(VAddr) struct TlbEntry { diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index e30e820b4..704ab3027 100644 --- a/src/arch/x86/tlb.cc +++ 
b/src/arch/x86/tlb.cc @@ -64,6 +64,7 @@ #include "arch/x86/x86_traits.hh" #include "base/bitfield.hh" #include "base/trace.hh" +#include "config/full_system.hh" #include "cpu/thread_context.hh" #include "cpu/base.hh" #include "mem/packet_access.hh" @@ -72,7 +73,11 @@ namespace X86ISA { +#if FULL_SYSTEM TLB::TLB(const Params *p) : MemObject(p), walker(name(), this), size(p->size) +#else +TLB::TLB(const Params *p) : MemObject(p), size(p->size) +#endif { tlb = new TlbEntry[size]; std::memset(tlb, 0, sizeof(TlbEntry) * size); @@ -81,91 +86,377 @@ TLB::TLB(const Params *p) : MemObject(p), walker(name(), this), size(p->size) freeList.push_back(&tlb[x]); } -bool -TLB::Walker::doNext(uint64_t data, PacketPtr &write) +#if FULL_SYSTEM + +// Unfortunately, the placement of the base field in a page table entry is +// very erratic and would make a mess here. It might be moved here at some +// point in the future. +BitUnion64(PageTableEntry) + Bitfield<63> nx; + Bitfield<11, 9> avl; + Bitfield<8> g; + Bitfield<7> ps; + Bitfield<6> d; + Bitfield<5> a; + Bitfield<4> pcd; + Bitfield<3> pwt; + Bitfield<2> u; + Bitfield<1> w; + Bitfield<0> p; +EndBitUnion(PageTableEntry) + +void +TLB::Walker::doNext(PacketPtr &read, PacketPtr &write) { assert(state != Ready && state != Waiting); write = NULL; + PageTableEntry pte; + if (size == 8) + pte = read->get(); + else + pte = read->get(); + VAddr vaddr = entry.vaddr; + bool uncacheable = pte.pcd; + Addr nextRead = 0; + bool doWrite = false; + bool badNX = pte.nx && (!tlb->allowNX || !enableNX); switch(state) { case LongPML4: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * size; + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (badNX) + panic("NX violation!\n"); + entry.noExec = pte.nx; + if (!pte.p) + panic("Page not present!\n"); nextState = LongPDP; break; case LongPDP: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * size; + doWrite = !pte.a; + pte.a = 1; + entry.writable 
= entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); nextState = LongPD; break; case LongPD: - nextState = LongPTE; - break; + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = + ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * size; + nextState = LongPTE; + break; + } else { + // 2 MB page + entry.size = 2 * (1 << 20); + entry.paddr = (uint64_t)pte & (mask(31) << 21); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } case LongPTE: + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(40) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); nextState = Ready; - return false; + delete read->req; + delete read; + read = NULL; + return; case PAEPDP: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * size; + if (!pte.p) + panic("Page not present!\n"); nextState = PAEPD; break; case PAEPD: - break; + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * size; + nextState = PAEPTE; + 
break; + } else { + // 2 MB page + entry.size = 2 * (1 << 20); + entry.paddr = (uint64_t)pte & (mask(31) << 21); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } case PAEPTE: + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(40) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 7); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); nextState = Ready; - return false; + delete read->req; + delete read; + read = NULL; + return; case PSEPD: - break; + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (!pte.p) + panic("Page not present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = + ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; + nextState = PTE; + break; + } else { + // 4 MB page + entry.size = 4 * (1 << 20); + entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22; + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } case PD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (!pte.p) + panic("Page not present!\n"); + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; + nextState = PTE; + break; nextState = PTE; break; case PTE: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user 
= pte.u; + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(20) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 7); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); nextState = Ready; - return false; + delete read->req; + delete read; + read = NULL; + return; default: panic("Unknown page table walker state %d!\n"); } - return true; + PacketPtr oldRead = read; + //If we didn't return, we're setting up another read. + uint32_t flags = oldRead->req->getFlags(); + if (uncacheable) + flags |= UNCACHEABLE; + else + flags &= ~UNCACHEABLE; + RequestPtr request = + new Request(nextRead, oldRead->getSize(), flags); + read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast); + read->allocate(); + //If we need to write, adjust the read packet to write the modified value + //back to memory. + if (doWrite) { + write = oldRead; + write->set(pte); + write->cmd = MemCmd::WriteReq; + write->setDest(Packet::Broadcast); + } else { + write = NULL; + delete oldRead->req; + delete oldRead; + } } void -TLB::Walker::buildReadPacket(Addr addr) +TLB::Walker::start(ThreadContext * _tc, Addr vaddr) { - readRequest.setPhys(addr, size, PHYSICAL | uncachable ? UNCACHEABLE : 0); - readPacket.reinitFromRequest(); + assert(state == Ready); + assert(!tc); + tc = _tc; + + VAddr addr = vaddr; + + //Figure out what we're doing. + CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3); + Addr top = 0; + // Check if we're in long mode or not + Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); + size = 8; + if (efer.lma) { + // Do long mode. + state = LongPML4; + top = (cr3.longPdtb << 12) + addr.longl4 * size; + } else { + // We're in some flavor of legacy mode. + CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); + if (cr4.pae) { + // Do legacy PAE. 
+ state = PAEPDP; + top = (cr3.paePdtb << 5) + addr.pael3 * size; + } else { + size = 4; + top = (cr3.pdtb << 12) + addr.norml2 * size; + if (cr4.pse) { + // Do legacy PSE. + state = PSEPD; + } else { + // Do legacy non PSE. + state = PD; + } + } + } + nextState = Ready; + entry.vaddr = vaddr; + + enableNX = efer.nxe; + + RequestPtr request = + new Request(top, size, PHYSICAL | cr3.pcd ? UNCACHEABLE : 0); + read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast); + read->allocate(); + Enums::MemoryMode memMode = tlb->sys->getMemoryMode(); + if (memMode == Enums::timing) { + tc->suspend(); + port.sendTiming(read); + } else if (memMode == Enums::atomic) { + do { + port.sendAtomic(read); + PacketPtr write = NULL; + doNext(read, write); + state = nextState; + nextState = Ready; + if (write) + port.sendAtomic(write); + } while(read); + tc = NULL; + state = Ready; + nextState = Waiting; + } else { + panic("Unrecognized memory system mode.\n"); + } } -TLB::walker::buildWritePacket(Addr addr) +bool +TLB::Walker::WalkerPort::recvTiming(PacketPtr pkt) { - writeRequest.setPhys(addr, size, PHYSICAL | uncachable ? 
UNCACHEABLE : 0); - writePacket.reinitFromRequest(); + return walker->recvTiming(pkt); +} bool -TLB::Walker::WalkerPort::recvTiming(PacketPtr pkt) +TLB::Walker::recvTiming(PacketPtr pkt) { + inflight--; if (pkt->isResponse() && !pkt->wasNacked()) { if (pkt->isRead()) { - assert(packet); - assert(walker->state == Waiting); - packet = NULL; - walker->state = walker->nextState; - walker->nextState = Ready; - PacketPtr write; - if (walker->doNext(pkt, write)) { - packet = &walker->packet; - port->sendTiming(packet); - } + assert(inflight); + assert(state == Waiting); + assert(!read); + state = nextState; + nextState = Ready; + PacketPtr write = NULL; + doNext(pkt, write); + state = Waiting; + read = pkt; if (write) { writes.push_back(write); } - while (!port->blocked() && writes.size()) { - if (port->sendTiming(writes.front())) { - writes.pop_front(); - outstandingWrites++; - } - } + sendPackets(); } else { - outstandingWrites--; + sendPackets(); + } + if (inflight == 0 && read == NULL && writes.size() == 0) { + tc->activate(0); + tc = NULL; + state = Ready; + nextState = Waiting; } } else if (pkt->wasNacked()) { pkt->reinitNacked(); - if (!sendTiming(pkt)) { + if (!port.sendTiming(pkt)) { + retrying = true; if (pkt->isWrite()) { - writes.push_front(pkt); + writes.push_back(pkt); + } else { + assert(!read); + read = pkt; } + } else { + inflight++; } } return true; @@ -199,10 +490,48 @@ TLB::Walker::WalkerPort::recvStatusChange(Status status) void TLB::Walker::WalkerPort::recvRetry() +{ + walker->recvRetry(); +} + +void +TLB::Walker::recvRetry() { retrying = false; - if (!sendTiming(packet)) { - retrying = true; + sendPackets(); +} + +void +TLB::Walker::sendPackets() +{ + //If we're already waiting for the port to become available, just return. 
+ if (retrying) + return; + + //Reads always have priority + if (read) { + if (!port.sendTiming(read)) { + retrying = true; + return; + } else { + inflight++; + delete read->req; + delete read; + read = NULL; + } + } + //Send off as many of the writes as we can. + while (writes.size()) { + PacketPtr write = writes.back(); + if (!port.sendTiming(write)) { + retrying = true; + return; + } else { + inflight++; + delete write->req; + delete write; + writes.pop_back(); + } } } @@ -215,6 +544,16 @@ TLB::getPort(const std::string &if_name, int idx) panic("No tlb port named %s!\n", if_name); } +#else + +Port * +TLB::getPort(const std::string &if_name, int idx) +{ + panic("No tlb ports in se!\n", if_name); +} + +#endif + void TLB::insert(Addr vpn, TlbEntry &entry) { @@ -582,10 +921,12 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) // If protected mode has been enabled... if (cr0.pe) { + DPRINTF(TLB, "In protected mode.\n"); Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR); // If we're not in 64-bit mode, do protection/limit checks if (!efer.lma || !csAttr.longMode) { + DPRINTF(TLB, "Not in long mode. Checking segment protection.\n"); SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg)); if (!attr.writable && write) return new GeneralProtection(0); @@ -594,6 +935,7 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg)); Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg)); if (!attr.expandDown) { + DPRINTF(TLB, "Checking an expand down segment.\n"); // We don't have to worry about the access going around the // end of memory because accesses will be broken up into // pieces at boundaries aligned on sizes smaller than an @@ -618,25 +960,28 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) } // If paging is enabled, do the translation. 
if (cr0.pg) { + DPRINTF(TLB, "Paging enabled.\n"); // The vaddr already has the segment base applied. TlbEntry *entry = lookup(vaddr); if (!entry) { -#if FULL_SYSTEM - return new TlbFault(); -#else return new TlbFault(vaddr); -#endif } else { // Do paging protection checks. - Addr paddr = entry->paddr | (vaddr & mask(12)); + DPRINTF(TLB, "Entry found with paddr %#x, doing protection checks.\n", entry->paddr); + Addr paddr = entry->paddr | (vaddr & (entry->size-1)); + DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr); req->setPaddr(paddr); } } else { //Use the address which already has segmentation applied. + DPRINTF(TLB, "Paging disabled.\n"); + DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, vaddr); req->setPaddr(vaddr); } } else { // Real mode + DPRINTF(TLB, "In real mode.\n"); + DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, vaddr); req->setPaddr(vaddr); } return NoFault; diff --git a/src/arch/x86/tlb.hh b/src/arch/x86/tlb.hh index 726c25374..d45f94520 100644 --- a/src/arch/x86/tlb.hh +++ b/src/arch/x86/tlb.hh @@ -59,6 +59,7 @@ #define __ARCH_X86_TLB_HH__ #include +#include #include #include "arch/x86/pagetable.hh" @@ -88,6 +89,8 @@ namespace X86ISA System * sys; + bool allowNX; + public: typedef X86TLBParams Params; TLB(const Params *p); @@ -116,65 +119,55 @@ namespace X86ISA PTE }; - // Act on the current state and determine what to do next. If the - // walker has finished updating the TLB, this will return false. - bool doNext(PacketPtr read, PacketPtr &write); - - // This does an actual load to feed the walker. If we're in - // atomic mode, this will drive the state machine itself until - // the TLB is filled. If we're in timing mode, the port getting - // a reply will drive the machine using this function which will - // return after starting the memory operation. - void doMemory(Addr addr); + // Act on the current state and determine what to do next. read + // should be the packet that just came back from a read and write + // should be NULL. 
When the function returns, read is either NULL + // if the machine is finished, or points to a packet to initiate + // the next read. If any write is required to update an "accessed" + // bit, write will point to a packet to do the write. Otherwise it + // will be NULL. + void doNext(PacketPtr &read, PacketPtr &write); // Kick off the state machine. - void start(bool _uncachable, Addr _vaddr, Addr cr3, State next) - { - assert(state == Ready); - state = Waiting; - nextState = next; - // If PAE isn't being used, entries are 4 bytes. Otherwise - // they're 8. - if (next == PSEPD || next == PD || next == PTE) - size = 4; - else - size = 8; - vaddr = _vaddr; - uncachable = _uncacheable; - buildPacket(cr3); - if (state == Enums::timing) { - port->sendTiming(&packet); - } else if (state == Enums::atomic) { - port->sendAtomic(&packet); - Addr addr; - while(doNext(packet.get(), addr)) { - buildPacket(addr); - port->sendAtomic(&packet); - } - } else { - panic("Unrecognized memory system mode.\n"); - } - }; + void start(ThreadContext * _tc, Addr vaddr); protected: friend class TLB; + /* + * State having to do with sending packets. + */ + PacketPtr read; + std::vector writes; + + // How many memory operations are in flight. + unsigned inflight; + + bool retrying; + + /* + * Functions for dealing with packets. 
+ */ + bool recvTiming(PacketPtr pkt); + void recvRetry(); + + void sendPackets(); + + /* + * Port for accessing memory + */ class WalkerPort : public Port { public: WalkerPort(const std::string &_name, Walker * _walker) : Port(_name, _walker->tlb), walker(_walker), - packet(NULL), snoopRangeSent(false), retrying(false) + snoopRangeSent(false) {} protected: Walker * walker; - PacketPtr packet; - vector writes; - bool snoopRangeSent; - bool retrying; bool recvTiming(PacketPtr pkt); Tick recvAtomic(PacketPtr pkt); @@ -187,46 +180,41 @@ namespace X86ISA resp.clear(); snoop = true; } - - public: - bool sendTiming(PacketPtr pkt) - { - retrying = !Port::sendTiming(pkt); - return !retrying; - } - - bool blocked() { return retrying; } }; friend class WalkerPort; WalkerPort port; - Packet packet; - Request request; - + // The TLB we're supposed to load. TLB * tlb; + /* + * State machine state. + */ + ThreadContext * tc; State state; State nextState; int size; - - Addr vaddr; + bool enableNX; + TlbEntry entry; public: Walker(const std::string &_name, TLB * _tlb) : + read(NULL), inflight(0), retrying(false), port(_name + "-walker_port", this), - packet(&request, ReadExReq, Broadcast), - tlb(_tlb), state(Ready), nextState(Ready) + tlb(_tlb), + tc(NULL), state(Ready), nextState(Ready) { } - - }; Walker walker; + #endif + Port *getPort(const std::string &if_name, int idx = -1); + protected: int size; @@ -236,8 +224,6 @@ namespace X86ISA EntryList freeList; EntryList entryList; - Port *getPort(const std::string &if_name, int idx = -1); - void insert(Addr vpn, TlbEntry &entry); void invalidateAll(); @@ -262,6 +248,8 @@ namespace X86ISA typedef X86ITBParams Params; ITB(const Params *p) : TLB(p) { + sys = p->system; + allowNX = false; } Fault translate(RequestPtr &req, ThreadContext *tc); @@ -275,6 +263,8 @@ namespace X86ISA typedef X86DTBParams Params; DTB(const Params *p) : TLB(p) { + sys = p->system; + allowNX = true; } Fault translate(RequestPtr &req, ThreadContext *tc, bool 
write); #if FULL_SYSTEM diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index 1af30a532..cb5793e57 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -100,7 +100,7 @@ class BaseCPU(SimObject): _mem_ports = [] - if build_env['TARGET_ISA'] == 'x86': + if build_env['TARGET_ISA'] == 'x86' and build_env['FULL_SYSTEM']: itb.walker_port = Port("ITB page table walker port") dtb.walker_port = Port("ITB page table walker port") _mem_ports = ["itb.walker_port", "dtb.walker_port"] @@ -117,7 +117,7 @@ class BaseCPU(SimObject): self.icache_port = ic.cpu_side self.dcache_port = dc.cpu_side self._mem_ports = ['icache.mem_side', 'dcache.mem_side'] - if build_env['TARGET_ISA'] == 'x86': + if build_env['TARGET_ISA'] == 'x86' and build_env['FULL_SYSTEM']: self._mem_ports += ["itb.walker_port", "dtb.walker_port"] def addTwoLevelCacheHierarchy(self, ic, dc, l2c): diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 1611a7275..98e29d8d1 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -466,9 +466,9 @@ BaseSimpleCPU::advancePC(Fault fault) if (fault != NoFault) { curMacroStaticInst = StaticInst::nullStaticInstPtr; predecoder.reset(); - fault->invoke(tc); thread->setMicroPC(0); thread->setNextMicroPC(1); + fault->invoke(tc); } else { //If we're at the last micro op for this instruction if (curStaticInst && curStaticInst->isLastMicroop()) { -- cgit v1.2.3 From 6d4ba8de34e4427a0488e6bc0a9f5b9ec5b1a265 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:38 -0800 Subject: X86: Flesh out the opcode groups for two byte opcodes. 
--HG-- extra : convert_revision : 4d51befd6dae4035c0eb685d33e1f5e38467c766 --- src/arch/x86/isa/decoder/two_byte_opcodes.isa | 210 ++++++++++++++++++++++++-- 1 file changed, 198 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa index 0482fdf23..30034a305 100644 --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -61,8 +61,62 @@ 0x0F: decode OPCODE_OP_TOP5 { format WarnUnimpl { 0x00: decode OPCODE_OP_BOTTOM3 { - 0x00: group6(); - 0x01: group7(); + //0x00: group6(); + 0x00: decode MODRM_REG { + 0x0: sldt_Mw_or_Rv(); + 0x1: str_Mw_or_Rv(); + 0x2: lldt_Mw_or_Rv(); + 0x3: ltr_Mw_or_Rv(); + 0x4: verr_Mw_or_Rv(); + 0x5: verw_Mw_or_Rv(); + //0x6: jmpe_Ev(); // IA-64 + default: Inst::UD2(); + } + //0x01: group7(); // Ugly, ugly, ugly... + 0x01: decode MODRM_MOD { + 0x3: decode MODRM_REG { + 0x0: decode MODRM_RM { + 0x1: vmcall(); + 0x2: vmlaunch(); + 0x3: vmresume(); + 0x4: vmxoff(); + default: Inst::UD2(); + } + 0x1: decode MODRM_RM { + 0x0: monitor(); + 0x1: mwait(); + default: Inst::UD2(); + } + 0x3: decode MODRM_RM { + 0x0: vmrun(); + 0x1: vmmcall(); + 0x2: vmload(); + 0x3: vmsave(); + 0x4: stgi(); + 0x5: clgi(); + 0x6: skinit(); + 0x7: invlpga(); + } + 0x4: smsw_Rv(); + 0x6: lmsw_Rv(); + 0x7: decode MODRM_RM { + 0x0: swapgs(); + 0x1: rdtscp(); + default: Inst::UD2(); + } + default: Inst::UD2(); + } + default: decode MODRM_REG { + 0x0: sgdt_Ms(); + 0x1: sidt_Ms(); + 0x2: lgdt_Ms(); + 0x3: lidt_Ms(); + 0x4: smsw_Mw(); + 0x6: lmsw_Mw(); + 0x7: invlpg_M(); + default: Inst::UD2(); + } + } 0x02: lar_Gv_Ew(); 0x03: lsl_Gv_Ew(); //sandpile.org doesn't seem to know what this is... ? 
@@ -397,9 +451,58 @@ // no prefix 0x0: decode OPCODE_OP_BOTTOM3 { 0x0: pshufw_Pq_Qq_Ib(); - 0x1: group13_pshimw(); - 0x2: group14_pshimd(); - 0x3: group15_pshimq(); + //0x1: group13_pshimw(); + 0x1: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrlw_PRq_Ib(); + 0x1: psrlw_VRo_Ib(); + } + 0x4: decode LEGACY_OP { + 0x0: psraw_PRq_Ib(); + 0x1: psraw_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: psllw_PRq_Ib(); + 0x1: psllw_VRo_Ib(); + } + default: Inst::UD2(); + } + //0x2: group14_pshimd(); + 0x2: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrld_PRq_Ib(); + 0x1: psrld_VRo_Ib(); + } + 0x4: decode LEGACY_OP { + 0x0: psrad_PRq_Ib(); + 0x1: psrad_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: pslld_PRq_Ib(); + 0x1: pslld_VRo_Ib(); + } + default: Inst::UD2(); + } + //0x3: group15_pshimq(); + 0x3: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrlq_PRq_Ib(); + 0x1: psrlq_VRo_Ib(); + } + 0x3: decode LEGACY_OP { + 0x0: Inst::UD2(); + 0x1: psrldq_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: psllq_PRq_Ib(); + 0x1: psllq_VRo_Ib(); + } + 0x7: decode LEGACY_OP { + 0x0: Inst::UD2(); + 0x1: pslldq_VRo_Ib(); + } + default: Inst::UD2(); + } 0x4: pcmpeqb_Pq_Qq(); 0x5: pcmpeqw_Pq_Qq(); 0x6: pcmpeqd_Pq_Qq(); @@ -413,9 +516,58 @@ // operand size (0x66) 0x1: decode OPCODE_OP_BOTTOM3 { 0x0: pshufd_Vo_Wo_Ib(); - 0x1: group13_pshimw(); - 0x2: group14_pshimd(); - 0x3: group15_pshimq_dq(); + //0x1: group13_pshimw(); + 0x1: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrlw_PRq_Ib(); + 0x1: psrlw_VRo_Ib(); + } + 0x4: decode LEGACY_OP { + 0x0: psraw_PRq_Ib(); + 0x1: psraw_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: psllw_PRq_Ib(); + 0x1: psllw_VRo_Ib(); + } + default: Inst::UD2(); + } + //0x2: group14_pshimd(); + 0x2: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrld_PRq_Ib(); + 0x1: psrld_VRo_Ib(); + } + 0x4: decode LEGACY_OP { + 0x0: psrad_PRq_Ib(); + 0x1: psrad_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: pslld_PRq_Ib(); + 0x1: pslld_VRo_Ib(); + } + 
default: Inst::UD2(); + } + //0x3: group15_pshimq(); + 0x3: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrlq_PRq_Ib(); + 0x1: psrlq_VRo_Ib(); + } + 0x3: decode LEGACY_OP { + 0x0: Inst::UD2(); + 0x1: psrldq_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: psllq_PRq_Ib(); + 0x1: psllq_VRo_Ib(); + } + 0x7: decode LEGACY_OP { + 0x0: Inst::UD2(); + 0x1: pslldq_VRo_Ib(); + } + default: Inst::UD2(); + } 0x4: pcmpeqb_Vo_Wo(); 0x5: pcmpeqw_Vo_Wo(); 0x6: pcmpeqd_Vo_Wo(); @@ -518,7 +670,21 @@ 0x3: bts_Ev_Gv(); 0x4: shrd_Ev_Gv_Ib(); 0x5: shrd_Ev_Gv_rCl(); - 0x6: group16(); + //0x6: group16(); + 0x6: decode MODRM_MOD { + 0x3: decode MODRM_REG { + 0x5: lfence(); + 0x6: mfence(); + 0x7: sfence(); + default: Inst::UD2(); + } + default: decode MODRM_REG { + 0x0: fxsave(); + 0x1: fxrstor(); + 0x7: clflush(); + default: Inst::UD2(); + } + } 0x7: Inst::IMUL(Gv,Ev); } 0x16: decode OPCODE_OP_BOTTOM3 { @@ -536,8 +702,16 @@ } 0x17: decode OPCODE_OP_BOTTOM3 { 0x0: jmpe_Jz(); // IA-64? - 0x1: group11_UD2(); - 0x2: group8_Ev_Ib(); + //0x1: group11_UD2(); + 0x1: Inst::UD2(); + //0x2: group8_Ev_Ib(); + 0x2: decode MODRM_REG { + 0x4: bt_Ev_Ib(); + 0x5: bts_Ev_Ib(); + 0x6: btr_Ev_Ib(); + 0x7: btc_Ev_Ib(); + default: Inst::UD2(); + } 0x3: btc_Ev_Gv(); 0x4: bsf_Gv_Ev(); 0x5: bsr_Gv_Ev(); @@ -550,7 +724,19 @@ 0x18: decode OPCODE_OP_BOTTOM3 { 0x0: xadd_Eb_Gb(); 0x1: xadd_Ev_Gv(); - 0x7: group9(); + //0x7: group9(); + 0x7: decode MODRM_REG { + 0x1: cmpxchg_Mq(); + 0x6: decode LEGACY_OP { + 0x1: vmclear_Mq(); + default: decode LEGACY_REP { + 0x1: vmxon_Mq(); + 0x0: vmptrld_Mq(); + } + } + 0x7: vmptrst_Mq(); + default: Inst::UD2(); + } default: decode LEGACY_DECODEVAL { // no prefix 0x0: decode OPCODE_OP_BOTTOM3 { -- cgit v1.2.3 From f9ddb894dd92d6cc5601d65a3c58dc5dd73f7ac7 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:45 -0800 Subject: X86: Change the meaning of the sext and zext width operand, and make sext set zext if the sign bit is 0. 
--HG-- extra : convert_revision : 08bd7b4ff183038c016612d04ac73b20a255d141 --- .../data_conversion/sign_extension.py | 2 +- .../general_purpose/data_conversion/translate.py | 2 +- .../insts/general_purpose/data_transfer/move.py | 30 +++++++++++----------- .../data_transfer/stack_operations.py | 4 +-- .../general_purpose/input_output/general_io.py | 4 +-- .../general_purpose/input_output/string_io.py | 8 +++--- src/arch/x86/isa/microops/regop.isa | 18 ++++++++++--- 7 files changed, 39 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/arch/x86/isa/insts/general_purpose/data_conversion/sign_extension.py b/src/arch/x86/isa/insts/general_purpose/data_conversion/sign_extension.py index 9a7c226af..ae3c6cc6f 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_conversion/sign_extension.py +++ b/src/arch/x86/isa/insts/general_purpose/data_conversion/sign_extension.py @@ -55,7 +55,7 @@ microcode = ''' def macroop CDQE_R { - sext reg, reg, "env.dataSize << 2" + sexti reg, reg, "env.dataSize << 2 - 1" }; def macroop CQO_R_R { diff --git a/src/arch/x86/isa/insts/general_purpose/data_conversion/translate.py b/src/arch/x86/isa/insts/general_purpose/data_conversion/translate.py index c2ccb9d19..d6ae7885a 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_conversion/translate.py +++ b/src/arch/x86/isa/insts/general_purpose/data_conversion/translate.py @@ -55,7 +55,7 @@ microcode = ''' def macroop XLAT { - zext t1, rax, 8 + zexti t1, rax, 7 # Here, t1 can be used directly. The value of al is supposed to be treated # as unsigned. Since we zero extended it from 8 bits above and the address # size has to be at least 16 bits, t1 will not be sign extended. 
diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py index 04f9ea12a..ada7f28a3 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py @@ -111,48 +111,48 @@ def macroop MOV_P_I { # def macroop MOVSXD_R_R { - sext reg, regm, 32 + sexti reg, regm, 31 }; def macroop MOVSXD_R_M { ld t1, seg, sib, disp, dataSize=4 - sext reg, t1, 32 + sexti reg, t1, 31 }; def macroop MOVSXD_R_P { rdip t7 ld t1, seg, riprel, disp, dataSize=4 - sext reg, t1, 32 + sexti reg, t1, 31 }; def macroop MOVSX_B_R_R { - sext reg, regm, 8 + sexti reg, regm, 7 }; def macroop MOVSX_B_R_M { ld reg, seg, sib, disp, dataSize=1 - sext reg, reg, 8 + sexti reg, reg, 7 }; def macroop MOVSX_B_R_P { rdip t7 ld reg, seg, riprel, disp, dataSize=1 - sext reg, reg, 8 + sexti reg, reg, 7 }; def macroop MOVSX_W_R_R { - sext reg, regm, 16 + sexti reg, regm, 15 }; def macroop MOVSX_W_R_M { ld reg, seg, sib, disp, dataSize=2 - sext reg, reg, 16 + sexti reg, reg, 15 }; def macroop MOVSX_W_R_P { rdip t7 ld reg, seg, riprel, disp, dataSize=2 - sext reg, reg, 16 + sexti reg, reg, 15 }; # @@ -160,33 +160,33 @@ def macroop MOVSX_W_R_P { # def macroop MOVZX_B_R_R { - zext reg, regm, 8 + zexti reg, regm, 7 }; def macroop MOVZX_B_R_M { ld t1, seg, sib, disp, dataSize=1 - zext reg, t1, 8 + zexti reg, t1, 7 }; def macroop MOVZX_B_R_P { rdip t7 ld t1, seg, riprel, disp, dataSize=1 - zext reg, t1, 8 + zexti reg, t1, 7 }; def macroop MOVZX_W_R_R { - zext reg, regm, 16 + zexti reg, regm, 15 }; def macroop MOVZX_W_R_M { ld t1, seg, sib, disp, dataSize=2 - zext reg, t1, 16 + zexti reg, t1, 15 }; def macroop MOVZX_W_R_P { rdip t7 ld t1, seg, riprel, disp, dataSize=2 - zext reg, t1, 16 + zexti reg, t1, 15 }; ''' #let {{ diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py 
b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py index 5884d68c2..6c51f3171 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py @@ -162,9 +162,9 @@ def macroop ENTER_I_I { # Pull the different components out of the immediate limm t1, imm - zext t2, t1, 16, dataSize=2 + zexti t2, t1, 15, dataSize=2 srl t1, t1, 16 - zext t1, t1, 6 + zexti t1, t1, 5 # t1 is now the masked nesting level, and t2 is the amount of storage. # Push rbp. diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py b/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py index c01a11035..75a361eb7 100644 --- a/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py +++ b/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py @@ -62,7 +62,7 @@ microcode = ''' def macroop IN_R_R { limm t1, "IntAddrPrefixIO" - zext t2, regm, 16, dataSize=2 + zexti t2, regm, 15, dataSize=2 ld reg, intseg, [1, t1, t2], addressSize=8 }; @@ -74,7 +74,7 @@ microcode = ''' def macroop OUT_R_R { limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 st regm, intseg, [1, t1, t2], addressSize=8 }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py b/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py index a8acbbc39..b44203d9c 100644 --- a/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py +++ b/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py @@ -62,7 +62,7 @@ def macroop INS_M_R { mov t3, t3, t4, flags=(nCEZF,), dataSize=asz limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 ld t6, intseg, [1, t1, t2], addressSize=8 st t6, es, [1, t0, rdi] @@ -78,7 +78,7 @@ def macroop INS_E_M_R { mov t3, t3, t4, flags=(nCEZF,), dataSize=asz limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + 
zexti t2, reg, 15, dataSize=2 topOfLoop: ld t6, intseg, [1, t1, t2], addressSize=8 @@ -98,7 +98,7 @@ def macroop OUTS_R_M { mov t3, t3, t4, flags=(nCEZF,), dataSize=asz limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 ld t6, ds, [1, t0, rsi] st t6, intseg, [1, t1, t2], addressSize=8 @@ -114,7 +114,7 @@ def macroop OUTS_E_R_M { mov t3, t3, t4, flags=(nCEZF,), dataSize=asz limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 topOfLoop: ld t6, ds, [1, t0, rsi] diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index 892c44487..b5e17d36d 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -318,7 +318,7 @@ let {{ # If there's something optional to do with flags, generate # a version without it and fix up this version to use it. - if flag_code is not "" or cond_check is not "true": + if flag_code != "" or cond_check != "true": self.buildCppClasses(name, Name, suffix, code, "", "true", else_code) suffix = "Flags" + suffix @@ -866,12 +866,22 @@ let {{ class Sext(RegOp): code = ''' IntReg val = psrc1; - int sign_bit = bits(val, imm8-1, imm8-1); - uint64_t maskVal = mask(imm8); + // Mask the bit position so that it wraps. + int bitPos = op2 & (dataSize * 8 - 1); + int sign_bit = bits(val, bitPos, bitPos); + uint64_t maskVal = mask(bitPos+1); val = sign_bit ? 
(val | ~maskVal) : (val & maskVal); DestReg = merge(DestReg, val, dataSize); ''' + flag_code = ''' + if (!sign_bit) + ccFlagBits = ccFlagBits & + ~(ext & (CFBit | ECFBit | ZFBit | EZFBit)); + else + ccFlagBits = ccFlagBits | + (ext & (CFBit | ECFBit | ZFBit | EZFBit)); + ''' class Zext(RegOp): - code = 'DestReg = bits(psrc1, imm8-1, 0);' + code = 'DestReg = bits(psrc1, op2, 0);' }}; -- cgit v1.2.3 From 4d4d2883f9c84f0cebec4b65479c11540dbb36f7 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:53 -0800 Subject: X86: Implement some bit testing instructions. --HG-- extra : convert_revision : 54585e276e44322be9c56af0b2eabfe8d4b3e430 --- src/arch/x86/isa/decoder/two_byte_opcodes.isa | 28 +-- .../general_purpose/compare_and_test/bit_test.py | 250 ++++++++++++++++++++- 2 files changed, 254 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa index 30034a305..233a5602d 100644 --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -657,7 +657,7 @@ 0x0: push_fs(); 0x1: pop_fs(); 0x2: Inst::CPUID(rAd); - 0x3: bt_Ev_Gv(); + 0x3: Inst::BT(Ev,Gv); 0x4: shld_Ev_Gv_Ib(); 0x5: shld_Ev_Gv_rCl(); 0x6: xbts_and_cmpxchg(); @@ -667,7 +667,7 @@ 0x0: push_gs(); 0x1: pop_gs(); 0x2: rsm_smm(); - 0x3: bts_Ev_Gv(); + 0x3: Inst::BTS(Ev,Gv); 0x4: shrd_Ev_Gv_Ib(); 0x5: shrd_Ev_Gv_rCl(); //0x6: group16(); @@ -691,7 +691,7 @@ 0x0: Inst::CMPXCHG(Eb,Gb); 0x1: Inst::CMPXCHG(Ev,Gv); 0x2: lss_Gz_Mp(); - 0x3: btr_Ev_Gv(); + 0x3: Inst::BTR(Ev,Gv); 0x4: lfs_Gz_Mp(); 0x5: lgs_Gz_Mp(); //The size of the second operand in these instructions should @@ -702,17 +702,19 @@ } 0x17: decode OPCODE_OP_BOTTOM3 { 0x0: jmpe_Jz(); // IA-64? 
- //0x1: group11_UD2(); - 0x1: Inst::UD2(); - //0x2: group8_Ev_Ib(); - 0x2: decode MODRM_REG { - 0x4: bt_Ev_Ib(); - 0x5: bts_Ev_Ib(); - 0x6: btr_Ev_Ib(); - 0x7: btc_Ev_Ib(); - default: Inst::UD2(); + format Inst { + //0x1: group11_UD2(); + 0x1: UD2(); + //0x2: group8_Ev_Ib(); + 0x2: decode MODRM_REG { + 0x4: BT(Ev,Ib); + 0x5: BTS(Ev,Ib); + 0x6: BTR(Ev,Ib); + 0x7: BTC(Ev,Ib); + default: UD2(); + } + 0x3: BTC(Ev,Gv); } - 0x3: btc_Ev_Gv(); 0x4: bsf_Gv_Ev(); 0x5: bsr_Gv_Ev(); //The size of the second operand in these instructions should diff --git a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py index e950f008a..883ec4411 100644 --- a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py +++ b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py @@ -53,14 +53,242 @@ # # Authors: Gabe Black -microcode = "" -#let {{ -# class BT(Inst): -# "GenFault ${new UnimpInstFault}" -# class BTC(Inst): -# "GenFault ${new UnimpInstFault}" -# class BTR(Inst): -# "GenFault ${new UnimpInstFault}" -# class BTS(Inst): -# "GenFault ${new UnimpInstFault}" -#}}; +microcode = ''' +def macroop BT_R_I { + sexti t0, reg, imm, flags=(CF,) +}; + +def macroop BT_M_I { + limm t1, imm + # This fudges just a tiny bit, but it's reasonable to expect the + # microcode generation logic to have the log of the various sizes + # floating around as well. + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + ld t1, seg, [scale, index, t2], disp + sexti t0, t1, imm, flags=(CF,) +}; + +def macroop BT_P_I { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + ld t1, seg, [1, t2, t7] + sexti t0, t1, imm, flags=(CF,) +}; + +def macroop BT_R_R { + sext t0, reg, regm, flags=(CF,) +}; + +def macroop BT_M_R { + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 
2 : 1)" + add t2, t2, base + ld t1, seg, [scale, index, t2], disp + sext t0, t1, reg, flags=(CF,) +}; + +def macroop BT_P_R { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + ld t1, seg, [1, t2, t7] + sext t0, t1, reg, flags=(CF,) +}; + +def macroop BTC_R_I { + sexti t0, reg, imm, flags=(CF,) + limm t1, 1 + roli t1, t1, imm + xor reg, reg, t1 +}; + +def macroop BTC_M_I { + limm t1, imm + # This fudges just a tiny bit, but it's reasonable to expect the + # microcode generation logic to have the log of the various sizes + # floating around as well. + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, 1 + roli t3, t3, imm + ldst t1, seg, [scale, index, t2], disp + sexti t0, t1, imm, flags=(CF,) + xor t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTC_P_I { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, 1 + roli t3, t3, imm + ldst t1, seg, [1, t2, t7] + sexti t0, t1, imm, flags=(CF,) + xor t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTC_R_R { + sext t0, reg, regm, flags=(CF,) + limm t1, 1 + rol t1, t1, regm + xor reg, reg, t1 +}; + +def macroop BTC_M_R { + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, 1 + rol t3, t3, reg + ldst t1, seg, [scale, index, t2], disp + sext t0, t1, reg, flags=(CF,) + xor t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTC_P_R { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 
2 : 1)" + limm t3, 1 + rol t3, t3, reg + ldst t1, seg, [1, t2, t7] + sext t0, t1, reg, flags=(CF,) + xor t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTR_R_I { + sexti t0, reg, imm, flags=(CF,) + limm t1, "(uint64_t(-(2ULL)))" + roli t1, t1, imm + and reg, reg, t1 +}; + +def macroop BTR_M_I { + limm t1, imm + # This fudges just a tiny bit, but it's reasonable to expect the + # microcode generation logic to have the log of the various sizes + # floating around as well. + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, "(uint64_t(-(2ULL)))" + roli t3, t3, imm + ldst t1, seg, [scale, index, t2], disp + sexti t0, t1, imm, flags=(CF,) + and t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTR_P_I { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, "(uint64_t(-(2ULL)))" + roli t3, t3, imm + ldst t1, seg, [1, t2, t7] + sexti t0, t1, imm, flags=(CF,) + and t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTR_R_R { + sext t0, reg, regm, flags=(CF,) + limm t1, "(uint64_t(-(2ULL)))" + rol t1, t1, regm + and reg, reg, t1 +}; + +def macroop BTR_M_R { + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, "(uint64_t(-(2ULL)))" + rol t3, t3, reg + ldst t1, seg, [scale, index, t2], disp + sext t0, t1, reg, flags=(CF,) + and t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTR_P_R { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 
2 : 1)" + limm t3, "(uint64_t(-(2ULL)))" + rol t3, t3, reg + ldst t1, seg, [1, t2, t7] + sext t0, t1, reg, flags=(CF,) + and t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTS_R_I { + sexti t0, reg, imm, flags=(CF,) + limm t1, 1 + roli t1, t1, imm + or reg, reg, t1 +}; + +def macroop BTS_M_I { + limm t1, imm + # This fudges just a tiny bit, but it's reasonable to expect the + # microcode generation logic to have the log of the various sizes + # floating around as well. + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, 1 + roli t3, t3, imm + ldst t1, seg, [scale, index, t2], disp + sexti t0, t1, imm, flags=(CF,) + or t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTS_P_I { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, 1 + roli t3, t3, imm + ldst t1, seg, [1, t2, t7] + sexti t0, t1, imm, flags=(CF,) + or t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTS_R_R { + sext t0, reg, regm, flags=(CF,) + limm t1, 1 + rol t1, t1, regm + or reg, reg, t1 +}; + +def macroop BTS_M_R { + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, 1 + rol t3, t3, reg + ldst t1, seg, [scale, index, t2], disp + sext t0, t1, reg, flags=(CF,) + or t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTS_P_R { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, 1 + rol t3, t3, reg + ldst t1, seg, [1, t2, t7] + sext t0, t1, reg, flags=(CF,) + or t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; +''' -- cgit v1.2.3 From f1f5dd79bf8c2cf2ef64cc1432a4a0601d475e72 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:38:59 -0800 Subject: X86: Implement the wrcr microop which writes a control register, and some control register work. 
--HG-- extra : convert_revision : 3e9daef9cdd0665c033420e5b4f981649e9908ab --- src/arch/x86/isa/decoder/two_byte_opcodes.isa | 2 +- .../insts/general_purpose/data_transfer/move.py | 4 + src/arch/x86/isa/microasm.isa | 3 + src/arch/x86/isa/microops/regop.isa | 52 +++++++++++++ src/arch/x86/isa/operands.isa | 4 + src/arch/x86/isa/specialize.isa | 8 +- src/arch/x86/miscregfile.cc | 89 ++++++++++------------ 7 files changed, 113 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa index 233a5602d..f3485bc4e 100644 --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -202,7 +202,7 @@ 0x0: decode OPCODE_OP_BOTTOM3 { 0x0: mov_Rd_Cd(); 0x1: mov_Rd_Dd(); - 0x2: mov_Cd_Rd(); + 0x2: Inst::MOV(Cd,Rd); 0x3: mov_Dd_Rd(); 0x4: mov_Rd_Td(); 0x6: mov_Td_Rd(); diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py index ada7f28a3..a15fc21ef 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py @@ -188,6 +188,10 @@ def macroop MOVZX_W_R_P { ld t1, seg, riprel, disp, dataSize=2 zexti reg, t1, 15 }; + +def macroop MOV_C_R { + wrcr reg, regm +}; ''' #let {{ # class MOVD(Inst): diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index e05582e37..040bb2036 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -113,6 +113,9 @@ let {{ for reg in ('ax', 'bx', 'cx', 'dx', 'sp', 'bp', 'si', 'di'): assembler.symbols["r%s" % reg] = "INTREG_R%s" % reg.upper() + for reg in range(15): + assembler.symbols["cr%d" % reg] = "MISCREG_CR%d" % reg + for flag in ('CF', 'PF', 'ECF', 'AF', 'EZF', 'ZF', 'SF', 'OF'): assembler.symbols[flag] = flag + "Bit" diff --git a/src/arch/x86/isa/microops/regop.isa 
b/src/arch/x86/isa/microops/regop.isa index b5e17d36d..58b267e0d 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -884,4 +884,56 @@ let {{ class Zext(RegOp): code = 'DestReg = bits(psrc1, op2, 0);' + + class Wrcr(RegOp): + def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): + super(Wrcr, self).__init__(dest, \ + src1, "NUM_INTREGS", flags, dataSize) + code = ''' + if (dest == 1 || (dest > 4 && dest < 8) || (dest > 8)) { + fault = new InvalidOpcode(); + } else { + // There are *s in the line below so it doesn't confuse the + // parser. They may be unnecessary. + //Mis*cReg old*Val = pick(Cont*rolDest, 0, dat*aSize); + MiscReg newVal = psrc1; + + // Check for any modifications that would cause a fault. + switch(dest) { + case 0: + { + Efer efer = EferOp; + CR0 cr0 = newVal; + CR4 oldCr4 = CR4Op; + if (bits(newVal, 63, 32) || + (!cr0.pe && cr0.pg) || + (!cr0.cd && cr0.nw) || + (cr0.pg && efer.lme && !oldCr4.pae)) + fault = new GeneralProtection(0); + } + break; + case 2: + break; + case 3: + break; + case 4: + { + CR4 cr4 = newVal; + // PAE can't be disabled in long mode. + if (bits(newVal, 63, 11) || + (machInst.mode.mode == LongMode && !cr4.pae)) + fault = new GeneralProtection(0); + } + break; + case 8: + { + if (bits(newVal, 63, 4)) + fault = new GeneralProtection(0); + } + default: + panic("Unrecognized control register %d.\\n", dest); + } + ControlDest = newVal; + } + ''' }}; diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa index 8c0eacca2..542638edd 100644 --- a/src/arch/x86/isa/operands.isa +++ b/src/arch/x86/isa/operands.isa @@ -122,5 +122,9 @@ def operands {{ # instructions don't map their indexes with an old value. 
'TOP': ('ControlReg', 'ub', 'MISCREG_X87_TOP', None, 61), 'SegBase': ('ControlReg', 'uqw', 'MISCREG_SEG_BASE(segment)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 70), + 'ControlDest': ('ControlReg', 'uqw', 'MISCREG_CR(dest)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 71), + 'ControlSrc1': ('ControlReg', 'uqw', 'MISCREG_CR(src1)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 72), + 'EferOp': ('ControlReg', 'uqw', 'MISCREG_EFER', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 73), + 'CR4Op': ('ControlReg', 'uqw', 'MISCREG_CR4', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 74), 'Mem': ('Mem', 'uqw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100) }}; diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa index cf6b6ff86..3802d8949 100644 --- a/src/arch/x86/isa/specialize.isa +++ b/src/arch/x86/isa/specialize.isa @@ -153,7 +153,13 @@ let {{ return doRipRelativeDecode(Name, opTypes, env) elif opType.tag == None or opType.size == None: raise Exception, "Problem parsing operand tag: %s" % opType.tag - elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"): + elif opType.tag == "C": + env.addReg(ModRMRegIndex) + Name += "_C" + elif opType.tag == "D": + env.addReg(ModRMRegIndex) + Name += "_D" + elif opType.tag in ("G", "P", "S", "T", "V"): # Use the "reg" field of the ModRM byte to select the register env.addReg(ModRMRegIndex) Name += "_R" diff --git a/src/arch/x86/miscregfile.cc b/src/arch/x86/miscregfile.cc index cd76e96aa..a6aed336f 100644 --- a/src/arch/x86/miscregfile.cc +++ b/src/arch/x86/miscregfile.cc @@ -106,22 +106,15 @@ void MiscRegFile::clear() MiscReg MiscRegFile::readRegNoEffect(int miscReg) { - switch(miscReg) - { - case MISCREG_CR1: - case MISCREG_CR5: - case MISCREG_CR6: - case MISCREG_CR7: - case MISCREG_CR9: - case MISCREG_CR10: - case MISCREG_CR11: - case MISCREG_CR12: - case MISCREG_CR13: - case 
MISCREG_CR14: - case MISCREG_CR15: - panic("Tried to read invalid control register %d\n", miscReg); - break; - } + // Make sure we're not dealing with an illegal control register. + // Instructions should filter out these indexes, and nothing else should + // attempt to read them directly. + assert( miscReg != MISCREG_CR1 && + !(miscReg > MISCREG_CR4 && + miscReg < MISCREG_CR8) && + !(miscReg > MISCREG_CR8 && + miscReg <= MISCREG_CR15)); + return regVal[miscReg]; } @@ -132,22 +125,14 @@ MiscReg MiscRegFile::readReg(int miscReg, ThreadContext * tc) void MiscRegFile::setRegNoEffect(int miscReg, const MiscReg &val) { - switch(miscReg) - { - case MISCREG_CR1: - case MISCREG_CR5: - case MISCREG_CR6: - case MISCREG_CR7: - case MISCREG_CR9: - case MISCREG_CR10: - case MISCREG_CR11: - case MISCREG_CR12: - case MISCREG_CR13: - case MISCREG_CR14: - case MISCREG_CR15: - panic("Tried to write invalid control register %d\n", miscReg); - break; - } + // Make sure we're not dealing with an illegal control register. + // Instructions should filter out these indexes, and nothing else should + // attempt to write to them directly. 
+ assert( miscReg != MISCREG_CR1 && + !(miscReg > MISCREG_CR4 && + miscReg < MISCREG_CR8) && + !(miscReg > MISCREG_CR8 && + miscReg <= MISCREG_CR15)); regVal[miscReg] = val; } @@ -158,23 +143,33 @@ void MiscRegFile::setReg(int miscReg, switch(miscReg) { case MISCREG_CR0: - CR0 toggled = regVal[miscReg] ^ val; - CR0 newCR0 = val; - Efer efer = regVal[MISCREG_EFER]; - if (toggled.pg && efer.lme) { - if (newCR0.pg) { - //Turning on long mode - efer.lma = 1; - regVal[MISCREG_EFER] = efer; - } else { - //Turning off long mode - efer.lma = 0; - regVal[MISCREG_EFER] = efer; + { + CR0 toggled = regVal[miscReg] ^ val; + CR0 newCR0 = val; + Efer efer = regVal[MISCREG_EFER]; + if (toggled.pg && efer.lme) { + if (newCR0.pg) { + //Turning on long mode + efer.lma = 1; + regVal[MISCREG_EFER] = efer; + } else { + //Turning off long mode + efer.lma = 0; + regVal[MISCREG_EFER] = efer; + } } + //This must always be 1. + newCR0.et = 1; + newVal = newCR0; } - //This must always be 1. - newCR0.et = 1; - newVal = newCR0; + break; + case MISCREG_CR2: + break; + case MISCREG_CR3: + break; + case MISCREG_CR4: + break; + case MISCREG_CR8: break; } setRegNoEffect(miscReg, newVal); -- cgit v1.2.3 From 49507982685b4e807e612ff176fb67901415a2ce Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:39:07 -0800 Subject: X86: Implement tlb invalidation and make it happen some of the times it should. 
--HG-- extra : convert_revision : 376516d33cd539fa526c834ef2b2c33069af3040 --- src/arch/x86/miscregfile.cc | 15 +++++++++++++++ src/arch/x86/tlb.cc | 17 +++++++++++++++++ src/arch/x86/tlb.hh | 13 +++++++------ 3 files changed, 39 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/arch/x86/miscregfile.cc b/src/arch/x86/miscregfile.cc index a6aed336f..71908098e 100644 --- a/src/arch/x86/miscregfile.cc +++ b/src/arch/x86/miscregfile.cc @@ -86,6 +86,8 @@ */ #include "arch/x86/miscregfile.hh" +#include "arch/x86/tlb.hh" +#include "cpu/thread_context.hh" #include "sim/serialize.hh" using namespace X86ISA; @@ -158,6 +160,10 @@ void MiscRegFile::setReg(int miscReg, regVal[MISCREG_EFER] = efer; } } + if (toggled.pg) { + tc->getITBPtr()->invalidateAll(); + tc->getDTBPtr()->invalidateAll(); + } //This must always be 1. newCR0.et = 1; newVal = newCR0; @@ -166,8 +172,17 @@ void MiscRegFile::setReg(int miscReg, case MISCREG_CR2: break; case MISCREG_CR3: + tc->getITBPtr()->invalidateNonGlobal(); + tc->getDTBPtr()->invalidateNonGlobal(); break; case MISCREG_CR4: + { + CR4 toggled = regVal[miscReg] ^ val; + if (toggled.pae || toggled.pse || toggled.pge) { + tc->getITBPtr()->invalidateAll(); + tc->getDTBPtr()->invalidateAll(); + } + } break; case MISCREG_CR8: break; diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index 704ab3027..1184bf9de 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -380,6 +380,7 @@ TLB::Walker::start(ThreadContext * _tc, Addr vaddr) } } } + nextState = Ready; entry.vaddr = vaddr; @@ -595,11 +596,27 @@ TLB::lookup(Addr va, bool update_lru) void TLB::invalidateAll() { + DPRINTF(TLB, "Invalidating all entries.\n"); + while (!entryList.empty()) { + TlbEntry *entry = entryList.front(); + entryList.pop_front(); + freeList.push_back(entry); + } } void TLB::invalidateNonGlobal() { + DPRINTF(TLB, "Invalidating all non global entries.\n"); + EntryList::iterator entryIt; + for (entryIt = entryList.begin(); entryIt != entryList.end();) { 
+ if (!(*entryIt)->global) { + freeList.push_back(*entryIt); + entryList.erase(entryIt++); + } else { + entryIt++; + } + } } void diff --git a/src/arch/x86/tlb.hh b/src/arch/x86/tlb.hh index d45f94520..93bbf2c9d 100644 --- a/src/arch/x86/tlb.hh +++ b/src/arch/x86/tlb.hh @@ -215,6 +215,13 @@ namespace X86ISA Port *getPort(const std::string &if_name, int idx = -1); + public: + void invalidateAll(); + + void invalidateNonGlobal(); + + void demapPage(Addr va); + protected: int size; @@ -226,12 +233,6 @@ namespace X86ISA void insert(Addr vpn, TlbEntry &entry); - void invalidateAll(); - - void invalidateNonGlobal(); - - void demapPage(Addr va); - template Fault translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute); -- cgit v1.2.3 From 917ae9ec668fde45c8cb614d9fac29df33892fa1 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 14:39:14 -0800 Subject: X86: Fix a stupid typo where WRMSR and RDMSR were switched, and add a debug statement. --HG-- extra : convert_revision : f1eb17291f4c01f3c0fa8f99650bc1edf09d21de --- src/arch/x86/isa/insts/system/msrs.py | 4 ++-- src/arch/x86/tlb.cc | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/arch/x86/isa/insts/system/msrs.py b/src/arch/x86/isa/insts/system/msrs.py index ea576510b..20b9b2a0b 100644 --- a/src/arch/x86/isa/insts/system/msrs.py +++ b/src/arch/x86/isa/insts/system/msrs.py @@ -54,7 +54,7 @@ # Authors: Gabe Black microcode = ''' -def macroop WRMSR +def macroop RDMSR { limm t1, "IntAddrPrefixMSR >> 3" ld t2, intseg, [8, t1, rcx], dataSize=8, addressSize=4 @@ -63,7 +63,7 @@ def macroop WRMSR mov rdx, rdx, t2, dataSize=4 }; -def macroop RDMSR +def macroop WRMSR { limm t1, "IntAddrPrefixMSR >> 3" mov t2, t2, rdx, dataSize=4 diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index 1184bf9de..dd516d2a0 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -641,6 +641,7 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) 
// If this is true, we're dealing with a request to read an internal // value. if (seg == SEGMENT_REG_INT) { + DPRINTF(TLB, "Addresses references internal memory.\n"); Addr prefix = vaddr & IntAddrPrefixMask; if (prefix == IntAddrPrefixCPUID) { panic("CPUID memory space not yet implemented!\n"); -- cgit v1.2.3 From 6095dceb0c34cf79ecbd799ab4b2cbe7b7c8629a Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 18:06:02 -0800 Subject: Params: Fix check for cycles in the configuration and clarify the comments/error message. --HG-- extra : convert_revision : 8f35dde408fae874bcba1a248d32a22222d98c35 --- src/python/m5/SimObject.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index d3e7d7975..78df6bef1 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -793,12 +793,14 @@ class SimObject(object): # necessary to construct it. Does *not* recursively create # children. def getCCObject(self): - params = self.getCCParams() if not self._ccObject: - self._ccObject = -1 # flag to catch cycles in recursion + # Cycles in the configuration heirarchy are not supported. This + # will catch the resulting recursion and stop. + self._ccObject = -1 + params = self.getCCParams() self._ccObject = params.create() elif self._ccObject == -1: - raise RuntimeError, "%s: recursive call to getCCObject()" \ + raise RuntimeError, "%s: Cycle found in configuration heirarchy." \ % self.path() return self._ccObject -- cgit v1.2.3 From 1048b548fabfb7af2113f226f2151d3eb0e63289 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Nov 2007 18:06:57 -0800 Subject: X86: Separate out the page table walker into it's own cc and hh. 
--HG-- extra : convert_revision : cbc3af01ca3dc911a59224a574007c5c0bcf6042 --- src/arch/x86/SConscript | 1 + src/arch/x86/X86TLB.py | 19 +- src/arch/x86/faults.cc | 4 +- src/arch/x86/pagetable_walker.cc | 533 +++++++++++++++++++++++++++++++++++++++ src/arch/x86/pagetable_walker.hh | 189 ++++++++++++++ src/arch/x86/tlb.cc | 489 ++--------------------------------- src/arch/x86/tlb.hh | 138 ++-------- src/cpu/BaseCPU.py | 4 +- 8 files changed, 775 insertions(+), 602 deletions(-) create mode 100644 src/arch/x86/pagetable_walker.cc create mode 100644 src/arch/x86/pagetable_walker.hh (limited to 'src') diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript index 68a18d4c0..eef3956ee 100644 --- a/src/arch/x86/SConscript +++ b/src/arch/x86/SConscript @@ -112,6 +112,7 @@ if env['TARGET_ISA'] == 'x86': SimObject('X86System.py') # Full-system sources + Source('pagetable_walker.cc') Source('system.cc') Source('stacktrace.cc') Source('vtophys.cc') diff --git a/src/arch/x86/X86TLB.py b/src/arch/x86/X86TLB.py index 5c174be59..dc080f37e 100644 --- a/src/arch/x86/X86TLB.py +++ b/src/arch/x86/X86TLB.py @@ -54,15 +54,28 @@ # Authors: Gabe Black from MemObject import MemObject +from m5.SimObject import SimObject from m5.params import * from m5.proxy import * +from m5 import build_env -class X86TLB(MemObject): +if build_env['FULL_SYSTEM']: + class X86PagetableWalker(MemObject): + type = 'X86PagetableWalker' + cxx_namespace = 'X86ISA' + cxx_class = 'Walker' + port = Port("Port for the hardware table walker") + system = Param.System(Parent.any, "system object") + +class X86TLB(SimObject): type = 'X86TLB' + cxx_namespace = 'X86ISA' + cxx_class = 'TLB' abstract = True size = Param.Int("TLB size") - walker_port = Port("Port for the hardware table walker") - system = Param.System(Parent.any, "system object") + if build_env['FULL_SYSTEM']: + walker = Param.X86PagetableWalker(\ + X86PagetableWalker(), "page table walker") class X86DTB(X86TLB): type = 'X86DTB' diff --git 
a/src/arch/x86/faults.cc b/src/arch/x86/faults.cc index abb5d98d7..1c94a1251 100644 --- a/src/arch/x86/faults.cc +++ b/src/arch/x86/faults.cc @@ -118,13 +118,13 @@ namespace X86ISA void FakeITLBFault::invoke(ThreadContext * tc) { // Start the page table walker. - tc->getITBPtr()->walker.start(tc, vaddr); + tc->getITBPtr()->walk(tc, vaddr); } void FakeDTLBFault::invoke(ThreadContext * tc) { // Start the page table walker. - tc->getDTBPtr()->walker.start(tc, vaddr); + tc->getDTBPtr()->walk(tc, vaddr); } #else // !FULL_SYSTEM diff --git a/src/arch/x86/pagetable_walker.cc b/src/arch/x86/pagetable_walker.cc new file mode 100644 index 000000000..0472dcd21 --- /dev/null +++ b/src/arch/x86/pagetable_walker.cc @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2007 The Hewlett-Packard Development Company + * All rights reserved. + * + * Redistribution and use of this software in source and binary forms, + * with or without modification, are permitted provided that the + * following conditions are met: + * + * The software must be used only for Non-Commercial Use which means any + * use which is NOT directed to receiving any direct monetary + * compensation for, or commercial advantage from such use. Illustrative + * examples of non-commercial use are academic research, personal study, + * teaching, education and corporate research & development. + * Illustrative examples of commercial use are distributing products for + * commercial advantage and providing services using the software for + * commercial advantage. + * + * If you wish to use this software or functionality therein that may be + * covered by patents for commercial use, please contact: + * Director of Intellectual Property Licensing + * Office of Strategy and Technology + * Hewlett-Packard Company + * 1501 Page Mill Road + * Palo Alto, California 94304 + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
Redistributions + * in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. Neither the name of + * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. No right of + * sublicense is granted herewith. Derivatives of the software and + * output created using the software may be prepared, but only for + * Non-Commercial Uses. Derivatives of the software may be shared with + * others provided: (i) the others agree to abide by the list of + * conditions herein which includes the Non-Commercial Use restrictions; + * and (ii) such Derivatives of the software include the above copyright + * notice to acknowledge the contribution from this software where + * applicable, this list of conditions and the disclaimer below. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#include "arch/x86/pagetable.hh" +#include "arch/x86/pagetable_walker.hh" +#include "arch/x86/tlb.hh" +#include "base/bitfield.hh" +#include "cpu/thread_context.hh" +#include "cpu/base.hh" +#include "mem/packet_access.hh" +#include "mem/request.hh" +#include "sim/system.hh" + +namespace X86ISA { + +// Unfortunately, the placement of the base field in a page table entry is +// very erratic and would make a mess here. It might be moved here at some +// point in the future. +BitUnion64(PageTableEntry) + Bitfield<63> nx; + Bitfield<11, 9> avl; + Bitfield<8> g; + Bitfield<7> ps; + Bitfield<6> d; + Bitfield<5> a; + Bitfield<4> pcd; + Bitfield<3> pwt; + Bitfield<2> u; + Bitfield<1> w; + Bitfield<0> p; +EndBitUnion(PageTableEntry) + +void +Walker::doNext(PacketPtr &read, PacketPtr &write) +{ + assert(state != Ready && state != Waiting); + write = NULL; + PageTableEntry pte; + if (size == 8) + pte = read->get(); + else + pte = read->get(); + VAddr vaddr = entry.vaddr; + bool uncacheable = pte.pcd; + Addr nextRead = 0; + bool doWrite = false; + bool badNX = pte.nx && (!tlb->allowNX() || !enableNX); + switch(state) { + case LongPML4: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * size; + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (badNX) + panic("NX violation!\n"); + entry.noExec = pte.nx; + if (!pte.p) + panic("Page not present!\n"); + nextState = LongPDP; + break; + case LongPDP: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * size; + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + nextState = LongPD; + break; + case LongPD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not 
present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = + ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * size; + nextState = LongPTE; + break; + } else { + // 2 MB page + entry.size = 2 * (1 << 20); + entry.paddr = (uint64_t)pte & (mask(31) << 21); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } + case LongPTE: + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(40) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + case PAEPDP: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * size; + if (!pte.p) + panic("Page not present!\n"); + nextState = PAEPD; + break; + case PAEPD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * size; + nextState = PAEPTE; + break; + } else { + // 2 MB page + entry.size = 2 * (1 << 20); + entry.paddr = (uint64_t)pte & (mask(31) << 21); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } + case PAEPTE: + doWrite = !pte.a; + pte.a 
= 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(40) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 7); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + case PSEPD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (!pte.p) + panic("Page not present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = + ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; + nextState = PTE; + break; + } else { + // 4 MB page + entry.size = 4 * (1 << 20); + entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22; + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } + case PD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (!pte.p) + panic("Page not present!\n"); + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; + nextState = PTE; + break; + nextState = PTE; + break; + case PTE: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(20) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 7); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + default: + panic("Unknown page table walker state %d!\n"); + } + 
PacketPtr oldRead = read; + //If we didn't return, we're setting up another read. + uint32_t flags = oldRead->req->getFlags(); + if (uncacheable) + flags |= UNCACHEABLE; + else + flags &= ~UNCACHEABLE; + RequestPtr request = + new Request(nextRead, oldRead->getSize(), flags); + read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast); + read->allocate(); + //If we need to write, adjust the read packet to write the modified value + //back to memory. + if (doWrite) { + write = oldRead; + write->set(pte); + write->cmd = MemCmd::WriteReq; + write->setDest(Packet::Broadcast); + } else { + write = NULL; + delete oldRead->req; + delete oldRead; + } +} + +void +Walker::start(ThreadContext * _tc, Addr vaddr) +{ + assert(state == Ready); + assert(!tc); + tc = _tc; + + VAddr addr = vaddr; + + //Figure out what we're doing. + CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3); + Addr top = 0; + // Check if we're in long mode or not + Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); + size = 8; + if (efer.lma) { + // Do long mode. + state = LongPML4; + top = (cr3.longPdtb << 12) + addr.longl4 * size; + } else { + // We're in some flavor of legacy mode. + CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); + if (cr4.pae) { + // Do legacy PAE. + state = PAEPDP; + top = (cr3.paePdtb << 5) + addr.pael3 * size; + } else { + size = 4; + top = (cr3.pdtb << 12) + addr.norml2 * size; + if (cr4.pse) { + // Do legacy PSE. + state = PSEPD; + } else { + // Do legacy non PSE. + state = PD; + } + } + } + + nextState = Ready; + entry.vaddr = vaddr; + + enableNX = efer.nxe; + + RequestPtr request = + new Request(top, size, PHYSICAL | cr3.pcd ? 
UNCACHEABLE : 0); + read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast); + read->allocate(); + Enums::MemoryMode memMode = sys->getMemoryMode(); + if (memMode == Enums::timing) { + tc->suspend(); + port.sendTiming(read); + } else if (memMode == Enums::atomic) { + do { + port.sendAtomic(read); + PacketPtr write = NULL; + doNext(read, write); + state = nextState; + nextState = Ready; + if (write) + port.sendAtomic(write); + } while(read); + tc = NULL; + state = Ready; + nextState = Waiting; + } else { + panic("Unrecognized memory system mode.\n"); + } +} + +bool +Walker::WalkerPort::recvTiming(PacketPtr pkt) +{ + return walker->recvTiming(pkt); +} + +bool +Walker::recvTiming(PacketPtr pkt) +{ + inflight--; + if (pkt->isResponse() && !pkt->wasNacked()) { + if (pkt->isRead()) { + assert(inflight); + assert(state == Waiting); + assert(!read); + state = nextState; + nextState = Ready; + PacketPtr write = NULL; + doNext(pkt, write); + state = Waiting; + read = pkt; + if (write) { + writes.push_back(write); + } + sendPackets(); + } else { + sendPackets(); + } + if (inflight == 0 && read == NULL && writes.size() == 0) { + tc->activate(0); + tc = NULL; + state = Ready; + nextState = Waiting; + } + } else if (pkt->wasNacked()) { + pkt->reinitNacked(); + if (!port.sendTiming(pkt)) { + retrying = true; + if (pkt->isWrite()) { + writes.push_back(pkt); + } else { + assert(!read); + read = pkt; + } + } else { + inflight++; + } + } + return true; +} + +Tick +Walker::WalkerPort::recvAtomic(PacketPtr pkt) +{ + return 0; +} + +void +Walker::WalkerPort::recvFunctional(PacketPtr pkt) +{ + return; +} + +void +Walker::WalkerPort::recvStatusChange(Status status) +{ + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } + return; + } + + panic("Unexpected recvStatusChange.\n"); +} + +void +Walker::WalkerPort::recvRetry() +{ + walker->recvRetry(); +} + +void +Walker::recvRetry() +{ + retrying = false; + 
sendPackets(); +} + +void +Walker::sendPackets() +{ + //If we're already waiting for the port to become available, just return. + if (retrying) + return; + + //Reads always have priority + if (read) { + if (!port.sendTiming(read)) { + retrying = true; + return; + } else { + inflight++; + delete read->req; + delete read; + read = NULL; + } + } + //Send off as many of the writes as we can. + while (writes.size()) { + PacketPtr write = writes.back(); + if (!port.sendTiming(write)) { + retrying = true; + return; + } else { + inflight++; + delete write->req; + delete write; + writes.pop_back(); + } + } +} + +Port * +Walker::getPort(const std::string &if_name, int idx) +{ + if (if_name == "port") + return &port; + else + panic("No page table walker port named %s!\n", if_name); +} + +} + +X86ISA::Walker * +X86PagetableWalkerParams::create() +{ + return new X86ISA::Walker(this); +} diff --git a/src/arch/x86/pagetable_walker.hh b/src/arch/x86/pagetable_walker.hh new file mode 100644 index 000000000..324f16f3c --- /dev/null +++ b/src/arch/x86/pagetable_walker.hh @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2007 The Hewlett-Packard Development Company + * All rights reserved. + * + * Redistribution and use of this software in source and binary forms, + * with or without modification, are permitted provided that the + * following conditions are met: + * + * The software must be used only for Non-Commercial Use which means any + * use which is NOT directed to receiving any direct monetary + * compensation for, or commercial advantage from such use. Illustrative + * examples of non-commercial use are academic research, personal study, + * teaching, education and corporate research & development. + * Illustrative examples of commercial use are distributing products for + * commercial advantage and providing services using the software for + * commercial advantage. 
+ * + * If you wish to use this software or functionality therein that may be + * covered by patents for commercial use, please contact: + * Director of Intellectual Property Licensing + * Office of Strategy and Technology + * Hewlett-Packard Company + * 1501 Page Mill Road + * Palo Alto, California 94304 + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. Redistributions + * in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. Neither the name of + * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. No right of + * sublicense is granted herewith. Derivatives of the software and + * output created using the software may be prepared, but only for + * Non-Commercial Uses. Derivatives of the software may be shared with + * others provided: (i) the others agree to abide by the list of + * conditions herein which includes the Non-Commercial Use restrictions; + * and (ii) such Derivatives of the software include the above copyright + * notice to acknowledge the contribution from this software where + * applicable, this list of conditions and the disclaimer below. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_X86_PAGE_TABLE_WALKER_HH__ +#define __ARCH_X86_PAGE_TABLE_WALKER_HH__ + +#include + +#include "arch/x86/pagetable.hh" +#include "arch/x86/tlb.hh" +#include "mem/mem_object.hh" +#include "mem/packet.hh" +#include "params/X86PagetableWalker.hh" +#include "sim/host.hh" + +class ThreadContext; + +namespace X86ISA +{ + class Walker : public MemObject + { + public: + enum State { + Ready, + Waiting, + // Long mode + LongPML4, LongPDP, LongPD, LongPTE, + // PAE legacy mode + PAEPDP, PAEPD, PAEPTE, + // Non PAE legacy mode with and without PSE + PSEPD, PD, PTE + }; + + // Act on the current state and determine what to do next. read + // should be the packet that just came back from a read and write + // should be NULL. When the function returns, read is either NULL + // if the machine is finished, or points to a packet to initiate + // the next read. If any write is required to update an "accessed" + // bit, write will point to a packet to do the write. Otherwise it + // will be NULL. + void doNext(PacketPtr &read, PacketPtr &write); + + // Kick off the state machine. + void start(ThreadContext * _tc, Addr vaddr); + + protected: + + /* + * State having to do with sending packets. + */ + PacketPtr read; + std::vector writes; + + // How many memory operations are in flight. + unsigned inflight; + + bool retrying; + + /* + * Functions for dealing with packets. 
+ */ + bool recvTiming(PacketPtr pkt); + void recvRetry(); + + void sendPackets(); + + /* + * Port for accessing memory + */ + class WalkerPort : public Port + { + public: + WalkerPort(const std::string &_name, Walker * _walker) : + Port(_name, _walker), walker(_walker), + snoopRangeSent(false) + {} + + protected: + Walker * walker; + + bool snoopRangeSent; + + bool recvTiming(PacketPtr pkt); + Tick recvAtomic(PacketPtr pkt); + void recvFunctional(PacketPtr pkt); + void recvStatusChange(Status status); + void recvRetry(); + void getDeviceAddressRanges(AddrRangeList &resp, + bool &snoop) + { + resp.clear(); + snoop = true; + } + }; + + Port *getPort(const std::string &if_name, int idx = -1); + + friend class WalkerPort; + + WalkerPort port; + + // The TLB we're supposed to load. + TLB * tlb; + System * sys; + + /* + * State machine state. + */ + ThreadContext * tc; + State state; + State nextState; + int size; + bool enableNX; + TlbEntry entry; + + public: + + void setTLB(TLB * _tlb) + { + tlb = _tlb; + } + + typedef X86PagetableWalkerParams Params; + + Walker(const Params *params) : + MemObject(params), + read(NULL), inflight(0), retrying(false), + port(name() + ".port", this), + tlb(NULL), sys(params->system), + tc(NULL), state(Ready), nextState(Ready) + { + } + }; +} +#endif // __ARCH_X86_PAGE_TABLE_WALKER_HH__ diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index dd516d2a0..68a22bc16 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -69,491 +69,26 @@ #include "cpu/base.hh" #include "mem/packet_access.hh" #include "mem/request.hh" -#include "sim/system.hh" - -namespace X86ISA { #if FULL_SYSTEM -TLB::TLB(const Params *p) : MemObject(p), walker(name(), this), size(p->size) -#else -TLB::TLB(const Params *p) : MemObject(p), size(p->size) +#include "arch/x86/pagetable_walker.hh" #endif + +namespace X86ISA { + +TLB::TLB(const Params *p) : SimObject(p), size(p->size) { tlb = new TlbEntry[size]; std::memset(tlb, 0, sizeof(TlbEntry) * size); for (int x 
= 0; x < size; x++) freeList.push_back(&tlb[x]); -} #if FULL_SYSTEM - -// Unfortunately, the placement of the base field in a page table entry is -// very erratic and would make a mess here. It might be moved here at some -// point in the future. -BitUnion64(PageTableEntry) - Bitfield<63> nx; - Bitfield<11, 9> avl; - Bitfield<8> g; - Bitfield<7> ps; - Bitfield<6> d; - Bitfield<5> a; - Bitfield<4> pcd; - Bitfield<3> pwt; - Bitfield<2> u; - Bitfield<1> w; - Bitfield<0> p; -EndBitUnion(PageTableEntry) - -void -TLB::Walker::doNext(PacketPtr &read, PacketPtr &write) -{ - assert(state != Ready && state != Waiting); - write = NULL; - PageTableEntry pte; - if (size == 8) - pte = read->get(); - else - pte = read->get(); - VAddr vaddr = entry.vaddr; - bool uncacheable = pte.pcd; - Addr nextRead = 0; - bool doWrite = false; - bool badNX = pte.nx && (!tlb->allowNX || !enableNX); - switch(state) { - case LongPML4: - nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * size; - doWrite = !pte.a; - pte.a = 1; - entry.writable = pte.w; - entry.user = pte.u; - if (badNX) - panic("NX violation!\n"); - entry.noExec = pte.nx; - if (!pte.p) - panic("Page not present!\n"); - nextState = LongPDP; - break; - case LongPDP: - nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * size; - doWrite = !pte.a; - pte.a = 1; - entry.writable = entry.writable && pte.w; - entry.user = entry.user && pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page not present!\n"); - nextState = LongPD; - break; - case LongPD: - doWrite = !pte.a; - pte.a = 1; - entry.writable = entry.writable && pte.w; - entry.user = entry.user && pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page not present!\n"); - if (!pte.ps) { - // 4 KB page - entry.size = 4 * (1 << 10); - nextRead = - ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * size; - nextState = LongPTE; - break; - } else { - // 2 MB page - entry.size = 2 * (1 << 20); - entry.paddr = (uint64_t)pte & 
(mask(31) << 21); - entry.uncacheable = uncacheable; - entry.global = pte.g; - entry.patBit = bits(pte, 12); - entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); - tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; - } - case LongPTE: - doWrite = !pte.a; - pte.a = 1; - entry.writable = entry.writable && pte.w; - entry.user = entry.user && pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page not present!\n"); - entry.paddr = (uint64_t)pte & (mask(40) << 12); - entry.uncacheable = uncacheable; - entry.global = pte.g; - entry.patBit = bits(pte, 12); - entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); - tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; - case PAEPDP: - nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * size; - if (!pte.p) - panic("Page not present!\n"); - nextState = PAEPD; - break; - case PAEPD: - doWrite = !pte.a; - pte.a = 1; - entry.writable = pte.w; - entry.user = pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page not present!\n"); - if (!pte.ps) { - // 4 KB page - entry.size = 4 * (1 << 10); - nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * size; - nextState = PAEPTE; - break; - } else { - // 2 MB page - entry.size = 2 * (1 << 20); - entry.paddr = (uint64_t)pte & (mask(31) << 21); - entry.uncacheable = uncacheable; - entry.global = pte.g; - entry.patBit = bits(pte, 12); - entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); - tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; - } - case PAEPTE: - doWrite = !pte.a; - pte.a = 1; - entry.writable = entry.writable && pte.w; - entry.user = entry.user && pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page not present!\n"); - entry.paddr = (uint64_t)pte & (mask(40) << 12); - entry.uncacheable = uncacheable; - 
entry.global = pte.g; - entry.patBit = bits(pte, 7); - entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); - tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; - case PSEPD: - doWrite = !pte.a; - pte.a = 1; - entry.writable = pte.w; - entry.user = pte.u; - if (!pte.p) - panic("Page not present!\n"); - if (!pte.ps) { - // 4 KB page - entry.size = 4 * (1 << 10); - nextRead = - ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; - nextState = PTE; - break; - } else { - // 4 MB page - entry.size = 4 * (1 << 20); - entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22; - entry.uncacheable = uncacheable; - entry.global = pte.g; - entry.patBit = bits(pte, 12); - entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1); - tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; - } - case PD: - doWrite = !pte.a; - pte.a = 1; - entry.writable = pte.w; - entry.user = pte.u; - if (!pte.p) - panic("Page not present!\n"); - // 4 KB page - entry.size = 4 * (1 << 10); - nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; - nextState = PTE; - break; - nextState = PTE; - break; - case PTE: - doWrite = !pte.a; - pte.a = 1; - entry.writable = pte.w; - entry.user = pte.u; - if (!pte.p) - panic("Page not present!\n"); - entry.paddr = (uint64_t)pte & (mask(20) << 12); - entry.uncacheable = uncacheable; - entry.global = pte.g; - entry.patBit = bits(pte, 7); - entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); - tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; - default: - panic("Unknown page table walker state %d!\n"); - } - PacketPtr oldRead = read; - //If we didn't return, we're setting up another read. 
- uint32_t flags = oldRead->req->getFlags(); - if (uncacheable) - flags |= UNCACHEABLE; - else - flags &= ~UNCACHEABLE; - RequestPtr request = - new Request(nextRead, oldRead->getSize(), flags); - read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast); - read->allocate(); - //If we need to write, adjust the read packet to write the modified value - //back to memory. - if (doWrite) { - write = oldRead; - write->set(pte); - write->cmd = MemCmd::WriteReq; - write->setDest(Packet::Broadcast); - } else { - write = NULL; - delete oldRead->req; - delete oldRead; - } -} - -void -TLB::Walker::start(ThreadContext * _tc, Addr vaddr) -{ - assert(state == Ready); - assert(!tc); - tc = _tc; - - VAddr addr = vaddr; - - //Figure out what we're doing. - CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3); - Addr top = 0; - // Check if we're in long mode or not - Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); - size = 8; - if (efer.lma) { - // Do long mode. - state = LongPML4; - top = (cr3.longPdtb << 12) + addr.longl4 * size; - } else { - // We're in some flavor of legacy mode. - CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); - if (cr4.pae) { - // Do legacy PAE. - state = PAEPDP; - top = (cr3.paePdtb << 5) + addr.pael3 * size; - } else { - size = 4; - top = (cr3.pdtb << 12) + addr.norml2 * size; - if (cr4.pse) { - // Do legacy PSE. - state = PSEPD; - } else { - // Do legacy non PSE. - state = PD; - } - } - } - - nextState = Ready; - entry.vaddr = vaddr; - - enableNX = efer.nxe; - - RequestPtr request = - new Request(top, size, PHYSICAL | cr3.pcd ? 
UNCACHEABLE : 0); - read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast); - read->allocate(); - Enums::MemoryMode memMode = tlb->sys->getMemoryMode(); - if (memMode == Enums::timing) { - tc->suspend(); - port.sendTiming(read); - } else if (memMode == Enums::atomic) { - do { - port.sendAtomic(read); - PacketPtr write = NULL; - doNext(read, write); - state = nextState; - nextState = Ready; - if (write) - port.sendAtomic(write); - } while(read); - tc = NULL; - state = Ready; - nextState = Waiting; - } else { - panic("Unrecognized memory system mode.\n"); - } -} - -bool -TLB::Walker::WalkerPort::recvTiming(PacketPtr pkt) -{ - return walker->recvTiming(pkt); -} - -bool -TLB::Walker::recvTiming(PacketPtr pkt) -{ - inflight--; - if (pkt->isResponse() && !pkt->wasNacked()) { - if (pkt->isRead()) { - assert(inflight); - assert(state == Waiting); - assert(!read); - state = nextState; - nextState = Ready; - PacketPtr write = NULL; - doNext(pkt, write); - state = Waiting; - read = pkt; - if (write) { - writes.push_back(write); - } - sendPackets(); - } else { - sendPackets(); - } - if (inflight == 0 && read == NULL && writes.size() == 0) { - tc->activate(0); - tc = NULL; - state = Ready; - nextState = Waiting; - } - } else if (pkt->wasNacked()) { - pkt->reinitNacked(); - if (!port.sendTiming(pkt)) { - retrying = true; - if (pkt->isWrite()) { - writes.push_back(pkt); - } else { - assert(!read); - read = pkt; - } - } else { - inflight++; - } - } - return true; -} - -Tick -TLB::Walker::WalkerPort::recvAtomic(PacketPtr pkt) -{ - return 0; -} - -void -TLB::Walker::WalkerPort::recvFunctional(PacketPtr pkt) -{ - return; -} - -void -TLB::Walker::WalkerPort::recvStatusChange(Status status) -{ - if (status == RangeChange) { - if (!snoopRangeSent) { - snoopRangeSent = true; - sendStatusChange(Port::RangeChange); - } - return; - } - - panic("Unexpected recvStatusChange.\n"); -} - -void -TLB::Walker::WalkerPort::recvRetry() -{ - walker->recvRetry(); -} - -void 
-TLB::Walker::recvRetry() -{ - retrying = false; - sendPackets(); -} - -void -TLB::Walker::sendPackets() -{ - //If we're already waiting for the port to become available, just return. - if (retrying) - return; - - //Reads always have priority - if (read) { - if (!port.sendTiming(read)) { - retrying = true; - return; - } else { - inflight++; - delete read->req; - delete read; - read = NULL; - } - } - //Send off as many of the writes as we can. - while (writes.size()) { - PacketPtr write = writes.back(); - if (!port.sendTiming(write)) { - retrying = true; - return; - } else { - inflight++; - delete write->req; - delete write; - writes.pop_back(); - } - } -} - -Port * -TLB::getPort(const std::string &if_name, int idx) -{ - if (if_name == "walker_port") - return &walker.port; - else - panic("No tlb port named %s!\n", if_name); -} - -#else - -Port * -TLB::getPort(const std::string &if_name, int idx) -{ - panic("No tlb ports in se!\n", if_name); -} - + walker = p->walker; + walker->setTLB(this); #endif +} void TLB::insert(Addr vpn, TlbEntry &entry) @@ -593,6 +128,14 @@ TLB::lookup(Addr va, bool update_lru) return NULL; } +#if FULL_SYSTEM +void +TLB::walk(ThreadContext * _tc, Addr vaddr) +{ + walker->start(_tc, vaddr); +} +#endif + void TLB::invalidateAll() { diff --git a/src/arch/x86/tlb.hh b/src/arch/x86/tlb.hh index 93bbf2c9d..a361c2291 100644 --- a/src/arch/x86/tlb.hh +++ b/src/arch/x86/tlb.hh @@ -77,21 +77,26 @@ class Packet; namespace X86ISA { + class Walker; + static const unsigned StoreCheck = 1 << NUM_SEGMENTREGS; class TLB; - class TLB : public MemObject + class TLB : public SimObject { protected: friend class FakeITLBFault; friend class FakeDTLBFault; - System * sys; - - bool allowNX; + bool _allowNX; public: + bool allowNX() const + { + return _allowNX; + } + typedef X86TLBParams Params; TLB(const Params *p); @@ -101,119 +106,11 @@ namespace X86ISA #if FULL_SYSTEM protected: - class Walker - { - public: - enum State { - Ready, - Waiting, - LongPML4, - LongPDP, 
- LongPD, - LongPTE, - PAEPDP, - PAEPD, - PAEPTE, - PSEPD, - PD, - PTE - }; - - // Act on the current state and determine what to do next. read - // should be the packet that just came back from a read and write - // should be NULL. When the function returns, read is either NULL - // if the machine is finished, or points to a packet to initiate - // the next read. If any write is required to update an "accessed" - // bit, write will point to a packet to do the write. Otherwise it - // will be NULL. - void doNext(PacketPtr &read, PacketPtr &write); - - // Kick off the state machine. - void start(ThreadContext * _tc, Addr vaddr); - - protected: - friend class TLB; - - /* - * State having to do with sending packets. - */ - PacketPtr read; - std::vector writes; - - // How many memory operations are in flight. - unsigned inflight; - - bool retrying; - - /* - * Functions for dealing with packets. - */ - bool recvTiming(PacketPtr pkt); - void recvRetry(); - - void sendPackets(); - - /* - * Port for accessing memory - */ - class WalkerPort : public Port - { - public: - WalkerPort(const std::string &_name, Walker * _walker) : - Port(_name, _walker->tlb), walker(_walker), - snoopRangeSent(false) - {} - - protected: - Walker * walker; - - bool snoopRangeSent; - - bool recvTiming(PacketPtr pkt); - Tick recvAtomic(PacketPtr pkt); - void recvFunctional(PacketPtr pkt); - void recvStatusChange(Status status); - void recvRetry(); - void getDeviceAddressRanges(AddrRangeList &resp, - bool &snoop) - { - resp.clear(); - snoop = true; - } - }; - - friend class WalkerPort; - - WalkerPort port; - - // The TLB we're supposed to load. - TLB * tlb; - - /* - * State machine state. 
- */ - ThreadContext * tc; - State state; - State nextState; - int size; - bool enableNX; - TlbEntry entry; - - public: - Walker(const std::string &_name, TLB * _tlb) : - read(NULL), inflight(0), retrying(false), - port(_name + "-walker_port", this), - tlb(_tlb), - tc(NULL), state(Ready), nextState(Ready) - { - } - }; - - Walker walker; -#endif + Walker * walker; - Port *getPort(const std::string &if_name, int idx = -1); + void walk(ThreadContext * _tc, Addr vaddr); +#endif public: void invalidateAll(); @@ -231,13 +128,14 @@ namespace X86ISA EntryList freeList; EntryList entryList; - void insert(Addr vpn, TlbEntry &entry); - template Fault translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute); public: + + void insert(Addr vpn, TlbEntry &entry); + // Checkpointing virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string &section); @@ -249,8 +147,7 @@ namespace X86ISA typedef X86ITBParams Params; ITB(const Params *p) : TLB(p) { - sys = p->system; - allowNX = false; + _allowNX = false; } Fault translate(RequestPtr &req, ThreadContext *tc); @@ -264,8 +161,7 @@ namespace X86ISA typedef X86DTBParams Params; DTB(const Params *p) : TLB(p) { - sys = p->system; - allowNX = true; + _allowNX = true; } Fault translate(RequestPtr &req, ThreadContext *tc, bool write); #if FULL_SYSTEM diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index cb5793e57..9fc1db9f1 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -101,9 +101,7 @@ class BaseCPU(SimObject): _mem_ports = [] if build_env['TARGET_ISA'] == 'x86' and build_env['FULL_SYSTEM']: - itb.walker_port = Port("ITB page table walker port") - dtb.walker_port = Port("ITB page table walker port") - _mem_ports = ["itb.walker_port", "dtb.walker_port"] + _mem_ports = ["itb.walker.port", "dtb.walker.port"] def connectMemPorts(self, bus): for p in self._mem_ports: -- cgit v1.2.3 From 5772e3cadaf12cf155e6ad0cb79a3b7333d1dd10 Mon Sep 17 00:00:00 2001 From: Gabe Black Date:
Tue, 13 Nov 2007 01:31:43 -0800 Subject: X86: Make microcode use presegmentation RIPs and the rest of m5 use post segmentation RIPS. --HG-- extra : convert_revision : d8cda7c8b9a2afb8a9d601b6d61529a96c5f87fe --- src/arch/x86/isa/microops/regop.isa | 4 ++-- src/arch/x86/isa/operands.isa | 1 + src/arch/x86/process.cc | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index 58b267e0d..4ac3a9d98 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -835,7 +835,7 @@ let {{ ''' class Wrip(WrRegOp, CondRegOp): - code = 'RIP = psrc1 + sop2' + code = 'RIP = psrc1 + sop2 + CSBase' else_code="RIP = RIP;" class Br(WrRegOp, CondRegOp): @@ -846,7 +846,7 @@ let {{ code = 'ccFlagBits = psrc1 ^ op2' class Rdip(RdRegOp): - code = 'DestReg = RIP' + code = 'DestReg = RIP - CSBase' class Ruflags(RdRegOp): code = 'DestReg = ccFlagBits' diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa index 542638edd..f50e71727 100644 --- a/src/arch/x86/isa/operands.isa +++ b/src/arch/x86/isa/operands.isa @@ -126,5 +126,6 @@ def operands {{ 'ControlSrc1': ('ControlReg', 'uqw', 'MISCREG_CR(src1)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 72), 'EferOp': ('ControlReg', 'uqw', 'MISCREG_EFER', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 73), 'CR4Op': ('ControlReg', 'uqw', 'MISCREG_CR4', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 74), + 'CSBase': ('ControlReg', 'udw', 'MISCREG_CS_BASE', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 80), 'Mem': ('Mem', 'uqw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100) }}; diff --git a/src/arch/x86/process.cc b/src/arch/x86/process.cc index c6246c76b..633b2f136 100644 --- a/src/arch/x86/process.cc +++ b/src/arch/x86/process.cc @@ -461,6 +461,8 @@ X86LiveProcess::argsInit(int intSize, int 
pageSize) threadContexts[0]->setIntReg(StackPointerReg, stack_min); Addr prog_entry = objFile->entryPoint(); + // There doesn't need to be any segment base added in since we're dealing + // with the flat segmentation model. threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); -- cgit v1.2.3 From 7c8e4ca3a3b66becbc3e4e7b5e106f5c44b09b6f Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Wed, 14 Nov 2007 23:42:08 -0500 Subject: Checkpointing: Name SE page table entries better so that there isn't a problem if multiple workloads are being run at once. --HG-- extra : convert_revision : 3bac9bd7fd93fcadf764e2991c5b029f2c745c08 --- src/mem/page_table.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc index 6220305b8..54165f293 100644 --- a/src/mem/page_table.cc +++ b/src/mem/page_table.cc @@ -156,7 +156,7 @@ PageTable::serialize(std::ostream &os) PTableItr iter = pTable.begin(); PTableItr end = pTable.end(); while (iter != end) { - os << "\n[" << csprintf("%s.Entry%d", name(), count) << "]\n"; + os << "\n[" << csprintf("%s.Entry%d", process->name(), count) << "]\n"; paramOut(os, "vaddr", iter->first); iter->second.serialize(os); @@ -178,9 +178,9 @@ PageTable::unserialize(Checkpoint *cp, const std::string &section) pTable.clear(); while(i < count) { - paramIn(cp, csprintf("%s.Entry%d", name(), i), "vaddr", vaddr); + paramIn(cp, csprintf("%s.Entry%d", process->name(), i), "vaddr", vaddr); entry = new TheISA::TlbEntry(); - entry->unserialize(cp, csprintf("%s.Entry%d", name(), i)); + entry->unserialize(cp, csprintf("%s.Entry%d", process->name(), i)); pTable[vaddr] = *entry; ++i; } -- cgit v1.2.3