diff options
Diffstat (limited to 'src')
42 files changed, 1718 insertions, 171 deletions
diff --git a/src/arch/SConscript b/src/arch/SConscript index e051c44af..66f93870e 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -97,7 +97,7 @@ execfile(cpu_models_file.srcnode().abspath) # Several files are generated from the ISA description. # We always get the basic decoder and header file. -isa_desc_gen_files = [ 'decoder.cc', 'decoder.hh' ] +isa_desc_gen_files = [ 'decoder.cc', 'decoder.hh', 'max_inst_regs.hh' ] # We also get an execute file for each selected CPU model. isa_desc_gen_files += [CpuModel.dict[cpu].filename for cpu in env['CPU_MODELS']] diff --git a/src/arch/alpha/isa_traits.hh b/src/arch/alpha/isa_traits.hh index 53eea5f69..be1d1b8bb 100644 --- a/src/arch/alpha/isa_traits.hh +++ b/src/arch/alpha/isa_traits.hh @@ -35,6 +35,7 @@ namespace LittleEndianGuest {} #include "arch/alpha/ipr.hh" +#include "arch/alpha/max_inst_regs.hh" #include "arch/alpha/types.hh" #include "config/full_system.hh" #include "sim/host.hh" @@ -44,6 +45,8 @@ class StaticInstPtr; namespace AlphaISA { using namespace LittleEndianGuest; + using AlphaISAInst::MaxInstSrcRegs; + using AlphaISAInst::MaxInstDestRegs; // These enumerate all the registers for dependence tracking. enum DependenceTags { @@ -144,10 +147,6 @@ namespace AlphaISA const int TotalDataRegs = NumIntRegs + NumFloatRegs; - // Static instruction parameters - const int MaxInstSrcRegs = 3; - const int MaxInstDestRegs = 2; - // semantically meaningful register indices const int ZeroReg = 31; // architecturally meaningful // the rest of these depend on the ABI diff --git a/src/arch/alpha/predecoder.hh b/src/arch/alpha/predecoder.hh index 7a6bb3c02..725b35b9d 100644 --- a/src/arch/alpha/predecoder.hh +++ b/src/arch/alpha/predecoder.hh @@ -74,8 +74,7 @@ namespace AlphaISA { ext_inst = inst; #if FULL_SYSTEM - if (pc && 0x1) - ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32); + ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32); #endif } diff --git a/src/arch/sparc/isa_traits.hh b/src/arch/sparc/isa_traits.hh index 4f3d20606..133817eb5 100644 --- a/src/arch/sparc/isa_traits.hh +++ b/src/arch/sparc/isa_traits.hh @@ -33,6 +33,7 @@ #define __ARCH_SPARC_ISA_TRAITS_HH__ #include "arch/sparc/types.hh" +#include "arch/sparc/max_inst_regs.hh" #include "arch/sparc/sparc_traits.hh" #include "config/full_system.hh" #include "sim/host.hh" @@ -49,6 +50,8 @@ namespace SparcISA //This makes sure the big endian versions of certain functions are used. using namespace BigEndianGuest; + using SparcISAInst::MaxInstSrcRegs; + using SparcISAInst::MaxInstDestRegs; // SPARC has a delay slot #define ISA_HAS_DELAY_SLOT 1 @@ -76,10 +79,6 @@ namespace SparcISA // Some OS syscall use a second register (o1) to return a second value const int SyscallPseudoReturnReg = ArgumentReg[1]; - //XXX These numbers are bogus - const int MaxInstSrcRegs = 8; - const int MaxInstDestRegs = 9; - //8K. This value is implmentation specific; and should probably //be somewhere else. const int LogVMPageSize = 13; diff --git a/src/arch/sparc/process.cc b/src/arch/sparc/process.cc index 1f5d0b077..0c40fe58d 100644 --- a/src/arch/sparc/process.cc +++ b/src/arch/sparc/process.cc @@ -429,6 +429,10 @@ Sparc64LiveProcess::argsInit(int intSize, int pageSize) threadContexts[0]->setIntReg(ArgumentReg[1], argv_array_base); threadContexts[0]->setIntReg(StackPointerReg, stack_min - StackBias); + // %g1 is a pointer to a function that should be run at exit. Since we + // don't have anything like that, it should be set to 0. + threadContexts[0]->setIntReg(1, 0); + Addr prog_entry = objFile->entryPoint(); threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); @@ -658,6 +662,10 @@ Sparc32LiveProcess::argsInit(int intSize, int pageSize) //threadContexts[0]->setIntReg(ArgumentReg[1], argv_array_base); threadContexts[0]->setIntReg(StackPointerReg, stack_min); + // %g1 is a pointer to a function that should be run at exit. Since we + // don't have anything like that, it should be set to 0. + threadContexts[0]->setIntReg(1, 0); + uint32_t prog_entry = objFile->entryPoint(); threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript index 68a18d4c0..eef3956ee 100644 --- a/src/arch/x86/SConscript +++ b/src/arch/x86/SConscript @@ -112,6 +112,7 @@ if env['TARGET_ISA'] == 'x86': SimObject('X86System.py') # Full-system sources + Source('pagetable_walker.cc') Source('system.cc') Source('stacktrace.cc') Source('vtophys.cc') diff --git a/src/arch/x86/X86TLB.py b/src/arch/x86/X86TLB.py index ce4db4f4c..dc080f37e 100644 --- a/src/arch/x86/X86TLB.py +++ b/src/arch/x86/X86TLB.py @@ -53,12 +53,29 @@ # # Authors: Gabe Black +from MemObject import MemObject from m5.SimObject import SimObject from m5.params import * +from m5.proxy import * +from m5 import build_env + +if build_env['FULL_SYSTEM']: + class X86PagetableWalker(MemObject): + type = 'X86PagetableWalker' + cxx_namespace = 'X86ISA' + cxx_class = 'Walker' + port = Port("Port for the hardware table walker") + system = Param.System(Parent.any, "system object") + class X86TLB(SimObject): type = 'X86TLB' + cxx_namespace = 'X86ISA' + cxx_class = 'TLB' abstract = True size = Param.Int("TLB size") + if build_env['FULL_SYSTEM']: + walker = Param.X86PagetableWalker(\ + X86PagetableWalker(), "page table walker") class X86DTB(X86TLB): type = 'X86DTB' diff --git a/src/arch/x86/faults.cc b/src/arch/x86/faults.cc index 13341f1de..1c94a1251 100644 --- a/src/arch/x86/faults.cc +++ b/src/arch/x86/faults.cc @@ -93,6 +93,8 @@ #include "arch/x86/isa_traits.hh" #include "mem/page_table.hh" #include "sim/process.hh" +#else +#include "arch/x86/tlb.hh" #endif namespace X86ISA @@ -112,6 +114,19 @@ namespace X86ISA { panic("X86 faults are not implemented!"); } + + void FakeITLBFault::invoke(ThreadContext * tc) + { + // Start the page table walker. + tc->getITBPtr()->walk(tc, vaddr); + } + + void FakeDTLBFault::invoke(ThreadContext * tc) + { + // Start the page table walker. + tc->getDTBPtr()->walk(tc, vaddr); + } + #else // !FULL_SYSTEM void FakeITLBFault::invoke(ThreadContext * tc) { diff --git a/src/arch/x86/faults.hh b/src/arch/x86/faults.hh index 5a573754a..78a55d0e1 100644 --- a/src/arch/x86/faults.hh +++ b/src/arch/x86/faults.hh @@ -369,44 +369,28 @@ namespace X86ISA // the tlb on a miss and are to take the place of a hardware table walker. class FakeITLBFault : public X86Fault { -#if !FULL_SYSTEM protected: Addr vaddr; public: FakeITLBFault(Addr _vaddr) : X86Fault("fake instruction tlb fault", "itlb"), vaddr(_vaddr) -#else - public: - FakeITLBFault() : - X86Fault("fake instruction tlb fault", "itlb") -#endif {} -#if !FULL_SYSTEM void invoke(ThreadContext * tc); -#endif }; class FakeDTLBFault : public X86Fault { -#if !FULL_SYSTEM protected: Addr vaddr; public: FakeDTLBFault(Addr _vaddr) : X86Fault("fake data tlb fault", "dtlb"), vaddr(_vaddr) -#else - public: - FakeDTLBFault() : - X86Fault("fake data tlb fault", "dtlb") -#endif {} -#if !FULL_SYSTEM void invoke(ThreadContext * tc); -#endif }; }; diff --git a/src/arch/x86/insts/static_inst.cc b/src/arch/x86/insts/static_inst.cc index d2ec8878c..183700fa9 100644 --- a/src/arch/x86/insts/static_inst.cc +++ b/src/arch/x86/insts/static_inst.cc @@ -56,6 +56,7 @@ */ #include "arch/x86/insts/static_inst.hh" +#include "arch/x86/segmentregs.hh" namespace X86ISA { @@ -75,24 +76,27 @@ namespace X86ISA { switch (segment) { - case 0: + case SEGMENT_REG_ES: ccprintf(os, "ES"); break; - case 1: + case SEGMENT_REG_CS: ccprintf(os, "CS"); break; - case 2: + case SEGMENT_REG_SS: ccprintf(os, "SS"); break; - case 3: + case SEGMENT_REG_DS: ccprintf(os, "DS"); break; - case 4: + case SEGMENT_REG_FS: ccprintf(os, "FS"); break; - case 5: + case SEGMENT_REG_GS: ccprintf(os, "GS"); break; + case SEGMENT_REG_INT: + ccprintf(os, "INT"); + break; default: panic("Unrecognized segment %d\n", segment); } diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa index 0482fdf23..f3485bc4e 100644 --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -61,8 +61,62 @@ 0x0F: decode OPCODE_OP_TOP5 { format WarnUnimpl { 0x00: decode OPCODE_OP_BOTTOM3 { - 0x00: group6(); - 0x01: group7(); + //0x00: group6(); + 0x00: decode MODRM_REG { + 0x0: sldt_Mw_or_Rv(); + 0x1: str_Mw_or_Rv(); + 0x2: lldt_Mw_or_Rv(); + 0x3: ltr_Mw_or_Rv(); + 0x4: verr_Mw_or_Rv(); + 0x5: verw_Mw_or_Rv(); + //0x6: jmpe_Ev(); // IA-64 + default: Inst::UD2(); + } + //0x01: group7(); // Ugly, ugly, ugly... + 0x01: decode MODRM_MOD { + 0x3: decode MODRM_REG { + 0x0: decode MODRM_RM { + 0x1: vmcall(); + 0x2: vmlaunch(); + 0x3: vmresume(); + 0x4: vmxoff(); + default: Inst::UD2(); + } + 0x1: decode MODRM_RM { + 0x0: monitor(); + 0x1: mwait(); + default: Inst::UD2(); + } + 0x3: decode MODRM_RM { + 0x0: vmrun(); + 0x1: vmmcall(); + 0x2: vmload(); + 0x3: vmsave(); + 0x4: stgi(); + 0x5: clgi(); + 0x6: skinit(); + 0x7: invlpga(); + } + 0x4: smsw_Rv(); + 0x6: lmsw_Rv(); + 0x7: decode MODRM_RM { + 0x0: swapgs(); + 0x1: rdtscp(); + default: Inst::UD2(); + } + default: Inst::UD2(); + } + default: decode MODRM_REG { + 0x0: sgdt_Ms(); + 0x1: sidt_Ms(); + 0x2: lgdt_Ms(); + 0x3: lidt_Ms(); + 0x4: smsw_Mw(); + 0x6: lmsw_Mw(); + 0x7: invlpg_M(); + default: Inst::UD2(); + } + } 0x02: lar_Gv_Ew(); 0x03: lsl_Gv_Ew(); //sandpile.org doesn't seem to know what this is... ? @@ -148,7 +202,7 @@ 0x0: decode OPCODE_OP_BOTTOM3 { 0x0: mov_Rd_Cd(); 0x1: mov_Rd_Dd(); - 0x2: mov_Cd_Rd(); + 0x2: Inst::MOV(Cd,Rd); 0x3: mov_Dd_Rd(); 0x4: mov_Rd_Td(); 0x6: mov_Td_Rd(); @@ -397,9 +451,58 @@ // no prefix 0x0: decode OPCODE_OP_BOTTOM3 { 0x0: pshufw_Pq_Qq_Ib(); - 0x1: group13_pshimw(); - 0x2: group14_pshimd(); - 0x3: group15_pshimq(); + //0x1: group13_pshimw(); + 0x1: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrlw_PRq_Ib(); + 0x1: psrlw_VRo_Ib(); + } + 0x4: decode LEGACY_OP { + 0x0: psraw_PRq_Ib(); + 0x1: psraw_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: psllw_PRq_Ib(); + 0x1: psllw_VRo_Ib(); + } + default: Inst::UD2(); + } + //0x2: group14_pshimd(); + 0x2: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrld_PRq_Ib(); + 0x1: psrld_VRo_Ib(); + } + 0x4: decode LEGACY_OP { + 0x0: psrad_PRq_Ib(); + 0x1: psrad_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: pslld_PRq_Ib(); + 0x1: pslld_VRo_Ib(); + } + default: Inst::UD2(); + } + //0x3: group15_pshimq(); + 0x3: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrlq_PRq_Ib(); + 0x1: psrlq_VRo_Ib(); + } + 0x3: decode LEGACY_OP { + 0x0: Inst::UD2(); + 0x1: psrldq_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: psllq_PRq_Ib(); + 0x1: psllq_VRo_Ib(); + } + 0x7: decode LEGACY_OP { + 0x0: Inst::UD2(); + 0x1: pslldq_VRo_Ib(); + } + default: Inst::UD2(); + } 0x4: pcmpeqb_Pq_Qq(); 0x5: pcmpeqw_Pq_Qq(); 0x6: pcmpeqd_Pq_Qq(); @@ -413,9 +516,58 @@ // operand size (0x66) 0x1: decode OPCODE_OP_BOTTOM3 { 0x0: pshufd_Vo_Wo_Ib(); - 0x1: group13_pshimw(); - 0x2: group14_pshimd(); - 0x3: group15_pshimq_dq(); + //0x1: group13_pshimw(); + 0x1: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrlw_PRq_Ib(); + 0x1: psrlw_VRo_Ib(); + } + 0x4: decode LEGACY_OP { + 0x0: psraw_PRq_Ib(); + 0x1: psraw_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: psllw_PRq_Ib(); + 0x1: psllw_VRo_Ib(); + } + default: Inst::UD2(); + } + //0x2: group14_pshimd(); + 0x2: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrld_PRq_Ib(); + 0x1: psrld_VRo_Ib(); + } + 0x4: decode LEGACY_OP { + 0x0: psrad_PRq_Ib(); + 0x1: psrad_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: pslld_PRq_Ib(); + 0x1: pslld_VRo_Ib(); + } + default: Inst::UD2(); + } + //0x3: group15_pshimq(); + 0x3: decode MODRM_REG { + 0x2: decode LEGACY_OP { + 0x0: psrlq_PRq_Ib(); + 0x1: psrlq_VRo_Ib(); + } + 0x3: decode LEGACY_OP { + 0x0: Inst::UD2(); + 0x1: psrldq_VRo_Ib(); + } + 0x6: decode LEGACY_OP { + 0x0: psllq_PRq_Ib(); + 0x1: psllq_VRo_Ib(); + } + 0x7: decode LEGACY_OP { + 0x0: Inst::UD2(); + 0x1: pslldq_VRo_Ib(); + } + default: Inst::UD2(); + } 0x4: pcmpeqb_Vo_Wo(); 0x5: pcmpeqw_Vo_Wo(); 0x6: pcmpeqd_Vo_Wo(); @@ -505,7 +657,7 @@ 0x0: push_fs(); 0x1: pop_fs(); 0x2: Inst::CPUID(rAd); - 0x3: bt_Ev_Gv(); + 0x3: Inst::BT(Ev,Gv); 0x4: shld_Ev_Gv_Ib(); 0x5: shld_Ev_Gv_rCl(); 0x6: xbts_and_cmpxchg(); @@ -515,17 +667,31 @@ 0x0: push_gs(); 0x1: pop_gs(); 0x2: rsm_smm(); - 0x3: bts_Ev_Gv(); + 0x3: Inst::BTS(Ev,Gv); 0x4: shrd_Ev_Gv_Ib(); 0x5: shrd_Ev_Gv_rCl(); - 0x6: group16(); + //0x6: group16(); + 0x6: decode MODRM_MOD { + 0x3: decode MODRM_REG { + 0x5: lfence(); + 0x6: mfence(); + 0x7: sfence(); + default: Inst::UD2(); + } + default: decode MODRM_REG { + 0x0: fxsave(); + 0x1: fxrstor(); + 0x7: clflush(); + default: Inst::UD2(); + } + } 0x7: Inst::IMUL(Gv,Ev); } 0x16: decode OPCODE_OP_BOTTOM3 { 0x0: Inst::CMPXCHG(Eb,Gb); 0x1: Inst::CMPXCHG(Ev,Gv); 0x2: lss_Gz_Mp(); - 0x3: btr_Ev_Gv(); + 0x3: Inst::BTR(Ev,Gv); 0x4: lfs_Gz_Mp(); 0x5: lgs_Gz_Mp(); //The size of the second operand in these instructions should @@ -536,9 +702,19 @@ } 0x17: decode OPCODE_OP_BOTTOM3 { 0x0: jmpe_Jz(); // IA-64? - 0x1: group11_UD2(); - 0x2: group8_Ev_Ib(); - 0x3: btc_Ev_Gv(); + format Inst { + //0x1: group11_UD2(); + 0x1: UD2(); + //0x2: group8_Ev_Ib(); + 0x2: decode MODRM_REG { + 0x4: BT(Ev,Ib); + 0x5: BTS(Ev,Ib); + 0x6: BTR(Ev,Ib); + 0x7: BTC(Ev,Ib); + default: UD2(); + } + 0x3: BTC(Ev,Gv); + } 0x4: bsf_Gv_Ev(); 0x5: bsr_Gv_Ev(); //The size of the second operand in these instructions should @@ -550,7 +726,19 @@ 0x18: decode OPCODE_OP_BOTTOM3 { 0x0: xadd_Eb_Gb(); 0x1: xadd_Ev_Gv(); - 0x7: group9(); + //0x7: group9(); + 0x7: decode MODRM_REG { + 0x1: cmpxchg_Mq(); + 0x6: decode LEGACY_OP { + 0x1: vmclear_Mq(); + default: decode LEGACY_REP { + 0x1: vmxon_Mq(); + 0x0: vmptrld_Mq(); + } + } + 0x7: vmptrst_Mq(); + default: Inst::UD2(); + } default: decode LEGACY_DECODEVAL { // no prefix 0x0: decode OPCODE_OP_BOTTOM3 { diff --git a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py index e950f008a..883ec4411 100644 --- a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py +++ b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py @@ -53,14 +53,242 @@ # # Authors: Gabe Black -microcode = "" -#let {{ -# class BT(Inst): -# "GenFault ${new UnimpInstFault}" -# class BTC(Inst): -# "GenFault ${new UnimpInstFault}" -# class BTR(Inst): -# "GenFault ${new UnimpInstFault}" -# class BTS(Inst): -# "GenFault ${new UnimpInstFault}" -#}}; +microcode = ''' +def macroop BT_R_I { + sexti t0, reg, imm, flags=(CF,) +}; + +def macroop BT_M_I { + limm t1, imm + # This fudges just a tiny bit, but it's reasonable to expect the + # microcode generation logic to have the log of the various sizes + # floating around as well. + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + ld t1, seg, [scale, index, t2], disp + sexti t0, t1, imm, flags=(CF,) +}; + +def macroop BT_P_I { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + ld t1, seg, [1, t2, t7] + sexti t0, t1, imm, flags=(CF,) +}; + +def macroop BT_R_R { + sext t0, reg, regm, flags=(CF,) +}; + +def macroop BT_M_R { + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + ld t1, seg, [scale, index, t2], disp + sext t0, t1, reg, flags=(CF,) +}; + +def macroop BT_P_R { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + ld t1, seg, [1, t2, t7] + sext t0, t1, reg, flags=(CF,) +}; + +def macroop BTC_R_I { + sexti t0, reg, imm, flags=(CF,) + limm t1, 1 + roli t1, t1, imm + xor reg, reg, t1 +}; + +def macroop BTC_M_I { + limm t1, imm + # This fudges just a tiny bit, but it's reasonable to expect the + # microcode generation logic to have the log of the various sizes + # floating around as well. + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, 1 + roli t3, t3, imm + ldst t1, seg, [scale, index, t2], disp + sexti t0, t1, imm, flags=(CF,) + xor t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTC_P_I { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, 1 + roli t3, t3, imm + ldst t1, seg, [1, t2, t7] + sexti t0, t1, imm, flags=(CF,) + xor t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTC_R_R { + sext t0, reg, regm, flags=(CF,) + limm t1, 1 + rol t1, t1, regm + xor reg, reg, t1 +}; + +def macroop BTC_M_R { + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, 1 + rol t3, t3, reg + ldst t1, seg, [scale, index, t2], disp + sext t0, t1, reg, flags=(CF,) + xor t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTC_P_R { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, 1 + rol t3, t3, reg + ldst t1, seg, [1, t2, t7] + sext t0, t1, reg, flags=(CF,) + xor t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTR_R_I { + sexti t0, reg, imm, flags=(CF,) + limm t1, "(uint64_t(-(2ULL)))" + roli t1, t1, imm + and reg, reg, t1 +}; + +def macroop BTR_M_I { + limm t1, imm + # This fudges just a tiny bit, but it's reasonable to expect the + # microcode generation logic to have the log of the various sizes + # floating around as well. + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, "(uint64_t(-(2ULL)))" + roli t3, t3, imm + ldst t1, seg, [scale, index, t2], disp + sexti t0, t1, imm, flags=(CF,) + and t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTR_P_I { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, "(uint64_t(-(2ULL)))" + roli t3, t3, imm + ldst t1, seg, [1, t2, t7] + sexti t0, t1, imm, flags=(CF,) + and t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTR_R_R { + sext t0, reg, regm, flags=(CF,) + limm t1, "(uint64_t(-(2ULL)))" + rol t1, t1, regm + and reg, reg, t1 +}; + +def macroop BTR_M_R { + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, "(uint64_t(-(2ULL)))" + rol t3, t3, reg + ldst t1, seg, [scale, index, t2], disp + sext t0, t1, reg, flags=(CF,) + and t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTR_P_R { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, "(uint64_t(-(2ULL)))" + rol t3, t3, reg + ldst t1, seg, [1, t2, t7] + sext t0, t1, reg, flags=(CF,) + and t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTS_R_I { + sexti t0, reg, imm, flags=(CF,) + limm t1, 1 + roli t1, t1, imm + or reg, reg, t1 +}; + +def macroop BTS_M_I { + limm t1, imm + # This fudges just a tiny bit, but it's reasonable to expect the + # microcode generation logic to have the log of the various sizes + # floating around as well. + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, 1 + roli t3, t3, imm + ldst t1, seg, [scale, index, t2], disp + sexti t0, t1, imm, flags=(CF,) + or t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTS_P_I { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, 1 + roli t3, t3, imm + ldst t1, seg, [1, t2, t7] + sexti t0, t1, imm, flags=(CF,) + or t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTS_R_R { + sext t0, reg, regm, flags=(CF,) + limm t1, 1 + rol t1, t1, regm + or reg, reg, t1 +}; + +def macroop BTS_M_R { + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + add t2, t2, base + limm t3, 1 + rol t3, t3, reg + ldst t1, seg, [scale, index, t2], disp + sext t0, t1, reg, flags=(CF,) + or t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; + +def macroop BTS_P_R { + rdip t7 + limm t1, imm + srai t2, t1, "(env.dataSize == 8) ? 3 : ((env.dataSize == 4) ? 2 : 1)" + limm t3, 1 + rol t3, t3, reg + ldst t1, seg, [1, t2, t7] + sext t0, t1, reg, flags=(CF,) + or t1, t1, t3 + st t1, seg, [scale, index, t2], disp +}; +''' diff --git a/src/arch/x86/isa/insts/general_purpose/data_conversion/sign_extension.py b/src/arch/x86/isa/insts/general_purpose/data_conversion/sign_extension.py index 9a7c226af..ae3c6cc6f 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_conversion/sign_extension.py +++ b/src/arch/x86/isa/insts/general_purpose/data_conversion/sign_extension.py @@ -55,7 +55,7 @@ microcode = ''' def macroop CDQE_R { - sext reg, reg, "env.dataSize << 2" + sexti reg, reg, "env.dataSize << 2 - 1" }; def macroop CQO_R_R { diff --git a/src/arch/x86/isa/insts/general_purpose/data_conversion/translate.py b/src/arch/x86/isa/insts/general_purpose/data_conversion/translate.py index c2ccb9d19..d6ae7885a 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_conversion/translate.py +++ b/src/arch/x86/isa/insts/general_purpose/data_conversion/translate.py @@ -55,7 +55,7 @@ microcode = ''' def macroop XLAT { - zext t1, rax, 8 + zexti t1, rax, 7 # Here, t1 can be used directly. The value of al is supposed to be treated # as unsigned. Since we zero extended it from 8 bits above and the address # size has to be at least 16 bits, t1 will not be sign extended. diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py index 04f9ea12a..a15fc21ef 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py @@ -111,48 +111,48 @@ def macroop MOV_P_I { # def macroop MOVSXD_R_R { - sext reg, regm, 32 + sexti reg, regm, 31 }; def macroop MOVSXD_R_M { ld t1, seg, sib, disp, dataSize=4 - sext reg, t1, 32 + sexti reg, t1, 31 }; def macroop MOVSXD_R_P { rdip t7 ld t1, seg, riprel, disp, dataSize=4 - sext reg, t1, 32 + sexti reg, t1, 31 }; def macroop MOVSX_B_R_R { - sext reg, regm, 8 + sexti reg, regm, 7 }; def macroop MOVSX_B_R_M { ld reg, seg, sib, disp, dataSize=1 - sext reg, reg, 8 + sexti reg, reg, 7 }; def macroop MOVSX_B_R_P { rdip t7 ld reg, seg, riprel, disp, dataSize=1 - sext reg, reg, 8 + sexti reg, reg, 7 }; def macroop MOVSX_W_R_R { - sext reg, regm, 16 + sexti reg, regm, 15 }; def macroop MOVSX_W_R_M { ld reg, seg, sib, disp, dataSize=2 - sext reg, reg, 16 + sexti reg, reg, 15 }; def macroop MOVSX_W_R_P { rdip t7 ld reg, seg, riprel, disp, dataSize=2 - sext reg, reg, 16 + sexti reg, reg, 15 }; # @@ -160,33 +160,37 @@ def macroop MOVSX_W_R_P { # def macroop MOVZX_B_R_R { - zext reg, regm, 8 + zexti reg, regm, 7 }; def macroop MOVZX_B_R_M { ld t1, seg, sib, disp, dataSize=1 - zext reg, t1, 8 + zexti reg, t1, 7 }; def macroop MOVZX_B_R_P { rdip t7 ld t1, seg, riprel, disp, dataSize=1 - zext reg, t1, 8 + zexti reg, t1, 7 }; def macroop MOVZX_W_R_R { - zext reg, regm, 16 + zexti reg, regm, 15 }; def macroop MOVZX_W_R_M { ld t1, seg, sib, disp, dataSize=2 - zext reg, t1, 16 + zexti reg, t1, 15 }; def macroop MOVZX_W_R_P { rdip t7 ld t1, seg, riprel, disp, dataSize=2 - zext reg, t1, 16 + zexti reg, t1, 15 +}; + +def macroop MOV_C_R { + wrcr reg, regm }; ''' #let {{ diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py index 5884d68c2..6c51f3171 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py @@ -162,9 +162,9 @@ def macroop ENTER_I_I { # Pull the different components out of the immediate limm t1, imm - zext t2, t1, 16, dataSize=2 + zexti t2, t1, 15, dataSize=2 srl t1, t1, 16 - zext t1, t1, 6 + zexti t1, t1, 5 # t1 is now the masked nesting level, and t2 is the amount of storage. # Push rbp. diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py b/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py index c01a11035..75a361eb7 100644 --- a/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py +++ b/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py @@ -62,7 +62,7 @@ microcode = ''' def macroop IN_R_R { limm t1, "IntAddrPrefixIO" - zext t2, regm, 16, dataSize=2 + zexti t2, regm, 15, dataSize=2 ld reg, intseg, [1, t1, t2], addressSize=8 }; @@ -74,7 +74,7 @@ microcode = ''' def macroop OUT_R_R { limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 st regm, intseg, [1, t1, t2], addressSize=8 }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py b/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py index a8acbbc39..b44203d9c 100644 --- a/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py +++ b/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py @@ -62,7 +62,7 @@ def macroop INS_M_R { mov t3, t3, t4, flags=(nCEZF,), dataSize=asz limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 ld t6, intseg, [1, t1, t2], addressSize=8 st t6, es, [1, t0, rdi] @@ -78,7 +78,7 @@ def macroop INS_E_M_R { mov t3, t3, t4, flags=(nCEZF,), dataSize=asz limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 topOfLoop: ld t6, intseg, [1, t1, t2], addressSize=8 @@ -98,7 +98,7 @@ def macroop OUTS_R_M { mov t3, t3, t4, flags=(nCEZF,), dataSize=asz limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 ld t6, ds, [1, t0, rsi] st t6, intseg, [1, t1, t2], addressSize=8 @@ -114,7 +114,7 @@ def macroop OUTS_E_R_M { mov t3, t3, t4, flags=(nCEZF,), dataSize=asz limm t1, "IntAddrPrefixIO" - zext t2, reg, 16, dataSize=2 + zexti t2, reg, 15, dataSize=2 topOfLoop: ld t6, ds, [1, t0, rsi] diff --git a/src/arch/x86/isa/insts/system/msrs.py b/src/arch/x86/isa/insts/system/msrs.py index ea576510b..20b9b2a0b 100644 --- a/src/arch/x86/isa/insts/system/msrs.py +++ b/src/arch/x86/isa/insts/system/msrs.py @@ -54,7 +54,7 @@ # Authors: Gabe Black microcode = ''' -def macroop WRMSR +def macroop RDMSR { limm t1, "IntAddrPrefixMSR >> 3" ld t2, intseg, [8, t1, rcx], dataSize=8, addressSize=4 @@ -63,7 +63,7 @@ def macroop WRMSR mov rdx, rdx, t2, dataSize=4 }; -def macroop RDMSR +def macroop WRMSR { limm t1, "IntAddrPrefixMSR >> 3" mov t2, t2, rdx, dataSize=4 diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index 0c43d4c13..040bb2036 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -108,11 +108,14 @@ let {{ # This segment selects an internal address space mapped to MSRs, # CPUID info, etc. - assembler.symbols["intseg"] = "NUM_SEGMENTREGS" + assembler.symbols["intseg"] = "SEGMENT_REG_INT" for reg in ('ax', 'bx', 'cx', 'dx', 'sp', 'bp', 'si', 'di'): assembler.symbols["r%s" % reg] = "INTREG_R%s" % reg.upper() + for reg in range(15): + assembler.symbols["cr%d" % reg] = "MISCREG_CR%d" % reg + for flag in ('CF', 'PF', 'ECF', 'AF', 'EZF', 'ZF', 'SF', 'OF'): assembler.symbols[flag] = flag + "Bit" diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 61adde8d1..77152a190 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -123,7 +123,7 @@ def template MicroLoadExecute {{ %(ea_code)s; DPRINTF(X86, "%s : %s: The address is %#x\n", instMnem, mnemonic, EA); - fault = read(xc, EA, Mem, (%(mem_flags)s) | (1 << segment)); + fault = read(xc, EA, Mem, (%(mem_flags)s) | segment); if(fault == NoFault) { @@ -150,7 +150,7 @@ def template MicroLoadInitiateAcc {{ %(ea_code)s; DPRINTF(X86, "%s : %s: The address is %#x\n", instMnem, mnemonic, EA); - fault = read(xc, EA, Mem, (%(mem_flags)s) | (1 << segment)); + fault = read(xc, EA, Mem, (%(mem_flags)s) | segment); return fault; } @@ -197,7 +197,7 @@ def template MicroStoreExecute {{ if(fault == NoFault) { - fault = write(xc, Mem, EA, (%(mem_flags)s) | (1 << segment)); + fault = write(xc, Mem, EA, (%(mem_flags)s) | segment); if(fault == NoFault) { %(op_wb)s; @@ -224,7 +224,7 @@ def template MicroStoreInitiateAcc {{ if(fault == NoFault) { - fault = write(xc, Mem, EA, (%(mem_flags)s) | (1 << segment)); + fault = write(xc, Mem, EA, (%(mem_flags)s) | segment); if(fault == NoFault) { %(op_wb)s; diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index 892c44487..4ac3a9d98 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -318,7 +318,7 @@ let {{ # If there's something optional to do with flags, generate # a version without it and fix up this version to use it. - if flag_code is not "" or cond_check is not "true": + if flag_code != "" or cond_check != "true": self.buildCppClasses(name, Name, suffix, code, "", "true", else_code) suffix = "Flags" + suffix @@ -835,7 +835,7 @@ let {{ ''' class Wrip(WrRegOp, CondRegOp): - code = 'RIP = psrc1 + sop2' + code = 'RIP = psrc1 + sop2 + CSBase' else_code="RIP = RIP;" class Br(WrRegOp, CondRegOp): @@ -846,7 +846,7 @@ let {{ code = 'ccFlagBits = psrc1 ^ op2' class Rdip(RdRegOp): - code = 'DestReg = RIP' + code = 'DestReg = RIP - CSBase' class Ruflags(RdRegOp): code = 'DestReg = ccFlagBits' @@ -866,12 +866,74 @@ let {{ class Sext(RegOp): code = ''' IntReg val = psrc1; - int sign_bit = bits(val, imm8-1, imm8-1); - uint64_t maskVal = mask(imm8); + // Mask the bit position so that it wraps. + int bitPos = op2 & (dataSize * 8 - 1); + int sign_bit = bits(val, bitPos, bitPos); + uint64_t maskVal = mask(bitPos+1); val = sign_bit ? (val | ~maskVal) : (val & maskVal); DestReg = merge(DestReg, val, dataSize); ''' + flag_code = ''' + if (!sign_bit) + ccFlagBits = ccFlagBits & + ~(ext & (CFBit | ECFBit | ZFBit | EZFBit)); + else + ccFlagBits = ccFlagBits | + (ext & (CFBit | ECFBit | ZFBit | EZFBit)); + ''' class Zext(RegOp): - code = 'DestReg = bits(psrc1, imm8-1, 0);' + code = 'DestReg = bits(psrc1, op2, 0);' + + class Wrcr(RegOp): + def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): + super(Wrcr, self).__init__(dest, \ + src1, "NUM_INTREGS", flags, dataSize) + code = ''' + if (dest == 1 || (dest > 4 && dest < 8) || (dest > 8)) { + fault = new InvalidOpcode(); + } else { + // There are *s in the line below so it doesn't confuse the + // parser. They may be unnecessary. + //Mis*cReg old*Val = pick(Cont*rolDest, 0, dat*aSize); + MiscReg newVal = psrc1; + + // Check for any modifications that would cause a fault. + switch(dest) { + case 0: + { + Efer efer = EferOp; + CR0 cr0 = newVal; + CR4 oldCr4 = CR4Op; + if (bits(newVal, 63, 32) || + (!cr0.pe && cr0.pg) || + (!cr0.cd && cr0.nw) || + (cr0.pg && efer.lme && !oldCr4.pae)) + fault = new GeneralProtection(0); + } + break; + case 2: + break; + case 3: + break; + case 4: + { + CR4 cr4 = newVal; + // PAE can't be disabled in long mode. + if (bits(newVal, 63, 11) || + (machInst.mode.mode == LongMode && !cr4.pae)) + fault = new GeneralProtection(0); + } + break; + case 8: + { + if (bits(newVal, 63, 4)) + fault = new GeneralProtection(0); + } + default: + panic("Unrecognized control register %d.\\n", dest); + } + ControlDest = newVal; + } + ''' }}; diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa index 8c0eacca2..f50e71727 100644 --- a/src/arch/x86/isa/operands.isa +++ b/src/arch/x86/isa/operands.isa @@ -122,5 +122,10 @@ def operands {{ # instructions don't map their indexes with an old value. 'TOP': ('ControlReg', 'ub', 'MISCREG_X87_TOP', None, 61), 'SegBase': ('ControlReg', 'uqw', 'MISCREG_SEG_BASE(segment)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 70), + 'ControlDest': ('ControlReg', 'uqw', 'MISCREG_CR(dest)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 71), + 'ControlSrc1': ('ControlReg', 'uqw', 'MISCREG_CR(src1)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 72), + 'EferOp': ('ControlReg', 'uqw', 'MISCREG_EFER', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 73), + 'CR4Op': ('ControlReg', 'uqw', 'MISCREG_CR4', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 74), + 'CSBase': ('ControlReg', 'udw', 'MISCREG_CS_BASE', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 80), 'Mem': ('Mem', 'uqw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100) }}; diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa index cf6b6ff86..3802d8949 100644 --- a/src/arch/x86/isa/specialize.isa +++ b/src/arch/x86/isa/specialize.isa @@ -153,7 +153,13 @@ let {{ return doRipRelativeDecode(Name, opTypes, env) elif opType.tag == None or opType.size == None: raise Exception, "Problem parsing operand tag: %s" % opType.tag - elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"): + elif opType.tag == "C": + env.addReg(ModRMRegIndex) + Name += "_C" + elif opType.tag == "D": + env.addReg(ModRMRegIndex) + Name += "_D" + elif opType.tag in ("G", "P", "S", "T", "V"): # Use the "reg" field of the ModRM byte to select the register env.addReg(ModRMRegIndex) Name += "_R" diff --git a/src/arch/x86/isa_traits.hh b/src/arch/x86/isa_traits.hh index 762f9b172..abb7694ed 100644 --- a/src/arch/x86/isa_traits.hh +++ b/src/arch/x86/isa_traits.hh @@ -59,6 +59,7 @@ #define __ARCH_X86_ISATRAITS_HH__ #include "arch/x86/intregs.hh" +#include "arch/x86/max_inst_regs.hh" #include "arch/x86/types.hh" #include "arch/x86/x86_traits.hh" #include "sim/host.hh" @@ -72,6 +73,8 @@ namespace X86ISA //This makes sure the little endian version of certain functions //are used. using namespace LittleEndianGuest; + using X86ISAInst::MaxInstSrcRegs; + using X86ISAInst::MaxInstDestRegs; // X86 does not have a delay slot #define ISA_HAS_DELAY_SLOT 0 @@ -121,10 +124,6 @@ namespace X86ISA // value const int SyscallPseudoReturnReg = INTREG_RDX; - //XXX These numbers are bogus - const int MaxInstSrcRegs = 10; - const int MaxInstDestRegs = 10; - //4k. This value is not constant on x86. const int LogVMPageSize = 12; const int VMPageSize = (1 << LogVMPageSize); diff --git a/src/arch/x86/miscregfile.cc b/src/arch/x86/miscregfile.cc index e2c39c7cd..71908098e 100644 --- a/src/arch/x86/miscregfile.cc +++ b/src/arch/x86/miscregfile.cc @@ -86,6 +86,8 @@ */ #include "arch/x86/miscregfile.hh" +#include "arch/x86/tlb.hh" +#include "cpu/thread_context.hh" #include "sim/serialize.hh" using namespace X86ISA; @@ -106,22 +108,15 @@ void MiscRegFile::clear() MiscReg MiscRegFile::readRegNoEffect(int miscReg) { - switch(miscReg) - { - case MISCREG_CR1: - case MISCREG_CR5: - case MISCREG_CR6: - case MISCREG_CR7: - case MISCREG_CR9: - case MISCREG_CR10: - case MISCREG_CR11: - case MISCREG_CR12: - case MISCREG_CR13: - case MISCREG_CR14: - case MISCREG_CR15: - panic("Tried to read invalid control register %d\n", miscReg); - break; - } + // Make sure we're not dealing with an illegal control register. + // Instructions should filter out these indexes, and nothing else should + // attempt to read them directly. + assert( miscReg != MISCREG_CR1 && + !(miscReg > MISCREG_CR4 && + miscReg < MISCREG_CR8) && + !(miscReg > MISCREG_CR8 && + miscReg <= MISCREG_CR15)); + return regVal[miscReg]; } @@ -132,29 +127,67 @@ MiscReg MiscRegFile::readReg(int miscReg, ThreadContext * tc) void MiscRegFile::setRegNoEffect(int miscReg, const MiscReg &val) { - switch(miscReg) - { - case MISCREG_CR1: - case MISCREG_CR5: - case MISCREG_CR6: - case MISCREG_CR7: - case MISCREG_CR9: - case MISCREG_CR10: - case MISCREG_CR11: - case MISCREG_CR12: - case MISCREG_CR13: - case MISCREG_CR14: - case MISCREG_CR15: - panic("Tried to write invalid control register %d\n", miscReg); - break; - } + // Make sure we're not dealing with an illegal control register. + // Instructions should filter out these indexes, and nothing else should + // attempt to write to them directly. + assert( miscReg != MISCREG_CR1 && + !(miscReg > MISCREG_CR4 && + miscReg < MISCREG_CR8) && + !(miscReg > MISCREG_CR8 && + miscReg <= MISCREG_CR15)); regVal[miscReg] = val; } void MiscRegFile::setReg(int miscReg, const MiscReg &val, ThreadContext * tc) { - setRegNoEffect(miscReg, val); + MiscReg newVal = val; + switch(miscReg) + { + case MISCREG_CR0: + { + CR0 toggled = regVal[miscReg] ^ val; + CR0 newCR0 = val; + Efer efer = regVal[MISCREG_EFER]; + if (toggled.pg && efer.lme) { + if (newCR0.pg) { + //Turning on long mode + efer.lma = 1; + regVal[MISCREG_EFER] = efer; + } else { + //Turning off long mode + efer.lma = 0; + regVal[MISCREG_EFER] = efer; + } + } + if (toggled.pg) { + tc->getITBPtr()->invalidateAll(); + tc->getDTBPtr()->invalidateAll(); + } + //This must always be 1. + newCR0.et = 1; + newVal = newCR0; + } + break; + case MISCREG_CR2: + break; + case MISCREG_CR3: + tc->getITBPtr()->invalidateNonGlobal(); + tc->getDTBPtr()->invalidateNonGlobal(); + break; + case MISCREG_CR4: + { + CR4 toggled = regVal[miscReg] ^ val; + if (toggled.pae || toggled.pse || toggled.pge) { + tc->getITBPtr()->invalidateAll(); + tc->getDTBPtr()->invalidateAll(); + } + } + break; + case MISCREG_CR8: + break; + } + setRegNoEffect(miscReg, newVal); } void MiscRegFile::serialize(std::ostream & os) diff --git a/src/arch/x86/miscregs.hh b/src/arch/x86/miscregs.hh index a516a2018..3a30b9800 100644 --- a/src/arch/x86/miscregs.hh +++ b/src/arch/x86/miscregs.hh @@ -258,6 +258,7 @@ namespace X86ISA MISCREG_DS, MISCREG_FS, MISCREG_GS, + MISCREG_INT, // This isn't actually used. // Hidden segment base field MISCREG_SEG_BASE_BASE = MISCREG_SEG_SEL_BASE + NumSegments, @@ -267,6 +268,7 @@ namespace X86ISA MISCREG_DS_BASE, MISCREG_FS_BASE, MISCREG_GS_BASE, + MISCREG_INT_BASE, // Hidden segment limit field MISCREG_SEG_LIMIT_BASE = MISCREG_SEG_BASE_BASE + NumSegments, @@ -276,6 +278,7 @@ namespace X86ISA MISCREG_DS_LIMIT, MISCREG_FS_LIMIT, MISCREG_GS_LIMIT, + MISCREG_INT_LIMIT, // This isn't actually used. // Hidden segment limit attributes MISCREG_SEG_ATTR_BASE = MISCREG_SEG_LIMIT_BASE + NumSegments, @@ -285,6 +288,7 @@ namespace X86ISA MISCREG_DS_ATTR, MISCREG_FS_ATTR, MISCREG_GS_ATTR, + MISCREG_INT_ATTR, // This isn't actually used. // System segment selectors MISCREG_SYSSEG_SEL_BASE = MISCREG_SEG_ATTR_BASE + NumSegments, diff --git a/src/arch/x86/pagetable.hh b/src/arch/x86/pagetable.hh index cc614168c..e42693c03 100644 --- a/src/arch/x86/pagetable.hh +++ b/src/arch/x86/pagetable.hh @@ -62,16 +62,26 @@ #include <string> #include "sim/host.hh" +#include "base/bitunion.hh" #include "base/misc.hh" class Checkpoint; namespace X86ISA { - struct VAddr - { - VAddr(Addr a) { panic("not implemented yet."); } - }; + BitUnion64(VAddr) + Bitfield<20, 12> longl1; + Bitfield<29, 21> longl2; + Bitfield<38, 30> longl3; + Bitfield<47, 39> longl4; + + Bitfield<20, 12> pael1; + Bitfield<29, 21> pael2; + Bitfield<31, 30> pael3; + + Bitfield<21, 12> norml1; + Bitfield<31, 22> norml2; + EndBitUnion(VAddr) struct TlbEntry { diff --git a/src/arch/x86/pagetable_walker.cc b/src/arch/x86/pagetable_walker.cc new file mode 100644 index 000000000..0472dcd21 --- /dev/null +++ b/src/arch/x86/pagetable_walker.cc @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2007 The Hewlett-Packard Development Company + * All rights reserved. + * + * Redistribution and use of this software in source and binary forms, + * with or without modification, are permitted provided that the + * following conditions are met: + * + * The software must be used only for Non-Commercial Use which means any + * use which is NOT directed to receiving any direct monetary + * compensation for, or commercial advantage from such use. Illustrative + * examples of non-commercial use are academic research, personal study, + * teaching, education and corporate research & development. + * Illustrative examples of commercial use are distributing products for + * commercial advantage and providing services using the software for + * commercial advantage. + * + * If you wish to use this software or functionality therein that may be + * covered by patents for commercial use, please contact: + * Director of Intellectual Property Licensing + * Office of Strategy and Technology + * Hewlett-Packard Company + * 1501 Page Mill Road + * Palo Alto, California 94304 + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. Redistributions + * in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. Neither the name of + * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. No right of + * sublicense is granted herewith. Derivatives of the software and + * output created using the software may be prepared, but only for + * Non-Commercial Uses. Derivatives of the software may be shared with + * others provided: (i) the others agree to abide by the list of + * conditions herein which includes the Non-Commercial Use restrictions; + * and (ii) such Derivatives of the software include the above copyright + * notice to acknowledge the contribution from this software where + * applicable, this list of conditions and the disclaimer below. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/x86/pagetable.hh" +#include "arch/x86/pagetable_walker.hh" +#include "arch/x86/tlb.hh" +#include "base/bitfield.hh" +#include "cpu/thread_context.hh" +#include "cpu/base.hh" +#include "mem/packet_access.hh" +#include "mem/request.hh" +#include "sim/system.hh" + +namespace X86ISA { + +// Unfortunately, the placement of the base field in a page table entry is +// very erratic and would make a mess here. It might be moved here at some +// point in the future. +BitUnion64(PageTableEntry) + Bitfield<63> nx; + Bitfield<11, 9> avl; + Bitfield<8> g; + Bitfield<7> ps; + Bitfield<6> d; + Bitfield<5> a; + Bitfield<4> pcd; + Bitfield<3> pwt; + Bitfield<2> u; + Bitfield<1> w; + Bitfield<0> p; +EndBitUnion(PageTableEntry) + +void +Walker::doNext(PacketPtr &read, PacketPtr &write) +{ + assert(state != Ready && state != Waiting); + write = NULL; + PageTableEntry pte; + if (size == 8) + pte = read->get<uint64_t>(); + else + pte = read->get<uint32_t>(); + VAddr vaddr = entry.vaddr; + bool uncacheable = pte.pcd; + Addr nextRead = 0; + bool doWrite = false; + bool badNX = pte.nx && (!tlb->allowNX() || !enableNX); + switch(state) { + case LongPML4: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl3 * size; + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (badNX) + panic("NX violation!\n"); + entry.noExec = pte.nx; + if (!pte.p) + panic("Page not present!\n"); + nextState = LongPDP; + break; + case LongPDP: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl2 * size; + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + nextState = LongPD; + break; + case LongPD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = + ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * size; + nextState = LongPTE; + break; + } else { + // 2 MB page + entry.size = 2 * (1 << 20); + entry.paddr = (uint64_t)pte & (mask(31) << 21); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } + case LongPTE: + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(40) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + case PAEPDP: + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * size; + if (!pte.p) + panic("Page not present!\n"); + nextState = PAEPD; + break; + case PAEPD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * size; + nextState = PAEPTE; + break; + } else { + // 2 MB page + entry.size = 2 * (1 << 20); + entry.paddr = (uint64_t)pte & (mask(31) << 21); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } + case PAEPTE: + doWrite = !pte.a; + pte.a = 1; + entry.writable = entry.writable && pte.w; + entry.user = entry.user && pte.u; + if (badNX) + panic("NX violation!\n"); + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(40) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 7); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + case PSEPD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (!pte.p) + panic("Page not present!\n"); + if (!pte.ps) { + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = + ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; + nextState = PTE; + break; + } else { + // 4 MB page + entry.size = 4 * (1 << 20); + entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22; + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 12); + entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + } + case PD: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (!pte.p) + panic("Page not present!\n"); + // 4 KB page + entry.size = 4 * (1 << 10); + nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; + nextState = PTE; + break; + nextState = PTE; + break; + case PTE: + doWrite = !pte.a; + pte.a = 1; + entry.writable = pte.w; + entry.user = pte.u; + if (!pte.p) + panic("Page not present!\n"); + entry.paddr = (uint64_t)pte & (mask(20) << 12); + entry.uncacheable = uncacheable; + entry.global = pte.g; + entry.patBit = bits(pte, 7); + entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); + tlb->insert(entry.vaddr, entry); + nextState = Ready; + delete read->req; + delete read; + read = NULL; + return; + default: + panic("Unknown page table walker state %d!\n"); + } + PacketPtr oldRead = read; + //If we didn't return, we're setting up another read. + uint32_t flags = oldRead->req->getFlags(); + if (uncacheable) + flags |= UNCACHEABLE; + else + flags &= ~UNCACHEABLE; + RequestPtr request = + new Request(nextRead, oldRead->getSize(), flags); + read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast); + read->allocate(); + //If we need to write, adjust the read packet to write the modified value + //back to memory. + if (doWrite) { + write = oldRead; + write->set<uint64_t>(pte); + write->cmd = MemCmd::WriteReq; + write->setDest(Packet::Broadcast); + } else { + write = NULL; + delete oldRead->req; + delete oldRead; + } +} + +void +Walker::start(ThreadContext * _tc, Addr vaddr) +{ + assert(state == Ready); + assert(!tc); + tc = _tc; + + VAddr addr = vaddr; + + //Figure out what we're doing. + CR3 cr3 = tc->readMiscRegNoEffect(MISCREG_CR3); + Addr top = 0; + // Check if we're in long mode or not + Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); + size = 8; + if (efer.lma) { + // Do long mode. + state = LongPML4; + top = (cr3.longPdtb << 12) + addr.longl4 * size; + } else { + // We're in some flavor of legacy mode. + CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); + if (cr4.pae) { + // Do legacy PAE. + state = PAEPDP; + top = (cr3.paePdtb << 5) + addr.pael3 * size; + } else { + size = 4; + top = (cr3.pdtb << 12) + addr.norml2 * size; + if (cr4.pse) { + // Do legacy PSE. + state = PSEPD; + } else { + // Do legacy non PSE. + state = PD; + } + } + } + + nextState = Ready; + entry.vaddr = vaddr; + + enableNX = efer.nxe; + + RequestPtr request = + new Request(top, size, PHYSICAL | cr3.pcd ? UNCACHEABLE : 0); + read = new Packet(request, MemCmd::ReadExReq, Packet::Broadcast); + read->allocate(); + Enums::MemoryMode memMode = sys->getMemoryMode(); + if (memMode == Enums::timing) { + tc->suspend(); + port.sendTiming(read); + } else if (memMode == Enums::atomic) { + do { + port.sendAtomic(read); + PacketPtr write = NULL; + doNext(read, write); + state = nextState; + nextState = Ready; + if (write) + port.sendAtomic(write); + } while(read); + tc = NULL; + state = Ready; + nextState = Waiting; + } else { + panic("Unrecognized memory system mode.\n"); + } +} + +bool +Walker::WalkerPort::recvTiming(PacketPtr pkt) +{ + return walker->recvTiming(pkt); +} + +bool +Walker::recvTiming(PacketPtr pkt) +{ + inflight--; + if (pkt->isResponse() && !pkt->wasNacked()) { + if (pkt->isRead()) { + assert(inflight); + assert(state == Waiting); + assert(!read); + state = nextState; + nextState = Ready; + PacketPtr write = NULL; + doNext(pkt, write); + state = Waiting; + read = pkt; + if (write) { + writes.push_back(write); + } + sendPackets(); + } else { + sendPackets(); + } + if (inflight == 0 && read == NULL && writes.size() == 0) { + tc->activate(0); + tc = NULL; + state = Ready; + nextState = Waiting; + } + } else if (pkt->wasNacked()) { + pkt->reinitNacked(); + if (!port.sendTiming(pkt)) { + retrying = true; + if (pkt->isWrite()) { + writes.push_back(pkt); + } else { + assert(!read); + read = pkt; + } + } else { + inflight++; + } + } + return true; +} + +Tick +Walker::WalkerPort::recvAtomic(PacketPtr pkt) +{ + return 0; +} + +void +Walker::WalkerPort::recvFunctional(PacketPtr pkt) +{ + return; +} + +void +Walker::WalkerPort::recvStatusChange(Status status) +{ + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } + return; + } + + panic("Unexpected recvStatusChange.\n"); +} + +void +Walker::WalkerPort::recvRetry() +{ + walker->recvRetry(); +} + +void +Walker::recvRetry() +{ + retrying = false; + sendPackets(); +} + +void +Walker::sendPackets() +{ + //If we're already waiting for the port to become available, just return. + if (retrying) + return; + + //Reads always have priority + if (read) { + if (!port.sendTiming(read)) { + retrying = true; + return; + } else { + inflight++; + delete read->req; + delete read; + read = NULL; + } + } + //Send off as many of the writes as we can. + while (writes.size()) { + PacketPtr write = writes.back(); + if (!port.sendTiming(write)) { + retrying = true; + return; + } else { + inflight++; + delete write->req; + delete write; + writes.pop_back(); + } + } +} + +Port * +Walker::getPort(const std::string &if_name, int idx) +{ + if (if_name == "port") + return &port; + else + panic("No page table walker port named %s!\n", if_name); +} + +} + +X86ISA::Walker * +X86PagetableWalkerParams::create() +{ + return new X86ISA::Walker(this); +} diff --git a/src/arch/x86/pagetable_walker.hh b/src/arch/x86/pagetable_walker.hh new file mode 100644 index 000000000..324f16f3c --- /dev/null +++ b/src/arch/x86/pagetable_walker.hh @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2007 The Hewlett-Packard Development Company + * All rights reserved. + * + * Redistribution and use of this software in source and binary forms, + * with or without modification, are permitted provided that the + * following conditions are met: + * + * The software must be used only for Non-Commercial Use which means any + * use which is NOT directed to receiving any direct monetary + * compensation for, or commercial advantage from such use. Illustrative + * examples of non-commercial use are academic research, personal study, + * teaching, education and corporate research & development. + * Illustrative examples of commercial use are distributing products for + * commercial advantage and providing services using the software for + * commercial advantage. + * + * If you wish to use this software or functionality therein that may be + * covered by patents for commercial use, please contact: + * Director of Intellectual Property Licensing + * Office of Strategy and Technology + * Hewlett-Packard Company + * 1501 Page Mill Road + * Palo Alto, California 94304 + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. Redistributions + * in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. Neither the name of + * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. No right of + * sublicense is granted herewith. Derivatives of the software and + * output created using the software may be prepared, but only for + * Non-Commercial Uses. Derivatives of the software may be shared with + * others provided: (i) the others agree to abide by the list of + * conditions herein which includes the Non-Commercial Use restrictions; + * and (ii) such Derivatives of the software include the above copyright + * notice to acknowledge the contribution from this software where + * applicable, this list of conditions and the disclaimer below. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_X86_PAGE_TABLE_WALKER_HH__ +#define __ARCH_X86_PAGE_TABLE_WALKER_HH__ + +#include <vector> + +#include "arch/x86/pagetable.hh" +#include "arch/x86/tlb.hh" +#include "mem/mem_object.hh" +#include "mem/packet.hh" +#include "params/X86PagetableWalker.hh" +#include "sim/host.hh" + +class ThreadContext; + +namespace X86ISA +{ + class Walker : public MemObject + { + public: + enum State { + Ready, + Waiting, + // Long mode + LongPML4, LongPDP, LongPD, LongPTE, + // PAE legacy mode + PAEPDP, PAEPD, PAEPTE, + // Non PAE legacy mode with and without PSE + PSEPD, PD, PTE + }; + + // Act on the current state and determine what to do next. read + // should be the packet that just came back from a read and write + // should be NULL. When the function returns, read is either NULL + // if the machine is finished, or points to a packet to initiate + // the next read. If any write is required to update an "accessed" + // bit, write will point to a packet to do the write. Otherwise it + // will be NULL. + void doNext(PacketPtr &read, PacketPtr &write); + + // Kick off the state machine. + void start(ThreadContext * _tc, Addr vaddr); + + protected: + + /* + * State having to do with sending packets. + */ + PacketPtr read; + std::vector<PacketPtr> writes; + + // How many memory operations are in flight. + unsigned inflight; + + bool retrying; + + /* + * Functions for dealing with packets. + */ + bool recvTiming(PacketPtr pkt); + void recvRetry(); + + void sendPackets(); + + /* + * Port for accessing memory + */ + class WalkerPort : public Port + { + public: + WalkerPort(const std::string &_name, Walker * _walker) : + Port(_name, _walker), walker(_walker), + snoopRangeSent(false) + {} + + protected: + Walker * walker; + + bool snoopRangeSent; + + bool recvTiming(PacketPtr pkt); + Tick recvAtomic(PacketPtr pkt); + void recvFunctional(PacketPtr pkt); + void recvStatusChange(Status status); + void recvRetry(); + void getDeviceAddressRanges(AddrRangeList &resp, + bool &snoop) + { + resp.clear(); + snoop = true; + } + }; + + Port *getPort(const std::string &if_name, int idx = -1); + + friend class WalkerPort; + + WalkerPort port; + + // The TLB we're supposed to load. + TLB * tlb; + System * sys; + + /* + * State machine state. + */ + ThreadContext * tc; + State state; + State nextState; + int size; + bool enableNX; + TlbEntry entry; + + public: + + void setTLB(TLB * _tlb) + { + tlb = _tlb; + } + + typedef X86PagetableWalkerParams Params; + + Walker(const Params *params) : + MemObject(params), + read(NULL), inflight(0), retrying(false), + port(name() + ".port", this), + tlb(NULL), sys(params->system), + tc(NULL), state(Ready), nextState(Ready) + { + } + }; +} +#endif // __ARCH_X86_PAGE_TABLE_WALKER_HH__ diff --git a/src/arch/x86/process.cc b/src/arch/x86/process.cc index c6246c76b..633b2f136 100644 --- a/src/arch/x86/process.cc +++ b/src/arch/x86/process.cc @@ -461,6 +461,8 @@ X86LiveProcess::argsInit(int intSize, int pageSize) threadContexts[0]->setIntReg(StackPointerReg, stack_min); Addr prog_entry = objFile->entryPoint(); + // There doesn't need to be any segment base added in since we're dealing + // with the flat segmentation model. threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); diff --git a/src/arch/x86/segmentregs.hh b/src/arch/x86/segmentregs.hh index 9fd9bcb0e..524b756d6 100644 --- a/src/arch/x86/segmentregs.hh +++ b/src/arch/x86/segmentregs.hh @@ -68,6 +68,7 @@ namespace X86ISA SEGMENT_REG_DS, SEGMENT_REG_FS, SEGMENT_REG_GS, + SEGMENT_REG_INT, NUM_SEGMENTREGS }; diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index 6afee6d72..68a22bc16 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -64,11 +64,15 @@ #include "arch/x86/x86_traits.hh" #include "base/bitfield.hh" #include "base/trace.hh" +#include "config/full_system.hh" #include "cpu/thread_context.hh" #include "cpu/base.hh" #include "mem/packet_access.hh" #include "mem/request.hh" -#include "sim/system.hh" + +#if FULL_SYSTEM +#include "arch/x86/pagetable_walker.hh" +#endif namespace X86ISA { @@ -79,6 +83,11 @@ TLB::TLB(const Params *p) : SimObject(p), size(p->size) for (int x = 0; x < size; x++) freeList.push_back(&tlb[x]); + +#if FULL_SYSTEM + walker = p->walker; + walker->setTLB(this); +#endif } void @@ -119,14 +128,38 @@ TLB::lookup(Addr va, bool update_lru) return NULL; } +#if FULL_SYSTEM +void +TLB::walk(ThreadContext * _tc, Addr vaddr) +{ + walker->start(_tc, vaddr); +} +#endif + void TLB::invalidateAll() { + DPRINTF(TLB, "Invalidating all entries.\n"); + while (!entryList.empty()) { + TlbEntry *entry = entryList.front(); + entryList.pop_front(); + freeList.push_back(entry); + } } void TLB::invalidateNonGlobal() { + DPRINTF(TLB, "Invalidating all non global entries.\n"); + EntryList::iterator entryIt; + for (entryIt = entryList.begin(); entryIt != entryList.end();) { + if (!(*entryIt)->global) { + freeList.push_back(*entryIt); + entryList.erase(entryIt++); + } else { + entryIt++; + } + } } void @@ -150,7 +183,8 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) // If this is true, we're dealing with a request to read an internal // value. - if (seg == NUM_SEGMENTREGS) { + if (seg == SEGMENT_REG_INT) { + DPRINTF(TLB, "Addresses references internal memory.\n"); Addr prefix = vaddr & IntAddrPrefixMask; if (prefix == IntAddrPrefixCPUID) { panic("CPUID memory space not yet implemented!\n"); @@ -448,10 +482,12 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) // If protected mode has been enabled... if (cr0.pe) { + DPRINTF(TLB, "In protected mode.\n"); Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR); // If we're not in 64-bit mode, do protection/limit checks if (!efer.lma || !csAttr.longMode) { + DPRINTF(TLB, "Not in long mode. Checking segment protection.\n"); SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg)); if (!attr.writable && write) return new GeneralProtection(0); @@ -460,6 +496,7 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg)); Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg)); if (!attr.expandDown) { + DPRINTF(TLB, "Checking an expand down segment.\n"); // We don't have to worry about the access going around the // end of memory because accesses will be broken up into // pieces at boundaries aligned on sizes smaller than an @@ -484,25 +521,28 @@ TLB::translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute) } // If paging is enabled, do the translation. if (cr0.pg) { + DPRINTF(TLB, "Paging enabled.\n"); // The vaddr already has the segment base applied. TlbEntry *entry = lookup(vaddr); if (!entry) { -#if FULL_SYSTEM - return new TlbFault(); -#else return new TlbFault(vaddr); -#endif } else { // Do paging protection checks. - Addr paddr = entry->paddr | (vaddr & mask(12)); + DPRINTF(TLB, "Entry found with paddr %#x, doing protection checks.\n", entry->paddr); + Addr paddr = entry->paddr | (vaddr & (entry->size-1)); + DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr); req->setPaddr(paddr); } } else { //Use the address which already has segmentation applied. + DPRINTF(TLB, "Paging disabled.\n"); + DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, vaddr); req->setPaddr(vaddr); } } else { // Real mode + DPRINTF(TLB, "In real mode.\n"); + DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, vaddr); req->setPaddr(vaddr); } return NoFault; diff --git a/src/arch/x86/tlb.hh b/src/arch/x86/tlb.hh index 12739379c..a361c2291 100644 --- a/src/arch/x86/tlb.hh +++ b/src/arch/x86/tlb.hh @@ -59,10 +59,13 @@ #define __ARCH_X86_TLB_HH__ #include <list> +#include <vector> +#include <string> #include "arch/x86/pagetable.hh" #include "arch/x86/segmentregs.hh" #include "config/full_system.hh" +#include "mem/mem_object.hh" #include "mem/request.hh" #include "params/X86DTB.hh" #include "params/X86ITB.hh" @@ -74,16 +77,26 @@ class Packet; namespace X86ISA { + class Walker; + static const unsigned StoreCheck = 1 << NUM_SEGMENTREGS; + class TLB; + class TLB : public SimObject { -#if !FULL_SYSTEM protected: friend class FakeITLBFault; friend class FakeDTLBFault; -#endif + + bool _allowNX; + public: + bool allowNX() const + { + return _allowNX; + } + typedef X86TLBParams Params; TLB(const Params *p); @@ -91,28 +104,38 @@ namespace X86ISA TlbEntry *lookup(Addr va, bool update_lru = true); +#if FULL_SYSTEM protected: - int size; - - TlbEntry * tlb; - typedef std::list<TlbEntry *> EntryList; - EntryList freeList; - EntryList entryList; + Walker * walker; - void insert(Addr vpn, TlbEntry &entry); + void walk(ThreadContext * _tc, Addr vaddr); +#endif + public: void invalidateAll(); void invalidateNonGlobal(); void demapPage(Addr va); + protected: + int size; + + TlbEntry * tlb; + + typedef std::list<TlbEntry *> EntryList; + EntryList freeList; + EntryList entryList; + template<class TlbFault> Fault translate(RequestPtr &req, ThreadContext *tc, bool write, bool execute); public: + + void insert(Addr vpn, TlbEntry &entry); + // Checkpointing virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); @@ -124,6 +147,7 @@ namespace X86ISA typedef X86ITBParams Params; ITB(const Params *p) : TLB(p) { + _allowNX = false; } Fault translate(RequestPtr &req, ThreadContext *tc); @@ -137,6 +161,7 @@ namespace X86ISA typedef X86DTBParams Params; DTB(const Params *p) : TLB(p) { + _allowNX = true; } Fault translate(RequestPtr &req, ThreadContext *tc, bool write); #if FULL_SYSTEM diff --git a/src/arch/x86/utility.cc b/src/arch/x86/utility.cc index 0eee0c93e..b2a6ea040 100644 --- a/src/arch/x86/utility.cc +++ b/src/arch/x86/utility.cc @@ -60,6 +60,7 @@ #include "arch/x86/segmentregs.hh" #include "arch/x86/utility.hh" #include "arch/x86/x86_traits.hh" +#include "sim/system.hh" namespace X86ISA { @@ -253,20 +254,177 @@ void initCPU(ThreadContext *tc, int cpuId) #endif +#if FULL_SYSTEM void startupCPU(ThreadContext *tc, int cpuId) { if (cpuId == 0) { // This is the boot strap processor (BSP). Initialize it to look like - // the boot loader has just turned control over to the 64 bit OS. - - // Enable paging, turn on long mode, etc. + // the boot loader has just turned control over to the 64 bit OS. We + // won't actually set up real mode or legacy protected mode descriptor + // tables because we aren't executing any code that would require + // them. We do, however toggle the control bits in the correct order + // while allowing consistency checks and the underlying mechansims + // just to be safe. + + const int NumPDTs = 4; + + const Addr PageMapLevel4 = 0x70000; + const Addr PageDirPtrTable = 0x71000; + const Addr PageDirTable[NumPDTs] = + {0x72000, 0x73000, 0x74000, 0x75000}; + const Addr GDTBase = 0x76000; + + const int PML4Bits = 9; + const int PDPTBits = 9; + const int PDTBits = 9; + + // Get a port to write the page tables and descriptor tables. + FunctionalPort * physPort = tc->getPhysPort(); + + /* + * Set up the gdt. + */ + // Place holder at selector 0 + uint64_t nullDescriptor = 0; + physPort->writeBlob(GDTBase, (uint8_t *)(&nullDescriptor), 8); + + //64 bit code segment + SegDescriptor csDesc = 0; + csDesc.type.c = 0; // Not conforming + csDesc.dpl = 0; // Privelege level 0 + csDesc.p = 1; // Present + csDesc.l = 1; // 64 bit + csDesc.d = 0; // default operand size + //Because we're dealing with a pointer and I don't think it's + //guaranteed that there isn't anything in a nonvirtual class between + //it's beginning in memory and it's actual data, we'll use an + //intermediary. + uint64_t csDescVal = csDesc; + physPort->writeBlob(GDTBase, (uint8_t *)(&csDescVal), 8); + + tc->setMiscReg(MISCREG_GDTR_BASE, GDTBase); + tc->setMiscReg(MISCREG_GDTR_LIMIT, 0xF); + + /* + * Identity map the first 4GB of memory. In order to map this region + * of memory in long mode, there needs to be one actual page map level + * 4 entry which points to one page directory pointer table which + * points to 4 different page directory tables which are full of two + * megabyte pages. All of the other entries in valid tables are set + * to indicate that they don't pertain to anything valid and will + * cause a fault if used. + */ + + // Put valid values in all of the various table entries which indicate + // that those entries don't point to further tables or pages. Then + // set the values of those entries which are needed. + + // Page Map Level 4 + + // read/write, user, not present + uint64_t pml4e = X86ISA::htog(0x6); + for (int offset = 0; offset < (1 << PML4Bits) * 8; offset += 8) { + physPort->writeBlob(PageMapLevel4 + offset, (uint8_t *)(&pml4e), 8); + } + // Point to the only PDPT + pml4e = X86ISA::htog(0x7 | PageDirPtrTable); + physPort->writeBlob(PageMapLevel4, (uint8_t *)(&pml4e), 8); + + // Page Directory Pointer Table + + // read/write, user, not present + uint64_t pdpe = X86ISA::htog(0x6); + for (int offset = 0; offset < (1 << PDPTBits) * 8; offset += 8) { + physPort->writeBlob(PageDirPtrTable + offset, + (uint8_t *)(&pdpe), 8); + } + // Point to the PDTs + for (int table = 0; table < NumPDTs; table++) { + pdpe = X86ISA::htog(0x7 | PageDirTable[table]); + physPort->writeBlob(PageDirPtrTable + table * 8, + (uint8_t *)(&pdpe), 8); + } + + // Page Directory Tables + + Addr base = 0; + const Addr pageSize = 2 << 20; + for (int table = 0; table < NumPDTs; table++) { + for (int offset = 0; offset < (1 << PDTBits) * 8; offset += 8) { + // read/write, user, present, 4MB + uint64_t pdte = X86ISA::htog(0x87 | base); + physPort->writeBlob(PageDirTable[table] + offset, + (uint8_t *)(&pdte), 8); + base += pageSize; + } + } + + /* + * Transition from real mode all the way up to Long mode + */ + CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); + //Turn off paging. + cr0.pg = 0; + tc->setMiscReg(MISCREG_CR0, cr0); + //Turn on protected mode. + cr0.pe = 1; + tc->setMiscReg(MISCREG_CR0, cr0); + + CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); + //Turn on pae. + cr4.pae = 1; + tc->setMiscReg(MISCREG_CR4, cr4); + + //Point to the page tables. + tc->setMiscReg(MISCREG_CR3, PageMapLevel4); + + Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER); + //Enable long mode. + efer.lme = 1; + tc->setMiscReg(MISCREG_EFER, efer); + + //Activate long mode. + cr0.pg = 1; + tc->setMiscReg(MISCREG_CR0, cr0); + + /* + * Far jump into 64 bit mode. + */ + // Set the selector + tc->setMiscReg(MISCREG_CS, 1); + // Manually set up the segment attributes. In the future when there's + // other existing functionality to do this, that could be used + // instead. + SegAttr csAttr = 0; + csAttr.writable = 0; + csAttr.readable = 1; + csAttr.expandDown = 0; + csAttr.dpl = 0; + csAttr.defaultSize = 0; + csAttr.longMode = 1; + tc->setMiscReg(MISCREG_CS_ATTR, csAttr); + + tc->setPC(tc->getSystemPtr()->kernelEntry); + tc->setNextPC(tc->readPC()); + + // We should now be in long mode. Yay! tc->activate(0); } else { // This is an application processor (AP). It should be initialized to // look like only the BIOS POST has run on it and put then put it into // a halted state. + tc->suspend(); } } +#else + +void startupCPU(ThreadContext *tc, int cpuId) +{ + tc->activate(0); +} + +#endif + } //namespace X86_ISA diff --git a/src/base/hashmap.hh b/src/base/hashmap.hh index b78cc02e8..f8d799780 100644 --- a/src/base/hashmap.hh +++ b/src/base/hashmap.hh @@ -59,7 +59,7 @@ namespace m5 { // namespace __hash_namespace { -#if !defined(__LP64__) && !defined(__alpha__) && !defined(__SUNPRO_CC) +#if defined(__APPLE__) || !defined(__LP64__) && !defined(__alpha__) && !defined(__SUNPRO_CC) template<> struct hash<uint64_t> { size_t operator()(uint64_t r) const { diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 27ca8ce1e..f0284b2cf 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -58,7 +58,7 @@ class DerivO3CPU(BaseCPU): cachePorts = Param.Unsigned(200, "Cache Ports") icache_port = Port("Instruction Port") dcache_port = Port("Data Port") - _mem_ports = ['icache_port', 'dcache_port'] + _mem_ports = BaseCPU._mem_ports + ['icache_port', 'dcache_port'] decodeToFetchDelay = Param.Unsigned(1, "Decode to fetch delay") renameToFetchDelay = Param.Unsigned(1 ,"Rename to fetch delay") diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index efbbc2329..55584629e 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -289,9 +289,13 @@ O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc) // Copy the misc regs. TheISA::copyMiscRegs(tc, this); - // Then finally set the PC and the next PC. + // Then finally set the PC, the next PC, the nextNPC, the micropc, and the + // next micropc. cpu->setPC(tc->readPC(), tid); cpu->setNextPC(tc->readNextPC(), tid); + cpu->setNextNPC(tc->readNextNPC(), tid); + cpu->setMicroPC(tc->readMicroPC(), tid); + cpu->setNextMicroPC(tc->readNextMicroPC(), tid); #if !FULL_SYSTEM this->thread->funcExeInst = tc->readFuncExeInst(); #endif @@ -450,6 +454,30 @@ O3ThreadContext<Impl>::setNextPC(uint64_t val) template <class Impl> void +O3ThreadContext<Impl>::setMicroPC(uint64_t val) +{ + cpu->setMicroPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setNextMicroPC(uint64_t val) +{ + cpu->setNextMicroPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void O3ThreadContext<Impl>::setMiscRegNoEffect(int misc_reg, const MiscReg &val) { cpu->setMiscRegNoEffect(misc_reg, val, thread->readTid()); diff --git a/src/cpu/simple/AtomicSimpleCPU.py b/src/cpu/simple/AtomicSimpleCPU.py index bfd1825c2..28c2aa9c9 100644 --- a/src/cpu/simple/AtomicSimpleCPU.py +++ b/src/cpu/simple/AtomicSimpleCPU.py @@ -41,4 +41,5 @@ class AtomicSimpleCPU(BaseCPU): icache_port = Port("Instruction Port") dcache_port = Port("Data Port") physmem_port = Port("Physical Memory Port") - _mem_ports = ['icache_port', 'dcache_port', 'physmem_port'] + _mem_ports = BaseCPU._mem_ports + \ + ['icache_port', 'dcache_port', 'physmem_port'] diff --git a/src/cpu/simple/TimingSimpleCPU.py b/src/cpu/simple/TimingSimpleCPU.py index 2fcde175c..7e777e813 100644 --- a/src/cpu/simple/TimingSimpleCPU.py +++ b/src/cpu/simple/TimingSimpleCPU.py @@ -38,4 +38,4 @@ class TimingSimpleCPU(BaseCPU): profile = Param.Latency('0ns', "trace the kernel stack") icache_port = Port("Instruction Port") dcache_port = Port("Data Port") - _mem_ports = ['icache_port', 'dcache_port'] + _mem_ports = BaseCPU._mem_ports + ['icache_port', 'dcache_port'] diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc index 6220305b8..54165f293 100644 --- a/src/mem/page_table.cc +++ b/src/mem/page_table.cc @@ -156,7 +156,7 @@ PageTable::serialize(std::ostream &os) PTableItr iter = pTable.begin(); PTableItr end = pTable.end(); while (iter != end) { - os << "\n[" << csprintf("%s.Entry%d", name(), count) << "]\n"; + os << "\n[" << csprintf("%s.Entry%d", process->name(), count) << "]\n"; paramOut(os, "vaddr", iter->first); iter->second.serialize(os); @@ -178,9 +178,9 @@ PageTable::unserialize(Checkpoint *cp, const std::string §ion) pTable.clear(); while(i < count) { - paramIn(cp, csprintf("%s.Entry%d", name(), i), "vaddr", vaddr); + paramIn(cp, csprintf("%s.Entry%d", process->name(), i), "vaddr", vaddr); entry = new TheISA::TlbEntry(); - entry->unserialize(cp, csprintf("%s.Entry%d", name(), i)); + entry->unserialize(cp, csprintf("%s.Entry%d", process->name(), i)); pTable[vaddr] = *entry; ++i; } diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index d3e7d7975..78df6bef1 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -793,12 +793,14 @@ class SimObject(object): # necessary to construct it. Does *not* recursively create # children. def getCCObject(self): - params = self.getCCParams() if not self._ccObject: - self._ccObject = -1 # flag to catch cycles in recursion + # Cycles in the configuration heirarchy are not supported. This + # will catch the resulting recursion and stop. + self._ccObject = -1 + params = self.getCCParams() self._ccObject = params.create() elif self._ccObject == -1: - raise RuntimeError, "%s: recursive call to getCCObject()" \ + raise RuntimeError, "%s: Cycle found in configuration heirarchy." \ % self.path() return self._ccObject |