diff options
Diffstat (limited to 'src')
78 files changed, 5855 insertions, 900 deletions
diff --git a/src/arch/SConscript b/src/arch/SConscript index c3ff69f46..0ac25b6c7 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -51,6 +51,7 @@ isa_switch_hdrs = Split(''' locked_mem.hh mmaped_ipr.hh process.hh + predecoder.hh regfile.hh remote_gdb.hh stacktrace.hh diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index b62372f66..af1a91a62 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -728,8 +728,10 @@ decode OPCODE default Unknown::unknown() { 0: OpcdecFault::hw_st_quad(); 1: decode HW_LDST_QUAD { format HwLoad { - 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L); - 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q); + 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, + L, IsSerializing, IsSerializeBefore); + 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, + Q, IsSerializing, IsSerializeBefore); } } } @@ -740,9 +742,9 @@ decode OPCODE default Unknown::unknown() { 1: decode HW_LDST_COND { 0: decode HW_LDST_QUAD { 0: hw_st({{ EA = (Rb + disp) & ~3; }}, - {{ Mem.ul = Ra<31:0>; }}, L); + {{ Mem.ul = Ra<31:0>; }}, L, IsSerializing, IsSerializeBefore); 1: hw_st({{ EA = (Rb + disp) & ~7; }}, - {{ Mem.uq = Ra.uq; }}, Q); + {{ Mem.uq = Ra.uq; }}, Q, IsSerializing, IsSerializeBefore); } 1: FailUnimpl::hw_st_cond(); diff --git a/src/arch/alpha/predecoder.hh b/src/arch/alpha/predecoder.hh new file mode 100644 index 000000000..650f2bfa2 --- /dev/null +++ b/src/arch/alpha/predecoder.hh @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_ALPHA_PREDECODER_HH__ +#define __ARCH_ALPHA_PREDECODER_HH__ + +#include "arch/alpha/types.hh" +#include "base/misc.hh" +#include "config/full_system.hh" +#include "sim/host.hh" + +class ThreadContext; + +namespace AlphaISA +{ + class Predecoder + { + protected: + ThreadContext * tc; + //The pc of the current instruction + Addr fetchPC; + //The extended machine instruction being generated + ExtMachInst ext_inst; + + public: + Predecoder(ThreadContext * _tc) : tc(_tc) + {} + + ThreadContext * getTC() + { + return tc; + } + + void setTC(ThreadContext * _tc) + { + tc = _tc; + } + + void process() + { + } + + //Use this to give data to the predecoder. This should be used + //when there is control flow. + void moreBytes(Addr pc, Addr off, MachInst inst) + { + fetchPC = pc; + assert(off == 0); + ext_inst = inst; +#if FULL_SYSTEM + if (pc && 0x1) + ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32); +#endif + } + + //Use this to give data to the predecoder. This should be used + //when instructions are executed in order. + void moreBytes(MachInst machInst) + { + moreBytes(fetchPC + sizeof(machInst), 0, machInst); + } + + bool needMoreBytes() + { + return true; + } + + bool extMachInstReady() + { + return true; + } + + //This returns a constant reference to the ExtMachInst to avoid a copy + const ExtMachInst & getExtMachInst() + { + return ext_inst; + } + }; +}; + +#endif // __ARCH_ALPHA_PREDECODER_HH__ diff --git a/src/arch/alpha/utility.hh b/src/arch/alpha/utility.hh index b7844c7eb..c20394a92 100644 --- a/src/arch/alpha/utility.hh +++ b/src/arch/alpha/utility.hh @@ -48,19 +48,6 @@ namespace AlphaISA return (tc->readMiscRegNoEffect(AlphaISA::IPR_DTB_CM) & 0x18) != 0; } - static inline ExtMachInst - makeExtMI(MachInst inst, Addr pc) { -#if FULL_SYSTEM - ExtMachInst ext_inst = inst; - if (pc && 0x1) - return ext_inst|=(static_cast<ExtMachInst>(pc & 0x1) << 32); - else - return ext_inst; -#else - return ExtMachInst(inst); -#endif - } - inline bool isCallerSaveIntegerRegister(unsigned int reg) { panic("register classification not implemented"); return (reg >= 1 && reg <= 8 || reg >= 22 && reg <= 25 || reg == 27); diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index 21860a2e1..f3981a6eb 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -81,12 +81,12 @@ tokens = reserved + ( # code literal 'CODELIT', - # ( ) [ ] { } < > , ; : :: * + # ( ) [ ] { } < > , ; . : :: * 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'LBRACE', 'RBRACE', 'LESS', 'GREATER', 'EQUALS', - 'COMMA', 'SEMI', 'COLON', 'DBLCOLON', + 'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON', 'ASTERISK', # C preprocessor directives @@ -113,6 +113,7 @@ t_GREATER = r'\>' t_EQUALS = r'=' t_COMMA = r',' t_SEMI = r';' +t_DOT = r'\.' t_COLON = r':' t_DBLCOLON = r'::' t_ASTERISK = r'\*' @@ -261,6 +262,7 @@ def p_defs_and_outputs_1(t): def p_def_or_output(t): '''def_or_output : def_format | def_bitfield + | def_bitfield_struct | def_template | def_operand_types | def_operands @@ -363,6 +365,23 @@ def p_def_bitfield_1(t): hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr) t[0] = GenCode(header_output = hash_define) +# alternate form for structure member: 'def bitfield <ID> <ID>' +def p_def_bitfield_struct(t): + 'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI' + if (t[2] != ''): + error(t.lineno(1), 'error: structure bitfields are always unsigned.') + expr = 'machInst.%s' % t[5] + hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr) + t[0] = GenCode(header_output = hash_define) + +def p_id_with_dot_0(t): + 'id_with_dot : ID' + t[0] = t[1] + +def p_id_with_dot_1(t): + 'id_with_dot : ID DOT id_with_dot' + t[0] = t[1] + t[2] + t[3] + def p_opt_signed_0(t): 'opt_signed : SIGNED' t[0] = t[1] diff --git a/src/arch/mips/predecoder.hh b/src/arch/mips/predecoder.hh new file mode 100644 index 000000000..a25cce8a7 --- /dev/null +++ b/src/arch/mips/predecoder.hh @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_MIPS_PREDECODER_HH__ +#define __ARCH_MIPS_PREDECODER_HH__ + +#include "arch/mips/types.hh" +#include "base/misc.hh" +#include "sim/host.hh" + +class ThreadContext; + +namespace MipsISA +{ + class Predecoder + { + protected: + ThreadContext * tc; + //The extended machine instruction being generated + ExtMachInst emi; + + public: + Predecoder(ThreadContext * _tc) : tc(_tc) + {} + + ThreadContext * getTC() + { + return tc; + } + + void setTC(ThreadContext * _tc) + { + tc = _tc; + } + + void process() + { + } + + //Use this to give data to the predecoder. This should be used + //when there is control flow. + void moreBytes(Addr currPC, Addr off, MachInst inst) + { + assert(off == 0); + emi = inst; + } + + //Use this to give data to the predecoder. This should be used + //when instructions are executed in order. + void moreBytes(MachInst machInst) + { + moreBytes(0, 0, machInst); + } + + bool needMoreBytes() + { + return true; + } + + bool extMachInstReady() + { + return true; + } + + //This returns a constant reference to the ExtMachInst to avoid a copy + const ExtMachInst & getExtMachInst() + { + return emi; + } + }; +}; + +#endif // __ARCH_MIPS_PREDECODER_HH__ diff --git a/src/arch/mips/utility.hh b/src/arch/mips/utility.hh index b5c1e31e1..12db1de57 100644 --- a/src/arch/mips/utility.hh +++ b/src/arch/mips/utility.hh @@ -88,19 +88,6 @@ namespace MipsISA { return 0; } - static inline ExtMachInst - makeExtMI(MachInst inst, ThreadContext * xc) { -#if FULL_SYSTEM - ExtMachInst ext_inst = inst; - if (xc->readPC() && 0x1) - return ext_inst|=(static_cast<ExtMachInst>(xc->readPC() & 0x1) << 32); - else - return ext_inst; -#else - return ExtMachInst(inst); -#endif - } - inline void startupCPU(ThreadContext *tc, int cpuId) { tc->activate(0); diff --git a/src/arch/sparc/isa/decoder.isa b/src/arch/sparc/isa/decoder.isa index 70afe19b6..68b2183ad 100644 --- a/src/arch/sparc/isa/decoder.isa +++ b/src/arch/sparc/isa/decoder.isa @@ -185,25 +185,25 @@ decode OP default Unknown::unknown() }}, ',a'); } default: decode BPCC { - 0x0: fbpcc0(22, {{ + 0x0: fbpfcc0(19, {{ if(passesFpCondition(Fsr<11:10>, COND2)) NNPC = xc->readPC() + disp; else handle_annul }}); - 0x1: fbpcc1(22, {{ + 0x1: fbpfcc1(19, {{ if(passesFpCondition(Fsr<33:32>, COND2)) NNPC = xc->readPC() + disp; else handle_annul }}); - 0x2: fbpcc2(22, {{ + 0x2: fbpfcc2(19, {{ if(passesFpCondition(Fsr<35:34>, COND2)) NNPC = xc->readPC() + disp; else handle_annul }}); - 0x3: fbpcc3(22, {{ + 0x3: fbpfcc3(19, {{ if(passesFpCondition(Fsr<37:36>, COND2)) NNPC = xc->readPC() + disp; else @@ -426,19 +426,22 @@ decode OP default Unknown::unknown() {{Rs1<63:> == val2<63:> && val2<63:> != resTemp<63:>}} ); 0x24: mulscc({{ - int64_t resTemp, multiplicand = Rs2_or_imm13; - int32_t multiplier = Rs1<31:0>; int32_t savedLSB = Rs1<0:>; - multiplier = multiplier<31:1> | - ((Ccr<3:3> ^ Ccr<1:1>) << 32); - if(!Y<0:>) - multiplicand = 0; - Rd = resTemp = multiplicand + multiplier; + + //Step 1 + int64_t multiplicand = Rs2_or_imm13; + //Step 2 + int32_t partialP = Rs1<31:1> | + ((Ccr<3:3> ^ Ccr<1:1>) << 31); + //Step 3 + int32_t added = Y<0:> ? multiplicand : 0; + Rd = partialP + added; + //Steps 4 & 5 Y = Y<31:1> | (savedLSB << 31);}}, - {{((multiplicand<31:0> + multiplier<31:0>)<32:0>)}}, - {{multiplicand<31:> == multiplier<31:> && multiplier<31:> != resTemp<31:>}}, - {{((multiplicand >> 1) + (multiplier >> 1) + (multiplicand & multiplier & 0x1))<63:>}}, - {{multiplicand<63:> == multiplier<63:> && multiplier<63:> != resTemp<63:>}} + {{((partialP<31:0> + added<31:0>)<32:0>)}}, + {{partialP<31:> == added<31:> && added<31:> != Rd<31:>}}, + {{((partialP >> 1) + (added >> 1) + (partialP & added & 0x1))<63:>}}, + {{partialP<63:> == added<63:> && partialP<63:> != Rd<63:>}} ); } format IntOp @@ -816,6 +819,58 @@ decode OP default Unknown::unknown() } 0x35: decode OPF{ format FpBasic{ + 0x01: fmovs_fcc0({{ + if(passesFpCondition(Fsr<11:10>, COND4)) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0x02: fmovd_fcc0({{ + if(passesFpCondition(Fsr<11:10>, COND4)) + Frd = Frs2; + else + Frd = Frd; + }}); + 0x03: FpUnimpl::fmovq_fcc0(); + 0x25: fmovrsz({{ + if(Rs1 == 0) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0x26: fmovrdz({{ + if(Rs1 == 0) + Frd = Frs2; + else + Frd = Frd; + }}); + 0x27: FpUnimpl::fmovrqz(); + 0x41: fmovs_fcc1({{ + if(passesFpCondition(Fsr<33:32>, COND4)) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0x42: fmovd_fcc1({{ + if(passesFpCondition(Fsr<33:32>, COND4)) + Frd = Frs2; + else + Frd = Frd; + }}); + 0x43: FpUnimpl::fmovq_fcc1(); + 0x45: fmovrslez({{ + if(Rs1 <= 0) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0x46: fmovrdlez({{ + if(Rs1 <= 0) + Frd = Frs2; + else + Frd = Frd; + }}); + 0x47: FpUnimpl::fmovrqlez(); 0x51: fcmps({{ uint8_t fcc; if(isnan(Frs1s) || isnan(Frs2s)) @@ -874,6 +929,110 @@ decode OP default Unknown::unknown() Fsr = insertBits(Fsr, firstbit +1, firstbit, fcc); }}); 0x57: FpUnimpl::fcmpeq(); + 0x65: fmovrslz({{ + if(Rs1 < 0) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0x66: fmovrdlz({{ + if(Rs1 < 0) + Frd = Frs2; + else + Frd = Frd; + }}); + 0x67: FpUnimpl::fmovrqlz(); + 0x81: fmovs_fcc2({{ + if(passesFpCondition(Fsr<35:34>, COND4)) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0x82: fmovd_fcc2({{ + if(passesFpCondition(Fsr<35:34>, COND4)) + Frd = Frs2; + else + Frd = Frd; + }}); + 0x83: FpUnimpl::fmovq_fcc2(); + 0xA5: fmovrsnz({{ + if(Rs1 != 0) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0xA6: fmovrdnz({{ + if(Rs1 != 0) + Frd = Frs2; + else + Frd = Frd; + }}); + 0xA7: FpUnimpl::fmovrqnz(); + 0xC1: fmovs_fcc3({{ + if(passesFpCondition(Fsr<37:36>, COND4)) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0xC2: fmovd_fcc3({{ + if(passesFpCondition(Fsr<37:36>, COND4)) + Frd = Frs2; + else + Frd = Frd; + }}); + 0xC3: FpUnimpl::fmovq_fcc3(); + 0xC5: fmovrsgz({{ + if(Rs1 > 0) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0xC6: fmovrdgz({{ + if(Rs1 > 0) + Frd = Frs2; + else + Frd = Frd; + }}); + 0xC7: FpUnimpl::fmovrqgz(); + 0xE5: fmovrsgez({{ + if(Rs1 >= 0) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0xE6: fmovrdgez({{ + if(Rs1 >= 0) + Frd = Frs2; + else + Frd = Frd; + }}); + 0xE7: FpUnimpl::fmovrqgez(); + 0x101: fmovs_icc({{ + if(passesCondition(Ccr<3:0>, COND4)) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0x102: fmovd_icc({{ + if(passesCondition(Ccr<3:0>, COND4)) + Frd = Frs2; + else + Frd = Frd; + }}); + 0x103: FpUnimpl::fmovq_icc(); + 0x181: fmovs_xcc({{ + if(passesCondition(Ccr<7:4>, COND4)) + Frds = Frs2s; + else + Frds = Frds; + }}); + 0x182: fmovd_xcc({{ + if(passesCondition(Ccr<7:4>, COND4)) + Frd = Frs2; + else + Frd = Frd; + }}); + 0x183: FpUnimpl::fmovq_xcc(); default: FailUnimpl::fpop2(); } } @@ -1164,7 +1323,16 @@ decode OP default Unknown::unknown() 0x04: stw({{Mem.uw = Rd.sw;}}); 0x05: stb({{Mem.ub = Rd.sb;}}); 0x06: sth({{Mem.uhw = Rd.shw;}}); - 0x07: sttw({{Mem.udw = RdLow<31:0> | (RdHigh<31:0> << 32);}}); + 0x07: sttw({{ + //This temporary needs to be here so that the parser + //will correctly identify this instruction as a store. + //It's probably either the parenthesis or referencing + //the member variable that throws confuses it. + Twin32_t temp; + temp.a = RdLow<31:0>; + temp.b = RdHigh<31:0>; + Mem.tuw = temp; + }}); } format Load { 0x08: ldsw({{Rd = (int32_t)Mem.sw;}}); @@ -1254,7 +1422,16 @@ decode OP default Unknown::unknown() 0x14: stwa({{Mem.uw = Rd;}}, {{EXT_ASI}}); 0x15: stba({{Mem.ub = Rd;}}, {{EXT_ASI}}); 0x16: stha({{Mem.uhw = Rd;}}, {{EXT_ASI}}); - 0x17: sttwa({{Mem.udw = RdLow<31:0> | RdHigh<31:0> << 32;}}, {{EXT_ASI}}); + 0x17: sttwa({{ + //This temporary needs to be here so that the parser + //will correctly identify this instruction as a store. + //It's probably either the parenthesis or referencing + //the member variable that throws confuses it. + Twin32_t temp; + temp.a = RdLow<31:0>; + temp.b = RdHigh<31:0>; + Mem.tuw = temp; + }}, {{EXT_ASI}}); } format LoadAlt { 0x18: ldswa({{Rd = (int32_t)Mem.sw;}}, {{EXT_ASI}}); diff --git a/src/arch/sparc/isa/formats/branch.isa b/src/arch/sparc/isa/formats/branch.isa index 5cd3ab598..f5ab940bb 100644 --- a/src/arch/sparc/isa/formats/branch.isa +++ b/src/arch/sparc/isa/formats/branch.isa @@ -40,7 +40,7 @@ output header {{ { protected: // Constructor - Branch(const char *mnem, MachInst _machInst, OpClass __opClass) : + Branch(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : SparcStaticInst(mnem, _machInst, __opClass) { } @@ -56,7 +56,7 @@ output header {{ { protected: // Constructor - BranchDisp(const char *mnem, MachInst _machInst, + BranchDisp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : Branch(mnem, _machInst, __opClass) { @@ -76,7 +76,7 @@ output header {{ { protected: // Constructor - BranchNBits(const char *mnem, MachInst _machInst, + BranchNBits(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : BranchDisp(mnem, _machInst, __opClass) { @@ -91,7 +91,7 @@ output header {{ { protected: // Constructor - BranchSplit(const char *mnem, MachInst _machInst, + BranchSplit(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : BranchDisp(mnem, _machInst, __opClass) { @@ -107,7 +107,7 @@ output header {{ { protected: // Constructor - BranchImm13(const char *mnem, MachInst _machInst, OpClass __opClass) : + BranchImm13(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : Branch(mnem, _machInst, __opClass), imm(sext<13>(SIMM13)) { } diff --git a/src/arch/sparc/isa/formats/mem/util.isa b/src/arch/sparc/isa/formats/mem/util.isa index 1d884d6c3..dfe937371 100644 --- a/src/arch/sparc/isa/formats/mem/util.isa +++ b/src/arch/sparc/isa/formats/mem/util.isa @@ -224,7 +224,7 @@ def template StoreExecute {{ } if(storeCond && fault == NoFault) { - fault = xc->write((uint%(mem_acc_size)s_t)Mem, + fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem, EA, %(asi_val)s, 0); } if(fault == NoFault) @@ -257,7 +257,7 @@ def template StoreInitiateAcc {{ } if(storeCond && fault == NoFault) { - fault = xc->write((uint%(mem_acc_size)s_t)Mem, + fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem, EA, %(asi_val)s, 0); } if(fault == NoFault) diff --git a/src/arch/sparc/linux/syscalls.cc b/src/arch/sparc/linux/syscalls.cc index 2c2902f9e..03c8bafe2 100644 --- a/src/arch/sparc/linux/syscalls.cc +++ b/src/arch/sparc/linux/syscalls.cc @@ -132,7 +132,7 @@ SyscallDesc SparcLinuxProcess::syscall32Descs[] = { /* 40 */ SyscallDesc("lstat", unimplementedFunc), /* 41 */ SyscallDesc("dup", unimplementedFunc), /* 42 */ SyscallDesc("pipe", pipePseudoFunc), - /* 43 */ SyscallDesc("times", unimplementedFunc), + /* 43 */ SyscallDesc("times", ignoreFunc), /* 44 */ SyscallDesc("getuid32", unimplementedFunc), /* 45 */ SyscallDesc("umount2", unimplementedFunc), //32 bit /* 46 */ SyscallDesc("setgid", unimplementedFunc), //32 bit @@ -217,7 +217,7 @@ SyscallDesc SparcLinuxProcess::syscall32Descs[] = { /* 125 */ SyscallDesc("recvfrom", unimplementedFunc), /* 126 */ SyscallDesc("setreuid", unimplementedFunc), //32 bit /* 127 */ SyscallDesc("setregid", unimplementedFunc), //32 bit - /* 128 */ SyscallDesc("rename", unimplementedFunc), + /* 128 */ SyscallDesc("rename", renameFunc), /* 129 */ SyscallDesc("truncate", unimplementedFunc), /* 130 */ SyscallDesc("ftruncate", unimplementedFunc), /* 131 */ SyscallDesc("flock", unimplementedFunc), @@ -320,7 +320,7 @@ SyscallDesc SparcLinuxProcess::syscall32Descs[] = { /* 228 */ SyscallDesc("setfsuid", unimplementedFunc), //32 bit /* 229 */ SyscallDesc("setfsgid", unimplementedFunc), //32 bit /* 230 */ SyscallDesc("_newselect", unimplementedFunc), //32 bit - /* 231 */ SyscallDesc("time", unimplementedFunc), + /* 231 */ SyscallDesc("time", ignoreFunc), /* 232 */ SyscallDesc("oldstat", unimplementedFunc), /* 233 */ SyscallDesc("stime", unimplementedFunc), /* 234 */ SyscallDesc("statfs64", unimplementedFunc), @@ -435,7 +435,7 @@ SyscallDesc SparcLinuxProcess::syscallDescs[] = { /* 40 */ SyscallDesc("lstat", unimplementedFunc), /* 41 */ SyscallDesc("dup", unimplementedFunc), /* 42 */ SyscallDesc("pipe", pipePseudoFunc), - /* 43 */ SyscallDesc("times", unimplementedFunc), + /* 43 */ SyscallDesc("times", ignoreFunc), /* 44 */ SyscallDesc("getuid32", unimplementedFunc), /* 45 */ SyscallDesc("umount2", unimplementedFunc), /* 46 */ SyscallDesc("setgid", unimplementedFunc), @@ -520,7 +520,7 @@ SyscallDesc SparcLinuxProcess::syscallDescs[] = { /* 125 */ SyscallDesc("recvfrom", unimplementedFunc), /* 126 */ SyscallDesc("setreuid", unimplementedFunc), /* 127 */ SyscallDesc("setregid", unimplementedFunc), - /* 128 */ SyscallDesc("rename", unimplementedFunc), + /* 128 */ SyscallDesc("rename", renameFunc), /* 129 */ SyscallDesc("truncate", unimplementedFunc), /* 130 */ SyscallDesc("ftruncate", unimplementedFunc), /* 131 */ SyscallDesc("flock", unimplementedFunc), @@ -623,7 +623,7 @@ SyscallDesc SparcLinuxProcess::syscallDescs[] = { /* 228 */ SyscallDesc("setfsuid", unimplementedFunc), /* 229 */ SyscallDesc("setfsgid", unimplementedFunc), /* 230 */ SyscallDesc("_newselect", unimplementedFunc), - /* 231 */ SyscallDesc("time", unimplementedFunc), + /* 231 */ SyscallDesc("time", ignoreFunc), /* 232 */ SyscallDesc("oldstat", unimplementedFunc), /* 233 */ SyscallDesc("stime", unimplementedFunc), /* 234 */ SyscallDesc("statfs64", unimplementedFunc), diff --git a/src/arch/sparc/miscregfile.cc b/src/arch/sparc/miscregfile.cc index f3be1c4c2..5bd572d38 100644 --- a/src/arch/sparc/miscregfile.cc +++ b/src/arch/sparc/miscregfile.cc @@ -646,7 +646,12 @@ void MiscRegFile::setReg(int miscReg, #endif return; case MISCREG_CWP: - new_val = val > NWindows ? NWindows - 1 : val; + new_val = val >= NWindows ? NWindows - 1 : val; + if (val >= NWindows) { + new_val = NWindows - 1; + warn("Attempted to set the CWP to %d with NWindows = %d\n", + val, NWindows); + } tc->changeRegFileContext(CONTEXT_CWP, new_val); break; case MISCREG_GL: diff --git a/src/arch/sparc/predecoder.hh b/src/arch/sparc/predecoder.hh new file mode 100644 index 000000000..4a8c9dc4a --- /dev/null +++ b/src/arch/sparc/predecoder.hh @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_SPARC_PREDECODER_HH__ +#define __ARCH_SPARC_PREDECODER_HH__ + +#include "arch/sparc/types.hh" +#include "base/misc.hh" +#include "cpu/thread_context.hh" +#include "sim/host.hh" + +class ThreadContext; + +namespace SparcISA +{ + class Predecoder + { + protected: + ThreadContext * tc; + //The extended machine instruction being generated + ExtMachInst emi; + + public: + Predecoder(ThreadContext * _tc) : tc(_tc) + {} + + ThreadContext * getTC() + { + return tc; + } + + void setTC(ThreadContext * _tc) + { + tc = _tc; + } + + void process() + { + } + + //Use this to give data to the predecoder. This should be used + //when there is control flow. + void moreBytes(Addr currPC, Addr off, MachInst inst) + { + assert(off == 0); + + emi = inst; + //The I bit, bit 13, is used to figure out where the ASI + //should come from. Use that in the ExtMachInst. This is + //slightly redundant, but it removes the need to put a condition + //into all the execute functions + if(inst & (1 << 13)) + emi |= (static_cast<ExtMachInst>( + tc->readMiscRegNoEffect(MISCREG_ASI)) + << (sizeof(MachInst) * 8)); + else + emi |= (static_cast<ExtMachInst>(bits(inst, 12, 5)) + << (sizeof(MachInst) * 8)); + } + + //Use this to give data to the predecoder. This should be used + //when instructions are executed in order. + void moreBytes(MachInst machInst) + { + moreBytes(0, 0, machInst); + } + + bool needMoreBytes() + { + return true; + } + + bool extMachInstReady() + { + return true; + } + + //This returns a constant reference to the ExtMachInst to avoid a copy + const ExtMachInst & getExtMachInst() + { + return emi; + } + }; +}; + +#endif // __ARCH_SPARC_PREDECODER_HH__ diff --git a/src/arch/sparc/ua2005.cc b/src/arch/sparc/ua2005.cc index 865280038..48e97a531 100644 --- a/src/arch/sparc/ua2005.cc +++ b/src/arch/sparc/ua2005.cc @@ -195,6 +195,7 @@ MiscRegFile::setFSReg(int miscReg, const MiscReg &val, ThreadContext *tc) panic("No support for setting spec_en bit\n"); setRegNoEffect(miscReg, bits(val,0,0)); if (!bits(val,0,0)) { + DPRINTF(Quiesce, "Cpu executed quiescing instruction\n"); // Time to go to sleep tc->suspend(); if (tc->getKernelStats()) @@ -307,7 +308,7 @@ MiscRegFile::processSTickCompare(ThreadContext *tc) tc->getCpuPtr()->instCount(); assert(ticks >= 0 && "stick compare missed interrupt cycle"); - if (ticks == 0) { + if (ticks == 0 || tc->status() == ThreadContext::Suspended) { DPRINTF(Timer, "STick compare cycle reached at %#x\n", (stick_cmpr & mask(63))); if (!(tc->readMiscRegNoEffect(MISCREG_STICK_CMPR) & (ULL(1) << 63))) { @@ -324,11 +325,15 @@ MiscRegFile::processHSTickCompare(ThreadContext *tc) // we're actually at the correct cycle or we need to wait a little while // more int ticks; + if ( tc->status() == ThreadContext::Halted || + tc->status() == ThreadContext::Unallocated) + return; + ticks = ((int64_t)(hstick_cmpr & mask(63)) - (int64_t)stick) - tc->getCpuPtr()->instCount(); assert(ticks >= 0 && "hstick compare missed interrupt cycle"); - if (ticks == 0) { + if (ticks == 0 || tc->status() == ThreadContext::Suspended) { DPRINTF(Timer, "HSTick compare cycle reached at %#x\n", (stick_cmpr & mask(63))); if (!(tc->readMiscRegNoEffect(MISCREG_HSTICK_CMPR) & (ULL(1) << 63))) { diff --git a/src/arch/sparc/utility.hh b/src/arch/sparc/utility.hh index dc9201401..1458231f2 100644 --- a/src/arch/sparc/utility.hh +++ b/src/arch/sparc/utility.hh @@ -48,22 +48,6 @@ namespace SparcISA tc->readMiscRegNoEffect(MISCREG_HPSTATE & (1 << 2))); } - inline ExtMachInst - makeExtMI(MachInst inst, ThreadContext * xc) { - ExtMachInst emi = (MachInst) inst; - //The I bit, bit 13, is used to figure out where the ASI - //should come from. Use that in the ExtMachInst. This is - //slightly redundant, but it removes the need to put a condition - //into all the execute functions - if(inst & (1 << 13)) - emi |= (static_cast<ExtMachInst>(xc->readMiscRegNoEffect(MISCREG_ASI)) - << (sizeof(MachInst) * 8)); - else - emi |= (static_cast<ExtMachInst>(bits(inst, 12, 5)) - << (sizeof(MachInst) * 8)); - return emi; - } - inline bool isCallerSaveIntegerRegister(unsigned int reg) { panic("register classification not implemented"); return false; diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript index f49225758..2e2c5b006 100644 --- a/src/arch/x86/SConscript +++ b/src/arch/x86/SConscript @@ -84,11 +84,12 @@ # Authors: Gabe Black Import('*') - if env['TARGET_ISA'] == 'x86': Source('floatregfile.cc') Source('intregfile.cc') Source('miscregfile.cc') + Source('predecoder.cc') + Source('predecoder_tables.cc') Source('regfile.cc') Source('remote_gdb.cc') diff --git a/src/arch/x86/intregfile.hh b/src/arch/x86/intregfile.hh index da631d444..f7b03f0f0 100644 --- a/src/arch/x86/intregfile.hh +++ b/src/arch/x86/intregfile.hh @@ -88,8 +88,9 @@ #ifndef __ARCH_X86_INTREGFILE_HH__ #define __ARCH_X86_INTREGFILE_HH__ -#include "arch/x86/x86_traits.hh" +#include "arch/x86/intregs.hh" #include "arch/x86/types.hh" +#include "arch/x86/x86_traits.hh" #include <string> @@ -102,7 +103,7 @@ namespace X86ISA //This function translates integer register file indices into names std::string getIntRegName(RegIndex); - const int NumIntArchRegs = 16; + const int NumIntArchRegs = NUM_INTREGS; const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs; class IntRegFile diff --git a/src/arch/x86/intregs.hh b/src/arch/x86/intregs.hh index 3fe25bd5f..ed801cc48 100644 --- a/src/arch/x86/intregs.hh +++ b/src/arch/x86/intregs.hh @@ -77,7 +77,8 @@ namespace X86ISA INTREG_R12W, INTREG_R13W, INTREG_R14W, - INTREG_R15W + INTREG_R15W, + NUM_INTREGS }; }; diff --git a/src/arch/x86/isa/base.isa b/src/arch/x86/isa/base.isa new file mode 100644 index 000000000..4776f7a7e --- /dev/null +++ b/src/arch/x86/isa/base.isa @@ -0,0 +1,181 @@ +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. +// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Base class for sparc instructions, and some support functions +// + +output header {{ + + /** + * Base class for all X86 static instructions. + */ + class X86StaticInst : public StaticInst + { + protected: + // Constructor. + X86StaticInst(const char *mnem, + ExtMachInst _machInst, OpClass __opClass) + : StaticInst(mnem, _machInst, __opClass) + { + } + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + + void printReg(std::ostream &os, int reg) const; + void printSrcReg(std::ostream &os, int reg) const; + void printDestReg(std::ostream &os, int reg) const; + }; +}}; + +output decoder {{ + + inline void printMnemonic(std::ostream &os, const char * mnemonic) + { + ccprintf(os, "\t%s ", mnemonic); + } + + void + X86StaticInst::printSrcReg(std::ostream &os, int reg) const + { + if(_numSrcRegs > reg) + printReg(os, _srcRegIdx[reg]); + } + + void + X86StaticInst::printDestReg(std::ostream &os, int reg) const + { + if(_numDestRegs > reg) + printReg(os, _destRegIdx[reg]); + } + + void + X86StaticInst::printReg(std::ostream &os, int reg) const + { + if (reg < FP_Base_DepTag) { + //FIXME These should print differently depending on the + //mode etc, but for now this will get the point across + switch (reg) { + case INTREG_RAX: + ccprintf(os, "rax"); + break; + case INTREG_RBX: + ccprintf(os, "rbx"); + break; + case INTREG_RCX: + ccprintf(os, "rcx"); + break; + case INTREG_RDX: + ccprintf(os, "rdx"); + break; + case INTREG_RSP: + ccprintf(os, "rsp"); + break; + case INTREG_RBP: + ccprintf(os, "rbp"); + break; + case INTREG_RSI: + ccprintf(os, "rsi"); + break; + case INTREG_RDI: + ccprintf(os, "rdi"); + break; + case INTREG_R8W: + ccprintf(os, "r8"); + break; + case INTREG_R9W: + ccprintf(os, "r9"); + break; + case INTREG_R10W: + ccprintf(os, "r10"); + break; + case INTREG_R11W: + ccprintf(os, "r11"); + break; + case INTREG_R12W: + ccprintf(os, "r12"); + break; + case INTREG_R13W: + ccprintf(os, "r13"); + break; + case INTREG_R14W: + ccprintf(os, "r14"); + break; + case INTREG_R15W: + ccprintf(os, "r15"); + break; + } + } else if (reg < Ctrl_Base_DepTag) { + ccprintf(os, "%%f%d", reg - FP_Base_DepTag); + } else { + switch (reg - Ctrl_Base_DepTag) { + default: + ccprintf(os, "%%ctrl%d", reg - Ctrl_Base_DepTag); + } + } + } + + std::string X86StaticInst::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + std::stringstream ss; + + printMnemonic(ss, mnemonic); + + return ss.str(); + } +}}; diff --git a/src/arch/x86/isa/bitfields.isa b/src/arch/x86/isa/bitfields.isa index 47aec4fa1..fff324caa 100644 --- a/src/arch/x86/isa/bitfields.isa +++ b/src/arch/x86/isa/bitfields.isa @@ -58,5 +58,30 @@ // Bitfield definitions. // -def bitfield EXAMPLE <24>; +//Prefixes +def bitfield REX rex; +def bitfield LEGACY legacy; +// Pieces of the opcode +def bitfield OPCODE_NUM opcode.num; +def bitfield OPCODE_PREFIXA opcode.prefixA; +def bitfield OPCODE_PREFIXB opcode.prefixB; +def bitfield OPCODE_OP opcode.op; +//The top 5 bits of the opcode tend to split the instructions into groups +def bitfield OPCODE_OP_TOP5 opcode.op.top5; +def bitfield OPCODE_OP_BOTTOM3 opcode.op.bottom3; + +// Immediate fields +def bitfield IMMEDIATE immediate; +def bitfield DISPLACEMENT displacement; + +//Modifier bytes +def bitfield MODRM modRM; +def bitfield MODRM_MOD modRM.mod; +def bitfield MODRM_REG modRM.reg; +def bitfield MODRM_RM modRM.rm; + +def bitfield SIB sib; +def bitfield SIB_SCALE sib.scale; +def bitfield SIB_INDEX sib.index; +def bitfield SIB_BASE sib.base; diff --git a/src/arch/x86/isa/decoder/decoder.isa b/src/arch/x86/isa/decoder/decoder.isa new file mode 100644 index 000000000..20f31f882 --- /dev/null +++ b/src/arch/x86/isa/decoder/decoder.isa @@ -0,0 +1,89 @@ +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. +// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// The actual decoder specification +// + +decode OPCODE_NUM default Unknown::unknown() +{ + 0x0: M5InternalError::error( + {{"Saw an ExtMachInst with zero opcode bytes!"}}); + //1 byte opcodes + ##include "one_byte_opcodes.isa" + //2 byte opcodes + ##include "two_byte_opcodes.isa" + //3 byte opcodes + 0x3: decode OPCODE_PREFIXA { + 0xF0: decode OPCODE_PREFIXB { + //We don't handle these properly in the predecoder yet, so there's + //no reason to implement them for now. + 0x38: decode OPCODE_OP { + default: FailUnimpl::sseThreeEight(); + } + 0x3A: decode OPCODE_OP { + default: FailUnimpl::sseThreeA(); + } + 0xF0: decode OPCODE_OP { + default: FailUnimpl::threednow(); + } + default: M5InternalError::error( + {{"Unexpected second opcode byte in three byte opcode!"}}); + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in three byte opcode!"}}); + } +} diff --git a/src/arch/x86/isa/decoder/one_byte_opcodes.isa b/src/arch/x86/isa/decoder/one_byte_opcodes.isa new file mode 100644 index 000000000..c56a8bf92 --- /dev/null +++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa @@ -0,0 +1,398 @@ +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. +// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode the one byte opcodes +// + +0x1: decode OPCODE_OP_TOP5 { + format WarnUnimpl { + 0x00: decode OPCODE_OP_BOTTOM3 { + 0x6: push_ES(); + 0x7: pop_ES(); + default: MultiOp::add( + {{out1 = in1 + in2}}, + OPCODE_OP_BOTTOM3, + [[Eb,Gb],[Ev,Gv], + [Gb,Eb],[Gv,Ev], + [Al,Ib],[rAx,Iz]]); + } + 0x01: decode OPCODE_OP_BOTTOM3 { + 0x0: or_Eb_Gb(); + 0x1: or_Ev_Gv(); + 0x2: or_Gb_Eb(); + 0x3: or_Gv_Ev(); + 0x4: or_Al_Ib(); + 0x5: or_rAX_Iz(); + 0x6: push_CS(); + //Any time this is seen, it should generate a two byte opcode + 0x7: M5InternalError::error( + {{"Saw a one byte opcode whose value was 0x0F!"}}); + } + 0x02: decode OPCODE_OP_BOTTOM3 { + 0x0: adc_Eb_Gb(); + 0x1: adc_Ev_Gv(); + 0x2: adc_Gb_Eb(); + 0x3: adc_Gv_Ev(); + 0x4: adc_Al_Ib(); + 0x5: adc_rAX_Iz(); + 0x6: push_SS(); + 0x7: pop_SS(); + } + 0x03: decode OPCODE_OP_BOTTOM3 { + 0x0: sbb_Eb_Gb(); + 0x1: sbb_Ev_Gv(); + 0x2: sbb_Gb_Eb(); + 0x3: sbb_Gv_Ev(); + 0x4: sbb_Al_Ib(); + 0x5: sbb_rAX_Iz(); + 0x6: push_DS(); + 0x7: pop_DS(); + } + 0x04: decode OPCODE_OP_BOTTOM3 { + 0x0: and_Eb_Gb(); + 0x1: and_Ev_Gv(); + 0x2: and_Gb_Eb(); + 0x3: and_Gv_Ev(); + 0x4: and_Al_Ib(); + 0x5: and_rAX_Iz(); + 0x6: M5InternalError::error( + {{"Tried to execute the ES segment override prefix!"}}); + 0x7: daa(); + } + 0x05: decode OPCODE_OP_BOTTOM3 { + 0x0: sub_Eb_Gb(); + 0x1: sub_Ev_Gv(); + 0x2: sub_Gb_Eb(); + 0x3: sub_Gv_Ev(); + 0x4: sub_Al_Ib(); + 0x5: sub_rAX_Iz(); + 0x6: M5InternalError::error( + {{"Tried to execute the CS segment override prefix!"}}); + 0x7: das(); + } + 0x06: decode OPCODE_OP_BOTTOM3 { + 0x0: xor_Eb_Gb(); + 0x1: xor_Ev_Gv(); + 0x2: xor_Gb_Eb(); + 0x3: xor_Gv_Ev(); + 0x4: xor_Al_Ib(); + 0x5: xor_rAX_Iz(); + 0x6: M5InternalError::error( + {{"Tried to execute the SS segment override prefix!"}}); + 0x7: aaa(); + } + 0x07: decode OPCODE_OP_BOTTOM3 { + 0x0: cmp_Eb_Gb(); + 0x1: cmp_Ev_Gv(); + 0x2: cmp_Gb_Eb(); + 0x3: cmp_Gv_Ev(); + 0x4: cmp_Al_Ib(); + 0x5: cmp_rAX_Iz(); + 0x6: M5InternalError::error( + {{"Tried to execute the DS segment override prefix!"}}); + 0x7: aas(); + } + 0x08: decode OPCODE_OP_BOTTOM3 { + 0x0: inc_eAX(); + 0x1: inc_eCX(); + 0x2: inc_eDX(); + 0x3: inc_eBX(); + 0x4: inc_eSP(); + 0x5: inc_eBP(); + 0x6: inc_eSI(); + 0x7: inc_eDI(); + } + 0x09: decode OPCODE_OP_BOTTOM3 { + 0x0: dec_eAX(); + 0x1: dec_eCX(); + 0x2: dec_eDX(); + 0x3: dec_eBX(); + 0x4: dec_eSP(); + 0x5: dec_eBP(); + 0x6: dec_eSI(); + 0x7: dec_eDI(); + } + 0x0A: decode OPCODE_OP_BOTTOM3 { + 0x0: push_rAX(); + 0x1: push_rCX(); + 0x2: push_rDX(); + 0x3: push_rBX(); + 0x4: push_rSP(); + 0x5: push_rBP(); + 0x6: push_rSI(); + 0x7: push_rDI(); + } + 0x0B: decode OPCODE_OP_BOTTOM3 { + 0x0: pop_rAX(); + 0x1: pop_rCX(); + 0x2: pop_rDX(); + 0x3: pop_rBX(); + 0x4: pop_rSP(); + 0x5: pop_rBP(); + 0x6: pop_rSI(); + 0x7: pop_rDI(); + } + 0x0C: decode OPCODE_OP_BOTTOM3 { + 0x0: pusha(); + 0x1: popa(); + 0x2: bound_Gv_Ma(); + 0x3: arpl_Ew_Gw(); + 0x4: M5InternalError::error( + {{"Tried to execute the FS segment override prefix!"}}); + 0x5: M5InternalError::error( + {{"Tried to execute the GS segment override prefix!"}}); + 0x6: M5InternalError::error( + {{"Tried to execute the operand size override prefix!"}}); + 0x7: M5InternalError::error( + {{"Tried to execute the DS address size override prefix!"}}); + } + 0x0D: decode OPCODE_OP_BOTTOM3 { + 0x0: push_Iz(); + 0x1: imul_Gv_Ev_Iz(); + 0x2: push_Ib(); + 0x3: imul_Gv_Ev_Ib(); + 0x4: ins_Yb_Dx(); + 0x5: ins_Yz_Dx(); + 0x6: outs_Dx_Xb(); + 0x7: outs_Dx_Xz(); + } + 0x0E: decode OPCODE_OP_BOTTOM3 { + 0x0: jo_Jb(); + 0x1: jno_Jb(); + 0x2: jb_Jb(); + 0x3: jnb_Jb(); + 0x4: jz_Jb(); + 0x5: jnz_Jb(); + 0x6: jbe_Jb(); + 0x7: jnbe_Jb(); + } + 0x0F: decode OPCODE_OP_BOTTOM3 { + 0x0: js_Jb(); + 0x1: jns_Jb(); + 0x2: jp_Jb(); + 0x3: jnp_Jb(); + 0x4: jl_Jb(); + 0x5: jnl_Jb(); + 0x6: jle_Jb(); + 0x7: jnke_Jb(); + } + 0x10: decode OPCODE_OP_BOTTOM3 { + 0x0: group1_Eb_Ib(); + 0x1: group1_Ev_Iz(); + 0x2: group1_Eb_Ib(); + 0x3: group1_Ev_Ib(); + 0x4: test_Eb_Gb(); + 0x5: test_Ev_Gv(); + 0x6: xchg_Eb_Gb(); + 0x7: xchg_Ev_Gv(); + } + 0x11: decode OPCODE_OP_BOTTOM3 { + 0x0: mov_Eb_Gb(); + 0x1: mov_Ev_Gv(); + 0x2: mov_Gb_Eb(); + 0x3: mov_Gv_Ev(); + 0x4: mov_MwRv_Sw(); + 0x5: lea_Gv_M(); + 0x6: mov_Sw_MwRv(); + 0x7: group10_Ev(); //Make sure this is Ev + } + 0x12: decode OPCODE_OP_BOTTOM3 { + 0x0: nop_or_pause(); //Check for repe prefix + 0x1: xchg_rCX_rAX(); + 0x2: xchg_rDX_rAX(); + 0x3: xchg_rVX_rAX(); + 0x4: xchg_rSP_rAX(); + 0x5: xchg_rBP_rAX(); + 0x6: xchg_rSI_rAX(); + 0x7: xchg_rDI_rAX(); + } + 0x13: decode OPCODE_OP_BOTTOM3 { + 0x0: cbw_or_cwde_or_cdqe_rAX(); + 0x1: cwd_or_cdq_or_cqo_rAX_rDX(); + 0x2: call_Ap(); + 0x3: fwait(); //aka wait + 0x4: pushf_Fv(); + 0x5: popf_Fv(); + 0x6: sahf(); + 0x7: lahf(); + } + 0x14: decode OPCODE_OP_BOTTOM3 { + 0x0: mov_Al_Ob(); + 0x1: mov_rAX_Ov(); + 0x2: mov_Ob_Al(); + 0x3: mov_Ov_rAX(); + 0x4: movs_Yb_Xb(); + 0x5: movs_Yv_Xv(); + 0x6: cmps_Yb_Xb(); + 0x7: cmps_Yv_Xv(); + } + 0x15: decode OPCODE_OP_BOTTOM3 { + 0x0: test_Al_Ib(); + 0x1: test_rAX_Iz(); + 0x2: stos_Yb_Al(); + 0x3: stos_Yv_rAX(); + 0x4: lods_Al_Xb(); + 0x5: lods_rAX_Xv(); + 0x6: scas_Yb_Al(); + 0x7: scas_Yv_rAX(); + } + 0x16: decode OPCODE_OP_BOTTOM3 { + 0x0: mov_Al_Ib(); + 0x1: mov_Cl_Ib(); + 0x2: mov_Dl_Ib(); + 0x3: mov_Bl_Ib(); + 0x4: mov_Ah_Ib(); + 0x5: mov_Ch_Ib(); + 0x6: mov_Dh_Ib(); + 0x7: mov_Bh_Ib(); + } + 0x17: decode OPCODE_OP_BOTTOM3 { + 0x0: mov_rAX_Iv(); + 0x1: mov_rCX_Iv(); + 0x2: mov_rDX_Iv(); + 0x3: mov_rBX_Iv(); + 0x4: mov_rSP_Iv(); + 0x5: mov_rBP_Iv(); + 0x6: mov_rSI_Iv(); + 0x7: mov_rDI_Iv(); + } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: group2_Eb_Ib(); + 0x1: group2_Ev_Ib(); + 0x2: ret_near_Iw(); + 0x3: ret_near(); + 0x4: les_Gz_Mp(); + 0x5: lds_Gz_Mp(); + 0x6: group12_Eb_Ib(); + 0x7: group12_Ev_Iz(); + } + 0x19: decode OPCODE_OP_BOTTOM3 { + 0x0: enter_Iw_Ib(); + 0x1: leave(); + 0x2: ret_far_Iw(); + 0x3: ret_far(); + 0x4: int3(); + 0x5: int_Ib(); + 0x6: into(); + 0x7: iret(); + } + 0x1A: decode OPCODE_OP_BOTTOM3 { + 0x0: group2_Eb_1(); + 0x1: group2_Ev_1(); + 0x2: group2_Eb_Cl(); + 0x3: group2_Ev_Cl(); + 0x4: aam_Ib(); + 0x5: aad_Ib(); + 0x6: salc(); + 0x7: xlat(); + } + 0x1B: decode OPCODE_OP_BOTTOM3 { + 0x0: esc0(); + 0x1: esc1(); + 0x2: esc2(); + 0x3: esc3(); + 0x4: esc4(); + 0x5: esc5(); + 0x6: esc6(); + 0x7: esc7(); + } + 0x1C: decode OPCODE_OP_BOTTOM3 { + 0x0: loopne_Jb(); + 0x1: loope_Jb(); + 0x2: loop_Jb(); + 0x3: jcxz_or_jecx_or_jrcx(); + 0x4: in_Al_Ib(); + 0x5: in_eAX_Ib(); + 0x6: out_Ib_Al(); + 0x7: out_Ib_eAX(); + } + 0x1D: decode OPCODE_OP_BOTTOM3 { + 0x0: call_Jz(); + 0x1: jmp_Jz(); + 0x2: jmp_Ap(); + 0x3: jmp_Jb(); + 0x4: in_Al_Dx(); + 0x5: in_eAX_Dx(); + 0x6: out_Dx_Al(); + 0x7: out_Dx_eAX(); + } + 0x1E: decode OPCODE_OP_BOTTOM3 { + 0x0: M5InternalError::error( + {{"Tried to execute the lock prefix!"}}); + 0x1: int1(); + 0x2: M5InternalError::error( + {{"Tried to execute the repne prefix!"}}); + 0x3: M5InternalError::error( + {{"Tried to execute the rep/repe prefix!"}}); + 0x4: hlt(); + 0x5: cmc(); + 0x6: group3_Eb(); + 0x7: group3_Ev(); + } + 0x1F: decode OPCODE_OP_BOTTOM3 { + 0x0: clc(); + 0x1: stc(); + 0x2: cli(); + 0x3: sti(); + 0x4: cld(); + 0x5: std(); + 0x6: group4(); + 0x7: group5(); + } + } + default: FailUnimpl::oneByteOps(); +} diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa new file mode 100644 index 000000000..f05c33bdb --- /dev/null +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -0,0 +1,393 @@ +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. +// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode the two byte opcodes +// +0x2: decode OPCODE_PREFIXA { + 0xF0: decode OPCODE_OP_TOP5 { + format WarnUnimpl { + 0x00: decode OPCODE_OP_BOTTOM3 { + 0x00: group6(); + 0x01: group7(); + 0x02: lar_Gv_Ew(); + 0x03: lsl_Gv_Ew(); + //sandpile.org doesn't seem to know what this is... ? + 0x04: loadall_or_reset_or_hang(); + //sandpile.org says (AMD) after syscall, so I might want to check + //if that means amd64 or AMD machines + 0x05: loadall_or_syscall(); + 0x06: clts(); + //sandpile.org says (AMD) after sysret, so I might want to check + //if that means amd64 or AMD machines + 0x07: loadall_or_sysret(); + } + 0x01: decode OPCODE_OP_BOTTOM3 { + 0x0: holderholder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x02: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x03: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x04: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x05: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x06: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x07: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x08: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x09: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x0A: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x0B: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x0C: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x0D: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x0E: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x0F: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x10: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x11: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x12: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x13: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x14: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x15: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x16: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x17: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x19: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x1A: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x1B: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x1C: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x1D: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x1E: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + 0x1F: decode OPCODE_OP_BOTTOM3 { + 0x0: holder(); + 0x1: holder(); + 0x2: holder(); + 0x3: holder(); + 0x4: holder(); + 0x5: holder(); + 0x6: holder(); + 0x7: holder(); + } + default: FailUnimpl::twoByteOps(); + } + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in two byte opcode!"}}); +} diff --git a/src/arch/x86/isa/formats/basic.isa b/src/arch/x86/isa/formats/basic.isa index 7aea7085f..ea224d638 100644 --- a/src/arch/x86/isa/formats/basic.isa +++ b/src/arch/x86/isa/formats/basic.isa @@ -147,3 +147,12 @@ def template BasicDecode {{ def template BasicDecodeWithMnemonic {{ return new %(class_name)s("%(mnemonic)s", machInst); }}; + +// The most basic instruction format... used only for a few misc. insts +def format BasicOperate(code, *flags) {{ + iop = InstObjParams(name, Name, 'SparcStaticInst', code, flags) + header_output = BasicDeclare.subst(iop) + decoder_output = BasicConstructor.subst(iop) + decode_block = BasicDecode.subst(iop) + exec_output = BasicExecute.subst(iop) +}}; diff --git a/src/arch/x86/isa/decoder.isa b/src/arch/x86/isa/formats/error.isa index 85f376b49..8ac2ea44d 100644 --- a/src/arch/x86/isa/decoder.isa +++ b/src/arch/x86/isa/formats/error.isa @@ -1,3 +1,5 @@ +// -*- mode:c++ -*- + // Copyright (c) 2007 The Hewlett-Packard Development Company // All rights reserved. // @@ -55,10 +57,21 @@ //////////////////////////////////////////////////////////////////// // -// The actual decoder specification +// "Format" which really indicates an internal error. This is a more +// significant problem for x86 than for other ISAs because of it's complex +// ExtMachInst type. // -decode EXAMPLE default Unknown::unknown() -{ - 0x0: Unknown::unknown2(); -} +def template ErrorDecode {{ + { + panic("X86 decoder internal error: '%%s' %%s", + %(message)s, machInst); + } +}}; + +def format M5InternalError(error_message) {{ + iop = InstObjParams(name, 'M5InternalError') + iop.message = error_message + decode_block = ErrorDecode.subst(iop) +}}; + diff --git a/src/arch/x86/isa/formats/formats.isa b/src/arch/x86/isa/formats/formats.isa index 0d3d1c6dc..d763c05bc 100644 --- a/src/arch/x86/isa/formats/formats.isa +++ b/src/arch/x86/isa/formats/formats.isa @@ -87,3 +87,14 @@ //Include the "unknown" format ##include "unknown.isa" + +//Include the "unimp" format +##include "unimp.isa" + +//Include a format to signal m5 internal errors. This is used to indicate a +//malfunction of the decode mechanism. +##include "error.isa" + +//Include a format which implements a batch of instructions which do the same +//thing on a variety of inputs +##include "multi.isa" diff --git a/src/arch/x86/isa/formats/multi.isa b/src/arch/x86/isa/formats/multi.isa new file mode 100644 index 000000000..3e80f9cfb --- /dev/null +++ b/src/arch/x86/isa/formats/multi.isa @@ -0,0 +1,106 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. +// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Instructions that do the same thing to multiple sets of arguments. +// + +output header {{ +}}; + +output decoder {{ +}}; + +output exec {{ +}}; + +let {{ + multiops = {} +}}; + +def format MultiOp(code, switchVal, opTags, *opt_flags) {{ + # Loads and stores that bring in and write out values from the + # instructions. These are determined by the input and output tags, + # and the resulting instruction will have the right number of micro ops, + # or could be implemented as an atomic macro op. + instNames = [] + for tagSet in opTags: + loads = [] + stores = [] + postfix = '' + for tag in tagSet: + postfix += '_' + tag + gather_inputs = '' + if len(loads) + len(stores) == 0: + # If there are no loads or stores, make this a single instruction. + iop = InstObjParams(name, Name + postfix, 'X86StaticInst', + {"code": code, "gather_inputs": gather_inputs}, + opt_flags) + else: + # Build up a macro op. We'll punt on this for now + pass + + decodeBlob = 'switch(%s) {\n' % switchVal + counter = 0 + for inst in instNames: + decodeBlob += '%d: return (X86StaticInst *)(new %s(machInst));\n' % \ + (counter, inst) + counter += 1 + decodeBlob += '}\n' + # decode_block = BasicDecodeWithMnemonic.subst(iop) +}}; diff --git a/src/arch/x86/isa/formats/unimp.isa b/src/arch/x86/isa/formats/unimp.isa new file mode 100644 index 000000000..12fa8387b --- /dev/null +++ b/src/arch/x86/isa/formats/unimp.isa @@ -0,0 +1,174 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2007 The Hewlett-Packard Development Company +// All rights reserved. +// +// Redistribution and use of this software in source and binary forms, +// with or without modification, are permitted provided that the +// following conditions are met: +// +// The software must be used only for Non-Commercial Use which means any +// use which is NOT directed to receiving any direct monetary +// compensation for, or commercial advantage from such use. Illustrative +// examples of non-commercial use are academic research, personal study, +// teaching, education and corporate research & development. +// Illustrative examples of commercial use are distributing products for +// commercial advantage and providing services using the software for +// commercial advantage. +// +// If you wish to use this software or functionality therein that may be +// covered by patents for commercial use, please contact: +// Director of Intellectual Property Licensing +// Office of Strategy and Technology +// Hewlett-Packard Company +// 1501 Page Mill Road +// Palo Alto, California 94304 +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. Redistributions +// in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or +// other materials provided with the distribution. Neither the name of +// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. No right of +// sublicense is granted herewith. Derivatives of the software and +// output created using the software may be prepared, but only for +// Non-Commercial Uses. Derivatives of the software may be shared with +// others provided: (i) the others agree to abide by the list of +// conditions herein which includes the Non-Commercial Use restrictions; +// and (ii) such Derivatives of the software include the above copyright +// notice to acknowledge the contribution from this software where +// applicable, this list of conditions and the disclaimer below. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Unimplemented instructions +// + +output header {{ + /** + * Static instruction class for unimplemented instructions that + * cause simulator termination. Note that these are recognized + * (legal) instructions that the simulator does not support; the + * 'Unknown' class is used for unrecognized/illegal instructions. + * This is a leaf class. + */ + class FailUnimplemented : public X86StaticInst + { + public: + /// Constructor + FailUnimplemented(const char *_mnemonic, ExtMachInst _machInst) + : X86StaticInst(_mnemonic, _machInst, No_OpClass) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + + %(BasicExecDeclare)s + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; + + /** + * Base class for unimplemented instructions that cause a warning + * to be printed (but do not terminate simulation). This + * implementation is a little screwy in that it will print a + * warning for each instance of a particular unimplemented machine + * instruction, not just for each unimplemented opcode. Should + * probably make the 'warned' flag a static member of the derived + * class. + */ + class WarnUnimplemented : public X86StaticInst + { + private: + /// Have we warned on this instruction yet? + mutable bool warned; + + public: + /// Constructor + WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst) + : X86StaticInst(_mnemonic, _machInst, No_OpClass), warned(false) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + + %(BasicExecDeclare)s + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; +}}; + +output decoder {{ + std::string + FailUnimplemented::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + return csprintf("%-10s (unimplemented)", mnemonic); + } + + std::string + WarnUnimplemented::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { +#ifdef SS_COMPATIBLE_DISASSEMBLY + return csprintf("%-10s", mnemonic); +#else + return csprintf("%-10s (unimplemented)", mnemonic); +#endif + } +}}; + +output exec {{ + Fault + FailUnimplemented::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + panic("attempt to execute unimplemented instruction '%s' %s", + mnemonic, machInst); + return NoFault; + } + + Fault + WarnUnimplemented::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + if (!warned) { + warn("instruction '%s' unimplemented\n", mnemonic); + warned = true; + } + + return NoFault; + } +}}; + + +def format FailUnimpl() {{ + iop = InstObjParams(name, 'FailUnimplemented') + decode_block = BasicDecodeWithMnemonic.subst(iop) +}}; + +def format WarnUnimpl() {{ + iop = InstObjParams(name, 'WarnUnimplemented') + decode_block = BasicDecodeWithMnemonic.subst(iop) +}}; + diff --git a/src/arch/x86/isa/formats/unknown.isa b/src/arch/x86/isa/formats/unknown.isa index 605ddcb69..43ddc20c1 100644 --- a/src/arch/x86/isa/formats/unknown.isa +++ b/src/arch/x86/isa/formats/unknown.isa @@ -120,7 +120,8 @@ output exec {{ Fault Unknown::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { - panic("No instructions are implemented for X86!\n"); + warn("No instructions are implemented for X86!\n"); + return NoFault; } }}; diff --git a/src/arch/x86/isa/main.isa b/src/arch/x86/isa/main.isa index fd1b461f0..146f714a7 100644 --- a/src/arch/x86/isa/main.isa +++ b/src/arch/x86/isa/main.isa @@ -79,10 +79,10 @@ namespace X86ISA; ##include "operands.isa" //Include the base class for x86 instructions, and some support code -//##include "base.isa" +##include "base.isa" //Include the definitions for the instruction formats ##include "formats/formats.isa" //Include the decoder definition -##include "decoder.isa" +##include "decoder/decoder.isa" diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa index 4b144dce0..20376f38f 100644 --- a/src/arch/x86/isa/operands.isa +++ b/src/arch/x86/isa/operands.isa @@ -96,4 +96,7 @@ def operand_types {{ }}; def operands {{ + # This is just copied from SPARC, because having no operands confuses + # the parser. + 'Rd': ('IntReg', 'udw', 'RD', 'IsInteger', 1) }}; diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc new file mode 100644 index 000000000..fbed4fe41 --- /dev/null +++ b/src/arch/x86/predecoder.cc @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2007 The Hewlett-Packard Development Company + * All rights reserved. + * + * Redistribution and use of this software in source and binary forms, + * with or without modification, are permitted provided that the + * following conditions are met: + * + * The software must be used only for Non-Commercial Use which means any + * use which is NOT directed to receiving any direct monetary + * compensation for, or commercial advantage from such use. Illustrative + * examples of non-commercial use are academic research, personal study, + * teaching, education and corporate research & development. + * Illustrative examples of commercial use are distributing products for + * commercial advantage and providing services using the software for + * commercial advantage. + * + * If you wish to use this software or functionality therein that may be + * covered by patents for commercial use, please contact: + * Director of Intellectual Property Licensing + * Office of Strategy and Technology + * Hewlett-Packard Company + * 1501 Page Mill Road + * Palo Alto, California 94304 + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. Redistributions + * in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. Neither the name of + * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. No right of + * sublicense is granted herewith. Derivatives of the software and + * output created using the software may be prepared, but only for + * Non-Commercial Uses. Derivatives of the software may be shared with + * others provided: (i) the others agree to abide by the list of + * conditions herein which includes the Non-Commercial Use restrictions; + * and (ii) such Derivatives of the software include the above copyright + * notice to acknowledge the contribution from this software where + * applicable, this list of conditions and the disclaimer below. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/x86/predecoder.hh" +#include "base/misc.hh" +#include "base/trace.hh" +#include "sim/host.hh" + +namespace X86ISA +{ + void Predecoder::process() + { + //This function drives the predecoder state machine. + + //Some sanity checks. You shouldn't try to process more bytes if + //there aren't any, and you shouldn't overwrite an already + //predecoder ExtMachInst. + assert(!outOfBytes); + assert(!emiIsReady); + + //While there's still something to do... + while(!emiIsReady && !outOfBytes) + { + uint8_t nextByte = getNextByte(); + switch(state) + { + case PrefixState: + state = doPrefixState(nextByte); + break; + case OpcodeState: + state = doOpcodeState(nextByte); + break; + case ModRMState: + state = doModRMState(nextByte); + break; + case SIBState: + state = doSIBState(nextByte); + break; + case DisplacementState: + state = doDisplacementState(); + break; + case ImmediateState: + state = doImmediateState(); + break; + case ErrorState: + panic("Went to the error state in the predecoder.\n"); + default: + panic("Unrecognized state! %d\n", state); + } + } + } + + //Either get a prefix and record it in the ExtMachInst, or send the + //state machine on to get the opcode(s). + Predecoder::State Predecoder::doPrefixState(uint8_t nextByte) + { + uint8_t prefix = Prefixes[nextByte]; + State nextState = PrefixState; + if(prefix) + consumeByte(); + switch(prefix) + { + //Operand size override prefixes + case OperandSizeOverride: + DPRINTF(Predecoder, "Found operand size override prefix.\n"); + break; + case AddressSizeOverride: + DPRINTF(Predecoder, "Found address size override prefix.\n"); + break; + //Segment override prefixes + case CSOverride: + DPRINTF(Predecoder, "Found cs segment override.\n"); + break; + case DSOverride: + DPRINTF(Predecoder, "Found ds segment override.\n"); + break; + case ESOverride: + DPRINTF(Predecoder, "Found es segment override.\n"); + break; + case FSOverride: + DPRINTF(Predecoder, "Found fs segment override.\n"); + break; + case GSOverride: + DPRINTF(Predecoder, "Found gs segment override.\n"); + break; + case SSOverride: + DPRINTF(Predecoder, "Found ss segment override.\n"); + break; + case Lock: + DPRINTF(Predecoder, "Found lock prefix.\n"); + break; + case Rep: + DPRINTF(Predecoder, "Found rep prefix.\n"); + break; + case Repne: + DPRINTF(Predecoder, "Found repne prefix.\n"); + break; + case RexPrefix: + DPRINTF(Predecoder, "Found Rex prefix %#x.\n", nextByte); + emi.rex = nextByte; + break; + case 0: + emi.opcode.num = 0; + nextState = OpcodeState; + break; + default: + panic("Unrecognized prefix %#x\n", nextByte); + } + return nextState; + } + + //Load all the opcodes (currently up to 2) and then figure out + //what immediate and/or ModRM is needed. + Predecoder::State Predecoder::doOpcodeState(uint8_t nextByte) + { + State nextState = ErrorState; + emi.opcode.num++; + //We can't handle 3+ byte opcodes right now + assert(emi.opcode.num < 3); + consumeByte(); + if(emi.opcode.num == 1 && nextByte == 0x0f) + { + nextState = OpcodeState; + DPRINTF(Predecoder, "Found two byte opcode.\n"); + emi.opcode.prefixA = nextByte; + } + else if(emi.opcode.num == 2 && + (nextByte == 0x0f || + (nextByte & 0xf8) == 0x38)) + { + panic("Three byte opcodes aren't yet supported!\n"); + nextState = OpcodeState; + DPRINTF(Predecoder, "Found three byte opcode.\n"); + emi.opcode.prefixB = nextByte; + } + else + { + DPRINTF(Predecoder, "Found opcode %#x.\n", nextByte); + emi.opcode.op = nextByte; + + //Prepare for any immediate/displacement we might need + immediateCollected = 0; + emi.immediate = 0; + displacementCollected = 0; + emi.displacement = 0; + + //Figure out how big of an immediate we'll retreive based + //on the opcode. + int immType = ImmediateType[ + emi.opcode.num - 1][nextByte]; + if(0) //16 bit mode + immediateSize = ImmediateTypeToSize[0][immType]; + else if(!(emi.rex & 0x4)) //32 bit mode + immediateSize = ImmediateTypeToSize[1][immType]; + else //64 bit mode + immediateSize = ImmediateTypeToSize[2][immType]; + + //Determine what to expect next + if (UsesModRM[emi.opcode.num - 1][nextByte]) { + nextState = ModRMState; + } else if(immediateSize) { + nextState = ImmediateState; + } else { + emiIsReady = true; + nextState = PrefixState; + } + } + return nextState; + } + + //Get the ModRM byte and determine what displacement, if any, there is. + //Also determine whether or not to get the SIB byte, displacement, or + //immediate next. + Predecoder::State Predecoder::doModRMState(uint8_t nextByte) + { + State nextState = ErrorState; + emi.modRM = nextByte; + DPRINTF(Predecoder, "Found modrm byte %#x.\n", nextByte); + if (0) {//FIXME in 16 bit mode + //figure out 16 bit displacement size + if(nextByte & 0xC7 == 0x06 || + nextByte & 0xC0 == 0x80) + displacementSize = 2; + else if(nextByte & 0xC0 == 0x40) + displacementSize = 1; + else + displacementSize = 0; + } else { + //figure out 32/64 bit displacement size + if(nextByte & 0xC7 == 0x05 || + nextByte & 0xC0 == 0x80) + displacementSize = 4; + else if(nextByte & 0xC0 == 0x40) + displacementSize = 2; + else + displacementSize = 0; + } + //If there's an SIB, get that next. + //There is no SIB in 16 bit mode. + if(nextByte & 0x7 == 4 && + nextByte & 0xC0 != 0xC0) { + // && in 32/64 bit mode) + nextState = SIBState; + } else if(displacementSize) { + nextState = DisplacementState; + } else if(immediateSize) { + nextState = ImmediateState; + } else { + emiIsReady = true; + nextState = PrefixState; + } + //The ModRM byte is consumed no matter what + consumeByte(); + return nextState; + } + + //Get the SIB byte. We don't do anything with it at this point, other + //than storing it in the ExtMachInst. Determine if we need to get a + //displacement or immediate next. + Predecoder::State Predecoder::doSIBState(uint8_t nextByte) + { + State nextState = ErrorState; + emi.sib = nextByte; + DPRINTF(Predecoder, "Found SIB byte %#x.\n", nextByte); + consumeByte(); + if(displacementSize) { + nextState = DisplacementState; + } else if(immediateSize) { + nextState = ImmediateState; + } else { + emiIsReady = true; + nextState = PrefixState; + } + return nextState; + } + + //Gather up the displacement, or at least as much of it + //as we can get. + Predecoder::State Predecoder::doDisplacementState() + { + State nextState = ErrorState; + + getImmediate(displacementCollected, + emi.displacement, + displacementSize); + + DPRINTF(Predecoder, "Collecting %d byte displacement, got %d bytes.\n", + displacementSize, displacementCollected); + + if(displacementSize == displacementCollected) { + //Sign extend the displacement + switch(displacementSize) + { + case 1: + emi.displacement = sext<8>(emi.displacement); + break; + case 2: + emi.displacement = sext<16>(emi.displacement); + break; + case 4: + emi.displacement = sext<32>(emi.displacement); + break; + default: + panic("Undefined displacement size!\n"); + } + DPRINTF(Predecoder, "Collected displacement %#x.\n", + emi.displacement); + if(immediateSize) { + nextState = ImmediateState; + } else { + emiIsReady = true; + nextState = PrefixState; + } + } + else + nextState = DisplacementState; + return nextState; + } + + //Gather up the immediate, or at least as much of it + //as we can get + Predecoder::State Predecoder::doImmediateState() + { + State nextState = ErrorState; + + getImmediate(immediateCollected, + emi.immediate, + immediateSize); + + DPRINTF(Predecoder, "Collecting %d byte immediate, got %d bytes.\n", + immediateSize, immediateCollected); + + if(immediateSize == immediateCollected) + { + DPRINTF(Predecoder, "Collected immediate %#x.\n", + emi.immediate); + emiIsReady = true; + nextState = PrefixState; + } + else + nextState = ImmediateState; + return nextState; + } +} diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh new file mode 100644 index 000000000..1df17d6d2 --- /dev/null +++ b/src/arch/x86/predecoder.hh @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2007 The Hewlett-Packard Development Company + * All rights reserved. + * + * Redistribution and use of this software in source and binary forms, + * with or without modification, are permitted provided that the + * following conditions are met: + * + * The software must be used only for Non-Commercial Use which means any + * use which is NOT directed to receiving any direct monetary + * compensation for, or commercial advantage from such use. Illustrative + * examples of non-commercial use are academic research, personal study, + * teaching, education and corporate research & development. + * Illustrative examples of commercial use are distributing products for + * commercial advantage and providing services using the software for + * commercial advantage. + * + * If you wish to use this software or functionality therein that may be + * covered by patents for commercial use, please contact: + * Director of Intellectual Property Licensing + * Office of Strategy and Technology + * Hewlett-Packard Company + * 1501 Page Mill Road + * Palo Alto, California 94304 + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. Redistributions + * in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. Neither the name of + * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. No right of + * sublicense is granted herewith. Derivatives of the software and + * output created using the software may be prepared, but only for + * Non-Commercial Uses. Derivatives of the software may be shared with + * others provided: (i) the others agree to abide by the list of + * conditions herein which includes the Non-Commercial Use restrictions; + * and (ii) such Derivatives of the software include the above copyright + * notice to acknowledge the contribution from this software where + * applicable, this list of conditions and the disclaimer below. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_X86_PREDECODER_HH__ +#define __ARCH_X86_PREDECODER_HH__ + +#include "arch/x86/types.hh" +#include "base/bitfield.hh" +#include "sim/host.hh" + +class ThreadContext; + +namespace X86ISA +{ + class Predecoder + { + private: + //These are defined and documented in predecoder_tables.cc + static const uint8_t Prefixes[256]; + static const uint8_t UsesModRM[2][256]; + static const uint8_t ImmediateType[2][256]; + static const uint8_t ImmediateTypeToSize[3][10]; + + protected: + ThreadContext * tc; + //The bytes to be predecoded + MachInst fetchChunk; + //The pc of the start of fetchChunk + Addr basePC; + //The offset into fetchChunk of current processing + int offset; + //The extended machine instruction being generated + ExtMachInst emi; + + inline uint8_t getNextByte() + { + return (fetchChunk >> (offset * 8)) & 0xff; + } + + void getImmediate(int &collected, uint64_t ¤t, int size) + { + //Figure out how many bytes we still need to get for the + //immediate. + int toGet = size - collected; + //Figure out how many bytes are left in our "buffer" + int remaining = sizeof(MachInst) - offset; + //Get as much as we need, up to the amount available. + toGet = toGet > remaining ? remaining : toGet; + + //Shift the bytes we want to be all the way to the right + uint64_t partialDisp = fetchChunk >> (offset * 8); + //Mask off what we don't want + partialDisp &= mask(toGet * 8); + //Shift it over to overlay with our displacement. + partialDisp <<= (displacementCollected * 8); + //Put it into our displacement + current |= partialDisp; + //Update how many bytes we've collected. + collected += toGet; + consumeBytes(toGet); + } + + inline void consumeByte() + { + offset++; + assert(offset <= sizeof(MachInst)); + if(offset == sizeof(MachInst)) + outOfBytes = true; + } + + inline void consumeBytes(int numBytes) + { + offset += numBytes; + assert(offset <= sizeof(MachInst)); + if(offset == sizeof(MachInst)) + outOfBytes = true; + } + + //State machine state + protected: + //Whether or not we're out of bytes + bool outOfBytes; + //Whether we've completed generating an ExtMachInst + bool emiIsReady; + //The size of the displacement value + int displacementSize; + int displacementCollected; + //The size of the immediate value + int immediateSize; + int immediateCollected; + + enum State { + PrefixState, + OpcodeState, + ModRMState, + SIBState, + DisplacementState, + ImmediateState, + //We should never get to this state. Getting here is an error. + ErrorState + }; + + State state; + + //Functions to handle each of the states + State doPrefixState(uint8_t); + State doOpcodeState(uint8_t); + State doModRMState(uint8_t); + State doSIBState(uint8_t); + State doDisplacementState(); + State doImmediateState(); + + public: + Predecoder(ThreadContext * _tc) : + tc(_tc), basePC(0), offset(0), + outOfBytes(true), emiIsReady(false), + state(PrefixState) + {} + + ThreadContext * getTC() + { + return tc; + } + + void setTC(ThreadContext * _tc) + { + tc = _tc; + } + + void process(); + + //Use this to give data to the predecoder. This should be used + //when there is control flow. + void moreBytes(Addr currPC, Addr off, MachInst data) + { + basePC = currPC; + offset = off; + fetchChunk = data; + assert(off < sizeof(MachInst)); + outOfBytes = false; + process(); + } + + //Use this to give data to the predecoder. This should be used + //when instructions are executed in order. + void moreBytes(MachInst machInst) + { + moreBytes(basePC + sizeof(machInst), 0, machInst); + } + + bool needMoreBytes() + { + return outOfBytes; + } + + bool extMachInstReady() + { + return emiIsReady; + } + + //This returns a constant reference to the ExtMachInst to avoid a copy + const ExtMachInst & getExtMachInst() + { + assert(emiIsReady); + emiIsReady = false; + return emi; + } + }; +}; + +#endif // __ARCH_X86_PREDECODER_HH__ diff --git a/src/arch/x86/predecoder_tables.cc b/src/arch/x86/predecoder_tables.cc new file mode 100644 index 000000000..f233ad234 --- /dev/null +++ b/src/arch/x86/predecoder_tables.cc @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2007 The Hewlett-Packard Development Company + * All rights reserved. + * + * Redistribution and use of this software in source and binary forms, + * with or without modification, are permitted provided that the + * following conditions are met: + * + * The software must be used only for Non-Commercial Use which means any + * use which is NOT directed to receiving any direct monetary + * compensation for, or commercial advantage from such use. Illustrative + * examples of non-commercial use are academic research, personal study, + * teaching, education and corporate research & development. + * Illustrative examples of commercial use are distributing products for + * commercial advantage and providing services using the software for + * commercial advantage. + * + * If you wish to use this software or functionality therein that may be + * covered by patents for commercial use, please contact: + * Director of Intellectual Property Licensing + * Office of Strategy and Technology + * Hewlett-Packard Company + * 1501 Page Mill Road + * Palo Alto, California 94304 + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. Redistributions + * in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. Neither the name of + * the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. No right of + * sublicense is granted herewith. Derivatives of the software and + * output created using the software may be prepared, but only for + * Non-Commercial Uses. Derivatives of the software may be shared with + * others provided: (i) the others agree to abide by the list of + * conditions herein which includes the Non-Commercial Use restrictions; + * and (ii) such Derivatives of the software include the above copyright + * notice to acknowledge the contribution from this software where + * applicable, this list of conditions and the disclaimer below. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/x86/predecoder.hh" +#include "arch/x86/types.hh" + +namespace X86ISA +{ + const uint8_t CS = CSOverride; + const uint8_t DS = DSOverride; + const uint8_t ES = ESOverride; + const uint8_t FS = FSOverride; + const uint8_t GS = GSOverride; + const uint8_t SS = SSOverride; + + const uint8_t OO = OperandSizeOverride; + const uint8_t AO = AddressSizeOverride; + const uint8_t LO = Lock; + const uint8_t RE = Rep; + const uint8_t RN = Repne; + const uint8_t RX = RexPrefix; + + //This table identifies whether a byte is a prefix, and if it is, + //which prefix it is. + const uint8_t Predecoder::Prefixes[256] = + { //LSB +// MSB 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F +/* 0*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 1*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 2*/ 0 , 0 , 0 , 0 , 0 , 0 , ES, 0 , 0 , 0 , 0 , 0 , 0 , 0 , CS, 0, +/* 3*/ 0 , 0 , 0 , 0 , 0 , 0 , SS, 0 , 0 , 0 , 0 , 0 , 0 , 0 , DS, 0, +/* 4*/ RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, RX, +/* 5*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 6*/ 0 , 0 , 0 , 0 , FS, GS, OO, AO, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 7*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 8*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 9*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* A*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* B*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* C*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* D*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* E*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* F*/ LO, 0 , RN, RE, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 + }; + + //This table identifies whether a particular opcode uses the ModRM byte + const uint8_t Predecoder::UsesModRM[2][256] = + {//For one byte instructions + { //LSB +// MSB 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F +/* 0 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0, +/* 1 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0, +/* 2 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0, +/* 3 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0, +/* 4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 6 */ 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1 , 0 , 0 , 0 , 0, +/* 7 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 8 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* 9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* A */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* C */ 1 , 1 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* D */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* F */ 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 + }, + //For two byte instructions + { //LSB +// MSB 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F +/* 0 */ 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1, +/* 1 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 2 */ 1 , 1 , 1 , 1 , 1 , 0 , 1 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* 3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 4 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* 5 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* 6 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* 7 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1, +/* 8 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* 9 */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* A */ 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1, +/* B */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1, +/* C */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0, +/* D */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* E */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1, +/* F */ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 + } + }; + + enum ImmediateTypes { + NoImm, + NI = NoImm, + ByteImm, + BY = ByteImm, + WordImm, + WO = WordImm, + DWordImm, + DW = DWordImm, + QWordImm, + QW = QWordImm, + OWordImm, + OW = OWordImm, + VWordImm, + VW = VWordImm, + ZWordImm, + ZW = ZWordImm, + Pointer, + PO = Pointer, + //The enter instruction takes -2- immediates for a total of 3 bytes + Enter, + EN = Enter + }; + + const uint8_t Predecoder::ImmediateTypeToSize[3][10] = + { +// noimm byte word dword qword oword vword zword enter + {0, 1, 2, 4, 8, 16, 2, 2, 3, 4}, //16 bit + {0, 1, 2, 4, 8, 16, 4, 4, 3, 6}, //32 bit + {0, 1, 2, 4, 8, 16, 4, 8, 3, 0} //64 bit + }; + + //This table determines the immediate type. The first index is the + //number of bytes in the instruction, and the second is the meaningful + //byte of the opcode. I didn't use the NI constant here for the sake + //of clarity. + const uint8_t Predecoder::ImmediateType[2][256] = + {//For one byte instructions + { //LSB +// MSB 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F +/* 0 */ 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , +/* 1 */ 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , +/* 2 */ 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , +/* 3 */ 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , +/* 4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 6 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , ZW, ZW, BY, BY, 0 , 0 , 0 , 0 , +/* 7 */ BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, BY, +/* 8 */ BY, ZW, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* A */ BY, VW, BY, VW, 0 , 0 , 0 , 0 , BY, ZW, 0 , 0 , 0 , 0 , 0 , 0 , +/* B */ BY, BY, BY, BY, BY, BY, BY, BY, VW, VW, VW, VW, VW, VW, VW, VW, +/* C */ BY, BY, WO, 0 , 0 , 0 , BY, ZW, EN, 0 , WO, 0 , 0 , BY, 0 , 0 , +/* D */ 0 , 0 , 0 , 0 , BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* E */ BY, BY, BY, BY, BY, BY, BY, BY, ZW, ZW, PO, BY, 0 , 0 , 0 , 0 , +/* F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 + }, + //For two byte instructions + { //LSB +// MSB 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F +/* 0 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 0 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 2 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 6 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 7 */ BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* 8 */ ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, ZW, +/* 9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* A */ 0 , 0 , 0 , 0 , BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , BY, 0 , 0 , 0 , +/* B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , ZW, 0 , BY, 0 , 0 , 0 , 0 , 0 , +/* C */ 0 , 0 , BY, 0 , BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* D */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/* F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 + } + }; +} diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index 63f65eee5..ca4a15d24 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -59,13 +59,111 @@ #define __ARCH_X86_TYPES_HH__ #include <inttypes.h> +#include <iostream> + +#include "base/bitfield.hh" +#include "base/cprintf.hh" namespace X86ISA { - //XXX This won't work - typedef uint32_t MachInst; - //XXX This won't work either - typedef uint64_t ExtMachInst; + //This really determines how many bytes are passed to the predecoder. + typedef uint64_t MachInst; + + enum Prefixes { + NoOverride = 0, + CSOverride = 1, + DSOverride = 2, + ESOverride = 3, + FSOverride = 4, + GSOverride = 5, + SSOverride = 6, + //The Rex prefix obviously doesn't fit in with the above, but putting + //it here lets us save double the space the enums take up. + RexPrefix = 7, + //There can be only one segment override, so they share the + //first 3 bits in the legacyPrefixes bitfield. + SegmentOverride = 0x7, + OperandSizeOverride = 8, + AddressSizeOverride = 16, + Lock = 32, + Rep = 64, + Repne = 128 + }; + + BitUnion8(ModRM) + Bitfield<7,6> mod; + Bitfield<5,3> reg; + Bitfield<2,0> rm; + EndBitUnion(ModRM) + + BitUnion8(Sib) + Bitfield<7,6> scale; + Bitfield<5,3> index; + Bitfield<2,0> base; + EndBitUnion(Sib) + + BitUnion8(Rex) + Bitfield<3> w; + Bitfield<2> r; + Bitfield<1> x; + Bitfield<0> b; + EndBitUnion(Rex) + + BitUnion8(Opcode) + Bitfield<7,3> top5; + Bitfield<2,0> bottom3; + EndBitUnion(Opcode) + + //The intermediate structure the x86 predecoder returns. + struct ExtMachInst + { + //Prefixes + uint8_t legacy; + Rex rex; + //This holds all of the bytes of the opcode + struct + { + //The number of bytes in this opcode. Right now, we ignore that + //this can be 3 in some cases + uint8_t num; + //The first byte detected in a 2+ byte opcode. Should be 0xF0. + uint8_t prefixA; + //The second byte detected in a 3+ byte opcode. Could be 0xF0 for + //3dnow instructions, or 0x38-0x3F for some SSE instructions. + uint8_t prefixB; + //The main opcode byte. The highest addressed byte in the opcode. + Opcode op; + } opcode; + //Modifier bytes + ModRM modRM; + uint8_t sib; + //Immediate fields + uint64_t immediate; + uint64_t displacement; + }; + + inline static std::ostream & + operator << (std::ostream & os, const ExtMachInst & emi) + { + ccprintf(os, "\n{\n\tleg = %#x,\n\trex = %#x,\n\t" + "op = {\n\t\tnum = %d,\n\t\top = %#x,\n\t\t" + "prefixA = %#x,\n\t\tprefixB = %#x\n\t},\n\t" + "modRM = %#x,\n\tsib = %#x,\n\t" + "immediate = %#x,\n\tdisplacement = %#x\n}\n", + emi.legacy, (uint8_t)emi.rex, + emi.opcode.num, emi.opcode.op, + emi.opcode.prefixA, emi.opcode.prefixB, + (uint8_t)emi.modRM, (uint8_t)emi.sib, + emi.immediate, emi.displacement); + return os; + } + + inline static bool + operator == (const ExtMachInst &emi1, const ExtMachInst &emi2) + { + //Since this is empty, it's always equal + return true; + } typedef uint64_t IntReg; //XXX Should this be a 128 bit structure for XMM memory ops? diff --git a/src/arch/x86/utility.hh b/src/arch/x86/utility.hh index 1fbe1fffe..d89e223de 100644 --- a/src/arch/x86/utility.hh +++ b/src/arch/x86/utility.hh @@ -59,11 +59,23 @@ #define __ARCH_X86_UTILITY_HH__ #include "arch/x86/types.hh" +#include "base/hashmap.hh" #include "base/misc.hh" +#include "cpu/thread_context.hh" #include "sim/host.hh" class ThreadContext; +namespace __hash_namespace { + template<> + struct hash<X86ISA::ExtMachInst> { + size_t operator()(const X86ISA::ExtMachInst &emi) const { + //Because these are all the same, return 0 + return 0; + }; + }; +} + namespace X86ISA { static inline bool @@ -72,11 +84,6 @@ namespace X86ISA return false; } - inline ExtMachInst - makeExtMI(MachInst inst, ThreadContext * xc) { - return inst; - } - inline bool isCallerSaveIntegerRegister(unsigned int reg) { panic("register classification not implemented"); return false; diff --git a/src/base/SConscript b/src/base/SConscript index 788aa3e6f..6fc140145 100644 --- a/src/base/SConscript +++ b/src/base/SConscript @@ -30,10 +30,26 @@ Import('*') -# base/traceflags.{cc,hh} are generated from base/traceflags.py. -# $TARGET.base will expand to "<build-dir>/base/traceflags". -env.Command(['traceflags.hh', 'traceflags.cc'], 'traceflags.py', - 'python $SOURCE $TARGET.base') +def make_cc(target, source, env): + assert(len(source) == 1) + assert(len(target) == 1) + + traceflags = {} + execfile(str(source[0]), traceflags) + func = traceflags['gen_cc'] + func(str(target[0])) + +def make_hh(target, source, env): + assert(len(source) == 1) + assert(len(target) == 1) + + traceflags = {} + execfile(str(source[0]), traceflags) + func = traceflags['gen_hh'] + func(str(target[0])) + +env.Command('traceflags.hh', 'traceflags.py', make_hh) +env.Command('traceflags.cc', 'traceflags.py', make_cc) Source('annotate.cc') Source('bigint.cc') diff --git a/src/base/bigint.hh b/src/base/bigint.hh index ed48c67fe..d60684231 100644 --- a/src/base/bigint.hh +++ b/src/base/bigint.hh @@ -28,6 +28,8 @@ * Authors: Ali Saidi */ +#include "base/misc.hh" + #include <iostream> #ifndef __BASE_BIGINT_HH__ @@ -49,6 +51,12 @@ struct m5_twin64_t { b = x; return *this; } + + operator uint64_t() + { + panic("Tried to cram a twin64_t into an integer!\n"); + return a; + } }; struct m5_twin32_t { @@ -67,6 +75,12 @@ struct m5_twin32_t { b = x; return *this; } + + operator uint32_t() + { + panic("Tried to cram a twin32_t into an integer!\n"); + return a; + } }; diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh index 83b9138b4..69cce2245 100644 --- a/src/base/bitfield.hh +++ b/src/base/bitfield.hh @@ -121,20 +121,282 @@ findMsbSet(uint64_t val) { int msb = 0; if (!val) return 0; - if (bits(val, 63,32)) msb += 32; - val >>= 32; - if (bits(val, 31,16)) msb += 16; - val >>= 16; - if (bits(val, 15,8)) msb += 8; - val >>= 8; - if (bits(val, 7,4)) msb += 4; - val >>= 4; - if (bits(val, 3,2)) msb += 2; - val >>= 2; - if (bits(val, 1,1)) msb += 1; + if (bits(val, 63,32)) { msb += 32; val >>= 32; } + if (bits(val, 31,16)) { msb += 16; val >>= 16; } + if (bits(val, 15,8)) { msb += 8; val >>= 8; } + if (bits(val, 7,4)) { msb += 4; val >>= 4; } + if (bits(val, 3,2)) { msb += 2; val >>= 2; } + if (bits(val, 1,1)) { msb += 1; } return msb; } +// The following implements the BitUnion system of defining bitfields +//on top of an underlying class. This is done through the pervasive use of +//both named and unnamed unions which all contain the same actual storage. +//Since they're unioned with each other, all of these storage locations +//overlap. This allows all of the bitfields to manipulate the same data +//without having to have access to each other. More details are provided with the +//individual components. +//This namespace is for classes which implement the backend of the BitUnion +//stuff. Don't use any of these directly, except for the Bitfield classes in +//the *BitfieldTypes class(es). +namespace BitfieldBackend +{ + //A base class for all bitfields. It instantiates the actual storage, + //and provides getBits and setBits functions for manipulating it. The + //Data template parameter is type of the underlying storage. + template<class Data> + class BitfieldBase + { + protected: + Data __data; + + //This function returns a range of bits from the underlying storage. + //It relies on the "bits" function above. It's the user's + //responsibility to make sure that there is a properly overloaded + //version of this function for whatever type they want to overlay. + inline uint64_t + getBits(int first, int last) const + { + return bits(__data, first, last); + } + + //Similar to the above, but for settings bits with replaceBits. + inline void + setBits(int first, int last, uint64_t val) + { + replaceBits(__data, first, last, val); + } + }; + + //This class contains all the "regular" bitfield classes. It is inherited + //by all BitUnions which give them access to those types. + template<class Type> + class RegularBitfieldTypes + { + protected: + //This class implements ordinary bitfields, that is a span of bits + //who's msb is "first", and who's lsb is "last". + template<int first, int last=first> + class Bitfield : public BitfieldBase<Type> + { + public: + operator uint64_t () const + { + return this->getBits(first, last); + } + + uint64_t + operator=(const uint64_t _data) + { + this->setBits(first, last, _data); + return _data; + } + }; + + //A class which specializes the above so that it can only be read + //from. This is accomplished explicitly making sure the assignment + //operator is blocked. The conversion operator is carried through + //inheritance. This will unfortunately need to be copied into each + //bitfield type due to limitations with how templates work + template<int first, int last=first> + class BitfieldRO : public Bitfield<first, last> + { + private: + uint64_t + operator=(const uint64_t _data); + }; + + //Similar to the above, but only allows writing. + template<int first, int last=first> + class BitfieldWO : public Bitfield<first, last> + { + private: + operator uint64_t () const; + + public: + using Bitfield<first, last>::operator=; + }; + }; + + //This class contains all the "regular" bitfield classes. It is inherited + //by all BitUnions which give them access to those types. + template<class Type> + class SignedBitfieldTypes + { + protected: + //This class implements ordinary bitfields, that is a span of bits + //who's msb is "first", and who's lsb is "last". + template<int first, int last=first> + class SignedBitfield : public BitfieldBase<Type> + { + public: + operator int64_t () const + { + return sext<first - last + 1>(this->getBits(first, last)); + } + + int64_t + operator=(const int64_t _data) + { + this->setBits(first, last, _data); + return _data; + } + }; + + //A class which specializes the above so that it can only be read + //from. This is accomplished explicitly making sure the assignment + //operator is blocked. The conversion operator is carried through + //inheritance. This will unfortunately need to be copied into each + //bitfield type due to limitations with how templates work + template<int first, int last=first> + class SignedBitfieldRO : public SignedBitfield<first, last> + { + private: + int64_t + operator=(const int64_t _data); + }; + + //Similar to the above, but only allows writing. + template<int first, int last=first> + class SignedBitfieldWO : public SignedBitfield<first, last> + { + private: + operator int64_t () const; + + public: + int64_t operator=(const int64_t _data) + { + *((SignedBitfield<first, last> *)this) = _data; + return _data; + } + }; + }; + + template<class Type> + class BitfieldTypes : public RegularBitfieldTypes<Type>, + public SignedBitfieldTypes<Type> + {}; + + //When a BitUnion is set up, an underlying class is created which holds + //the actual union. This class then inherits from it, and provids the + //implementations for various operators. Setting things up this way + //prevents having to redefine these functions in every different BitUnion + //type. More operators could be implemented in the future, as the need + //arises. + template <class Type, class Base> + class BitUnionOperators : public Base + { + public: + operator Type () const + { + return Base::__data; + } + + Type + operator=(const Type & _data) + { + Base::__data = _data; + return _data; + } + + bool + operator<(const Base & base) const + { + return Base::__data < base.__data; + } + + bool + operator==(const Base & base) const + { + return Base::__data == base.__data; + } + }; +} + +//This macro is a backend for other macros that specialize it slightly. +//First, it creates/extends a namespace "BitfieldUnderlyingClasses" and +//sticks the class which has the actual union in it, which +//BitfieldOperators above inherits from. Putting these classes in a special +//namespace ensures that there will be no collisions with other names as long +//as the BitUnion names themselves are all distinct and nothing else uses +//the BitfieldUnderlyingClasses namespace, which is unlikely. The class itself +//creates a typedef of the "type" parameter called __DataType. This allows +//the type to propagate outside of the macro itself in a controlled way. +//Finally, the base storage is defined which BitfieldOperators will refer to +//in the operators it defines. This macro is intended to be followed by +//bitfield definitions which will end up inside it's union. As explained +//above, these is overlayed the __data member in its entirety by each of the +//bitfields which are defined in the union, creating shared storage with no +//overhead. +#define __BitUnion(type, name) \ + namespace BitfieldUnderlyingClasses \ + { \ + class name; \ + } \ + class BitfieldUnderlyingClasses::name : \ + public BitfieldBackend::BitfieldTypes<type> \ + { \ + public: \ + typedef type __DataType; \ + union { \ + type __data;\ + +//This closes off the class and union started by the above macro. It is +//followed by a typedef which makes "name" refer to a BitfieldOperator +//class inheriting from the class and union just defined, which completes +//building up the type for the user. +#define EndBitUnion(name) \ + }; \ + }; \ + typedef BitfieldBackend::BitUnionOperators< \ + BitfieldUnderlyingClasses::name::__DataType, \ + BitfieldUnderlyingClasses::name> name; + +//This sets up a bitfield which has other bitfields nested inside of it. The +//__data member functions like the "underlying storage" of the top level +//BitUnion. Like everything else, it overlays with the top level storage, so +//making it a regular bitfield type makes the entire thing function as a +//regular bitfield when referred to by itself. +#define __SubBitUnion(fieldType, first, last, name) \ + class : public BitfieldBackend::BitfieldTypes<__DataType> \ + { \ + public: \ + union { \ + fieldType<first, last> __data; + +//This closes off the union created above and gives it a name. Unlike the top +//level BitUnion, we're interested in creating an object instead of a type. +//The operators are defined in the macro itself instead of a class for +//technical reasons. If someone determines a way to move them to one, please +//do so. +#define EndSubBitUnion(name) \ + }; \ + inline operator const __DataType () \ + { return __data; } \ + \ + inline const __DataType operator = (const __DataType & _data) \ + { __data = _data; } \ + } name; + +//Regular bitfields +//These define macros for read/write regular bitfield based subbitfields. +#define SubBitUnion(name, first, last) \ + __SubBitUnion(Bitfield, first, last, name) + +//Regular bitfields +//These define macros for read/write regular bitfield based subbitfields. +#define SignedSubBitUnion(name, first, last) \ + __SubBitUnion(SignedBitfield, first, last, name) + +//Use this to define an arbitrary type overlayed with bitfields. +#define BitUnion(type, name) __BitUnion(type, name) + +//Use this to define conveniently sized values overlayed with bitfields. +#define BitUnion64(name) __BitUnion(uint64_t, name) +#define BitUnion32(name) __BitUnion(uint32_t, name) +#define BitUnion16(name) __BitUnion(uint16_t, name) +#define BitUnion8(name) __BitUnion(uint8_t, name) #endif // __BASE_BITFIELD_HH__ diff --git a/src/base/cprintf.hh b/src/base/cprintf.hh index 7f8e33367..cff73a228 100644 --- a/src/base/cprintf.hh +++ b/src/base/cprintf.hh @@ -143,20 +143,20 @@ ccprintf(std::ostream &stream, const std::string &format, inline void ccprintf(std::ostream &stream, const std::string &format, CPRINTF_DECLARATION) { - ccprintf(stream, format, VARARGS_ALLARGS); + ccprintf(stream, format.c_str(), VARARGS_ALLARGS); } inline void cprintf(const std::string &format, CPRINTF_DECLARATION) { - ccprintf(std::cout, format, VARARGS_ALLARGS); + ccprintf(std::cout, format.c_str(), VARARGS_ALLARGS); } inline std::string csprintf(const std::string &format, CPRINTF_DECLARATION) { std::stringstream stream; - ccprintf(stream, format, VARARGS_ALLARGS); + ccprintf(stream, format.c_str(), VARARGS_ALLARGS); return stream.str(); } diff --git a/src/base/traceflags.py b/src/base/traceflags.py index cb17d98d3..6b241c410 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -116,6 +116,7 @@ baseFlags = [ 'ISP', 'IdeCtrl', 'IdeDisk', + 'Iob', 'Interrupt', 'LLSC', 'LSQ', @@ -136,6 +137,7 @@ baseFlags = [ 'PciConfigAll', 'Pipeline', 'Printf', + 'Predecoder', 'Quiesce', 'ROB', 'Regs', @@ -348,16 +350,3 @@ const char *Trace::flagStrings[] = print >>ccfile, '};' ccfile.close() - -if __name__ == '__main__': - # This file generates the header and source files for the flags - # that control the tracing facility. - - import sys - - if len(sys.argv) != 2: - print "%s: Need argument (basename of cc/hh files)" % sys.argv[0] - sys.exit(1) - - gen_hh(sys.argv[1] + '.hh') - gen_cc(sys.argv[1] + '.cc') diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 3e0be6ad8..4dccee0d3 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -226,7 +226,8 @@ BaseCPU::startup() #endif if (params->progress_interval) { - new CPUProgressEvent(&mainEventQueue, params->progress_interval, + new CPUProgressEvent(&mainEventQueue, + cycles(params->progress_interval), this); } } diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 7167bfde0..4d8300186 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -34,11 +34,11 @@ #include <vector> +#include "arch/isa_traits.hh" #include "base/statistics.hh" #include "config/full_system.hh" #include "sim/eventq.hh" #include "mem/mem_object.hh" -#include "arch/isa_traits.hh" #if FULL_SYSTEM #include "arch/interrupts.hh" @@ -50,6 +50,11 @@ class ThreadContext; class System; class Port; +namespace TheISA +{ + class Predecoder; +} + class CPUProgressEvent : public Event { protected: @@ -125,6 +130,7 @@ class BaseCPU : public MemObject protected: std::vector<ThreadContext *> threadContexts; + std::vector<TheISA::Predecoder *> predecoders; public: diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 9ccdcdccc..6c6d90076 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -171,15 +171,15 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The kind of fault this instruction has generated. */ Fault fault; - /** The memory request. */ - Request *req; - /** Pointer to the data for the memory access. */ uint8_t *memData; /** The effective virtual address (lds & stores only). */ Addr effAddr; + /** Is the effective virtual address valid. */ + bool effAddrValid; + /** The effective physical address. */ Addr physEffAddr; @@ -601,12 +601,18 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns whether or not this instruction is ready to issue. */ bool readyToIssue() const { return status[CanIssue]; } + /** Clears this instruction being able to issue. */ + void clearCanIssue() { status.reset(CanIssue); } + /** Sets this instruction as issued from the IQ. */ void setIssued() { status.set(Issued); } /** Returns whether or not this instruction has issued. */ bool isIssued() const { return status[Issued]; } + /** Clears this instruction as being issued. */ + void clearIssued() { status.reset(Issued); } + /** Sets this instruction as executed. */ void setExecuted() { status.set(Executed); } @@ -729,6 +735,12 @@ class BaseDynInst : public FastAlloc, public RefCounted */ bool eaCalcDone; + /** Is this instruction's memory access uncacheable. */ + bool isUncacheable; + + /** Has this instruction generated a memory request. */ + bool reqMade; + public: /** Sets the effective address. */ void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } @@ -745,6 +757,12 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Whether or not the memory operation is done. */ bool memOpDone; + /** Is this instruction's memory access uncacheable. */ + bool uncacheable() { return isUncacheable; } + + /** Has this instruction generated a memory request. */ + bool hasRequest() { return reqMade; } + public: /** Load queue index. */ int16_t lqIdx; @@ -776,25 +794,25 @@ template<class T> inline Fault BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags) { - // Sometimes reads will get retried, so they may come through here - // twice. - if (!req) { - req = new Request(); - req->setVirt(asid, addr, sizeof(T), flags, this->PC); - req->setThreadContext(thread->readCpuId(), threadNumber); - } else { - assert(addr == req->getVaddr()); - } + reqMade = true; + Request *req = new Request(); + req->setVirt(asid, addr, sizeof(T), flags, this->PC); + req->setThreadContext(thread->readCpuId(), threadNumber); if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() > TheISA::VMPageSize) { + delete req; return TheISA::genAlignmentFault(); } fault = cpu->translateDataReadReq(req, thread); + if (req->isUncacheable()) + isUncacheable = true; + if (fault == NoFault) { effAddr = req->getVaddr(); + effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); @@ -817,6 +835,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags) // Commit will have to clean up whatever happened. Set this // instruction as executed. this->setExecuted(); + delete req; } if (traceData) { @@ -837,21 +856,25 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) traceData->setData(data); } - assert(req == NULL); - - req = new Request(); + reqMade = true; + Request *req = new Request(); req->setVirt(asid, addr, sizeof(T), flags, this->PC); req->setThreadContext(thread->readCpuId(), threadNumber); if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() > TheISA::VMPageSize) { + delete req; return TheISA::genAlignmentFault(); } fault = cpu->translateDataWriteReq(req, thread); + if (req->isUncacheable()) + isUncacheable = true; + if (fault == NoFault) { effAddr = req->getVaddr(); + effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); #if 0 @@ -863,12 +886,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) #else fault = cpu->write(req, data, sqIdx); #endif - } - - if (res) { - // always return some result to keep misspeculated paths - // (which will ignore faults) deterministic - *res = (fault == NoFault) ? req->getExtraData() : 0; + } else { + delete req; } return fault; diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index c3d71e428..a1c866336 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -92,11 +92,13 @@ template <class Impl> void BaseDynInst<Impl>::initVars() { - req = NULL; memData = NULL; effAddr = 0; + effAddrValid = false; physEffAddr = 0; + isUncacheable = false; + reqMade = false; readyRegs = 0; instResult.integer = 0; @@ -140,10 +142,6 @@ BaseDynInst<Impl>::initVars() template <class Impl> BaseDynInst<Impl>::~BaseDynInst() { - if (req) { - delete req; - } - if (memData) { delete [] memData; } @@ -271,7 +269,7 @@ void BaseDynInst<Impl>::markSrcRegReady() { if (++readyRegs == numSrcRegs()) { - status.set(CanIssue); + setCanIssue(); } } diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc index 54d8c68fa..c568b1439 100644 --- a/src/cpu/exetrace.cc +++ b/src/cpu/exetrace.cc @@ -31,14 +31,17 @@ * Steve Raasch */ +#include <errno.h> #include <fstream> #include <iomanip> #include <sys/ipc.h> #include <sys/shm.h> +#include "arch/predecoder.hh" #include "arch/regfile.hh" #include "arch/utility.hh" #include "base/loader/symtab.hh" +#include "base/socket.hh" #include "config/full_system.hh" #include "cpu/base.hh" #include "cpu/exetrace.hh" @@ -64,6 +67,7 @@ static bool wasMicro = false; namespace Trace { SharedData *shared_data = NULL; +ListenSocket *cosim_listener = NULL; void setupSharedData() @@ -149,9 +153,96 @@ Trace::InstRecord::dump() ostream &outs = Trace::output(); DPRINTF(Sparc, "Instruction: %#X\n", staticInst->machInst); + bool diff = true; if (IsOn(ExecRegDelta)) { + diff = false; +#ifndef NDEBUG #if THE_ISA == SPARC_ISA + static int fd = 0; + //Don't print what happens for each micro-op, just print out + //once at the last op, and for regular instructions. + if(!staticInst->isMicroOp() || staticInst->isLastMicroOp()) + { + if(!cosim_listener) + { + int port = 8000; + cosim_listener = new ListenSocket(); + while(!cosim_listener->listen(port, true)) + { + DPRINTF(GDBMisc, "Can't bind port %d\n", port); + port++; + } + ccprintf(cerr, "Listening for cosimulator on port %d\n", port); + fd = cosim_listener->accept(); + } + char prefix[] = "goli"; + for(int p = 0; p < 4; p++) + { + for(int i = 0; i < 8; i++) + { + uint64_t regVal; + int res = read(fd, ®Val, sizeof(regVal)); + if(res < 0) + panic("First read call failed! %s\n", strerror(errno)); + regVal = TheISA::gtoh(regVal); + uint64_t realRegVal = thread->readIntReg(p * 8 + i); + if((regVal & 0xffffffffULL) != (realRegVal & 0xffffffffULL)) + { + DPRINTF(ExecRegDelta, "Register %s%d should be %#x but is %#x.\n", prefix[p], i, regVal, realRegVal); + diff = true; + } + //ccprintf(outs, "%s%d m5 = %#x statetrace = %#x\n", prefix[p], i, realRegVal, regVal); + } + } + /*for(int f = 0; f <= 62; f+=2) + { + uint64_t regVal; + int res = read(fd, ®Val, sizeof(regVal)); + if(res < 0) + panic("First read call failed! %s\n", strerror(errno)); + regVal = TheISA::gtoh(regVal); + uint64_t realRegVal = thread->readFloatRegBits(f, 64); + if(regVal != realRegVal) + { + DPRINTF(ExecRegDelta, "Register f%d should be %#x but is %#x.\n", f, regVal, realRegVal); + } + }*/ + uint64_t regVal; + int res = read(fd, ®Val, sizeof(regVal)); + if(res < 0) + panic("First read call failed! %s\n", strerror(errno)); + regVal = TheISA::gtoh(regVal); + uint64_t realRegVal = thread->readNextPC(); + if(regVal != realRegVal) + { + DPRINTF(ExecRegDelta, "Register pc should be %#x but is %#x.\n", regVal, realRegVal); + diff = true; + } + res = read(fd, ®Val, sizeof(regVal)); + if(res < 0) + panic("First read call failed! %s\n", strerror(errno)); + regVal = TheISA::gtoh(regVal); + realRegVal = thread->readNextNPC(); + if(regVal != realRegVal) + { + DPRINTF(ExecRegDelta, "Register npc should be %#x but is %#x.\n", regVal, realRegVal); + diff = true; + } + res = read(fd, ®Val, sizeof(regVal)); + if(res < 0) + panic("First read call failed! %s\n", strerror(errno)); + regVal = TheISA::gtoh(regVal); + realRegVal = thread->readIntReg(SparcISA::NumIntArchRegs + 2); + if((regVal & 0xF) != (realRegVal & 0xF)) + { + DPRINTF(ExecRegDelta, "Register ccr should be %#x but is %#x.\n", regVal, realRegVal); + diff = true; + } + } +#endif +#endif +#if 0 //THE_ISA == SPARC_ISA //Don't print what happens for each micro-op, just print out //once at the last op, and for regular instructions. if(!staticInst->isMicroOp() || staticInst->isLastMicroOp()) @@ -210,7 +301,8 @@ Trace::InstRecord::dump() } #endif } - else if (IsOn(ExecIntel)) { + if(!diff) { + } else if (IsOn(ExecIntel)) { ccprintf(outs, "%7d ) ", when); outs << "0x" << hex << PC << ":\t"; if (staticInst->isLoad()) { @@ -302,6 +394,7 @@ Trace::InstRecord::dump() outs << endl; } #if THE_ISA == SPARC_ISA && FULL_SYSTEM + static TheISA::Predecoder predecoder(NULL); // Compare if (IsOn(ExecLegion)) { @@ -556,9 +649,13 @@ Trace::InstRecord::dump() << staticInst->disassemble(m5Pc, debugSymbolTable) << endl; + predecoder.setTC(thread); + predecoder.moreBytes(m5Pc, 0, shared_data->instruction); + + assert(predecoder.extMachInstRead()); + StaticInstPtr legionInst = - StaticInst::decode(makeExtMI(shared_data->instruction, - thread)); + StaticInst::decode(predecoder.getExtMachInst()); outs << setfill(' ') << setw(15) << " Legion Inst: " << "0x" << setw(8) << setfill('0') << hex diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 5a375a4b8..34754d3c5 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param<int> clock; Param<int> phase; Param<int> numThreads; +Param<int> cpu_id; Param<int> activity; #if FULL_SYSTEM SimObjectParam<System *> system; -Param<int> cpu_id; SimObjectParam<AlphaISA::ITB *> itb; SimObjectParam<AlphaISA::DTB *> dtb; Param<Tick> profile; @@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM_DFLT(phase, "clock phase", 0), INIT_PARAM(numThreads, "number of HW thread contexts"), + INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM_DFLT(activity, "Initial activity count", 0), #if FULL_SYSTEM INIT_PARAM(system, "System object"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(itb, "Instruction translation buffer"), INIT_PARAM(dtb, "Data translation buffer"), INIT_PARAM(profile, ""), @@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU) AlphaSimpleParams *params = new AlphaSimpleParams; params->clock = clock; + params->phase = phase; params->name = getInstanceName(); params->numberOfThreads = actual_num_threads; + params->cpu_id = cpu_id; params->activity = activity; #if FULL_SYSTEM params->system = system; - params->cpu_id = cpu_id; params->itb = itb; params->dtb = dtb; params->profile = profile; diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh index b91972704..304ee6c38 100644 --- a/src/cpu/o3/alpha/cpu_impl.hh +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -114,6 +114,7 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params) #endif // Give the thread the TC. this->thread[i]->tc = tc; + this->thread[i]->setCpuId(params->cpu_id); // Add the TC to the CPU's list of TC's. this->threadContexts.push_back(tc); diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 0d7d82529..e2ad23954 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -247,6 +247,11 @@ class DefaultCommit /** Handles squashing due to an TC write. */ void squashFromTC(unsigned tid); +#if FULL_SYSTEM + /** Handles processing an interrupt. */ + void handleInterrupt(); +#endif // FULL_SYSTEM + /** Commits as many instructions as possible. */ void commitInsts(); @@ -409,6 +414,16 @@ class DefaultCommit /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; + /** Records if there is a trap currently in flight. */ + bool trapInFlight[Impl::MaxThreads]; + + /** Records if there were any stores committed this cycle. */ + bool committedStores[Impl::MaxThreads]; + + /** Records if commit should check if the ROB is truly empty (see + commit_impl.hh). */ + bool checkEmptyROB[Impl::MaxThreads]; + /** Pointer to the list of active threads. */ std::list<unsigned> *activeThreads; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 18fb2aaa3..3fd85595f 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -118,6 +118,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) for (int i=0; i < numThreads; i++) { commitStatus[i] = Idle; changedROBNumEntries[i] = false; + checkEmptyROB[i] = false; + trapInFlight[i] = false; + committedStores[i] = false; trapSquash[i] = false; tcSquash[i] = false; PC[i] = nextPC[i] = nextNPC[i] = 0; @@ -335,6 +338,7 @@ DefaultCommit<Impl>::initStage() for (int i=0; i < numThreads; i++) { toIEW->commitInfo[i].usedROB = true; toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i); + toIEW->commitInfo[i].emptyROB = true; } cpu->activityThisCycle(); @@ -473,14 +477,14 @@ DefaultCommit<Impl>::generateTrapEvent(unsigned tid) TrapEvent *trap = new TrapEvent(this, tid); trap->schedule(curTick + trapLatency); - - thread[tid]->trapPending = true; + trapInFlight[tid] = true; } template <class Impl> void DefaultCommit<Impl>::generateTCEvent(unsigned tid) { + assert(!trapInFlight[tid]); DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid); tcSquash[tid] = true; @@ -495,7 +499,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid) // Hopefully this doesn't mess things up. Basically I want to squash // all instructions of this thread. InstSeqNum squashed_inst = rob->isEmpty() ? - 0 : rob->readHeadInst(tid)->seqNum - 1;; + 0 : rob->readHeadInst(tid)->seqNum - 1; // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB (0 in this case. @@ -532,6 +536,7 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid) thread[tid]->trapPending = false; thread[tid]->inSyscall = false; + trapInFlight[tid] = false; trapSquash[tid] = false; @@ -580,6 +585,10 @@ DefaultCommit<Impl>::tick() while (threads != end) { unsigned tid = *threads++; + // Clear the bit saying if the thread has committed stores + // this cycle. + committedStores[tid] = false; + if (commitStatus[tid] == ROBSquashing) { if (rob->isDoneSquashing(tid)) { @@ -635,16 +644,11 @@ DefaultCommit<Impl>::tick() updateStatus(); } +#if FULL_SYSTEM template <class Impl> void -DefaultCommit<Impl>::commit() +DefaultCommit<Impl>::handleInterrupt() { - - ////////////////////////////////////// - // Check for interrupts - ////////////////////////////////////// - -#if FULL_SYSTEM if (interrupt != NoFault) { // Wait until the ROB is empty and all stores have drained in // order to enter the interrupt. @@ -653,6 +657,12 @@ DefaultCommit<Impl>::commit() // an interrupt needed to be handled. DPRINTF(Commit, "Interrupt detected.\n"); + Fault new_interrupt = cpu->getInterrupts(); + assert(new_interrupt != NoFault); + + // Clear the interrupt now that it's going to be handled + toIEW->commitInfo[0].clearInterrupt = true; + assert(!thread[0]->inSyscall); thread[0]->inSyscall = true; @@ -666,16 +676,14 @@ DefaultCommit<Impl>::commit() // Generate trap squash event. generateTrapEvent(0); - // Clear the interrupt now that it's been handled - toIEW->commitInfo[0].clearInterrupt = true; interrupt = NoFault; } else { DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n"); } - } else if (cpu->check_interrupts(cpu->tcBase(0)) && - commitStatus[0] != TrapPending && - !trapSquash[0] && - !tcSquash[0]) { + } else if (commitStatus[0] != TrapPending && + cpu->check_interrupts(cpu->tcBase(0)) && + !trapSquash[0] && + !tcSquash[0]) { // Process interrupts if interrupts are enabled, not in PAL // mode, and no other traps or external squashes are currently // pending. @@ -691,7 +699,21 @@ DefaultCommit<Impl>::commit() toIEW->commitInfo[0].interruptPending = true; } } +} +#endif // FULL_SYSTEM + +template <class Impl> +void +DefaultCommit<Impl>::commit() +{ +#if FULL_SYSTEM + // Check for any interrupt, and start processing it. Or if we + // have an outstanding interrupt and are at a point when it is + // valid to take an interrupt, process it. + if (cpu->check_interrupts(cpu->tcBase(0))) { + handleInterrupt(); + } #endif // FULL_SYSTEM //////////////////////////////////// @@ -709,6 +731,7 @@ DefaultCommit<Impl>::commit() assert(!tcSquash[tid]); squashFromTrap(tid); } else if (tcSquash[tid] == true) { + assert(commitStatus[tid] != TrapPending); squashFromTC(tid); } @@ -753,6 +776,7 @@ DefaultCommit<Impl>::commit() bdelay_done_seq_num--; #endif } + // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. youngestSeqNum[tid] = squashed_inst; @@ -817,13 +841,29 @@ DefaultCommit<Impl>::commit() toIEW->commitInfo[tid].usedROB = true; toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); - if (rob->isEmpty(tid)) { - toIEW->commitInfo[tid].emptyROB = true; - } - wroteToTimeBuffer = true; changedROBNumEntries[tid] = false; + if (rob->isEmpty(tid)) + checkEmptyROB[tid] = true; } + + // ROB is only considered "empty" for previous stages if: a) + // ROB is empty, b) there are no outstanding stores, c) IEW + // stage has received any information regarding stores that + // committed. + // c) is checked by making sure to not consider the ROB empty + // on the same cycle as when stores have been committed. + // @todo: Make this handle multi-cycle communication between + // commit and IEW. + if (checkEmptyROB[tid] && rob->isEmpty(tid) && + !iewStage->hasStoresToWB() && !committedStores[tid]) { + checkEmptyROB[tid] = false; + toIEW->commitInfo[tid].usedROB = true; + toIEW->commitInfo[tid].emptyROB = true; + toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); + wroteToTimeBuffer = true; + } + } } @@ -966,8 +1006,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->setAtCommit(); - if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || head_inst->isMemBarrier() || @@ -977,19 +1015,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) "instruction [sn:%lli] at the head of the ROB, PC %#x.\n", head_inst->seqNum, head_inst->readPC()); - // Hack to make sure syscalls/memory barriers/quiesces - // aren't executed until all stores write back their data. - // This direct communication shouldn't be used for - // anything other than this. - if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() || - head_inst->isQuiesce()) && - iewStage->hasStoresToWB()) - { + if (inst_num > 0 || iewStage->hasStoresToWB()) { DPRINTF(Commit, "Waiting for all stores to writeback.\n"); return false; - } else if (inst_num > 0 || iewStage->hasStoresToWB()) { - DPRINTF(Commit, "Waiting to become head of commit.\n"); - return false; } toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum; @@ -1002,6 +1030,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } else if (head_inst->isLoad()) { + if (inst_num > 0 || iewStage->hasStoresToWB()) { + DPRINTF(Commit, "Waiting for all stores to writeback.\n"); + return false; + } + + assert(head_inst->uncacheable()); DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n", head_inst->seqNum, head_inst->readPC()); @@ -1025,8 +1059,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) panic("Thread sync instructions are not handled yet.\n"); } + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = head_inst->getFault(); + // Stores mark themselves as completed. - if (!head_inst->isStore()) { + if (!head_inst->isStore() && inst_fault == NoFault) { head_inst->setCompleted(); } @@ -1038,9 +1075,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) } #endif - // Check if the instruction caused a fault. If so, trap. - Fault inst_fault = head_inst->getFault(); - // DTB will sometimes need the machine instruction for when // faults happen. So we will set it here, prior to the DTB // possibly needing it for its fault. @@ -1048,7 +1082,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) static_cast<TheISA::MachInst>(head_inst->staticInst->machInst)); if (inst_fault != NoFault) { - head_inst->setCompleted(); DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", head_inst->seqNum, head_inst->readPC()); @@ -1057,6 +1090,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } + head_inst->setCompleted(); + #if USE_CHECKER if (cpu->checker && head_inst->isStore()) { cpu->checker->verify(head_inst); @@ -1082,6 +1117,14 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) commitStatus[tid] = TrapPending; + if (head_inst->traceData) { + head_inst->traceData->setFetchSeq(head_inst->seqNum); + head_inst->traceData->setCPSeq(thread[tid]->numInst); + head_inst->traceData->dump(); + delete head_inst->traceData; + head_inst->traceData = NULL; + } + // Generate trap squash event. generateTrapEvent(tid); // warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC()); @@ -1123,6 +1166,10 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Finally clear the head ROB entry. rob->retireHead(tid); + // If this was a store, record it for this cycle. + if (head_inst->isStore()) + committedStores[tid] = true; + // Return true to indicate that we have committed an instruction. return true; } @@ -1167,7 +1214,8 @@ DefaultCommit<Impl>::getInsts() int tid = inst->threadNumber; if (!inst->isSquashed() && - commitStatus[tid] != ROBSquashing) { + commitStatus[tid] != ROBSquashing && + commitStatus[tid] != TrapPending) { changedROBNumEntries[tid] = true; DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n", diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 38e6a0b5b..354e3c490 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -466,7 +466,7 @@ FullO3CPU<Impl>::tick() lastRunningCycle = curTick; timesIdled++; } else { - tickEvent.schedule(curTick + cycles(1)); + tickEvent.schedule(nextCycle(curTick + cycles(1))); DPRINTF(O3CPU, "Scheduling next tick!\n"); } } @@ -886,7 +886,7 @@ FullO3CPU<Impl>::resume() #endif if (!tickEvent.scheduled()) - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); _status = Running; } @@ -979,11 +979,11 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) ThreadContext *tc = threadContexts[i]; if (tc->status() == ThreadContext::Active && _status != Running) { _status = Running; - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } } if (!tickEvent.scheduled()) - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } template <class Impl> @@ -1393,7 +1393,7 @@ FullO3CPU<Impl>::wakeCPU() idleCycles += (curTick - 1) - lastRunningCycle; - tickEvent.schedule(curTick); + tickEvent.schedule(nextCycle()); } template <class Impl> diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index ea374dd57..0ab20ba2a 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - tickEvent.reschedule(curTick + cycles(delay)); + tickEvent.reschedule(nextCycle(curTick + cycles(delay))); else if (!tickEvent.scheduled()) - tickEvent.schedule(curTick + cycles(delay)); + tickEvent.schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule tick event, regardless of its current state. */ @@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU { // Schedule thread to activate, regardless of its current state. if (activateThreadEvent[tid].squashed()) - activateThreadEvent[tid].reschedule(curTick + cycles(delay)); + activateThreadEvent[tid]. + reschedule(nextCycle(curTick + cycles(delay))); else if (!activateThreadEvent[tid].scheduled()) - activateThreadEvent[tid].schedule(curTick + cycles(delay)); + activateThreadEvent[tid]. + schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule actiavte thread event, regardless of its current state. */ @@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU { // Schedule thread to activate, regardless of its current state. if (deallocateContextEvent[tid].squashed()) - deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + deallocateContextEvent[tid]. + reschedule(nextCycle(curTick + cycles(delay))); else if (!deallocateContextEvent[tid].scheduled()) - deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + deallocateContextEvent[tid]. + schedule(nextCycle(curTick + cycles(delay))); } /** Unschedule thread deallocation in CPU */ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 8347ed775..da7ce00f5 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -33,6 +33,7 @@ #define __CPU_O3_FETCH_HH__ #include "arch/utility.hh" +#include "arch/predecoder.hh" #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/pc_event.hh" @@ -338,6 +339,9 @@ class DefaultFetch /** BPredUnit. */ BPredUnit branchPred; + /** Predecoder. */ + TheISA::Predecoder predecoder; + /** Per-thread fetch PC. */ Addr PC[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index ac0149d18..663cd3142 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -103,6 +103,7 @@ DefaultFetch<Impl>::IcachePort::recvRetry() template<class Impl> DefaultFetch<Impl>::DefaultFetch(Params *params) : branchPred(params), + predecoder(NULL), decodeToFetchDelay(params->decodeToFetchDelay), renameToFetchDelay(params->renameToFetchDelay), iewToFetchDelay(params->iewToFetchDelay), @@ -619,6 +620,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid fault = TheISA::genMachineCheckFault(); delete mem_req; memReq[tid] = NULL; + warn("Bad address!\n"); } assert(retryPkt == NULL); assert(retryTid == -1); @@ -669,11 +671,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, // Get rid of the retrying packet if it was from this thread. if (retryTid == tid) { assert(cacheBlocked); - cacheBlocked = false; - retryTid = -1; - delete retryPkt->req; - delete retryPkt; + if (retryPkt) { + delete retryPkt->req; + delete retryPkt; + } retryPkt = NULL; + retryTid = -1; } fetchStatus[tid] = Squashing; @@ -1117,13 +1120,10 @@ DefaultFetch<Impl>::fetch(bool &status_change) inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *> (&cacheData[tid][offset])); -#if THE_ISA == ALPHA_ISA - ext_inst = TheISA::makeExtMI(inst, fetch_PC); -#elif THE_ISA == SPARC_ISA - ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC()); -#elif THE_ISA == MIPS_ISA - ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC()); -#endif + predecoder.setTC(cpu->thread[tid]->getTC()); + predecoder.moreBytes(fetch_PC, 0, inst); + + ext_inst = predecoder.getExtMachInst(); // Create a new DynInst from the instruction fetched. DynInstPtr instruction = new DynInst(ext_inst, @@ -1152,7 +1152,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) ///FIXME This needs to be more robust in dealing with delay slots #if !ISA_HAS_DELAY_SLOT - predicted_branch |= +// predicted_branch |= #endif lookupAndUpdateNextPC(instruction, next_PC, next_NPC); predicted_branch |= (next_PC != fetch_NPC); @@ -1223,7 +1223,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) // until commit handles the fault. The only other way it can // wake up is if a squash comes along and changes the PC. #if FULL_SYSTEM - assert(numInst != fetchWidth); + assert(numInst < fetchWidth); // Get a sequence number. inst_seq = cpu->getAndIncrementInstSeq(); // We will use a nop in order to carry the fault. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index f24eaf2c4..4883e5a5c 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1153,19 +1153,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) inst->setCanCommit(); instQueue.insertBarrier(inst); add_to_iq = false; - } else if (inst->isNonSpeculative()) { - DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " - "encountered, skipping.\n", tid); - - // Same as non-speculative stores. - inst->setCanCommit(); - - // Specifically insert it as nonspeculative. - instQueue.insertNonSpec(inst); - - ++iewDispNonSpecInsts; - - add_to_iq = false; } else if (inst->isNop()) { DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, " "skipping.\n", tid); @@ -1193,6 +1180,20 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) } else { add_to_iq = true; } + if (inst->isNonSpeculative()) { + DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " + "encountered, skipping.\n", tid); + + // Same as non-speculative stores. + inst->setCanCommit(); + + // Specifically insert it as nonspeculative. + instQueue.insertNonSpec(inst); + + ++iewDispNonSpecInsts; + + add_to_iq = false; + } // If the instruction queue is not full, then add the // instruction. @@ -1379,6 +1380,7 @@ DefaultIEW<Impl>::executeInsts() predictedNotTakenIncorrect++; } } else if (ldstQueue.violation(tid)) { + assert(inst->isMemRef()); // If there was an ordering violation, then get the // DynInst that caused the violation. Note that this // clears the violation signal. @@ -1391,10 +1393,10 @@ DefaultIEW<Impl>::executeInsts() // Ensure the violating instruction is older than // current squash - if (fetchRedirect[tid] && - violator->seqNum >= toCommit->squashedSeqNum[tid]) +/* if (fetchRedirect[tid] && + violator->seqNum >= toCommit->squashedSeqNum[tid] + 1) continue; - +*/ fetchRedirect[tid] = true; // Tell the instruction queue that a violation has occured. @@ -1414,6 +1416,33 @@ DefaultIEW<Impl>::executeInsts() squashDueToMemBlocked(inst, tid); } + } else { + // Reset any state associated with redirects that will not + // be used. + if (ldstQueue.violation(tid)) { + assert(inst->isMemRef()); + + DynInstPtr violator = ldstQueue.getMemDepViolator(tid); + + DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + DPRINTF(IEW, "Violation will not be handled because " + "already squashing\n"); + + ++memOrderViolationEvents; + } + if (ldstQueue.loadBlocked(tid) && + !ldstQueue.isLoadBlockedHandled(tid)) { + DPRINTF(IEW, "Load operation couldn't execute because the " + "memory system is blocked. PC: %#x [sn:%lli]\n", + inst->readPC(), inst->seqNum); + DPRINTF(IEW, "Blocked load will not be handled because " + "already squashing\n"); + + ldstQueue.setLoadBlockedHandled(tid); + } + } } @@ -1563,6 +1592,7 @@ DefaultIEW<Impl>::tick() //DPRINTF(IEW,"NonspecInst from thread %i",tid); if (fromCommit->commitInfo[tid].uncached) { instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad); + fromCommit->commitInfo[tid].uncachedLoad->setAtCommit(); } else { instQueue.scheduleNonSpec( fromCommit->commitInfo[tid].nonSpecSeqNum); diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index d5781d89d..79e03d4bf 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -829,6 +829,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst) unsigned tid = (*inst_it).second->threadNumber; + (*inst_it).second->setAtCommit(); + (*inst_it).second->setCanIssue(); if (!(*inst_it).second->isMemRef()) { @@ -960,6 +962,8 @@ template <class Impl> void InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst) { + DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum); + resched_inst->clearCanIssue(); memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); } @@ -984,7 +988,6 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst) completed_inst->memOpDone = true; memDepUnit[tid].completed(completed_inst); - count[tid]--; } @@ -1084,16 +1087,21 @@ InstructionQueue<Impl>::doSquash(unsigned tid) ++iqSquashedOperandsExamined; } - } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) { + } else if (!squashed_inst->isStoreConditional() || + !squashed_inst->isCompleted()) { NonSpecMapIt ns_inst_it = nonSpecInsts.find(squashed_inst->seqNum); assert(ns_inst_it != nonSpecInsts.end()); + if (ns_inst_it == nonSpecInsts.end()) { + assert(squashed_inst->getFault() != NoFault); + } else { - (*ns_inst_it).second = NULL; + (*ns_inst_it).second = NULL; - nonSpecInsts.erase(ns_inst_it); + nonSpecInsts.erase(ns_inst_it); - ++iqSquashedNonSpecRemoved; + ++iqSquashedNonSpecRemoved; + } } // Might want to also clear out the head of the dependency graph. diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 2419afe29..1b10843f5 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; + + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the proper + // place to really handle request deletes. + delete req; return TheISA::genMachineCheckFault(); } @@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) if (store_size == 0) continue; + else if (storeQueue[store_idx].inst->uncacheable()) + continue; + + assert(storeQueue[store_idx].inst->effAddrValid); // Check if the store data is within the lower and upper bounds of // addresses that the request needs. @@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) storeQueue[store_idx].inst->effAddr; // If the store's data has all of the data needed, we can forward. - if (store_has_lower_limit && store_has_upper_limit) { + if ((store_has_lower_limit && store_has_upper_limit)) { // Get shift amount for offset into the store's data. int shift_amt = req->getVaddr() & (store_size - 1); // @todo: Magic number, assumes byte addressing @@ -596,6 +605,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // If it's already been written back, then don't worry about // stalling on it. if (storeQueue[store_idx].completed) { + panic("Should not check one of these"); continue; } @@ -614,6 +624,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // rescheduled eventually iewStage->rescheduleMemInst(load_inst); iewStage->decrWb(load_inst->seqNum); + load_inst->clearIssued(); ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not @@ -622,7 +633,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); - ++lsqBlockedLoads; + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the + // proper place to really handle request deletes. + delete req; + return NoFault; } } @@ -654,8 +669,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Delete state and data packet because a load retry // initiates a pipeline restart; it does not retry. delete state; + delete data_pkt->req; delete data_pkt; + req = NULL; + if (result == Packet::BadAddress) { return TheISA::genMachineCheckFault(); } @@ -669,6 +687,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // If the cache was blocked, or has become blocked due to the access, // handle it. if (lsq->cacheBlocked()) { + if (req) + delete req; + ++lsqCacheBlocked; iewStage->decrWb(load_inst->seqNum); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 3ba22a530..e70c960b3 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) if (isSwitchedOut() || inst->isSquashed()) { iewStage->decrWb(inst->seqNum); delete state; + delete pkt->req; delete pkt; return; } else { @@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } delete state; + delete pkt->req; delete pkt; } @@ -403,12 +405,15 @@ template <class Impl> Fault LSQUnit<Impl>::executeLoad(DynInstPtr &inst) { + using namespace TheISA; // Execute a specific load. Fault load_fault = NoFault; DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n", inst->readPC(),inst->seqNum); + assert(!inst->isSquashed()); + load_fault = inst->initiateAcc(); // If the instruction faulted, then we need to send it along to commit @@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) // realizes there is activity. // Mark it as executed unless it is an uncached load that // needs to hit the head of commit. - if (!(inst->req && inst->req->isUncacheable()) || + if (!(inst->hasRequest() && inst->uncacheable()) || inst->isAtCommit()) { inst->setExecuted(); } iewStage->instToCommit(inst); iewStage->activityThisCycle(); + } else if (!loadBlocked()) { + assert(inst->effAddrValid); + int load_idx = inst->lqIdx; + incrLdIdx(load_idx); + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if (loadQueue[load_idx]->effAddrValid && + (loadQueue[load_idx]->effAddr >> 8) == + (inst->effAddr >> 8)) { + // A load incorrectly passed this load. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + DynInstPtr violator = loadQueue[load_idx]; + if (!memDepViolator || + (violator->seqNum < memDepViolator->seqNum)) { + memDepViolator = violator; + } else { + break; + } + + ++lsqMemOrderViolation; + + return genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } } return load_fault; @@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n", store_inst->readPC(), store_inst->seqNum); + assert(!store_inst->isSquashed()); + // Check the recently completed loads to see if any match this store's // address. If so, then we have a memory ordering violation. int load_idx = store_inst->lqIdx; @@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) ++storesToWB; } - if (!memDepViolator) { - while (load_idx != loadTail) { - // Really only need to check loads that have actually executed - // It's safe to check all loads because effAddr is set to - // InvalAddr when the dyn inst is created. - - // @todo: For now this is extra conservative, detecting a - // violation if the addresses match assuming all accesses - // are quad word accesses. - - // @todo: Fix this, magic number being used here - if ((loadQueue[load_idx]->effAddr >> 8) == - (store_inst->effAddr >> 8)) { - // A load incorrectly passed this store. Squash and refetch. - // For now return a fault to show that it was unsuccessful. - memDepViolator = loadQueue[load_idx]; - ++lsqMemOrderViolation; - - return genMachineCheckFault(); + assert(store_inst->effAddrValid); + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + // It's safe to check all loads because effAddr is set to + // InvalAddr when the dyn inst is created. + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if (loadQueue[load_idx]->effAddrValid && + (loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + DynInstPtr violator = loadQueue[load_idx]; + if (!memDepViolator || + (violator->seqNum < memDepViolator->seqNum)) { + memDepViolator = violator; + } else { + break; } - incrLdIdx(load_idx); + ++lsqMemOrderViolation; + + return genMachineCheckFault(); } - // If we've reached this point, there was no violation. - memDepViolator = NULL; + incrLdIdx(load_idx); } return store_fault; @@ -660,7 +703,7 @@ LSQUnit<Impl>::writebackStores() panic("LSQ sent out a bad address for a completed store!"); } // Need to handle becoming blocked on a store. - DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will" + DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" "retry later\n", inst->seqNum); isStoreBlocked = true; @@ -735,6 +778,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } } + if (memDepViolator && squashed_num < memDepViolator->seqNum) { + memDepViolator = NULL; + } + int store_idx = storeTail; decrStIdx(store_idx); @@ -764,6 +811,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the proper + // place to really handle request deletes. + delete storeQueue[store_idx].req; + storeQueue[store_idx].req = NULL; --stores; diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index f19980fd5..64558efaa 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -214,6 +214,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) inst_entry->regsReady = true; } + // Clear the bit saying this instruction can issue. + inst->clearCanIssue(); + // Add this instruction to the list of dependents. store_entry->dependInsts.push_back(inst_entry); @@ -357,7 +360,6 @@ void MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst) { DynInstPtr temp_inst; - bool found_inst = false; // For now this replay function replays all waiting memory ops. while (!instsToReplay.empty()) { @@ -371,14 +373,8 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst) moveToReady(inst_entry); - if (temp_inst == inst) { - found_inst = true; - } - instsToReplay.pop_front(); } - - assert(found_inst); } template <class MemDepPred, class Impl> diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 620daf691..b436ec1c3 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg) // known that the prev reg was outside the range of normal registers // so the free list can avoid adding it. prev_reg = renamed_reg; - - assert(renamed_reg < numPhysicalRegs + numMiscRegs); } DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n", diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index d2acc6232..a145e046e 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -103,7 +103,7 @@ void O3ThreadContext<Impl>::delVirtPort(VirtualPort *vp) { if (vp != thread->getVirtPort()) { - delete vp->getPeer(); + vp->removeConn(); delete vp; } } diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 4a76ae110..d78162243 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -748,7 +748,7 @@ template <class Impl> void OzoneCPU<Impl>::OzoneTC::delVirtPort(VirtualPort *vp) { - delete vp->getPeer(); + vp->removeConn(); delete vp; } #endif diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index ca4627bbf..6f69b5ac4 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -446,6 +446,17 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) #ifndef DOXYGEN_SHOULD_SKIP_THIS + +template +Fault +AtomicSimpleCPU::write(Twin32_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +AtomicSimpleCPU::write(Twin64_t data, Addr addr, + unsigned flags, uint64_t *res); + template Fault AtomicSimpleCPU::write(uint64_t data, Addr addr, @@ -505,17 +516,28 @@ AtomicSimpleCPU::tick() Fault fault = setupFetchRequest(ifetch_req); if (fault == NoFault) { - ifetch_pkt->reinitFromRequest(); + Tick icache_latency = 0; + bool icache_access = false; + dcache_access = false; // assume no dcache access - Tick icache_latency = icachePort.sendAtomic(ifetch_pkt); - // ifetch_req is initialized to read the instruction directly - // into the CPU object's inst field. + //Fetch more instruction memory if necessary + if(predecoder.needMoreBytes()) + { + icache_access = true; + ifetch_pkt->reinitFromRequest(); + + icache_latency = icachePort.sendAtomic(ifetch_pkt); + // ifetch_req is initialized to read the instruction directly + // into the CPU object's inst field. + } - dcache_access = false; // assume no dcache access preExecute(); - fault = curStaticInst->execute(this, traceData); - postExecute(); + if(curStaticInst) + { + fault = curStaticInst->execute(this, traceData); + postExecute(); + } // @todo remove me after debugging with legion done if (curStaticInst && (!curStaticInst->isMicroOp() || @@ -523,7 +545,8 @@ AtomicSimpleCPU::tick() instCnt++; if (simulate_stalls) { - Tick icache_stall = icache_latency - cycles(1); + Tick icache_stall = + icache_access ? icache_latency - cycles(1) : 0; Tick dcache_stall = dcache_access ? dcache_latency - cycles(1) : 0; Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1); @@ -534,8 +557,8 @@ AtomicSimpleCPU::tick() } } - - advancePC(fault); + if(predecoder.needMoreBytes()) + advancePC(fault); } if (_status != Idle) diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index f6c109127..877dc5bd4 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -70,7 +70,7 @@ using namespace std; using namespace TheISA; BaseSimpleCPU::BaseSimpleCPU(Params *p) - : BaseCPU(p), thread(NULL) + : BaseCPU(p), thread(NULL), predecoder(NULL) { #if FULL_SYSTEM thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb); @@ -301,7 +301,7 @@ BaseSimpleCPU::post_interrupt(int int_num, int index) BaseCPU::post_interrupt(int_num, index); if (thread->status() == ThreadContext::Suspended) { - DPRINTF(IPI,"Suspended Processor awoke\n"); + DPRINTF(Quiesce,"Suspended Processor awoke\n"); thread->activate(); } } @@ -367,18 +367,23 @@ BaseSimpleCPU::preExecute() inst = gtoh(inst); //If we're not in the middle of a macro instruction if (!curMacroStaticInst) { -#if THE_ISA == ALPHA_ISA - StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->readPC())); -#elif THE_ISA == SPARC_ISA - StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->getTC())); -#elif THE_ISA == X86_ISA - StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->getTC())); -#elif THE_ISA == MIPS_ISA - //Mips doesn't do anything in it's MakeExtMI function right now, - //so it won't be called. - StaticInstPtr instPtr = StaticInst::decode(inst); -#endif - if (instPtr->isMacroOp()) { + StaticInstPtr instPtr = NULL; + + //Predecode, ie bundle up an ExtMachInst + //This should go away once the constructor can be set up properly + predecoder.setTC(thread->getTC()); + //If more fetch data is needed, pass it in. + if(predecoder.needMoreBytes()) + predecoder.moreBytes(thread->readPC(), 0, inst); + else + predecoder.process(); + //If an instruction is ready, decode it + if (predecoder.extMachInstReady()) + instPtr = StaticInst::decode(predecoder.getExtMachInst()); + + //If we decoded an instruction and it's microcoded, start pulling + //out micro ops + if (instPtr && instPtr->isMacroOp()) { curMacroStaticInst = instPtr; curStaticInst = curMacroStaticInst-> fetchMicroOp(thread->readMicroPC()); @@ -391,17 +396,19 @@ BaseSimpleCPU::preExecute() fetchMicroOp(thread->readMicroPC()); } + //If we decoded an instruction this "tick", record information about it. + if(curStaticInst) + { + traceData = Trace::getInstRecord(curTick, tc, curStaticInst, + thread->readPC()); - traceData = Trace::getInstRecord(curTick, tc, curStaticInst, - thread->readPC()); - - DPRINTF(Decode,"Decode: Decoded %s instruction (opcode: 0x%x): 0x%x\n", - curStaticInst->getName(), curStaticInst->getOpcode(), - curStaticInst->machInst); + DPRINTF(Decode,"Decode: Decoded %s instruction: 0x%x\n", + curStaticInst->getName(), curStaticInst->machInst); #if FULL_SYSTEM - thread->setInst(inst); + thread->setInst(inst); #endif // FULL_SYSTEM + } } void @@ -411,7 +418,8 @@ BaseSimpleCPU::postExecute() if (thread->profile) { bool usermode = TheISA::inUserMode(tc); thread->profilePC = usermode ? 1 : thread->readPC(); - ProfileNode *node = thread->profile->consume(tc, inst); + StaticInstPtr si(inst); + ProfileNode *node = thread->profile->consume(tc, si); if (node) thread->profileNode = node; } @@ -444,9 +452,9 @@ BaseSimpleCPU::advancePC(Fault fault) fault->invoke(tc); thread->setMicroPC(0); thread->setNextMicroPC(1); - } else { + } else if (predecoder.needMoreBytes()) { //If we're at the last micro op for this instruction - if (curStaticInst->isLastMicroOp()) { + if (curStaticInst && curStaticInst->isLastMicroOp()) { //We should be working with a macro op assert(curMacroStaticInst); //Close out this macro op, and clean up the @@ -465,13 +473,9 @@ BaseSimpleCPU::advancePC(Fault fault) } else { // go to the next instruction thread->setPC(thread->readNextPC()); -#if ISA_HAS_DELAY_SLOT thread->setNextPC(thread->readNextNPC()); thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); assert(thread->readNextPC() != thread->readNextNPC()); -#else - thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); -#endif } } diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index eae24014b..787259c96 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -33,6 +33,7 @@ #ifndef __CPU_SIMPLE_BASE_HH__ #define __CPU_SIMPLE_BASE_HH__ +#include "arch/predecoder.hh" #include "base/statistics.hh" #include "config/full_system.hh" #include "cpu/base.hh" @@ -63,6 +64,10 @@ class Process; class RemoteGDB; class GDBListener; +namespace TheISA +{ + class Predecoder; +} class ThreadContext; class Checkpoint; @@ -74,7 +79,6 @@ namespace Trace { class BaseSimpleCPU : public BaseCPU { protected: - typedef TheISA::MachInst MachInst; typedef TheISA::MiscReg MiscReg; typedef TheISA::FloatReg FloatReg; typedef TheISA::FloatRegBits FloatRegBits; @@ -122,7 +126,10 @@ class BaseSimpleCPU : public BaseCPU #endif // current instruction - MachInst inst; + TheISA::MachInst inst; + + // The predecoder + TheISA::Predecoder predecoder; // Static data storage TheISA::LargestRead dataReg; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 2e602648a..45da7c3eb 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -398,6 +398,16 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) #ifndef DOXYGEN_SHOULD_SKIP_THIS template Fault +TimingSimpleCPU::write(Twin32_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +TimingSimpleCPU::write(Twin64_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault TimingSimpleCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res); diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index 13d0e2e29..39f31782b 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -305,7 +305,7 @@ void SimpleThread::delVirtPort(VirtualPort *vp) { if (vp != virtPort) { - delete vp->getPeer(); + vp->removeConn(); delete vp; } } diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 416c8ab56..a58ac85d6 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -35,6 +35,7 @@ #include <string> #include "arch/isa_traits.hh" +#include "arch/utility.hh" #include "sim/faults.hh" #include "base/bitfield.hh" #include "base/hashmap.hh" @@ -439,9 +440,6 @@ class StaticInst : public StaticInstBase //This is defined as inline below. static StaticInstPtr decode(ExtMachInst mach_inst); - /// Return opcode of machine instruction - uint32_t getOpcode() { return bits(machInst, 31, 26);} - /// Return name of machine instruction std::string getName() { return mnemonic; } }; @@ -474,7 +472,7 @@ class StaticInstPtr : public RefCountingPtr<StaticInst> /// Construct directly from machine instruction. /// Calls StaticInst::decode(). - StaticInstPtr(TheISA::ExtMachInst mach_inst) + explicit StaticInstPtr(TheISA::ExtMachInst mach_inst) : RefCountingPtr<StaticInst>(StaticInst::decode(mach_inst)) { } diff --git a/src/dev/SConscript b/src/dev/SConscript index 1ec83de4b..ea529b536 100644 --- a/src/dev/SConscript +++ b/src/dev/SConscript @@ -40,7 +40,7 @@ if env['FULL_SYSTEM']: Source('etherlink.cc') Source('etherpkt.cc') Source('ethertap.cc') - #Source('i8254xGBe.cc') + Source('i8254xGBe.cc') Source('ide_ctrl.cc') Source('ide_disk.cc') Source('io_device.cc') diff --git a/src/dev/i8254xGBe.cc b/src/dev/i8254xGBe.cc index 7fc68f4e7..c38a9e873 100644 --- a/src/dev/i8254xGBe.cc +++ b/src/dev/i8254xGBe.cc @@ -35,7 +35,13 @@ * other MACs with slight modifications. */ + +/* + * @todo really there are multiple dma engines.. we should implement them. + */ + #include "base/inet.hh" +#include "base/trace.hh" #include "dev/i8254xGBe.hh" #include "mem/packet.hh" #include "mem/packet_access.hh" @@ -43,32 +49,38 @@ #include "sim/stats.hh" #include "sim/system.hh" +#include <algorithm> + using namespace iGbReg; +using namespace Net; IGbE::IGbE(Params *p) - : PciDev(p), etherInt(NULL) + : PciDev(p), etherInt(NULL), useFlowControl(p->use_flow_control), + rxFifo(p->rx_fifo_size), txFifo(p->tx_fifo_size), rxTick(false), + txTick(false), rdtrEvent(this), radvEvent(this), tadvEvent(this), + tidvEvent(this), tickEvent(this), interEvent(this), + rxDescCache(this, name()+".RxDesc", p->rx_desc_cache_size), + txDescCache(this, name()+".TxDesc", p->tx_desc_cache_size), clock(p->clock) { // Initialized internal registers per Intel documentation - regs.tctl.reg = 0; - regs.rctl.reg = 0; - regs.ctrl.reg = 0; - regs.ctrl.fd = 1; - regs.ctrl.lrst = 1; - regs.ctrl.speed = 2; - regs.ctrl.frcspd = 1; - regs.sts.reg = 0; - regs.eecd.reg = 0; - regs.eecd.fwe = 1; - regs.eecd.ee_type = 1; - regs.eerd.reg = 0; - regs.icd.reg = 0; - regs.imc.reg = 0; - regs.rctl.reg = 0; - regs.tctl.reg = 0; - regs.manc.reg = 0; - - regs.pba.rxa = 0x30; - regs.pba.txa = 0x10; + // All registers intialized to 0 by per register constructor + regs.ctrl.fd(1); + regs.ctrl.lrst(1); + regs.ctrl.speed(2); + regs.ctrl.frcspd(1); + regs.sts.speed(3); // Say we're 1000Mbps + regs.sts.fd(1); // full duplex + regs.sts.lu(1); // link up + regs.eecd.fwe(1); + regs.eecd.ee_type(1); + regs.imr = 0; + regs.iam = 0; + regs.rxdctl.gran(1); + regs.rxdctl.wthresh(1); + regs.fcrth(1); + + regs.pba.rxa(0x30); + regs.pba.txa(0x10); eeOpBits = 0; eeAddrBits = 0; @@ -78,8 +90,21 @@ IGbE::IGbE(Params *p) // clear all 64 16 bit words of the eeprom memset(&flash, 0, EEPROM_SIZE*2); + // Set the MAC address + memcpy(flash, p->hardware_address.bytes(), ETH_ADDR_LEN); + for (int x = 0; x < ETH_ADDR_LEN/2; x++) + flash[x] = htobe(flash[x]); + + uint16_t csum = 0; + for (int x = 0; x < EEPROM_SIZE; x++) + csum += htobe(flash[x]); + + // Magic happy checksum value - flash[0] = 0xBABA; + flash[EEPROM_SIZE-1] = htobe((uint16_t)(EEPROM_CSUM - csum)); + + rxFifo.clear(); + txFifo.clear(); } @@ -114,7 +139,7 @@ IGbE::read(PacketPtr pkt) // Only 32bit accesses allowed assert(pkt->getSize() == 4); - //DPRINTF(Ethernet, "Read device register %#X\n", daddr); + DPRINTF(Ethernet, "Read device register %#X\n", daddr); pkt->allocate(); @@ -124,47 +149,125 @@ IGbE::read(PacketPtr pkt) switch (daddr) { - case CTRL: - pkt->set<uint32_t>(regs.ctrl.reg); - break; - case STATUS: - pkt->set<uint32_t>(regs.sts.reg); - break; - case EECD: - pkt->set<uint32_t>(regs.eecd.reg); - break; - case EERD: - pkt->set<uint32_t>(regs.eerd.reg); - break; - case ICR: - pkt->set<uint32_t>(regs.icd.reg); - break; - case IMC: - pkt->set<uint32_t>(regs.imc.reg); - break; - case RCTL: - pkt->set<uint32_t>(regs.rctl.reg); - break; - case TCTL: - pkt->set<uint32_t>(regs.tctl.reg); - break; - case PBA: - pkt->set<uint32_t>(regs.pba.reg); - break; - case WUC: - case LEDCTL: - pkt->set<uint32_t>(0); // We don't care, so just return 0 - break; - case MANC: - pkt->set<uint32_t>(regs.manc.reg); - break; + case REG_CTRL: + pkt->set<uint32_t>(regs.ctrl()); + break; + case REG_STATUS: + pkt->set<uint32_t>(regs.sts()); + break; + case REG_EECD: + pkt->set<uint32_t>(regs.eecd()); + break; + case REG_EERD: + pkt->set<uint32_t>(regs.eerd()); + break; + case REG_CTRL_EXT: + pkt->set<uint32_t>(regs.ctrl_ext()); + break; + case REG_MDIC: + pkt->set<uint32_t>(regs.mdic()); + break; + case REG_ICR: + DPRINTF(Ethernet, "Reading ICR. ICR=%#x IMR=%#x IAM=%#x IAME=%d\n", regs.icr(), + regs.imr, regs.iam, regs.ctrl_ext.iame()); + pkt->set<uint32_t>(regs.icr()); + if (regs.icr.int_assert() || regs.imr == 0) { + regs.icr = regs.icr() & ~mask(30); + DPRINTF(Ethernet, "Cleared ICR. ICR=%#x\n", regs.icr()); + } + if (regs.ctrl_ext.iame() && regs.icr.int_assert()) + regs.imr &= ~regs.iam; + chkInterrupt(); + break; + case REG_ITR: + pkt->set<uint32_t>(regs.itr()); + break; + case REG_RCTL: + pkt->set<uint32_t>(regs.rctl()); + break; + case REG_FCTTV: + pkt->set<uint32_t>(regs.fcttv()); + break; + case REG_TCTL: + pkt->set<uint32_t>(regs.tctl()); + break; + case REG_PBA: + pkt->set<uint32_t>(regs.pba()); + break; + case REG_WUC: + case REG_LEDCTL: + pkt->set<uint32_t>(0); // We don't care, so just return 0 + break; + case REG_FCRTL: + pkt->set<uint32_t>(regs.fcrtl()); + break; + case REG_FCRTH: + pkt->set<uint32_t>(regs.fcrth()); + break; + case REG_RDBAL: + pkt->set<uint32_t>(regs.rdba.rdbal()); + break; + case REG_RDBAH: + pkt->set<uint32_t>(regs.rdba.rdbah()); + break; + case REG_RDLEN: + pkt->set<uint32_t>(regs.rdlen()); + break; + case REG_RDH: + pkt->set<uint32_t>(regs.rdh()); + break; + case REG_RDT: + pkt->set<uint32_t>(regs.rdt()); + break; + case REG_RDTR: + pkt->set<uint32_t>(regs.rdtr()); + if (regs.rdtr.fpd()) { + rxDescCache.writeback(0); + postInterrupt(IT_RXT); + regs.rdtr.fpd(0); + } + break; + case REG_RADV: + pkt->set<uint32_t>(regs.radv()); + break; + case REG_TDBAL: + pkt->set<uint32_t>(regs.tdba.tdbal()); + break; + case REG_TDBAH: + pkt->set<uint32_t>(regs.tdba.tdbah()); + break; + case REG_TDLEN: + pkt->set<uint32_t>(regs.tdlen()); + break; + case REG_TDH: + pkt->set<uint32_t>(regs.tdh()); + break; + case REG_TDT: + pkt->set<uint32_t>(regs.tdt()); + break; + case REG_TIDV: + pkt->set<uint32_t>(regs.tidv()); + break; + case REG_TXDCTL: + pkt->set<uint32_t>(regs.txdctl()); + break; + case REG_TADV: + pkt->set<uint32_t>(regs.tadv()); + break; + case REG_RXCSUM: + pkt->set<uint32_t>(regs.rxcsum()); + break; + case REG_MANC: + pkt->set<uint32_t>(regs.manc()); + break; default: - if (!(daddr >= VFTA && daddr < (VFTA + VLAN_FILTER_TABLE_SIZE)*4) && - !(daddr >= RAL && daddr < (RAL + RCV_ADDRESS_TABLE_SIZE)*4) && - !(daddr >= MTA && daddr < (MTA + MULTICAST_TABLE_SIZE)*4)) - pkt->set<uint32_t>(0); - else - panic("Read request to unknown register number: %#x\n", daddr); + if (!(daddr >= REG_VFTA && daddr < (REG_VFTA + VLAN_FILTER_TABLE_SIZE*4)) && + !(daddr >= REG_RAL && daddr < (REG_RAL + RCV_ADDRESS_TABLE_SIZE*8)) && + !(daddr >= REG_MTA && daddr < (REG_MTA + MULTICAST_TABLE_SIZE*4)) && + !(daddr >= REG_CRCERRS && daddr < (REG_CRCERRS + STATS_REGS_SIZE))) + panic("Read request to unknown register number: %#x\n", daddr); + else + pkt->set<uint32_t>(0); }; pkt->result = Packet::Success; @@ -187,100 +290,253 @@ IGbE::write(PacketPtr pkt) // Only 32bit accesses allowed assert(pkt->getSize() == sizeof(uint32_t)); - //DPRINTF(Ethernet, "Wrote device register %#X value %#X\n", daddr, pkt->get<uint32_t>()); + DPRINTF(Ethernet, "Wrote device register %#X value %#X\n", daddr, pkt->get<uint32_t>()); /// /// Handle write of register here /// uint32_t val = pkt->get<uint32_t>(); + Regs::RCTL oldrctl; + Regs::TCTL oldtctl; + switch (daddr) { - case CTRL: - regs.ctrl.reg = val; - break; - case STATUS: - regs.sts.reg = val; - break; - case EECD: - int oldClk; - oldClk = regs.eecd.sk; - regs.eecd.reg = val; - // See if this is a eeprom access and emulate accordingly - if (!oldClk && regs.eecd.sk) { - if (eeOpBits < 8) { - eeOpcode = eeOpcode << 1 | regs.eecd.din; - eeOpBits++; - } else if (eeAddrBits < 8 && eeOpcode == EEPROM_READ_OPCODE_SPI) { - eeAddr = eeAddr << 1 | regs.eecd.din; - eeAddrBits++; - } else if (eeDataBits < 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) { - assert(eeAddr>>1 < EEPROM_SIZE); - DPRINTF(EthernetEEPROM, "EEPROM bit read: %d word: %#X\n", - flash[eeAddr>>1] >> eeDataBits & 0x1, flash[eeAddr>>1]); - regs.eecd.dout = (flash[eeAddr>>1] >> (15-eeDataBits)) & 0x1; - eeDataBits++; - } else if (eeDataBits < 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI) { - regs.eecd.dout = 0; - eeDataBits++; - } else - panic("What's going on with eeprom interface? opcode:" - " %#x:%d addr: %#x:%d, data: %d\n", (uint32_t)eeOpcode, - (uint32_t)eeOpBits, (uint32_t)eeAddr, - (uint32_t)eeAddrBits, (uint32_t)eeDataBits); - - // Reset everything for the next command - if ((eeDataBits == 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) || + case REG_CTRL: + regs.ctrl = val; + if (regs.ctrl.tfce()) + warn("TX Flow control enabled, should implement\n"); + if (regs.ctrl.rfce()) + warn("RX Flow control enabled, should implement\n"); + break; + case REG_CTRL_EXT: + regs.ctrl_ext = val; + break; + case REG_STATUS: + regs.sts = val; + break; + case REG_EECD: + int oldClk; + oldClk = regs.eecd.sk(); + regs.eecd = val; + // See if this is a eeprom access and emulate accordingly + if (!oldClk && regs.eecd.sk()) { + if (eeOpBits < 8) { + eeOpcode = eeOpcode << 1 | regs.eecd.din(); + eeOpBits++; + } else if (eeAddrBits < 8 && eeOpcode == EEPROM_READ_OPCODE_SPI) { + eeAddr = eeAddr << 1 | regs.eecd.din(); + eeAddrBits++; + } else if (eeDataBits < 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) { + assert(eeAddr>>1 < EEPROM_SIZE); + DPRINTF(EthernetEEPROM, "EEPROM bit read: %d word: %#X\n", + flash[eeAddr>>1] >> eeDataBits & 0x1, flash[eeAddr>>1]); + regs.eecd.dout((flash[eeAddr>>1] >> (15-eeDataBits)) & 0x1); + eeDataBits++; + } else if (eeDataBits < 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI) { + regs.eecd.dout(0); + eeDataBits++; + } else + panic("What's going on with eeprom interface? opcode:" + " %#x:%d addr: %#x:%d, data: %d\n", (uint32_t)eeOpcode, + (uint32_t)eeOpBits, (uint32_t)eeAddr, + (uint32_t)eeAddrBits, (uint32_t)eeDataBits); + + // Reset everything for the next command + if ((eeDataBits == 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) || (eeDataBits == 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI)) { - eeOpBits = 0; - eeAddrBits = 0; - eeDataBits = 0; + eeOpBits = 0; + eeAddrBits = 0; + eeDataBits = 0; eeOpcode = 0; - eeAddr = 0; - } + eeAddr = 0; + } DPRINTF(EthernetEEPROM, "EEPROM: opcode: %#X:%d addr: %#X:%d\n", - (uint32_t)eeOpcode, (uint32_t) eeOpBits, - (uint32_t)eeAddr>>1, (uint32_t)eeAddrBits); + (uint32_t)eeOpcode, (uint32_t) eeOpBits, + (uint32_t)eeAddr>>1, (uint32_t)eeAddrBits); if (eeOpBits == 8 && !(eeOpcode == EEPROM_READ_OPCODE_SPI || - eeOpcode == EEPROM_RDSR_OPCODE_SPI )) - panic("Unknown eeprom opcode: %#X:%d\n", (uint32_t)eeOpcode, - (uint32_t)eeOpBits); - - - } - // If driver requests eeprom access, immediately give it to it - regs.eecd.ee_gnt = regs.eecd.ee_req; - break; - case EERD: - regs.eerd.reg = val; - break; - case ICR: - regs.icd.reg = val; - break; - case IMC: - regs.imc.reg = val; - break; - case RCTL: - regs.rctl.reg = val; - break; - case TCTL: - regs.tctl.reg = val; - break; - case PBA: - regs.pba.rxa = val; - regs.pba.txa = 64 - regs.pba.rxa; - break; - case WUC: - case LEDCTL: - ; // We don't care, so don't store anything - break; - case MANC: - regs.manc.reg = val; - break; + eeOpcode == EEPROM_RDSR_OPCODE_SPI )) + panic("Unknown eeprom opcode: %#X:%d\n", (uint32_t)eeOpcode, + (uint32_t)eeOpBits); + + + } + // If driver requests eeprom access, immediately give it to it + regs.eecd.ee_gnt(regs.eecd.ee_req()); + break; + case REG_EERD: + regs.eerd = val; + break; + case REG_MDIC: + regs.mdic = val; + if (regs.mdic.i()) + panic("No support for interrupt on mdic complete\n"); + if (regs.mdic.phyadd() != 1) + panic("No support for reading anything but phy\n"); + DPRINTF(Ethernet, "%s phy address %x\n", regs.mdic.op() == 1 ? "Writing" + : "Reading", regs.mdic.regadd()); + switch (regs.mdic.regadd()) { + case PHY_PSTATUS: + regs.mdic.data(0x796D); // link up + break; + case PHY_PID: + regs.mdic.data(0x02A8); + break; + case PHY_EPID: + regs.mdic.data(0x0380); + break; + case PHY_GSTATUS: + regs.mdic.data(0x7C00); + break; + case PHY_EPSTATUS: + regs.mdic.data(0x3000); + break; + case PHY_AGC: + regs.mdic.data(0x180); // some random length + break; + default: + regs.mdic.data(0); + } + regs.mdic.r(1); + break; + case REG_ICR: + DPRINTF(Ethernet, "Writing ICR. ICR=%#x IMR=%#x IAM=%#x IAME=%d\n", regs.icr(), + regs.imr, regs.iam, regs.ctrl_ext.iame()); + if (regs.ctrl_ext.iame()) + regs.imr &= ~regs.iam; + regs.icr = ~bits(val,30,0) & regs.icr(); + chkInterrupt(); + break; + case REG_ITR: + regs.itr = val; + break; + case REG_ICS: + postInterrupt((IntTypes)val); + break; + case REG_IMS: + regs.imr |= val; + chkInterrupt(); + break; + case REG_IMC: + regs.imr &= ~val; + chkInterrupt(); + break; + case REG_IAM: + regs.iam = val; + break; + case REG_RCTL: + oldrctl = regs.rctl; + regs.rctl = val; + if (regs.rctl.rst()) { + rxDescCache.reset(); + rxFifo.clear(); + regs.rctl.rst(0); + } + if (regs.rctl.en()) + rxTick = true; + restartClock(); + break; + case REG_FCTTV: + regs.fcttv = val; + break; + case REG_TCTL: + regs.tctl = val; + oldtctl = regs.tctl; + regs.tctl = val; + if (regs.tctl.en()) + txTick = true; + restartClock(); + if (regs.tctl.en() && !oldtctl.en()) { + txDescCache.reset(); + } + break; + case REG_PBA: + regs.pba.rxa(val); + regs.pba.txa(64 - regs.pba.rxa()); + break; + case REG_WUC: + case REG_LEDCTL: + case REG_FCAL: + case REG_FCAH: + case REG_FCT: + case REG_VET: + case REG_AIFS: + case REG_TIPG: + ; // We don't care, so don't store anything + break; + case REG_FCRTL: + regs.fcrtl = val; + break; + case REG_FCRTH: + regs.fcrth = val; + break; + case REG_RDBAL: + regs.rdba.rdbal( val & ~mask(4)); + rxDescCache.areaChanged(); + break; + case REG_RDBAH: + regs.rdba.rdbah(val); + rxDescCache.areaChanged(); + break; + case REG_RDLEN: + regs.rdlen = val & ~mask(7); + rxDescCache.areaChanged(); + break; + case REG_RDH: + regs.rdh = val; + rxDescCache.areaChanged(); + break; + case REG_RDT: + regs.rdt = val; + rxTick = true; + restartClock(); + break; + case REG_RDTR: + regs.rdtr = val; + break; + case REG_RADV: + regs.radv = val; + break; + case REG_TDBAL: + regs.tdba.tdbal( val & ~mask(4)); + txDescCache.areaChanged(); + break; + case REG_TDBAH: + regs.tdba.tdbah(val); + txDescCache.areaChanged(); + break; + case REG_TDLEN: + regs.tdlen = val & ~mask(7); + txDescCache.areaChanged(); + break; + case REG_TDH: + regs.tdh = val; + txDescCache.areaChanged(); + break; + case REG_TDT: + regs.tdt = val; + txTick = true; + restartClock(); + break; + case REG_TIDV: + regs.tidv = val; + break; + case REG_TXDCTL: + regs.txdctl = val; + break; + case REG_TADV: + regs.tadv = val; + break; + case REG_RXCSUM: + regs.rxcsum = val; + break; + case REG_MANC: + regs.manc = val; + break; default: - if (!(daddr >= VFTA && daddr < (VFTA + VLAN_FILTER_TABLE_SIZE)*4) && - !(daddr >= RAL && daddr < (RAL + RCV_ADDRESS_TABLE_SIZE)*4) && - !(daddr >= MTA && daddr < (MTA + MULTICAST_TABLE_SIZE)*4)) + if (!(daddr >= REG_VFTA && daddr < (REG_VFTA + VLAN_FILTER_TABLE_SIZE*4)) && + !(daddr >= REG_RAL && daddr < (REG_RAL + RCV_ADDRESS_TABLE_SIZE*8)) && + !(daddr >= REG_MTA && daddr < (REG_MTA + MULTICAST_TABLE_SIZE*4))) panic("Write request to unknown register number: %#x\n", daddr); }; @@ -288,18 +544,658 @@ IGbE::write(PacketPtr pkt) return pioDelay; } +void +IGbE::postInterrupt(IntTypes t, bool now) +{ + assert(t); + + // Interrupt is already pending + if (t & regs.icr()) + return; + + if (regs.icr() & regs.imr) + { + regs.icr = regs.icr() | t; + if (!interEvent.scheduled()) + interEvent.schedule(curTick + Clock::Int::ns * 256 * + regs.itr.interval()); + } else { + regs.icr = regs.icr() | t; + if (regs.itr.interval() == 0 || now) { + if (interEvent.scheduled()) + interEvent.deschedule(); + cpuPostInt(); + } else { + DPRINTF(EthernetIntr, "EINT: Scheduling timer interrupt for %d ticks\n", + Clock::Int::ns * 256 * regs.itr.interval()); + assert(!interEvent.scheduled()); + interEvent.schedule(curTick + Clock::Int::ns * 256 * regs.itr.interval()); + } + } +} + +void +IGbE::cpuPostInt() +{ + if (rdtrEvent.scheduled()) { + regs.icr.rxt0(1); + rdtrEvent.deschedule(); + } + if (radvEvent.scheduled()) { + regs.icr.rxt0(1); + radvEvent.deschedule(); + } + if (tadvEvent.scheduled()) { + regs.icr.txdw(1); + tadvEvent.deschedule(); + } + if (tidvEvent.scheduled()) { + regs.icr.txdw(1); + tidvEvent.deschedule(); + } + + regs.icr.int_assert(1); + DPRINTF(EthernetIntr, "EINT: Posting interrupt to CPU now. Vector %#x\n", + regs.icr()); + intrPost(); +} + +void +IGbE::cpuClearInt() +{ + if (regs.icr.int_assert()) { + regs.icr.int_assert(0); + DPRINTF(EthernetIntr, "EINT: Clearing interrupt to CPU now. Vector %#x\n", + regs.icr()); + intrClear(); + } +} + +void +IGbE::chkInterrupt() +{ + // Check if we need to clear the cpu interrupt + if (!(regs.icr() & regs.imr)) { + if (interEvent.scheduled()) + interEvent.deschedule(); + if (regs.icr.int_assert()) + cpuClearInt(); + } + + if (regs.icr() & regs.imr) { + if (regs.itr.interval() == 0) { + cpuPostInt(); + } else { + if (!interEvent.scheduled()) + interEvent.schedule(curTick + Clock::Int::ns * 256 * regs.itr.interval()); + } + } + + +} + + +IGbE::RxDescCache::RxDescCache(IGbE *i, const std::string n, int s) + : DescCache<RxDesc>(i, n, s), pktDone(false), pktEvent(this) + +{ +} bool -IGbE::ethRxPkt(EthPacketPtr packet) +IGbE::RxDescCache::writePacket(EthPacketPtr packet) { - panic("Need to implemenet\n"); + // We shouldn't have to deal with any of these yet + DPRINTF(EthernetDesc, "Packet Length: %d Desc Size: %d\n", + packet->length, igbe->regs.rctl.descSize()); + assert(packet->length < igbe->regs.rctl.descSize()); + + if (!unusedCache.size()) + return false; + + pktPtr = packet; + + igbe->dmaWrite(igbe->platform->pciToDma(unusedCache.front()->buf), + packet->length, &pktEvent, packet->data); + return true; +} + +void +IGbE::RxDescCache::pktComplete() +{ + assert(unusedCache.size()); + RxDesc *desc; + desc = unusedCache.front(); + + uint16_t crcfixup = igbe->regs.rctl.secrc() ? 0 : 4 ; + desc->len = htole((uint16_t)(pktPtr->length + crcfixup)); + DPRINTF(EthernetDesc, "pktPtr->length: %d stripcrc offset: %d value written: %d %d\n", + pktPtr->length, crcfixup, + htole((uint16_t)(pktPtr->length + crcfixup)), + (uint16_t)(pktPtr->length + crcfixup)); + + // no support for anything but starting at 0 + assert(igbe->regs.rxcsum.pcss() == 0); + + DPRINTF(EthernetDesc, "RxDesc: Packet written to memory updating Descriptor\n"); + + uint8_t status = RXDS_DD | RXDS_EOP; + uint8_t err = 0; + IpPtr ip(pktPtr); + if (ip) { + if (igbe->regs.rxcsum.ipofld()) { + DPRINTF(EthernetDesc, "RxDesc: Checking IP checksum\n"); + status |= RXDS_IPCS; + desc->csum = htole(cksum(ip)); + if (cksum(ip) != 0) { + err |= RXDE_IPE; + DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n"); + } + } + TcpPtr tcp(ip); + if (tcp && igbe->regs.rxcsum.tuofld()) { + DPRINTF(EthernetDesc, "RxDesc: Checking TCP checksum\n"); + status |= RXDS_TCPCS; + desc->csum = htole(cksum(tcp)); + if (cksum(tcp) != 0) { + DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n"); + err |= RXDE_TCPE; + } + } + + UdpPtr udp(ip); + if (udp && igbe->regs.rxcsum.tuofld()) { + DPRINTF(EthernetDesc, "RxDesc: Checking UDP checksum\n"); + status |= RXDS_UDPCS; + desc->csum = htole(cksum(udp)); + if (cksum(tcp) != 0) { + DPRINTF(EthernetDesc, "RxDesc: Checksum is bad!!\n"); + err |= RXDE_TCPE; + } + } + } // if ip + + desc->status = htole(status); + desc->errors = htole(err); + + // No vlan support at this point... just set it to 0 + desc->vlan = 0; + + // Deal with the rx timer interrupts + if (igbe->regs.rdtr.delay()) { + DPRINTF(EthernetSM, "RXS: Scheduling DTR for %d\n", + igbe->regs.rdtr.delay() * igbe->intClock()); + if (igbe->rdtrEvent.scheduled()) + igbe->rdtrEvent.reschedule(curTick + igbe->regs.rdtr.delay() * + igbe->intClock()); + else + igbe->rdtrEvent.schedule(curTick + igbe->regs.rdtr.delay() * + igbe->intClock()); + } + + if (igbe->regs.radv.idv() && igbe->regs.rdtr.delay()) { + DPRINTF(EthernetSM, "RXS: Scheduling ADV for %d\n", + igbe->regs.radv.idv() * igbe->intClock()); + if (!igbe->radvEvent.scheduled()) + igbe->radvEvent.schedule(curTick + igbe->regs.radv.idv() * + igbe->intClock()); + } + + // if neither radv or rdtr, maybe itr is set... + if (!igbe->regs.rdtr.delay()) { + DPRINTF(EthernetSM, "RXS: Receive interrupt delay disabled, posting IT_RXT\n"); + igbe->postInterrupt(IT_RXT); + } + + // If the packet is small enough, interrupt appropriately + // I wonder if this is delayed or not?! + if (pktPtr->length <= igbe->regs.rsrpd.idv()) + igbe->postInterrupt(IT_SRPD); + + DPRINTF(EthernetDesc, "RxDesc: Processing of this descriptor complete\n"); + unusedCache.pop_front(); + usedCache.push_back(desc); + pktPtr = NULL; + enableSm(); + pktDone = true; +} + +void +IGbE::RxDescCache::enableSm() +{ + igbe->rxTick = true; + igbe->restartClock(); +} + +bool +IGbE::RxDescCache::packetDone() +{ + if (pktDone) { + pktDone = false; + return true; + } + return false; +} + +///////////////////////////////////// IGbE::TxDesc ///////////////////////////////// + +IGbE::TxDescCache::TxDescCache(IGbE *i, const std::string n, int s) + : DescCache<TxDesc>(i,n, s), pktDone(false), isTcp(false), pktWaiting(false), + hLen(0), pktEvent(this) + +{ +} + +int +IGbE::TxDescCache::getPacketSize() +{ + assert(unusedCache.size()); + + TxDesc *desc; + + DPRINTF(EthernetDesc, "TxDesc: Starting processing of descriptor\n"); + + while (unusedCache.size() && TxdOp::isContext(unusedCache.front())) { + DPRINTF(EthernetDesc, "TxDesc: Got context descriptor type... skipping\n"); + + // I think we can just ignore these for now? + desc = unusedCache.front(); + // is this going to be a tcp or udp packet? + isTcp = TxdOp::tcp(desc) ? true : false; + + // make sure it's ipv4 + assert(TxdOp::ip(desc)); + + TxdOp::setDd(desc); + unusedCache.pop_front(); + usedCache.push_back(desc); + } + + if (!unusedCache.size()) + return -1; + + DPRINTF(EthernetDesc, "TxDesc: Next TX packet is %d bytes\n", + TxdOp::getLen(unusedCache.front())); + + return TxdOp::getLen(unusedCache.front()); +} + +void +IGbE::TxDescCache::getPacketData(EthPacketPtr p) +{ + assert(unusedCache.size()); + + TxDesc *desc; + desc = unusedCache.front(); + + assert((TxdOp::isLegacy(desc) || TxdOp::isData(desc)) && TxdOp::getLen(desc)); + + pktPtr = p; + + pktWaiting = true; + + DPRINTF(EthernetDesc, "TxDesc: Starting DMA of packet\n"); + igbe->dmaRead(igbe->platform->pciToDma(TxdOp::getBuf(desc)), + TxdOp::getLen(desc), &pktEvent, p->data + hLen); + + +} + +void +IGbE::TxDescCache::pktComplete() +{ + + TxDesc *desc; + assert(unusedCache.size()); + assert(pktPtr); + + DPRINTF(EthernetDesc, "TxDesc: DMA of packet complete\n"); + + + desc = unusedCache.front(); + assert((TxdOp::isLegacy(desc) || TxdOp::isData(desc)) && TxdOp::getLen(desc)); + + DPRINTF(EthernetDesc, "TxDescriptor data d1: %#llx d2: %#llx\n", desc->d1, desc->d2); + + if (!TxdOp::eop(desc)) { + assert(hLen == 0); + hLen = TxdOp::getLen(desc); + unusedCache.pop_front(); + usedCache.push_back(desc); + pktDone = true; + pktWaiting = false; + pktPtr = NULL; + + DPRINTF(EthernetDesc, "TxDesc: Partial Packet Descriptor Done\n"); + return; + } + + // Set the length of the data in the EtherPacket + pktPtr->length = TxdOp::getLen(desc) + hLen; + + // no support for vlans + assert(!TxdOp::vle(desc)); + + // we alway report status + assert(TxdOp::rs(desc)); + + // we only support single packet descriptors at this point + assert(TxdOp::eop(desc)); + + // set that this packet is done + TxdOp::setDd(desc); + + DPRINTF(EthernetDesc, "TxDescriptor data d1: %#llx d2: %#llx\n", desc->d1, desc->d2); + + // Checksums are only ofloaded for new descriptor types + if (TxdOp::isData(desc) && ( TxdOp::ixsm(desc) || TxdOp::txsm(desc)) ) { + DPRINTF(EthernetDesc, "TxDesc: Calculating checksums for packet\n"); + IpPtr ip(pktPtr); + if (TxdOp::ixsm(desc)) { + ip->sum(0); + ip->sum(cksum(ip)); + DPRINTF(EthernetDesc, "TxDesc: Calculated IP checksum\n"); + } + if (TxdOp::txsm(desc)) { + if (isTcp) { + TcpPtr tcp(ip); + tcp->sum(0); + tcp->sum(cksum(tcp)); + DPRINTF(EthernetDesc, "TxDesc: Calculated TCP checksum\n"); + } else { + UdpPtr udp(ip); + udp->sum(0); + udp->sum(cksum(udp)); + DPRINTF(EthernetDesc, "TxDesc: Calculated UDP checksum\n"); + } + } + } + + if (TxdOp::ide(desc)) { + // Deal with the rx timer interrupts + DPRINTF(EthernetDesc, "TxDesc: Descriptor had IDE set\n"); + if (igbe->regs.tidv.idv()) { + DPRINTF(EthernetDesc, "TxDesc: setting tidv\n"); + if (igbe->tidvEvent.scheduled()) + igbe->tidvEvent.reschedule(curTick + igbe->regs.tidv.idv() * + igbe->intClock()); + else + igbe->tidvEvent.schedule(curTick + igbe->regs.tidv.idv() * + igbe->intClock()); + } + + if (igbe->regs.tadv.idv() && igbe->regs.tidv.idv()) { + DPRINTF(EthernetDesc, "TxDesc: setting tadv\n"); + if (!igbe->tadvEvent.scheduled()) + igbe->tadvEvent.schedule(curTick + igbe->regs.tadv.idv() * + igbe->intClock()); + } + } + + + + unusedCache.pop_front(); + usedCache.push_back(desc); + pktDone = true; + pktWaiting = false; + pktPtr = NULL; + + hLen = 0; + DPRINTF(EthernetDesc, "TxDesc: Descriptor Done\n"); + + if (igbe->regs.txdctl.wthresh() == 0) { + DPRINTF(EthernetDesc, "TxDesc: WTHRESH == 0, writing back descriptor\n"); + writeback(0); + } else if (igbe->regs.txdctl.wthresh() >= usedCache.size()) { + DPRINTF(EthernetDesc, "TxDesc: used > WTHRESH, writing back descriptor\n"); + writeback((igbe->cacheBlockSize()-1)>>4); + } + +} + +bool +IGbE::TxDescCache::packetAvailable() +{ + if (pktDone) { + pktDone = false; + return true; + } + return false; +} + +void +IGbE::TxDescCache::enableSm() +{ + igbe->txTick = true; + igbe->restartClock(); +} + + + + +///////////////////////////////////// IGbE ///////////////////////////////// + +void +IGbE::restartClock() +{ + if (!tickEvent.scheduled() && (rxTick || txTick)) + tickEvent.schedule((curTick/cycles(1)) * cycles(1) + cycles(1)); } void +IGbE::txStateMachine() +{ + if (!regs.tctl.en()) { + txTick = false; + DPRINTF(EthernetSM, "TXS: TX disabled, stopping ticking\n"); + return; + } + + // If we have a packet available and it's length is not 0 (meaning it's not + // a multidescriptor packet) put it in the fifo, otherwise an the next + // iteration we'll get the rest of the data + if (txPacket && txDescCache.packetAvailable() && txPacket->length) { + bool success; + DPRINTF(EthernetSM, "TXS: packet placed in TX FIFO\n"); + success = txFifo.push(txPacket); + assert(success); + txPacket = NULL; + return; + } + + // Only support descriptor granularity + assert(regs.txdctl.gran()); + if (regs.txdctl.lwthresh() && txDescCache.descLeft() < (regs.txdctl.lwthresh() * 8)) { + DPRINTF(EthernetSM, "TXS: LWTHRESH caused posting of TXDLOW\n"); + postInterrupt(IT_TXDLOW); + } + + if (!txPacket) { + txPacket = new EthPacketData(16384); + } + + if (!txDescCache.packetWaiting()) { + if (txDescCache.descLeft() == 0) { + DPRINTF(EthernetSM, "TXS: No descriptors left in ring, forcing " + "writeback stopping ticking and posting TXQE\n"); + txDescCache.writeback(0); + txTick = false; + postInterrupt(IT_TXQE, true); + } + + + if (!(txDescCache.descUnused())) { + DPRINTF(EthernetSM, "TXS: No descriptors available in cache, fetching and stopping ticking\n"); + txTick = false; + txDescCache.fetchDescriptors(); + return; + } + + int size; + size = txDescCache.getPacketSize(); + if (size > 0 && txFifo.avail() > size) { + DPRINTF(EthernetSM, "TXS: Reserving %d bytes in FIFO and begining " + "DMA of next packet\n", size); + txFifo.reserve(size); + txDescCache.getPacketData(txPacket); + } else { + DPRINTF(EthernetSM, "TXS: No packets to get, writing back used descriptors\n"); + txDescCache.writeback(0); + } + + return; + } +} + +bool +IGbE::ethRxPkt(EthPacketPtr pkt) +{ + DPRINTF(Ethernet, "RxFIFO: Receiving pcakte from wire\n"); + if (!regs.rctl.en()) { + DPRINTF(Ethernet, "RxFIFO: RX not enabled, dropping\n"); + return true; + } + + // restart the state machines if they are stopped + rxTick = true; + if ((rxTick || txTick) && !tickEvent.scheduled()) { + DPRINTF(EthernetSM, "RXS: received packet into fifo, starting ticking\n"); + restartClock(); + } + + if (!rxFifo.push(pkt)) { + DPRINTF(Ethernet, "RxFIFO: Packet won't fit in fifo... dropped\n"); + postInterrupt(IT_RXO, true); + return false; + } + return true; +} + + +void +IGbE::rxStateMachine() +{ + if (!regs.rctl.en()) { + rxTick = false; + DPRINTF(EthernetSM, "RXS: RX disabled, stopping ticking\n"); + return; + } + + // If the packet is done check for interrupts/descriptors/etc + if (rxDescCache.packetDone()) { + DPRINTF(EthernetSM, "RXS: Packet completed DMA to memory\n"); + int descLeft = rxDescCache.descLeft(); + switch (regs.rctl.rdmts()) { + case 2: if (descLeft > .125 * regs.rdlen()) break; + case 1: if (descLeft > .250 * regs.rdlen()) break; + case 0: if (descLeft > .500 * regs.rdlen()) break; + DPRINTF(Ethernet, "RXS: Interrupting (RXDMT) because of descriptors left\n"); + postInterrupt(IT_RXDMT); + break; + } + + if (descLeft == 0) { + DPRINTF(EthernetSM, "RXS: No descriptors left in ring, forcing writeback\n"); + rxDescCache.writeback(0); + DPRINTF(EthernetSM, "RXS: No descriptors left, stopping ticking\n"); + rxTick = false; + } + + // only support descriptor granulaties + assert(regs.rxdctl.gran()); + + if (regs.rxdctl.wthresh() >= rxDescCache.descUsed()) { + DPRINTF(EthernetSM, "RXS: Writing back because WTHRESH >= descUsed\n"); + if (regs.rxdctl.wthresh() < (cacheBlockSize()>>4)) + rxDescCache.writeback(regs.rxdctl.wthresh()-1); + else + rxDescCache.writeback((cacheBlockSize()-1)>>4); + } + + if ((rxDescCache.descUnused() < regs.rxdctl.pthresh()) && + ((rxDescCache.descLeft() - rxDescCache.descUnused()) > regs.rxdctl.hthresh())) { + DPRINTF(EthernetSM, "RXS: Fetching descriptors because descUnused < PTHRESH\n"); + rxDescCache.fetchDescriptors(); + } + + if (rxDescCache.descUnused() == 0) { + DPRINTF(EthernetSM, "RXS: No descriptors available in cache, stopping ticking\n"); + rxTick = false; + DPRINTF(EthernetSM, "RXS: Fetching descriptors because none available\n"); + rxDescCache.fetchDescriptors(); + } + return; + } + + if (!rxDescCache.descUnused()) { + DPRINTF(EthernetSM, "RXS: No descriptors available in cache, stopping ticking\n"); + rxTick = false; + DPRINTF(EthernetSM, "RXS: No descriptors available, fetching\n"); + rxDescCache.fetchDescriptors(); + return; + } + + if (rxFifo.empty()) { + DPRINTF(EthernetSM, "RXS: RxFIFO empty, stopping ticking\n"); + rxTick = false; + return; + } + + EthPacketPtr pkt; + pkt = rxFifo.front(); + + DPRINTF(EthernetSM, "RXS: Writing packet into memory\n"); + if (!rxDescCache.writePacket(pkt)) { + return; + } + + DPRINTF(EthernetSM, "RXS: Removing packet from FIFO\n"); + rxFifo.pop(); + DPRINTF(EthernetSM, "RXS: stopping ticking until packet DMA completes\n"); + rxTick = false; +} + +void +IGbE::txWire() +{ + if (txFifo.empty()) { + return; + } + + txTick = true; + + if (etherInt->sendPacket(txFifo.front())) { + DPRINTF(Ethernet, "TxFIFO: Successful transmit, bytes in fifo: %d\n", + txFifo.avail()); + txFifo.pop(); + } + +} + +void +IGbE::tick() +{ + DPRINTF(EthernetSM, "IGbE: -------------- Cycle --------------\n"); + + if (rxTick) + rxStateMachine(); + + if (txTick) { + txStateMachine(); + txWire(); + } + + if (rxTick || txTick) + tickEvent.schedule(curTick + cycles(1)); +} + +void IGbE::ethTxDone() { - panic("Need to implemenet\n"); + // restart the state machines if they are stopped + txTick = true; + restartClock(); + DPRINTF(Ethernet, "TxFIFO: Transmission complete\n"); } void @@ -355,6 +1251,14 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(IGbE) Param<uint32_t> pci_func; Param<Tick> pio_latency; Param<Tick> config_latency; + Param<std::string> hardware_address; + Param<bool> use_flow_control; + Param<int> rx_fifo_size; + Param<int> tx_fifo_size; + Param<int> rx_desc_cache_size; + Param<int> tx_desc_cache_size; + Param<Tick> clock; + END_DECLARE_SIM_OBJECT_PARAMS(IGbE) @@ -367,7 +1271,14 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(IGbE) INIT_PARAM(pci_dev, "PCI device number"), INIT_PARAM(pci_func, "PCI function code"), INIT_PARAM_DFLT(pio_latency, "Programmed IO latency in bus cycles", 1), - INIT_PARAM(config_latency, "Number of cycles for a config read or write") + INIT_PARAM(config_latency, "Number of cycles for a config read or write"), + INIT_PARAM(hardware_address, "Ethernet Hardware Address"), + INIT_PARAM(use_flow_control,"Should the device use xon/off packets"), + INIT_PARAM(rx_fifo_size,"Size of the RX FIFO"), + INIT_PARAM(tx_fifo_size,"Size of the TX FIFO"), + INIT_PARAM(rx_desc_cache_size,"Size of the RX descriptor cache"), + INIT_PARAM(tx_desc_cache_size,"Size of the TX descriptor cache"), + INIT_PARAM(clock,"Clock rate for the device to tick at") END_INIT_SIM_OBJECT_PARAMS(IGbE) @@ -385,6 +1296,14 @@ CREATE_SIM_OBJECT(IGbE) params->functionNum = pci_func; params->pio_delay = pio_latency; params->config_delay = config_latency; + params->hardware_address = hardware_address; + params->use_flow_control = use_flow_control; + params->rx_fifo_size = rx_fifo_size; + params->tx_fifo_size = tx_fifo_size; + params->rx_desc_cache_size = rx_desc_cache_size; + params->tx_desc_cache_size = tx_desc_cache_size; + params->clock = clock; + return new IGbE(params); } diff --git a/src/dev/i8254xGBe.hh b/src/dev/i8254xGBe.hh index ce4007263..a2b9f38d5 100644 --- a/src/dev/i8254xGBe.hh +++ b/src/dev/i8254xGBe.hh @@ -35,6 +35,9 @@ #ifndef __DEV_I8254XGBE_HH__ #define __DEV_I8254XGBE_HH__ +#include <deque> +#include <string> + #include "base/inet.hh" #include "base/statistics.hh" #include "dev/etherint.hh" @@ -50,22 +53,451 @@ class IGbE : public PciDev { private: IGbEInt *etherInt; + + // device registers iGbReg::Regs regs; + + // eeprom data, status and control bits int eeOpBits, eeAddrBits, eeDataBits; uint8_t eeOpcode, eeAddr; - uint16_t flash[iGbReg::EEPROM_SIZE]; + // cached parameters from params struct + Tick tickRate; + bool useFlowControl; + + // packet fifos + PacketFifo rxFifo; + PacketFifo txFifo; + + // Packet that we are currently putting into the txFifo + EthPacketPtr txPacket; + + // Should to Rx/Tx State machine tick? + bool rxTick; + bool txTick; + + // Event and function to deal with RDTR timer expiring + void rdtrProcess() { rxDescCache.writeback(0); postInterrupt(iGbReg::IT_RXT, true); } + //friend class EventWrapper<IGbE, &IGbE::rdtrProcess>; + EventWrapper<IGbE, &IGbE::rdtrProcess> rdtrEvent; + + // Event and function to deal with RADV timer expiring + void radvProcess() { rxDescCache.writeback(0); postInterrupt(iGbReg::IT_RXT, true); } + //friend class EventWrapper<IGbE, &IGbE::radvProcess>; + EventWrapper<IGbE, &IGbE::radvProcess> radvEvent; + + // Event and function to deal with TADV timer expiring + void tadvProcess() { postInterrupt(iGbReg::IT_TXDW, true); } + //friend class EventWrapper<IGbE, &IGbE::tadvProcess>; + EventWrapper<IGbE, &IGbE::tadvProcess> tadvEvent; + + // Event and function to deal with TIDV timer expiring + void tidvProcess() { postInterrupt(iGbReg::IT_TXDW, true); }; + //friend class EventWrapper<IGbE, &IGbE::tidvProcess>; + EventWrapper<IGbE, &IGbE::tidvProcess> tidvEvent; + + // Main event to tick the device + void tick(); + //friend class EventWrapper<IGbE, &IGbE::tick>; + EventWrapper<IGbE, &IGbE::tick> tickEvent; + + + void rxStateMachine(); + void txStateMachine(); + void txWire(); + + /** Write an interrupt into the interrupt pending register and check mask + * and interrupt limit timer before sending interrupt to CPU + * @param t the type of interrupt we are posting + * @param now should we ignore the interrupt limiting timer + */ + void postInterrupt(iGbReg::IntTypes t, bool now = false); + + /** Check and see if changes to the mask register have caused an interrupt + * to need to be sent or perhaps removed an interrupt cause. + */ + void chkInterrupt(); + + /** Send an interrupt to the cpu + */ + void cpuPostInt(); + // Event to moderate interrupts + EventWrapper<IGbE, &IGbE::cpuPostInt> interEvent; + + /** Clear the interupt line to the cpu + */ + void cpuClearInt(); + + Tick intClock() { return Clock::Int::ns * 1024; } + + void restartClock(); + + template<class T> + class DescCache + { + protected: + virtual Addr descBase() const = 0; + virtual long descHead() const = 0; + virtual long descTail() const = 0; + virtual long descLen() const = 0; + virtual void updateHead(long h) = 0; + virtual void enableSm() = 0; + virtual void intAfterWb() const {} + + std::deque<T*> usedCache; + std::deque<T*> unusedCache; + + T *fetchBuf; + T *wbBuf; + + // Pointer to the device we cache for + IGbE *igbe; + + // Name of this descriptor cache + std::string _name; + + // How far we've cached + int cachePnt; + + // The size of the descriptor cache + int size; + + // How many descriptors we are currently fetching + int curFetching; + + // How many descriptors we are currently writing back + int wbOut; + + // if the we wrote back to the end of the descriptor ring and are going + // to have to wrap and write more + bool moreToWb; + + // What the alignment is of the next descriptor writeback + Addr wbAlignment; + + /** The packet that is currently being dmad to memory if any + */ + EthPacketPtr pktPtr; + + public: + DescCache(IGbE *i, const std::string n, int s) + : igbe(i), _name(n), cachePnt(0), size(s), curFetching(0), wbOut(0), + pktPtr(NULL), fetchEvent(this), wbEvent(this) + { + fetchBuf = new T[size]; + wbBuf = new T[size]; + } + + virtual ~DescCache() + { + reset(); + } + + std::string name() { return _name; } + + /** If the address/len/head change when we've got descriptors that are + * dirty that is very bad. This function checks that we don't and if we + * do panics. + */ + void areaChanged() + { + if (usedCache.size() > 0 || unusedCache.size() > 0) + panic("Descriptor Address, Length or Head changed. Bad\n"); + } + + void writeback(Addr aMask) + { + int curHead = descHead(); + int max_to_wb = usedCache.size(); + + DPRINTF(EthernetDesc, "Writing back descriptors head: %d tail: " + "%d len: %d cachePnt: %d max_to_wb: %d descleft: %d\n", + curHead, descTail(), descLen(), cachePnt, max_to_wb, + descLeft()); + + // Check if this writeback is less restrictive that the previous + // and if so setup another one immediately following it + if (wbOut && (aMask < wbAlignment)) { + moreToWb = true; + wbAlignment = aMask; + DPRINTF(EthernetDesc, "Writing back already in process, returning\n"); + return; + } + + + moreToWb = false; + wbAlignment = aMask; + + if (max_to_wb + curHead > descLen()) { + max_to_wb = descLen() - curHead; + moreToWb = true; + // this is by definition aligned correctly + } else if (aMask != 0) { + // align the wb point to the mask + max_to_wb = max_to_wb & ~aMask; + } + + DPRINTF(EthernetDesc, "Writing back %d descriptors\n", max_to_wb); + + if (max_to_wb <= 0 || wbOut) + return; + + wbOut = max_to_wb; + + for (int x = 0; x < wbOut; x++) + memcpy(&wbBuf[x], usedCache[x], sizeof(T)); + + for (int x = 0; x < wbOut; x++) { + assert(usedCache.size()); + delete usedCache[0]; + usedCache.pop_front(); + }; + + + assert(wbOut); + igbe->dmaWrite(igbe->platform->pciToDma(descBase() + curHead * sizeof(T)), + wbOut * sizeof(T), &wbEvent, (uint8_t*)wbBuf); + } + + /** Fetch a chunk of descriptors into the descriptor cache. + * Calls fetchComplete when the memory system returns the data + */ + void fetchDescriptors() + { + size_t max_to_fetch = descTail() - cachePnt; + if (max_to_fetch < 0) + max_to_fetch = descLen() - cachePnt; + + max_to_fetch = std::min(max_to_fetch, (size - usedCache.size() - + unusedCache.size())); + + DPRINTF(EthernetDesc, "Fetching descriptors head: %d tail: " + "%d len: %d cachePnt: %d max_to_fetch: %d descleft: %d\n", + descHead(), descTail(), descLen(), cachePnt, + max_to_fetch, descLeft()); + + // Nothing to do + if (max_to_fetch == 0 || curFetching) + return; + + // So we don't have two descriptor fetches going on at once + curFetching = max_to_fetch; + + DPRINTF(EthernetDesc, "Fetching descriptors at %#x (%#x), size: %#x\n", + descBase() + cachePnt * sizeof(T), + igbe->platform->pciToDma(descBase() + cachePnt * sizeof(T)), + curFetching * sizeof(T)); + + assert(curFetching); + igbe->dmaRead(igbe->platform->pciToDma(descBase() + cachePnt * sizeof(T)), + curFetching * sizeof(T), &fetchEvent, (uint8_t*)fetchBuf); + } + + + /** Called by event when dma to read descriptors is completed + */ + void fetchComplete() + { + T *newDesc; + for (int x = 0; x < curFetching; x++) { + newDesc = new T; + memcpy(newDesc, &fetchBuf[x], sizeof(T)); + unusedCache.push_back(newDesc); + } + +#ifndef NDEBUG + int oldCp = cachePnt; +#endif + + cachePnt += curFetching; + if (cachePnt > descLen()) + cachePnt -= descLen(); + + curFetching = 0; + + DPRINTF(EthernetDesc, "Fetching complete cachePnt %d -> %d\n", + oldCp, cachePnt); + + enableSm(); + + } + + EventWrapper<DescCache, &DescCache::fetchComplete> fetchEvent; + + /** Called by event when dma to writeback descriptors is completed + */ + void wbComplete() + { + long curHead = descHead(); +#ifndef NDEBUG + long oldHead = curHead; +#endif + + curHead += wbOut; + wbOut = 0; + + if (curHead > descLen()) + curHead = 0; + + // Update the head + updateHead(curHead); + + DPRINTF(EthernetDesc, "Writeback complete curHead %d -> %d\n", + oldHead, curHead); + + // If we still have more to wb, call wb now + if (moreToWb) { + DPRINTF(EthernetDesc, "Writeback has more todo\n"); + writeback(wbAlignment); + } + intAfterWb(); + } + + + EventWrapper<DescCache, &DescCache::wbComplete> wbEvent; + + /* Return the number of descriptors left in the ring, so the device has + * a way to figure out if it needs to interrupt. + */ + int descLeft() const + { + int left = unusedCache.size(); + if (cachePnt - descTail() >= 0) + left += (cachePnt - descTail()); + else + left += (descTail() - cachePnt); + + return left; + } + + /* Return the number of descriptors used and not written back. + */ + int descUsed() const { return usedCache.size(); } + + /* Return the number of cache unused descriptors we have. */ + int descUnused() const {return unusedCache.size(); } + + /* Get into a state where the descriptor address/head/etc colud be + * changed */ + void reset() + { + DPRINTF(EthernetDesc, "Reseting descriptor cache\n"); + for (int x = 0; x < usedCache.size(); x++) + delete usedCache[x]; + for (int x = 0; x < unusedCache.size(); x++) + delete unusedCache[x]; + + usedCache.clear(); + unusedCache.clear(); + } + + }; + + + class RxDescCache : public DescCache<iGbReg::RxDesc> + { + protected: + virtual Addr descBase() const { return igbe->regs.rdba(); } + virtual long descHead() const { return igbe->regs.rdh(); } + virtual long descLen() const { return igbe->regs.rdlen() >> 4; } + virtual long descTail() const { return igbe->regs.rdt(); } + virtual void updateHead(long h) { igbe->regs.rdh(h); } + virtual void enableSm(); + + bool pktDone; + + public: + RxDescCache(IGbE *i, std::string n, int s); + + /** Write the given packet into the buffer(s) pointed to by the + * descriptor and update the book keeping. Should only be called when + * there are no dma's pending. + * @param packet ethernet packet to write + * @return if the packet could be written (there was a free descriptor) + */ + bool writePacket(EthPacketPtr packet); + /** Called by event when dma to write packet is completed + */ + void pktComplete(); + + /** Check if the dma on the packet has completed. + */ + + bool packetDone(); + + EventWrapper<RxDescCache, &RxDescCache::pktComplete> pktEvent; + + }; + friend class RxDescCache; + + RxDescCache rxDescCache; + + class TxDescCache : public DescCache<iGbReg::TxDesc> + { + protected: + virtual Addr descBase() const { return igbe->regs.tdba(); } + virtual long descHead() const { return igbe->regs.tdh(); } + virtual long descTail() const { return igbe->regs.tdt(); } + virtual long descLen() const { return igbe->regs.tdlen() >> 4; } + virtual void updateHead(long h) { igbe->regs.tdh(h); } + virtual void enableSm(); + virtual void intAfterWb() const { igbe->postInterrupt(iGbReg::IT_TXDW);} + + bool pktDone; + bool isTcp; + bool pktWaiting; + int hLen; + + public: + TxDescCache(IGbE *i, std::string n, int s); + + /** Tell the cache to DMA a packet from main memory into its buffer and + * return the size the of the packet to reserve space in tx fifo. + * @return size of the packet + */ + int getPacketSize(); + void getPacketData(EthPacketPtr p); + + /** Ask if the packet has been transfered so the state machine can give + * it to the fifo. + * @return packet available in descriptor cache + */ + bool packetAvailable(); + + /** Ask if we are still waiting for the packet to be transfered. + * @return packet still in transit. + */ + bool packetWaiting() { return pktWaiting; } + + /** Called by event when dma to write packet is completed + */ + void pktComplete(); + EventWrapper<TxDescCache, &TxDescCache::pktComplete> pktEvent; + + }; + friend class TxDescCache; + + TxDescCache txDescCache; public: struct Params : public PciDev::Params { - ; + Net::EthAddr hardware_address; + bool use_flow_control; + int rx_fifo_size; + int tx_fifo_size; + int rx_desc_cache_size; + int tx_desc_cache_size; + Tick clock; }; IGbE(Params *params); ~IGbE() {;} + Tick clock; + inline Tick cycles(int numCycles) const { return numCycles * clock; } + virtual Tick read(PacketPtr pkt); virtual Tick write(PacketPtr pkt); @@ -76,6 +508,7 @@ class IGbE : public PciDev void setEthInt(IGbEInt *i) { assert(!etherInt); etherInt = i; } + const Params *params() const {return (const Params *)_params; } virtual void serialize(std::ostream &os); diff --git a/src/dev/i8254xGBe_defs.hh b/src/dev/i8254xGBe_defs.hh index ae0925356..8538c155b 100644 --- a/src/dev/i8254xGBe_defs.hh +++ b/src/dev/i8254xGBe_defs.hh @@ -31,433 +31,570 @@ /* @file * Register and structure descriptions for Intel's 8254x line of gigabit ethernet controllers. */ +#include "base/bitfield.hh" namespace iGbReg { -const uint32_t CTRL = 0x00000; //* -const uint32_t STATUS = 0x00008; //* -const uint32_t EECD = 0x00010; //* -const uint32_t EERD = 0x00014; //* -const uint32_t CTRL_EXT = 0x00018; -const uint32_t PBA = 0x01000; -const uint32_t ICR = 0x000C0; //* -const uint32_t ITR = 0x000C4; -const uint32_t ICS = 0x000C8; -const uint32_t IMS = 0x000D0; -const uint32_t IMC = 0x000D8; //* -const uint32_t RCTL = 0x00100; //* -const uint32_t RDBAL = 0x02800; -const uint32_t RDBAH = 0x02804; -const uint32_t RDLEN = 0x02808; -const uint32_t RDH = 0x02810; -const uint32_t RDT = 0x02818; -const uint32_t RDTR = 0x02820; -const uint32_t RADV = 0x0282C; -const uint32_t RSRPD = 0x02C00; -const uint32_t TCTL = 0x00400; //* -const uint32_t TDBAL = 0x03800; -const uint32_t TDBAH = 0x03804; -const uint32_t TDLEN = 0x03808; -const uint32_t TDH = 0x03810; -const uint32_t THT = 0x03818; -const uint32_t TIDV = 0x03820; -const uint32_t TXDMAC = 0x03000; -const uint32_t TXDCTL = 0x03828; -const uint32_t TADV = 0x0282C; -const uint32_t TSPMT = 0x03830; -const uint32_t RXDCTL = 0x02828; -const uint32_t RXCSUM = 0x05000; -const uint32_t MANC = 0x05820;//* + +// Registers used by the Intel GbE NIC +const uint32_t REG_CTRL = 0x00000; +const uint32_t REG_STATUS = 0x00008; +const uint32_t REG_EECD = 0x00010; +const uint32_t REG_EERD = 0x00014; +const uint32_t REG_CTRL_EXT = 0x00018; +const uint32_t REG_MDIC = 0x00020; +const uint32_t REG_FCAL = 0x00028; +const uint32_t REG_FCAH = 0x0002C; +const uint32_t REG_FCT = 0x00030; +const uint32_t REG_VET = 0x00038; +const uint32_t REG_PBA = 0x01000; +const uint32_t REG_ICR = 0x000C0; +const uint32_t REG_ITR = 0x000C4; +const uint32_t REG_ICS = 0x000C8; +const uint32_t REG_IMS = 0x000D0; +const uint32_t REG_IMC = 0x000D8; +const uint32_t REG_IAM = 0x000E0; +const uint32_t REG_RCTL = 0x00100; +const uint32_t REG_FCTTV = 0x00170; +const uint32_t REG_TIPG = 0x00410; +const uint32_t REG_AIFS = 0x00458; +const uint32_t REG_LEDCTL = 0x00e00; +const uint32_t REG_FCRTL = 0x02160; +const uint32_t REG_FCRTH = 0x02168; +const uint32_t REG_RDBAL = 0x02800; +const uint32_t REG_RDBAH = 0x02804; +const uint32_t REG_RDLEN = 0x02808; +const uint32_t REG_RDH = 0x02810; +const uint32_t REG_RDT = 0x02818; +const uint32_t REG_RDTR = 0x02820; +const uint32_t REG_RXDCTL = 0x02828; +const uint32_t REG_RADV = 0x0282C; +const uint32_t REG_TCTL = 0x00400; +const uint32_t REG_TDBAL = 0x03800; +const uint32_t REG_TDBAH = 0x03804; +const uint32_t REG_TDLEN = 0x03808; +const uint32_t REG_TDH = 0x03810; +const uint32_t REG_TDT = 0x03818; +const uint32_t REG_TIDV = 0x03820; +const uint32_t REG_TXDCTL = 0x03828; +const uint32_t REG_TADV = 0x0382C; +const uint32_t REG_CRCERRS = 0x04000; +const uint32_t REG_RXCSUM = 0x05000; +const uint32_t REG_MTA = 0x05200; +const uint32_t REG_RAL = 0x05400; +const uint32_t REG_RAH = 0x05404; +const uint32_t REG_VFTA = 0x05600; + +const uint32_t REG_WUC = 0x05800; +const uint32_t REG_MANC = 0x05820; const uint8_t EEPROM_READ_OPCODE_SPI = 0x03; const uint8_t EEPROM_RDSR_OPCODE_SPI = 0x05; const uint8_t EEPROM_SIZE = 64; +const uint16_t EEPROM_CSUM = 0xBABA; + +const uint8_t VLAN_FILTER_TABLE_SIZE = 128; +const uint8_t RCV_ADDRESS_TABLE_SIZE = 16; +const uint8_t MULTICAST_TABLE_SIZE = 128; +const uint32_t STATS_REGS_SIZE = 0x124; + + +// Registers in that are accessed in the PHY +const uint8_t PHY_PSTATUS = 0x1; +const uint8_t PHY_PID = 0x2; +const uint8_t PHY_EPID = 0x3; +const uint8_t PHY_GSTATUS = 10; +const uint8_t PHY_EPSTATUS = 15; +const uint8_t PHY_AGC = 18; + +// Receive Descriptor Status Flags +const uint8_t RXDS_PIF = 0x80; +const uint8_t RXDS_IPCS = 0x40; +const uint8_t RXDS_TCPCS = 0x20; +const uint8_t RXDS_UDPCS = 0x10; +const uint8_t RXDS_VP = 0x08; +const uint8_t RXDS_IXSM = 0x04; +const uint8_t RXDS_EOP = 0x02; +const uint8_t RXDS_DD = 0x01; +// Receive Descriptor Error Flags +const uint8_t RXDE_RXE = 0x80; +const uint8_t RXDE_IPE = 0x40; +const uint8_t RXDE_TCPE = 0x20; +const uint8_t RXDE_SEQ = 0x04; +const uint8_t RXDE_SE = 0x02; +const uint8_t RXDE_CE = 0x01; + +// Interrupt types +enum IntTypes +{ + IT_NONE = 0x00000, //dummy value + IT_TXDW = 0x00001, + IT_TXQE = 0x00002, + IT_LSC = 0x00004, + IT_RXSEQ = 0x00008, + IT_RXDMT = 0x00010, + IT_RXO = 0x00040, + IT_RXT = 0x00080, + IT_MADC = 0x00200, + IT_RXCFG = 0x00400, + IT_GPI0 = 0x02000, + IT_GPI1 = 0x04000, + IT_TXDLOW = 0x08000, + IT_SRPD = 0x10000, + IT_ACK = 0x20000 +}; + +// Receive Descriptor struct struct RxDesc { Addr buf; uint16_t len; uint16_t csum; - union { - uint8_t status; - struct { // these may be in the worng order - uint8_t dd:1; // descriptor done (hw is done when 1) - uint8_t eop:1; // end of packet - uint8_t xism:1; // ignore checksum - uint8_t vp:1; // packet is vlan packet - uint8_t rsv:1; // reserved - uint8_t tcpcs:1; // TCP checksum done - uint8_t ipcs:1; // IP checksum done - uint8_t pif:1; // passed in-exact filter - } st; - }; - union { - uint8_t errors; - struct { - uint8_t ce:1; // crc error or alignment error - uint8_t se:1; // symbol error - uint8_t seq:1; // sequence error - uint8_t rsv:1; // reserved - uint8_t cxe:1; // carrier extension error - uint8_t tcpe:1; // tcp checksum error - uint8_t ipe:1; // ip checksum error - uint8_t rxe:1; // PX data error - } er; - }; - union { - uint16_t special; - struct { - uint16_t vlan:12; //vlan id - uint16_t cfi:1; // canocial form id - uint16_t pri:3; // user priority - } sp; - }; + uint8_t status; + uint8_t errors; + uint16_t vlan; }; -union TxDesc { - uint8_t data[16]; - struct { - Addr buf; - uint16_t len; - uint8_t cso; - union { - uint8_t command; - struct { - uint8_t eop:1; // end of packet - uint8_t ifcs:1; // insert crc - uint8_t ic:1; // insert checksum - uint8_t rs:1; // report status - uint8_t rps:1; // report packet sent - uint8_t dext:1; // extension - uint8_t vle:1; // vlan enable - uint8_t ide:1; // interrupt delay enable - } cmd; - }; - union { - uint8_t status:4; - struct { - uint8_t dd:1; // descriptor done - uint8_t ec:1; // excess collisions - uint8_t lc:1; // late collision - uint8_t tu:1; // transmit underrun - } st; - }; - uint8_t reserved:4; - uint8_t css; - union { - uint16_t special; - struct { - uint16_t vlan:12; //vlan id - uint16_t cfi:1; // canocial form id - uint16_t pri:3; // user priority - } sp; - }; - } legacy; - - // Type 0000 descriptor - struct { - uint8_t ipcss; - uint8_t ipcso; - uint16_t ipcse; - uint8_t tucss; - uint8_t tucso; - uint16_t tucse; - uint32_t paylen:20; - uint8_t dtype:4; - union { - uint8_t tucommand; - struct { - uint8_t tcp:1; // tcp/udp - uint8_t ip:1; // ip ipv4/ipv6 - uint8_t tse:1; // tcp segment enbale - uint8_t rs:1; // report status - uint8_t rsv0:1; // reserved - uint8_t dext:1; // descriptor extension - uint8_t rsv1:1; // reserved - uint8_t ide:1; // interrupt delay enable - } tucmd; - }; - union { - uint8_t status:4; - struct { - uint8_t dd:1; - uint8_t rsvd:3; - } sta; - }; - uint8_t reserved:4; - uint8_t hdrlen; - uint16_t mss; - } t0; - - // Type 0001 descriptor - struct { - Addr buf; - uint32_t dtalen:20; - uint8_t dtype:4; - union { - uint8_t dcommand; - struct { - uint8_t eop:1; // end of packet - uint8_t ifcs:1; // insert crc - uint8_t tse:1; // segmentation enable - uint8_t rs:1; // report status - uint8_t rps:1; // report packet sent - uint8_t dext:1; // extension - uint8_t vle:1; // vlan enable - uint8_t ide:1; // interrupt delay enable - } dcmd; - }; - union { - uint8_t status:4; - struct { - uint8_t dd:1; // descriptor done - uint8_t ec:1; // excess collisions - uint8_t lc:1; // late collision - uint8_t tu:1; // transmit underrun - } sta; - }; - union { - uint8_t pktopts; - struct { - uint8_t ixsm:1; // insert ip checksum - uint8_t txsm:1; // insert tcp checksum - }; - }; - union { - uint16_t special; - struct { - uint16_t vlan:12; //vlan id - uint16_t cfi:1; // canocial form id - uint16_t pri:3; // user priority - } sp; - }; - } t1; - - // Junk to test descriptor type! - struct { - uint64_t junk; - uint32_t junk1:20; - uint8_t dtype; - uint8_t junk2:5; - uint8_t dext:1; - uint8_t junk3:2; - uint8_t junk4:4; - uint32_t junk5; - } type; +struct TxDesc { + uint64_t d1; + uint64_t d2; }; +namespace TxdOp { +const uint8_t TXD_CNXT = 0x0; +const uint8_t TXD_DATA = 0x0; + +bool isLegacy(TxDesc *d) { return !bits(d->d2,29,29); } +uint8_t getType(TxDesc *d) { return bits(d->d2, 23,20); } +bool isContext(TxDesc *d) { return !isLegacy(d) && getType(d) == TXD_CNXT; } +bool isData(TxDesc *d) { return !isLegacy(d) && getType(d) == TXD_DATA; } + +Addr getBuf(TxDesc *d) { assert(isLegacy(d) || isData(d)); return d->d1; } +Addr getLen(TxDesc *d) { if (isLegacy(d)) return bits(d->d2,15,0); else return bits(d->d2, 19,0); } +void setDd(TxDesc *d) +{ + replaceBits(d->d2, 35, 32, ULL(1)); +} + +bool ide(TxDesc *d) { return bits(d->d2, 31,31); } +bool vle(TxDesc *d) { assert(isLegacy(d) || isData(d)); return bits(d->d2, 30,30); } +bool rs(TxDesc *d) { return bits(d->d2, 27,27); } +bool ic(TxDesc *d) { assert(isLegacy(d) || isData(d)); return isLegacy(d) && bits(d->d2, 26,26); } +bool tse(TxDesc *d) { return (isData(d) || isContext(d)) && bits(d->d2, 26,26); } +bool ifcs(TxDesc *d) { assert(isLegacy(d) || isData(d)); return bits(d->d2, 25,25); } +bool eop(TxDesc *d) { assert(isLegacy(d) || isData(d)); return bits(d->d2, 24,24); } +bool ip(TxDesc *d) { assert(isContext(d)); return bits(d->d2, 25,25); } +bool tcp(TxDesc *d) { assert(isContext(d)); return bits(d->d2, 24,24); } + +uint8_t getCso(TxDesc *d) { assert(isLegacy(d)); return bits(d->d2, 23,16); } +uint8_t getCss(TxDesc *d) { assert(isLegacy(d)); return bits(d->d2, 47,40); } + +bool ixsm(TxDesc *d) { return isData(d) && bits(d->d2, 40,40); } +bool txsm(TxDesc *d) { return isData(d) && bits(d->d2, 41,41); } + +int tucse(TxDesc *d) { assert(isContext(d)); return bits(d->d1,63,48); } +int tucso(TxDesc *d) { assert(isContext(d)); return bits(d->d1,47,40); } +int tucss(TxDesc *d) { assert(isContext(d)); return bits(d->d1,39,32); } +int ipcse(TxDesc *d) { assert(isContext(d)); return bits(d->d1,31,16); } +int ipcso(TxDesc *d) { assert(isContext(d)); return bits(d->d1,15,8); } +int ipcss(TxDesc *d) { assert(isContext(d)); return bits(d->d1,7,0); } +int mss(TxDesc *d) { assert(isContext(d)); return bits(d->d2,63,48); } +int hdrlen(TxDesc *d) { assert(isContext(d)); return bits(d->d2,47,40); } +} // namespace TxdOp + + +#define ADD_FIELD32(NAME, OFFSET, BITS) \ + inline uint32_t NAME() { return bits(_data, OFFSET+BITS-1, OFFSET); } \ + inline void NAME(uint32_t d) { replaceBits(_data, OFFSET+BITS-1, OFFSET,d); } + +#define ADD_FIELD64(NAME, OFFSET, BITS) \ + inline uint64_t NAME() { return bits(_data, OFFSET+BITS-1, OFFSET); } \ + inline void NAME(uint64_t d) { replaceBits(_data, OFFSET+BITS-1, OFFSET,d); } + struct Regs { - union { // 0x0000 CTRL Register - uint32_t reg; - struct { - uint8_t fd:1; // full duplex - uint8_t bem:1; // big endian mode - uint8_t pcipr:1; // PCI priority - uint8_t lrst:1; // link reset - uint8_t tme:1; // test mode enable - uint8_t asde:1; // Auto-speed detection - uint8_t slu:1; // Set link up - uint8_t ilos:1; // invert los-of-signal - uint8_t speed:2; // speed selection bits - uint8_t be32:1; // big endian mode 32 - uint8_t frcspd:1; // force speed - uint8_t frcdpx:1; // force duplex - uint8_t duden:1; // dock/undock enable - uint8_t dudpol:1; // dock/undock polarity - uint8_t fphyrst:1; // force phy reset - uint8_t extlen:1; // external link status enable - uint8_t rsvd:1; // reserved - uint8_t sdp0d:1; // software controlled pin data - uint8_t sdp1d:1; // software controlled pin data - uint8_t sdp2d:1; // software controlled pin data - uint8_t sdp3d:1; // software controlled pin data - uint8_t sdp0i:1; // software controlled pin dir - uint8_t sdp1i:1; // software controlled pin dir - uint8_t sdp2i:1; // software controlled pin dir - uint8_t sdp3i:1; // software controlled pin dir - uint8_t rst:1; // reset - uint8_t rfce:1; // receive flow control enable - uint8_t tfce:1; // transmit flow control enable - uint8_t rte:1; // routing tag enable - uint8_t vme:1; // vlan enable - uint8_t phyrst:1; // phy reset - } ; - } ctrl; - - union { // 0x0008 STATUS - uint32_t reg; - struct { - uint8_t fd:1; // full duplex - uint8_t lu:1; // link up - uint8_t func:2; // function id - uint8_t txoff:1; // transmission paused - uint8_t tbimode:1; // tbi mode - uint8_t speed:2; // link speed - uint8_t asdv:2; // auto speed detection value - uint8_t mtxckok:1; // mtx clock running ok - uint8_t pci66:1; // In 66Mhz pci slot - uint8_t bus64:1; // in 64 bit slot - uint8_t pcix:1; // Pci mode - uint8_t pcixspd:1; // pci x speed - uint8_t reserved; // reserved - } ; - } sts; - - union { // 0x0010 EECD - uint32_t reg; - struct { - uint8_t sk:1; // clack input to the eeprom - uint8_t cs:1; // chip select to eeprom - uint8_t din:1; // data input to eeprom - uint8_t dout:1; // data output bit - uint8_t fwe:2; // flash write enable - uint8_t ee_req:1; // request eeprom access - uint8_t ee_gnt:1; // grant eeprom access - uint8_t ee_pres:1; // eeprom present - uint8_t ee_size:1; // eeprom size - uint8_t ee_sz1:1; // eeprom size - uint8_t rsvd:2; // reserved - uint8_t ee_type:1; // type of eeprom - } ; - } eecd; - - union { // 0x0014 EERD - uint32_t reg; - struct { - uint8_t start:1; // start read - uint8_t done:1; // done read - uint16_t addr:14; // address - uint16_t data; // data - }; - } eerd; - - union { // 0x00C0 ICR - uint32_t reg; - struct { - uint8_t txdw:1; // tx descr witten back - uint8_t txqe:1; // tx queue empty - uint8_t lsc:1; // link status change - uint8_t rxseq:1; // rcv sequence error - uint8_t rxdmt0:1; // rcv descriptor min thresh - uint8_t rsvd1:1; // reserved - uint8_t rxo:1; // receive overrunn - uint8_t rxt0:1; // receiver timer interrupt - uint8_t rsvd2:1; // reserved - uint8_t mdac:1; // mdi/o access complete - uint8_t rxcfg:1; // recv /c/ ordered sets - uint8_t rsvd3:1; // reserved - uint8_t phyint:1; // phy interrupt - uint8_t gpi1:1; // gpi int 1 - uint8_t gpi2:1; // gpi int 2 - uint8_t txdlow:1; // transmit desc low thresh - uint8_t srpd:1; // small receive packet detected - uint16_t rsvd4:15; // reserved - } ; - } icd; - - union { // 0x00C0 IMC - uint32_t reg; - struct { - uint8_t txdw:1; // tx descr witten back - uint8_t txqe:1; // tx queue empty - uint8_t lsc:1; // link status change - uint8_t rxseq:1; // rcv sequence error - uint8_t rxdmt0:1; // rcv descriptor min thresh - uint8_t rsvd1:1; // reserved - uint8_t rxo:1; // receive overrunn - uint8_t rxt0:1; // receiver timer interrupt - uint8_t rsvd2:1; // reserved - uint8_t mdac:1; // mdi/o access complete - uint8_t rxcfg:1; // recv /c/ ordered sets - uint8_t rsvd3:1; // reserved - uint8_t phyint:1; // phy interrupt - uint8_t gpi1:1; // gpi int 1 - uint8_t gpi2:1; // gpi int 2 - uint8_t txdlow:1; // transmit desc low thresh - uint8_t srpd:1; // small receive packet detected - uint16_t rsvd4:15; // reserved - } ; - } imc; - - union { // 0x0100 RCTL - uint32_t reg; - struct { - uint8_t rst:1; // Reset - uint8_t en:1; // Enable - uint8_t sbp:1; // Store bad packets - uint8_t upe:1; // Unicast Promiscuous enabled - uint8_t mpe:1; // Multicast promiscuous enabled - uint8_t lpe:1; // long packet reception enabled - uint8_t lbm:2; // - uint8_t rdmts:2; // - uint8_t rsvd:2; // - uint8_t mo:2; // - uint8_t mdr:1; // - uint8_t bam:1; // - uint8_t bsize:2; // - uint8_t vpe:1; // - uint8_t cfien:1; // - uint8_t cfi:1; // - uint8_t rsvd2:1; // - uint8_t dpf:1; // discard pause frames - uint8_t pmcf:1; // pass mac control frames - uint8_t rsvd3:1; // reserved - uint8_t bsex:1; // buffer size extension - uint8_t secrc:1; // strip ethernet crc from incoming packet - uint8_t rsvd1:5; // reserved - } ; - } rctl; - - union { // 0x0400 TCTL - uint32_t reg; - struct { - uint8_t rst:1; // Reset - uint8_t en:1; // Enable - uint8_t bce:1; // busy check enable - uint8_t psp:1; // pad short packets - uint8_t ct:8; // collision threshold - uint16_t cold:10; // collision distance - uint8_t swxoff:1; // software xoff transmission - uint8_t pbe:1; // packet burst enable - uint8_t rtlc:1; // retransmit late collisions - uint8_t nrtu:1; // on underrun no TX - uint8_t mulr:1; // multiple request - uint8_t rsvd:5; // reserved - } ; - } tctl; - - union { // 0x5820 MANC - uint32_t reg; - struct { - uint8_t smbus:1; // SMBus enabled ##### - uint8_t asf:1; // ASF enabled ##### - uint8_t ronforce:1; // reset of force - uint8_t rsvd:5; // reserved - uint8_t rmcp1:1; // rcmp1 filtering - uint8_t rmcp2:1; // rcmp2 filtering - uint8_t ipv4:1; // enable ipv4 - uint8_t ipv6:1; // enable ipv6 - uint8_t snap:1; // accept snap - uint8_t arp:1; // filter arp ##### - uint8_t neighbor:1; // neighbor discovery - uint8_t arp_resp:1; // arp response - uint8_t tcorst:1; // tco reset happened - uint8_t rcvtco:1; // receive tco enabled ###### - uint8_t blkphyrst:1;// block phy resets ######## - uint8_t rcvall:1; // receive all - uint8_t macaddrfltr:1; // mac address filtering ###### - uint8_t mng2host:1; // mng2 host packets ####### - uint8_t ipaddrfltr:1; // ip address filtering - uint8_t xsumfilter:1; // checksum filtering - uint8_t brfilter:1; // broadcast filtering - uint8_t smbreq:1; // smb request - uint8_t smbgnt:1; // smb grant - uint8_t smbclkin:1; // smbclkin - uint8_t smbdatain:1; // smbdatain - uint8_t smbdataout:1; // smb data out - uint8_t smbclkout:1; // smb clock out - uint8_t rsvd2:2; - }; - } manc; + template<class T> + struct Reg { + T _data; + T operator()() { return _data; } + const Reg<T> &operator=(T d) { _data = d; return *this;} + bool operator==(T d) { return d == _data; } + void operator()(T d) { _data = d; } + Reg() { _data = 0; } + }; + + struct CTRL : public Reg<uint32_t> { // 0x0000 CTRL Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(fd,0,1); // full duplex + ADD_FIELD32(bem,1,1); // big endian mode + ADD_FIELD32(pcipr,2,1); // PCI priority + ADD_FIELD32(lrst,3,1); // link reset + ADD_FIELD32(tme,4,1); // test mode enable + ADD_FIELD32(asde,5,1); // Auto-speed detection + ADD_FIELD32(slu,6,1); // Set link up + ADD_FIELD32(ilos,7,1); // invert los-of-signal + ADD_FIELD32(speed,8,2); // speed selection bits + ADD_FIELD32(be32,10,1); // big endian mode 32 + ADD_FIELD32(frcspd,11,1); // force speed + ADD_FIELD32(frcdpx,12,1); // force duplex + ADD_FIELD32(duden,13,1); // dock/undock enable + ADD_FIELD32(dudpol,14,1); // dock/undock polarity + ADD_FIELD32(fphyrst,15,1); // force phy reset + ADD_FIELD32(extlen,16,1); // external link status enable + ADD_FIELD32(rsvd,17,1); // reserved + ADD_FIELD32(sdp0d,18,1); // software controlled pin data + ADD_FIELD32(sdp1d,19,1); // software controlled pin data + ADD_FIELD32(sdp2d,20,1); // software controlled pin data + ADD_FIELD32(sdp3d,21,1); // software controlled pin data + ADD_FIELD32(sdp0i,22,1); // software controlled pin dir + ADD_FIELD32(sdp1i,23,1); // software controlled pin dir + ADD_FIELD32(sdp2i,24,1); // software controlled pin dir + ADD_FIELD32(sdp3i,25,1); // software controlled pin dir + ADD_FIELD32(rst,26,1); // reset + ADD_FIELD32(rfce,27,1); // receive flow control enable + ADD_FIELD32(tfce,28,1); // transmit flow control enable + ADD_FIELD32(rte,29,1); // routing tag enable + ADD_FIELD32(vme,30,1); // vlan enable + ADD_FIELD32(phyrst,31,1); // phy reset + }; + CTRL ctrl; + + struct STATUS : public Reg<uint32_t> { // 0x0008 STATUS Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(fd,0,1); // full duplex + ADD_FIELD32(lu,1,1); // link up + ADD_FIELD32(func,2,2); // function id + ADD_FIELD32(txoff,4,1); // transmission paused + ADD_FIELD32(tbimode,5,1); // tbi mode + ADD_FIELD32(speed,6,2); // link speed + ADD_FIELD32(asdv,8,2); // auto speed detection value + ADD_FIELD32(mtxckok,10,1); // mtx clock running ok + ADD_FIELD32(pci66,11,1); // In 66Mhz pci slot + ADD_FIELD32(bus64,12,1); // in 64 bit slot + ADD_FIELD32(pcix,13,1); // Pci mode + ADD_FIELD32(pcixspd,14,2); // pci x speed + }; + STATUS sts; + + struct EECD : public Reg<uint32_t> { // 0x0010 EECD Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(sk,0,1); // clack input to the eeprom + ADD_FIELD32(cs,1,1); // chip select to eeprom + ADD_FIELD32(din,2,1); // data input to eeprom + ADD_FIELD32(dout,3,1); // data output bit + ADD_FIELD32(fwe,4,2); // flash write enable + ADD_FIELD32(ee_req,6,1); // request eeprom access + ADD_FIELD32(ee_gnt,7,1); // grant eeprom access + ADD_FIELD32(ee_pres,8,1); // eeprom present + ADD_FIELD32(ee_size,9,1); // eeprom size + ADD_FIELD32(ee_sz1,10,1); // eeprom size + ADD_FIELD32(rsvd,11,2); // reserved + ADD_FIELD32(ee_type,13,1); // type of eeprom + } ; + EECD eecd; + + struct EERD : public Reg<uint32_t> { // 0x0014 EERD Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(start,0,1); // start read + ADD_FIELD32(done,4,1); // done read + ADD_FIELD32(addr,8,8); // address + ADD_FIELD32(data,16,16); // data + }; + EERD eerd; + + struct CTRL_EXT : public Reg<uint32_t> { // 0x0018 CTRL_EXT Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(gpi_en,0,4); // enable interrupts from gpio + ADD_FIELD32(phyint,5,1); // reads the phy internal int status + ADD_FIELD32(sdp2_data,6,1); // data from gpio sdp + ADD_FIELD32(spd3_data,7,1); // data frmo gpio sdp + ADD_FIELD32(spd2_iodir,10,1); // direction of sdp2 + ADD_FIELD32(spd3_iodir,11,1); // direction of sdp2 + ADD_FIELD32(asdchk,12,1); // initiate auto-speed-detection + ADD_FIELD32(eerst,13,1); // reset the eeprom + ADD_FIELD32(spd_byps,15,1); // bypass speed select + ADD_FIELD32(ro_dis,17,1); // disable relaxed memory ordering + ADD_FIELD32(vreg,21,1); // power down the voltage regulator + ADD_FIELD32(link_mode,22,2); // interface to talk to the link + ADD_FIELD32(iame, 27,1); // interrupt acknowledge auto-mask ?? + ADD_FIELD32(drv_loaded, 28,1);// driver is loaded and incharge of device + ADD_FIELD32(timer_clr, 29,1); // clear interrupt timers after IMS clear ?? + }; + CTRL_EXT ctrl_ext; + + struct MDIC : public Reg<uint32_t> { // 0x0020 MDIC Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(data,0,16); // data + ADD_FIELD32(regadd,16,5); // register address + ADD_FIELD32(phyadd,21,5); // phy addresses + ADD_FIELD32(op,26,2); // opcode + ADD_FIELD32(r,28,1); // ready + ADD_FIELD32(i,29,1); // interrupt + ADD_FIELD32(e,30,1); // error + }; + MDIC mdic; + + struct ICR : public Reg<uint32_t> { // 0x00C0 ICR Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(txdw,0,1) // tx descr witten back + ADD_FIELD32(txqe,1,1) // tx queue empty + ADD_FIELD32(lsc,2,1) // link status change + ADD_FIELD32(rxseq,3,1) // rcv sequence error + ADD_FIELD32(rxdmt0,4,1) // rcv descriptor min thresh + ADD_FIELD32(rsvd1,5,1) // reserved + ADD_FIELD32(rxo,6,1) // receive overrunn + ADD_FIELD32(rxt0,7,1) // receiver timer interrupt + ADD_FIELD32(mdac,9,1) // mdi/o access complete + ADD_FIELD32(rxcfg,10,1) // recv /c/ ordered sets + ADD_FIELD32(phyint,12,1) // phy interrupt + ADD_FIELD32(gpi1,13,1) // gpi int 1 + ADD_FIELD32(gpi2,14,1) // gpi int 2 + ADD_FIELD32(txdlow,15,1) // transmit desc low thresh + ADD_FIELD32(srpd,16,1) // small receive packet detected + ADD_FIELD32(ack,17,1); // receive ack frame + ADD_FIELD32(int_assert, 31,1); // interrupt caused a system interrupt + }; + ICR icr; + + uint32_t imr; // register that contains the current interrupt mask + + struct ITR : public Reg<uint32_t> { // 0x00C4 ITR Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(interval, 0,16); // minimum inter-interrutp inteval + // specified in 256ns interrupts + }; + ITR itr; + + // When CTRL_EXT.IAME and the ICR.INT_ASSERT is 1 an ICR read or write + // causes the IAM register contents to be written into the IMC + // automatically clearing all interrupts that have a bit in the IAM set + uint32_t iam; + + struct RCTL : public Reg<uint32_t> { // 0x0100 RCTL Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(rst,0,1); // Reset + ADD_FIELD32(en,1,1); // Enable + ADD_FIELD32(sbp,2,1); // Store bad packets + ADD_FIELD32(upe,3,1); // Unicast Promiscuous enabled + ADD_FIELD32(mpe,4,1); // Multicast promiscuous enabled + ADD_FIELD32(lpe,5,1); // long packet reception enabled + ADD_FIELD32(lbm,6,2); // + ADD_FIELD32(rdmts,8,2); // + ADD_FIELD32(mo,12,2); // + ADD_FIELD32(mdr,14,1); // + ADD_FIELD32(bam,15,1); // + ADD_FIELD32(bsize,16,2); // + ADD_FIELD32(vfe,18,1); // + ADD_FIELD32(cfien,19,1); // + ADD_FIELD32(cfi,20,1); // + ADD_FIELD32(dpf,22,1); // discard pause frames + ADD_FIELD32(pmcf,23,1); // pass mac control frames + ADD_FIELD32(bsex,25,1); // buffer size extension + ADD_FIELD32(secrc,26,1); // strip ethernet crc from incoming packet + int descSize() + { + switch(bsize()) { + case 0: return bsex() == 0 ? 2048 : -1; + case 1: return bsex() == 0 ? 1024 : 16384; + case 2: return bsex() == 0 ? 512 : 8192; + case 3: return bsex() == 0 ? 256 : 4096; + default: + return -1; + } + } + }; + RCTL rctl; + + struct FCTTV : public Reg<uint32_t> { // 0x0170 FCTTV + using Reg<uint32_t>::operator=; + ADD_FIELD32(ttv,0,16); // Transmit Timer Value + }; + FCTTV fcttv; + + struct TCTL : public Reg<uint32_t> { // 0x0400 TCTL Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(rst,0,1); // Reset + ADD_FIELD32(en,1,1); // Enable + ADD_FIELD32(bce,2,1); // busy check enable + ADD_FIELD32(psp,3,1); // pad short packets + ADD_FIELD32(ct,4,8); // collision threshold + ADD_FIELD32(cold,12,10); // collision distance + ADD_FIELD32(swxoff,22,1); // software xoff transmission + ADD_FIELD32(pbe,23,1); // packet burst enable + ADD_FIELD32(rtlc,24,1); // retransmit late collisions + ADD_FIELD32(nrtu,25,1); // on underrun no TX + ADD_FIELD32(mulr,26,1); // multiple request + }; + TCTL tctl; + + struct PBA : public Reg<uint32_t> { // 0x1000 PBA Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(rxa,0,16); + ADD_FIELD32(txa,16,16); + }; + PBA pba; + + struct FCRTL : public Reg<uint32_t> { // 0x2160 FCRTL Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(rtl,3,28); // make this bigger than the spec so we can have + // a larger buffer + ADD_FIELD32(xone, 31,1); + }; + FCRTL fcrtl; + + struct FCRTH : public Reg<uint32_t> { // 0x2168 FCRTL Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(rth,3,13); // make this bigger than the spec so we can have + //a larger buffer + ADD_FIELD32(xfce, 31,1); + }; + FCRTH fcrth; + + struct RDBA : public Reg<uint64_t> { // 0x2800 RDBA Register + using Reg<uint64_t>::operator=; + ADD_FIELD64(rdbal,0,32); // base address of rx descriptor ring + ADD_FIELD64(rdbah,32,32); // base address of rx descriptor ring + }; + RDBA rdba; + + struct RDLEN : public Reg<uint32_t> { // 0x2808 RDLEN Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(len,7,13); // number of bytes in the descriptor buffer + }; + RDLEN rdlen; + + struct RDH : public Reg<uint32_t> { // 0x2810 RDH Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(rdh,0,16); // head of the descriptor ring + }; + RDH rdh; + + struct RDT : public Reg<uint32_t> { // 0x2818 RDT Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(rdt,0,16); // tail of the descriptor ring + }; + RDT rdt; + + struct RDTR : public Reg<uint32_t> { // 0x2820 RDTR Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(delay,0,16); // receive delay timer + ADD_FIELD32(fpd, 31,1); // flush partial descriptor block ?? + }; + RDTR rdtr; + + struct RXDCTL : public Reg<uint32_t> { // 0x2828 RXDCTL Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(pthresh,0,6); // prefetch threshold, less that this + // consider prefetch + ADD_FIELD32(hthresh,8,6); // number of descriptors in host mem to + // consider prefetch + ADD_FIELD32(wthresh,16,6); // writeback threshold + ADD_FIELD32(gran,24,1); // granularity 0 = desc, 1 = cacheline + }; + RXDCTL rxdctl; + + struct RADV : public Reg<uint32_t> { // 0x282C RADV Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(idv,0,16); // absolute interrupt delay + }; + RADV radv; + + struct RSRPD : public Reg<uint32_t> { // 0x2C00 RSRPD Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(idv,0,12); // size to interrutp on small packets + }; + RSRPD rsrpd; + + struct TDBA : public Reg<uint64_t> { // 0x3800 TDBAL Register + using Reg<uint64_t>::operator=; + ADD_FIELD64(tdbal,0,32); // base address of transmit descriptor ring + ADD_FIELD64(tdbah,32,32); // base address of transmit descriptor ring + }; + TDBA tdba; + + struct TDLEN : public Reg<uint32_t> { // 0x3808 TDLEN Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(len,7,13); // number of bytes in the descriptor buffer + }; + TDLEN tdlen; + + struct TDH : public Reg<uint32_t> { // 0x3810 TDH Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(tdh,0,16); // head of the descriptor ring + }; + TDH tdh; + + struct TDT : public Reg<uint32_t> { // 0x3818 TDT Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(tdt,0,16); // tail of the descriptor ring + }; + TDT tdt; + + struct TIDV : public Reg<uint32_t> { // 0x3820 TIDV Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(idv,0,16); // interrupt delay + }; + TIDV tidv; + + struct TXDCTL : public Reg<uint32_t> { // 0x3828 TXDCTL Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(pthresh, 0,6); // if number of descriptors control has is + // below this number, a prefetch is considered + ADD_FIELD32(hthresh,8,8); // number of valid descriptors is host memory + // before a prefetch is considered + ADD_FIELD32(wthresh,16,6); // number of descriptors to keep until + // writeback is considered + ADD_FIELD32(gran, 24,1); // granulatiry of above values (0 = cacheline, + // 1 == desscriptor) + ADD_FIELD32(lwthresh,25,7); // xmit descriptor low thresh, interrupt + // below this level + }; + TXDCTL txdctl; + + struct TADV : public Reg<uint32_t> { // 0x382C TADV Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(idv,0,16); // absolute interrupt delay + }; + TADV tadv; + + struct RXCSUM : public Reg<uint32_t> { // 0x5000 RXCSUM Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(pcss,0,8); + ADD_FIELD32(ipofld,8,1); + ADD_FIELD32(tuofld,9,1); + }; + RXCSUM rxcsum; + + struct MANC : public Reg<uint32_t> { // 0x5820 MANC Register + using Reg<uint32_t>::operator=; + ADD_FIELD32(smbus,0,1); // SMBus enabled ##### + ADD_FIELD32(asf,1,1); // ASF enabled ##### + ADD_FIELD32(ronforce,2,1); // reset of force + ADD_FIELD32(rsvd,3,5); // reserved + ADD_FIELD32(rmcp1,8,1); // rcmp1 filtering + ADD_FIELD32(rmcp2,9,1); // rcmp2 filtering + ADD_FIELD32(ipv4,10,1); // enable ipv4 + ADD_FIELD32(ipv6,11,1); // enable ipv6 + ADD_FIELD32(snap,12,1); // accept snap + ADD_FIELD32(arp,13,1); // filter arp ##### + ADD_FIELD32(neighbor,14,1); // neighbor discovery + ADD_FIELD32(arp_resp,15,1); // arp response + ADD_FIELD32(tcorst,16,1); // tco reset happened + ADD_FIELD32(rcvtco,17,1); // receive tco enabled ###### + ADD_FIELD32(blkphyrst,18,1);// block phy resets ######## + ADD_FIELD32(rcvall,19,1); // receive all + ADD_FIELD32(macaddrfltr,20,1); // mac address filtering ###### + ADD_FIELD32(mng2host,21,1); // mng2 host packets ####### + ADD_FIELD32(ipaddrfltr,22,1); // ip address filtering + ADD_FIELD32(xsumfilter,23,1); // checksum filtering + ADD_FIELD32(brfilter,24,1); // broadcast filtering + ADD_FIELD32(smbreq,25,1); // smb request + ADD_FIELD32(smbgnt,26,1); // smb grant + ADD_FIELD32(smbclkin,27,1); // smbclkin + ADD_FIELD32(smbdatain,28,1); // smbdatain + ADD_FIELD32(smbdataout,29,1); // smb data out + ADD_FIELD32(smbclkout,30,1); // smb clock out + }; + MANC manc; }; }; // iGbReg namespace diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh index 902cde909..cd7a5296a 100644 --- a/src/dev/io_device.hh +++ b/src/dev/io_device.hh @@ -132,6 +132,7 @@ class DmaPort : public Port bool dmaPending() { return pendingCount > 0; } + int cacheBlockSize() { return peerBlockSize(); } unsigned int drain(Event *de); }; @@ -261,13 +262,17 @@ class DmaDevice : public PioDevice addr, size, event, data); } - void dmaRead(Addr addr, int size, Event *event, uint8_t *data = NULL) - { dmaPort->dmaAction(MemCmd::ReadReq, addr, size, event, data); } + void dmaRead(Addr addr, int size, Event *event, uint8_t *data) + { + dmaPort->dmaAction(MemCmd::ReadReq, addr, size, event, data); + } bool dmaPending() { return dmaPort->dmaPending(); } virtual unsigned int drain(Event *de); + int cacheBlockSize() { return dmaPort->cacheBlockSize(); } + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { diff --git a/src/dev/sparc/iob.cc b/src/dev/sparc/iob.cc index 6bd40b631..e686e51f7 100644 --- a/src/dev/sparc/iob.cc +++ b/src/dev/sparc/iob.cc @@ -192,6 +192,8 @@ Iob::writeIob(PacketPtr pkt) data = pkt->get<uint64_t>(); intMan[index].cpu = bits(data,12,8); intMan[index].vector = bits(data,5,0); + DPRINTF(Iob, "Wrote IntMan %d cpu %d, vec %d\n", index, + intMan[index].cpu, intMan[index].vector); return; } @@ -201,11 +203,14 @@ Iob::writeIob(PacketPtr pkt) intCtl[index].mask = bits(data,2,2); if (bits(data,1,1)) intCtl[index].pend = false; + DPRINTF(Iob, "Wrote IntCtl %d pend %d cleared %d\n", index, + intCtl[index].pend, bits(data,2,2)); return; } if (accessAddr == JIntVecAddr) { jIntVec = bits(pkt->get<uint64_t>(), 5,0); + DPRINTF(Iob, "Wrote jIntVec %d\n", jIntVec); return; } @@ -237,11 +242,15 @@ Iob::writeJBus(PacketPtr pkt) index = (accessAddr - JIntBusyAddr) >> 3; data = pkt->get<uint64_t>(); jIntBusy[index].busy = bits(data,5,5); + DPRINTF(Iob, "Wrote jIntBusy index %d busy: %d\n", index, + jIntBusy[index].busy); return; } if (accessAddr == JIntABusyAddr) { data = pkt->get<uint64_t>(); jIntBusy[cpuid].busy = bits(data,5,5); + DPRINTF(Iob, "Wrote jIntBusy index %d busy: %d\n", cpuid, + jIntBusy[cpuid].busy); return; }; @@ -256,6 +265,8 @@ Iob::receiveDeviceInterrupt(DeviceId devid) return; intCtl[devid].mask = true; intCtl[devid].pend = true; + DPRINTF(Iob, "Receiving Device interrupt: %d for cpu %d vec %d\n", + devid, intMan[devid].cpu, intMan[devid].vector); ic->post(intMan[devid].cpu, SparcISA::IT_INT_VEC, intMan[devid].vector); } @@ -269,6 +280,8 @@ Iob::generateIpi(Type type, int cpu_id, int vector) switch (type) { case 0: // interrupt + DPRINTF(Iob, "Generating interrupt because of I/O write to cpu: %d vec %d\n", + cpu_id, vector); ic->post(cpu_id, SparcISA::IT_INT_VEC, vector); break; case 1: // reset @@ -279,9 +292,11 @@ Iob::generateIpi(Type type, int cpu_id, int vector) sys->threadContexts[cpu_id]->activate(); break; case 2: // idle -- this means stop executing and don't wake on interrupts + DPRINTF(Iob, "Idling CPU because of I/O write cpu: %d\n", cpu_id); sys->threadContexts[cpu_id]->halt(); break; case 3: // resume + DPRINTF(Iob, "Resuming CPU because of I/O write cpu: %d\n", cpu_id); sys->threadContexts[cpu_id]->activate(); break; default: @@ -297,6 +312,9 @@ Iob::receiveJBusInterrupt(int cpu_id, int source, uint64_t d0, uint64_t d1) if (jIntBusy[cpu_id].busy) return false; + DPRINTF(Iob, "Receiving jBus interrupt: %d for cpu %d vec %d\n", + source, cpu_id, jIntVec); + jIntBusy[cpu_id].busy = true; jIntBusy[cpu_id].source = source; jBusData0[cpu_id] = d0; diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 4988df3c5..6e6ba2380 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -171,8 +171,12 @@ Bus::recvTiming(PacketPtr pkt) } short dest = pkt->getDest(); + + // Make sure to clear the snoop commit flag so it doesn't think an + // access has been handled twice. if (dest == Packet::Broadcast) { port = findPort(pkt->getAddr(), pkt->getSrc()); + pkt->flags &= ~SNOOP_COMMIT; if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) { bool success; diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 9368e7648..ca965859e 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -595,8 +595,13 @@ Cache<TagStore,Coherence>::access(PacketPtr &pkt) //We are determining prefetches on access stream, call prefetcher prefetcher->handleMiss(pkt, curTick); } + + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); + if (!pkt->req->isUncacheable()) { - blk = handleAccess(pkt, lat, writebacks); + if (!missQueue->findMSHR(blk_addr)) { + blk = handleAccess(pkt, lat, writebacks); + } } else { size = pkt->getSize(); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 25b8fcbeb..24ca9cfa2 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -599,6 +599,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time) MemCmd cmd = mshr->getTarget()->cmd; mshr->pkt->setDest(Packet::Broadcast); mshr->pkt->result = Packet::Unknown; + mshr->pkt->req = mshr->getTarget()->req; mq.markPending(mshr, cmd); mshr->order = order++; cache->setMasterRequest(Request_MSHR, time); diff --git a/src/python/m5/objects/Ethernet.py b/src/python/m5/objects/Ethernet.py index a52e35511..bfe30950c 100644 --- a/src/python/m5/objects/Ethernet.py +++ b/src/python/m5/objects/Ethernet.py @@ -67,7 +67,14 @@ if build_env['ALPHA_TLASER']: class IGbE(PciDevice): type = 'IGbE' - hardware_address = Param.EthernetAddr(NextEthernetAddr, "Ethernet Hardware Address") + hardware_address = Param.String("Ethernet Hardware Address") + use_flow_control = Param.Bool(False, "Should we use xon/xoff flow contorl (UNIMPLMENTD)") + rx_fifo_size = Param.MemorySize('384kB', "Size of the rx FIFO") + tx_fifo_size = Param.MemorySize('384kB', "Size of the tx FIFO") + rx_desc_cache_size = Param.Int(64, "Number of enteries in the rx descriptor cache") + tx_desc_cache_size = Param.Int(64, "Number of enteries in the rx descriptor cache") + clock = Param.Clock('500MHz', "Clock speed of the device") + class IGbEPciData(PciConfigData): VendorID = 0x8086 |