diff options
Diffstat (limited to 'src/cpu')
105 files changed, 5872 insertions, 4020 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 34bad132c..bc4ec7923 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -68,6 +68,13 @@ mem_comp_sig_template = ''' virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); return NoFault; }; ''' +# Generate a temporary CPU list, including the CheckerCPU if +# it's enabled. This isn't used for anything else other than StaticInst +# headers. +temp_cpu_list = env['CPU_MODELS'] +if env['USE_CHECKER']: + temp_cpu_list.append('CheckerCPU') + # Generate header. def gen_cpu_exec_signatures(target, source, env): f = open(str(target[0]), 'w') @@ -75,7 +82,7 @@ def gen_cpu_exec_signatures(target, source, env): #ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__ #define __CPU_STATIC_INST_EXEC_SIGS_HH__ ''' - for cpu in env['CPU_MODELS']: + for cpu in temp_cpu_list: xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] print >> f, exec_sig_template % (xc_type, xc_type, xc_type) print >> f, ''' @@ -85,12 +92,19 @@ def gen_cpu_exec_signatures(target, source, env): # Generate string that gets printed when header is rebuilt def gen_sigs_string(target, source, env): return "Generating static_inst_exec_sigs.hh: " \ - + ', '.join(env['CPU_MODELS']) + + ', '.join(temp_cpu_list) # Add command to generate header to environment. env.Command('static_inst_exec_sigs.hh', models_db, Action(gen_cpu_exec_signatures, gen_sigs_string, - varlist = ['CPU_MODELS'])) + varlist = temp_cpu_list)) + +env.Depends('static_inst_exec_sigs.hh', Value(env['USE_CHECKER'])) +env.Depends('static_inst_exec_sigs.hh', Value(env['CPU_MODELS'])) + +# List of suppported CPUs by the Checker. Errors out if USE_CHECKER=True +# and one of these are not being used. +CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU'] ################################################################# # @@ -116,15 +130,13 @@ if need_simple_base: if 'FastCPU' in env['CPU_MODELS']: sources += Split('fast/cpu.cc') -if 'AlphaFullCPU' in env['CPU_MODELS']: +need_bp_unit = False +if 'O3CPU' in env['CPU_MODELS']: + need_bp_unit = True + sources += SConscript('o3/SConscript', exports = 'env') sources += Split(''' - base_dyn_inst.cc - o3/2bit_local_pred.cc - o3/alpha_dyn_inst.cc - o3/alpha_cpu.cc - o3/alpha_cpu_builder.cc + o3/base_dyn_inst.cc o3/bpred_unit.cc - o3/btb.cc o3/commit.cc o3/decode.cc o3/fetch.cc @@ -136,40 +148,54 @@ if 'AlphaFullCPU' in env['CPU_MODELS']: o3/lsq_unit.cc o3/lsq.cc o3/mem_dep_unit.cc - o3/ras.cc o3/rename.cc o3/rename_map.cc o3/rob.cc o3/scoreboard.cc o3/store_set.cc - o3/tournament_pred.cc ''') + if env['USE_CHECKER']: + sources += Split('o3/checker_builder.cc') -if 'OzoneSimpleCPU' in env['CPU_MODELS']: +if 'OzoneCPU' in env['CPU_MODELS']: + need_bp_unit = True sources += Split(''' + ozone/base_dyn_inst.cc + ozone/bpred_unit.cc ozone/cpu.cc ozone/cpu_builder.cc ozone/dyn_inst.cc ozone/front_end.cc - ozone/inorder_back_end.cc - ozone/inst_queue.cc - ozone/rename_table.cc - ''') - -if 'OzoneCPU' in env['CPU_MODELS']: - sources += Split(''' - ozone/lsq_unit.cc ozone/lw_back_end.cc ozone/lw_lsq.cc + ozone/rename_table.cc ''') + if env['USE_CHECKER']: + sources += Split('ozone/checker_builder.cc') -if 'CheckerCPU' in env['CPU_MODELS']: +if need_bp_unit: sources += Split(''' - checker/cpu.cc - checker/o3_cpu_builder.cc + o3/2bit_local_pred.cc + o3/btb.cc + o3/ras.cc + o3/tournament_pred.cc ''') -# FullCPU sources are included from m5/SConscript since they're not +if env['USE_CHECKER']: + sources += Split('checker/cpu.cc') + checker_supports = False + for i in CheckerSupportedCPUList: + if i in env['CPU_MODELS']: + checker_supports = True + if not checker_supports: + print "Checker only supports CPU models", + for i in CheckerSupportedCPUList: + print i, + print ", please set USE_CHECKER=False or use one of those CPU models" + Exit(1) + + +# FullCPU sources are included from src/SConscript since they're not # below this point in the file hierarchy. # Convert file names to SCons File objects. This takes care of the diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 55c04c498..ce440aeff 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -41,7 +41,6 @@ #include "cpu/cpuevent.hh" #include "cpu/thread_context.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "sim/param.hh" #include "sim/process.hh" #include "sim/sim_events.hh" @@ -60,11 +59,11 @@ int maxThreadsPerCPU = 1; #if FULL_SYSTEM BaseCPU::BaseCPU(Params *p) - : SimObject(p->name), clock(p->clock), checkInterrupts(true), + : MemObject(p->name), clock(p->clock), checkInterrupts(true), params(p), number_of_threads(p->numberOfThreads), system(p->system) #else BaseCPU::BaseCPU(Params *p) - : SimObject(p->name), clock(p->clock), params(p), + : MemObject(p->name), clock(p->clock), params(p), number_of_threads(p->numberOfThreads), system(p->system) #endif { @@ -237,7 +236,7 @@ BaseCPU::registerThreadContexts() void -BaseCPU::switchOut(Sampler *sampler) +BaseCPU::switchOut() { panic("This CPU doesn't support sampling!"); } diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 43122f238..2be6e4e81 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -36,17 +36,17 @@ #include "base/statistics.hh" #include "config/full_system.hh" -#include "cpu/sampler/sampler.hh" #include "sim/eventq.hh" -#include "sim/sim_object.hh" +#include "mem/mem_object.hh" #include "arch/isa_traits.hh" class BranchPred; class CheckerCPU; class ThreadContext; class System; +class Port; -class BaseCPU : public SimObject +class BaseCPU : public MemObject { protected: // CPU's clock period in terms of the number of ticks of curTime. @@ -148,7 +148,7 @@ class BaseCPU : public SimObject /// Prepare for another CPU to take over execution. When it is /// is ready (drained pipe) it signals the sampler. - virtual void switchOut(Sampler *); + virtual void switchOut(); /// Take over execution from the given CPU. Used for warm-up and /// sampling. diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 948ee058a..9cc61f74c 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,7 @@ #ifndef __CPU_BASE_DYN_INST_HH__ #define __CPU_BASE_DYN_INST_HH__ +#include <bitset> #include <list> #include <string> @@ -44,12 +45,6 @@ #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "sim/system.hh" -/* -#include "encumbered/cpu/full/bpred_update.hh" -#include "encumbered/cpu/full/spec_memory.hh" -#include "encumbered/cpu/full/spec_state.hh" -#include "encumbered/mem/functional/main.hh" -*/ /** * @file @@ -64,8 +59,8 @@ class BaseDynInst : public FastAlloc, public RefCounted { public: // Typedef for the CPU. - typedef typename Impl::FullCPU FullCPU; - typedef typename FullCPU::ImplState ImplState; + typedef typename Impl::CPUType ImplCPU; + typedef typename ImplCPU::ImplState ImplState; // Binary machine instruction type. typedef TheISA::MachInst MachInst; @@ -132,56 +127,34 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The sequence number of the instruction. */ InstSeqNum seqNum; - /** Is the instruction in the IQ */ - bool iqEntry; - - /** Is the instruction in the ROB */ - bool robEntry; - - /** Is the instruction in the LSQ */ - bool lsqEntry; - - /** Is the instruction completed. */ - bool completed; - - /** Is the instruction's result ready. */ - bool resultReady; - - /** Can this instruction issue. */ - bool canIssue; - - /** Has this instruction issued. */ - bool issued; - - /** Has this instruction executed (or made it through execute) yet. */ - bool executed; - - /** Can this instruction commit. */ - bool canCommit; - - /** Is this instruction committed. */ - bool committed; - - /** Is this instruction squashed. */ - bool squashed; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInIQ; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInLSQ; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInROB; - - /** Is this a recover instruction. */ - bool recoverInst; - - /** Is this a thread blocking instruction. */ - bool blockingInst; /* this inst has called thread_block() */ + enum Status { + IqEntry, /// Instruction is in the IQ + RobEntry, /// Instruction is in the ROB + LsqEntry, /// Instruction is in the LSQ + Completed, /// Instruction has completed + ResultReady, /// Instruction has its result + CanIssue, /// Instruction can issue and execute + Issued, /// Instruction has issued + Executed, /// Instruction has executed + CanCommit, /// Instruction can commit + AtCommit, /// Instruction has reached commit + Committed, /// Instruction has committed + Squashed, /// Instruction is squashed + SquashedInIQ, /// Instruction is squashed in the IQ + SquashedInLSQ, /// Instruction is squashed in the LSQ + SquashedInROB, /// Instruction is squashed in the ROB + RecoverInst, /// Is a recover instruction + BlockingInst, /// Is a blocking instruction + ThreadsyncWait, /// Is a thread synchronization instruction + SerializeBefore, /// Needs to serialize on + /// instructions ahead of it + SerializeAfter, /// Needs to serialize instructions behind it + SerializeHandled, /// Serialization has been handled + NumStatus + }; - /** Is this a thread syncrhonization instruction. */ - bool threadsyncWait; + /** The status of this BaseDynInst. Several bits can be set. */ + std::bitset<NumStatus> status; /** The thread this instruction is from. */ short threadNumber; @@ -192,8 +165,8 @@ class BaseDynInst : public FastAlloc, public RefCounted /** How many source registers are ready. */ unsigned readyRegs; - /** Pointer to the FullCPU object. */ - FullCPU *cpu; + /** Pointer to the Impl's CPU object. */ + ImplCPU *cpu; /** Pointer to the thread state. */ ImplState *thread; @@ -202,10 +175,9 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault fault; /** The memory request. */ -// MemReqPtr req; Request *req; -// Packet pkt; + /** Pointer to the data for the memory access. */ uint8_t *memData; /** The effective virtual address (lds & stores only). */ @@ -223,12 +195,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The memory request flags (from translation). */ unsigned memReqFlags; - /** The size of the data to be stored. */ - int storeSize; - - /** The data to be stored. */ - IntReg storeData; - union Result { uint64_t integer; float fp; @@ -273,7 +239,7 @@ class BaseDynInst : public FastAlloc, public RefCounted * @param cpu Pointer to the instruction's CPU. */ BaseDynInst(ExtMachInst inst, Addr PC, Addr pred_PC, InstSeqNum seq_num, - FullCPU *cpu); + ImplCPU *cpu); /** BaseDynInst constructor given a StaticInst pointer. * @param _staticInst The StaticInst for this BaseDynInst. @@ -288,21 +254,6 @@ class BaseDynInst : public FastAlloc, public RefCounted void initVars(); public: - /** - * @todo: Make this function work; currently it is a dummy function. - * @param fault Last fault. - * @param cmd Last command. - * @param addr Virtual address of access. - * @param p Memory accessed. - * @param nbytes Access size. - */ -// void -// trace_mem(Fault fault, -// MemCmd cmd, -// Addr addr, -// void *p, -// int nbytes); - /** Dumps out contents of this BaseDynInst. */ void dump(); @@ -360,9 +311,9 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } bool isSerializeBefore() const - { return staticInst->isSerializeBefore() || serializeBefore; } + { return staticInst->isSerializeBefore() || status[SerializeBefore]; } bool isSerializeAfter() const - { return staticInst->isSerializeAfter() || serializeAfter; } + { return staticInst->isSerializeAfter() || status[SerializeAfter]; } bool isMemBarrier() const { return staticInst->isMemBarrier(); } bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } @@ -371,41 +322,32 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isUnverifiable() const { return staticInst->isUnverifiable(); } /** Temporarily sets this instruction as a serialize before instruction. */ - void setSerializeBefore() { serializeBefore = true; } + void setSerializeBefore() { status.set(SerializeBefore); } /** Clears the serializeBefore part of this instruction. */ - void clearSerializeBefore() { serializeBefore = false; } + void clearSerializeBefore() { status.reset(SerializeBefore); } /** Checks if this serializeBefore is only temporarily set. */ - bool isTempSerializeBefore() { return serializeBefore; } - - /** Tracks if instruction has been externally set as serializeBefore. */ - bool serializeBefore; + bool isTempSerializeBefore() { return status[SerializeBefore]; } /** Temporarily sets this instruction as a serialize after instruction. */ - void setSerializeAfter() { serializeAfter = true; } + void setSerializeAfter() { status.set(SerializeAfter); } /** Clears the serializeAfter part of this instruction.*/ - void clearSerializeAfter() { serializeAfter = false; } + void clearSerializeAfter() { status.reset(SerializeAfter); } /** Checks if this serializeAfter is only temporarily set. */ - bool isTempSerializeAfter() { return serializeAfter; } + bool isTempSerializeAfter() { return status[SerializeAfter]; } - /** Tracks if instruction has been externally set as serializeAfter. */ - bool serializeAfter; + /** Sets the serialization part of this instruction as handled. */ + void setSerializeHandled() { status.set(SerializeHandled); } /** Checks if the serialization part of this instruction has been * handled. This does not apply to the temporary serializing * state; it only applies to this instruction's own permanent * serializing state. */ - bool isSerializeHandled() { return serializeHandled; } - - /** Sets the serialization part of this instruction as handled. */ - void setSerializeHandled() { serializeHandled = true; } - - /** Whether or not the serialization of this instruction has been handled. */ - bool serializeHandled; + bool isSerializeHandled() { return status[SerializeHandled]; } /** Returns the opclass of this instruction. */ OpClass opClass() const { return staticInst->opClass(); } @@ -439,11 +381,13 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns the result of a floating point (double) instruction. */ double readDoubleResult() { return instResult.dbl; } + /** Records an integer register being set to a value. */ void setIntReg(const StaticInst *si, int idx, uint64_t val) { instResult.integer = val; } + /** Records an fp register being set to a value. */ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) { if (width == 32) @@ -454,16 +398,19 @@ class BaseDynInst : public FastAlloc, public RefCounted panic("Unsupported width!"); } + /** Records an fp register being set to a value. */ void setFloatReg(const StaticInst *si, int idx, FloatReg val) { instResult.fp = val; } + /** Records an fp register being set to an integer value. */ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val, int width) { instResult.integer = val; } + /** Records an fp register being set to an integer value. */ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val) { instResult.integer = val; @@ -482,106 +429,112 @@ class BaseDynInst : public FastAlloc, public RefCounted } /** Sets this instruction as completed. */ - void setCompleted() { completed = true; } + void setCompleted() { status.set(Completed); } /** Returns whether or not this instruction is completed. */ - bool isCompleted() const { return completed; } + bool isCompleted() const { return status[Completed]; } - void setResultReady() { resultReady = true; } + /** Marks the result as ready. */ + void setResultReady() { status.set(ResultReady); } - bool isResultReady() const { return resultReady; } + /** Returns whether or not the result is ready. */ + bool isResultReady() const { return status[ResultReady]; } /** Sets this instruction as ready to issue. */ - void setCanIssue() { canIssue = true; } + void setCanIssue() { status.set(CanIssue); } /** Returns whether or not this instruction is ready to issue. */ - bool readyToIssue() const { return canIssue; } + bool readyToIssue() const { return status[CanIssue]; } /** Sets this instruction as issued from the IQ. */ - void setIssued() { issued = true; } + void setIssued() { status.set(Issued); } /** Returns whether or not this instruction has issued. */ - bool isIssued() const { return issued; } + bool isIssued() const { return status[Issued]; } /** Sets this instruction as executed. */ - void setExecuted() { executed = true; } + void setExecuted() { status.set(Executed); } /** Returns whether or not this instruction has executed. */ - bool isExecuted() const { return executed; } + bool isExecuted() const { return status[Executed]; } /** Sets this instruction as ready to commit. */ - void setCanCommit() { canCommit = true; } + void setCanCommit() { status.set(CanCommit); } /** Clears this instruction as being ready to commit. */ - void clearCanCommit() { canCommit = false; } + void clearCanCommit() { status.reset(CanCommit); } /** Returns whether or not this instruction is ready to commit. */ - bool readyToCommit() const { return canCommit; } + bool readyToCommit() const { return status[CanCommit]; } + + void setAtCommit() { status.set(AtCommit); } + + bool isAtCommit() { return status[AtCommit]; } /** Sets this instruction as committed. */ - void setCommitted() { committed = true; } + void setCommitted() { status.set(Committed); } /** Returns whether or not this instruction is committed. */ - bool isCommitted() const { return committed; } + bool isCommitted() const { return status[Committed]; } /** Sets this instruction as squashed. */ - void setSquashed() { squashed = true; } + void setSquashed() { status.set(Squashed); } /** Returns whether or not this instruction is squashed. */ - bool isSquashed() const { return squashed; } + bool isSquashed() const { return status[Squashed]; } //Instruction Queue Entry //----------------------- /** Sets this instruction as a entry the IQ. */ - void setInIQ() { iqEntry = true; } + void setInIQ() { status.set(IqEntry); } /** Sets this instruction as a entry the IQ. */ - void removeInIQ() { iqEntry = false; } + void clearInIQ() { status.reset(IqEntry); } + + /** Returns whether or not this instruction has issued. */ + bool isInIQ() const { return status[IqEntry]; } /** Sets this instruction as squashed in the IQ. */ - void setSquashedInIQ() { squashedInIQ = true; squashed = true;} + void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);} /** Returns whether or not this instruction is squashed in the IQ. */ - bool isSquashedInIQ() const { return squashedInIQ; } - - /** Returns whether or not this instruction has issued. */ - bool isInIQ() const { return iqEntry; } + bool isSquashedInIQ() const { return status[SquashedInIQ]; } //Load / Store Queue Functions //----------------------- /** Sets this instruction as a entry the LSQ. */ - void setInLSQ() { lsqEntry = true; } + void setInLSQ() { status.set(LsqEntry); } /** Sets this instruction as a entry the LSQ. */ - void removeInLSQ() { lsqEntry = false; } + void removeInLSQ() { status.reset(LsqEntry); } + + /** Returns whether or not this instruction is in the LSQ. */ + bool isInLSQ() const { return status[LsqEntry]; } /** Sets this instruction as squashed in the LSQ. */ - void setSquashedInLSQ() { squashedInLSQ = true;} + void setSquashedInLSQ() { status.set(SquashedInLSQ);} /** Returns whether or not this instruction is squashed in the LSQ. */ - bool isSquashedInLSQ() const { return squashedInLSQ; } - - /** Returns whether or not this instruction is in the LSQ. */ - bool isInLSQ() const { return lsqEntry; } + bool isSquashedInLSQ() const { return status[SquashedInLSQ]; } //Reorder Buffer Functions //----------------------- /** Sets this instruction as a entry the ROB. */ - void setInROB() { robEntry = true; } + void setInROB() { status.set(RobEntry); } /** Sets this instruction as a entry the ROB. */ - void removeInROB() { robEntry = false; } + void clearInROB() { status.reset(RobEntry); } + + /** Returns whether or not this instruction is in the ROB. */ + bool isInROB() const { return status[RobEntry]; } /** Sets this instruction as squashed in the ROB. */ - void setSquashedInROB() { squashedInROB = true; } + void setSquashedInROB() { status.set(SquashedInROB); } /** Returns whether or not this instruction is squashed in the ROB. */ - bool isSquashedInROB() const { return squashedInROB; } - - /** Returns whether or not this instruction is in the ROB. */ - bool isInROB() const { return robEntry; } + bool isSquashedInROB() const { return status[SquashedInROB]; } /** Read the PC of this instruction. */ const Addr readPC() const { return PC; } @@ -590,17 +543,18 @@ class BaseDynInst : public FastAlloc, public RefCounted void setNextPC(uint64_t val) { nextPC = val; -// instResult.integer = val; } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } - void setThread(unsigned tid) { threadNumber = tid; } + /** Sets the thread id. */ + void setTid(unsigned tid) { threadNumber = tid; } - void setState(ImplState *state) { thread = state; } + /** Sets the pointer to the thread state. */ + void setThreadState(ImplState *state) { thread = state; } - /** Returns the thread context. - */ + /** Returns the thread context. */ ThreadContext *tcBase() { return thread->getTC(); } private: @@ -637,8 +591,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Store queue index. */ int16_t sqIdx; - bool reachedCommit; - /** Iterator pointing to this BaseDynInst in the list of all insts. */ ListIt instListIt; diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst_impl.hh index 30fa10a6b..91424faad 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,10 +41,6 @@ #include "mem/request.hh" #include "cpu/base_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_cpu.hh" -//#include "cpu/ozone/simple_impl.hh" -//#include "cpu/ozone/ozone_impl.hh" using namespace std; using namespace TheISA; @@ -71,8 +67,8 @@ my_hash_t thishash; template <class Impl> BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC, Addr pred_PC, InstSeqNum seq_num, - FullCPU *cpu) - : staticInst(machInst), traceData(NULL), cpu(cpu)/*, xc(cpu->xcBase())*/ + ImplCPU *cpu) + : staticInst(machInst), traceData(NULL), cpu(cpu) { seqNum = seq_num; @@ -99,37 +95,18 @@ BaseDynInst<Impl>::initVars() memData = NULL; effAddr = 0; physEffAddr = 0; - storeSize = 0; readyRegs = 0; - // May want to turn this into a bit vector or something. - completed = false; - resultReady = false; - canIssue = false; - issued = false; - executed = false; - canCommit = false; - committed = false; - squashed = false; - squashedInIQ = false; - squashedInLSQ = false; - squashedInROB = false; + instResult.integer = 0; + + status.reset(); + eaCalcDone = false; memOpDone = false; + lqIdx = -1; sqIdx = -1; - reachedCommit = false; - - blockingInst = false; - recoverInst = false; - - iqEntry = false; - robEntry = false; - - serializeBefore = false; - serializeAfter = false; - serializeHandled = false; // Eventually make this a parameter. threadNumber = 0; @@ -242,31 +219,7 @@ template <class Impl> void BaseDynInst<Impl>::writeHint(Addr addr, int size, unsigned flags) { - // Need to create a MemReq here so we can do a translation. This - // will casue a TLB miss trap if necessary... not sure whether - // that's the best thing to do or not. We don't really need the - // MemReq otherwise, since wh64 has no functional effect. -/* - MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags); - req->asid = asid; - - fault = cpu->translateDataWriteReq(req); - - if (fault == NoFault && !(req->flags & UNCACHEABLE)) { - // Record key MemReq parameters so we can generate another one - // just like it for the timing access without calling translate() - // again (which might mess up the TLB). - effAddr = req->vaddr; - physEffAddr = req->paddr; - memReqFlags = req->flags; - } else { - // ignore faults & accesses to uncacheable space... treat as no-op - effAddr = physEffAddr = MemReq::inval_addr; - } - - storeSize = size; - storeData = 0; -*/ + // Not currently supported. } /** @@ -276,22 +229,7 @@ template <class Impl> Fault BaseDynInst<Impl>::copySrcTranslate(Addr src) { -/* - MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64); - req->asid = asid; - - // translate to physical address - Fault fault = cpu->translateDataReadReq(req); - - if (fault == NoFault) { - thread->copySrcAddr = src; - thread->copySrcPhysAddr = req->paddr; - } else { - thread->copySrcAddr = 0; - thread->copySrcPhysAddr = 0; - } - return fault; -*/ + // Not currently supported. return NoFault; } @@ -302,26 +240,7 @@ template <class Impl> Fault BaseDynInst<Impl>::copy(Addr dest) { -/* - uint8_t data[64]; - FunctionalMemory *mem = thread->mem; - assert(thread->copySrcPhysAddr); - MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64); - req->asid = asid; - - // translate to physical address - Fault fault = cpu->translateDataWriteReq(req); - - if (fault == NoFault) { - Addr dest_addr = req->paddr; - // Need to read straight from memory since we have more than 8 bytes. - req->paddr = thread->copySrcPhysAddr; - mem->read(req, data); - req->paddr = dest_addr; - mem->write(req, data); - } - return fault; -*/ + // Not currently supported. return NoFault; } @@ -350,7 +269,7 @@ void BaseDynInst<Impl>::markSrcRegReady() { if (++readyRegs == numSrcRegs()) { - canIssue = true; + status.set(CanIssue); } } @@ -358,13 +277,9 @@ template <class Impl> void BaseDynInst<Impl>::markSrcRegReady(RegIndex src_idx) { - ++readyRegs; - _readySrcRegIdx[src_idx] = true; - if (readyRegs == numSrcRegs()) { - canIssue = true; - } + markSrcRegReady(); } template <class Impl> @@ -382,25 +297,3 @@ BaseDynInst<Impl>::eaSrcsReady() return true; } - -// Forward declaration -template class BaseDynInst<AlphaSimpleImpl>; - -template <> -int -BaseDynInst<AlphaSimpleImpl>::instcount = 0; -/* -// Forward declaration -template class BaseDynInst<SimpleImpl>; - -template <> -int -BaseDynInst<SimpleImpl>::instcount = 0; - -// Forward declaration -template class BaseDynInst<OzoneImpl>; - -template <> -int -BaseDynInst<OzoneImpl>::instcount = 0; -*/ diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index ebc02f7be..1540a6b94 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -31,27 +31,17 @@ #include <list> #include <string> -#include "base/refcnt.hh" #include "cpu/base.hh" -#include "cpu/base_dyn_inst.hh" #include "cpu/checker/cpu.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" #include "cpu/static_inst.hh" +#include "mem/packet_impl.hh" #include "sim/byteswap.hh" -#include "sim/sim_object.hh" -#include "sim/stats.hh" - -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" - -//#include "cpu/ozone/dyn_inst.hh" -//#include "cpu/ozone/ozone_impl.hh" -//#include "cpu/ozone/simple_impl.hh" #if FULL_SYSTEM -#include "sim/system.hh" #include "arch/vtophys.hh" +#include "kern/kernel_stats.hh" #endif // FULL_SYSTEM using namespace std; @@ -77,6 +67,7 @@ CheckerCPU::CheckerCPU(Params *p) changedPC = willChangePC = changedNextPC = false; exitOnError = p->exitOnError; + warnOnlyOnLoadError = p->warnOnlyOnLoadError; #if FULL_SYSTEM itb = p->itb; dtb = p->dtb; @@ -84,6 +75,8 @@ CheckerCPU::CheckerCPU(Params *p) #else process = p->process; #endif + + result.integer = 0; } CheckerCPU::~CheckerCPU() @@ -406,379 +399,10 @@ CheckerCPU::checkFlags(Request *req) } } -template <class DynInstPtr> void -Checker<DynInstPtr>::tick(DynInstPtr &completed_inst) +CheckerCPU::dumpAndExit() { - DynInstPtr inst; - - // Either check this instruction, or add it to a list of - // instructions waiting to be checked. Instructions must be - // checked in program order, so if a store has committed yet not - // completed, there may be some instructions that are waiting - // behind it that have completed and must be checked. - if (!instList.empty()) { - if (youngestSN < completed_inst->seqNum) { - DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", - completed_inst->seqNum, completed_inst->readPC()); - instList.push_back(completed_inst); - youngestSN = completed_inst->seqNum; - } - - if (!instList.front()->isCompleted()) { - return; - } else { - inst = instList.front(); - instList.pop_front(); - } - } else { - if (!completed_inst->isCompleted()) { - if (youngestSN < completed_inst->seqNum) { - DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", - completed_inst->seqNum, completed_inst->readPC()); - instList.push_back(completed_inst); - youngestSN = completed_inst->seqNum; - } - return; - } else { - if (youngestSN < completed_inst->seqNum) { - inst = completed_inst; - youngestSN = completed_inst->seqNum; - } else { - return; - } - } - } - - // Try to check all instructions that are completed, ending if we - // run out of instructions to check or if an instruction is not - // yet completed. - while (1) { - DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n", - inst->seqNum, inst->readPC()); - unverifiedResult.integer = inst->readIntResult(); - unverifiedReq = inst->req; - unverifiedMemData = inst->memData; - numCycles++; - - Fault fault = NoFault; - - // maintain $r0 semantics - thread->setIntReg(ZeroReg, 0); -#ifdef TARGET_ALPHA - thread->setFloatRegDouble(ZeroReg, 0.0); -#endif // TARGET_ALPHA - - // Check if any recent PC changes match up with anything we - // expect to happen. This is mostly to check if traps or - // PC-based events have occurred in both the checker and CPU. - if (changedPC) { - DPRINTF(Checker, "Changed PC recently to %#x\n", - thread->readPC()); - if (willChangePC) { - if (newPC == thread->readPC()) { - DPRINTF(Checker, "Changed PC matches expected PC\n"); - } else { - warn("%lli: Changed PC does not match expected PC, " - "changed: %#x, expected: %#x", - curTick, thread->readPC(), newPC); - handleError(); - } - willChangePC = false; - } - changedPC = false; - } - if (changedNextPC) { - DPRINTF(Checker, "Changed NextPC recently to %#x\n", - thread->readNextPC()); - changedNextPC = false; - } - - // Try to fetch the instruction - -#if FULL_SYSTEM -#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0 -#else -#define IFETCH_FLAGS(pc) 0 -#endif - - uint64_t fetch_PC = thread->readPC() & ~3; - - // set up memory request for instruction fetch - memReq = new Request(inst->threadNumber, fetch_PC, - sizeof(uint32_t), - IFETCH_FLAGS(thread->readPC()), - fetch_PC, thread->readCpuId(), inst->threadNumber); - - bool succeeded = translateInstReq(memReq); - - if (!succeeded) { - if (inst->getFault() == NoFault) { - // In this case the instruction was not a dummy - // instruction carrying an ITB fault. In the single - // threaded case the ITB should still be able to - // translate this instruction; in the SMT case it's - // possible that its ITB entry was kicked out. - warn("%lli: Instruction PC %#x was not found in the ITB!", - curTick, thread->readPC()); - handleError(); - - // go to the next instruction - thread->setPC(thread->readNextPC()); - thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); - - return; - } else { - // The instruction is carrying an ITB fault. Handle - // the fault and see if our results match the CPU on - // the next tick(). - fault = inst->getFault(); - } - } - - if (fault == NoFault) { - Packet *pkt = new Packet(memReq, Packet::ReadReq, - Packet::Broadcast); - - pkt->dataStatic(&machInst); - - icachePort->sendFunctional(pkt); - - delete pkt; - - // keep an instruction count - numInst++; - - // decode the instruction - machInst = gtoh(machInst); - // Checks that the instruction matches what we expected it to be. - // Checks both the machine instruction and the PC. - validateInst(inst); - - curStaticInst = StaticInst::decode(makeExtMI(machInst, - thread->readPC())); - -#if FULL_SYSTEM - thread->setInst(machInst); -#endif // FULL_SYSTEM - - fault = inst->getFault(); - } - - // Discard fetch's memReq. - delete memReq; - memReq = NULL; - - // Either the instruction was a fault and we should process the fault, - // or we should just go ahead execute the instruction. This assumes - // that the instruction is properly marked as a fault. - if (fault == NoFault) { - - thread->funcExeInst++; - - fault = curStaticInst->execute(this, NULL); - - // Checks to make sure instrution results are correct. - validateExecution(inst); - - if (curStaticInst->isLoad()) { - ++numLoad; - } - } - - if (fault != NoFault) { -#if FULL_SYSTEM - fault->invoke(tc); - willChangePC = true; - newPC = thread->readPC(); - DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); -#else // !FULL_SYSTEM - fatal("fault (%d) detected @ PC 0x%08p", fault, thread->readPC()); -#endif // FULL_SYSTEM - } else { -#if THE_ISA != MIPS_ISA - // go to the next instruction - thread->setPC(thread->readNextPC()); - thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); -#else - // go to the next instruction - thread->setPC(thread->readNextPC()); - thread->setNextPC(thread->readNextNPC()); - thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); -#endif - - } - -#if FULL_SYSTEM - // @todo: Determine if these should happen only if the - // instruction hasn't faulted. In the SimpleCPU case this may - // not be true, but in the O3 or Ozone case this may be true. - Addr oldpc; - int count = 0; - do { - oldpc = thread->readPC(); - system->pcEventQueue.service(tc); - count++; - } while (oldpc != thread->readPC()); - if (count > 1) { - willChangePC = true; - newPC = thread->readPC(); - DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC); - } -#endif - - // @todo: Optionally can check all registers. (Or just those - // that have been modified). - validateState(); - - if (memReq) { - delete memReq; - memReq = NULL; - } - - // Continue verifying instructions if there's another completed - // instruction waiting to be verified. - if (instList.empty()) { - break; - } else if (instList.front()->isCompleted()) { - inst = instList.front(); - instList.pop_front(); - } else { - break; - } - } + warn("%lli: Checker PC:%#x, next PC:%#x", + curTick, thread->readPC(), thread->readNextPC()); + panic("Checker found an error!"); } - -template <class DynInstPtr> -void -Checker<DynInstPtr>::switchOut(Sampler *s) -{ - instList.clear(); -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU) -{ -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::validateInst(DynInstPtr &inst) -{ - if (inst->readPC() != thread->readPC()) { - warn("%lli: PCs do not match! Inst: %#x, checker: %#x", - curTick, inst->readPC(), thread->readPC()); - if (changedPC) { - warn("%lli: Changed PCs recently, may not be an error", - curTick); - } else { - handleError(); - } - } - - MachInst mi = static_cast<MachInst>(inst->staticInst->machInst); - - if (mi != machInst) { - warn("%lli: Binary instructions do not match! Inst: %#x, " - "checker: %#x", - curTick, mi, machInst); - handleError(); - } -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::validateExecution(DynInstPtr &inst) -{ - if (inst->numDestRegs()) { - // @todo: Support more destination registers. - if (inst->isUnverifiable()) { - // Unverifiable instructions assume they were executed - // properly by the CPU. Grab the result from the - // instruction and write it to the register. - RegIndex idx = inst->destRegIdx(0); - if (idx < TheISA::FP_Base_DepTag) { - thread->setIntReg(idx, inst->readIntResult()); - } else if (idx < TheISA::Fpcr_DepTag) { - thread->setFloatRegBits(idx, inst->readIntResult()); - } else { - thread->setMiscReg(idx, inst->readIntResult()); - } - } else if (result.integer != inst->readIntResult()) { - warn("%lli: Instruction results do not match! (Values may not " - "actually be integers) Inst: %#x, checker: %#x", - curTick, inst->readIntResult(), result.integer); - handleError(); - } - } - - if (inst->readNextPC() != thread->readNextPC()) { - warn("%lli: Instruction next PCs do not match! Inst: %#x, " - "checker: %#x", - curTick, inst->readNextPC(), thread->readNextPC()); - handleError(); - } - - // Checking side effect registers can be difficult if they are not - // checked simultaneously with the execution of the instruction. - // This is because other valid instructions may have modified - // these registers in the meantime, and their values are not - // stored within the DynInst. - while (!miscRegIdxs.empty()) { - int misc_reg_idx = miscRegIdxs.front(); - miscRegIdxs.pop(); - - if (inst->tcBase()->readMiscReg(misc_reg_idx) != - thread->readMiscReg(misc_reg_idx)) { - warn("%lli: Misc reg idx %i (side effect) does not match! " - "Inst: %#x, checker: %#x", - curTick, misc_reg_idx, - inst->tcBase()->readMiscReg(misc_reg_idx), - thread->readMiscReg(misc_reg_idx)); - handleError(); - } - } -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::validateState() -{ -} - -template <class DynInstPtr> -void -Checker<DynInstPtr>::dumpInsts() -{ - int num = 0; - - InstListIt inst_list_it = --(instList.end()); - - cprintf("Inst list size: %i\n", instList.size()); - - while (inst_list_it != instList.end()) - { - cprintf("Instruction:%i\n", - num); - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Completed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isCompleted()); - - cprintf("\n"); - - inst_list_it--; - ++num; - } - -} - -//template -//class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >; - -template -class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >; diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index c9986d228..a508c56ba 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -66,7 +66,6 @@ class ThreadContext; class MemInterface; class Checkpoint; class Request; -class Sampler; /** * CheckerCPU class. Dynamically verifies instructions as they are @@ -103,6 +102,7 @@ class CheckerCPU : public BaseCPU Process *process; #endif bool exitOnError; + bool warnOnlyOnLoadError; }; public: @@ -127,6 +127,12 @@ class CheckerCPU : public BaseCPU Port *dcachePort; + virtual Port *getPort(const std::string &name, int idx) + { + panic("Not supported on checker!"); + return NULL; + } + public: // Primary thread being run. SimpleThread *thread; @@ -335,10 +341,13 @@ class CheckerCPU : public BaseCPU void handleError() { if (exitOnError) - panic("Checker found error!"); + dumpAndExit(); } + bool checkFlags(Request *req); + void dumpAndExit(); + ThreadContext *tcBase() { return tc; } SimpleThread *threadBase() { return thread; } @@ -351,6 +360,7 @@ class CheckerCPU : public BaseCPU uint64_t newPC; bool changedNextPC; bool exitOnError; + bool warnOnlyOnLoadError; InstSeqNum youngestSN; }; @@ -369,15 +379,26 @@ class Checker : public CheckerCPU : CheckerCPU(p) { } - void switchOut(Sampler *s); + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); - void tick(DynInstPtr &inst); + void verify(DynInstPtr &inst); void validateInst(DynInstPtr &inst); void validateExecution(DynInstPtr &inst); void validateState(); + void copyResult(DynInstPtr &inst); + + private: + void handleError(DynInstPtr &inst) + { + if (exitOnError) + dumpAndExit(inst); + } + + void dumpAndExit(DynInstPtr &inst); + std::list<DynInstPtr> instList; typedef typename std::list<DynInstPtr>::iterator InstListIt; void dumpInsts(); diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh new file mode 100644 index 000000000..81f97726c --- /dev/null +++ b/src/cpu/checker/cpu_impl.hh @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include <list> +#include <string> + +#include "base/refcnt.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/checker/cpu.hh" +#include "cpu/simple_thread.hh" +#include "cpu/thread_context.hh" +#include "cpu/static_inst.hh" +#include "mem/packet_impl.hh" +#include "sim/byteswap.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#if FULL_SYSTEM +#include "arch/vtophys.hh" +#endif // FULL_SYSTEM + +using namespace std; +//The CheckerCPU does alpha only +using namespace AlphaISA; + +template <class DynInstPtr> +void +Checker<DynInstPtr>::verify(DynInstPtr &completed_inst) +{ + DynInstPtr inst; + + // Either check this instruction, or add it to a list of + // instructions waiting to be checked. Instructions must be + // checked in program order, so if a store has committed yet not + // completed, there may be some instructions that are waiting + // behind it that have completed and must be checked. + if (!instList.empty()) { + if (youngestSN < completed_inst->seqNum) { + DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", + completed_inst->seqNum, completed_inst->readPC()); + instList.push_back(completed_inst); + youngestSN = completed_inst->seqNum; + } + + if (!instList.front()->isCompleted()) { + return; + } else { + inst = instList.front(); + instList.pop_front(); + } + } else { + if (!completed_inst->isCompleted()) { + if (youngestSN < completed_inst->seqNum) { + DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", + completed_inst->seqNum, completed_inst->readPC()); + instList.push_back(completed_inst); + youngestSN = completed_inst->seqNum; + } + return; + } else { + if (youngestSN < completed_inst->seqNum) { + inst = completed_inst; + youngestSN = completed_inst->seqNum; + } else { + return; + } + } + } + + // Try to check all instructions that are completed, ending if we + // run out of instructions to check or if an instruction is not + // yet completed. + while (1) { + DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n", + inst->seqNum, inst->readPC()); + unverifiedResult.integer = inst->readIntResult(); + unverifiedReq = inst->req; + unverifiedMemData = inst->memData; + numCycles++; + + Fault fault = NoFault; + + // maintain $r0 semantics + thread->setIntReg(ZeroReg, 0); +#ifdef TARGET_ALPHA + thread->setFloatRegDouble(ZeroReg, 0.0); +#endif // TARGET_ALPHA + + // Check if any recent PC changes match up with anything we + // expect to happen. This is mostly to check if traps or + // PC-based events have occurred in both the checker and CPU. + if (changedPC) { + DPRINTF(Checker, "Changed PC recently to %#x\n", + thread->readPC()); + if (willChangePC) { + if (newPC == thread->readPC()) { + DPRINTF(Checker, "Changed PC matches expected PC\n"); + } else { + warn("%lli: Changed PC does not match expected PC, " + "changed: %#x, expected: %#x", + curTick, thread->readPC(), newPC); + CheckerCPU::handleError(); + } + willChangePC = false; + } + changedPC = false; + } + if (changedNextPC) { + DPRINTF(Checker, "Changed NextPC recently to %#x\n", + thread->readNextPC()); + changedNextPC = false; + } + + // Try to fetch the instruction + +#if FULL_SYSTEM +#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0 +#else +#define IFETCH_FLAGS(pc) 0 +#endif + + uint64_t fetch_PC = thread->readPC() & ~3; + + // set up memory request for instruction fetch + memReq = new Request(inst->threadNumber, fetch_PC, + sizeof(uint32_t), + IFETCH_FLAGS(thread->readPC()), + fetch_PC, thread->readCpuId(), inst->threadNumber); + + bool succeeded = translateInstReq(memReq); + + if (!succeeded) { + if (inst->getFault() == NoFault) { + // In this case the instruction was not a dummy + // instruction carrying an ITB fault. In the single + // threaded case the ITB should still be able to + // translate this instruction; in the SMT case it's + // possible that its ITB entry was kicked out. + warn("%lli: Instruction PC %#x was not found in the ITB!", + curTick, thread->readPC()); + handleError(inst); + + // go to the next instruction + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); + + return; + } else { + // The instruction is carrying an ITB fault. Handle + // the fault and see if our results match the CPU on + // the next tick(). + fault = inst->getFault(); + } + } + + if (fault == NoFault) { + Packet *pkt = new Packet(memReq, Packet::ReadReq, + Packet::Broadcast); + + pkt->dataStatic(&machInst); + + icachePort->sendFunctional(pkt); + + delete pkt; + + // keep an instruction count + numInst++; + + // decode the instruction + machInst = gtoh(machInst); + // Checks that the instruction matches what we expected it to be. + // Checks both the machine instruction and the PC. + validateInst(inst); + + curStaticInst = StaticInst::decode(makeExtMI(machInst, + thread->readPC())); + +#if FULL_SYSTEM + thread->setInst(machInst); +#endif // FULL_SYSTEM + + fault = inst->getFault(); + } + + // Discard fetch's memReq. + delete memReq; + memReq = NULL; + + // Either the instruction was a fault and we should process the fault, + // or we should just go ahead execute the instruction. This assumes + // that the instruction is properly marked as a fault. + if (fault == NoFault) { + + thread->funcExeInst++; + + fault = curStaticInst->execute(this, NULL); + + // Checks to make sure instrution results are correct. + validateExecution(inst); + + if (curStaticInst->isLoad()) { + ++numLoad; + } + } + + if (fault != NoFault) { +#if FULL_SYSTEM + fault->invoke(tc); + willChangePC = true; + newPC = thread->readPC(); + DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); +#endif + } else { +#if THE_ISA != MIPS_ISA + // go to the next instruction + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(MachInst)); +#else + // go to the next instruction + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextNPC()); + thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); +#endif + + } + +#if FULL_SYSTEM + // @todo: Determine if these should happen only if the + // instruction hasn't faulted. In the SimpleCPU case this may + // not be true, but in the O3 or Ozone case this may be true. + Addr oldpc; + int count = 0; + do { + oldpc = thread->readPC(); + system->pcEventQueue.service(tc); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + willChangePC = true; + newPC = thread->readPC(); + DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC); + } +#endif + + // @todo: Optionally can check all registers. (Or just those + // that have been modified). + validateState(); + + if (memReq) { + delete memReq; + memReq = NULL; + } + + // Continue verifying instructions if there's another completed + // instruction waiting to be verified. + if (instList.empty()) { + break; + } else if (instList.front()->isCompleted()) { + inst = instList.front(); + instList.pop_front(); + } else { + break; + } + } +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::switchOut() +{ + instList.clear(); +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU) +{ +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::validateInst(DynInstPtr &inst) +{ + if (inst->readPC() != thread->readPC()) { + warn("%lli: PCs do not match! Inst: %#x, checker: %#x", + curTick, inst->readPC(), thread->readPC()); + if (changedPC) { + warn("%lli: Changed PCs recently, may not be an error", + curTick); + } else { + handleError(inst); + } + } + + MachInst mi = static_cast<MachInst>(inst->staticInst->machInst); + + if (mi != machInst) { + warn("%lli: Binary instructions do not match! Inst: %#x, " + "checker: %#x", + curTick, mi, machInst); + handleError(inst); + } +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::validateExecution(DynInstPtr &inst) +{ + bool result_mismatch = false; + if (inst->numDestRegs()) { + // @todo: Support more destination registers. + if (inst->isUnverifiable()) { + // Unverifiable instructions assume they were executed + // properly by the CPU. Grab the result from the + // instruction and write it to the register. + copyResult(inst); + } else if (result.integer != inst->readIntResult()) { + result_mismatch = true; + } + } + + if (result_mismatch) { + warn("%lli: Instruction results do not match! (Values may not " + "actually be integers) Inst: %#x, checker: %#x", + curTick, inst->readIntResult(), result.integer); + + // It's useful to verify load values from memory, but in MP + // systems the value obtained at execute may be different than + // the value obtained at completion. Similarly DMA can + // present the same problem on even UP systems. Thus there is + // the option to only warn on loads having a result error. + if (inst->isLoad() && warnOnlyOnLoadError) { + copyResult(inst); + } else { + handleError(inst); + } + } + + if (inst->readNextPC() != thread->readNextPC()) { + warn("%lli: Instruction next PCs do not match! Inst: %#x, " + "checker: %#x", + curTick, inst->readNextPC(), thread->readNextPC()); + handleError(inst); + } + + // Checking side effect registers can be difficult if they are not + // checked simultaneously with the execution of the instruction. + // This is because other valid instructions may have modified + // these registers in the meantime, and their values are not + // stored within the DynInst. + while (!miscRegIdxs.empty()) { + int misc_reg_idx = miscRegIdxs.front(); + miscRegIdxs.pop(); + + if (inst->tcBase()->readMiscReg(misc_reg_idx) != + thread->readMiscReg(misc_reg_idx)) { + warn("%lli: Misc reg idx %i (side effect) does not match! " + "Inst: %#x, checker: %#x", + curTick, misc_reg_idx, + inst->tcBase()->readMiscReg(misc_reg_idx), + thread->readMiscReg(misc_reg_idx)); + handleError(inst); + } + } +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::validateState() +{ +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::copyResult(DynInstPtr &inst) +{ + RegIndex idx = inst->destRegIdx(0); + if (idx < TheISA::FP_Base_DepTag) { + thread->setIntReg(idx, inst->readIntResult()); + } else if (idx < TheISA::Fpcr_DepTag) { + thread->setFloatRegBits(idx, inst->readIntResult()); + } else { + thread->setMiscReg(idx, inst->readIntResult()); + } +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::dumpAndExit(DynInstPtr &inst) +{ + cprintf("Error detected, instruction information:\n"); + cprintf("PC:%#x, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Completed:%i\n", + inst->readPC(), + inst->readNextPC(), + inst->seqNum, + inst->threadNumber, + inst->isCompleted()); + inst->dump(); + CheckerCPU::dumpAndExit(); +} + +template <class DynInstPtr> +void +Checker<DynInstPtr>::dumpInsts() +{ + int num = 0; + + InstListIt inst_list_it = --(instList.end()); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Completed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isCompleted()); + + cprintf("\n"); + + inst_list_it--; + ++num; + } + +} diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index c0ac8f01d..c035e92ac 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -120,7 +120,7 @@ class CheckerThreadContext : public ThreadContext void suspend() { actualTC->suspend(); } /// Set the status to Unallocated. - void deallocate() { actualTC->deallocate(); } + void deallocate(int delay = 0) { actualTC->deallocate(delay); } /// Set the status to Halted. void halt() { actualTC->halt(); } diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index 1a9724ca6..5b0c6c4da 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -26,6 +26,10 @@ # # Authors: Steve Reinhardt +import os +import os.path +import sys + ################ # CpuModel class # @@ -47,7 +51,6 @@ class CpuModel: # Add self to dict CpuModel.dict[name] = self - # # Define CPU models. # @@ -67,9 +70,6 @@ CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc', CpuModel('FullCPU', 'full_cpu_exec.cc', '#include "encumbered/cpu/full/dyn_inst.hh"', { 'CPU_exec_context': 'DynInst' }) -CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc', - '#include "cpu/o3/alpha_dyn_inst.hh"', - { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' }) CpuModel('OzoneSimpleCPU', 'ozone_simple_exec.cc', '#include "cpu/ozone/dyn_inst.hh"', { 'CPU_exec_context': 'OzoneDynInst<SimpleImpl>' }) @@ -79,4 +79,6 @@ CpuModel('OzoneCPU', 'ozone_exec.cc', CpuModel('CheckerCPU', 'checker_cpu_exec.cc', '#include "cpu/checker/cpu.hh"', { 'CPU_exec_context': 'CheckerCPU' }) - +CpuModel('O3CPU', 'o3_cpu_exec.cc', + '#include "cpu/o3/isa_specific.hh"', + { 'CPU_exec_context': 'O3DynInst' }) diff --git a/src/cpu/cpuevent.hh b/src/cpu/cpuevent.hh index 11ac7aafb..9dfae27cf 100644 --- a/src/cpu/cpuevent.hh +++ b/src/cpu/cpuevent.hh @@ -36,7 +36,7 @@ class ThreadContext; -/** This class creates a global list of events than need a pointer to an +/** This class creates a global list of events that need a pointer to a * thread context. When a switchover takes place the events can be migrated * to the new thread context, otherwise you could have a wake timer interrupt * go off on a switched out cpu or other unfortunate events. This object MUST be diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh new file mode 100644 index 000000000..f6e8d7c25 --- /dev/null +++ b/src/cpu/exec_context.hh @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#error "Cannot include this file" + +/** + * The ExecContext is not a usable class. It is simply here for + * documentation purposes. It shows the interface that is used by the + * ISA to access and change CPU state. + */ +class ExecContext { + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + /** Reads an integer register. */ + uint64_t readIntReg(const StaticInst *si, int idx); + + /** Reads a floating point register of a specific width. */ + FloatReg readFloatReg(const StaticInst *si, int idx, int width); + + /** Reads a floating point register of single register width. */ + FloatReg readFloatReg(const StaticInst *si, int idx); + + /** Reads a floating point register of a specific width in its + * binary format, instead of by value. */ + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width); + + /** Reads a floating point register in its binary format, instead + * of by value. */ + FloatRegBits readFloatRegBits(const StaticInst *si, int idx); + + /** Sets an integer register to a value. */ + void setIntReg(const StaticInst *si, int idx, uint64_t val); + + /** Sets a floating point register of a specific width to a value. */ + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width); + + /** Sets a floating point register of single width to a value. */ + void setFloatReg(const StaticInst *si, int idx, FloatReg val); + + /** Sets the bits of a floating point register of a specific width + * to a binary value. */ + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width); + + /** Sets the bits of a floating point register of single width + * to a binary value. */ + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val); + + /** Reads the PC. */ + uint64_t readPC(); + /** Reads the NextPC. */ + uint64_t readNextPC(); + /** Reads the Next-NextPC. Only for architectures like SPARC or MIPS. */ + uint64_t readNextNPC(); + + /** Sets the PC. */ + void setPC(uint64_t val); + /** Sets the NextPC. */ + void setNextPC(uint64_t val); + /** Sets the Next-NextPC. Only for architectures like SPARC or MIPS. */ + void setNextNPC(uint64_t val); + + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg); + + /** Reads a miscellaneous register, handling any architectural + * side effects due to reading that register. */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val); + + /** Sets a miscellaneous register, handling any architectural + * side effects due to writing that register. */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + /** Records the effective address of the instruction. Only valid + * for memory ops. */ + void setEA(Addr EA); + /** Returns the effective address of the instruction. Only valid + * for memory ops. */ + Addr getEA(); + + /** Returns a pointer to the ThreadContext. */ + ThreadContext *tcBase(); + + /** Reads an address, creating a memory request with the given + * flags. Stores result of read in data. */ + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + /** Writes to an address, creating a memory request with the given + * flags. Writes data to memory. For store conditionals, returns + * the result of the store in res. */ + template <class T> + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + /** Prefetches an address, creating a memory request with the + * given flags. */ + void prefetch(Addr addr, unsigned flags); + + /** Hints to the memory system that an address will be written to + * soon, with the given size. Creates a memory request with the + * given flags. */ + void writeHint(Addr addr, int size, unsigned flags); + +#if FULL_SYSTEM + /** Somewhat Alpha-specific function that handles returning from + * an error or interrupt. */ + Fault hwrei(); + /** Reads the interrupt flags. */ + int readIntrFlag(); + /** Sets the interrupt flags to a value. */ + void setIntrFlag(int val); + + /** + * Check for special simulator handling of specific PAL calls. If + * return value is false, actual PAL call will be suppressed. + */ + bool simPalCheck(int palFunc); +#else + /** Executes a syscall specified by the callnum. */ + void syscall(int64_t callnum); +#endif +}; diff --git a/src/cpu/func_unit.cc b/src/cpu/func_unit.cc new file mode 100644 index 000000000..c20578a43 --- /dev/null +++ b/src/cpu/func_unit.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2002-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Raasch + */ + +#include <sstream> + +#include "base/misc.hh" +#include "cpu/func_unit.hh" +#include "sim/builder.hh" + +using namespace std; + + +//////////////////////////////////////////////////////////////////////////// +// +// The funciton unit +// +FuncUnit::FuncUnit() +{ + capabilityList.reset(); +} + + +// Copy constructor +FuncUnit::FuncUnit(const FuncUnit &fu) +{ + + for (int i = 0; i < Num_OpClasses; ++i) { + opLatencies[i] = fu.opLatencies[i]; + issueLatencies[i] = fu.issueLatencies[i]; + } + + capabilityList = fu.capabilityList; +} + + +void +FuncUnit::addCapability(OpClass cap, unsigned oplat, unsigned issuelat) +{ + if (issuelat == 0 || oplat == 0) + panic("FuncUnit: you don't really want a zero-cycle latency do you?"); + + capabilityList.set(cap); + + opLatencies[cap] = oplat; + issueLatencies[cap] = issuelat; +} + +bool +FuncUnit::provides(OpClass capability) +{ + return capabilityList[capability]; +} + +bitset<Num_OpClasses> +FuncUnit::capabilities() +{ + return capabilityList; +} + +unsigned & +FuncUnit::opLatency(OpClass cap) +{ + return opLatencies[cap]; +} + +unsigned +FuncUnit::issueLatency(OpClass capability) +{ + return issueLatencies[capability]; +} + +//////////////////////////////////////////////////////////////////////////// +// +// The SimObjects we use to get the FU information into the simulator +// +//////////////////////////////////////////////////////////////////////////// + +// +// We use 2 objects to specify this data in the INI file: +// (1) OpDesc - Describes the operation class & latencies +// (multiple OpDesc objects can refer to the same +// operation classes) +// (2) FUDesc - Describes the operations available in the unit & +// the number of these units +// +// + + +// +// The operation-class description object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(OpDesc) + + SimpleEnumParam<OpClass> opClass; + Param<unsigned> opLat; + Param<unsigned> issueLat; + +END_DECLARE_SIM_OBJECT_PARAMS(OpDesc) + +BEGIN_INIT_SIM_OBJECT_PARAMS(OpDesc) + + INIT_ENUM_PARAM(opClass, "type of operation", opClassStrings), + INIT_PARAM(opLat, "cycles until result is available"), + INIT_PARAM(issueLat, "cycles until another can be issued") + +END_INIT_SIM_OBJECT_PARAMS(OpDesc) + + +CREATE_SIM_OBJECT(OpDesc) +{ + return new OpDesc(getInstanceName(), opClass, opLat, issueLat); +} + +REGISTER_SIM_OBJECT("OpDesc", OpDesc) + + +// +// The FuDesc object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUDesc) + + SimObjectVectorParam<OpDesc *> opList; + Param<unsigned> count; + +END_DECLARE_SIM_OBJECT_PARAMS(FUDesc) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(FUDesc) + + INIT_PARAM(opList, "list of operation classes for this FU type"), + INIT_PARAM(count, "number of these FU's available") + +END_INIT_SIM_OBJECT_PARAMS(FUDesc) + + +CREATE_SIM_OBJECT(FUDesc) +{ + return new FUDesc(getInstanceName(), opList, count); +} + +REGISTER_SIM_OBJECT("FUDesc", FUDesc) + diff --git a/src/cpu/func_unit.hh b/src/cpu/func_unit.hh new file mode 100644 index 000000000..780143096 --- /dev/null +++ b/src/cpu/func_unit.hh @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2002-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Raasch + */ + +#ifndef __CPU_FUNC_UNIT_HH__ +#define __CPU_FUNC_UNIT_HH__ + +#include <bitset> +#include <string> +#include <vector> + +#include "cpu/op_class.hh" +#include "sim/sim_object.hh" + +//////////////////////////////////////////////////////////////////////////// +// +// Structures used ONLY during the initialization phase... +// +// +// + +struct OpDesc : public SimObject +{ + OpClass opClass; + unsigned opLat; + unsigned issueLat; + + OpDesc(std::string name, OpClass c, unsigned o, unsigned i) + : SimObject(name), opClass(c), opLat(o), issueLat(i) {}; +}; + +struct FUDesc : public SimObject +{ + std::vector<OpDesc *> opDescList; + unsigned number; + + FUDesc(std::string name, std::vector<OpDesc *> l, unsigned n) + : SimObject(name), opDescList(l), number(n) {}; +}; + +typedef std::vector<OpDesc *>::iterator OPDDiterator; +typedef std::vector<FUDesc *>::iterator FUDDiterator; + + + + +//////////////////////////////////////////////////////////////////////////// +// +// The actual FU object +// +// +// +class FuncUnit +{ + private: + unsigned opLatencies[Num_OpClasses]; + unsigned issueLatencies[Num_OpClasses]; + std::bitset<Num_OpClasses> capabilityList; + + public: + FuncUnit(); + FuncUnit(const FuncUnit &fu); + + std::string name; + + void addCapability(OpClass cap, unsigned oplat, unsigned issuelat); + + bool provides(OpClass capability); + std::bitset<Num_OpClasses> capabilities(); + + unsigned &opLatency(OpClass capability); + unsigned issueLatency(OpClass capability); +}; + +#endif // __FU_POOL_HH__ diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript new file mode 100755 index 000000000..e65d41411 --- /dev/null +++ b/src/cpu/o3/SConscript @@ -0,0 +1,79 @@ +# -*- mode:python -*- + +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Korey Sewell + +import os +import os.path +import sys + +# Import build environment variable from SConstruct. +Import('env') + + +################################################################# +# +# Include ISA-specific files for the O3 CPU-model +# +################################################################# + +sources = [] + +if env['TARGET_ISA'] == 'alpha': + sources += Split(''' + alpha/dyn_inst.cc + alpha/cpu.cc + alpha/thread_context.cc + alpha/cpu_builder.cc + ''') +elif env['TARGET_ISA'] == 'mips': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # mips/dyn_inst.cc + # mips/cpu.cc + # mips/thread_context.cc + # mips/cpu_builder.cc + # ''') +elif env['TARGET_ISA'] == 'sparc': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # sparc/dyn_inst.cc + # sparc/cpu.cc + # sparc/thread_context.cc + # sparc/cpu_builder.cc + # ''') +else: + sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA']) + + +# Convert file names to SCons File objects. This takes care of the +# path relative to the top of the directory tree. +sources = [File(s) for s in sources] + +Return('sources') + diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha/cpu.cc index 39cae696b..ed10b2fd1 100644 --- a/src/cpu/o3/alpha_cpu.cc +++ b/src/cpu/o3/alpha/cpu.cc @@ -28,11 +28,11 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_cpu_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha/impl.hh" +#include "cpu/o3/alpha/cpu_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" -// Force instantiation of AlphaFullCPU for all the implemntations that are +// Force instantiation of AlphaO3CPU for all the implemntations that are // needed. Consider merging this and alpha_dyn_inst.cc, and maybe all // classes that depend on a certain impl, into one file (alpha_impl.cc?). -template class AlphaFullCPU<AlphaSimpleImpl>; +template class AlphaO3CPU<AlphaSimpleImpl>; diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh new file mode 100644 index 000000000..b961341d5 --- /dev/null +++ b/src/cpu/o3/alpha/cpu.hh @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_CPU_HH__ +#define __CPU_O3_ALPHA_CPU_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * AlphaO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template <class Impl> +class AlphaO3CPU : public FullO3CPU<Impl> +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef O3ThreadState<Impl> ImplState; + typedef O3ThreadState<Impl> Thread; + typedef typename Impl::Params Params; + + /** Constructs an AlphaO3CPU with the given parameters. */ + AlphaO3CPU(Params *params); + +#if FULL_SYSTEM + /** ITB pointer. */ + AlphaITB *itb; + /** DTB pointer. */ + AlphaDTB *dtb; +#endif + + /** Registers statistics. */ + void regStats(); + +#if FULL_SYSTEM + /** Translates instruction requestion. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return itb->translate(req, thread->getTC()); + } + + /** Translates data read request. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), false); + } + + /** Translates data write request. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), true); + } + +#else + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + +#endif + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + +#if FULL_SYSTEM + /** Posts an interrupt. */ + void post_interrupt(int int_num, int index); + /** Reads the interrupt flag. */ + int readIntrFlag(); + /** Sets the interrupt flags. */ + void setIntrFlag(int val); + /** HW return from error interrupt. */ + Fault hwrei(unsigned tid); + /** Returns if a specific PC is a PAL mode PC. */ + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + + bool simPalCheck(int palFunc, unsigned tid); + + /** Processes any interrupts. */ + void processInterrupts(); + + /** Halts the CPU. */ + void halt() { panic("Halt not implemented!\n"); } +#endif + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + +#if !FULL_SYSTEM + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, IntReg val, int tid); + + /** Sets the return value of a syscall. */ + void setSyscallReturn(SyscallReturn return_value, int tid); +#endif + + /** CPU read function, forwards read to LSQ. */ + template <class T> + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template <class T> + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_ALPHA_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 828977ccb..5e767655d 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -31,21 +31,21 @@ #include <string> #include "cpu/base.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/fu_pool.hh" #include "sim/builder.hh" -class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl> +class DerivO3CPU : public AlphaO3CPU<AlphaSimpleImpl> { public: - DerivAlphaFullCPU(AlphaSimpleParams *p) - : AlphaFullCPU<AlphaSimpleImpl>(p) + DerivO3CPU(AlphaSimpleParams *p) + : AlphaO3CPU<AlphaSimpleImpl>(p) { } }; -BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param<int> clock; Param<int> numThreads; @@ -91,12 +91,10 @@ Param<unsigned> renameWidth; Param<unsigned> commitToIEWDelay; Param<unsigned> renameToIEWDelay; Param<unsigned> issueToExecuteDelay; +Param<unsigned> dispatchWidth; Param<unsigned> issueWidth; -Param<unsigned> executeWidth; -Param<unsigned> executeIntWidth; -Param<unsigned> executeFloatWidth; -Param<unsigned> executeBranchWidth; -Param<unsigned> executeMemoryWidth; +Param<unsigned> wbWidth; +Param<unsigned> wbDepth; SimObjectParam<FUPool *> fuPool; Param<unsigned> iewToCommitDelay; @@ -104,7 +102,9 @@ Param<unsigned> renameToROBDelay; Param<unsigned> commitWidth; Param<unsigned> squashWidth; Param<Tick> trapLatency; -Param<Tick> fetchTrapLatency; + +Param<unsigned> backComSize; +Param<unsigned> forwardComSize; Param<std::string> predType; Param<unsigned> localPredictorSize; @@ -149,9 +149,9 @@ Param<bool> defer_registration; Param<bool> function_trace; Param<Tick> function_trace_start; -END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) -BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM(numThreads, "number of HW thread contexts"), @@ -212,12 +212,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -226,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), - INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), @@ -271,11 +271,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") -END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) -CREATE_SIM_OBJECT(DerivAlphaFullCPU) +CREATE_SIM_OBJECT(DerivO3CPU) { - DerivAlphaFullCPU *cpu; + DerivO3CPU *cpu; #if FULL_SYSTEM // Full-system only supports a single thread for the moment. @@ -284,12 +284,12 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) // In non-full-system mode, we infer the number of threads from // the workload if it's not explicitly specified. int actual_num_threads = - numThreads.isValid() ? numThreads : workload.size(); + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); if (workload.size() == 0) { fatal("Must specify at least one workload!"); } - #endif AlphaSimpleParams *params = new AlphaSimpleParams; @@ -343,12 +343,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; @@ -356,7 +354,9 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->commitWidth = commitWidth; params->squashWidth = squashWidth; params->trapLatency = trapLatency; - params->fetchTrapLatency = fetchTrapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; params->predType = predType; params->localPredictorSize = localPredictorSize; @@ -386,7 +386,16 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->numROBEntries = numROBEntries; params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = round_robin_policy; + else + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; params->smtLSQPolicy = smtLSQPolicy; params->smtLSQThreshold = smtLSQThreshold; @@ -401,10 +410,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; - cpu = new DerivAlphaFullCPU(params); + cpu = new DerivO3CPU(params); return cpu; } -REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU) +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh new file mode 100644 index 000000000..0473e60c2 --- /dev/null +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "config/use_checker.hh" + +#include "arch/alpha/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/checker/thread_context.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/params.hh" +#include "cpu/o3/alpha/thread_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/thread_state.hh" + +#if FULL_SYSTEM +#include "arch/alpha/osfpal.hh" +#include "arch/isa_traits.hh" +#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#include "sim/sim_exit.hh" +#include "sim/system.hh" +#endif + +using namespace TheISA; + +template <class Impl> +AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) +#if FULL_SYSTEM + : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb) +#else + : FullO3CPU<Impl>(params) +#endif +{ + DPRINTF(O3CPU, "Creating AlphaO3CPU object.\n"); + + // Setup any thread state. + this->thread.resize(this->numThreads); + + for (int i = 0; i < this->numThreads; ++i) { +#if FULL_SYSTEM + // SMT is not supported in FS mode yet. + assert(this->numThreads == 1); + this->thread[i] = new Thread(this, 0); + this->thread[i]->setStatus(ThreadContext::Suspended); +#else + if (i < params->workload.size()) { + DPRINTF(O3CPU, "Workload[%i] process is %#x", + i, this->thread[i]); + this->thread[i] = new Thread(this, i, params->workload[i], + i, params->mem); + + this->thread[i]->setStatus(ThreadContext::Suspended); + +#if !FULL_SYSTEM + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), i), + params->workload[i]->pTable, + false); + mem_port = params->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + this->thread[i]->setMemPort(trans_port); +#endif + //usedTids[i] = true; + //threadMap[i] = i; + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = NULL; + + this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); + //usedTids[i] = false; + } +#endif // !FULL_SYSTEM + + ThreadContext *tc; + + // Setup the TC that will serve as the interface to the threads/CPU. + AlphaTC<Impl> *alpha_tc = + new AlphaTC<Impl>; + + tc = alpha_tc; + + // If we're using a checker, then the TC should be the + // CheckerThreadContext. +#if USE_CHECKER + if (params->checker) { + tc = new CheckerThreadContext<AlphaTC<Impl> >( + alpha_tc, this->checker); + } +#endif + + alpha_tc->cpu = this; + alpha_tc->thread = this->thread[i]; + +#if FULL_SYSTEM + // Setup quiesce event. + this->thread[i]->quiesceEvent = new EndQuiesceEvent(tc); + + Port *mem_port; + FunctionalPort *phys_port; + VirtualPort *virt_port; + phys_port = new FunctionalPort(csprintf("%s-%d-funcport", + name(), i)); + mem_port = this->system->physmem->getPort("functional"); + mem_port->setPeer(phys_port); + phys_port->setPeer(mem_port); + + virt_port = new VirtualPort(csprintf("%s-%d-vport", + name(), i)); + mem_port = this->system->physmem->getPort("functional"); + mem_port->setPeer(virt_port); + virt_port->setPeer(mem_port); + + this->thread[i]->setPhysPort(phys_port); + this->thread[i]->setVirtPort(virt_port); +#endif + // Give the thread the TC. + this->thread[i]->tc = tc; + + // Add the TC to the CPU's list of TC's. + this->threadContexts.push_back(tc); + } + + for (int i=0; i < this->numThreads; i++) { + this->thread[i]->setFuncExeInst(0); + } + + // Sets CPU pointers. These must be set at this level because the CPU + // pointers are defined to be the highest level of CPU class. + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); + this->regFile.setCPU(this); + + lockAddr = 0; + lockFlag = false; +} + +template <class Impl> +void +AlphaO3CPU<Impl>::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + + +template <class Impl> +MiscReg +AlphaO3CPU<Impl>::readMiscReg(int misc_reg, unsigned tid) +{ + return this->regFile.readMiscReg(misc_reg, tid); +} + +template <class Impl> +MiscReg +AlphaO3CPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, + unsigned tid) +{ + return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) +{ + return this->regFile.setMiscReg(misc_reg, val, tid); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val, + unsigned tid) +{ + return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::squashFromTC(unsigned tid) +{ + this->thread[tid]->inSyscall = true; + this->commit.generateTCEvent(tid); +} + +#if FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::post_interrupt(int int_num, int index) +{ + BaseCPU::post_interrupt(int_num, index); + + if (this->thread[0]->status() == ThreadContext::Suspended) { + DPRINTF(IPI,"Suspended Processor awoke\n"); + this->threadContexts[0]->activate(); + } +} + +template <class Impl> +int +AlphaO3CPU<Impl>::readIntrFlag() +{ + return this->regFile.readIntrFlag(); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setIntrFlag(int val) +{ + this->regFile.setIntrFlag(val); +} + +template <class Impl> +Fault +AlphaO3CPU<Impl>::hwrei(unsigned tid) +{ + // Need to clear the lock flag upon returning from an interrupt. + this->lockFlag = false; + + this->thread[tid]->kernelStats->hwrei(); + + this->checkInterrupts = true; + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template <class Impl> +bool +AlphaO3CPU<Impl>::simPalCheck(int palFunc, unsigned tid) +{ + if (this->thread[tid]->kernelStats) + this->thread[tid]->kernelStats->callpal(palFunc, + this->threadContexts[tid]); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + exitSimLoop("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (this->system->breakpoint()) + return false; + break; + } + + return true; +} + +template <class Impl> +void +AlphaO3CPU<Impl>::processInterrupts() +{ + // Check for interrupts here. For now can copy the code that + // exists within isa_fullsys_traits.hh. Also assume that thread 0 + // is the one that handles the interrupts. + // @todo: Possibly consolidate the interrupt checking code. + // @todo: Allow other threads to handle interrupts. + + // Check if there are any outstanding interrupts + //Handle the interrupts + int ipl = 0; + int summary = 0; + + this->checkInterrupts = false; + + if (this->readMiscReg(IPR_ASTRR, 0)) + panic("asynchronous traps not implemented\n"); + + if (this->readMiscReg(IPR_SIRR, 0)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = this->intr_status(); + + if (interrupts) { + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + } + + if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { + this->setMiscReg(IPR_ISR, summary, 0); + this->setMiscReg(IPR_INTID, ipl, 0); + // Checker needs to know these two registers were updated. +#if USE_CHECKER + if (this->checker) { + this->checker->threadBase()->setMiscReg(IPR_ISR, summary); + this->checker->threadBase()->setMiscReg(IPR_INTID, ipl); + } +#endif + this->trap(Fault(new InterruptFault), 0); + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + this->readMiscReg(IPR_IPLR, 0), ipl, summary); + } +} + +#endif // FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::trap(Fault fault, unsigned tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + +#if !FULL_SYSTEM + +template <class Impl> +void +AlphaO3CPU<Impl>::syscall(int64_t callnum, int tid) +{ + DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(callnum); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --(this->thread[tid]->funcExeInst); +} + +template <class Impl> +TheISA::IntReg +AlphaO3CPU<Impl>::getSyscallArg(int i, int tid) +{ + return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setSyscallArg(int i, IntReg val, int tid) +{ + this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid); +} + +template <class Impl> +void +AlphaO3CPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) +{ + // check for error condition. Alpha syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + if (return_value.successful()) { + // no error + this->setArchIntReg(SyscallSuccessReg, 0, tid); + this->setArchIntReg(ReturnValueReg, return_value.value(), tid); + } else { + // got an error, return details + this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); + this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); + } +} +#endif diff --git a/src/cpu/o3/alpha_dyn_inst.cc b/src/cpu/o3/alpha/dyn_inst.cc index 0c1723eec..97d2f3d08 100644 --- a/src/cpu/o3/alpha_dyn_inst.cc +++ b/src/cpu/o3/alpha/dyn_inst.cc @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst_impl.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst_impl.hh" +#include "cpu/o3/alpha/impl.hh" // Force instantiation of AlphaDynInst for all the implementations that // are needed. diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh index 36a08c4a7..9dee610b6 100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha/dyn_inst.hh @@ -34,8 +34,8 @@ #include "arch/isa_traits.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" class Packet; @@ -51,7 +51,7 @@ class AlphaDynInst : public BaseDynInst<Impl> { public: /** Typedef for the CPU. */ - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; /** Binary machine instruction type. */ typedef TheISA::MachInst MachInst; @@ -74,7 +74,7 @@ class AlphaDynInst : public BaseDynInst<Impl> public: /** BaseDynInst constructor given a binary instruction. */ AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, - FullCPU *cpu); + O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ AlphaDynInst(StaticInstPtr &_staticInst); diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh index a73cf4a7d..2d1b4b309 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha/dyn_inst_impl.hh @@ -28,11 +28,11 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha/dyn_inst.hh" template <class Impl> AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) + InstSeqNum seq_num, O3CPU *cpu) : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) { initVars(); @@ -102,15 +102,7 @@ template <class Impl> Fault AlphaDynInst<Impl>::completeAcc(Packet *pkt) { - if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(pkt, this, - this->traceData); - } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc(pkt, this, - this->traceData); - } else { - panic("Unknown type!"); - } + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); return this->fault; } diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha/impl.hh index 52f7c2394..b928ae654 100644 --- a/src/cpu/o3/alpha_impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -33,20 +33,21 @@ #include "arch/alpha/isa_traits.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" + // Forward declarations. template <class Impl> class AlphaDynInst; template <class Impl> -class AlphaFullCPU; +class AlphaO3CPU; /** Implementation specific struct that defines several key types to the * CPU, the stages within the CPU, the time buffers, and the DynInst. * The struct defines the ISA, the CPU policy, the specific DynInst, the - * specific FullCPU, and all of the structs from the time buffers to do + * specific O3CPU, and all of the structs from the time buffers to do * communication. * This is one of the key things that must be defined for each hardware * specific CPU implementation. @@ -67,8 +68,14 @@ struct AlphaSimpleImpl */ typedef RefCountingPtr<DynInst> DynInstPtr; - /** The FullCPU type to be used. */ - typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU; + /** The O3CPU type to be used. */ + typedef AlphaO3CPU<AlphaSimpleImpl> O3CPU; + + /** Same typedef, but for CPUType. BaseDynInst may not always use + * an O3 CPU, so it's clearer to call it CPUType instead in that + * case. + */ + typedef O3CPU CPUType; /** The Params to be passed to each stage. */ typedef AlphaSimpleParams Params; @@ -79,4 +86,7 @@ struct AlphaSimpleImpl }; }; +/** The O3Impl to be used. */ +typedef AlphaSimpleImpl O3CPUImpl; + #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh new file mode 100644 index 000000000..c618cee08 --- /dev/null +++ b/src/cpu/o3/alpha/params.hh @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_PARAMS_HH__ +#define __CPU_O3_ALPHA_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +class AlphaDTB; +class AlphaITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the AlphaO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class AlphaSimpleParams : public O3Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#endif +}; + +#endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/alpha/thread_context.cc b/src/cpu/o3/alpha/thread_context.cc new file mode 100755 index 000000000..4a02715bc --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext<AlphaSimpleImpl>; + diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh new file mode 100644 index 000000000..ad52b0d2e --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.hh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +template <class Impl> +class AlphaTC : public O3ThreadContext<Impl> +{ + public: +#if FULL_SYSTEM + /** Returns a pointer to the ITB. */ + virtual AlphaITB *getITBPtr() { return this->cpu->itb; } + + /** Returns a pointer to the DTB. */ + virtual AlphaDTB *getDTBPtr() { return this->cpu->dtb; } + + /** Returns pointer to the quiesce event. */ + virtual EndQuiesceEvent *getQuiesceEvent() + { + return this->thread->quiesceEvent; + } + + /** Returns if the thread is currently in PAL mode, based on + * the PC's value. */ + virtual bool inPalMode() + { return TheISA::PcPAL(this->cpu->readPC(this->thread->readTid())); } +#endif + + virtual uint64_t readNextNPC() + { + panic("Alpha has no NextNPC!"); + return 0; + } + + virtual void setNextNPC(uint64_t val) + { + panic("Alpha has no NextNPC!"); + } + + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Alpha!"); } + + + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ + virtual int exit() + { + this->deallocate(); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; // don't exit simulation + else + return 1; // exit simulation + } +}; diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh deleted file mode 100644 index f81837f3c..000000000 --- a/src/cpu/o3/alpha_cpu.hh +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#ifndef __CPU_O3_ALPHA_FULL_CPU_HH__ -#define __CPU_O3_ALPHA_FULL_CPU_HH__ - -#include "arch/isa_traits.hh" -#include "cpu/thread_context.hh" -#include "cpu/o3/cpu.hh" -#include "sim/byteswap.hh" - -class EndQuiesceEvent; -namespace Kernel { - class Statistics; -}; - -class TranslatingPort; - -/** - * AlphaFullCPU class. Derives from the FullO3CPU class, and - * implements all ISA and implementation specific functions of the - * CPU. This is the CPU class that is used for the SimObjects, and is - * what is given to the DynInsts. Most of its state exists in the - * FullO3CPU; the state is has is mainly for ISA specific - * functionality. - */ -template <class Impl> -class AlphaFullCPU : public FullO3CPU<Impl> -{ - protected: - typedef TheISA::IntReg IntReg; - typedef TheISA::FloatReg FloatReg; - typedef TheISA::FloatRegBits FloatRegBits; - typedef TheISA::MiscReg MiscReg; - typedef TheISA::RegFile RegFile; - typedef TheISA::MiscRegFile MiscRegFile; - - public: - typedef O3ThreadState<Impl> ImplState; - typedef O3ThreadState<Impl> Thread; - typedef typename Impl::Params Params; - - /** Constructs an AlphaFullCPU with the given parameters. */ - AlphaFullCPU(Params *params); - - /** - * Derived ThreadContext class for use with the AlphaFullCPU. It - * provides the interface for any external objects to access a - * single thread's state and some general CPU state. Any time - * external objects try to update state through this interface, - * the CPU will create an event to squash all in-flight - * instructions in order to ensure state is maintained correctly. - * It must be defined specifically for the AlphaFullCPU because - * not all architectural state is located within the O3ThreadState - * (such as the commit PC, and registers), and specific actions - * must be taken when using this interface (such as squashing all - * in-flight instructions when doing a write to this interface). - */ - class AlphaTC : public ThreadContext - { - public: - /** Pointer to the CPU. */ - AlphaFullCPU<Impl> *cpu; - - /** Pointer to the thread state that this TC corrseponds to. */ - O3ThreadState<Impl> *thread; - - /** Returns a pointer to this CPU. */ - virtual BaseCPU *getCpuPtr() { return cpu; } - - /** Sets this CPU's ID. */ - virtual void setCpuId(int id) { cpu->cpu_id = id; } - - /** Reads this CPU's ID. */ - virtual int readCpuId() { return cpu->cpu_id; } - -#if FULL_SYSTEM - /** Returns a pointer to the system. */ - virtual System *getSystemPtr() { return cpu->system; } - - /** Returns a pointer to physical memory. */ - virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } - - /** Returns a pointer to the ITB. */ - virtual AlphaITB *getITBPtr() { return cpu->itb; } - - /** Returns a pointer to the DTB. */ - virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } - - /** Returns a pointer to this thread's kernel statistics. */ - virtual Kernel::Statistics *getKernelStats() - { return thread->kernelStats; } - - virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } - - virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); - - void delVirtPort(VirtualPort *vp); -#else - virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } - - /** Returns a pointer to this thread's process. */ - virtual Process *getProcessPtr() { return thread->getProcessPtr(); } -#endif - /** Returns this thread's status. */ - virtual Status status() const { return thread->status(); } - - /** Sets this thread's status. */ - virtual void setStatus(Status new_status) - { thread->setStatus(new_status); } - - /** Set the status to Active. Optional delay indicates number of - * cycles to wait before beginning execution. */ - virtual void activate(int delay = 1); - - /** Set the status to Suspended. */ - virtual void suspend(); - - /** Set the status to Unallocated. */ - virtual void deallocate(); - - /** Set the status to Halted. */ - virtual void halt(); - -#if FULL_SYSTEM - /** Dumps the function profiling information. - * @todo: Implement. - */ - virtual void dumpFuncProfile(); -#endif - /** Takes over execution of a thread from another CPU. */ - virtual void takeOverFrom(ThreadContext *old_context); - - /** Registers statistics associated with this TC. */ - virtual void regStats(const std::string &name); - - /** Serializes state. */ - virtual void serialize(std::ostream &os); - /** Unserializes state. */ - virtual void unserialize(Checkpoint *cp, const std::string §ion); - -#if FULL_SYSTEM - /** Returns pointer to the quiesce event. */ - virtual EndQuiesceEvent *getQuiesceEvent(); - - /** Reads the last tick that this thread was activated on. */ - virtual Tick readLastActivate(); - /** Reads the last tick that this thread was suspended on. */ - virtual Tick readLastSuspend(); - - /** Clears the function profiling information. */ - virtual void profileClear(); - /** Samples the function profiling information. */ - virtual void profileSample(); -#endif - /** Returns this thread's ID number. */ - virtual int getThreadNum() { return thread->readTid(); } - - /** Returns the instruction this thread is currently committing. - * Only used when an instruction faults. - */ - virtual TheISA::MachInst getInst(); - - /** Copies the architectural registers from another TC into this TC. */ - virtual void copyArchRegs(ThreadContext *tc); - - /** Resets all architectural registers to 0. */ - virtual void clearArchRegs(); - - /** Reads an integer register. */ - virtual uint64_t readIntReg(int reg_idx); - - virtual FloatReg readFloatReg(int reg_idx, int width); - - virtual FloatReg readFloatReg(int reg_idx); - - virtual FloatRegBits readFloatRegBits(int reg_idx, int width); - - virtual FloatRegBits readFloatRegBits(int reg_idx); - - /** Sets an integer register to a value. */ - virtual void setIntReg(int reg_idx, uint64_t val); - - virtual void setFloatReg(int reg_idx, FloatReg val, int width); - - virtual void setFloatReg(int reg_idx, FloatReg val); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val); - - /** Reads this thread's PC. */ - virtual uint64_t readPC() - { return cpu->readPC(thread->readTid()); } - - /** Sets this thread's PC. */ - virtual void setPC(uint64_t val); - - /** Reads this thread's next PC. */ - virtual uint64_t readNextPC() - { return cpu->readNextPC(thread->readTid()); } - - /** Sets this thread's next PC. */ - virtual void setNextPC(uint64_t val); - - virtual uint64_t readNextNPC() - { - panic("Alpha has no NextNPC!"); - return 0; - } - - virtual void setNextNPC(uint64_t val) - { } - - /** Reads a miscellaneous register. */ - virtual MiscReg readMiscReg(int misc_reg) - { return cpu->readMiscReg(misc_reg, thread->readTid()); } - - /** Reads a misc. register, including any side-effects the - * read might have as defined by the architecture. */ - virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) - { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } - - /** Sets a misc. register. */ - virtual Fault setMiscReg(int misc_reg, const MiscReg &val); - - /** Sets a misc. register, including any side-effects the - * write might have as defined by the architecture. */ - virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); - - /** Returns the number of consecutive store conditional failures. */ - // @todo: Figure out where these store cond failures should go. - virtual unsigned readStCondFailures() - { return thread->storeCondFailures; } - - /** Sets the number of consecutive store conditional failures. */ - virtual void setStCondFailures(unsigned sc_failures) - { thread->storeCondFailures = sc_failures; } - -#if FULL_SYSTEM - /** Returns if the thread is currently in PAL mode, based on - * the PC's value. */ - virtual bool inPalMode() - { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } -#endif - // Only really makes sense for old CPU model. Lots of code - // outside the CPU still checks this function, so it will - // always return false to keep everything working. - /** Checks if the thread is misspeculating. Because it is - * very difficult to determine if the thread is - * misspeculating, this is set as false. */ - virtual bool misspeculating() { return false; } - -#if !FULL_SYSTEM - /** Gets a syscall argument by index. */ - virtual IntReg getSyscallArg(int i); - - /** Sets a syscall argument. */ - virtual void setSyscallArg(int i, IntReg val); - - /** Sets the syscall return value. */ - virtual void setSyscallReturn(SyscallReturn return_value); - - /** Executes a syscall in SE mode. */ - virtual void syscall(int64_t callnum) - { return cpu->syscall(callnum, thread->readTid()); } - - /** Reads the funcExeInst counter. */ - virtual Counter readFuncExeInst() { return thread->funcExeInst; } -#endif - virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, - TheISA::RegFile::ContextVal val) - { panic("Not supported on Alpha!"); } - }; - -#if FULL_SYSTEM - /** ITB pointer. */ - AlphaITB *itb; - /** DTB pointer. */ - AlphaDTB *dtb; -#endif - - /** Registers statistics. */ - void regStats(); - -#if FULL_SYSTEM - /** Translates instruction requestion. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return itb->translate(req, thread->getTC()); - } - - /** Translates data read request. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), false); - } - - /** Translates data write request. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), true); - } - -#else - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - -#endif - /** Reads a miscellaneous register. */ - MiscReg readMiscReg(int misc_reg, unsigned tid); - - /** Reads a misc. register, including any side effects the read - * might have as defined by the architecture. - */ - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); - - /** Sets a miscellaneous register. */ - Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); - - /** Sets a misc. register, including any side effects the write - * might have as defined by the architecture. - */ - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); - - /** Initiates a squash of all in-flight instructions for a given - * thread. The source of the squash is an external update of - * state through the TC. - */ - void squashFromTC(unsigned tid); - -#if FULL_SYSTEM - /** Posts an interrupt. */ - void post_interrupt(int int_num, int index); - /** Reads the interrupt flag. */ - int readIntrFlag(); - /** Sets the interrupt flags. */ - void setIntrFlag(int val); - /** HW return from error interrupt. */ - Fault hwrei(unsigned tid); - /** Returns if a specific PC is a PAL mode PC. */ - bool inPalMode(uint64_t PC) - { return AlphaISA::PcPAL(PC); } - - /** Traps to handle given fault. */ - void trap(Fault fault, unsigned tid); - bool simPalCheck(int palFunc, unsigned tid); - - /** Processes any interrupts. */ - void processInterrupts(); - - /** Halts the CPU. */ - void halt() { panic("Halt not implemented!\n"); } -#endif - - -#if !FULL_SYSTEM - /** Executes a syscall. - * @todo: Determine if this needs to be virtual. - */ - void syscall(int64_t callnum, int tid); - /** Gets a syscall argument. */ - IntReg getSyscallArg(int i, int tid); - - /** Used to shift args for indirect syscall. */ - void setSyscallArg(int i, IntReg val, int tid); - - /** Sets the return value of a syscall. */ - void setSyscallReturn(SyscallReturn return_value, int tid); -#endif - - /** CPU read function, forwards read to LSQ. */ - template <class T> - Fault read(RequestPtr &req, T &data, int load_idx) - { - return this->iew.ldstQueue.read(req, data, load_idx); - } - - /** CPU write function, forwards write to LSQ. */ - template <class T> - Fault write(RequestPtr &req, T &data, int store_idx) - { - return this->iew.ldstQueue.write(req, data, store_idx); - } - - Addr lockAddr; - - /** Temporary fix for the lock flag, works in the UP case. */ - bool lockFlag; -}; - -#endif // __CPU_O3_ALPHA_FULL_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh deleted file mode 100644 index 98290e57f..000000000 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ /dev/null @@ -1,872 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#include "arch/alpha/faults.hh" -#include "base/cprintf.hh" -#include "base/statistics.hh" -#include "base/timebuf.hh" -#include "cpu/checker/thread_context.hh" -#include "sim/sim_events.hh" -#include "sim/stats.hh" - -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_params.hh" -#include "cpu/o3/comm.hh" -#include "cpu/o3/thread_state.hh" - -#if FULL_SYSTEM -#include "arch/alpha/osfpal.hh" -#include "arch/isa_traits.hh" -#include "cpu/quiesce_event.hh" -#include "kern/kernel_stats.hh" -#include "sim/system.hh" -#endif - -using namespace TheISA; - -template <class Impl> -AlphaFullCPU<Impl>::AlphaFullCPU(Params *params) -#if FULL_SYSTEM - : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb) -#else - : FullO3CPU<Impl>(params) -#endif -{ - DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); - - // Setup any thread state. - this->thread.resize(this->numThreads); - - for (int i = 0; i < this->numThreads; ++i) { -#if FULL_SYSTEM - // SMT is not supported in FS mode yet. - assert(this->numThreads == 1); - this->thread[i] = new Thread(this, 0); - this->thread[i]->setStatus(ThreadContext::Suspended); -#else - if (i < params->workload.size()) { - DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x", - i, this->thread[i]); - this->thread[i] = new Thread(this, i, params->workload[i], - i, params->mem); - - this->thread[i]->setStatus(ThreadContext::Suspended); - -#if !FULL_SYSTEM - /* Use this port to for syscall emulation writes to memory. */ - Port *mem_port; - TranslatingPort *trans_port; - trans_port = new TranslatingPort(csprintf("%s-%d-funcport", - name(), i), - params->workload[i]->pTable, - false); - mem_port = params->mem->getPort("functional"); - mem_port->setPeer(trans_port); - trans_port->setPeer(mem_port); - this->thread[i]->setMemPort(trans_port); -#endif - //usedTids[i] = true; - //threadMap[i] = i; - } else { - //Allocate Empty thread so M5 can use later - //when scheduling threads to CPU - Process* dummy_proc = NULL; - - this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); - //usedTids[i] = false; - } -#endif // !FULL_SYSTEM - - ThreadContext *tc; - - // Setup the TC that will serve as the interface to the threads/CPU. - AlphaTC *alpha_tc = new AlphaTC; - - // If we're using a checker, then the TC should be the - // CheckerThreadContext. - if (params->checker) { - tc = new CheckerThreadContext<AlphaTC>( - alpha_tc, this->checker); - } else { - tc = alpha_tc; - } - - alpha_tc->cpu = this; - alpha_tc->thread = this->thread[i]; - -#if FULL_SYSTEM - // Setup quiesce event. - this->thread[i]->quiesceEvent = new EndQuiesceEvent(tc); - - Port *mem_port; - FunctionalPort *phys_port; - VirtualPort *virt_port; - phys_port = new FunctionalPort(csprintf("%s-%d-funcport", - name(), i)); - mem_port = this->system->physmem->getPort("functional"); - mem_port->setPeer(phys_port); - phys_port->setPeer(mem_port); - - virt_port = new VirtualPort(csprintf("%s-%d-vport", - name(), i)); - mem_port = this->system->physmem->getPort("functional"); - mem_port->setPeer(virt_port); - virt_port->setPeer(mem_port); - - this->thread[i]->setPhysPort(phys_port); - this->thread[i]->setVirtPort(virt_port); -#endif - // Give the thread the TC. - this->thread[i]->tc = tc; - - // Add the TC to the CPU's list of TC's. - this->threadContexts.push_back(tc); - } - - for (int i=0; i < this->numThreads; i++) { - this->thread[i]->setFuncExeInst(0); - } - - // Sets CPU pointers. These must be set at this level because the CPU - // pointers are defined to be the highest level of CPU class. - this->fetch.setCPU(this); - this->decode.setCPU(this); - this->rename.setCPU(this); - this->iew.setCPU(this); - this->commit.setCPU(this); - - this->rob.setCPU(this); - this->regFile.setCPU(this); - - lockAddr = 0; - lockFlag = false; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::regStats() -{ - // Register stats for everything that has stats. - this->fullCPURegStats(); - this->fetch.regStats(); - this->decode.regStats(); - this->rename.regStats(); - this->iew.regStats(); - this->commit.regStats(); -} - -#if FULL_SYSTEM -template <class Impl> -VirtualPort * -AlphaFullCPU<Impl>::AlphaTC::getVirtPort(ThreadContext *src_tc) -{ - if (!src_tc) - return thread->getVirtPort(); - - VirtualPort *vp; - Port *mem_port; - - vp = new VirtualPort("tc-vport", src_tc); - mem_port = cpu->system->physmem->getPort("functional"); - mem_port->setPeer(vp); - vp->setPeer(mem_port); - return vp; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::dumpFuncProfile() -{ - // Currently not supported -} -#endif - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::takeOverFrom(ThreadContext *old_context) -{ - // some things should already be set up -#if FULL_SYSTEM - assert(getSystemPtr() == old_context->getSystemPtr()); -#else - assert(getProcessPtr() == old_context->getProcessPtr()); -#endif - - // copy over functional state - setStatus(old_context->status()); - copyArchRegs(old_context); - setCpuId(old_context->readCpuId()); - -#if !FULL_SYSTEM - thread->funcExeInst = old_context->readFuncExeInst(); -#else - EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); - if (other_quiesce) { - // Point the quiesce event's TC at this TC so that it wakes up - // the proper CPU. - other_quiesce->tc = this; - } - if (thread->quiesceEvent) { - thread->quiesceEvent->tc = this; - } - - // Transfer kernel stats from one CPU to the other. - thread->kernelStats = old_context->getKernelStats(); -// storeCondFailures = 0; - cpu->lockFlag = false; -#endif - - old_context->setStatus(ThreadContext::Unallocated); - - thread->inSyscall = false; - thread->trapPending = false; -} - -#if FULL_SYSTEM -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::delVirtPort(VirtualPort *vp) -{ - delete vp->getPeer(); - delete vp; -} -#endif - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::activate(int delay) -{ - DPRINTF(FullCPU, "Calling activate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Active) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; -#endif - - if (thread->status() == ThreadContext::Unallocated) { - cpu->activateWhenReady(thread->readTid()); - return; - } - - thread->setStatus(ThreadContext::Active); - - // status() == Suspended - cpu->activateContext(thread->readTid(), delay); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::suspend() -{ - DPRINTF(FullCPU, "Calling suspend on AlphaTC\n"); - - if (thread->status() == ThreadContext::Suspended) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; - thread->lastSuspend = curTick; -#endif -/* -#if FULL_SYSTEM - // Don't change the status from active if there are pending interrupts - if (cpu->check_interrupts()) { - assert(status() == ThreadContext::Active); - return; - } -#endif -*/ - thread->setStatus(ThreadContext::Suspended); - cpu->suspendContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::deallocate() -{ - DPRINTF(FullCPU, "Calling deallocate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Unallocated) - return; - - thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::halt() -{ - DPRINTF(FullCPU, "Calling halt on AlphaTC\n"); - - if (thread->status() == ThreadContext::Halted) - return; - - thread->setStatus(ThreadContext::Halted); - cpu->haltContext(thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::regStats(const std::string &name) -{ -#if FULL_SYSTEM - thread->kernelStats = new Kernel::Statistics(cpu->system); - thread->kernelStats->regStats(name + ".kern"); -#endif -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::serialize(std::ostream &os) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->serialize(os); -#endif - -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::unserialize(Checkpoint *cp, const std::string §ion) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->unserialize(cp, section); -#endif - -} - -#if FULL_SYSTEM -template <class Impl> -EndQuiesceEvent * -AlphaFullCPU<Impl>::AlphaTC::getQuiesceEvent() -{ - return thread->quiesceEvent; -} - -template <class Impl> -Tick -AlphaFullCPU<Impl>::AlphaTC::readLastActivate() -{ - return thread->lastActivate; -} - -template <class Impl> -Tick -AlphaFullCPU<Impl>::AlphaTC::readLastSuspend() -{ - return thread->lastSuspend; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::profileClear() -{} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::profileSample() -{} -#endif - -template <class Impl> -TheISA::MachInst -AlphaFullCPU<Impl>::AlphaTC:: getInst() -{ - return thread->getInst(); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::copyArchRegs(ThreadContext *tc) -{ - // This function will mess things up unless the ROB is empty and - // there are no instructions in the pipeline. - unsigned tid = thread->readTid(); - PhysRegIndex renamed_reg; - - // First loop through the integer registers. - for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i); - - DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " - "now has data %lli.\n", - renamed_reg, cpu->readIntReg(renamed_reg), - tc->readIntReg(i)); - - cpu->setIntReg(renamed_reg, tc->readIntReg(i)); - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag); - cpu->setFloatRegBits(renamed_reg, - tc->readFloatRegBits(i)); - } - - // Copy the misc regs. - copyMiscRegs(tc, this); - - // Then finally set the PC and the next PC. - cpu->setPC(tc->readPC(), tid); - cpu->setNextPC(tc->readNextPC(), tid); -#if !FULL_SYSTEM - this->thread->funcExeInst = tc->readFuncExeInst(); -#endif -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::clearArchRegs() -{} - -template <class Impl> -uint64_t -AlphaFullCPU<Impl>::AlphaTC::readIntReg(int reg_idx) -{ - return cpu->readArchIntReg(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatReg -AlphaFullCPU<Impl>::AlphaTC::readFloatReg(int reg_idx, int width) -{ - switch(width) { - case 32: - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); - case 64: - return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); - default: - panic("Unsupported width!"); - return 0; - } -} - -template <class Impl> -FloatReg -AlphaFullCPU<Impl>::AlphaTC::readFloatReg(int reg_idx) -{ - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatRegBits -AlphaFullCPU<Impl>::AlphaTC::readFloatRegBits(int reg_idx, int width) -{ - DPRINTF(Fault, "Reading floatint register through the TC!\n"); - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template <class Impl> -FloatRegBits -AlphaFullCPU<Impl>::AlphaTC::readFloatRegBits(int reg_idx) -{ - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setIntReg(int reg_idx, uint64_t val) -{ - cpu->setArchIntReg(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width) -{ - switch(width) { - case 32: - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - break; - case 64: - cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); - break; - } - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatReg(int reg_idx, FloatReg val) -{ - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val, - int width) -{ - DPRINTF(Fault, "Setting floatint register through the TC!\n"); - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val) -{ - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setPC(uint64_t val) -{ - cpu->setPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setNextPC(uint64_t val) -{ - cpu->setNextPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::AlphaTC::setMiscRegWithEffect(int misc_reg, - const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, - thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -#if !FULL_SYSTEM - -template <class Impl> -TheISA::IntReg -AlphaFullCPU<Impl>::AlphaTC::getSyscallArg(int i) -{ - return cpu->getSyscallArg(i, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setSyscallArg(int i, IntReg val) -{ - cpu->setSyscallArg(i, val, thread->readTid()); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::AlphaTC::setSyscallReturn(SyscallReturn return_value) -{ - cpu->setSyscallReturn(return_value, thread->readTid()); -} - -#endif // FULL_SYSTEM - -template <class Impl> -MiscReg -AlphaFullCPU<Impl>::readMiscReg(int misc_reg, unsigned tid) -{ - return this->regFile.readMiscReg(misc_reg, tid); -} - -template <class Impl> -MiscReg -AlphaFullCPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault, - unsigned tid) -{ - return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) -{ - return this->regFile.setMiscReg(misc_reg, val, tid); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val, - unsigned tid) -{ - return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::squashFromTC(unsigned tid) -{ - this->thread[tid]->inSyscall = true; - this->commit.generateTCEvent(tid); -} - -#if FULL_SYSTEM - -template <class Impl> -void -AlphaFullCPU<Impl>::post_interrupt(int int_num, int index) -{ - BaseCPU::post_interrupt(int_num, index); - - if (this->thread[0]->status() == ThreadContext::Suspended) { - DPRINTF(IPI,"Suspended Processor awoke\n"); - this->threadContexts[0]->activate(); - } -} - -template <class Impl> -int -AlphaFullCPU<Impl>::readIntrFlag() -{ - return this->regFile.readIntrFlag(); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setIntrFlag(int val) -{ - this->regFile.setIntrFlag(val); -} - -template <class Impl> -Fault -AlphaFullCPU<Impl>::hwrei(unsigned tid) -{ - // Need to clear the lock flag upon returning from an interrupt. - this->lockFlag = false; - - this->thread[tid]->kernelStats->hwrei(); - - this->checkInterrupts = true; - - // FIXME: XXX check for interrupts? XXX - return NoFault; -} - -template <class Impl> -bool -AlphaFullCPU<Impl>::simPalCheck(int palFunc, unsigned tid) -{ - if (this->thread[tid]->kernelStats) - this->thread[tid]->kernelStats->callpal(palFunc, - this->threadContexts[tid]); - - switch (palFunc) { - case PAL::halt: - halt(); - if (--System::numSystemsRunning == 0) - exitSimLoop("all cpus halted"); - break; - - case PAL::bpt: - case PAL::bugchk: - if (this->system->breakpoint()) - return false; - break; - } - - return true; -} - -template <class Impl> -void -AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid) -{ - // Pass the thread's TC into the invoke method. - fault->invoke(this->threadContexts[tid]); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::processInterrupts() -{ - // Check for interrupts here. For now can copy the code that - // exists within isa_fullsys_traits.hh. Also assume that thread 0 - // is the one that handles the interrupts. - // @todo: Possibly consolidate the interrupt checking code. - // @todo: Allow other threads to handle interrupts. - - // Check if there are any outstanding interrupts - //Handle the interrupts - int ipl = 0; - int summary = 0; - - this->checkInterrupts = false; - - if (this->readMiscReg(IPR_ASTRR, 0)) - panic("asynchronous traps not implemented\n"); - - if (this->readMiscReg(IPR_SIRR, 0)) { - for (int i = INTLEVEL_SOFTWARE_MIN; - i < INTLEVEL_SOFTWARE_MAX; i++) { - if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) { - // See table 4-19 of the 21164 hardware reference - ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; - summary |= (ULL(1) << i); - } - } - } - - uint64_t interrupts = this->intr_status(); - - if (interrupts) { - for (int i = INTLEVEL_EXTERNAL_MIN; - i < INTLEVEL_EXTERNAL_MAX; i++) { - if (interrupts & (ULL(1) << i)) { - // See table 4-19 of the 21164 hardware reference - ipl = i; - summary |= (ULL(1) << i); - } - } - } - - if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { - this->setMiscReg(IPR_ISR, summary, 0); - this->setMiscReg(IPR_INTID, ipl, 0); - // Checker needs to know these two registers were updated. - if (this->checker) { - this->checker->threadBase()->setMiscReg(IPR_ISR, summary); - this->checker->threadBase()->setMiscReg(IPR_INTID, ipl); - } - this->trap(Fault(new InterruptFault), 0); - DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", - this->readMiscReg(IPR_IPLR, 0), ipl, summary); - } -} - -#endif // FULL_SYSTEM - -#if !FULL_SYSTEM - -template <class Impl> -void -AlphaFullCPU<Impl>::syscall(int64_t callnum, int tid) -{ - DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); - - DPRINTF(Activity,"Activity: syscall() called.\n"); - - // Temporarily increase this by one to account for the syscall - // instruction. - ++(this->thread[tid]->funcExeInst); - - // Execute the actual syscall. - this->thread[tid]->syscall(callnum); - - // Decrease funcExeInst by one as the normal commit will handle - // incrementing it. - --(this->thread[tid]->funcExeInst); -} - -template <class Impl> -TheISA::IntReg -AlphaFullCPU<Impl>::getSyscallArg(int i, int tid) -{ - return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setSyscallArg(int i, IntReg val, int tid) -{ - this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid); -} - -template <class Impl> -void -AlphaFullCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid) -{ - // check for error condition. Alpha syscall convention is to - // indicate success/failure in reg a3 (r19) and put the - // return value itself in the standard return value reg (v0). - if (return_value.successful()) { - // no error - this->setArchIntReg(SyscallSuccessReg, 0, tid); - this->setArchIntReg(ReturnValueReg, return_value.value(), tid); - } else { - // got an error, return details - this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); - this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); - } -} -#endif diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc new file mode 100644 index 000000000..0979c5c8f --- /dev/null +++ b/src/cpu/o3/base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/o3/isa_specific.hh" + +// Explicit instantiation +template class BaseDynInst<O3CPUImpl>; + +template <> +int +BaseDynInst<O3CPUImpl>::instcount = 0; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index b33543bdc..08fd4e8ea 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -29,11 +29,6 @@ */ #include "cpu/o3/bpred_unit_impl.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/ozone/ozone_impl.hh" -//#include "cpu/ozone/simple_impl.hh" +#include "cpu/o3/isa_specific.hh" -template class BPredUnit<AlphaSimpleImpl>; -template class BPredUnit<OzoneImpl>; -//template class BPredUnit<SimpleImpl>; +template class BPredUnit<O3CPUImpl>; diff --git a/src/cpu/checker/o3_cpu_builder.cc b/src/cpu/o3/checker_builder.cc index 59a6c7158..782d963b0 100644 --- a/src/cpu/checker/o3_cpu_builder.cc +++ b/src/cpu/o3/checker_builder.cc @@ -30,16 +30,19 @@ #include <string> -#include "cpu/checker/cpu.hh" +#include "cpu/checker/cpu_impl.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" +#include "cpu/o3/alpha/impl.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" class MemObject; +template +class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >; + /** * Specific non-templated derived class used for SimObject configuration. */ @@ -75,6 +78,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) Param<bool> defer_registration; Param<bool> exitOnError; + Param<bool> warnOnlyOnLoadError; Param<bool> function_trace; Param<Tick> function_trace_start; @@ -105,6 +109,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load " + "result errors", false), INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") @@ -121,6 +127,7 @@ CREATE_SIM_OBJECT(O3Checker) params->max_loads_any_thread = 0; params->max_loads_all_threads = 0; params->exitOnError = exitOnError; + params->warnOnlyOnLoadError = warnOnlyOnLoadError; params->deferRegistration = defer_registration; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 770008a33..637d59f52 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/commit_impl.hh" -template class DefaultCommit<AlphaSimpleImpl>; +template class DefaultCommit<O3CPUImpl>; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index b7404c488..956b6ec3e 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_COMMIT_HH__ @@ -67,7 +68,7 @@ class DefaultCommit { public: // Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; typedef typename Impl::CPUPol CPUPol; @@ -145,7 +146,7 @@ class DefaultCommit void regStats(); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the list of threads. */ void setThreads(std::vector<Thread *> &threads); @@ -161,10 +162,6 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); - void setFetchStage(Fetch *fetch_stage); - - Fetch *fetchStage; - /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); @@ -186,11 +183,14 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. */ void initStage(); - /** Initializes the switching out of commit. */ - void switchOut(); + /** Initializes the draining of commit. */ + bool drain(); + + /** Resumes execution after draining. */ + void resume(); /** Completes the switch out of commit. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -280,12 +280,20 @@ class DefaultCommit /** Sets the PC of a specific thread. */ void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } - /** Reads the PC of a specific thread. */ + /** Reads the next PC of a specific thread. */ uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } +#if THE_ISA != ALPHA_ISA + /** Reads the next NPC of a specific thread. */ + uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; } + + /** Sets the next NPC of a specific thread. */ + void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } +#endif + private: /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; @@ -317,16 +325,12 @@ class DefaultCommit ROB *rob; private: - /** Pointer to FullCPU. */ - FullCPU *cpu; + /** Pointer to O3CPU. */ + O3CPU *cpu; /** Vector of all of the threads. */ std::vector<Thread *> thread; - Fault fetchFault; - - int fetchTrapWait; - /** Records that commit has written to the time buffer this cycle. Used for * the CPU to determine if it can deschedule itself if there is no activity. */ @@ -365,11 +369,6 @@ class DefaultCommit */ unsigned renameWidth; - /** IEW width, in instructions. Used so ROB knows how many - * instructions to get from the IEW instruction queue. - */ - unsigned iewWidth; - /** Commit width, in instructions. */ unsigned commitWidth; @@ -379,8 +378,8 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; - /** Is a switch out pending. */ - bool switchPending; + /** Is a drain pending. */ + bool drainPending; /** Is commit switched out. */ bool switchedOut; @@ -390,10 +389,6 @@ class DefaultCommit */ Tick trapLatency; - Tick fetchTrapLatency; - - Tick fetchFaultTick; - /** The commit PC of each thread. Refers to the instruction that * is currently being processed/committed. */ @@ -402,6 +397,11 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA + /** The next NPC of each thread. */ + Addr nextNPC[Impl::MaxThreads]; +#endif + /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index ceb2918e0..904af1071 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -28,16 +28,22 @@ * Authors: Kevin Lim */ +#include "config/full_system.hh" +#include "config/use_checker.hh" + #include <algorithm> #include <string> #include "base/loader/symtab.hh" #include "base/timebuf.hh" -#include "cpu/checker/cpu.hh" #include "cpu/exetrace.hh" #include "cpu/o3/commit.hh" #include "cpu/o3/thread_state.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace std; template <class Impl> @@ -72,13 +78,11 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) renameToROBDelay(params->renameToROBDelay), fetchToCommitDelay(params->commitToFetchDelay), renameWidth(params->renameWidth), - iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), - switchPending(false), + drainPending(false), switchedOut(false), - trapLatency(params->trapLatency), - fetchTrapLatency(params->fetchTrapLatency) + trapLatency(params->trapLatency) { _status = Active; _nextStatus = Inactive; @@ -118,9 +122,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) tcSquash[i] = false; PC[i] = nextPC[i] = 0; } - - fetchFaultTick = 0; - fetchTrapWait = 0; } template <class Impl> @@ -205,19 +206,6 @@ DefaultCommit<Impl>::regStats() .flags(total) ; - // - // Commit-Eligible instructions... - // - // -> The number of instructions eligible to commit in those - // cycles where we reached our commit BW limit (less the number - // actually committed) - // - // -> The average value is computed over ALL CYCLES... not just - // the BW limited cycles - // - // -> The standard deviation is computed only over cycles where - // we reached the BW limit - // commitEligible .init(cpu->number_of_threads) .name(name() + ".COM:bw_limited") @@ -233,17 +221,16 @@ DefaultCommit<Impl>::regStats() template <class Impl> void -DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); cpu = cpu_ptr; // Commit must broadcast the number of free entries it has at the start of // the simulation, so it starts as active. - cpu->activateStage(FullCPU::CommitIdx); + cpu->activateStage(O3CPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); - fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template <class Impl> @@ -302,13 +289,6 @@ DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) template <class Impl> void -DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage) -{ - fetchStage = fetch_stage; -} - -template <class Impl> -void DefaultCommit<Impl>::setIEWStage(IEW *iew_stage) { iewStage = iew_stage; @@ -358,23 +338,38 @@ DefaultCommit<Impl>::initStage() } template <class Impl> -void -DefaultCommit<Impl>::switchOut() +bool +DefaultCommit<Impl>::drain() { - switchPending = true; + drainPending = true; + + // If it's already drained, return true. + if (rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + return true; + } + + return false; } template <class Impl> void -DefaultCommit<Impl>::doSwitchOut() +DefaultCommit<Impl>::switchOut() { switchedOut = true; - switchPending = false; + drainPending = false; rob->switchOut(); } template <class Impl> void +DefaultCommit<Impl>::resume() +{ + drainPending = false; +} + +template <class Impl> +void DefaultCommit<Impl>::takeOverFrom() { switchedOut = false; @@ -409,10 +404,10 @@ DefaultCommit<Impl>::updateStatus() if (_nextStatus == Inactive && _status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::CommitIdx); + cpu->deactivateStage(O3CPU::CommitIdx); } else if (_nextStatus == Active && _status == Inactive) { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::CommitIdx); + cpu->activateStage(O3CPU::CommitIdx); } _status = _nextStatus; @@ -434,7 +429,7 @@ DefaultCommit<Impl>::setNextStatus() } } - assert(squashes == squashCounter); + squashCounter = squashes; // If commit is currently squashing, then it will have activity for the // next cycle. Set its next status as active. @@ -539,8 +534,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid) commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); - - ++squashCounter; } template <class Impl> @@ -558,8 +551,6 @@ DefaultCommit<Impl>::squashFromTC(unsigned tid) cpu->activityThisCycle(); tcSquash[tid] = false; - - ++squashCounter; } template <class Impl> @@ -569,11 +560,15 @@ DefaultCommit<Impl>::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalSwitched(); + if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + drainPending = false; return; } + if ((*activeThreads).size() <= 0) + return; + list<unsigned>::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the @@ -585,10 +580,12 @@ DefaultCommit<Impl>::tick() if (rob->isDoneSquashing(tid)) { commitStatus[tid] = Running; - --squashCounter; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" - "insts this cycle.\n", tid); + " insts this cycle.\n", tid); + rob->doSquash(tid); + toIEW->commitInfo[tid].robSquashing = true; + wroteToTimeBuffer = true; } } } @@ -694,29 +691,7 @@ DefaultCommit<Impl>::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; -/* - if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { - // Record the fault. Wait until it's empty in the ROB. - // Then handle the trap. Ignore it if there's already a - // trap pending as fetch will be redirected. - fetchFault = fromFetch->fetchFault; - fetchFaultTick = curTick + fetchTrapLatency; - commitStatus[0] = FetchTrapPending; - DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " - "ROB empties without squashing the fault.\n"); - fetchTrapWait = 0; - } - // Fetch may tell commit to clear the trap if it's been squashed. - if (fromFetch->clearFetchFault) { - DPRINTF(Commit, "Received clear fetch fault signal\n"); - fetchTrapWait = 0; - if (commitStatus[0] == FetchTrapPending) { - DPRINTF(Commit, "Clearing fault from fetch\n"); - commitStatus[0] = Running; - } - } -*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. if (trapSquash[tid] == true) { @@ -744,8 +719,6 @@ DefaultCommit<Impl>::commit() commitStatus[tid] = ROBSquashing; - ++squashCounter; - // If we want to include the squashing instruction in the squash, // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; @@ -947,7 +920,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->reachedCommit = true; + head_inst->setAtCommit(); if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || @@ -1012,18 +985,19 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->setCompleted(); } +#if USE_CHECKER // Use checker prior to updating anything due to traps or PC // based events. if (cpu->checker) { - cpu->checker->tick(head_inst); + cpu->checker->verify(head_inst); } +#endif // Check if the instruction caused a fault. If so, trap. Fault inst_fault = head_inst->getFault(); if (inst_fault != NoFault) { head_inst->setCompleted(); -#if FULL_SYSTEM DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", head_inst->seqNum, head_inst->readPC()); @@ -1032,9 +1006,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } +#if USE_CHECKER if (cpu->checker && head_inst->isStore()) { - cpu->checker->tick(head_inst); + cpu->checker->verify(head_inst); } +#endif assert(!thread[tid]->inSyscall); @@ -1065,10 +1041,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) generateTrapEvent(tid); return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - head_inst->PC); -#endif // FULL_SYSTEM } updateComInstStats(head_inst); @@ -1256,7 +1228,8 @@ DefaultCommit<Impl>::roundRobin() unsigned tid = *pri_iter; if (commitStatus[tid] == Running || - commitStatus[tid] == Idle) { + commitStatus[tid] == Idle || + commitStatus[tid] == FetchTrapPending) { if (rob->isHeadReady(tid)) { priority_list.erase(pri_iter); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 788c6b164..b407f4fcc 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -26,9 +26,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" +#include "config/use_checker.hh" #if FULL_SYSTEM #include "sim/system.hh" @@ -37,26 +39,28 @@ #endif #include "cpu/activity.hh" -#include "cpu/checker/cpu.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/cpu.hh" #include "sim/root.hh" #include "sim/stat_control.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace std; using namespace TheISA; -BaseFullCPU::BaseFullCPU(Params *params) +BaseO3CPU::BaseO3CPU(Params *params) : BaseCPU(params), cpu_id(0) { } void -BaseFullCPU::regStats() +BaseO3CPU::regStats() { BaseCPU::regStats(); } @@ -82,8 +86,67 @@ FullO3CPU<Impl>::TickEvent::description() } template <class Impl> +FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::ActivateThreadEvent::init(int thread_num, + FullO3CPU<Impl> *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template <class Impl> +void +FullO3CPU<Impl>::ActivateThreadEvent::process() +{ + cpu->activateThread(tid); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::ActivateThreadEvent::description() +{ + return "FullO3CPU \"Activate Thread\" event"; +} + +template <class Impl> +FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num, + FullO3CPU<Impl> *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template <class Impl> +void +FullO3CPU<Impl>::DeallocateContextEvent::process() +{ + cpu->deactivateThread(tid); + cpu->removeThread(tid); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::DeallocateContextEvent::description() +{ + return "FullO3CPU \"Deallocate Context\" event"; +} + +template <class Impl> FullO3CPU<Impl>::FullO3CPU(Params *params) - : BaseFullCPU(params), + : BaseO3CPU(params), tickEvent(this), removeInstsThisCycle(false), fetch(params), @@ -94,7 +157,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs), - freeList(params->numberOfThreads,//number of activeThreads + freeList(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs), @@ -102,21 +165,20 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) params->smtROBPolicy, params->smtROBThreshold, params->numberOfThreads), - scoreboard(params->numberOfThreads,//number of activeThreads + scoreboard(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs, TheISA::NumMiscRegs * number_of_threads, TheISA::ZeroReg), - // For now just have these time buffers be pretty big. - // @todo: Make these time buffer sizes parameters or derived - // from latencies - timeBuffer(5, 5), - fetchQueue(5, 5), - decodeQueue(5, 5), - renameQueue(5, 5), - iewQueue(5, 5), - activityRec(NumStages, 10, params->activity), + timeBuffer(params->backComSize, params->forwardComSize), + fetchQueue(params->backComSize, params->forwardComSize), + decodeQueue(params->backComSize, params->forwardComSize), + renameQueue(params->backComSize, params->forwardComSize), + iewQueue(params->backComSize, params->forwardComSize), + activityRec(NumStages, + params->backComSize + params->forwardComSize, + params->activity), globalSeqNum(1), @@ -125,21 +187,25 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) physmem(system->physmem), #endif // FULL_SYSTEM mem(params->mem), - switchCount(0), + drainCount(0), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { _status = Idle; + checker = NULL; + if (params->checker) { +#if USE_CHECKER BaseCPU *temp_checker = params->checker; checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker); checker->setMemory(mem); #if FULL_SYSTEM checker->setSystem(params->system); #endif - } else { - checker = NULL; +#else + panic("Checker enabled but not compiled in!"); +#endif // USE_CHECKER } #if !FULL_SYSTEM @@ -177,13 +243,18 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); - commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); #if !FULL_SYSTEM int active_threads = params->workload.size(); + + if (active_threads > Impl::MaxThreads) { + panic("Workload Size too large. Increase the 'MaxThreads'" + "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) or " + "edit your workload size."); + } #else int active_threads = 1; #endif @@ -249,6 +320,8 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) lastRunningCycle = curTick; + lastActivatedCycle = -1; + contextSwitch = false; } @@ -261,9 +334,9 @@ template <class Impl> void FullO3CPU<Impl>::fullCPURegStats() { - BaseFullCPU::regStats(); + BaseO3CPU::regStats(); - // Register any of the FullCPU's stats here. + // Register any of the O3CPU's stats here. timesIdled .name(name() + ".timesIdled") .desc("Number of times that the entire CPU went into an idle state and" @@ -316,10 +389,22 @@ FullO3CPU<Impl>::fullCPURegStats() } template <class Impl> +Port * +FullO3CPU<Impl>::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return iew.getDcachePort(); + else if (if_name == "icache_port") + return fetch.getIcachePort(); + else + panic("No Such Port\n"); +} + +template <class Impl> void FullO3CPU<Impl>::tick() { - DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n"); + DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); ++numCycles; @@ -355,7 +440,8 @@ FullO3CPU<Impl>::tick() } if (!tickEvent.scheduled()) { - if (_status == SwitchedOut) { + if (_status == SwitchedOut || + getState() == SimObject::Drained) { // increment stat lastRunningCycle = curTick; } else if (!activityRec.active()) { @@ -416,16 +502,107 @@ FullO3CPU<Impl>::init() template <class Impl> void +FullO3CPU<Impl>::activateThread(unsigned tid) +{ + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", + tid); + + activeThreads.push_back(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deactivateThread(unsigned tid) +{ + //Remove From Active List, if Active + list<unsigned>::iterator thread_it = + find(activeThreads.begin(), activeThreads.end(), tid); + + if (thread_it != activeThreads.end()) { + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(thread_it); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } + + if(lastActivatedCycle < curTick) { + scheduleTickEvent(delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } +} + +template <class Impl> +void +FullO3CPU<Impl>::deallocateContext(int tid, int delay) +{ + // Schedule removal of thread data from CPU + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleDeallocateContextEvent(tid, delay); + } else { + deactivateThread(tid); + removeThread(tid); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::suspendContext(int tid) +{ + DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); + deactivateThread(tid); + if (activeThreads.size() == 0) + unscheduleTickEvent(); + _status = Idle; +} + +template <class Impl> +void +FullO3CPU<Impl>::haltContext(int tid) +{ + //For now, this is the same as deallocate + DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); + deallocateContext(tid, 1); +} + +template <class Impl> +void FullO3CPU<Impl>::insertThread(unsigned tid) { - DPRINTF(FullCPU,"[tid:%i] Initializing thread data"); + DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); // Will change now that the PC and thread state is internal to the CPU // and not in the ThreadContext. -#if 0 #if FULL_SYSTEM ThreadContext *src_tc = system->threadContexts[tid]; #else - ThreadContext *src_tc = thread[tid]; + ThreadContext *src_tc = tcBase(tid); #endif //Bind Int Regs to Rename Map @@ -445,11 +622,14 @@ FullO3CPU<Impl>::insertThread(unsigned tid) } //Copy Thread Data Into RegFile - this->copyFromTC(tid); + //this->copyFromTC(tid); - //Set PC/NPC - regFile.pc[tid] = src_tc->readPC(); - regFile.npc[tid] = src_tc->readNextPC(); + //Set PC/NPC/NNPC + setPC(src_tc->readPC(), tid); + setNextPC(src_tc->readNextPC(), tid); +#if THE_ISA != ALPHA_ISA + setNextNPC(src_tc->readNextNPC(), tid); +#endif src_tc->setStatus(ThreadContext::Active); @@ -458,16 +638,19 @@ FullO3CPU<Impl>::insertThread(unsigned tid) //Reset ROB/IQ/LSQ Entries commit.rob->resetEntries(); iew.resetEntries(); -#endif } template <class Impl> void FullO3CPU<Impl>::removeThread(unsigned tid) { - DPRINTF(FullCPU,"[tid:%i] Removing thread data"); -#if 0 - //Unbind Int Regs from Rename Map + DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); + + // Copy Thread Data From RegFile + // If thread is suspended, it might be re-allocated + //this->copyToTC(tid); + + // Unbind Int Regs from Rename Map for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); @@ -475,7 +658,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Unbind Float Regs from Rename Map + // Unbind Float Regs from Rename Map for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(freg); @@ -483,27 +666,20 @@ FullO3CPU<Impl>::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Copy Thread Data From RegFile - /* Fix Me: - * Do we really need to do this if we are removing a thread - * in the sense that it's finished (exiting)? If the thread is just - * being suspended we might... - */ -// this->copyToTC(tid); - - //Squash Throughout Pipeline + // Squash Throughout Pipeline fetch.squash(0,tid); decode.squash(tid); rename.squash(tid); + iew.squash(tid); + commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); assert(iew.ldstQueue.getCount(tid) == 0); - //Reset ROB/IQ/LSQ Entries + // Reset ROB/IQ/LSQ Entries if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } -#endif } @@ -511,37 +687,37 @@ template <class Impl> void FullO3CPU<Impl>::activateWhenReady(int tid) { - DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming" + DPRINTF(O3CPU,"[tid:%i]: Checking if resources are available for incoming" "(e.g. PhysRegs/ROB/IQ/LSQ) \n", tid); bool ready = true; if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "Phys. Int. Regs.\n", tid); ready = false; } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "Phys. Float. Regs.\n", tid); ready = false; } else if (commit.rob->numFreeEntries() >= commit.rob->entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "ROB entries.\n", tid); ready = false; } else if (iew.instQueue.numFreeEntries() >= iew.instQueue.entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "IQ entries.\n", tid); ready = false; } else if (iew.ldstQueue.numFreeEntries() >= iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "LSQ entries.\n", tid); ready = false; @@ -559,6 +735,7 @@ FullO3CPU<Impl>::activateWhenReady(int tid) //blocks fetch contextSwitch = true; + //@todo: dont always add to waitlist //do waitlist cpuWaitList.push_back(tid); } @@ -566,133 +743,130 @@ FullO3CPU<Impl>::activateWhenReady(int tid) template <class Impl> void -FullO3CPU<Impl>::activateContext(int tid, int delay) +FullO3CPU<Impl>::serialize(std::ostream &os) { - // Needs to set each stage to running as well. - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive == activeThreads.end()) { - //May Need to Re-code this if the delay variable is the - //delay needed for thread to activate - DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", - tid); - - activeThreads.push_back(tid); + SERIALIZE_ENUM(_status); + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyTC(thread[i]->getTC()); + temp.serialize(os); } - - assert(_status == Idle || _status == SwitchedOut); - - scheduleTickEvent(delay); - - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. - activityRec.activity(); - fetch.wakeFromQuiesce(); - - _status = Running; } template <class Impl> void -FullO3CPU<Impl>::suspendContext(int tid) +FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion) { - DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid); - unscheduleTickEvent(); - _status = Idle; -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + UNSERIALIZE_ENUM(_status); + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyTC(thread[i]->getTC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getTC()->copyArchRegs(temp.getTC()); + } +} - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); +template <class Impl> +unsigned int +FullO3CPU<Impl>::drain(Event *drain_event) +{ + drainCount = 0; + fetch.drain(); + decode.drain(); + rename.drain(); + iew.drain(); + commit.drain(); + + // Wake the CPU and record activity so everything can drain out if + // the CPU was not able to immediately drain. + if (getState() != SimObject::Drained) { + // A bit of a hack...set the drainEvent after all the drain() + // calls have been made, that way if all of the stages drain + // immediately, the signalDrained() function knows not to call + // process on the drain event. + drainEvent = drain_event; + + wakeCPU(); + activityRec.activity(); + + return 1; + } else { + return 0; } -*/ } template <class Impl> void -FullO3CPU<Impl>::deallocateContext(int tid) +FullO3CPU<Impl>::resume() { - DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + assert(system->getMemoryMode() == System::Timing); + fetch.resume(); + decode.resume(); + rename.resume(); + iew.resume(); + commit.resume(); - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Running); - removeThread(tid); - } -*/ + if (_status == SwitchedOut || _status == Idle) + return; + + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + _status = Running; } template <class Impl> void -FullO3CPU<Impl>::haltContext(int tid) +FullO3CPU<Impl>::signalDrained() { - DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid); -/* - //Remove From Active List, if Active - list<unsigned>::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + if (++drainCount == NumStages) { + if (tickEvent.scheduled()) + tickEvent.squash(); - if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); + changeState(SimObject::Drained); - removeThread(tid); + if (drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } -*/ + assert(drainCount <= 5); } template <class Impl> void -FullO3CPU<Impl>::switchOut(Sampler *_sampler) +FullO3CPU<Impl>::switchOut() { - sampler = _sampler; - switchCount = 0; fetch.switchOut(); - decode.switchOut(); rename.switchOut(); - iew.switchOut(); commit.switchOut(); - - // Wake the CPU and record activity so everything can drain out if - // the CPU is currently idle. - wakeCPU(); - activityRec.activity(); -} - -template <class Impl> -void -FullO3CPU<Impl>::signalSwitched() -{ - if (++switchCount == NumStages) { - fetch.doSwitchOut(); - rename.doSwitchOut(); - commit.doSwitchOut(); - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } - - if (checker) - checker->switchOut(sampler); - - if (tickEvent.scheduled()) - tickEvent.squash(); - sampler->signalSwitched(); - _status = SwitchedOut; + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); } - assert(switchCount <= 5); + + _status = SwitchedOut; +#if USE_CHECKER + if (checker) + checker->switchOut(); +#endif } template <class Impl> @@ -700,7 +874,7 @@ void FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) { // Flush out any old data from the time buffers. - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < timeBuffer.getSize(); ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); @@ -730,7 +904,7 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) if (isActive == activeThreads.end()) { //May Need to Re-code this if the delay variable is the delay //needed for thread to activate - DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", + DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", tid); activeThreads.push_back(tid); @@ -922,6 +1096,22 @@ FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid) commit.setNextPC(val, tid); } +#if THE_ISA != ALPHA_ISA +template <class Impl> +uint64_t +FullO3CPU<Impl>::readNextNPC(unsigned tid) +{ + return commit.readNextNPC(tid); +} + +template <class Impl> +void +FullO3CPU<Impl>::setNextNNPC(uint64_t val,unsigned tid) +{ + commit.setNextNPC(val, tid); +} +#endif + template <class Impl> typename FullO3CPU<Impl>::ListIt FullO3CPU<Impl>::addInst(DynInstPtr &inst) @@ -958,7 +1148,7 @@ template <class Impl> void FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) { - DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x " + DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %#x " "[sn:%lli]\n", inst->threadNumber, inst->readPC(), inst->seqNum); @@ -972,7 +1162,7 @@ template <class Impl> void FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) { - DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction" + DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); ListIt end_it; @@ -982,12 +1172,12 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) if (instList.empty()) { return; } else if (rob.isEmpty(/*tid*/)) { - DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n"); + DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); end_it = instList.begin(); rob_empty = true; } else { end_it = (rob.readTailInst(tid))->getInstListIt(); - DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n"); + DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); } removeInstsThisCycle = true; @@ -1026,7 +1216,7 @@ FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, inst_iter--; - DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + DPRINTF(O3CPU, "Deleting instructions from instruction " "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", tid, seq_num, (*inst_iter)->seqNum); @@ -1048,7 +1238,7 @@ inline void FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid) { if ((*instIt)->threadNumber == tid) { - DPRINTF(FullCPU, "FullCPU: Squashing instruction, " + DPRINTF(O3CPU, "Squashing instruction, " "[tid:%i] [sn:%lli] PC %#x\n", (*instIt)->threadNumber, (*instIt)->seqNum, @@ -1069,7 +1259,7 @@ void FullO3CPU<Impl>::cleanUpRemovedInsts() { while (!removeList.empty()) { - DPRINTF(FullCPU, "FullCPU: Removing instruction, " + DPRINTF(O3CPU, "Removing instruction, " "[tid:%i] [sn:%lli] PC %#x\n", (*removeList.front())->threadNumber, (*removeList.front())->seqNum, @@ -1185,4 +1375,4 @@ FullO3CPU<Impl>::updateThreadPriority() } // Forward declaration of FullO3CPU. -template class FullO3CPU<AlphaSimpleImpl>; +template class FullO3CPU<O3CPUImpl>; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index ff41a3306..83cb966e3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_CPU_HH__ @@ -48,24 +49,33 @@ #include "cpu/o3/cpu_policy.hh" #include "cpu/o3/scoreboard.hh" #include "cpu/o3/thread_state.hh" +//#include "cpu/o3/thread_context.hh" #include "sim/process.hh" template <class> class Checker; class ThreadContext; +template <class> +class O3ThreadContext; + +class Checkpoint; class MemObject; class Process; -class BaseFullCPU : public BaseCPU +class BaseO3CPU : public BaseCPU { //Stuff that's pretty ISA independent will go here. public: typedef BaseCPU::Params Params; - BaseFullCPU(Params *params); + BaseO3CPU(Params *params); void regStats(); + /** Sets this CPU's ID. */ + void setCpuId(int id) { cpu_id = id; } + + /** Reads this CPU's ID. */ int readCpuId() { return cpu_id; } protected: @@ -78,7 +88,7 @@ class BaseFullCPU : public BaseCPU * tick() function for the CPU is defined here. */ template <class Impl> -class FullO3CPU : public BaseFullCPU +class FullO3CPU : public BaseO3CPU { public: typedef TheISA::FloatReg FloatReg; @@ -93,6 +103,8 @@ class FullO3CPU : public BaseFullCPU typedef typename std::list<DynInstPtr>::iterator ListIt; + friend class O3ThreadContext<Impl>; + public: enum Status { Running, @@ -105,6 +117,9 @@ class FullO3CPU : public BaseFullCPU /** Overall CPU status. */ Status _status; + /** Per-thread status in CPU, used for SMT. */ + Status _threadStatus[Impl::MaxThreads]; + private: class TickEvent : public Event { @@ -141,6 +156,92 @@ class FullO3CPU : public BaseFullCPU tickEvent.squash(); } + class ActivateThreadEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU<Impl> *cpu; + + public: + /** Constructs the event. */ + ActivateThreadEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU<Impl> *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule thread to activate , regardless of its current state. */ + void scheduleActivateThreadEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (activateThreadEvent[tid].squashed()) + activateThreadEvent[tid].reschedule(curTick + cycles(delay)); + else if (!activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule actiavte thread event, regardless of its current state. */ + void unscheduleActivateThreadEvent(int tid) + { + if (activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + + class DeallocateContextEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU<Impl> *cpu; + + public: + /** Constructs the event. */ + DeallocateContextEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU<Impl> *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule cpu to deallocate thread context.*/ + void scheduleDeallocateContextEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (deallocateContextEvent[tid].squashed()) + deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + else if (!deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule thread deallocation in CPU */ + void unscheduleDeallocateContextEvent(int tid) + { + if (deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads]; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(Params *params); @@ -150,6 +251,9 @@ class FullO3CPU : public BaseFullCPU /** Registers statistics. */ void fullCPURegStats(); + /** Returns a specific port. */ + Port *getPort(const std::string &if_name, int idx); + /** Ticks CPU, calling tick() on each stage, and checking the overall * activity to see if the CPU should deschedule itself. */ @@ -158,6 +262,16 @@ class FullO3CPU : public BaseFullCPU /** Initialize the CPU */ void init(); + /** Returns the Number of Active Threads in the CPU */ + int numActiveThreads() + { return activeThreads.size(); } + + /** Add Thread to Active Threads List */ + void activateThread(unsigned tid); + + /** Remove Thread from Active Threads List */ + void deactivateThread(unsigned tid); + /** Setup CPU to insert a thread's context */ void insertThread(unsigned tid); @@ -184,7 +298,7 @@ class FullO3CPU : public BaseFullCPU /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. */ - void deallocateContext(int tid); + void deallocateContext(int tid, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -200,6 +314,13 @@ class FullO3CPU : public BaseFullCPU /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); + /** Serialize state. */ + virtual void serialize(std::ostream &os); + + /** Unserialize from a checkpoint. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + public: /** Executes a syscall on this cycle. * --------------------------------------- * Note: this is a virtual function. CPU-Specific @@ -207,14 +328,21 @@ class FullO3CPU : public BaseFullCPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Switches out this CPU. */ - void switchOut(Sampler *sampler); + /** Starts draining the CPU's pipeline of all instructions in + * order to stop all memory accesses. */ + virtual unsigned int drain(Event *drain_event); + + /** Resumes execution after a drain. */ + virtual void resume(); /** Signals to this CPU that a stage has completed switching out. */ - void signalSwitched(); + void signalDrained(); + + /** Switches out this CPU. */ + virtual void switchOut(); /** Takes over from another CPU. */ - void takeOverFrom(BaseCPU *oldCPU); + virtual void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ InstSeqNum getAndIncrementInstSeq() @@ -299,6 +427,12 @@ class FullO3CPU : public BaseFullCPU /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid); + /** Reads the next NPC of a specific thread. */ + uint64_t readNextNPC(unsigned tid); + + /** Sets the next NPC of a specific thread. */ + void setNextNPC(uint64_t val, unsigned tid); + /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. */ @@ -481,11 +615,11 @@ class FullO3CPU : public BaseFullCPU /** Pointer to memory. */ MemObject *mem; - /** Pointer to the sampler */ - Sampler *sampler; + /** Event to call process() on once draining has completed. */ + Event *drainEvent; - /** Counter of how many stages have completed switching out. */ - int switchCount; + /** Counter of how many stages have completed draining. */ + int drainCount; /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; @@ -507,6 +641,9 @@ class FullO3CPU : public BaseFullCPU /** The cycle that the CPU was last running, used for statistics. */ Tick lastRunningCycle; + /** The cycle that the CPU was last activated by a new thread*/ + Tick lastActivatedCycle; + /** Number of Threads CPU can process */ unsigned numThreads; diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index 4924f018a..896e38331 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/decode_impl.hh" -template class DefaultDecode<AlphaSimpleImpl>; +template class DefaultDecode<O3CPUImpl>; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index ff88358d6..7f5ecbc26 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -48,7 +48,7 @@ class DefaultDecode { private: // Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; typedef typename Impl::CPUPol CPUPol; @@ -95,7 +95,7 @@ class DefaultDecode void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -109,8 +109,14 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); + /** Drains the decode stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the decode stage. */ - void switchOut(); + void switchOut() { } /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -189,7 +195,7 @@ class DefaultDecode private: // Interfaces to objects outside of decode. /** CPU interface. */ - FullCPU *cpu; + O3CPU *cpu; /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8a6ea6626..8b851c032 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -112,7 +112,7 @@ DefaultDecode<Impl>::regStats() template<class Impl> void -DefaultDecode<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Decode, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -165,11 +165,12 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr) } template <class Impl> -void -DefaultDecode<Impl>::switchOut() +bool +DefaultDecode<Impl>::drain() { - // Decode can immediately switch out. - cpu->signalSwitched(); + // Decode is done draining at any time. + cpu->signalDrained(); + return true; } template <class Impl> @@ -296,7 +297,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid && fromFetch->insts[i]->seqNum > inst->seqNum) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); } } @@ -345,7 +346,7 @@ DefaultDecode<Impl>::squash(unsigned tid) for (int i=0; i<fromFetch->size; i++) { if (fromFetch->insts[i]->threadNumber == tid) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); squash_count++; } } @@ -427,7 +428,7 @@ DefaultDecode<Impl>::updateStatus() DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::DecodeIdx); + cpu->activateStage(O3CPU::DecodeIdx); } } else { // If it's not unblocking, then decode will not have any internal @@ -436,7 +437,7 @@ DefaultDecode<Impl>::updateStatus() _status = Inactive; DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::DecodeIdx); + cpu->deactivateStage(O3CPU::DecodeIdx); } } } @@ -515,7 +516,7 @@ DefaultDecode<Impl>::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Decode, "[tid:%]: ROB is still squashing.\n",tid); + DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. decodeStatus[tid] = Squashing; diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh index 3659b1a37..c19fd0abf 100644 --- a/src/cpu/o3/dep_graph.hh +++ b/src/cpu/o3/dep_graph.hh @@ -68,6 +68,8 @@ class DependencyGraph : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) { } + ~DependencyGraph(); + /** Resize the dependency graph to have num_entries registers. */ void resize(int num_entries); @@ -121,6 +123,12 @@ class DependencyGraph }; template <class DynInstPtr> +DependencyGraph<DynInstPtr>::~DependencyGraph() +{ + delete [] dependGraph; +} + +template <class DynInstPtr> void DependencyGraph<DynInstPtr>::resize(int num_entries) { diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh new file mode 100644 index 000000000..a2cdf2dba --- /dev/null +++ b/src/cpu/o3/dyn_inst.hh @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#ifndef __CPU_O3_DYN_INST_HH__ +#define __CPU_O3_DYN_INST_HH__ + +#include "arch/isa_specific.hh" + +#if THE_ISA == ALPHA_ISA +template <class Impl> +class AlphaDynInst; + +struct AlphaSimpleImpl; + +typedef AlphaDynInst<AlphaSimpleImpl> O3DynInst; +#endif + +#endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index 5f52d0fca..d809b07e4 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/fetch_impl.hh" -template class DefaultFetch<AlphaSimpleImpl>; +template class DefaultFetch<O3CPUImpl>; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 76b32de68..931919af8 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_FETCH_HH__ @@ -35,12 +36,10 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/pc_event.hh" -#include "mem/packet.hh" +#include "mem/packet_impl.hh" #include "mem/port.hh" #include "sim/eventq.hh" -class Sampler; - /** * DefaultFetch class handles both single threaded and SMT fetch. Its * width is specified by the parameters; each cycle it tries to fetch @@ -57,7 +56,7 @@ class DefaultFetch typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; /** Typedefs from the CPU policy. */ @@ -163,8 +162,11 @@ class DefaultFetch /** Registers statistics. */ void regStats(); + /** Returns the icache port. */ + Port *getIcachePort() { return icachePort; } + /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer); @@ -181,11 +183,14 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); - /** Begins the switch out of the fetch stage. */ - void switchOut(); + /** Begins the drain of the fetch stage. */ + bool drain(); - /** Completes the switch out of the fetch stage. */ - void doSwitchOut(); + /** Resumes execution after a drain. */ + void resume(); + + /** Tells fetch stage to prepare to be switched out. */ + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -296,8 +301,8 @@ class DefaultFetch int branchCount(); private: - /** Pointer to the FullCPU. */ - FullCPU *cpu; + /** Pointer to the O3CPU. */ + O3CPU *cpu; /** Time buffer interface. */ TimeBuffer<TimeStruct> *timeBuffer; @@ -335,6 +340,15 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA + /** Per-thread next Next PC. + * This is not a real register but is used for + * architectures that use a branch-delay slot. + * (such as MIPS or Sparc) + */ + Addr nextNPC[Impl::MaxThreads]; +#endif + /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; @@ -390,6 +404,12 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. */ + Addr cacheDataPC[Impl::MaxThreads]; + + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; @@ -413,6 +433,9 @@ class DefaultFetch */ bool interruptPending; + /** Is there a drain pending. */ + bool drainPending; + /** Records if fetch is switched out. */ bool switchedOut; @@ -421,6 +444,7 @@ class DefaultFetch Stats::Scalar<> icacheStallCycles; /** Stat for total number of fetched instructions. */ Stats::Scalar<> fetchedInsts; + /** Total number of fetched branches. */ Stats::Scalar<> fetchedBranches; /** Stat for total number of predicted branches. */ Stats::Scalar<> predictedBranches; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index c0a2a5d09..4184e1867 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -26,8 +26,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ +#include "config/use_checker.hh" + #include "arch/isa_traits.hh" #include "arch/utility.hh" #include "cpu/checker/cpu.hh" @@ -106,13 +109,12 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) numThreads(params->numberOfThreads), numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), + drainPending(false), switchedOut(false) { if (numThreads > Impl::MaxThreads) fatal("numThreads is not a valid value\n"); - DPRINTF(Fetch, "Fetch constructor called\n"); - // Set fetch stage's status to inactive. _status = Inactive; @@ -125,6 +127,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Figure out fetch policy if (policy == "singlethread") { fetchPolicy = SingleThread; + if (numThreads > 1) + panic("Invalid Fetch Policy for a SMT workload."); } else if (policy == "roundrobin") { fetchPolicy = RoundRobin; DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); @@ -158,6 +162,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; stalls[tid].decode = 0; stalls[tid].rename = 0; @@ -268,7 +274,7 @@ DefaultFetch<Impl>::regStats() template<class Impl> void -DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Fetch, "Setting the CPU pointer.\n"); cpu = cpu_ptr; @@ -276,13 +282,11 @@ DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); - Port *mem_dport = mem->getPort(""); - icachePort->setPeer(mem_dport); - mem_dport->setPeer(icachePort); - +#if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); } +#endif // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. @@ -330,6 +334,9 @@ DefaultFetch<Impl>::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); +#if THE_ISA != ALPHA_ISA + nextNPC[tid] = cpu->readNextNPC(tid); +#endif } } @@ -349,18 +356,22 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) ++fetchIcacheSquashes; delete pkt->req; delete pkt; - memReq[tid] = NULL; return; } - // Wake up the CPU (if it went to sleep and was waiting on this completion - // event). - cpu->wakeCPU(); + memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize); + cacheDataValid[tid] = true; - DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", - tid); + if (!drainPending) { + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); - switchToActive(); + DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", + tid); + + switchToActive(); + } // Only switch to IcacheAccessComplete if we're not stalled as well. if (checkStall(tid)) { @@ -376,18 +387,27 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) } template <class Impl> +bool +DefaultFetch<Impl>::drain() +{ + // Fetch is ready to drain at any time. + cpu->signalDrained(); + drainPending = true; + return true; +} + +template <class Impl> void -DefaultFetch<Impl>::switchOut() +DefaultFetch<Impl>::resume() { - // Fetch is ready to switch out at any time. - switchedOut = true; - cpu->signalSwitched(); + drainPending = false; } template <class Impl> void -DefaultFetch<Impl>::doSwitchOut() +DefaultFetch<Impl>::switchOut() { + switchedOut = true; // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -404,6 +424,9 @@ DefaultFetch<Impl>::takeOverFrom() stalls[i].commit = 0; PC[i] = cpu->readPC(i); nextPC[i] = cpu->readNextPC(i); +#if THE_ISA != ALPHA_ISA + nextNPC[i] = cpu->readNextNPC(i); +#endif fetchStatus[i] = Running; } numInst = 0; @@ -430,7 +453,7 @@ DefaultFetch<Impl>::switchToActive() if (_status == Inactive) { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::FetchIdx); + cpu->activateStage(O3CPU::FetchIdx); _status = Active; } @@ -443,7 +466,7 @@ DefaultFetch<Impl>::switchToInactive() if (_status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::FetchIdx); + cpu->deactivateStage(O3CPU::FetchIdx); _status = Inactive; } @@ -488,7 +511,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) { + if (cacheBlocked || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or @@ -499,6 +522,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. @@ -530,7 +558,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(cacheData[tid]); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + + cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); @@ -549,7 +580,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid return false; } - DPRINTF(Fetch, "Doing cache access.\n"); + DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid); lastIcacheStall[tid] = curTick; @@ -662,7 +693,7 @@ DefaultFetch<Impl>::updateFetchStatus() "completion\n",tid); } - cpu->activateStage(FullCPU::FetchIdx); + cpu->activateStage(O3CPU::FetchIdx); } return Active; @@ -673,7 +704,7 @@ DefaultFetch<Impl>::updateFetchStatus() if (_status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::FetchIdx); + cpu->deactivateStage(O3CPU::FetchIdx); } return Inactive; @@ -714,12 +745,15 @@ DefaultFetch<Impl>::tick() // Reset the number of the instruction we're fetching. numInst = 0; +#if FULL_SYSTEM if (fromCommit->commitInfo[0].interruptPending) { interruptPending = true; } + if (fromCommit->commitInfo[0].clearInterrupt) { interruptPending = false; } +#endif for (threadFetched = 0; threadFetched < numFetchingThreads; threadFetched++) { @@ -817,7 +851,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Fetch, "[tid:%u]: ROB is still squashing Thread %u.\n", tid); + DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. fetchStatus[tid] = Squashing; @@ -885,7 +919,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) ////////////////////////////////////////// int tid = getFetchingThread(fetchPolicy); - if (tid == -1) { + if (tid == -1 || drainPending) { DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); // Breaks looping condition in tick() @@ -893,6 +927,8 @@ DefaultFetch<Impl>::fetch(bool &status_change) return; } + DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); + // The current PC. Addr &fetch_PC = PC[tid]; @@ -915,7 +951,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); if (!fetch_success) { - ++fetchMiscStallCycles; + if (cacheBlocked) { + ++icacheStallCycles; + } else { + ++fetchMiscStallCycles; + } return; } } else { @@ -984,11 +1024,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, next_PC, inst_seq, cpu); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " "[sn:%lli]\n", @@ -1020,7 +1060,7 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetch_PC = next_PC; if (instruction->isQuiesce()) { - warn("%lli: Quiesce instruction encountered, halting fetch!", + warn("cycle %lli: Quiesce instruction encountered, halting fetch!", curTick); fetchStatus[tid] = QuiescePending; ++numInst; @@ -1041,8 +1081,17 @@ DefaultFetch<Impl>::fetch(bool &status_change) if (fault == NoFault) { DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); +#if THE_ISA == ALPHA_ISA + PC[tid] = next_PC; + nextPC[tid] = next_PC + instSize; +#else PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; + nextPC[tid] = next_PC + instSize; + + thread->setNextPC(thread->readNextNPC()); + thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); +#endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB // miss) @@ -1065,11 +1114,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) next_PC, inst_seq, cpu); instruction->setPredTarg(next_PC + instSize); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); instruction->traceData = NULL; @@ -1085,9 +1134,9 @@ DefaultFetch<Impl>::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = true; - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #else // !FULL_SYSTEM - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #endif // FULL_SYSTEM } } @@ -1256,6 +1305,6 @@ int DefaultFetch<Impl>::branchCount() { list<unsigned>::iterator threads = (*activeThreads).begin(); - + panic("Branch Count Fetch policy unimplemented\n"); return *threads; } diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc index 545deea9b..42e329aca 100644 --- a/src/cpu/o3/fu_pool.cc +++ b/src/cpu/o3/fu_pool.cc @@ -31,7 +31,7 @@ #include <sstream> #include "cpu/o3/fu_pool.hh" -#include "encumbered/cpu/full/fu_pool.hh" +#include "cpu/func_unit.hh" #include "sim/builder.hh" using namespace std; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 8145f4cc7..f99be7fe0 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -28,9 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/iew_impl.hh" #include "cpu/o3/inst_queue.hh" -template class DefaultIEW<AlphaSimpleImpl>; +template class DefaultIEW<O3CPUImpl>; diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2e61af5fc..fb9afde54 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -68,7 +68,7 @@ class DefaultIEW //Typedefs from Impl typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; typedef typename CPUPol::IQ IQ; @@ -80,7 +80,7 @@ class DefaultIEW typedef typename CPUPol::RenameStruct RenameStruct; typedef typename CPUPol::IssueStruct IssueStruct; - friend class Impl::FullCPU; + friend class Impl::O3CPU; friend class CPUPol::IQ; public: @@ -125,8 +125,11 @@ class DefaultIEW /** Initializes stage; sends back the number of free IQ and LSQ entries. */ void initStage(); + /** Returns the dcache port. */ + Port *getDcachePort() { return ldstQueue.getDcachePort(); } + /** Sets CPU pointer for IEW, IQ, and LSQ. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets main time buffer used for backwards communication. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -143,11 +146,14 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); - /** Starts switch out of IEW stage. */ - void switchOut(); + /** Drains IEW stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume(); /** Completes switch out of IEW stage. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -204,6 +210,45 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#if DEBUG + std::set<InstSeqNum> wbList; + + void dumpWb() + { + std::set<InstSeqNum>::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -261,6 +306,9 @@ class DefaultIEW /** Processes inputs and changes state accordingly. */ void checkSignalsAndUpdate(unsigned tid); + /** Removes instructions from rename from a thread's instruction list. */ + void emptyRenameInsts(unsigned tid); + /** Sorts instructions coming from rename into lists separated by thread. */ void sortInsts(); @@ -328,7 +376,7 @@ class DefaultIEW private: /** CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; /** Records if IEW has written to the time buffer this cycle, so that the * CPU can deschedule itself if there is no activity. @@ -381,20 +429,12 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. */ unsigned issueWidth; - /** Width of execute, in instructions. Might make more sense to break - * down into FP vs int. - */ - unsigned executeWidth; - /** Index into queue of instructions being written back. */ unsigned wbNumInst; @@ -405,6 +445,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + unsigned wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; @@ -439,14 +490,6 @@ class DefaultIEW Stats::Scalar<> iewIQFullEvents; /** Stat for number of times the LSQ becomes full. */ Stats::Scalar<> iewLSQFullEvents; - /** Stat for total number of executed instructions. */ - Stats::Scalar<> iewExecutedInsts; - /** Stat for total number of executed load instructions. */ - Stats::Vector<> iewExecLoadInsts; - /** Stat for total number of executed store instructions. */ -// Stats::Scalar<> iewExecStoreInsts; - /** Stat for total number of squashed instructions skipped at execute. */ - Stats::Scalar<> iewExecSquashedInsts; /** Stat for total number of memory ordering violation events. */ Stats::Scalar<> memOrderViolationEvents; /** Stat for total number of incorrect predicted taken branches. */ @@ -456,28 +499,25 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Stat for total number of executed instructions. */ + Stats::Scalar<> iewExecutedInsts; + /** Stat for total number of executed load instructions. */ + Stats::Vector<> iewExecLoadInsts; + /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar<> iewExecSquashedInsts; /** Number of executed software prefetches. */ - Stats::Vector<> exeSwp; + Stats::Vector<> iewExecutedSwp; /** Number of executed nops. */ - Stats::Vector<> exeNop; + Stats::Vector<> iewExecutedNop; /** Number of executed meomory references. */ - Stats::Vector<> exeRefs; + Stats::Vector<> iewExecutedRefs; /** Number of executed branches. */ - Stats::Vector<> exeBranches; - -// Stats::Vector<> issued_ops; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; -*/ - /** Number of instructions issued per cycle. */ - Stats::Formula issueRate; + Stats::Vector<> iewExecutedBranches; /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; -// Stats::Formula issue_op_rate; -// Stats::Formula fu_busy_rate; + /** Number of instructions executed per cycle. */ + Stats::Formula iewExecRate; + /** Number of instructions sent to commit. */ Stats::Vector<> iewInstsToCommit; /** Number of instructions that writeback. */ @@ -490,7 +530,6 @@ class DefaultIEW * to resource contention. */ Stats::Vector<> wbPenalized; - /** Number of instructions per cycle written back. */ Stats::Formula wbRate; /** Average number of woken instructions per writeback. */ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 3929f2e19..684ae2295 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -42,17 +42,17 @@ using namespace std; template<class Impl> DefaultIEW<Impl>::DefaultIEW(Params *params) - : // @todo: Make this into a parameter. - issueToExecQueue(5, 5), + : issueToExecQueue(params->backComSize, params->forwardComSize), instQueue(params), ldstQueue(params), fuPool(params->fuPool), commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), - executeWidth(params->executeWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -75,8 +75,12 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -94,6 +98,7 @@ DefaultIEW<Impl>::regStats() using namespace Stats; instQueue.regStats(); + ldstQueue.regStats(); iewIdleCycles .name(name() + ".iewIdleCycles") @@ -139,20 +144,6 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewLSQFullEvents") .desc("Number of times the LSQ has become full, causing a stall"); - iewExecutedInsts - .name(name() + ".iewExecutedInsts") - .desc("Number of executed instructions"); - - iewExecLoadInsts - .init(cpu->number_of_threads) - .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed") - .flags(total); - - iewExecSquashedInsts - .name(name() + ".iewExecSquashedInsts") - .desc("Number of squashed instructions skipped in execute"); - memOrderViolationEvents .name(name() + ".memOrderViolationEvents") .desc("Number of memory order violations"); @@ -171,114 +162,105 @@ DefaultIEW<Impl>::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exeSwp + iewExecutedInsts + .name(name() + ".EXEC:insts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .init(cpu->number_of_threads) + .name(name() + ".EXEC:loads") + .desc("Number of load instructions executed") + .flags(total); + + iewExecSquashedInsts + .name(name() + ".EXEC:squashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + iewExecutedSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") - .flags(total) - ; + .flags(total); - exeNop + iewExecutedNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") - .flags(total) - ; + .flags(total); - exeRefs + iewExecutedRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") - .flags(total) - ; + .flags(total); - exeBranches + iewExecutedBranches .init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") - .flags(total) - ; - - issueRate - .name(name() + ".EXEC:rate") - .desc("Inst execution rate") - .flags(total) - ; - issueRate = iewExecutedInsts / cpu->numCycles; + .flags(total); iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") - .flags(total) - ; - iewExecStoreInsts = exeRefs - iewExecLoadInsts; -/* - for (int i=0; i<Num_OpClasses; ++i) { - stringstream subname; - subname << opClassStrings[i] << "_delay"; - issue_delay_dist.subname(i, subname.str()); - } -*/ - // - // Other stats - // + .flags(total); + iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts; + + iewExecRate + .name(name() + ".EXEC:rate") + .desc("Inst execution rate") + .flags(total); + + iewExecRate = iewExecutedInsts / cpu->numCycles; iewInstsToCommit .init(cpu->number_of_threads) .name(name() + ".WB:sent") .desc("cumulative count of insts sent to commit") - .flags(total) - ; + .flags(total); writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") - .flags(total) - ; + .flags(total); producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") - .flags(total) - ; + .flags(total); consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") - .flags(total) - ; + .flags(total); wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate = wbPenalized / writebackCount; wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") - .flags(total) - ; + .flags(total); wbFanout = producerInst / consumerInst; wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") - .flags(total) - ; + .flags(total); wbRate = writebackCount / cpu->numCycles; } @@ -299,7 +281,7 @@ DefaultIEW<Impl>::initStage() template<class Impl> void -DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(IEW, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -307,7 +289,7 @@ DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr) instQueue.setCPU(cpu_ptr); ldstQueue.setCPU(cpu_ptr); - cpu->activateStage(FullCPU::IEWIdx); + cpu->activateStage(O3CPU::IEWIdx); } template<class Impl> @@ -371,16 +353,23 @@ DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr) } template <class Impl> +bool +DefaultIEW<Impl>::drain() +{ + // IEW is ready to drain at any time. + cpu->signalDrained(); + return true; +} + +template <class Impl> void -DefaultIEW<Impl>::switchOut() +DefaultIEW<Impl>::resume() { - // IEW is ready to switch out at any time. - cpu->signalSwitched(); } template <class Impl> void -DefaultIEW<Impl>::doSwitchOut() +DefaultIEW<Impl>::switchOut() { // Clear any state. switchedOut = true; @@ -423,7 +412,7 @@ DefaultIEW<Impl>::takeOverFrom() updateLSQNextCycle = false; // @todo: Fix hardcoded number - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } } @@ -456,16 +445,7 @@ DefaultIEW<Impl>::squash(unsigned tid) skidBuffer[tid].pop(); } - while (!insts[tid].empty()) { - if (insts[tid].front()->isLoad() || - insts[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; - } - - toRename->iewInfo[tid].dispatched++; - - insts[tid].pop(); - } + emptyRenameInsts(tid); } template<class Impl> @@ -591,12 +571,12 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) < wbMax); } // Add finished instruction to queue to commit. @@ -611,7 +591,7 @@ DefaultIEW<Impl>::validInstsFromRename() unsigned inst_count = 0; for (int i=0; i<fromRename->size; i++) { - if (!fromRename->insts[i]->squashed) + if (!fromRename->insts[i]->isSquashed()) inst_count++; } @@ -799,10 +779,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid) } if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n"); + DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid); dispatchStatus[tid] = Squashing; + emptyRenameInsts(tid); + wroteToTimeBuffer = true; return; } @@ -853,6 +835,22 @@ DefaultIEW<Impl>::sortInsts() template <class Impl> void +DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) +{ + while (!insts[tid].empty()) { + if (insts[tid].front()->isLoad() || + insts[tid].front()->isStore() ) { + toRename->iewInfo[tid].dispatchedToLSQ++; + } + + toRename->iewInfo[tid].dispatched++; + + insts[tid].pop(); + } +} + +template <class Impl> +void DefaultIEW<Impl>::wakeCPU() { cpu->wakeCPU(); @@ -871,7 +869,7 @@ inline void DefaultIEW<Impl>::activateStage() { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::IEWIdx); + cpu->activateStage(O3CPU::IEWIdx); } template <class Impl> @@ -879,7 +877,7 @@ inline void DefaultIEW<Impl>::deactivateStage() { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::IEWIdx); + cpu->deactivateStage(O3CPU::IEWIdx); } template<class Impl> @@ -951,7 +949,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1090,7 +1088,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) instQueue.recordProducer(inst); - exeNop[tid]++; + iewExecutedNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1203,6 +1201,7 @@ DefaultIEW<Impl>::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1365,6 +1364,8 @@ DefaultIEW<Impl>::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } @@ -1501,9 +1502,9 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exeSwp[thread_number]++; + iewExecutedSwp[thread_number]++; else - iewExecutedInsts++; + iewIewExecutedcutedInsts++; #else iewExecutedInsts++; #endif @@ -1512,13 +1513,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exeBranches[thread_number]++; + iewExecutedBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exeRefs[thread_number]++; + iewExecutedRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index f2c6b8213..a539066f9 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -28,9 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/inst_queue_impl.hh" // Force instantiation of InstructionQueue. -template class InstructionQueue<AlphaSimpleImpl>; +template class InstructionQueue<O3CPUImpl>; diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 60a713020..4c69ca384 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ @@ -68,7 +69,7 @@ class InstructionQueue { public: //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; @@ -80,7 +81,7 @@ class InstructionQueue // Typedef of iterator through the list of instructions. typedef typename std::list<DynInstPtr>::iterator ListIt; - friend class Impl::FullCPU; + friend class Impl::O3CPU; /** FU completion event class. */ class FUCompletion : public Event { @@ -125,7 +126,7 @@ class InstructionQueue void resetState(); /** Sets CPU pointer. */ - void setCPU(FullCPU *_cpu) { cpu = _cpu; } + void setCPU(O3CPU *_cpu) { cpu = _cpu; } /** Sets active threads list. */ void setActiveThreads(std::list<unsigned> *at_ptr); @@ -252,7 +253,7 @@ class InstructionQueue ///////////////////////// /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Cache interface. */ MemInterface *dcacheInterface; @@ -474,12 +475,17 @@ class InstructionQueue /** Stat for number of non-speculative instructions removed due to a squash. */ Stats::Scalar<> iqSquashedNonSpecRemoved; + // Also include number of instructions rescheduled and replayed. - /** Distribution of number of instructions in the queue. */ + /** Distribution of number of instructions in the queue. + * @todo: Need to create struct to track the entry time for each + * instruction. */ Stats::VectorDistribution<> queueResDist; /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; - /** Distribution of the cycles it takes to issue an instruction. */ + /** Distribution of the cycles it takes to issue an instruction. + * @todo: Need to create struct to track the ready time for each + * instruction. */ Stats::VectorDistribution<> issueDelayDist; /** Number of times an instruction could not be issued because a @@ -492,8 +498,7 @@ class InstructionQueue /** Number of instructions issued per cycle. */ Stats::Formula issueRate; -// Stats::Formula issue_stores; -// Stats::Formula issue_op_rate; + /** Number of times the FU was busy. */ Stats::Vector<> fuBusy; /** Number of times the FU was busy per instruction issued. */ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 06a052c6f..36e0842be 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include <limits> @@ -125,7 +126,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) maxEntries[i] = part_amt; } - DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" + DPRINTF(IQ, "IQ sharing policy set to Partitioned:" "%i entries per thread.\n",part_amt); } else if (policy == "threshold") { @@ -140,7 +141,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params) maxEntries[i] = thresholdIQ; } - DPRINTF(Fetch, "IQ sharing policy set to Threshold:" + DPRINTF(IQ, "IQ sharing policy set to Threshold:" "%i entries per thread.\n",thresholdIQ); } else { assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," @@ -289,22 +290,7 @@ InstructionQueue<Impl>::regStats() .flags(total) ; issueRate = iqInstsIssued / cpu->numCycles; -/* - issue_stores - .name(name() + ".ISSUE:stores") - .desc("Number of stores issued") - .flags(total) - ; - issue_stores = exe_refs - exe_loads; -*/ -/* - issue_op_rate - .name(name() + ".ISSUE:op_rate") - .desc("Operation issue rate") - .flags(total) - ; - issue_op_rate = issued_ops / numCycles; -*/ + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") @@ -701,6 +687,7 @@ InstructionQueue<Impl>::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -791,13 +778,14 @@ InstructionQueue<Impl>::scheduleReadyInsts() // complete. ++freeEntries; count[tid]--; - issuing_inst->removeInIQ(); + issuing_inst->clearInIQ(); } else { memDepUnit[tid].issue(issuing_inst); } listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; @@ -1097,7 +1085,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid) // inst will flow through the rest of the pipeline. squashed_inst->setIssued(); squashed_inst->setCanCommit(); - squashed_inst->removeInIQ(); + squashed_inst->clearInIQ(); //Update Thread IQ Count count[squashed_inst->threadNumber]--; diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh new file mode 100755 index 000000000..f8a9dd8cc --- /dev/null +++ b/src/cpu/o3/isa_specific.hh @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#include "cpu/base.hh" + +#if THE_ISA == ALPHA_ISA + #include "cpu/o3/alpha/cpu.hh" + #include "cpu/o3/alpha/impl.hh" + #include "cpu/o3/alpha/params.hh" + #include "cpu/o3/alpha/dyn_inst.hh" +#else + #error "O3CPU doesnt support this ISA" +#endif diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index de0325920..527947281 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -28,11 +28,9 @@ * Authors: Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQ<AlphaSimpleImpl>; +template class LSQ<O3CPUImpl>; diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index bc4154c85..190734dc2 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -44,7 +44,7 @@ template <class Impl> class LSQ { public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; typedef typename Impl::CPUPol::LSQUnit LSQUnit; @@ -62,10 +62,20 @@ class LSQ { /** Returns the name of the LSQ. */ std::string name() const; + /** Registers statistics of each LSQ unit. */ + void regStats(); + + /** Returns dcache port. + * @todo: Dcache port needs to be moved up to this level for SMT + * to work. For now it just returns the port from one of the + * threads. + */ + Port *getDcachePort() { return &dcachePort; } + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the IEW stage pointer. */ void setIEW(IEW *iew_ptr); /** Switches out the LSQ. */ @@ -248,6 +258,15 @@ class LSQ { bool willWB(unsigned tid) { return thread[tid].willWB(); } + /** Returns if the cache is currently blocked. */ + bool cacheBlocked() + { return retryTid != -1; } + + /** Sets the retry thread id, indicating that one of the LSQUnits + * tried to access the cache but the cache was blocked. */ + void setRetryTid(int tid) + { retryTid = tid; } + /** Debugging function to print out all instructions. */ void dumpInsts(); /** Debugging function to print out instructions from a specific thread. */ @@ -264,7 +283,49 @@ class LSQ { template <class T> Fault write(RequestPtr req, T &data, int store_idx); - private: + /** DcachePort class for this LSQ. Handles doing the + * communication with the cache/memory. + */ + class DcachePort : public Port + { + protected: + /** Pointer to LSQ. */ + LSQ *lsq; + + public: + /** Default constructor. */ + DcachePort(LSQ *_lsq) + : lsq(_lsq) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles writing back and + * completing the load or store that has returned from + * memory. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of the previous send. */ + virtual void recvRetry(); + }; + + /** D-cache port. */ + DcachePort dcachePort; + + protected: /** The LSQ policy for SMT mode. */ LSQPolicy lsqPolicy; @@ -272,7 +333,7 @@ class LSQ { LSQUnit thread[Impl::MaxThreads]; /** The CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; /** The IEW stage pointer. */ IEW *iewStage; @@ -293,6 +354,10 @@ class LSQ { /** Number of Threads. */ unsigned numThreads; + + /** The thread id of the LSQ Unit that is currently waiting for a + * retry. */ + int retryTid; }; template <class Impl> diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 27aa0dc3c..4e3957029 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2005-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,9 +36,53 @@ using namespace std; template <class Impl> +Tick +LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template <class Impl> +bool +LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); + return true; +} + +template <class Impl> +void +LSQ<Impl>::DcachePort::recvRetry() +{ + lsq->thread[lsq->retryTid].recvRetry(); + // Speculatively clear the retry Tid. This will get set again if + // the LSQUnit was unable to complete its access. + lsq->retryTid = -1; +} + +template <class Impl> LSQ<Impl>::LSQ(Params *params) - : LQEntries(params->LQEntries), SQEntries(params->SQEntries), - numThreads(params->numberOfThreads) + : dcachePort(this), LQEntries(params->LQEntries), + SQEntries(params->SQEntries), numThreads(params->numberOfThreads), + retryTid(-1) { DPRINTF(LSQ, "Creating LSQ object.\n"); @@ -94,7 +138,8 @@ LSQ<Impl>::LSQ(Params *params) //Initialize LSQs for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid); + thread[tid].setDcachePort(&dcachePort); } } @@ -108,6 +153,16 @@ LSQ<Impl>::name() const template<class Impl> void +LSQ<Impl>::regStats() +{ + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + +template<class Impl> +void LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) { activeThreads = at_ptr; @@ -116,10 +171,12 @@ LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) template<class Impl> void -LSQ<Impl>::setCPU(FullCPU *cpu_ptr) +LSQ<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; + dcachePort.setName(name()); + for (int tid=0; tid < numThreads; tid++) { thread[tid].setCPU(cpu_ptr); } @@ -492,6 +549,9 @@ LSQ<Impl>::hasStoresToWB() { list<unsigned>::iterator active_threads = (*activeThreads).begin(); + if ((*activeThreads).empty()) + return false; + while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; if (!hasStoresToWB(tid)) diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index e935ffa5c..3ca3fa667 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -29,11 +29,9 @@ * Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_unit_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQUnit<AlphaSimpleImpl>; +template class LSQUnit<O3CPUImpl>; diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index ce0cdd36f..a76a73f0c 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -40,7 +40,7 @@ #include "config/full_system.hh" #include "base/hashmap.hh" #include "cpu/inst_seq.hh" -#include "mem/packet.hh" +#include "mem/packet_impl.hh" #include "mem/port.hh" /** @@ -61,9 +61,10 @@ class LSQUnit { typedef TheISA::IntReg IntReg; public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQ LSQ; typedef typename Impl::CPUPol::IssueStruct IssueStruct; public: @@ -71,19 +72,26 @@ class LSQUnit { LSQUnit(); /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, + void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id); /** Returns the name of the LSQ unit. */ std::string name() const; + /** Registers statistics. */ + void regStats(); + /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the IEW stage pointer. */ void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } + /** Sets the pointer to the dcache port. */ + void setDcachePort(Port *dcache_port) + { dcachePort = dcache_port; } + /** Switches out LSQ unit. */ void switchOut(); @@ -125,11 +133,10 @@ class LSQUnit { /** Writes back stores. */ void writebackStores(); + /** Completes the data access that has been returned from the + * memory system. */ void completeDataAccess(PacketPtr pkt); - // @todo: Include stats in the LSQ unit. - //void regStats(); - /** Clears all the entries in the LQ. */ void clearLQ(); @@ -204,6 +211,9 @@ class LSQUnit { !storeQueue[storeWBIdx].completed && !isStoreBlocked; } + /** Handles doing the retry. */ + void recvRetry(); + private: /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); @@ -214,9 +224,6 @@ class LSQUnit { /** Completes the store at the specified index. */ void completeStore(int store_idx); - /** Handles doing the retry. */ - void recvRetry(); - /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx); /** Decrements the given store index (circular queue). */ @@ -232,59 +239,16 @@ class LSQUnit { private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to the IEW stage. */ IEW *iewStage; - /** Pointer to memory object. */ - MemObject *mem; - - /** DcachePort class for this LSQ Unit. Handles doing the - * communication with the cache/memory. - * @todo: Needs to be moved to the LSQ level and have some sort - * of arbitration. - */ - class DcachePort : public Port - { - protected: - /** Pointer to CPU. */ - FullCPU *cpu; - /** Pointer to LSQ. */ - LSQUnit *lsq; - - public: - /** Default constructor. */ - DcachePort(FullCPU *_cpu, LSQUnit *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) - { } - - protected: - /** Atomic version of receive. Panics. */ - virtual Tick recvAtomic(PacketPtr pkt); + /** Pointer to the LSQ. */ + LSQ *lsq; - /** Functional version of receive. Panics. */ - virtual void recvFunctional(PacketPtr pkt); - - /** Receives status change. Other than range changing, panics. */ - virtual void recvStatusChange(Status status); - - /** Returns the address ranges of this device. */ - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - /** Timing version of receive. Handles writing back and - * completing the load or store that has returned from - * memory. */ - virtual bool recvTiming(PacketPtr pkt); - - /** Handles doing a retry of the previous send. */ - virtual void recvRetry(); - }; - - /** Pointer to the D-cache. */ - DcachePort *dcachePort; + /** Pointer to the dcache port. Used only for sending. */ + Port *dcachePort; /** Derived class to hold any sender state the LSQ needs. */ class LSQSenderState : public Packet::SenderState @@ -443,25 +407,35 @@ class LSQUnit { // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. -/* - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; + /** Total number of loads forwaded from LSQ stores. */ + Stats::Scalar<> lsqForwLoads; + + /** Total number of loads ignored due to invalid addresses. */ + Stats::Scalar<> invAddrLoads; - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; + /** Total number of squashed loads. */ + Stats::Scalar<> lsqSquashedLoads; - // total non-speculative bogus addresses seen (debug var) - Counter sim_invalid_addrs; - Stats::Vector<> fu_busy; //cumulative fu busy + /** Total number of responses from the memory system that are + * ignored due to the instruction already being squashed. */ + Stats::Scalar<> lsqIgnoredResponses; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; + /** Total number of squashed stores. */ + Stats::Scalar<> lsqSquashedStores; + + /** Total number of software prefetches ignored due to invalid addresses. */ + Stats::Scalar<> invAddrSwpfs; + + /** Ready loads blocked due to partial store-forwarding. */ + Stats::Scalar<> lsqBlockedLoads; + + /** Number of loads that were rescheduled. */ + Stats::Scalar<> lsqRescheduledLoads; + + /** Number of times the LSQ is blocked due to the cache. */ + Stats::Scalar<> lsqCacheBlocked; - Stats::Scalar<> lsqInversion; -*/ public: /** Executes the load at the given index. */ template <class T> @@ -517,8 +491,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). if (req->getFlags() & UNCACHEABLE && - (load_idx != loadHead || !load_inst->reachedCommit)) { + (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; return TheISA::genMachineCheckFault(); } @@ -598,7 +573,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // @todo: Need to make this a parameter. wb->schedule(curTick); - // Should keep track of stat for forwarded data + ++lsqForwLoads; return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || (store_has_upper_limit && upper_load_has_store_part) || @@ -626,6 +601,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not // complete. @@ -633,12 +609,13 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); + ++lsqBlockedLoads; return NoFault; } } // If there's no forwarding case, then go access memory - DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", load_inst->seqNum, load_inst->readPC()); assert(!load_inst->memData); @@ -646,9 +623,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", - load_inst->readPC()); - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); @@ -658,8 +632,19 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) state->inst = load_inst; data_pkt->senderState = state; - // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + // if we the cache is not blocked, do cache access + if (!lsq->cacheBlocked()) { + if (!dcachePort->sendTiming(data_pkt)) { + // If the access didn't succeed, tell the LSQ by setting + // the retry thread id. + lsq->setRetryTid(lsqID); + } + } + + // If the cache was blocked, or has become blocked due to the access, + // handle it. + if (lsq->cacheBlocked()) { + ++lsqCacheBlocked; // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) return NoFault; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 6f32ec304..85b150cd9 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -29,11 +29,18 @@ * Korey Sewell */ -#include "cpu/checker/cpu.hh" +#include "config/use_checker.hh" + +#include "cpu/o3/lsq.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" +#include "mem/packet.hh" #include "mem/request.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + template<class Impl> LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, LSQUnit *lsq_ptr) @@ -71,6 +78,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (isSwitchedOut() || inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); delete state; delete pkt; return; @@ -89,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt) } template <class Impl> -Tick -LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt) -{ - panic("O3CPU model does not work with atomic mode!"); - return curTick; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt) -{ - panic("O3CPU doesn't expect recvFunctional callback!"); -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvStatusChange(Status status) -{ - if (status == RangeChange) - return; - - panic("O3CPU doesn't expect recvStatusChange callback!"); -} - -template <class Impl> -bool -LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt) -{ - lsq->completeDataAccess(pkt); - return true; -} - -template <class Impl> -void -LSQUnit<Impl>::DcachePort::recvRetry() -{ - lsq->recvRetry(); -} - -template <class Impl> LSQUnit<Impl>::LSQUnit() : loads(0), stores(0), storesToWB(0), stalled(false), isStoreBlocked(false), isLoadBlocked(false), @@ -138,13 +106,15 @@ LSQUnit<Impl>::LSQUnit() template<class Impl> void -LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, +LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id) { DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); switchedOut = false; + lsq = lsq_ptr; + lsqID = id; // Add 1 for the sentinel entry (they are circular queues). @@ -161,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - mem = params->mem; - memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -170,18 +138,15 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, template<class Impl> void -LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr) +LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); - - Port *mem_dport = mem->getPort(""); - dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); +#if USE_CHECKER if (cpu->checker) { cpu->checker->setDcachePort(dcachePort); } +#endif } template<class Impl> @@ -197,6 +162,47 @@ LSQUnit<Impl>::name() const template<class Impl> void +LSQUnit<Impl>::regStats() +{ + lsqForwLoads + .name(name() + ".forwLoads") + .desc("Number of loads that had data forwarded from stores"); + + invAddrLoads + .name(name() + ".invAddrLoads") + .desc("Number of loads ignored due to an invalid address"); + + lsqSquashedLoads + .name(name() + ".squashedLoads") + .desc("Number of loads squashed"); + + lsqIgnoredResponses + .name(name() + ".ignoredResponses") + .desc("Number of memory responses ignored because the instruction is squashed"); + + lsqSquashedStores + .name(name() + ".squashedStores") + .desc("Number of stores squashed"); + + invAddrSwpfs + .name(name() + ".invAddrSwpfs") + .desc("Number of software prefetches ignored due to an invalid address"); + + lsqBlockedLoads + .name(name() + ".blockedLoads") + .desc("Number of blocked loads due to partial load-store forwarding"); + + lsqRescheduledLoads + .name(name() + ".rescheduledLoads") + .desc("Number of loads that were rescheduled"); + + lsqCacheBlocked + .name(name() + ".cacheBlocked") + .desc("Number of times an access to memory failed due to the cache being blocked"); +} + +template<class Impl> +void LSQUnit<Impl>::clearLQ() { loadQueue.clear(); @@ -542,7 +548,7 @@ LSQUnit<Impl>::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { - if (isStoreBlocked) { + if (isStoreBlocked || lsq->cacheBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); break; @@ -617,7 +623,7 @@ LSQUnit<Impl>::writebackStores() if (!dcachePort->sendTiming(data_pkt)) { // Need to handle becoming blocked on a store. isStoreBlocked = true; - + ++lsqCacheBlocked; assert(retryPkt == NULL); retryPkt = data_pkt; } else { @@ -668,7 +674,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - loadQueue[load_idx]->squashed = true; + loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; --loads; @@ -676,6 +682,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) loadTail = load_idx; decrLdIdx(load_idx); + ++lsqSquashedLoads; } if (isLoadBlocked) { @@ -711,7 +718,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst->setSquashed(); storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; @@ -722,6 +729,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) storeTail = store_idx; decrStIdx(store_idx); + ++lsqSquashedStores; } } @@ -744,9 +752,11 @@ LSQUnit<Impl>::storePostSend(Packet *pkt) // only works so long as the checker doesn't try to // verify the value in memory for stores. storeQueue[storeWBIdx].inst->setCompleted(); +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(storeQueue[storeWBIdx].inst); + cpu->checker->verify(storeQueue[storeWBIdx].inst); } +#endif } if (pkt->result != Packet::Success) { @@ -781,6 +791,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) // Squashed instructions do not need to complete their access. if (inst->isSquashed()) { assert(!inst->isStore()); + ++lsqIgnoredResponses; return; } @@ -839,9 +850,11 @@ LSQUnit<Impl>::completeStore(int store_idx) // Tell the checker we've completed this instruction. Some stores // may get reported twice to the checker, but the checker can // handle that case. +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(storeQueue[store_idx].inst); + cpu->checker->verify(storeQueue[store_idx].inst); } +#endif } template <class Impl> @@ -857,6 +870,8 @@ LSQUnit<Impl>::recvRetry() isStoreBlocked = false; } else { // Still blocked! + ++lsqCacheBlocked; + lsq->setRetryTid(lsqID); } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index a95103266..6a14dcbff 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -28,23 +28,22 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/store_set.hh" #include "cpu/o3/mem_dep_unit_impl.hh" // Force instantation of memory dependency unit using store sets and -// AlphaSimpleImpl. -template class MemDepUnit<StoreSet, AlphaSimpleImpl>; +// O3CPUImpl. +template class MemDepUnit<StoreSet, O3CPUImpl>; #ifdef DEBUG template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_count = 0; template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_insert = 0; template <> int -MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0; +MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_erase = 0; #endif diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/params.hh index f3cf36887..1c234bcd7 100644..100755 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/params.hh @@ -28,32 +28,28 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_ALPHA_PARAMS_HH__ -#define __CPU_O3_ALPHA_PARAMS_HH__ +#ifndef __CPU_O3_PARAMS_HH__ +#define __CPU_O3_PARAMS_HH__ #include "cpu/o3/cpu.hh" //Forward declarations -class AlphaDTB; -class AlphaITB; class FUPool; -class MemObject; -class Process; -class System; /** - * This file defines the parameters that will be used for the AlphaFullCPU. + * This file defines the parameters that will be used for the O3CPU. * This must be defined externally so that the Impl can have a params class * defined that it can pass to all of the individual stages. */ - -class AlphaSimpleParams : public BaseFullCPU::Params +class O3Params : public BaseO3CPU::Params { public: + unsigned activity; -#if FULL_SYSTEM - AlphaITB *itb; AlphaDTB *dtb; -#else + // + // Pointers to key objects + // +#if !FULL_SYSTEM std::vector<Process *> workload; Process *process; #endif // FULL_SYSTEM @@ -62,13 +58,11 @@ class AlphaSimpleParams : public BaseFullCPU::Params BaseCPU *checker; - unsigned activity; - // // Caches // -// MemInterface *icacheInterface; -// MemInterface *dcacheInterface; + // MemInterface *icacheInterface; + // MemInterface *dcacheInterface; unsigned cachePorts; @@ -104,12 +98,10 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; - unsigned executeWidth; - unsigned executeIntWidth; - unsigned executeFloatWidth; - unsigned executeBranchWidth; - unsigned executeMemoryWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // @@ -123,6 +115,12 @@ class AlphaSimpleParams : public BaseFullCPU::Params Tick fetchTrapLatency; // + // Timebuffer sizes + // + unsigned backComSize; + unsigned forwardComSize; + + // // Branch predictor (BP, BTB, RAS) // std::string predType; diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index ade5e4e56..b6677b4b1 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -72,7 +72,7 @@ class PhysRegFile // Will make these registers public for now, but they probably should // be private eventually with some accessor functions. public: - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; /** * Constructs a physical register file with the specified amount of @@ -86,10 +86,6 @@ class PhysRegFile //The duplication is unfortunate but it's better than having //different ways to access certain registers. - //Add these in later when everything else is in place -// void serialize(std::ostream &os); -// void unserialize(Checkpoint *cp, const std::string §ion); - /** Reads an integer register. */ uint64_t readIntReg(PhysRegIndex reg_idx) { @@ -278,11 +274,11 @@ class PhysRegFile private: /** CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; public: /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } + void setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; } /** Number of physical integer registers. */ unsigned numPhysicalIntRegs; diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index 9ca8e82c6..443ada0cb 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rename_impl.hh" -template class DefaultRename<AlphaSimpleImpl>; +template class DefaultRename<O3CPUImpl>; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 42fdf6bf5..034087feb 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -55,7 +55,7 @@ class DefaultRename // Typedefs from the Impl. typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; // Typedefs from the CPUPol @@ -115,7 +115,7 @@ class DefaultRename void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); @@ -157,12 +157,15 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); + /** Drains the rename stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the rename stage. */ void switchOut(); - /** Completes the switch out. */ - void doSwitchOut(); - /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -291,7 +294,7 @@ class DefaultRename std::list<RenameHistory> historyBuffer[Impl::MaxThreads]; /** Pointer to CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to main time buffer used for backwards communication. */ TimeBuffer<TimeStruct> *timeBuffer; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df33b98ee..805a72808 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -162,7 +162,7 @@ DefaultRename<Impl>::regStats() template <class Impl> void -DefaultRename<Impl>::setCPU(FullCPU *cpu_ptr) +DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr) { DPRINTF(Rename, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -257,16 +257,17 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard) } template <class Impl> -void -DefaultRename<Impl>::switchOut() +bool +DefaultRename<Impl>::drain() { // Rename is ready to switch out at any time. - cpu->signalSwitched(); + cpu->signalDrained(); + return true; } template <class Impl> void -DefaultRename<Impl>::doSwitchOut() +DefaultRename<Impl>::switchOut() { // Clear any state, fix up the rename map. for (int i = 0; i < numThreads; i++) { @@ -341,7 +342,7 @@ DefaultRename<Impl>::squash(unsigned tid) for (int i=0; i<fromDecode->size; i++) { if (fromDecode->insts[i]->threadNumber == tid) { - fromDecode->insts[i]->squashed = true; + fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } @@ -755,7 +756,7 @@ DefaultRename<Impl>::updateStatus() DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::RenameIdx); + cpu->activateStage(O3CPU::RenameIdx); } } else { // If it's not unblocking, then rename will not have any internal @@ -764,7 +765,7 @@ DefaultRename<Impl>::updateStatus() _status = Inactive; DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::RenameIdx); + cpu->deactivateStage(O3CPU::RenameIdx); } } } @@ -1022,7 +1023,7 @@ DefaultRename<Impl>::validInsts() unsigned inst_count = 0; for (int i=0; i<fromDecode->size; i++) { - if (!fromDecode->insts[i]->squashed) + if (!fromDecode->insts[i]->isSquashed()) inst_count++; } @@ -1206,7 +1207,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid) } DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename." - " Adding to front of list.", tid); + " Adding to front of list.\n", tid); serializeInst[tid] = NULL; diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc index f99e5ccfd..9976049cd 100644 --- a/src/cpu/o3/rob.cc +++ b/src/cpu/o3/rob.cc @@ -29,9 +29,8 @@ * Nathan Binkert */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rob_impl.hh" // Force instantiation of InstructionQueue. -template class ROB<AlphaSimpleImpl>; +template class ROB<O3CPUImpl>; diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index 6d1402531..7cd5a5143 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_ROB_HH__ @@ -45,7 +46,7 @@ class ROB typedef TheISA::RegIndex RegIndex; public: //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo; @@ -90,7 +91,7 @@ class ROB * is created within. * @param cpu_ptr Pointer to the implementation specific full CPU object. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets pointer to the list of active threads. * @param at_ptr Pointer to the list of active threads. @@ -257,7 +258,7 @@ class ROB private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Active Threads in CPU */ std::list<unsigned>* activeThreads; @@ -307,7 +308,7 @@ class ROB private: /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum; + InstSeqNum squashedSeqNum[Impl::MaxThreads]; /** Is the ROB done squashing. */ bool doneSquashing[Impl::MaxThreads]; diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 97694e371..1b9f666b8 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" @@ -40,10 +41,10 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth, : numEntries(_numEntries), squashWidth(_squashWidth), numInstsInROB(0), - squashedSeqNum(0), numThreads(_numThreads) { for (int tid=0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; doneSquashing[tid] = true; threadEntries[tid] = 0; } @@ -100,7 +101,7 @@ ROB<Impl>::name() const template <class Impl> void -ROB<Impl>::setCPU(FullCPU *cpu_ptr) +ROB<Impl>::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; @@ -276,7 +277,7 @@ ROB<Impl>::retireHead(unsigned tid) --numInstsInROB; --threadEntries[tid]; - head_inst->removeInROB(); + head_inst->clearInROB(); head_inst->setCommitted(); instList[tid].erase(head_it); @@ -351,11 +352,11 @@ void ROB<Impl>::doSquash(unsigned tid) { DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", - tid, squashedSeqNum); + tid, squashedSeqNum[tid]); assert(squashIt[tid] != instList[tid].end()); - if ((*squashIt[tid])->seqNum < squashedSeqNum) { + if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -370,7 +371,7 @@ ROB<Impl>::doSquash(unsigned tid) for (int numSquashed = 0; numSquashed < squashWidth && squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum; + (*squashIt[tid])->seqNum > squashedSeqNum[tid]; ++numSquashed) { DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", @@ -407,7 +408,7 @@ ROB<Impl>::doSquash(unsigned tid) // Check if ROB is done squashing. - if ((*squashIt[tid])->seqNum <= squashedSeqNum) { + if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -519,7 +520,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid) doneSquashing[tid] = false; - squashedSeqNum = squash_num; + squashedSeqNum[tid] = squash_num; if (!instList[tid].empty()) { InstIt tail_thread = instList[tid].end(); @@ -543,6 +544,7 @@ ROB<Impl>::readHeadInst() } } */ + template <class Impl> typename Impl::DynInstPtr ROB<Impl>::readHeadInst(unsigned tid) @@ -557,6 +559,7 @@ ROB<Impl>::readHeadInst(unsigned tid) return dummyInst; } } + /* template <class Impl> uint64_t diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh new file mode 100755 index 000000000..df8d1a6d8 --- /dev/null +++ b/src/cpu/o3/thread_context.hh @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_THREAD_CONTEXT_HH__ +#define __CPU_O3_THREAD_CONTEXT_HH__ + +#include "cpu/o3/isa_specific.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * Derived ThreadContext class for use with the O3CPU. It + * provides the interface for any external objects to access a + * single thread's state and some general CPU state. Any time + * external objects try to update state through this interface, + * the CPU will create an event to squash all in-flight + * instructions in order to ensure state is maintained correctly. + * It must be defined specifically for the O3CPU because + * not all architectural state is located within the O3ThreadState + * (such as the commit PC, and registers), and specific actions + * must be taken when using this interface (such as squashing all + * in-flight instructions when doing a write to this interface). + */ +template <class Impl> +class O3ThreadContext : public ThreadContext +{ + public: + typedef typename Impl::O3CPU O3CPU; + + /** Pointer to the CPU. */ + O3CPU *cpu; + + /** Pointer to the thread state that this TC corrseponds to. */ + O3ThreadState<Impl> *thread; + + /** Returns a pointer to this CPU. */ + virtual BaseCPU *getCpuPtr() { return cpu; } + + /** Sets this CPU's ID. */ + virtual void setCpuId(int id) { cpu->setCpuId(id); } + + /** Reads this CPU's ID. */ + virtual int readCpuId() { return cpu->readCpuId(); } + +#if FULL_SYSTEM + /** Returns a pointer to the system. */ + virtual System *getSystemPtr() { return cpu->system; } + + /** Returns a pointer to physical memory. */ + virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } + + /** Returns a pointer to this thread's kernel statistics. */ + virtual Kernel::Statistics *getKernelStats() + { return thread->kernelStats; } + + virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } + + virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); + + void delVirtPort(VirtualPort *vp); +#else + virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } + + /** Returns a pointer to this thread's process. */ + virtual Process *getProcessPtr() { return thread->getProcessPtr(); } +#endif + /** Returns this thread's status. */ + virtual Status status() const { return thread->status(); } + + /** Sets this thread's status. */ + virtual void setStatus(Status new_status) + { thread->setStatus(new_status); } + + /** Set the status to Active. Optional delay indicates number of + * cycles to wait before beginning execution. */ + virtual void activate(int delay = 1); + + /** Set the status to Suspended. */ + virtual void suspend(); + + /** Set the status to Unallocated. */ + virtual void deallocate(int delay = 0); + + /** Set the status to Halted. */ + virtual void halt(); + +#if FULL_SYSTEM + /** Dumps the function profiling information. + * @todo: Implement. + */ + virtual void dumpFuncProfile(); +#endif + /** Takes over execution of a thread from another CPU. */ + virtual void takeOverFrom(ThreadContext *old_context); + + /** Registers statistics associated with this TC. */ + virtual void regStats(const std::string &name); + + /** Serializes state. */ + virtual void serialize(std::ostream &os); + /** Unserializes state. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#if FULL_SYSTEM + /** Reads the last tick that this thread was activated on. */ + virtual Tick readLastActivate(); + /** Reads the last tick that this thread was suspended on. */ + virtual Tick readLastSuspend(); + + /** Clears the function profiling information. */ + virtual void profileClear(); + /** Samples the function profiling information. */ + virtual void profileSample(); +#endif + /** Returns this thread's ID number. */ + virtual int getThreadNum() { return thread->readTid(); } + + /** Returns the instruction this thread is currently committing. + * Only used when an instruction faults. + */ + virtual TheISA::MachInst getInst(); + + /** Copies the architectural registers from another TC into this TC. */ + virtual void copyArchRegs(ThreadContext *tc); + + /** Resets all architectural registers to 0. */ + virtual void clearArchRegs(); + + /** Reads an integer register. */ + virtual uint64_t readIntReg(int reg_idx); + + virtual FloatReg readFloatReg(int reg_idx, int width); + + virtual FloatReg readFloatReg(int reg_idx); + + virtual FloatRegBits readFloatRegBits(int reg_idx, int width); + + virtual FloatRegBits readFloatRegBits(int reg_idx); + + /** Sets an integer register to a value. */ + virtual void setIntReg(int reg_idx, uint64_t val); + + virtual void setFloatReg(int reg_idx, FloatReg val, int width); + + virtual void setFloatReg(int reg_idx, FloatReg val); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val); + + /** Reads this thread's PC. */ + virtual uint64_t readPC() + { return cpu->readPC(thread->readTid()); } + + /** Sets this thread's PC. */ + virtual void setPC(uint64_t val); + + /** Reads this thread's next PC. */ + virtual uint64_t readNextPC() + { return cpu->readNextPC(thread->readTid()); } + + /** Sets this thread's next PC. */ + virtual void setNextPC(uint64_t val); + + /** Reads a miscellaneous register. */ + virtual MiscReg readMiscReg(int misc_reg) + { return cpu->readMiscReg(misc_reg, thread->readTid()); } + + /** Reads a misc. register, including any side-effects the + * read might have as defined by the architecture. */ + virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } + + /** Sets a misc. register. */ + virtual Fault setMiscReg(int misc_reg, const MiscReg &val); + + /** Sets a misc. register, including any side-effects the + * write might have as defined by the architecture. */ + virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + /** Returns the number of consecutive store conditional failures. */ + // @todo: Figure out where these store cond failures should go. + virtual unsigned readStCondFailures() + { return thread->storeCondFailures; } + + /** Sets the number of consecutive store conditional failures. */ + virtual void setStCondFailures(unsigned sc_failures) + { thread->storeCondFailures = sc_failures; } + + // Only really makes sense for old CPU model. Lots of code + // outside the CPU still checks this function, so it will + // always return false to keep everything working. + /** Checks if the thread is misspeculating. Because it is + * very difficult to determine if the thread is + * misspeculating, this is set as false. */ + virtual bool misspeculating() { return false; } + +#if !FULL_SYSTEM + /** Gets a syscall argument by index. */ + virtual IntReg getSyscallArg(int i); + + /** Sets a syscall argument. */ + virtual void setSyscallArg(int i, IntReg val); + + /** Sets the syscall return value. */ + virtual void setSyscallReturn(SyscallReturn return_value); + + /** Executes a syscall in SE mode. */ + virtual void syscall(int64_t callnum) + { return cpu->syscall(callnum, thread->readTid()); } + + /** Reads the funcExeInst counter. */ + virtual Counter readFuncExeInst() { return thread->funcExeInst; } +#endif +}; + +#endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh new file mode 100755 index 000000000..bf8cbf850 --- /dev/null +++ b/src/cpu/o3/thread_context_impl.hh @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/quiesce_event.hh" + +using namespace TheISA; + +#if FULL_SYSTEM +template <class Impl> +VirtualPort * +O3ThreadContext<Impl>::getVirtPort(ThreadContext *src_tc) +{ + if (!src_tc) + return thread->getVirtPort(); + + VirtualPort *vp; + Port *mem_port; + + vp = new VirtualPort("tc-vport", src_tc); + mem_port = cpu->system->physmem->getPort("functional"); + mem_port->setPeer(vp); + vp->setPeer(mem_port); + return vp; +} + +template <class Impl> +void +O3ThreadContext<Impl>::dumpFuncProfile() +{ + // Currently not supported +} +#endif + +template <class Impl> +void +O3ThreadContext<Impl>::takeOverFrom(ThreadContext *old_context) +{ + // some things should already be set up +#if FULL_SYSTEM + assert(getSystemPtr() == old_context->getSystemPtr()); +#else + assert(getProcessPtr() == old_context->getProcessPtr()); +#endif + + // copy over functional state + setStatus(old_context->status()); + copyArchRegs(old_context); + setCpuId(old_context->readCpuId()); + +#if !FULL_SYSTEM + thread->funcExeInst = old_context->readFuncExeInst(); +#else + EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); + if (other_quiesce) { + // Point the quiesce event's TC at this TC so that it wakes up + // the proper CPU. + other_quiesce->tc = this; + } + if (thread->quiesceEvent) { + thread->quiesceEvent->tc = this; + } + + // Transfer kernel stats from one CPU to the other. + thread->kernelStats = old_context->getKernelStats(); +// storeCondFailures = 0; + cpu->lockFlag = false; +#endif + + old_context->setStatus(ThreadContext::Unallocated); + + thread->inSyscall = false; + thread->trapPending = false; +} + +#if FULL_SYSTEM +template <class Impl> +void +O3ThreadContext<Impl>::delVirtPort(VirtualPort *vp) +{ + delete vp->getPeer(); + delete vp; +} +#endif + +template <class Impl> +void +O3ThreadContext<Impl>::activate(int delay) +{ + DPRINTF(O3CPU, "Calling activate on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Active) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; +#endif + + if (thread->status() == ThreadContext::Unallocated) { + cpu->activateWhenReady(thread->readTid()); + return; + } + + thread->setStatus(ThreadContext::Active); + + // status() == Suspended + cpu->activateContext(thread->readTid(), delay); +} + +template <class Impl> +void +O3ThreadContext<Impl>::suspend() +{ + DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Suspended) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; + thread->lastSuspend = curTick; +#endif +/* +#if FULL_SYSTEM + // Don't change the status from active if there are pending interrupts + if (cpu->check_interrupts()) { + assert(status() == ThreadContext::Active); + return; + } +#endif +*/ + thread->setStatus(ThreadContext::Suspended); + cpu->suspendContext(thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::deallocate(int delay) +{ + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Unallocated) + return; + + thread->setStatus(ThreadContext::Unallocated); + cpu->deallocateContext(thread->readTid(), delay); +} + +template <class Impl> +void +O3ThreadContext<Impl>::halt() +{ + DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", + getThreadNum()); + + if (thread->status() == ThreadContext::Halted) + return; + + thread->setStatus(ThreadContext::Halted); + cpu->haltContext(thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::regStats(const std::string &name) +{ +#if FULL_SYSTEM + thread->kernelStats = new Kernel::Statistics(cpu->system); + thread->kernelStats->regStats(name + ".kern"); +#endif +} + +template <class Impl> +void +O3ThreadContext<Impl>::serialize(std::ostream &os) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->serialize(os); +#endif + +} + +template <class Impl> +void +O3ThreadContext<Impl>::unserialize(Checkpoint *cp, const std::string §ion) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->unserialize(cp, section); +#endif + +} + +#if FULL_SYSTEM +template <class Impl> +Tick +O3ThreadContext<Impl>::readLastActivate() +{ + return thread->lastActivate; +} + +template <class Impl> +Tick +O3ThreadContext<Impl>::readLastSuspend() +{ + return thread->lastSuspend; +} + +template <class Impl> +void +O3ThreadContext<Impl>::profileClear() +{} + +template <class Impl> +void +O3ThreadContext<Impl>::profileSample() +{} +#endif + +template <class Impl> +TheISA::MachInst +O3ThreadContext<Impl>:: getInst() +{ + return thread->getInst(); +} + +template <class Impl> +void +O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc) +{ + // This function will mess things up unless the ROB is empty and + // there are no instructions in the pipeline. + unsigned tid = thread->readTid(); + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < TheISA::NumIntRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i); + + DPRINTF(O3CPU, "Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, cpu->readIntReg(renamed_reg), + tc->readIntReg(i)); + + cpu->setIntReg(renamed_reg, tc->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i + TheISA::FP_Base_DepTag); + cpu->setFloatRegBits(renamed_reg, + tc->readFloatRegBits(i)); + } + + // Copy the misc regs. + copyMiscRegs(tc, this); + + // Then finally set the PC and the next PC. + cpu->setPC(tc->readPC(), tid); + cpu->setNextPC(tc->readNextPC(), tid); +#if !FULL_SYSTEM + this->thread->funcExeInst = tc->readFuncExeInst(); +#endif +} + +template <class Impl> +void +O3ThreadContext<Impl>::clearArchRegs() +{} + +template <class Impl> +uint64_t +O3ThreadContext<Impl>::readIntReg(int reg_idx) +{ + return cpu->readArchIntReg(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatReg +O3ThreadContext<Impl>::readFloatReg(int reg_idx, int width) +{ + switch(width) { + case 32: + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); + case 64: + return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); + default: + panic("Unsupported width!"); + return 0; + } +} + +template <class Impl> +FloatReg +O3ThreadContext<Impl>::readFloatReg(int reg_idx) +{ + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatRegBits +O3ThreadContext<Impl>::readFloatRegBits(int reg_idx, int width) +{ + DPRINTF(Fault, "Reading floatint register through the TC!\n"); + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template <class Impl> +FloatRegBits +O3ThreadContext<Impl>::readFloatRegBits(int reg_idx) +{ + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val) +{ + cpu->setArchIntReg(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val, int width) +{ + switch(width) { + case 32: + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + break; + case 64: + cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); + break; + } + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val) +{ + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val, + int width) +{ + DPRINTF(Fault, "Setting floatint register through the TC!\n"); + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val) +{ + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setPC(uint64_t val) +{ + cpu->setPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +void +O3ThreadContext<Impl>::setNextPC(uint64_t val) +{ + cpu->setNextPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template <class Impl> +Fault +O3ThreadContext<Impl>::setMiscReg(int misc_reg, const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +template <class Impl> +Fault +O3ThreadContext<Impl>::setMiscRegWithEffect(int misc_reg, + const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, + thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +#if !FULL_SYSTEM + +template <class Impl> +TheISA::IntReg +O3ThreadContext<Impl>::getSyscallArg(int i) +{ + return cpu->getSyscallArg(i, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setSyscallArg(int i, IntReg val) +{ + cpu->setSyscallArg(i, val, thread->readTid()); +} + +template <class Impl> +void +O3ThreadContext<Impl>::setSyscallReturn(SyscallReturn return_value) +{ + cpu->setSyscallReturn(return_value, thread->readTid()); +} + +#endif // FULL_SYSTEM + diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh index b6535baa1..1c8105204 100644 --- a/src/cpu/o3/thread_state.hh +++ b/src/cpu/o3/thread_state.hh @@ -58,11 +58,11 @@ class Process; template <class Impl> struct O3ThreadState : public ThreadState { typedef ThreadContext::Status Status; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; public: /** Whether or not the thread is currently in syscall mode, and * thus able to be externally updated without squashing. @@ -75,14 +75,14 @@ struct O3ThreadState : public ThreadState { bool trapPending; #if FULL_SYSTEM - O3ThreadState(FullCPU *_cpu, int _thread_num) + O3ThreadState(O3CPU *_cpu, int _thread_num) : ThreadState(-1, _thread_num), inSyscall(0), trapPending(0) { } #else - O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid, + O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *mem) - : ThreadState(-1, _thread_num, mem, _process, _asid), + : ThreadState(-1, _thread_num, _process, _asid, mem), cpu(_cpu), inSyscall(0), trapPending(0) { } #endif diff --git a/src/cpu/ozone/base_dyn_inst.cc b/src/cpu/ozone/base_dyn_inst.cc new file mode 100644 index 000000000..5a3a69dff --- /dev/null +++ b/src/cpu/ozone/base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +// Explicit instantiation +template class BaseDynInst<OzoneImpl>; + +template <> +int +BaseDynInst<OzoneImpl>::instcount = 0; diff --git a/src/cpu/ozone/bpred_unit.cc b/src/cpu/ozone/bpred_unit.cc new file mode 100644 index 000000000..c823f5e80 --- /dev/null +++ b/src/cpu/ozone/bpred_unit.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/o3/bpred_unit_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +//#include "cpu/ozone/simple_impl.hh" + +template class BPredUnit<OzoneImpl>; +//template class BPredUnit<SimpleImpl>; diff --git a/src/cpu/checker/cpu_builder.cc b/src/cpu/ozone/checker_builder.cc index 3b7583294..c372e51d6 100644 --- a/src/cpu/checker/cpu_builder.cc +++ b/src/cpu/ozone/checker_builder.cc @@ -30,19 +30,24 @@ #include <string> -#include "cpu/checker/cpu.hh" +#include "cpu/checker/cpu_impl.hh" #include "cpu/inst_seq.hh" #include "cpu/ozone/dyn_inst.hh" #include "cpu/ozone/ozone_impl.hh" -#include "mem/base_mem.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" +class MemObject; + +template +class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >; + /** * Specific non-templated derived class used for SimObject configuration. */ -class OzoneChecker : public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > > +class OzoneChecker : + public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > > { public: OzoneChecker(Params *p) @@ -64,7 +69,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<FunctionalMemory *> mem; SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; @@ -72,11 +76,10 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) SimObjectParam<Process *> workload; #endif // FULL_SYSTEM Param<int> clock; - SimObjectParam<BaseMem *> icache; - SimObjectParam<BaseMem *> dcache; Param<bool> defer_registration; Param<bool> exitOnError; + Param<bool> warnOnlyOnLoadError; Param<bool> function_trace; Param<Tick> function_trace_start; @@ -96,7 +99,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), @@ -105,11 +107,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) #endif // FULL_SYSTEM INIT_PARAM(clock, "clock speed"), - INIT_PARAM(icache, "L1 instruction cache object"), - INIT_PARAM(dcache, "L1 data cache object"), INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load " + "result errors", false), INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") @@ -126,6 +128,7 @@ CREATE_SIM_OBJECT(OzoneChecker) params->max_loads_any_thread = 0; params->max_loads_all_threads = 0; params->exitOnError = exitOnError; + params->warnOnlyOnLoadError = warnOnlyOnLoadError; params->deferRegistration = defer_registration; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; @@ -137,13 +140,10 @@ CREATE_SIM_OBJECT(OzoneChecker) temp = max_insts_all_threads; temp = max_loads_any_thread; temp = max_loads_all_threads; - BaseMem *cache = icache; - cache = dcache; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->mem = mem; params->system = system; params->cpu_id = cpu_id; params->profile = profile; diff --git a/src/cpu/ozone/cpu.cc b/src/cpu/ozone/cpu.cc index 303c78eea..eb6ac37bd 100644 --- a/src/cpu/ozone/cpu.cc +++ b/src/cpu/ozone/cpu.cc @@ -31,7 +31,7 @@ #include "cpu/ozone/cpu_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" -template class OzoneCPU<SimpleImpl>; +//template class OzoneCPU<SimpleImpl>; template class OzoneCPU<OzoneImpl>; diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index e9550c39b..e411c12bd 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -43,6 +43,7 @@ #include "cpu/ozone/thread_state.hh" #include "cpu/pc_event.hh" #include "cpu/static_inst.hh" +#include "mem/page_table.hh" #include "sim/eventq.hh" // forward declarations @@ -54,7 +55,6 @@ class AlphaDTB; class PhysicalMemory; class MemoryController; -class Sampler; class RemoteGDB; class GDBListener; @@ -70,6 +70,7 @@ class Process; class Checkpoint; class EndQuiesceEvent; +class MemObject; class Request; namespace Trace { @@ -111,7 +112,7 @@ class OzoneCPU : public BaseCPU void setCpuId(int id); - int readCpuId() { return thread->cpuId; } + int readCpuId() { return thread->readCpuId(); } #if FULL_SYSTEM System *getSystemPtr() { return cpu->system; } @@ -122,22 +123,22 @@ class OzoneCPU : public BaseCPU AlphaDTB * getDTBPtr() { return cpu->dtb; } - Kernel::Statistics *getKernelStats() { return thread->kernelStats; } + Kernel::Statistics *getKernelStats() + { return thread->getKernelStats(); } FunctionalPort *getPhysPort() { return thread->getPhysPort(); } VirtualPort *getVirtPort(ThreadContext *tc = NULL) { return thread->getVirtPort(tc); } - void delVirtPort(VirtualPort *vp) - { thread->delVirtPort(vp); } + void delVirtPort(VirtualPort *vp); #else - TranslatingPort *getMemPort() { return thread->port; } + TranslatingPort *getMemPort() { return thread->getMemPort(); } - Process *getProcessPtr() { return thread->process; } + Process *getProcessPtr() { return thread->getProcessPtr(); } #endif - Status status() const { return thread->_status; } + Status status() const { return thread->status(); } void setStatus(Status new_status); @@ -149,7 +150,7 @@ class OzoneCPU : public BaseCPU void suspend(); /// Set the status to Unallocated. - void deallocate(); + void deallocate(int delay = 0); /// Set the status to Halted. void halt(); @@ -212,12 +213,11 @@ class OzoneCPU : public BaseCPU uint64_t readNextNPC() { - panic("Alpha has no NextNPC!"); return 0; } void setNextNPC(uint64_t val) - { panic("Alpha has no NextNPC!"); } + { } public: // ISA stuff: @@ -250,7 +250,7 @@ class OzoneCPU : public BaseCPU { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(i); } void setSyscallReturn(SyscallReturn return_value) - { cpu->setSyscallReturn(return_value, thread->tid); } + { cpu->setSyscallReturn(return_value, thread->readTid()); } Counter readFuncExeInst() { return thread->funcExeInst; } @@ -355,12 +355,10 @@ class OzoneCPU : public BaseCPU int cpuId; - void switchOut(Sampler *sampler); + void switchOut(); void signalSwitched(); void takeOverFrom(BaseCPU *oldCPU); - Sampler *sampler; - int switchCount; #if FULL_SYSTEM @@ -374,6 +372,10 @@ class OzoneCPU : public BaseCPU PhysicalMemory *physmem; #endif + virtual Port *getPort(const std::string &name, int idx); + + MemObject *mem; + FrontEnd *frontEnd; BackEnd *backEnd; @@ -383,7 +385,7 @@ class OzoneCPU : public BaseCPU virtual void activateContext(int thread_num, int delay); virtual void suspendContext(int thread_num); - virtual void deallocateContext(int thread_num); + virtual void deallocateContext(int thread_num, int delay); virtual void haltContext(int thread_num); // statistics @@ -415,50 +417,41 @@ class OzoneCPU : public BaseCPU #if FULL_SYSTEM - bool validInstAddr(Addr addr) { return true; } - bool validDataAddr(Addr addr) { return true; } - - Fault translateInstReq(Request *req) + /** Translates instruction requestion. */ + Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return itb->translate(req, tc); + return itb->translate(req, thread->getTC()); } - Fault translateDataReadReq(Request *req) + /** Translates data read request. */ + Fault translateDataReadReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return dtb->translate(req, tc, false); + return dtb->translate(req, thread->getTC(), false); } - Fault translateDataWriteReq(Request *req) + /** Translates data write request. */ + Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return dtb->translate(req, tc, true); + return dtb->translate(req, thread->getTC(), true); } #else - bool validInstAddr(Addr addr) - { return true; } - - bool validDataAddr(Addr addr) - { return true; } - - int getInstAsid() { return thread.asid; } - int getDataAsid() { return thread.asid; } - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(Request *req) + Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return thread.translateInstReq(req); + return thread->getProcessPtr()->pTable->translate(req); } /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(Request *req) + Fault translateDataReadReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return thread.translateDataReadReq(req); + return thread->getProcessPtr()->pTable->translate(req); } /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(Request *req) + Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState<Impl> *thread) { - return thread.translateDataWriteReq(req); + return thread->getProcessPtr()->pTable->translate(req); } #endif @@ -599,14 +592,14 @@ class OzoneCPU : public BaseCPU #if FULL_SYSTEM Fault hwrei(); - int readIntrFlag() { return thread.regs.intrflag; } - void setIntrFlag(int val) { thread.regs.intrflag = val; } + int readIntrFlag() { return thread.intrflag; } + void setIntrFlag(int val) { thread.intrflag = val; } bool inPalMode() { return AlphaISA::PcPAL(thread.PC); } bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); } bool simPalCheck(int palFunc); void processInterrupts(); #else - void syscall(); + void syscall(uint64_t &callnum); void setSyscallReturn(SyscallReturn return_value, int tid); #endif diff --git a/src/cpu/ozone/cpu_builder.cc b/src/cpu/ozone/cpu_builder.cc index 18f257a25..e239b7a94 100644 --- a/src/cpu/ozone/cpu_builder.cc +++ b/src/cpu/ozone/cpu_builder.cc @@ -34,9 +34,7 @@ #include "cpu/inst_seq.hh" #include "cpu/ozone/cpu.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" #include "cpu/ozone/simple_params.hh" -#include "mem/cache/base_cache.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" @@ -49,14 +47,6 @@ class DerivOzoneCPU : public OzoneCPU<OzoneImpl> { } }; -class SimpleOzoneCPU : public OzoneCPU<SimpleImpl> -{ - public: - SimpleOzoneCPU(SimpleParams *p) - : OzoneCPU<SimpleImpl>(p) - { } -}; - //////////////////////////////////////////////////////////////////////// // @@ -78,7 +68,7 @@ SimObjectVectorParam<Process *> workload; //SimObjectParam<PageTable *> page_table; #endif // FULL_SYSTEM -SimObjectParam<FunctionalMemory *> mem; +SimObjectParam<MemObject *> mem; SimObjectParam<BaseCPU *> checker; @@ -87,8 +77,8 @@ Param<Counter> max_insts_all_threads; Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; -SimObjectParam<BaseCache *> icache; -SimObjectParam<BaseCache *> dcache; +//SimObjectParam<BaseCache *> icache; +//SimObjectParam<BaseCache *> dcache; Param<unsigned> cachePorts; Param<unsigned> width; @@ -215,8 +205,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) "count", 0), - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), +// INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), +// INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), INIT_PARAM_DFLT(width, "Width", 1), @@ -361,8 +351,8 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) // // Caches // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; +// params->icacheInterface = icache ? icache->getInterface() : NULL; +// params->dcacheInterface = dcache ? dcache->getInterface() : NULL; params->cachePorts = cachePorts; params->width = width; @@ -459,405 +449,3 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) } REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU) - - - -//////////////////////////////////////////////////////////////////////// -// -// OzoneCPU Simulation Object -// - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - - Param<int> clock; - Param<int> numThreads; - -#if FULL_SYSTEM -SimObjectParam<System *> system; -Param<int> cpu_id; -SimObjectParam<AlphaITB *> itb; -SimObjectParam<AlphaDTB *> dtb; -#else -SimObjectVectorParam<Process *> workload; -//SimObjectParam<PageTable *> page_table; -#endif // FULL_SYSTEM - -SimObjectParam<FunctionalMemory *> mem; - -SimObjectParam<BaseCPU *> checker; - -Param<Counter> max_insts_any_thread; -Param<Counter> max_insts_all_threads; -Param<Counter> max_loads_any_thread; -Param<Counter> max_loads_all_threads; - -SimObjectParam<BaseCache *> icache; -SimObjectParam<BaseCache *> dcache; - -Param<unsigned> cachePorts; -Param<unsigned> width; -Param<unsigned> frontEndWidth; -Param<unsigned> backEndWidth; -Param<unsigned> backEndSquashLatency; -Param<unsigned> backEndLatency; -Param<unsigned> maxInstBufferSize; -Param<unsigned> numPhysicalRegs; - -Param<unsigned> decodeToFetchDelay; -Param<unsigned> renameToFetchDelay; -Param<unsigned> iewToFetchDelay; -Param<unsigned> commitToFetchDelay; -Param<unsigned> fetchWidth; - -Param<unsigned> renameToDecodeDelay; -Param<unsigned> iewToDecodeDelay; -Param<unsigned> commitToDecodeDelay; -Param<unsigned> fetchToDecodeDelay; -Param<unsigned> decodeWidth; - -Param<unsigned> iewToRenameDelay; -Param<unsigned> commitToRenameDelay; -Param<unsigned> decodeToRenameDelay; -Param<unsigned> renameWidth; - -Param<unsigned> commitToIEWDelay; -Param<unsigned> renameToIEWDelay; -Param<unsigned> issueToExecuteDelay; -Param<unsigned> issueWidth; -Param<unsigned> executeWidth; -Param<unsigned> executeIntWidth; -Param<unsigned> executeFloatWidth; -Param<unsigned> executeBranchWidth; -Param<unsigned> executeMemoryWidth; - -Param<unsigned> iewToCommitDelay; -Param<unsigned> renameToROBDelay; -Param<unsigned> commitWidth; -Param<unsigned> squashWidth; - -Param<std::string> predType; -Param<unsigned> localPredictorSize; -Param<unsigned> localCtrBits; -Param<unsigned> localHistoryTableSize; -Param<unsigned> localHistoryBits; -Param<unsigned> globalPredictorSize; -Param<unsigned> globalCtrBits; -Param<unsigned> globalHistoryBits; -Param<unsigned> choicePredictorSize; -Param<unsigned> choiceCtrBits; - -Param<unsigned> BTBEntries; -Param<unsigned> BTBTagSize; - -Param<unsigned> RASSize; - -Param<unsigned> LQEntries; -Param<unsigned> SQEntries; -Param<unsigned> LFSTSize; -Param<unsigned> SSITSize; - -Param<unsigned> numPhysIntRegs; -Param<unsigned> numPhysFloatRegs; -Param<unsigned> numIQEntries; -Param<unsigned> numROBEntries; - -Param<bool> decoupledFrontEnd; -Param<int> dispatchWidth; -Param<int> wbWidth; - -Param<unsigned> smtNumFetchingThreads; -Param<std::string> smtFetchPolicy; -Param<std::string> smtLSQPolicy; -Param<unsigned> smtLSQThreshold; -Param<std::string> smtIQPolicy; -Param<unsigned> smtIQThreshold; -Param<std::string> smtROBPolicy; -Param<unsigned> smtROBThreshold; -Param<std::string> smtCommitPolicy; - -Param<unsigned> instShiftAmt; - -Param<bool> defer_registration; - -Param<bool> function_trace; -Param<Tick> function_trace_start; - -END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - -BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - - INIT_PARAM(clock, "clock speed"), - INIT_PARAM(numThreads, "number of HW thread contexts"), - -#if FULL_SYSTEM - INIT_PARAM(system, "System object"), - INIT_PARAM(cpu_id, "processor ID"), - INIT_PARAM(itb, "Instruction translation buffer"), - INIT_PARAM(dtb, "Data translation buffer"), -#else - INIT_PARAM(workload, "Processes to run"), -// INIT_PARAM(page_table, "Page table"), -#endif // FULL_SYSTEM - - INIT_PARAM_DFLT(mem, "Memory", NULL), - - INIT_PARAM_DFLT(checker, "Checker CPU", NULL), - - INIT_PARAM_DFLT(max_insts_any_thread, - "Terminate when any thread reaches this inst count", - 0), - INIT_PARAM_DFLT(max_insts_all_threads, - "Terminate when all threads have reached" - "this inst count", - 0), - INIT_PARAM_DFLT(max_loads_any_thread, - "Terminate when any thread reaches this load count", - 0), - INIT_PARAM_DFLT(max_loads_all_threads, - "Terminate when all threads have reached this load" - "count", - 0), - - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - - INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), - INIT_PARAM_DFLT(width, "Width", 1), - INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), - INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), - INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), - INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), - INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), - INIT_PARAM(numPhysicalRegs, "Number of physical registers"), - - INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), - INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), - INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" - "delay"), - INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), - INIT_PARAM(fetchWidth, "Fetch width"), - INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), - INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" - "delay"), - INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), - INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), - INIT_PARAM(decodeWidth, "Decode width"), - - INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" - "delay"), - INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), - INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), - INIT_PARAM(renameWidth, "Rename width"), - - INIT_PARAM(commitToIEWDelay, "Commit to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(renameToIEWDelay, "Rename to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" - "to the IEW stage)"), - INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), - - INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " - "delay"), - INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), - INIT_PARAM(commitWidth, "Commit width"), - INIT_PARAM(squashWidth, "Squash width"), - - INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), - INIT_PARAM(localPredictorSize, "Size of local predictor"), - INIT_PARAM(localCtrBits, "Bits per counter"), - INIT_PARAM(localHistoryTableSize, "Size of local history table"), - INIT_PARAM(localHistoryBits, "Bits for the local history"), - INIT_PARAM(globalPredictorSize, "Size of global predictor"), - INIT_PARAM(globalCtrBits, "Bits per counter"), - INIT_PARAM(globalHistoryBits, "Bits of history"), - INIT_PARAM(choicePredictorSize, "Size of choice predictor"), - INIT_PARAM(choiceCtrBits, "Bits of choice counters"), - - INIT_PARAM(BTBEntries, "Number of BTB entries"), - INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), - - INIT_PARAM(RASSize, "RAS size"), - - INIT_PARAM(LQEntries, "Number of load queue entries"), - INIT_PARAM(SQEntries, "Number of store queue entries"), - INIT_PARAM(LFSTSize, "Last fetched store table size"), - INIT_PARAM(SSITSize, "Store set ID table size"), - - INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), - INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " - "registers"), - INIT_PARAM(numIQEntries, "Number of instruction queue entries"), - INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), - - INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), - INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), - INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), - - INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), - INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), - INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), - INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), - INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), - INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), - INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), - - INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), - INIT_PARAM(defer_registration, "defer system registration (for sampling)"), - - INIT_PARAM(function_trace, "Enable function trace"), - INIT_PARAM(function_trace_start, "Cycle to start function trace") - -END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - -CREATE_SIM_OBJECT(SimpleOzoneCPU) -{ - SimpleOzoneCPU *cpu; - -#if FULL_SYSTEM - // Full-system only supports a single thread for the moment. - int actual_num_threads = 1; -#else - // In non-full-system mode, we infer the number of threads from - // the workload if it's not explicitly specified. - int actual_num_threads = - numThreads.isValid() ? numThreads : workload.size(); - - if (workload.size() == 0) { - fatal("Must specify at least one workload!"); - } - -#endif - - SimpleParams *params = new SimpleParams; - - params->clock = clock; - - params->name = getInstanceName(); - params->numberOfThreads = actual_num_threads; - -#if FULL_SYSTEM - params->system = system; - params->cpu_id = cpu_id; - params->itb = itb; - params->dtb = dtb; -#else - params->workload = workload; -// params->pTable = page_table; -#endif // FULL_SYSTEM - - params->mem = mem; - params->checker = checker; - params->max_insts_any_thread = max_insts_any_thread; - params->max_insts_all_threads = max_insts_all_threads; - params->max_loads_any_thread = max_loads_any_thread; - params->max_loads_all_threads = max_loads_all_threads; - - // - // Caches - // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; - params->cachePorts = cachePorts; - - params->width = width; - params->frontEndWidth = frontEndWidth; - params->backEndWidth = backEndWidth; - params->backEndSquashLatency = backEndSquashLatency; - params->backEndLatency = backEndLatency; - params->maxInstBufferSize = maxInstBufferSize; - params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; - - params->decodeToFetchDelay = decodeToFetchDelay; - params->renameToFetchDelay = renameToFetchDelay; - params->iewToFetchDelay = iewToFetchDelay; - params->commitToFetchDelay = commitToFetchDelay; - params->fetchWidth = fetchWidth; - - params->renameToDecodeDelay = renameToDecodeDelay; - params->iewToDecodeDelay = iewToDecodeDelay; - params->commitToDecodeDelay = commitToDecodeDelay; - params->fetchToDecodeDelay = fetchToDecodeDelay; - params->decodeWidth = decodeWidth; - - params->iewToRenameDelay = iewToRenameDelay; - params->commitToRenameDelay = commitToRenameDelay; - params->decodeToRenameDelay = decodeToRenameDelay; - params->renameWidth = renameWidth; - - params->commitToIEWDelay = commitToIEWDelay; - params->renameToIEWDelay = renameToIEWDelay; - params->issueToExecuteDelay = issueToExecuteDelay; - params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; - - params->iewToCommitDelay = iewToCommitDelay; - params->renameToROBDelay = renameToROBDelay; - params->commitWidth = commitWidth; - params->squashWidth = squashWidth; - - params->predType = predType; - params->localPredictorSize = localPredictorSize; - params->localCtrBits = localCtrBits; - params->localHistoryTableSize = localHistoryTableSize; - params->localHistoryBits = localHistoryBits; - params->globalPredictorSize = globalPredictorSize; - params->globalCtrBits = globalCtrBits; - params->globalHistoryBits = globalHistoryBits; - params->choicePredictorSize = choicePredictorSize; - params->choiceCtrBits = choiceCtrBits; - - params->BTBEntries = BTBEntries; - params->BTBTagSize = BTBTagSize; - - params->RASSize = RASSize; - - params->LQEntries = LQEntries; - params->SQEntries = SQEntries; - - params->SSITSize = SSITSize; - params->LFSTSize = LFSTSize; - - params->numPhysIntRegs = numPhysIntRegs; - params->numPhysFloatRegs = numPhysFloatRegs; - params->numIQEntries = numIQEntries; - params->numROBEntries = numROBEntries; - - params->decoupledFrontEnd = decoupledFrontEnd; - params->dispatchWidth = dispatchWidth; - params->wbWidth = wbWidth; - - params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; - params->smtIQPolicy = smtIQPolicy; - params->smtLSQPolicy = smtLSQPolicy; - params->smtLSQThreshold = smtLSQThreshold; - params->smtROBPolicy = smtROBPolicy; - params->smtROBThreshold = smtROBThreshold; - params->smtCommitPolicy = smtCommitPolicy; - - params->instShiftAmt = 2; - - params->deferRegistration = defer_registration; - - params->functionTrace = function_trace; - params->functionTraceStart = function_trace_start; - - cpu = new SimpleOzoneCPU(params); - - return cpu; -} - -REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) - diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 76e2318aa..f58b81990 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -29,21 +29,17 @@ * Nathan Binkert */ -//#include <cstdio> -//#include <cstdlib> +#include "config/full_system.hh" +#include "config/use_checker.hh" #include "arch/isa_traits.hh" // For MachInst #include "base/trace.hh" -#include "config/full_system.hh" #include "cpu/base.hh" -#include "cpu/checker/thread_context.hh" #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/cpu.hh" #include "cpu/quiesce_event.hh" #include "cpu/static_inst.hh" -//#include "mem/base_mem.hh" -#include "mem/mem_interface.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" @@ -56,17 +52,18 @@ //#include "base/remote_gdb.hh" #include "cpu/profile.hh" #include "kern/kernel_stats.hh" -#include "mem/functional/memory_control.hh" -#include "mem/functional/physical.hh" #include "sim/faults.hh" #include "sim/sim_events.hh" #include "sim/sim_exit.hh" #include "sim/system.hh" #else // !FULL_SYSTEM -#include "mem/functional/functional.hh" #include "sim/process.hh" #endif // FULL_SYSTEM +#if USE_CHECKER +#include "cpu/checker/thread_context.hh" +#endif + using namespace TheISA; template <class Impl> @@ -101,13 +98,12 @@ OzoneCPU<Impl>::TickEvent::description() template <class Impl> OzoneCPU<Impl>::OzoneCPU(Params *p) #if FULL_SYSTEM - : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), - mem(p->mem), + : BaseCPU(p), thread(this, 0), tickEvent(this, p->width), #else - : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width), - mem(p->workload[0]->getMemory()), + : BaseCPU(p), thread(this, 0, p->workload[0], 0, p->mem), + tickEvent(this, p->width), #endif - comm(5, 5) + mem(p->mem), comm(5, 5) { frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); @@ -115,6 +111,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) _status = Idle; if (p->checker) { +#if USE_CHECKER BaseCPU *temp_checker = p->checker; checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker); checker->setMemory(mem); @@ -123,7 +120,10 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) #endif checkerTC = new CheckerThreadContext<OzoneTC>(&ozoneTC, checker); thread.tc = checkerTC; - tc = checkerXC; + tc = checkerTC; +#else + panic("Checker enabled but not compiled in!"); +#endif } else { checker = NULL; thread.tc = &ozoneTC; @@ -139,15 +139,13 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) #if FULL_SYSTEM /***** All thread state stuff *****/ thread.cpu = this; - thread.tid = 0; - thread.mem = p->mem; + thread.setTid(0); thread.quiesceEvent = new EndQuiesceEvent(tc); system = p->system; itb = p->itb; dtb = p->dtb; - memctrl = p->system->memctrl; physmem = p->system->physmem; if (p->profile) { @@ -166,9 +164,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) thread.profilePC = 3; #else thread.cpu = this; - thread.tid = 0; - thread.process = p->workload[0]; - thread.asid = 0; #endif // !FULL_SYSTEM numInst = 0; @@ -206,7 +201,35 @@ OzoneCPU<Impl>::OzoneCPU(Params *p) backEnd->renameTable.copyFrom(thread.renameTable); #if !FULL_SYSTEM -// pTable = p->pTable; + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), 0), + p->workload[0]->pTable, + false); + mem_port = p->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + thread.setMemPort(trans_port); +#else + Port *mem_port; + FunctionalPort *phys_port; + VirtualPort *virt_port; + phys_port = new FunctionalPort(csprintf("%s-%d-funcport", + name(), 0)); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(phys_port); + phys_port->setPeer(mem_port); + + virt_port = new VirtualPort(csprintf("%s-%d-vport", + name(), 0)); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(virt_port); + virt_port->setPeer(mem_port); + + thread.setPhysPort(phys_port); + thread.setVirtPort(virt_port); #endif lockFlag = 0; @@ -221,9 +244,8 @@ OzoneCPU<Impl>::~OzoneCPU() template <class Impl> void -OzoneCPU<Impl>::switchOut(Sampler *_sampler) +OzoneCPU<Impl>::switchOut() { - sampler = _sampler; switchCount = 0; // Front end needs state from back end, so switch out the back end first. backEnd->switchOut(); @@ -237,12 +259,14 @@ OzoneCPU<Impl>::signalSwitched() if (++switchCount == 2) { backEnd->doSwitchOut(); frontEnd->doSwitchOut(); +#if USE_CHECKER if (checker) - checker->switchOut(sampler); + checker->switchOut(); +#endif + _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); - sampler->signalSwitched(); } assert(switchCount <= 2); } @@ -291,7 +315,7 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay) notIdleFraction++; scheduleTickEvent(delay); _status = Running; - thread._status = ThreadContext::Active; + thread.setStatus(ThreadContext::Active); frontEnd->wakeFromQuiesce(); } @@ -311,7 +335,7 @@ OzoneCPU<Impl>::suspendContext(int thread_num) template <class Impl> void -OzoneCPU<Impl>::deallocateContext(int thread_num) +OzoneCPU<Impl>::deallocateContext(int thread_num, int delay) { // for now, these are equivalent suspendContext(thread_num); @@ -395,6 +419,18 @@ OzoneCPU<Impl>::init() } template <class Impl> +Port * +OzoneCPU<Impl>::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return backEnd->getDcachePort(); + else if (if_name == "icache_port") + return frontEnd->getIcachePort(); + else + panic("No Such Port\n"); +} + +template <class Impl> void OzoneCPU<Impl>::serialize(std::ostream &os) { @@ -510,7 +546,7 @@ template <class Impl> Addr OzoneCPU<Impl>::dbg_vtophys(Addr addr) { - return vtophys(tcProxy, addr); + return vtophys(tc, addr); } #endif // FULL_SYSTEM @@ -526,7 +562,7 @@ OzoneCPU<Impl>::post_interrupt(int int_num, int index) // thread.activate(); // Hack for now. Otherwise might have to go through the tc, or // I need to figure out what's the right thing to call. - activateContext(thread.tid, 1); + activateContext(thread.readTid(), 1); } } #endif // FULL_SYSTEM @@ -565,7 +601,7 @@ OzoneCPU<Impl>::squashFromTC() #if !FULL_SYSTEM template <class Impl> void -OzoneCPU<Impl>::syscall() +OzoneCPU<Impl>::syscall(uint64_t &callnum) { // Not sure this copy is needed, depending on how the TC proxy is made. thread.renameTable.copyFrom(backEnd->renameTable); @@ -576,7 +612,7 @@ OzoneCPU<Impl>::syscall() DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst); - thread.process->syscall(yc); + thread.process->syscall(callnum, tc); thread.funcExeInst--; @@ -690,9 +726,9 @@ OzoneCPU<Impl>::simPalCheck(int palFunc) switch (palFunc) { case PAL::halt: - haltContext(thread.tid); + haltContext(thread.readTid()); if (--System::numSystemsRunning == 0) - new SimExitEvent("all cpus halted"); + exitSimLoop("all cpus halted"); break; case PAL::bpt: @@ -718,21 +754,31 @@ void OzoneCPU<Impl>::OzoneTC::setCpuId(int id) { cpu->cpuId = id; - thread->cpuId = id; + thread->setCpuId(id); } +#if FULL_SYSTEM +template <class Impl> +void +OzoneCPU<Impl>::OzoneTC::delVirtPort(VirtualPort *vp) +{ + delete vp->getPeer(); + delete vp; +} +#endif + template <class Impl> void OzoneCPU<Impl>::OzoneTC::setStatus(Status new_status) { - thread->_status = new_status; + thread->setStatus(new_status); } template <class Impl> void OzoneCPU<Impl>::OzoneTC::activate(int delay) { - cpu->activateContext(thread->tid, delay); + cpu->activateContext(thread->readTid(), delay); } /// Set the status to Suspended. @@ -740,15 +786,15 @@ template <class Impl> void OzoneCPU<Impl>::OzoneTC::suspend() { - cpu->suspendContext(thread->tid); + cpu->suspendContext(thread->readTid()); } /// Set the status to Unallocated. template <class Impl> void -OzoneCPU<Impl>::OzoneTC::deallocate() +OzoneCPU<Impl>::OzoneTC::deallocate(int delay) { - cpu->deallocateContext(thread->tid); + cpu->deallocateContext(thread->readTid(), delay); } /// Set the status to Halted. @@ -756,7 +802,7 @@ template <class Impl> void OzoneCPU<Impl>::OzoneTC::halt() { - cpu->haltContext(thread->tid); + cpu->haltContext(thread->readTid()); } #if FULL_SYSTEM @@ -771,7 +817,6 @@ void OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context) { // some things should already be set up - assert(getMemPtr() == old_context->getMemPtr()); #if FULL_SYSTEM assert(getSystemPtr() == old_context->getSystemPtr()); #else @@ -867,7 +912,7 @@ template <class Impl> int OzoneCPU<Impl>::OzoneTC::getThreadNum() { - return thread->tid; + return thread->readTid(); } // Also somewhat obnoxious. Really only used for the TLB fault. @@ -875,7 +920,7 @@ template <class Impl> TheISA::MachInst OzoneCPU<Impl>::OzoneTC::getInst() { - return thread->inst; + return thread->getInst(); } template <class Impl> @@ -894,7 +939,7 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc) } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) { int fp_idx = i - TheISA::FP_Base_DepTag; thread->renameTable[i]->setDoubleResult( - tc->readFloatRegDouble(fp_idx)); + tc->readFloatReg(fp_idx, 64)); } } @@ -904,7 +949,7 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc) // Need to copy the TC values into the current rename table, // copy the misc regs. - thread->regs.miscRegs.copyMiscRegs(tc); + copyMiscRegs(tc, this); } template <class Impl> @@ -922,7 +967,7 @@ OzoneCPU<Impl>::OzoneTC::readIntReg(int reg_idx) } template <class Impl> -float +TheISA::FloatReg OzoneCPU<Impl>::OzoneTC::readFloatReg(int reg_idx, int width) { int idx = reg_idx + TheISA::FP_Base_DepTag; @@ -1049,15 +1094,15 @@ template <class Impl> TheISA::MiscReg OzoneCPU<Impl>::OzoneTC::readMiscReg(int misc_reg) { - return thread->regs.miscRegs.readReg(misc_reg); + return thread->miscRegFile.readReg(misc_reg); } template <class Impl> TheISA::MiscReg OzoneCPU<Impl>::OzoneTC::readMiscRegWithEffect(int misc_reg, Fault &fault) { - return thread->regs.miscRegs.readRegWithEffect(misc_reg, - fault, this); + return thread->miscRegFile.readRegWithEffect(misc_reg, + fault, this); } template <class Impl> @@ -1065,7 +1110,7 @@ Fault OzoneCPU<Impl>::OzoneTC::setMiscReg(int misc_reg, const MiscReg &val) { // Needs to setup a squash event unless we're in syscall mode - Fault ret_fault = thread->regs.miscRegs.setReg(misc_reg, val); + Fault ret_fault = thread->miscRegFile.setReg(misc_reg, val); if (!thread->inSyscall) { cpu->squashFromTC(); @@ -1079,8 +1124,8 @@ Fault OzoneCPU<Impl>::OzoneTC::setMiscRegWithEffect(int misc_reg, const MiscReg &val) { // Needs to setup a squash event unless we're in syscall mode - Fault ret_fault = thread->regs.miscRegs.setRegWithEffect(misc_reg, val, - this); + Fault ret_fault = thread->miscRegFile.setRegWithEffect(misc_reg, val, + this); if (!thread->inSyscall) { cpu->squashFromTC(); diff --git a/src/cpu/ozone/dyn_inst.hh b/src/cpu/ozone/dyn_inst.hh index 0bb50bd69..67691d416 100644 --- a/src/cpu/ozone/dyn_inst.hh +++ b/src/cpu/ozone/dyn_inst.hh @@ -34,9 +34,8 @@ #include "arch/isa_traits.hh" #include "config/full_system.hh" #include "cpu/base_dyn_inst.hh" -#include "cpu/ozone/cpu.hh" // MUST include this #include "cpu/inst_seq.hh" -//#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this +#include "cpu/ozone/cpu.hh" // MUST include this #include "cpu/ozone/ozone_impl.hh" #include <list> @@ -47,15 +46,17 @@ class OzoneDynInst : public BaseDynInst<Impl> { public: // Typedefs - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; - typedef typename FullCPU::ImplState ImplState; + typedef typename OzoneCPU::ImplState ImplState; // Typedef for DynInstPtr. This is really just a RefCountingPtr<OoODynInst>. typedef typename Impl::DynInstPtr DynInstPtr; typedef TheISA::ExtMachInst ExtMachInst; typedef TheISA::MachInst MachInst; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; typedef typename std::list<DynInstPtr>::iterator ListIt; @@ -67,10 +68,10 @@ class OzoneDynInst : public BaseDynInst<Impl> MaxInstDestRegs = TheISA::MaxInstDestRegs }; - OzoneDynInst(FullCPU *cpu); + OzoneDynInst(OzoneCPU *cpu); OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu); + InstSeqNum seq_num, OzoneCPU *cpu); OzoneDynInst(StaticInstPtr inst); @@ -131,7 +132,7 @@ class OzoneDynInst : public BaseDynInst<Impl> Fault initiateAcc(); - Fault completeAcc(); + Fault completeAcc(Packet *pkt); // The register accessor methods provide the index of the // instruction's operand (e.g., 0 or 1), not the architectural @@ -149,17 +150,30 @@ class OzoneDynInst : public BaseDynInst<Impl> return srcInsts[idx]->readIntResult(); } - float readFloatRegSingle(const StaticInst *si, int idx) + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + switch(width) { + case 32: + return srcInsts[idx]->readFloatResult(); + case 64: + return srcInsts[idx]->readDoubleResult(); + default: + panic("Width not supported"); + return 0; + } + } + + FloatReg readFloatReg(const StaticInst *si, int idx) { return srcInsts[idx]->readFloatResult(); } - double readFloatRegDouble(const StaticInst *si, int idx) + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) { - return srcInsts[idx]->readDoubleResult(); + return srcInsts[idx]->readIntResult(); } - uint64_t readFloatRegInt(const StaticInst *si, int idx) + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) { return srcInsts[idx]->readIntResult(); } @@ -172,19 +186,25 @@ class OzoneDynInst : public BaseDynInst<Impl> BaseDynInst<Impl>::setIntReg(si, idx, val); } - void setFloatRegSingle(const StaticInst *si, int idx, float val) + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + BaseDynInst<Impl>::setFloatReg(si, idx, val, width); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) { - BaseDynInst<Impl>::setFloatRegSingle(si, idx, val); + BaseDynInst<Impl>::setFloatReg(si, idx, val); } - void setFloatRegDouble(const StaticInst *si, int idx, double val) + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) { - BaseDynInst<Impl>::setFloatRegDouble(si, idx, val); + BaseDynInst<Impl>::setFloatRegBits(si, idx, val); } - void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) { - BaseDynInst<Impl>::setFloatRegInt(si, idx, val); + BaseDynInst<Impl>::setFloatRegBits(si, idx, val); } void setIntResult(uint64_t result) { this->instResult.integer = result; } @@ -223,7 +243,7 @@ class OzoneDynInst : public BaseDynInst<Impl> void trap(Fault fault); bool simPalCheck(int palFunc); #else - void syscall(); + void syscall(uint64_t &callnum); #endif ListIt iqIt; diff --git a/src/cpu/ozone/dyn_inst_impl.hh b/src/cpu/ozone/dyn_inst_impl.hh index 4149bf144..bad902c2a 100644 --- a/src/cpu/ozone/dyn_inst_impl.hh +++ b/src/cpu/ozone/dyn_inst_impl.hh @@ -37,7 +37,7 @@ using namespace TheISA; template <class Impl> -OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu) +OzoneDynInst<Impl>::OzoneDynInst(OzoneCPU *cpu) : BaseDynInst<Impl>(0, 0, 0, 0, cpu) { this->setResultReady(); @@ -47,7 +47,7 @@ OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu) template <class Impl> OzoneDynInst<Impl>::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) + InstSeqNum seq_num, OzoneCPU *cpu) : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) { initInstPtrs(); @@ -111,19 +111,9 @@ OzoneDynInst<Impl>::initiateAcc() template <class Impl> Fault -OzoneDynInst<Impl>::completeAcc() +OzoneDynInst<Impl>::completeAcc(Packet *pkt) { - if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(this->req->data, - this, - this->traceData); - } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, - this, - this->traceData); - } else { - panic("Unknown type!"); - } + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); return this->fault; } @@ -298,7 +288,7 @@ template <class Impl> void OzoneDynInst<Impl>::trap(Fault fault) { - fault->invoke(this->thread->getXCProxy()); + fault->invoke(this->thread->getTC()); } template <class Impl> @@ -310,8 +300,8 @@ OzoneDynInst<Impl>::simPalCheck(int palFunc) #else template <class Impl> void -OzoneDynInst<Impl>::syscall() +OzoneDynInst<Impl>::syscall(uint64_t &callnum) { - this->cpu->syscall(); + this->cpu->syscall(callnum); } #endif diff --git a/src/cpu/ozone/front_end.cc b/src/cpu/ozone/front_end.cc index f0ea8eae1..cfd033564 100644 --- a/src/cpu/ozone/front_end.cc +++ b/src/cpu/ozone/front_end.cc @@ -30,7 +30,7 @@ #include "cpu/ozone/front_end_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class FrontEnd<OzoneImpl>; -template class FrontEnd<SimpleImpl>; +//template class FrontEnd<SimpleImpl>; diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index af190008c..3ed3c4d18 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -33,15 +33,17 @@ #include <deque> +#include "arch/utility.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/bpred_unit.hh" #include "cpu/ozone/rename_table.hh" +#include "mem/port.hh" #include "mem/request.hh" #include "sim/eventq.hh" #include "sim/stats.hh" class ThreadContext; -class MemInterface; +class MemObject; template <class> class OzoneThreadState; class PageTable; @@ -55,18 +57,55 @@ class FrontEnd typedef typename Impl::Params Params; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::CPUType CPUType; typedef typename Impl::BackEnd BackEnd; - typedef typename Impl::FullCPU::OzoneTC OzoneTC; - typedef typename Impl::FullCPU::CommStruct CommStruct; + typedef typename Impl::CPUType::OzoneTC OzoneTC; + typedef typename Impl::CPUType::CommStruct CommStruct; + + /** IcachePort class. Handles doing the communication with the + * cache/memory. + */ + class IcachePort : public Port + { + protected: + /** Pointer to FE. */ + FrontEnd<Impl> *fe; + + public: + /** Default constructor. */ + IcachePort(FrontEnd<Impl> *_fe) + : fe(_fe) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles setting fetch to the + * proper status to start fetching. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of a failed fetch. */ + virtual void recvRetry(); + }; FrontEnd(Params *params); std::string name() const; - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } + void setCPU(CPUType *cpu_ptr); void setBackEnd(BackEnd *back_end_ptr) { backEnd = back_end_ptr; } @@ -80,6 +119,8 @@ class FrontEnd void regStats(); + Port *getIcachePort() { return &icachePort; } + void tick(); Fault fetchCacheLine(); void processInst(DynInstPtr &inst); @@ -104,6 +145,8 @@ class FrontEnd bool switchedOut; private: + void recvRetry(); + bool updateStatus(); void checkBE(); @@ -130,7 +173,7 @@ class FrontEnd { return cpu->globalSeqNum++; } public: - FullCPU *cpu; + CPUType *cpu; BackEnd *backEnd; @@ -141,8 +184,9 @@ class FrontEnd enum Status { Running, Idle, - IcacheMissStall, - IcacheMissComplete, + IcacheWaitResponse, + IcacheWaitRetry, + IcacheAccessComplete, SerializeBlocked, SerializeComplete, RenameBlocked, @@ -161,37 +205,9 @@ class FrontEnd BranchPred branchPred; - class IcachePort : public Port - { - protected: - FrontEnd *fe; - - public: - IcachePort(const std::string &_name, FrontEnd *_fe) - : Port(_name), fe(_fe) - { } - - protected: - virtual Tick recvAtomic(PacketPtr pkt); - - virtual void recvFunctional(PacketPtr pkt); - - virtual void recvStatusChange(Status status); - - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - virtual bool recvTiming(PacketPtr pkt); - - virtual void recvRetry(); - }; - IcachePort icachePort; -#if !FULL_SYSTEM - PageTable *pTable; -#endif + MemObject *mem; RequestPtr memReq; @@ -209,6 +225,11 @@ class FrontEnd bool cacheBlkValid; + bool cacheBlocked; + + /** The packet that is waiting to be retried. */ + PacketPtr retryPkt; + public: RenameTable<Impl> renameTable; diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 467567c10..9da937320 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -28,21 +28,69 @@ * Authors: Kevin Lim */ +#include "config/use_checker.hh" + #include "arch/faults.hh" #include "arch/isa_traits.hh" #include "base/statistics.hh" #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/front_end.hh" -#include "mem/mem_interface.hh" -#include "sim/byte_swap.hh" +#include "mem/mem_object.hh" +#include "mem/packet.hh" +#include "mem/request.hh" + +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif using namespace TheISA; +template<class Impl> +Tick +FrontEnd<Impl>::IcachePort::recvAtomic(PacketPtr pkt) +{ + panic("FrontEnd doesn't expect recvAtomic callback!"); + return curTick; +} + +template<class Impl> +void +FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt) +{ + panic("FrontEnd doesn't expect recvFunctional callback!"); +} + +template<class Impl> +void +FrontEnd<Impl>::IcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("FrontEnd doesn't expect recvStatusChange callback!"); +} + +template<class Impl> +bool +FrontEnd<Impl>::IcachePort::recvTiming(Packet *pkt) +{ + fe->processCacheCompletion(pkt); + return true; +} + +template<class Impl> +void +FrontEnd<Impl>::IcachePort::recvRetry() +{ + fe->recvRetry(); +} + template <class Impl> FrontEnd<Impl>::FrontEnd(Params *params) : branchPred(params), - icacheInterface(params->icacheInterface), + icachePort(this), + mem(params->mem), instBufferSize(0), maxInstBufferSize(params->maxInstBufferSize), width(params->frontEndWidth), @@ -57,7 +105,7 @@ FrontEnd<Impl>::FrontEnd(Params *params) memReq = NULL; // Size of cache block. - cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + cacheBlkSize = 64; assert(isPowerOf2(cacheBlkSize)); @@ -69,11 +117,10 @@ FrontEnd<Impl>::FrontEnd(Params *params) fetchCacheLineNextCycle = true; - cacheBlkValid = false; + cacheBlkValid = cacheBlocked = false; + + retryPkt = NULL; -#if !FULL_SYSTEM -// pTable = params->pTable; -#endif fetchFault = NoFault; } @@ -86,6 +133,21 @@ FrontEnd<Impl>::name() const template <class Impl> void +FrontEnd<Impl>::setCPU(CPUType *cpu_ptr) +{ + cpu = cpu_ptr; + + icachePort.setName(this->name() + "-iport"); + +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->setIcachePort(&icachePort); + } +#endif +} + +template <class Impl> +void FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm) { comm = _comm; @@ -272,7 +334,7 @@ FrontEnd<Impl>::tick() IFQFcount += instBufferSize == maxInstBufferSize; // Fetch cache line - if (status == IcacheMissComplete) { + if (status == IcacheAccessComplete) { cacheBlkValid = true; status = Running; @@ -281,8 +343,8 @@ FrontEnd<Impl>::tick() if (freeRegs <= 0) status = RenameBlocked; checkBE(); - } else if (status == IcacheMissStall) { - DPRINTF(FE, "Still in Icache miss stall.\n"); + } else if (status == IcacheWaitResponse || status == IcacheWaitRetry) { + DPRINTF(FE, "Still in Icache wait.\n"); icacheStallCycles++; return; } @@ -303,7 +365,7 @@ FrontEnd<Impl>::tick() } else if (status == QuiescePending) { DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n"); return; - } else if (status != IcacheMissComplete) { + } else if (status != IcacheAccessComplete) { if (fetchCacheLineNextCycle) { Fault fault = fetchCacheLine(); if (fault != NoFault) { @@ -314,7 +376,7 @@ FrontEnd<Impl>::tick() fetchCacheLineNextCycle = false; } // If miss, stall until it returns. - if (status == IcacheMissStall) { + if (status == IcacheWaitResponse || status == IcacheWaitRetry) { // Tell CPU to not tick me for now. return; } @@ -404,22 +466,16 @@ FrontEnd<Impl>::fetchCacheLine() // Setup the memReq to do a read of the first isntruction's address. // Set the appropriate read size and flags as well. - memReq = new MemReq(); - - memReq->asid = 0; - memReq->thread_num = 0; - memReq->data = new uint8_t[64]; - memReq->tc = tc; - memReq->cmd = Read; - memReq->reset(fetch_PC, cacheBlkSize, flags); + memReq = new Request(0, fetch_PC, cacheBlkSize, flags, + fetch_PC, cpu->readCpuId(), 0); // Translate the instruction request. - fault = cpu->translateInstReq(memReq); + fault = cpu->translateInstReq(memReq, thread); // Now do the timing access to see whether or not the instruction // exists within the cache. - if (icacheInterface && fault == NoFault) { -#if FULL_SYSTEM + if (fault == NoFault) { +#if 0 if (cpu->system->memctrl->badaddr(memReq->paddr) || memReq->flags & UNCACHEABLE) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " @@ -429,30 +485,21 @@ FrontEnd<Impl>::fetchCacheLine() } #endif - memReq->completionEvent = NULL; - - memReq->time = curTick; - fault = cpu->mem->read(memReq, cacheData); - - MemAccessResult res = icacheInterface->access(memReq); - - // If the cache missed then schedule an event to wake - // up this stage once the cache miss completes. - if (icacheInterface->doEvents() && res != MA_HIT) { - memReq->completionEvent = new ICacheCompletionEvent(memReq, this); - - status = IcacheMissStall; - - cacheBlkValid = false; - - DPRINTF(FE, "Cache miss.\n"); - } else { - DPRINTF(FE, "Cache hit.\n"); - - cacheBlkValid = true; - -// memcpy(cacheData, memReq->data, memReq->size); + // Build packet here. + PacketPtr data_pkt = new Packet(memReq, + Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(cacheData); + + if (!icachePort.sendTiming(data_pkt)) { + assert(retryPkt == NULL); + DPRINTF(Fetch, "Out of MSHRs!\n"); + status = IcacheWaitRetry; + retryPkt = data_pkt; + cacheBlocked = true; + return NoFault; } + + status = IcacheWaitResponse; } // Note that this will set the cache block PC a bit earlier than it should @@ -565,7 +612,7 @@ FrontEnd<Impl>::handleFault(Fault &fault) // instruction->setASID(tid); - instruction->setState(thread); + instruction->setThreadState(thread); instruction->traceData = NULL; @@ -614,8 +661,8 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC, } // Clear the icache miss if it's outstanding. - if (status == IcacheMissStall && icacheInterface) { - DPRINTF(FE, "Squashing outstanding Icache miss.\n"); + if (status == IcacheWaitResponse) { + DPRINTF(FE, "Squashing outstanding Icache access.\n"); memReq = NULL; } @@ -652,20 +699,22 @@ FrontEnd<Impl>::getInst() template <class Impl> void -FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req) +FrontEnd<Impl>::processCacheCompletion(PacketPtr pkt) { DPRINTF(FE, "Processing cache completion\n"); // Do something here. - if (status != IcacheMissStall || - req != memReq || + if (status != IcacheWaitResponse || + pkt->req != memReq || switchedOut) { DPRINTF(FE, "Previous fetch was squashed.\n"); fetchIcacheSquashes++; + delete pkt->req; + delete pkt; return; } - status = IcacheMissComplete; + status = IcacheAccessComplete; /* if (checkStall(tid)) { fetchStatus[tid] = Blocked; @@ -677,6 +726,8 @@ FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req) // Reset the completion event to NULL. // memReq->completionEvent = NULL; + delete pkt->req; + delete pkt; memReq = NULL; } @@ -698,6 +749,27 @@ FrontEnd<Impl>::addFreeRegs(int num_freed) } template <class Impl> +void +FrontEnd<Impl>::recvRetry() +{ + assert(cacheBlocked); + if (retryPkt != NULL) { + assert(status == IcacheWaitRetry); + + if (icachePort.sendTiming(retryPkt)) { + status = IcacheWaitResponse; + retryPkt = NULL; + cacheBlocked = false; + } + } else { + // Access has been squashed since it was sent out. Just clear + // the cache being blocked. + cacheBlocked = false; + } + +} + +template <class Impl> bool FrontEnd<Impl>::updateStatus() { @@ -775,7 +847,7 @@ FrontEnd<Impl>::getInstFromCacheline() DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), inst_seq, cpu); - instruction->setState(thread); + instruction->setThreadState(thread); DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n", inst_seq, instruction->readPC(), @@ -899,24 +971,3 @@ FrontEnd<Impl>::dumpInsts() buff_it++; } } - -template <class Impl> -FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe) - : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe) -{ - this->setFlags(Event::AutoDelete); -} - -template <class Impl> -void -FrontEnd<Impl>::ICacheCompletionEvent::process() -{ - frontEnd->processCacheCompletion(req); -} - -template <class Impl> -const char * -FrontEnd<Impl>::ICacheCompletionEvent::description() -{ - return "ICache completion event"; -} diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh index bb81f60c8..d836ceebd 100644 --- a/src/cpu/ozone/lw_back_end.hh +++ b/src/cpu/ozone/lw_back_end.hh @@ -51,6 +51,8 @@ class ThreadContext; template <class Impl> class OzoneThreadState; +class Port; + template <class Impl> class LWBackEnd { @@ -60,9 +62,9 @@ class LWBackEnd typedef typename Impl::Params Params; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; typedef typename Impl::FrontEnd FrontEnd; - typedef typename Impl::FullCPU::CommStruct CommStruct; + typedef typename Impl::OzoneCPU::CommStruct CommStruct; struct SizeStruct { int size; @@ -95,35 +97,13 @@ class LWBackEnd const char *description(); }; - /** LdWriteback event for a load completion. */ - class LdWritebackEvent : public Event { - private: - /** Instruction that is writing back data to the register file. */ - DynInstPtr inst; - /** Pointer to IEW stage. */ - LWBackEnd *be; - - bool dcacheMiss; - - public: - /** Constructs a load writeback event. */ - LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be); - - /** Processes writeback event. */ - virtual void process(); - /** Returns the description of the writeback event. */ - virtual const char *description(); - - void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); } - }; - LWBackEnd(Params *params); std::string name() const; void regStats(); - void setCPU(FullCPU *cpu_ptr); + void setCPU(OzoneCPU *cpu_ptr); void setFrontEnd(FrontEnd *front_end_ptr) { frontEnd = front_end_ptr; } @@ -136,6 +116,8 @@ class LWBackEnd void setCommBuffer(TimeBuffer<CommStruct> *_comm); + Port *getDcachePort() { return LSQ.getDcachePort(); } + void tick(); void squash(); void generateTCEvent() { tcSquash = true; } @@ -239,7 +221,7 @@ class LWBackEnd void updateComInstStats(DynInstPtr &inst); public: - FullCPU *cpu; + OzoneCPU *cpu; FrontEnd *frontEnd; @@ -273,24 +255,6 @@ class LWBackEnd RenameTable<Impl> renameTable; private: - class DCacheCompletionEvent : public Event - { - private: - LWBackEnd *be; - - public: - DCacheCompletionEvent(LWBackEnd *_be); - - virtual void process(); - virtual const char *description(); - }; - - friend class DCacheCompletionEvent; - - DCacheCompletionEvent cacheCompletionEvent; - - MemInterface *dcacheInterface; - // General back end width. Used if the more specific isn't given. int width; diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh index ed406d5a3..a4f1d805e 100644 --- a/src/cpu/ozone/lw_back_end_impl.hh +++ b/src/cpu/ozone/lw_back_end_impl.hh @@ -28,9 +28,14 @@ * Authors: Kevin Lim */ -#include "cpu/checker/cpu.hh" +#include "config/use_checker.hh" + #include "cpu/ozone/lw_back_end.hh" -#include "encumbered/cpu/full/op_class.hh" +#include "cpu/op_class.hh" + +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif template <class Impl> void @@ -134,86 +139,11 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst) assert(found_inst); } -template<class Impl> -LWBackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, - LWBackEnd<Impl> *_be) - : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false) -{ - this->setFlags(Event::AutoDelete); -} - -template<class Impl> -void -LWBackEnd<Impl>::LdWritebackEvent::process() -{ - DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); -// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); - - //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - -// iewStage->wakeCPU(); - - if (be->isSwitchedOut()) - return; - - if (dcacheMiss) { - be->removeDcacheMiss(inst); - } - - if (inst->isSquashed()) { - inst = NULL; - return; - } - - if (!inst->isExecuted()) { - inst->setExecuted(); - - // Execute again to copy data to proper place. - inst->completeAcc(); - } - - // Need to insert instruction into queue to commit - be->instToCommit(inst); - - //wroteToTimeBuffer = true; -// iewStage->activityThisCycle(); - - inst = NULL; -} - -template<class Impl> -const char * -LWBackEnd<Impl>::LdWritebackEvent::description() -{ - return "Load writeback event"; -} - - -template <class Impl> -LWBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be) - : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) -{ -} - -template <class Impl> -void -LWBackEnd<Impl>::DCacheCompletionEvent::process() -{ -} - -template <class Impl> -const char * -LWBackEnd<Impl>::DCacheCompletionEvent::description() -{ - return "Cache completion event"; -} - template <class Impl> LWBackEnd<Impl>::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - trapSquash(false), tcSquash(false), cacheCompletionEvent(this), - dcacheInterface(params->dcacheInterface), width(params->backEndWidth), - exactFullStall(true) + trapSquash(false), tcSquash(false), + width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; numInsts = 0; @@ -239,6 +169,7 @@ LWBackEnd<Impl>::LWBackEnd(Params *params) LSQ.init(params, params->LQEntries, params->SQEntries, 0); dispatchStatus = Running; + commitStatus = Running; } template <class Impl> @@ -569,7 +500,7 @@ LWBackEnd<Impl>::regStats() template <class Impl> void -LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr) +LWBackEnd<Impl>::setCPU(OzoneCPU *cpu_ptr) { cpu = cpu_ptr; LSQ.setCPU(cpu_ptr); @@ -626,6 +557,7 @@ LWBackEnd<Impl>::checkInterrupts() } } } +#endif template <class Impl> void @@ -639,7 +571,7 @@ LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency) // Consider holding onto the trap and waiting until the trap event // happens for this to be executed. - fault->invoke(thread->getTCProxy()); + fault->invoke(thread->getTC()); // Exit state update mode to avoid accidental updating. thread->inSyscall = false; @@ -649,7 +581,6 @@ LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency) // Generate trap squash event. generateTrapEvent(latency); } -#endif template <class Impl> void @@ -671,6 +602,7 @@ LWBackEnd<Impl>::tick() #if FULL_SYSTEM checkInterrupts(); +#endif if (trapSquash) { assert(!tcSquash); @@ -678,7 +610,6 @@ LWBackEnd<Impl>::tick() } else if (tcSquash) { squashFromTC(); } -#endif if (dispatchStatus != Blocked) { dispatchInsts(); @@ -929,11 +860,6 @@ LWBackEnd<Impl>::executeInsts() // at the commit stage. if (inst->isMemRef() && (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { - if (dcacheInterface->isBlocked()) { - // Should I move the instruction aside? - DPRINTF(BE, "Execute: dcache is blocked\n"); - break; - } DPRINTF(BE, "Execute: Initiating access for memory " "reference.\n"); @@ -941,7 +867,7 @@ LWBackEnd<Impl>::executeInsts() LSQ.executeLoad(inst); } else if (inst->isStore()) { LSQ.executeStore(inst); - if (inst->req && !(inst->req->flags & LOCKED)) { + if (inst->req && !(inst->req->getFlags() & LOCKED)) { inst->setExecuted(); instToCommit(inst); @@ -1078,7 +1004,7 @@ LWBackEnd<Impl>::commitInst(int inst_num) thread->setPC(inst->readPC()); thread->setNextPC(inst->readNextPC()); - inst->reachedCommit = true; + inst->setAtCommit(); // If the instruction is not executed yet, then it is a non-speculative // or store inst. Signal backwards that it should be executed. @@ -1183,9 +1109,11 @@ LWBackEnd<Impl>::commitInst(int inst_num) // Use checker prior to updating anything due to traps or PC // based events. +#if USE_CHECKER if (checker) { - checker->tick(inst); + checker->verify(inst); } +#endif if (inst_fault != NoFault) { DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", @@ -1200,19 +1128,18 @@ LWBackEnd<Impl>::commitInst(int inst_num) } else if (inst_num != 0) { DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); return false; - } else if (checker && inst->isStore()) { - checker->tick(inst); } +#if USE_CHECKER + else if (checker && inst->isStore()) { + checker->verify(inst); + } +#endif thread->setInst( static_cast<TheISA::MachInst>(inst->staticInst->machInst)); -#if FULL_SYSTEM + handleFault(inst_fault); return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - inst->PC); -#endif // FULL_SYSTEM } int freed_regs = 0; @@ -1259,7 +1186,7 @@ LWBackEnd<Impl>::commitInst(int inst_num) assert(!thread->inSyscall && !thread->trapPending); oldpc = thread->readPC(); cpu->system->pcEventQueue.service( - thread->getTCProxy()); + thread->getTC()); count++; } while (oldpc != thread->readPC()); if (count > 1) { @@ -1346,7 +1273,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn) (*insts_it)->setCanCommit(); - (*insts_it)->removeInROB(); + (*insts_it)->clearInROB(); for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); @@ -1497,10 +1424,10 @@ LWBackEnd<Impl>::doSwitchOut() template <class Impl> void -LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_xc) +LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc) { switchedOut = false; - xcSquash = false; + tcSquash = false; trapSquash = false; numInsts = 0; @@ -1510,7 +1437,7 @@ LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_xc) switchedOut = false; dispatchStatus = Running; commitStatus = Running; - LSQ.takeOverFrom(old_xc); + LSQ.takeOverFrom(old_tc); } template <class Impl> diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index b2924db54..2eb09d01a 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -47,7 +47,7 @@ #include "sim/debug.hh" #include "sim/sim_object.hh" -//class PageTable; +class MemObject; /** * Class that implements the actual LQ and SQ for each specific thread. @@ -64,7 +64,7 @@ template <class Impl> class OzoneLWLSQ { public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; typedef typename Impl::BackEnd BackEnd; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::IssueStruct IssueStruct; @@ -73,35 +73,6 @@ class OzoneLWLSQ { typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt; - private: - class StoreCompletionEvent : public Event { - public: - /** Constructs a store completion event. */ - StoreCompletionEvent(DynInstPtr &inst, BackEnd *be, - Event *wb_event, OzoneLWLSQ *lsq_ptr); - - /** Processes the store completion event. */ - void process(); - - /** Returns the description of this event. */ - const char *description(); - - private: - /** The store index of the store being written back. */ - DynInstPtr inst; - - BackEnd *be; - /** The writeback event for the store. Needed for store - * conditionals. - */ - public: - Event *wbEvent; - bool miss; - private: - /** The pointer to the LSQ unit that issued the store. */ - OzoneLWLSQ<Impl> *lsqPtr; - }; - public: /** Constructs an LSQ unit. init() must be called prior to use. */ OzoneLWLSQ(); @@ -114,15 +85,13 @@ class OzoneLWLSQ { std::string name() const; /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } + void setCPU(OzoneCPU *cpu_ptr); /** Sets the back-end stage pointer. */ void setBE(BackEnd *be_ptr) { be = be_ptr; } - /** Sets the page table pointer. */ -// void setPageTable(PageTable *pt_ptr); + Port *getDcachePort() { return &dcachePort; } /** Ticks the LSQ unit, which in this case only resets the number of * used cache ports. @@ -155,6 +124,10 @@ class OzoneLWLSQ { /** Writes back stores. */ void writebackStores(); + /** Completes the data access that has been returned from the + * memory system. */ + void completeDataAccess(PacketPtr pkt); + // @todo: Include stats in the LSQ unit. //void regStats(); @@ -231,8 +204,8 @@ class OzoneLWLSQ { /** Returns if the LSQ unit will writeback on this cycle. */ bool willWB() { return storeQueue.back().canWB && - !storeQueue.back().completed/* && - !dcacheInterface->isBlocked()*/; } + !storeQueue.back().completed && + !isStoreBlocked; } void switchOut(); @@ -243,12 +216,21 @@ class OzoneLWLSQ { bool switchedOut; private: + /** Writes back the instruction, sending it to IEW. */ + void writeback(DynInstPtr &inst, PacketPtr pkt); + + /** Handles completing the send of a store to memory. */ + void storePostSend(Packet *pkt, DynInstPtr &inst); + /** Completes the store at the specified index. */ void completeStore(int store_idx); + /** Handles doing the retry. */ + void recvRetry(); + private: /** Pointer to the CPU. */ - FullCPU *cpu; + OzoneCPU *cpu; /** Pointer to the back-end stage. */ BackEnd *be; @@ -258,11 +240,11 @@ class OzoneLWLSQ { class DcachePort : public Port { protected: - FullCPU *cpu; + OzoneLWLSQ *lsq; public: - DcachePort(const std::string &_name, FullCPU *_cpu) - : Port(_name), cpu(_cpu) + DcachePort(OzoneLWLSQ *_lsq) + : lsq(_lsq) { } protected: @@ -281,12 +263,9 @@ class OzoneLWLSQ { virtual void recvRetry(); }; - /** Pointer to the D-cache. */ + /** D-cache port. */ DcachePort dcachePort; - /** Pointer to the page table. */ -// PageTable *pTable; - public: struct SQEntry { /** Constructs an empty store queue entry. */ @@ -319,6 +298,48 @@ class OzoneLWLSQ { typename std::list<DynInstPtr>::iterator lqIt; }; + /** Derived class to hold any sender state the LSQ needs. */ + class LSQSenderState : public Packet::SenderState + { + public: + /** Default constructor. */ + LSQSenderState() + : noWB(false) + { } + + /** Instruction who initiated the access to memory. */ + DynInstPtr inst; + /** Whether or not it is a load. */ + bool isLoad; + /** The LQ/SQ index of the instruction. */ + int idx; + /** Whether or not the instruction will need to writeback. */ + bool noWB; + }; + + /** Writeback event, specifically for when stores forward data to loads. */ + class WritebackEvent : public Event { + public: + /** Constructs a writeback event. */ + WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, OzoneLWLSQ *lsq_ptr); + + /** Processes the writeback event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** Instruction whose results are being written back. */ + DynInstPtr inst; + + /** The packet that would have been sent to memory. */ + PacketPtr pkt; + + /** The pointer to the LSQ unit that issued the store. */ + OzoneLWLSQ<Impl> *lsqPtr; + }; + enum Status { Running, Idle, @@ -395,6 +416,12 @@ class OzoneLWLSQ { /** The index of the above store. */ LQIt stallingLoad; + /** The packet that needs to be retried. */ + PacketPtr retryPkt; + + /** Whehter or not a store is blocked due to the memory system. */ + bool isStoreBlocked; + /** Whether or not a load is blocked due to the memory system. It is * cleared when this value is checked via loadBlocked(). */ @@ -470,7 +497,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) // too). // @todo: Fix uncached accesses. if (req->getFlags() & UNCACHEABLE && - (inst != loadQueue.back() || !inst->reachedCommit)) { + (inst != loadQueue.back() || !inst->isAtCommit())) { DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " "commit/LSQ!\n", inst->seqNum); @@ -532,17 +559,19 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to " "[sn:%lli] addr %#x, data %#x\n", - (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(inst->memData)); -/* - typename BackEnd::LdWritebackEvent *wb = - new typename BackEnd::LdWritebackEvent(inst, - be); + (*sq_it).inst->seqNum, inst->seqNum, req->getVaddr(), + *(inst->memData)); + + PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + + WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); // We'll say this has a 1 cycle load-store forwarding latency // for now. - // FIXME - Need to make this a parameter. + // @todo: Need to make this a parameter. wb->schedule(curTick); -*/ + // Should keep track of stat for forwarded data return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || @@ -575,7 +604,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " "Store [sn:%lli] to load addr %#x\n", - (*sq_it).inst->seqNum, req->vaddr); + (*sq_it).inst->seqNum, req->getVaddr()); return NoFault; } @@ -597,6 +626,12 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(inst->memData); + LSQSenderState *state = new LSQSenderState; + state->isLoad = true; + state->idx = load_idx; + state->inst = inst; + data_pkt->senderState = state; + // if we have a cache, do cache access too if (!dcachePort.sendTiming(data_pkt)) { // There's an older load that's already going to squash. @@ -613,6 +648,10 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) return NoFault; } + if (req->getFlags() & LOCKED) { + cpu->lockFlag = true; + } + if (data_pkt->result != Packet::Success) { DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n"); DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index 05db3028a..88e9c218f 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -28,64 +28,112 @@ * Authors: Kevin Lim */ +#include "config/use_checker.hh" + #include "arch/isa_traits.hh" #include "base/str.hh" #include "cpu/ozone/lw_lsq.hh" #include "cpu/checker/cpu.hh" -template <class Impl> -OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, - BackEnd *_be, - Event *wb_event, - OzoneLWLSQ<Impl> *lsq_ptr) - : Event(&mainEventQueue), - inst(_inst), - be(_be), - wbEvent(wb_event), - miss(false), - lsqPtr(lsq_ptr) +template<class Impl> +OzoneLWLSQ<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, + OzoneLWLSQ *lsq_ptr) + : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) { this->setFlags(Event::AutoDelete); } -template <class Impl> +template<class Impl> void -OzoneLWLSQ<Impl>::StoreCompletionEvent::process() +OzoneLWLSQ<Impl>::WritebackEvent::process() +{ + if (!lsqPtr->isSwitchedOut()) { + lsqPtr->writeback(inst, pkt); + } + delete pkt; +} + +template<class Impl> +const char * +OzoneLWLSQ<Impl>::WritebackEvent::description() { - DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n", - inst->seqNum); + return "Store writeback event"; +} - //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); +template <class Impl> +Tick +OzoneLWLSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} -// lsqPtr->cpu->wakeCPU(); - if (lsqPtr->isSwitchedOut()) { - if (wbEvent) - delete wbEvent; +template <class Impl> +void +OzoneLWLSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} +template <class Impl> +void +OzoneLWLSQ<Impl>::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) return; - } - if (wbEvent) { - wbEvent->process(); - delete wbEvent; - } + panic("O3CPU doesn't expect recvStatusChange callback!"); +} - lsqPtr->completeStore(inst->sqIdx); - if (miss) - be->removeDcacheMiss(inst); +template <class Impl> +bool +OzoneLWLSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->completeDataAccess(pkt); + return true; } template <class Impl> -const char * -OzoneLWLSQ<Impl>::StoreCompletionEvent::description() +void +OzoneLWLSQ<Impl>::DcachePort::recvRetry() +{ + lsq->recvRetry(); +} + +template<class Impl> +void +OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt) { - return "LSQ store completion event"; + LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState); + DynInstPtr inst = state->inst; + DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum); + DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + + if (isSwitchedOut() || inst->isSquashed()) { + delete state; + delete pkt; + return; + } else { + if (!state->noWB) { + writeback(inst, pkt); + } + + if (inst->isStore()) { + completeStore(state->idx); + } + } + + delete state; + delete pkt; } template <class Impl> OzoneLWLSQ<Impl>::OzoneLWLSQ() - : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), - loadBlockedHandled(false) + : switchedOut(false), dcachePort(this), loads(0), stores(0), + storesToWB(0), stalled(false), isStoreBlocked(false), + isLoadBlocked(false), loadBlockedHandled(false) { } @@ -106,11 +154,11 @@ OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries, SQIndices.push(i); } + mem = params->mem; + usedPorts = 0; cachePorts = params->cachePorts; - dcacheInterface = params->dcacheInterface; - loadFaultInst = storeFaultInst = memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -125,6 +173,20 @@ OzoneLWLSQ<Impl>::name() const template<class Impl> void +OzoneLWLSQ<Impl>::setCPU(OzoneCPU *cpu_ptr) +{ + cpu = cpu_ptr; + dcachePort.setName(this->name() + "-dport"); + +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->setDcachePort(&dcachePort); + } +#endif +} + +template<class Impl> +void OzoneLWLSQ<Impl>::clearLQ() { loadQueue.clear(); @@ -481,6 +543,12 @@ OzoneLWLSQ<Impl>::writebackStores() (*sq_it).canWB && usedPorts < cachePorts) { + if (isStoreBlocked) { + DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + DynInstPtr inst = (*sq_it).inst; if ((*sq_it).size == 0 && !(*sq_it).completed) { @@ -495,48 +563,64 @@ OzoneLWLSQ<Impl>::writebackStores() continue; } - if (dcacheInterface && dcacheInterface->isBlocked()) { - DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" - " is blocked!\n"); - break; - } - ++usedPorts; assert((*sq_it).req); assert(!(*sq_it).committed); + Request *req = (*sq_it).req; (*sq_it).committed = true; - MemReqPtr req = (*sq_it).req; + assert(!inst->memData); + inst->memData = new uint8_t[64]; + memcpy(inst->memData, (uint8_t *)&(*sq_it).data, + req->getSize()); - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; + PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); - switch((*sq_it).size) { - case 1: - cpu->write(req, (uint8_t &)(*sq_it).data); - break; - case 2: - cpu->write(req, (uint16_t &)(*sq_it).data); - break; - case 4: - cpu->write(req, (uint32_t &)(*sq_it).data); - break; - case 8: - cpu->write(req, (uint64_t &)(*sq_it).data); - break; - default: - panic("Unexpected store size!\n"); - } - if (!(req->flags & LOCKED)) { - (*sq_it).inst->setCompleted(); - if (cpu->checker) { - cpu->checker->tick((*sq_it).inst); + LSQSenderState *state = new LSQSenderState; + state->isLoad = false; + state->idx = inst->sqIdx; + state->inst = inst; + data_pkt->senderState = state; + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + (*sq_it).inst->readPC(), + req->getPaddr(), *(inst->memData), + inst->seqNum); + + // @todo: Remove this SC hack once the memory system handles it. + if (req->getFlags() & LOCKED) { + if (req->getFlags() & UNCACHEABLE) { + req->setScResult(2); + } else { + if (cpu->lockFlag) { + req->setScResult(1); + } else { + req->setScResult(0); + // Hack: Instantly complete this store. + completeDataAccess(data_pkt); + --sq_it; + continue; + } } + } else { + // Non-store conditionals do not need a writeback. + state->noWB = true; } + if (!dcachePort.sendTiming(data_pkt)) { + // Need to handle becoming blocked on a store. + isStoreBlocked = true; + assert(retryPkt == NULL); + retryPkt = data_pkt; + } else { + storePostSend(data_pkt, inst); + --sq_it; + } +/* DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", inst->sqIdx,inst->readPC(), @@ -606,6 +690,7 @@ OzoneLWLSQ<Impl>::writebackStores() } else { panic("Must HAVE DCACHE!!!!!\n"); } +*/ } // Not sure this should set it to 0. @@ -685,10 +770,6 @@ OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num) SQIndices.push((*sq_it).inst->sqIdx); (*sq_it).inst = NULL; (*sq_it).canWB = 0; - - if ((*sq_it).req) { - assert(!(*sq_it).req->completionEvent); - } (*sq_it).req = NULL; --stores; storeQueue.erase(sq_it++); @@ -734,6 +815,72 @@ OzoneLWLSQ<Impl>::dumpInsts() template <class Impl> void +OzoneLWLSQ<Impl>::storePostSend(Packet *pkt, DynInstPtr &inst) +{ + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load [sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + if (!inst->isStoreConditional()) { + // The store is basically completed at this time. This + // only works so long as the checker doesn't try to + // verify the value in memory for stores. + inst->setCompleted(); +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->verify(inst); + } +#endif + } + + if (pkt->result != Packet::Success) { + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + + DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", + inst->seqNum); + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(OzoneLWLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // @todo: Increment stat here. + } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit!\n"); + + DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", + inst->seqNum); + } +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) +{ + // Squashed instructions do not need to complete their access. + if (inst->isSquashed()) { + assert(!inst->isStore()); + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Complete access to copy data to proper place. + inst->completeAcc(pkt); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); +} + +template <class Impl> +void OzoneLWLSQ<Impl>::completeStore(int store_idx) { SQHashIt sq_hash_it = SQItHash.find(store_idx); @@ -766,9 +913,18 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx) --stores; inst->setCompleted(); +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(inst); + cpu->checker->verify(inst); } +#endif +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::recvRetry() +{ + panic("Unimplemented!"); } template <class Impl> @@ -777,68 +933,6 @@ OzoneLWLSQ<Impl>::switchOut() { assert(storesToWB == 0); switchedOut = true; - SQIt sq_it = --(storeQueue.end()); - while (storesToWB > 0 && - sq_it != storeQueue.end() && - (*sq_it).inst && - (*sq_it).canWB) { - - DynInstPtr inst = (*sq_it).inst; - - if ((*sq_it).size == 0 && !(*sq_it).completed) { - sq_it--; - continue; - } - - // Store conditionals don't complete until *after* they have written - // back. If it's here and not yet sent to memory, then don't bother - // as it's not part of committed state. - if (inst->isDataPrefetch() || (*sq_it).committed) { - sq_it--; - continue; - } else if ((*sq_it).req->flags & LOCKED) { - sq_it--; - assert(!(*sq_it).canWB || - ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); - continue; - } - - assert((*sq_it).req); - assert(!(*sq_it).committed); - - MemReqPtr req = (*sq_it).req; - (*sq_it).committed = true; - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); - - DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", - inst->sqIdx,inst->readPC(), - req->paddr, *(req->data), - inst->seqNum); - - switch((*sq_it).size) { - case 1: - cpu->write(req, (uint8_t &)(*sq_it).data); - break; - case 2: - cpu->write(req, (uint16_t &)(*sq_it).data); - break; - case 4: - cpu->write(req, (uint32_t &)(*sq_it).data); - break; - case 8: - cpu->write(req, (uint64_t &)(*sq_it).data); - break; - default: - panic("Unexpected store size!\n"); - } - } // Clear the queue to free up resources storeQueue.clear(); diff --git a/src/cpu/ozone/ozone_base_dyn_inst.cc b/src/cpu/ozone/ozone_base_dyn_inst.cc new file mode 100644 index 000000000..5a3a69dff --- /dev/null +++ b/src/cpu/ozone/ozone_base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +// Explicit instantiation +template class BaseDynInst<OzoneImpl>; + +template <> +int +BaseDynInst<OzoneImpl>::instcount = 0; diff --git a/src/cpu/ozone/ozone_impl.hh b/src/cpu/ozone/ozone_impl.hh index e977d06a9..503675738 100644 --- a/src/cpu/ozone/ozone_impl.hh +++ b/src/cpu/ozone/ozone_impl.hh @@ -50,7 +50,7 @@ class OzoneDynInst; struct OzoneImpl { typedef SimpleParams Params; typedef OzoneCPU<OzoneImpl> OzoneCPU; - typedef OzoneCPU FullCPU; + typedef OzoneCPU CPUType; // Would like to put these into their own area. // typedef NullPredictor BranchPred; diff --git a/src/cpu/ozone/rename_table.cc b/src/cpu/ozone/rename_table.cc index b0a36afbe..a44054b6e 100644 --- a/src/cpu/ozone/rename_table.cc +++ b/src/cpu/ozone/rename_table.cc @@ -30,7 +30,7 @@ #include "cpu/ozone/rename_table_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class RenameTable<OzoneImpl>; -template class RenameTable<SimpleImpl>; +//template class RenameTable<SimpleImpl>; diff --git a/src/cpu/ozone/simple_base_dyn_inst.cc b/src/cpu/ozone/simple_base_dyn_inst.cc new file mode 100644 index 000000000..fdaeaf57e --- /dev/null +++ b/src/cpu/ozone/simple_base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +// Explicit instantiation +template class BaseDynInst<SimpleImpl>; + +template <> +int +BaseDynInst<SimpleImpl>::instcount = 0; diff --git a/src/cpu/ozone/simple_cpu_builder.cc b/src/cpu/ozone/simple_cpu_builder.cc new file mode 100644 index 000000000..baaf7c708 --- /dev/null +++ b/src/cpu/ozone/simple_cpu_builder.cc @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include <string> + +#include "cpu/checker/cpu.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/cpu_impl.hh" +#include "cpu/ozone/simple_impl.hh" +#include "cpu/ozone/simple_params.hh" +#include "mem/cache/base_cache.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +template +class OzoneCPU<SimpleImpl>; + +class SimpleOzoneCPU : public OzoneCPU<SimpleImpl> +{ + public: + SimpleOzoneCPU(SimpleParams *p) + : OzoneCPU<SimpleImpl>(p) + { } +}; + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + Param<int> clock; + Param<int> numThreads; + +#if FULL_SYSTEM +SimObjectParam<System *> system; +Param<int> cpu_id; +SimObjectParam<AlphaITB *> itb; +SimObjectParam<AlphaDTB *> dtb; +#else +SimObjectVectorParam<Process *> workload; +//SimObjectParam<PageTable *> page_table; +#endif // FULL_SYSTEM + +SimObjectParam<FunctionalMemory *> mem; + +SimObjectParam<BaseCPU *> checker; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +SimObjectParam<BaseCache *> icache; +SimObjectParam<BaseCache *> dcache; + +Param<unsigned> cachePorts; +Param<unsigned> width; +Param<unsigned> frontEndWidth; +Param<unsigned> backEndWidth; +Param<unsigned> backEndSquashLatency; +Param<unsigned> backEndLatency; +Param<unsigned> maxInstBufferSize; +Param<unsigned> numPhysicalRegs; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> issueWidth; +Param<unsigned> executeWidth; +Param<unsigned> executeIntWidth; +Param<unsigned> executeFloatWidth; +Param<unsigned> executeBranchWidth; +Param<unsigned> executeMemoryWidth; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; + +Param<std::string> predType; +Param<unsigned> localPredictorSize; +Param<unsigned> localCtrBits; +Param<unsigned> localHistoryTableSize; +Param<unsigned> localHistoryBits; +Param<unsigned> globalPredictorSize; +Param<unsigned> globalCtrBits; +Param<unsigned> globalHistoryBits; +Param<unsigned> choicePredictorSize; +Param<unsigned> choiceCtrBits; + +Param<unsigned> BTBEntries; +Param<unsigned> BTBTagSize; + +Param<unsigned> RASSize; + +Param<unsigned> LQEntries; +Param<unsigned> SQEntries; +Param<unsigned> LFSTSize; +Param<unsigned> SSITSize; + +Param<unsigned> numPhysIntRegs; +Param<unsigned> numPhysFloatRegs; +Param<unsigned> numIQEntries; +Param<unsigned> numROBEntries; + +Param<bool> decoupledFrontEnd; +Param<int> dispatchWidth; +Param<int> wbWidth; + +Param<unsigned> smtNumFetchingThreads; +Param<std::string> smtFetchPolicy; +Param<std::string> smtLSQPolicy; +Param<unsigned> smtLSQThreshold; +Param<std::string> smtIQPolicy; +Param<unsigned> smtIQThreshold; +Param<std::string> smtROBPolicy; +Param<unsigned> smtROBThreshold; +Param<std::string> smtCommitPolicy; + +Param<unsigned> instShiftAmt; + +Param<bool> defer_registration; + +Param<bool> function_trace; +Param<Tick> function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +// INIT_PARAM(page_table, "Page table"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + INIT_PARAM_DFLT(width, "Width", 1), + INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), + INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), + INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), + INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), + INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), + INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +CREATE_SIM_OBJECT(SimpleOzoneCPU) +{ + SimpleOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + params->checker = checker; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + params->predType = predType; + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new SimpleOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) + diff --git a/src/cpu/ozone/simple_params.hh b/src/cpu/ozone/simple_params.hh index 13eb05e77..11cee716f 100644 --- a/src/cpu/ozone/simple_params.hh +++ b/src/cpu/ozone/simple_params.hh @@ -37,8 +37,7 @@ class AlphaDTB; class AlphaITB; class FUPool; -class FunctionalMemory; -class MemInterface; +class MemObject; class PageTable; class Process; class System; @@ -62,13 +61,13 @@ class SimpleParams : public BaseCPU::Params //Page Table PageTable *pTable; - FunctionalMemory *mem; + MemObject *mem; // // Caches // - MemInterface *icacheInterface; - MemInterface *dcacheInterface; +// MemInterface *icacheInterface; +// MemInterface *dcacheInterface; unsigned cachePorts; unsigned width; diff --git a/src/cpu/ozone/thread_state.hh b/src/cpu/ozone/thread_state.hh index 299878c29..ef4b1429d 100644 --- a/src/cpu/ozone/thread_state.hh +++ b/src/cpu/ozone/thread_state.hh @@ -58,30 +58,23 @@ class FunctionalMemory; template <class Impl> struct OzoneThreadState : public ThreadState { typedef typename ThreadContext::Status Status; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::CPUType CPUType; typedef TheISA::MiscReg MiscReg; #if FULL_SYSTEM - OzoneThreadState(FullCPU *_cpu, int _thread_num) + OzoneThreadState(CPUType *_cpu, int _thread_num) : ThreadState(-1, _thread_num), - inSyscall(0), trapPending(0) + intrflag(0), inSyscall(0), trapPending(0) { - memset(®s, 0, sizeof(TheISA::RegFile)); + miscRegFile.clear(); } #else - OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) - : ThreadState(-1, _thread_num, NULL, _process, _asid), + OzoneThreadState(CPUType *_cpu, int _thread_num, Process *_process, + int _asid, MemObject *mem) + : ThreadState(-1, _thread_num, _process, _asid, mem), cpu(_cpu), inSyscall(0), trapPending(0) { - memset(®s, 0, sizeof(TheISA::RegFile)); - } - - OzoneThreadState(FullCPU *_cpu, int _thread_num, - int _asid) - : ThreadState(-1, _thread_num, NULL, NULL, _asid), - cpu(_cpu), inSyscall(0), trapPending(0) - { - memset(®s, 0, sizeof(TheISA::RegFile)); + miscRegFile.clear(); } #endif @@ -91,9 +84,11 @@ struct OzoneThreadState : public ThreadState { Addr nextPC; - TheISA::RegFile regs; + TheISA::MiscRegFile miscRegFile; + + int intrflag; - typename Impl::FullCPU *cpu; + typename Impl::CPUType *cpu; bool inSyscall; @@ -103,54 +98,24 @@ struct OzoneThreadState : public ThreadState { ThreadContext *getTC() { return tc; } -#if !FULL_SYSTEM - Fault translateInstReq(Request *req) - { - return process->pTable->translate(req); - } - Fault translateDataReadReq(Request *req) - { - return process->pTable->translate(req); - } - Fault translateDataWriteReq(Request *req) - { - return process->pTable->translate(req); - } -#else - Fault translateInstReq(Request *req) - { - return cpu->itb->translate(req); - } - - Fault translateDataReadReq(Request *req) - { - return cpu->dtb->translate(req, false); - } - - Fault translateDataWriteReq(Request *req) - { - return cpu->dtb->translate(req, true); - } -#endif - MiscReg readMiscReg(int misc_reg) { - return regs.readMiscReg(misc_reg); + return miscRegFile.readReg(misc_reg); } MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) { - return regs.readMiscRegWithEffect(misc_reg, fault, tc); + return miscRegFile.readRegWithEffect(misc_reg, fault, tc); } Fault setMiscReg(int misc_reg, const MiscReg &val) { - return regs.setMiscReg(misc_reg, val); + return miscRegFile.setReg(misc_reg, val); } Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) { - return regs.setMiscRegWithEffect(misc_reg, val, tc); + return miscRegFile.setRegWithEffect(misc_reg, val, tc); } uint64_t readPC() diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 071193f02..1752b2b5b 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -33,6 +33,7 @@ #include "cpu/simple/atomic.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; @@ -55,18 +56,28 @@ AtomicSimpleCPU::TickEvent::description() return "AtomicSimpleCPU tick event"; } +Port * +AtomicSimpleCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return &dcachePort; + else if (if_name == "icache_port") + return &icachePort; + else + panic("No Such Port\n"); +} void AtomicSimpleCPU::init() { //Create Memory Ports (conect them up) - Port *mem_dport = mem->getPort(""); - dcachePort.setPeer(mem_dport); - mem_dport->setPeer(&dcachePort); +// Port *mem_dport = mem->getPort(""); +// dcachePort.setPeer(mem_dport); +// mem_dport->setPeer(&dcachePort); - Port *mem_iport = mem->getPort(""); - icachePort.setPeer(mem_iport); - mem_iport->setPeer(&icachePort); +// Port *mem_iport = mem->getPort(""); +// icachePort.setPeer(mem_iport); +// mem_iport->setPeer(&icachePort); BaseCPU::init(); #if FULL_SYSTEM @@ -124,15 +135,18 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) // @todo fix me and get the real cpu id & thread number!!! ifetch_req = new Request(); + ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); ifetch_pkt->dataStatic(&inst); data_read_req = new Request(); + data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_read_pkt = new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); data_read_pkt->dataStatic(&dataReg); data_write_req = new Request(); + data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_write_pkt = new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); } @@ -145,8 +159,8 @@ AtomicSimpleCPU::~AtomicSimpleCPU() void AtomicSimpleCPU::serialize(ostream &os) { - BaseSimpleCPU::serialize(os); SERIALIZE_ENUM(_status); + BaseSimpleCPU::serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); } @@ -154,21 +168,25 @@ AtomicSimpleCPU::serialize(ostream &os) void AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - BaseSimpleCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); + BaseSimpleCPU::unserialize(cp, section); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } void -AtomicSimpleCPU::switchOut(Sampler *s) +AtomicSimpleCPU::resume() { - sampler = s; - if (status() == Running) { - _status = SwitchedOut; + assert(system->getMemoryMode() == System::Atomic); + changeState(SimObject::Running); +} - tickEvent.squash(); - } - sampler->signalSwitched(); +void +AtomicSimpleCPU::switchOut() +{ + assert(status() == Running || status() == Idle); + _status = SwitchedOut; + + tickEvent.squash(); } @@ -410,15 +428,14 @@ AtomicSimpleCPU::tick() postExecute(); if (simulate_stalls) { - // This calculation assumes that the icache and dcache - // access latencies are always a multiple of the CPU's - // cycle time. If not, the next tick event may get - // scheduled at a non-integer multiple of the CPU - // cycle time. Tick icache_stall = icache_latency - cycles(1); Tick dcache_stall = dcache_access ? dcache_latency - cycles(1) : 0; - latency += icache_stall + dcache_stall; + Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1); + if (cycles(stall_cycles) < (icache_stall + dcache_stall)) + latency += cycles(stall_cycles+1); + else + latency += cycles(stall_cycles); } } @@ -442,11 +459,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; SimObjectParam<MemObject *> mem; + SimObjectParam<System *> system; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; #else @@ -474,11 +491,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -511,11 +528,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->width = width; params->simulate_stalls = simulate_stalls; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 7f4956da9..d59ca01aa 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -122,10 +122,13 @@ class AtomicSimpleCPU : public BaseSimpleCPU public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - void switchOut(Sampler *s); + virtual void resume(); + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); virtual void activateContext(int thread_num, int delay); diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index d94b0e079..af10e64d7 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Steve Reinhardt + * Korey Sewell */ #include "arch/utility.hh" @@ -40,7 +41,6 @@ #include "cpu/base.hh" #include "cpu/exetrace.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/simple/base.hh" #include "cpu/simple_thread.hh" #include "cpu/smt.hh" @@ -55,10 +55,10 @@ #include "sim/sim_events.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" +#include "sim/system.hh" #if FULL_SYSTEM #include "base/remote_gdb.hh" -#include "sim/system.hh" #include "arch/tlb.hh" #include "arch/stacktrace.hh" #include "arch/vtophys.hh" @@ -358,8 +358,13 @@ Fault BaseSimpleCPU::setupFetchRequest(Request *req) { // set up memory request for instruction fetch +#if THE_ISA == ALPHA_ISA + DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p",thread->readPC(), + thread->readNextPC()); +#else DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",thread->readPC(), thread->readNextPC(),thread->readNextNPC()); +#endif req->setVirt(0, thread->readPC() & ~3, sizeof(MachInst), (FULL_SYSTEM && (thread->readPC() & 1)) ? PHYSICAL : 0, @@ -440,11 +445,7 @@ void BaseSimpleCPU::advancePC(Fault fault) { if (fault != NoFault) { -#if FULL_SYSTEM fault->invoke(tc); -#else // !FULL_SYSTEM - fatal("fault (%s) detected @ PC %08p", fault->name(), thread->readPC()); -#endif // FULL_SYSTEM } else { // go to the next instruction diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 39bc86050..57cfa3c2c 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -38,7 +38,6 @@ #include "cpu/base.hh" #include "cpu/simple_thread.hh" #include "cpu/pc_event.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "mem/port.hh" @@ -128,11 +127,6 @@ class BaseSimpleCPU : public BaseCPU // Static data storage TheISA::IntReg dataReg; - // Pointer to the sampler that is telling us to switchover. - // Used to signal the completion of the pipe drain and schedule - // the next switchover - Sampler *sampler; - StaticInstPtr curStaticInst; void checkForInterrupts(); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index c99db8fbf..d2c2c7c47 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -33,23 +33,25 @@ #include "cpu/simple/timing.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; +Port * +TimingSimpleCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return &dcachePort; + else if (if_name == "icache_port") + return &icachePort; + else + panic("No Such Port\n"); +} void TimingSimpleCPU::init() { - //Create Memory Ports (conect them up) - Port *mem_dport = mem->getPort(""); - dcachePort.setPeer(mem_dport); - mem_dport->setPeer(&dcachePort); - - Port *mem_iport = mem->getPort(""); - icachePort.setPeer(mem_iport); - mem_iport->setPeer(&icachePort); - BaseCPU::init(); #if FULL_SYSTEM for (int i = 0; i < threadContexts.size(); ++i) { @@ -88,6 +90,9 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; + drainEvent = NULL; + fetchEvent = NULL; + changeState(SimObject::Running); } @@ -98,25 +103,61 @@ TimingSimpleCPU::~TimingSimpleCPU() void TimingSimpleCPU::serialize(ostream &os) { - BaseSimpleCPU::serialize(os); SERIALIZE_ENUM(_status); + BaseSimpleCPU::serialize(os); } void TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - BaseSimpleCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); + BaseSimpleCPU::unserialize(cp, section); +} + +unsigned int +TimingSimpleCPU::drain(Event *drain_event) +{ + // TimingSimpleCPU is ready to drain if it's not waiting for + // an access to complete. + if (status() == Idle || status() == Running || status() == SwitchedOut) { + changeState(SimObject::Drained); + return 0; + } else { + changeState(SimObject::Draining); + drainEvent = drain_event; + return 1; + } } void -TimingSimpleCPU::switchOut(Sampler *s) +TimingSimpleCPU::resume() { - sampler = s; - if (status() == Running) { - _status = SwitchedOut; + if (_status != SwitchedOut && _status != Idle) { + // Delete the old event if it existed. + if (fetchEvent) { + assert(!fetchEvent->scheduled()); + delete fetchEvent; + } + + fetchEvent = + new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); + fetchEvent->schedule(curTick); } - sampler->signalSwitched(); + + assert(system->getMemoryMode() == System::Timing); + changeState(SimObject::Running); +} + +void +TimingSimpleCPU::switchOut() +{ + assert(status() == Running || status() == Idle); + _status = SwitchedOut; + + // If we've been scheduled to resume but are then told to switch out, + // we'll need to cancel it. + if (fetchEvent && fetchEvent->scheduled()) + fetchEvent->deschedule(); } @@ -148,9 +189,9 @@ TimingSimpleCPU::activateContext(int thread_num, int delay) notIdleFraction++; _status = Running; // kick things off by initiating the fetch of the next instruction - Event *e = - new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, true); - e->schedule(curTick + cycles(delay)); + fetchEvent = + new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, false); + fetchEvent->schedule(curTick + cycles(delay)); } @@ -176,7 +217,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) { // need to fill in CPU & thread IDs here Request *data_read_req = new Request(); - + data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { @@ -257,6 +298,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { // need to fill in CPU & thread IDs here Request *data_write_req = new Request(); + data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); // translate to physical address @@ -340,6 +382,7 @@ TimingSimpleCPU::fetch() // need to fill in CPU & thread IDs here Request *ifetch_req = new Request(); + ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE Fault fault = setupFetchRequest(ifetch_req); ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); @@ -383,11 +426,17 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) // instruction assert(pkt->result == Packet::Success); assert(_status == IcacheWaitResponse); + _status = Running; delete pkt->req; delete pkt; + if (getState() == SimObject::Draining) { + completeDrain(); + return; + } + preExecute(); if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) { // load or store: just send to dcache @@ -440,6 +489,15 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; + if (getState() == SimObject::Draining) { + completeDrain(); + + delete pkt->req; + delete pkt; + + return; + } + Fault fault = curStaticInst->completeAcc(pkt, this, traceData); delete pkt->req; @@ -450,6 +508,13 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) } +void +TimingSimpleCPU::completeDrain() +{ + DPRINTF(Config, "Done draining\n"); + changeState(SimObject::Drained); + drainEvent->process(); +} bool TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) @@ -484,11 +549,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) Param<Counter> max_loads_any_thread; Param<Counter> max_loads_all_threads; SimObjectParam<MemObject *> mem; + SimObjectParam<System *> system; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - SimObjectParam<System *> system; Param<int> cpu_id; Param<Tick> profile; #else @@ -516,11 +581,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -551,11 +616,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index ab0b2d2ca..ac36e5c99 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -64,6 +64,10 @@ class TimingSimpleCPU : public BaseSimpleCPU Status status() const { return _status; } + Event *drainEvent; + + Event *fetchEvent; + private: class CpuPort : public Port @@ -128,10 +132,15 @@ class TimingSimpleCPU : public BaseSimpleCPU public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - void switchOut(Sampler *s); + virtual unsigned int drain(Event *drain_event); + virtual void resume(); + + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); virtual void activateContext(int thread_num, int delay); @@ -147,6 +156,8 @@ class TimingSimpleCPU : public BaseSimpleCPU void completeIfetch(Packet *); void completeDataAccess(Packet *); void advanceInst(Fault fault); + private: + void completeDrain(); }; #endif // __CPU_SIMPLE_TIMING_HH__ diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index 48383ca93..af1db2ff2 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -107,7 +107,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, #else SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject* memobj) - : ThreadState(-1, _thread_num, memobj, _process, _asid), + : ThreadState(-1, _thread_num, _process, _asid, memobj), cpu(_cpu) { /* Use this port to for syscall emulation writes to memory. */ @@ -123,15 +123,19 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, tc = new ProxyThreadContext<SimpleThread>(this); } -SimpleThread::SimpleThread(RegFile *regFile) - : ThreadState(-1, -1, NULL, NULL, -1), cpu(NULL) +#endif + +SimpleThread::SimpleThread() +#if FULL_SYSTEM + : ThreadState(-1, -1) +#else + : ThreadState(-1, -1, NULL, -1, NULL) +#endif { - regs = *regFile; tc = new ProxyThreadContext<SimpleThread>(this); + regs.clear(); } -#endif - SimpleThread::~SimpleThread() { delete tc; @@ -147,13 +151,8 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) assert(process == oldContext->getProcessPtr()); #endif - // copy over functional state - _status = oldContext->status(); - copyArchRegs(oldContext); - cpuId = oldContext->readCpuId(); -#if !FULL_SYSTEM - funcExeInst = oldContext->readFuncExeInst(); -#else + copyState(oldContext); +#if FULL_SYSTEM EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent(); if (quiesce) { // Point the quiesce event's TC at this TC so that it wakes up @@ -171,42 +170,49 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) } void -SimpleThread::serialize(ostream &os) +SimpleThread::copyTC(ThreadContext *context) { - SERIALIZE_ENUM(_status); - regs.serialize(os); - // thread_num and cpu_id are deterministic from the config - SERIALIZE_SCALAR(funcExeInst); - SERIALIZE_SCALAR(inst); + copyState(context); #if FULL_SYSTEM - Tick quiesceEndTick = 0; - if (quiesceEvent->scheduled()) - quiesceEndTick = quiesceEvent->when(); - SERIALIZE_SCALAR(quiesceEndTick); - if (kernelStats) - kernelStats->serialize(os); + EndQuiesceEvent *quiesce = context->getQuiesceEvent(); + if (quiesce) { + quiesceEvent = quiesce; + } + Kernel::Statistics *stats = context->getKernelStats(); + if (stats) { + kernelStats = stats; + } #endif } +void +SimpleThread::copyState(ThreadContext *oldContext) +{ + // copy over functional state + _status = oldContext->status(); + copyArchRegs(oldContext); + cpuId = oldContext->readCpuId(); +#if !FULL_SYSTEM + funcExeInst = oldContext->readFuncExeInst(); +#endif +} + +void +SimpleThread::serialize(ostream &os) +{ + ThreadState::serialize(os); + regs.serialize(os); + // thread_num and cpu_id are deterministic from the config +} + void SimpleThread::unserialize(Checkpoint *cp, const std::string §ion) { - UNSERIALIZE_ENUM(_status); + ThreadState::unserialize(cp, section); regs.unserialize(cp, section); // thread_num and cpu_id are deterministic from the config - UNSERIALIZE_SCALAR(funcExeInst); - UNSERIALIZE_SCALAR(inst); - -#if FULL_SYSTEM - Tick quiesceEndTick; - UNSERIALIZE_SCALAR(quiesceEndTick); - if (quiesceEndTick) - quiesceEvent->schedule(quiesceEndTick); - if (kernelStats) - kernelStats->unserialize(cp, section); -#endif } #if FULL_SYSTEM diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index de65e9891..d36853db4 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -119,16 +119,20 @@ class SimpleThread : public ThreadState #else SimpleThread(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *memobj); - // Constructor to use SimpleThread to pass reg file around. Not - // used for anything else. - SimpleThread(RegFile *regFile); #endif + + SimpleThread(); + virtual ~SimpleThread(); virtual void takeOverFrom(ThreadContext *oldContext); void regStats(const std::string &name); + void copyTC(ThreadContext *context); + + void copyState(ThreadContext *oldContext); + void serialize(std::ostream &os); void unserialize(Checkpoint *cp, const std::string §ion); diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index bea52f510..ea1a65148 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -34,10 +34,12 @@ #include <bitset> #include <string> +#include "base/bitfield.hh" #include "base/hashmap.hh" #include "base/misc.hh" #include "base/refcnt.hh" #include "cpu/op_class.hh" +#include "cpu/o3/dyn_inst.hh" #include "sim/host.hh" #include "arch/isa_traits.hh" @@ -50,9 +52,6 @@ class DynInst; class Packet; template <class Impl> -class AlphaDynInst; - -template <class Impl> class OzoneDynInst; class CheckerCPU; @@ -411,16 +410,10 @@ class StaticInst : public StaticInstBase //This is defined as inline below. static StaticInstPtr decode(ExtMachInst mach_inst); - //MIPS Decoder Debug Functions - int getOpcode() { return (machInst & 0xFC000000) >> 26 ; }//31..26 - int getRs() { return (machInst & 0x03E00000) >> 21; } //25...21 - int getRt() { return (machInst & 0x001F0000) >> 16; } //20...16 - int getRd() { return (machInst & 0x0000F800) >> 11; } //15...11 - int getImm() { return (machInst & 0x0000FFFF); } //15...0 - int getFunction(){ return (machInst & 0x0000003F); }//5...0 - int getBranch(){ return (machInst & 0x0000FFFF); }//15...0 - int getJump(){ return (machInst & 0x03FFFFFF); }//5...0 - int getHint(){ return (machInst & 0x000007C0) >> 6; } //10...6 + /// Return opcode of machine instruction + uint32_t getOpcode() { return bits(machInst, 31, 26);} + + /// Return name of machine instruction std::string getName() { return mnemonic; } }; diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 48c8fa28d..e019e22bc 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -143,7 +143,7 @@ class ThreadContext virtual void suspend() = 0; /// Set the status to Unallocated. - virtual void deallocate() = 0; + virtual void deallocate(int delay = 0) = 0; /// Set the status to Halted. virtual void halt() = 0; @@ -245,10 +245,13 @@ class ThreadContext virtual void setSyscallReturn(SyscallReturn return_value) = 0; - virtual void syscall(int64_t callnum) = 0; - // Same with st cond failures. virtual Counter readFuncExeInst() = 0; + + // This function exits the thread context in the CPU and returns + // 1 if the CPU has no more active threads (meaning it's OK to exit); + // Used in syscall-emulation mode when a thread calls the exit syscall. + virtual int exit() { return 1; }; #endif virtual void changeRegFileContext(RegFile::ContextParam param, @@ -315,7 +318,7 @@ class ProxyThreadContext : public ThreadContext void suspend() { actualTC->suspend(); } /// Set the status to Unallocated. - void deallocate() { actualTC->deallocate(); } + void deallocate(int delay = 0) { actualTC->deallocate(); } /// Set the status to Halted. void halt() { actualTC->halt(); } @@ -432,8 +435,6 @@ class ProxyThreadContext : public ThreadContext void setSyscallReturn(SyscallReturn return_value) { actualTC->setSyscallReturn(return_value); } - void syscall(int64_t callnum) { actualTC->syscall(callnum); } - Counter readFuncExeInst() { return actualTC->readFuncExeInst(); } #endif diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc index dcfa93c3e..6a96560f1 100644 --- a/src/cpu/thread_state.cc +++ b/src/cpu/thread_state.cc @@ -31,6 +31,12 @@ #include "base/output.hh" #include "cpu/profile.hh" #include "cpu/thread_state.hh" +#include "sim/serialize.hh" + +#if FULL_SYSTEM +#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#endif #if FULL_SYSTEM ThreadState::ThreadState(int _cpuId, int _tid) @@ -38,8 +44,8 @@ ThreadState::ThreadState(int _cpuId, int _tid) profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL), funcExeInst(0), storeCondFailures(0) #else -ThreadState::ThreadState(int _cpuId, int _tid, MemObject *mem, - Process *_process, short _asid) +ThreadState::ThreadState(int _cpuId, int _tid, Process *_process, + short _asid, MemObject *mem) : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0), process(_process), asid(_asid), funcExeInst(0), storeCondFailures(0) @@ -49,6 +55,43 @@ ThreadState::ThreadState(int _cpuId, int _tid, MemObject *mem, numLoad = 0; } +void +ThreadState::serialize(std::ostream &os) +{ + SERIALIZE_ENUM(_status); + // thread_num and cpu_id are deterministic from the config + SERIALIZE_SCALAR(funcExeInst); + SERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick = 0; + if (quiesceEvent->scheduled()) + quiesceEndTick = quiesceEvent->when(); + SERIALIZE_SCALAR(quiesceEndTick); + if (kernelStats) + kernelStats->serialize(os); +#endif +} + +void +ThreadState::unserialize(Checkpoint *cp, const std::string §ion) +{ + + UNSERIALIZE_ENUM(_status); + // thread_num and cpu_id are deterministic from the config + UNSERIALIZE_SCALAR(funcExeInst); + UNSERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick; + UNSERIALIZE_SCALAR(quiesceEndTick); + if (quiesceEndTick) + quiesceEvent->schedule(quiesceEndTick); + if (kernelStats) + kernelStats->unserialize(cp, section); +#endif +} + #if FULL_SYSTEM void diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh index de9b2f14e..b03a2e2bb 100644 --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -49,6 +49,8 @@ namespace Kernel { }; #endif +class Checkpoint; + /** * Struct for holding general thread state that is needed across CPU * models. This includes things such as pointers to the process, @@ -61,10 +63,14 @@ struct ThreadState { #if FULL_SYSTEM ThreadState(int _cpuId, int _tid); #else - ThreadState(int _cpuId, int _tid, MemObject *mem, - Process *_process, short _asid); + ThreadState(int _cpuId, int _tid, Process *_process, + short _asid, MemObject *mem); #endif + void serialize(std::ostream &os); + + void unserialize(Checkpoint *cp, const std::string §ion); + void setCpuId(int id) { cpuId = id; } int readCpuId() { return cpuId; } |