diff options
Diffstat (limited to 'src/cpu')
105 files changed, 22965 insertions, 0 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript new file mode 100644 index 000000000..34fb6df78 --- /dev/null +++ b/src/cpu/SConscript @@ -0,0 +1,143 @@ +# -*- mode:python -*- + +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import os.path + +# Import build environment variable from SConstruct. +Import('env') + +################################################################# +# +# Generate StaticInst execute() method signatures. +# +# There must be one signature for each CPU model compiled in. +# Since the set of compiled-in models is flexible, we generate a +# header containing the appropriate set of signatures on the fly. +# +################################################################# + +# CPU model-specific data is contained in cpu_models.py +# Convert to SCons File node to get path handling +models_db = File('cpu_models.py') +# slurp in contents of file +execfile(models_db.srcnode().abspath) + +# Template for execute() signature. +exec_sig_template = ''' +virtual Fault execute(%s *xc, Trace::InstRecord *traceData) const = 0; +virtual Fault initiateAcc(%s *xc, Trace::InstRecord *traceData) const +{ panic("initiateAcc not defined!"); }; +virtual Fault completeAcc(Packet *pkt, %s *xc, + Trace::InstRecord *traceData) const +{ panic("completeAcc not defined!"); }; +''' + +# Generate header. +def gen_cpu_exec_signatures(target, source, env): + f = open(str(target[0]), 'w') + print >> f, ''' +#ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__ +#define __CPU_STATIC_INST_EXEC_SIGS_HH__ +''' + for cpu in env['CPU_MODELS']: + xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] + print >> f, exec_sig_template % (xc_type, xc_type, xc_type) + print >> f, ''' +#endif // __CPU_STATIC_INST_EXEC_SIGS_HH__ +''' + +# Generate string that gets printed when header is rebuilt +def gen_sigs_string(target, source, env): + return "Generating static_inst_exec_sigs.hh: " \ + + ', '.join(env['CPU_MODELS']) + +# Add command to generate header to environment. +env.Command('static_inst_exec_sigs.hh', models_db, + Action(gen_cpu_exec_signatures, gen_sigs_string, + varlist = ['CPU_MODELS'])) + +################################################################# +# +# Include CPU-model-specific files based on set of models +# specified in CPU_MODELS build option. +# +################################################################# + +sources = [] + +need_simple_base = False +if 'AtomicSimpleCPU' in env['CPU_MODELS']: + need_simple_base = True + sources += Split('simple/atomic.cc') + +if 'TimingSimpleCPU' in env['CPU_MODELS']: + need_simple_base = True + sources += Split('simple/timing.cc') + +if need_simple_base: + sources += Split('simple/base.cc') + +if 'FastCPU' in env['CPU_MODELS']: + sources += Split('fast/cpu.cc') + +if 'AlphaFullCPU' in env['CPU_MODELS']: + sources += Split(''' + o3/2bit_local_pred.cc + o3/alpha_dyn_inst.cc + o3/alpha_cpu.cc + o3/alpha_cpu_builder.cc + o3/bpred_unit.cc + o3/btb.cc + o3/commit.cc + o3/decode.cc + o3/fetch.cc + o3/free_list.cc + o3/cpu.cc + o3/iew.cc + o3/inst_queue.cc + o3/ldstq.cc + o3/mem_dep_unit.cc + o3/ras.cc + o3/rename.cc + o3/rename_map.cc + o3/rob.cc + o3/sat_counter.cc + o3/store_set.cc + o3/tournament_pred.cc + ''') + +# FullCPU sources are included from m5/SConscript since they're not +# below this point in the file hierarchy. + +# Convert file names to SCons File objects. This takes care of the +# path relative to the top of the directory tree. +sources = [File(s) for s in sources] + +Return('sources') + diff --git a/src/cpu/base.cc b/src/cpu/base.cc new file mode 100644 index 000000000..9ce458c64 --- /dev/null +++ b/src/cpu/base.cc @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <iostream> +#include <string> +#include <sstream> + +#include "base/cprintf.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "base/output.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/profile.hh" +#include "cpu/sampler/sampler.hh" +#include "sim/param.hh" +#include "sim/process.hh" +#include "sim/sim_events.hh" +#include "sim/system.hh" + +#include "base/trace.hh" + +#if FULL_SYSTEM +#include "kern/kernel_stats.hh" +#endif + +using namespace std; + +vector<BaseCPU *> BaseCPU::cpuList; + +// This variable reflects the max number of threads in any CPU. Be +// careful to only use it once all the CPUs that you care about have +// been initialized +int maxThreadsPerCPU = 1; + +#if FULL_SYSTEM +BaseCPU::BaseCPU(Params *p) + : SimObject(p->name), clock(p->clock), checkInterrupts(true), + params(p), number_of_threads(p->numberOfThreads), system(p->system) +#else +BaseCPU::BaseCPU(Params *p) + : SimObject(p->name), clock(p->clock), params(p), + number_of_threads(p->numberOfThreads), system(p->system) +#endif +{ + DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this); + + // add self to global list of CPUs + cpuList.push_back(this); + + DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", + this); + + if (number_of_threads > maxThreadsPerCPU) + maxThreadsPerCPU = number_of_threads; + + // allocate per-thread instruction-based event queues + comInstEventQueue = new EventQueue *[number_of_threads]; + for (int i = 0; i < number_of_threads; ++i) + comInstEventQueue[i] = new EventQueue("instruction-based event queue"); + + // + // set up instruction-count-based termination events, if any + // + if (p->max_insts_any_thread != 0) + for (int i = 0; i < number_of_threads; ++i) + new SimExitEvent(comInstEventQueue[i], p->max_insts_any_thread, + "a thread reached the max instruction count"); + + if (p->max_insts_all_threads != 0) { + // allocate & initialize shared downcounter: each event will + // decrement this when triggered; simulation will terminate + // when counter reaches 0 + int *counter = new int; + *counter = number_of_threads; + for (int i = 0; i < number_of_threads; ++i) + new CountedExitEvent(comInstEventQueue[i], + "all threads reached the max instruction count", + p->max_insts_all_threads, *counter); + } + + // allocate per-thread load-based event queues + comLoadEventQueue = new EventQueue *[number_of_threads]; + for (int i = 0; i < number_of_threads; ++i) + comLoadEventQueue[i] = new EventQueue("load-based event queue"); + + // + // set up instruction-count-based termination events, if any + // + if (p->max_loads_any_thread != 0) + for (int i = 0; i < number_of_threads; ++i) + new SimExitEvent(comLoadEventQueue[i], p->max_loads_any_thread, + "a thread reached the max load count"); + + if (p->max_loads_all_threads != 0) { + // allocate & initialize shared downcounter: each event will + // decrement this when triggered; simulation will terminate + // when counter reaches 0 + int *counter = new int; + *counter = number_of_threads; + for (int i = 0; i < number_of_threads; ++i) + new CountedExitEvent(comLoadEventQueue[i], + "all threads reached the max load count", + p->max_loads_all_threads, *counter); + } + +#if FULL_SYSTEM + memset(interrupts, 0, sizeof(interrupts)); + intstatus = 0; +#endif + + functionTracingEnabled = false; + if (p->functionTrace) { + functionTraceStream = simout.find(csprintf("ftrace.%s", name())); + currentFunctionStart = currentFunctionEnd = 0; + functionEntryTick = p->functionTraceStart; + + if (p->functionTraceStart == 0) { + functionTracingEnabled = true; + } else { + Event *e = + new EventWrapper<BaseCPU, &BaseCPU::enableFunctionTrace>(this, + true); + e->schedule(p->functionTraceStart); + } + } +#if FULL_SYSTEM + profileEvent = NULL; + if (params->profile) + profileEvent = new ProfileEvent(this, params->profile); + + kernelStats = new Kernel::Statistics(system); +#endif + +} + +BaseCPU::Params::Params() +{ +#if FULL_SYSTEM + profile = false; +#endif +} + +void +BaseCPU::enableFunctionTrace() +{ + functionTracingEnabled = true; +} + +BaseCPU::~BaseCPU() +{ +#if FULL_SYSTEM + if (kernelStats) + delete kernelStats; +#endif +} + +void +BaseCPU::init() +{ + if (!params->deferRegistration) + registerExecContexts(); +} + +void +BaseCPU::startup() +{ +#if FULL_SYSTEM + if (!params->deferRegistration && profileEvent) + profileEvent->schedule(curTick); +#endif +} + + +void +BaseCPU::regStats() +{ + using namespace Stats; + + numCycles + .name(name() + ".numCycles") + .desc("number of cpu cycles simulated") + ; + + int size = execContexts.size(); + if (size > 1) { + for (int i = 0; i < size; ++i) { + stringstream namestr; + ccprintf(namestr, "%s.ctx%d", name(), i); + execContexts[i]->regStats(namestr.str()); + } + } else if (size == 1) + execContexts[0]->regStats(name()); + +#if FULL_SYSTEM + if (kernelStats) + kernelStats->regStats(name() + ".kern"); +#endif +} + + +void +BaseCPU::registerExecContexts() +{ + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + +#if FULL_SYSTEM + int id = params->cpu_id; + if (id != -1) + id += i; + + xc->setCpuId(system->registerExecContext(xc, id)); +#else + xc->setCpuId(xc->getProcessPtr()->registerExecContext(xc)); +#endif + } +} + + +void +BaseCPU::switchOut(Sampler *sampler) +{ + panic("This CPU doesn't support sampling!"); +} + +void +BaseCPU::takeOverFrom(BaseCPU *oldCPU) +{ + assert(execContexts.size() == oldCPU->execContexts.size()); + + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *newXC = execContexts[i]; + ExecContext *oldXC = oldCPU->execContexts[i]; + + newXC->takeOverFrom(oldXC); + assert(newXC->readCpuId() == oldXC->readCpuId()); +#if FULL_SYSTEM + system->replaceExecContext(newXC, newXC->readCpuId()); +#else + assert(newXC->getProcessPtr() == oldXC->getProcessPtr()); + newXC->getProcessPtr()->replaceExecContext(newXC, newXC->readCpuId()); +#endif + } + +#if FULL_SYSTEM + for (int i = 0; i < TheISA::NumInterruptLevels; ++i) + interrupts[i] = oldCPU->interrupts[i]; + intstatus = oldCPU->intstatus; + + for (int i = 0; i < execContexts.size(); ++i) + execContexts[i]->profileClear(); + + if (profileEvent) + profileEvent->schedule(curTick); +#endif +} + + +#if FULL_SYSTEM +BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, int _interval) + : Event(&mainEventQueue), cpu(_cpu), interval(_interval) +{ } + +void +BaseCPU::ProfileEvent::process() +{ + for (int i = 0, size = cpu->execContexts.size(); i < size; ++i) { + ExecContext *xc = cpu->execContexts[i]; + xc->profileSample(); + } + + schedule(curTick + interval); +} + +void +BaseCPU::post_interrupt(int int_num, int index) +{ + DPRINTF(Interrupt, "Interrupt %d:%d posted\n", int_num, index); + + if (int_num < 0 || int_num >= TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + if (index < 0 || index >= sizeof(uint64_t) * 8) + panic("int_num out of bounds\n"); + + checkInterrupts = true; + interrupts[int_num] |= 1 << index; + intstatus |= (ULL(1) << int_num); +} + +void +BaseCPU::clear_interrupt(int int_num, int index) +{ + DPRINTF(Interrupt, "Interrupt %d:%d cleared\n", int_num, index); + + if (int_num < 0 || int_num >= TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + if (index < 0 || index >= sizeof(uint64_t) * 8) + panic("int_num out of bounds\n"); + + interrupts[int_num] &= ~(1 << index); + if (interrupts[int_num] == 0) + intstatus &= ~(ULL(1) << int_num); +} + +void +BaseCPU::clear_interrupts() +{ + DPRINTF(Interrupt, "Interrupts all cleared\n"); + + memset(interrupts, 0, sizeof(interrupts)); + intstatus = 0; +} + + +void +BaseCPU::serialize(std::ostream &os) +{ + SERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels); + SERIALIZE_SCALAR(intstatus); + +#if FULL_SYSTEM + if (kernelStats) + kernelStats->serialize(os); +#endif + +} + +void +BaseCPU::unserialize(Checkpoint *cp, const std::string §ion) +{ + UNSERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels); + UNSERIALIZE_SCALAR(intstatus); + +#if FULL_SYSTEM + if (kernelStats) + kernelStats->unserialize(cp, section); +#endif +} + +#endif // FULL_SYSTEM + +void +BaseCPU::traceFunctionsInternal(Addr pc) +{ + if (!debugSymbolTable) + return; + + // if pc enters different function, print new function symbol and + // update saved range. Otherwise do nothing. + if (pc < currentFunctionStart || pc >= currentFunctionEnd) { + string sym_str; + bool found = debugSymbolTable->findNearestSymbol(pc, sym_str, + currentFunctionStart, + currentFunctionEnd); + + if (!found) { + // no symbol found: use addr as label + sym_str = csprintf("0x%x", pc); + currentFunctionStart = pc; + currentFunctionEnd = pc + 1; + } + + ccprintf(*functionTraceStream, " (%d)\n%d: %s", + curTick - functionEntryTick, curTick, sym_str); + functionEntryTick = curTick; + } +} + + +DEFINE_SIM_OBJECT_CLASS_NAME("BaseCPU", BaseCPU) diff --git a/src/cpu/base.hh b/src/cpu/base.hh new file mode 100644 index 000000000..79700c117 --- /dev/null +++ b/src/cpu/base.hh @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_BASE_HH__ +#define __CPU_BASE_HH__ + +#include <vector> + +#include "base/statistics.hh" +#include "config/full_system.hh" +#include "cpu/sampler/sampler.hh" +#include "sim/eventq.hh" +#include "sim/sim_object.hh" +#include "arch/isa_traits.hh" + +class System; +namespace Kernel { class Statistics; } +class BranchPred; +class ExecContext; + +class BaseCPU : public SimObject +{ + protected: + // CPU's clock period in terms of the number of ticks of curTime. + Tick clock; + + public: + inline Tick frequency() const { return Clock::Frequency / clock; } + inline Tick cycles(int numCycles) const { return clock * numCycles; } + inline Tick curCycle() const { return curTick / clock; } + +#if FULL_SYSTEM + protected: + uint64_t interrupts[TheISA::NumInterruptLevels]; + uint64_t intstatus; + + public: + virtual void post_interrupt(int int_num, int index); + virtual void clear_interrupt(int int_num, int index); + virtual void clear_interrupts(); + bool checkInterrupts; + + bool check_interrupt(int int_num) const { + if (int_num > TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + return interrupts[int_num] != 0; + } + + bool check_interrupts() const { return intstatus != 0; } + uint64_t intr_status() const { return intstatus; } + + class ProfileEvent : public Event + { + private: + BaseCPU *cpu; + int interval; + + public: + ProfileEvent(BaseCPU *cpu, int interval); + void process(); + }; + ProfileEvent *profileEvent; +#endif + + protected: + std::vector<ExecContext *> execContexts; + + public: + + /// Notify the CPU that the indicated context is now active. The + /// delay parameter indicates the number of ticks to wait before + /// executing (typically 0 or 1). + virtual void activateContext(int thread_num, int delay) {} + + /// Notify the CPU that the indicated context is now suspended. + virtual void suspendContext(int thread_num) {} + + /// Notify the CPU that the indicated context is now deallocated. + virtual void deallocateContext(int thread_num) {} + + /// Notify the CPU that the indicated context is now halted. + virtual void haltContext(int thread_num) {} + + public: + struct Params + { + std::string name; + int numberOfThreads; + bool deferRegistration; + Counter max_insts_any_thread; + Counter max_insts_all_threads; + Counter max_loads_any_thread; + Counter max_loads_all_threads; + Tick clock; + bool functionTrace; + Tick functionTraceStart; + System *system; +#if FULL_SYSTEM + int cpu_id; + Tick profile; +#endif + + Params(); + }; + + const Params *params; + + BaseCPU(Params *params); + virtual ~BaseCPU(); + + virtual void init(); + virtual void startup(); + virtual void regStats(); + + virtual void activateWhenReady(int tid) {}; + + void registerExecContexts(); + + /// Prepare for another CPU to take over execution. When it is + /// is ready (drained pipe) it signals the sampler. + virtual void switchOut(Sampler *); + + /// Take over execution from the given CPU. Used for warm-up and + /// sampling. + virtual void takeOverFrom(BaseCPU *); + + /** + * Number of threads we're actually simulating (<= SMT_MAX_THREADS). + * This is a constant for the duration of the simulation. + */ + int number_of_threads; + + /** + * Vector of per-thread instruction-based event queues. Used for + * scheduling events based on number of instructions committed by + * a particular thread. + */ + EventQueue **comInstEventQueue; + + /** + * Vector of per-thread load-based event queues. Used for + * scheduling events based on number of loads committed by + *a particular thread. + */ + EventQueue **comLoadEventQueue; + + System *system; + +#if FULL_SYSTEM + /** + * Serialize this object to the given output stream. + * @param os The stream to serialize to. + */ + virtual void serialize(std::ostream &os); + + /** + * Reconstruct the state of this object from a checkpoint. + * @param cp The checkpoint use. + * @param section The section name of this object + */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#endif + + /** + * Return pointer to CPU's branch predictor (NULL if none). + * @return Branch predictor pointer. + */ + virtual BranchPred *getBranchPred() { return NULL; }; + + virtual Counter totalInstructions() const { return 0; } + + // Function tracing + private: + bool functionTracingEnabled; + std::ostream *functionTraceStream; + Addr currentFunctionStart; + Addr currentFunctionEnd; + Tick functionEntryTick; + void enableFunctionTrace(); + void traceFunctionsInternal(Addr pc); + + protected: + void traceFunctions(Addr pc) + { + if (functionTracingEnabled) + traceFunctionsInternal(pc); + } + + private: + static std::vector<BaseCPU *> cpuList; //!< Static global cpu list + + public: + static int numSimulatedCPUs() { return cpuList.size(); } + static Counter numSimulatedInstructions() + { + Counter total = 0; + + int size = cpuList.size(); + for (int i = 0; i < size; ++i) + total += cpuList[i]->totalInstructions(); + + return total; + } + + public: + // Number of CPU cycles simulated + Stats::Scalar<> numCycles; + +#if FULL_SYSTEM + Kernel::Statistics *kernelStats; +#endif +}; + +#endif // __CPU_BASE_HH__ diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc new file mode 100644 index 000000000..bf7c35cad --- /dev/null +++ b/src/cpu/base_dyn_inst.cc @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_BASE_DYN_INST_CC__ +#define __CPU_BASE_DYN_INST_CC__ + +#include <iostream> +#include <string> +#include <sstream> + +#include "base/cprintf.hh" +#include "base/trace.hh" + +#include "arch/faults.hh" +#include "cpu/exetrace.hh" +#include "mem/mem_req.hh" + +#include "cpu/base_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha_cpu.hh" + +using namespace std; +using namespace TheISA; + +#define NOHASH +#ifndef NOHASH + +#include "base/hashmap.hh" + +unsigned int MyHashFunc(const BaseDynInst *addr) +{ + unsigned a = (unsigned)addr; + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; +} + +typedef m5::hash_map<const BaseDynInst *, const BaseDynInst *, MyHashFunc> my_hash_t; +my_hash_t thishash; +#endif + +template <class Impl> +BaseDynInst<Impl>::BaseDynInst(MachInst machInst, Addr inst_PC, + Addr pred_PC, InstSeqNum seq_num, + FullCPU *cpu) + : staticInst(machInst), traceData(NULL), cpu(cpu), cpuXC(cpu->cpuXCBase()) +{ + seqNum = seq_num; + + PC = inst_PC; + nextPC = PC + sizeof(MachInst); + predPC = pred_PC; + + initVars(); +} + +template <class Impl> +BaseDynInst<Impl>::BaseDynInst(StaticInstPtr &_staticInst) + : staticInst(_staticInst), traceData(NULL) +{ + initVars(); +} + +template <class Impl> +void +BaseDynInst<Impl>::initVars() +{ + effAddr = MemReq::inval_addr; + physEffAddr = MemReq::inval_addr; + + readyRegs = 0; + + completed = false; + canIssue = false; + issued = false; + executed = false; + canCommit = false; + squashed = false; + squashedInIQ = false; + eaCalcDone = false; + + blockingInst = false; + recoverInst = false; + + // Eventually make this a parameter. + threadNumber = 0; + + // Also make this a parameter, or perhaps get it from xc or cpu. + asid = 0; + + // Initialize the fault to be unimplemented opcode. + fault = new UnimplementedOpcodeFault; + + ++instcount; + + DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n", + instcount); +} + +template <class Impl> +BaseDynInst<Impl>::~BaseDynInst() +{ + --instcount; + DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n", + instcount); +} + +template <class Impl> +void +BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags) +{ + // This is the "functional" implementation of prefetch. Not much + // happens here since prefetches don't affect the architectural + // state. + + // Generate a MemReq so we can translate the effective address. + MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), 1, flags); + req->asid = asid; + + // Prefetches never cause faults. + fault = NoFault; + + // note this is a local, not BaseDynInst::fault + Fault trans_fault = cpuXC->translateDataReadReq(req); + + if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) { + // It's a valid address to cacheable space. Record key MemReq + // parameters so we can generate another one just like it for + // the timing access without calling translate() again (which + // might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // Bogus address (invalid or uncacheable space). Mark it by + // setting the eff_addr to InvalidAddr. + effAddr = physEffAddr = MemReq::inval_addr; + } + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#if !FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (traceData) { + traceData->setAddr(addr); + } +} + +template <class Impl> +void +BaseDynInst<Impl>::writeHint(Addr addr, int size, unsigned flags) +{ + // Need to create a MemReq here so we can do a translation. This + // will casue a TLB miss trap if necessary... not sure whether + // that's the best thing to do or not. We don't really need the + // MemReq otherwise, since wh64 has no functional effect. + MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), size, flags); + req->asid = asid; + + fault = cpuXC->translateDataWriteReq(req); + + if (fault == NoFault && !(req->flags & UNCACHEABLE)) { + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // ignore faults & accesses to uncacheable space... treat as no-op + effAddr = physEffAddr = MemReq::inval_addr; + } + + storeSize = size; + storeData = 0; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template <class Impl> +Fault +BaseDynInst<Impl>::copySrcTranslate(Addr src) +{ + MemReqPtr req = new MemReq(src, cpuXC->getProxy(), 64); + req->asid = asid; + + // translate to physical address + Fault fault = cpuXC->translateDataReadReq(req); + + if (fault == NoFault) { + cpuXC->copySrcAddr = src; + cpuXC->copySrcPhysAddr = req->paddr; + } else { + cpuXC->copySrcAddr = 0; + cpuXC->copySrcPhysAddr = 0; + } + return fault; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template <class Impl> +Fault +BaseDynInst<Impl>::copy(Addr dest) +{ + uint8_t data[64]; + FunctionalMemory *mem = cpuXC->mem; + assert(cpuXC->copySrcPhysAddr || cpuXC->misspeculating()); + MemReqPtr req = new MemReq(dest, cpuXC->getProxy(), 64); + req->asid = asid; + + // translate to physical address + Fault fault = cpuXC->translateDataWriteReq(req); + + if (fault == NoFault) { + Addr dest_addr = req->paddr; + // Need to read straight from memory since we have more than 8 bytes. + req->paddr = cpuXC->copySrcPhysAddr; + mem->read(req, data); + req->paddr = dest_addr; + mem->write(req, data); + } + return fault; +} + +template <class Impl> +void +BaseDynInst<Impl>::dump() +{ + cprintf("T%d : %#08d `", threadNumber, PC); + cout << staticInst->disassemble(PC); + cprintf("'\n"); +} + +template <class Impl> +void +BaseDynInst<Impl>::dump(std::string &outstring) +{ + std::ostringstream s; + s << "T" << threadNumber << " : 0x" << PC << " " + << staticInst->disassemble(PC); + + outstring = s.str(); +} + + +#if 0 +template <class Impl> +Fault +BaseDynInst<Impl>::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) +{ + Fault fault; + + // check alignments, even speculative this test should always pass + if ((nbytes & nbytes - 1) != 0 || (addr & nbytes - 1) != 0) { + for (int i = 0; i < nbytes; i++) + ((char *) p)[i] = 0; + + // I added the following because according to the comment above, + // we should never get here. The comment lies +#if 0 + panic("unaligned access. Cycle = %n", curTick); +#endif + return NoFault; + } + + MemReqPtr req = new MemReq(addr, thread, nbytes); + switch(cmd) { + case Read: + fault = spec_mem->read(req, (uint8_t *)p); + break; + + case Write: + fault = spec_mem->write(req, (uint8_t *)p); + if (fault != NoFault) + break; + + specMemWrite = true; + storeSize = nbytes; + switch(nbytes) { + case sizeof(uint8_t): + *(uint8_t)&storeData = (uint8_t *)p; + break; + case sizeof(uint16_t): + *(uint16_t)&storeData = (uint16_t *)p; + break; + case sizeof(uint32_t): + *(uint32_t)&storeData = (uint32_t *)p; + break; + case sizeof(uint64_t): + *(uint64_t)&storeData = (uint64_t *)p; + break; + } + break; + + default: + fault = genMachineCheckFault(); + break; + } + + trace_mem(fault, cmd, addr, p, nbytes); + + return fault; +} + +#endif + +template <class Impl> +bool +BaseDynInst<Impl>::eaSrcsReady() +{ + // For now I am assuming that src registers 1..n-1 are the ones that the + // EA calc depends on. (i.e. src reg 0 is the source of the data to be + // stored) + + for (int i = 1; i < numSrcRegs(); ++i) + { + if (!_readySrcRegIdx[i]) + return false; + } + + return true; +} + +// Forward declaration +template class BaseDynInst<AlphaSimpleImpl>; + +template <> +int +BaseDynInst<AlphaSimpleImpl>::instcount = 0; + +#endif // __CPU_BASE_DYN_INST_CC__ diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh new file mode 100644 index 000000000..3a7852f79 --- /dev/null +++ b/src/cpu/base_dyn_inst.hh @@ -0,0 +1,530 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_BASE_DYN_INST_HH__ +#define __CPU_BASE_DYN_INST_HH__ + +#include <string> +#include <vector> + +#include "base/fast_alloc.hh" +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/exetrace.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/comm.hh" +#include "cpu/static_inst.hh" +#include "encumbered/cpu/full/bpred_update.hh" +#include "encumbered/cpu/full/op_class.hh" +#include "encumbered/cpu/full/spec_memory.hh" +#include "encumbered/cpu/full/spec_state.hh" +#include "encumbered/mem/functional/main.hh" + +/** + * @file + * Defines a dynamic instruction context. + */ + +// Forward declaration. +class StaticInstPtr; + +template <class Impl> +class BaseDynInst : public FastAlloc, public RefCounted +{ + public: + // Typedef for the CPU. + typedef typename Impl::FullCPU FullCPU; + + /// Binary machine instruction type. + typedef TheISA::MachInst MachInst; + /// Logical register index type. + typedef TheISA::RegIndex RegIndex; + /// Integer register index type. + typedef TheISA::IntReg IntReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + /** The static inst used by this dyn inst. */ + StaticInstPtr staticInst; + + //////////////////////////////////////////// + // + // INSTRUCTION EXECUTION + // + //////////////////////////////////////////// + Trace::InstRecord *traceData; + + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res); + + void prefetch(Addr addr, unsigned flags); + void writeHint(Addr addr, int size, unsigned flags); + Fault copySrcTranslate(Addr src); + Fault copy(Addr dest); + + /** @todo: Consider making this private. */ + public: + /** Is this instruction valid. */ + bool valid; + + /** The sequence number of the instruction. */ + InstSeqNum seqNum; + + /** How many source registers are ready. */ + unsigned readyRegs; + + /** Is the instruction completed. */ + bool completed; + + /** Can this instruction issue. */ + bool canIssue; + + /** Has this instruction issued. */ + bool issued; + + /** Has this instruction executed (or made it through execute) yet. */ + bool executed; + + /** Can this instruction commit. */ + bool canCommit; + + /** Is this instruction squashed. */ + bool squashed; + + /** Is this instruction squashed in the instruction queue. */ + bool squashedInIQ; + + /** Is this a recover instruction. */ + bool recoverInst; + + /** Is this a thread blocking instruction. */ + bool blockingInst; /* this inst has called thread_block() */ + + /** Is this a thread syncrhonization instruction. */ + bool threadsyncWait; + + /** The thread this instruction is from. */ + short threadNumber; + + /** data address space ID, for loads & stores. */ + short asid; + + /** Pointer to the FullCPU object. */ + FullCPU *cpu; + + /** Pointer to the exec context. Will not exist in the final version. */ + CPUExecContext *cpuXC; + + /** The kind of fault this instruction has generated. */ + Fault fault; + + /** The effective virtual address (lds & stores only). */ + Addr effAddr; + + /** The effective physical address. */ + Addr physEffAddr; + + /** Effective virtual address for a copy source. */ + Addr copySrcEffAddr; + + /** Effective physical address for a copy source. */ + Addr copySrcPhysEffAddr; + + /** The memory request flags (from translation). */ + unsigned memReqFlags; + + /** The size of the data to be stored. */ + int storeSize; + + /** The data to be stored. */ + IntReg storeData; + + union Result { + uint64_t integer; + float fp; + double dbl; + }; + + /** The result of the instruction; assumes for now that there's only one + * destination register. + */ + Result instResult; + + /** PC of this instruction. */ + Addr PC; + + /** Next non-speculative PC. It is not filled in at fetch, but rather + * once the target of the branch is truly known (either decode or + * execute). + */ + Addr nextPC; + + /** Predicted next PC. */ + Addr predPC; + + /** Count of total number of dynamic instructions. */ + static int instcount; + + /** Whether or not the source register is ready. Not sure this should be + * here vs. the derived class. + */ + bool _readySrcRegIdx[MaxInstSrcRegs]; + + public: + /** BaseDynInst constructor given a binary instruction. */ + BaseDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + BaseDynInst(StaticInstPtr &_staticInst); + + /** BaseDynInst destructor. */ + ~BaseDynInst(); + + private: + /** Function to initialize variables in the constructors. */ + void initVars(); + + public: + void + trace_mem(Fault fault, // last fault + MemCmd cmd, // last command + Addr addr, // virtual address of access + void *p, // memory accessed + int nbytes); // access size + + /** Dumps out contents of this BaseDynInst. */ + void dump(); + + /** Dumps out contents of this BaseDynInst into given string. */ + void dump(std::string &outstring); + + /** Returns the fault type. */ + Fault getFault() { return fault; } + + /** Checks whether or not this instruction has had its branch target + * calculated yet. For now it is not utilized and is hacked to be + * always false. + */ + bool doneTargCalc() { return false; } + + /** Returns the next PC. This could be the speculative next PC if it is + * called prior to the actual branch target being calculated. + */ + Addr readNextPC() { return nextPC; } + + /** Set the predicted target of this current instruction. */ + void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } + + /** Returns the predicted target of the branch. */ + Addr readPredTarg() { return predPC; } + + /** Returns whether the instruction was predicted taken or not. */ + bool predTaken() { + return( predPC != (PC + sizeof(MachInst) ) ); + } + + /** Returns whether the instruction mispredicted. */ + bool mispredicted() { return (predPC != nextPC); } + + // + // Instruction types. Forward checks to StaticInst object. + // + bool isNop() const { return staticInst->isNop(); } + bool isMemRef() const { return staticInst->isMemRef(); } + bool isLoad() const { return staticInst->isLoad(); } + bool isStore() const { return staticInst->isStore(); } + bool isInstPrefetch() const { return staticInst->isInstPrefetch(); } + bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } + bool isCopy() const { return staticInst->isCopy(); } + bool isInteger() const { return staticInst->isInteger(); } + bool isFloating() const { return staticInst->isFloating(); } + bool isControl() const { return staticInst->isControl(); } + bool isCall() const { return staticInst->isCall(); } + bool isReturn() const { return staticInst->isReturn(); } + bool isDirectCtrl() const { return staticInst->isDirectCtrl(); } + bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } + bool isCondCtrl() const { return staticInst->isCondCtrl(); } + bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isThreadSync() const { return staticInst->isThreadSync(); } + bool isSerializing() const { return staticInst->isSerializing(); } + bool isMemBarrier() const { return staticInst->isMemBarrier(); } + bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } + bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } + + /** Returns the opclass of this instruction. */ + OpClass opClass() const { return staticInst->opClass(); } + + /** Returns the branch target address. */ + Addr branchTarget() const { return staticInst->branchTarget(PC); } + + /** Number of source registers. */ + int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } + + /** Number of destination registers. */ + int8_t numDestRegs() const { return staticInst->numDestRegs(); } + + // the following are used to track physical register usage + // for machines with separate int & FP reg files + int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } + int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } + + /** Returns the logical register index of the i'th destination register. */ + RegIndex destRegIdx(int i) const + { + return staticInst->destRegIdx(i); + } + + /** Returns the logical register index of the i'th source register. */ + RegIndex srcRegIdx(int i) const + { + return staticInst->srcRegIdx(i); + } + + /** Returns the result of an integer instruction. */ + uint64_t readIntResult() { return instResult.integer; } + + /** Returns the result of a floating point instruction. */ + float readFloatResult() { return instResult.fp; } + + /** Returns the result of a floating point (double) instruction. */ + double readDoubleResult() { return instResult.dbl; } + + //Push to .cc file. + /** Records that one of the source registers is ready. */ + void markSrcRegReady() + { + ++readyRegs; + if(readyRegs == numSrcRegs()) { + canIssue = true; + } + } + + /** Marks a specific register as ready. + * @todo: Move this to .cc file. + */ + void markSrcRegReady(RegIndex src_idx) + { + ++readyRegs; + + _readySrcRegIdx[src_idx] = 1; + + if(readyRegs == numSrcRegs()) { + canIssue = true; + } + } + + /** Returns if a source register is ready. */ + bool isReadySrcRegIdx(int idx) const + { + return this->_readySrcRegIdx[idx]; + } + + /** Sets this instruction as completed. */ + void setCompleted() { completed = true; } + + /** Returns whethe or not this instruction is completed. */ + bool isCompleted() const { return completed; } + + /** Sets this instruction as ready to issue. */ + void setCanIssue() { canIssue = true; } + + /** Returns whether or not this instruction is ready to issue. */ + bool readyToIssue() const { return canIssue; } + + /** Sets this instruction as issued from the IQ. */ + void setIssued() { issued = true; } + + /** Returns whether or not this instruction has issued. */ + bool isIssued() const { return issued; } + + /** Sets this instruction as executed. */ + void setExecuted() { executed = true; } + + /** Returns whether or not this instruction has executed. */ + bool isExecuted() const { return executed; } + + /** Sets this instruction as ready to commit. */ + void setCanCommit() { canCommit = true; } + + /** Clears this instruction as being ready to commit. */ + void clearCanCommit() { canCommit = false; } + + /** Returns whether or not this instruction is ready to commit. */ + bool readyToCommit() const { return canCommit; } + + /** Sets this instruction as squashed. */ + void setSquashed() { squashed = true; } + + /** Returns whether or not this instruction is squashed. */ + bool isSquashed() const { return squashed; } + + /** Sets this instruction as squashed in the IQ. */ + void setSquashedInIQ() { squashedInIQ = true; } + + /** Returns whether or not this instruction is squashed in the IQ. */ + bool isSquashedInIQ() const { return squashedInIQ; } + + /** Read the PC of this instruction. */ + const Addr readPC() const { return PC; } + + /** Set the next PC of this instruction (its actual target). */ + void setNextPC(uint64_t val) { nextPC = val; } + + /** Returns the exec context. + * @todo: Remove this once the ExecContext is no longer used. + */ + ExecContext *xcBase() { return cpuXC->getProxy(); } + + private: + /** Instruction effective address. + * @todo: Consider if this is necessary or not. + */ + Addr instEffAddr; + /** Whether or not the effective address calculation is completed. + * @todo: Consider if this is necessary or not. + */ + bool eaCalcDone; + + public: + /** Sets the effective address. */ + void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } + + /** Returns the effective address. */ + const Addr &getEA() const { return instEffAddr; } + + /** Returns whether or not the eff. addr. calculation has been completed. */ + bool doneEACalc() { return eaCalcDone; } + + /** Returns whether or not the eff. addr. source registers are ready. */ + bool eaSrcsReady(); + + public: + /** Load queue index. */ + int16_t lqIdx; + + /** Store queue index. */ + int16_t sqIdx; +}; + +template<class Impl> +template<class T> +inline Fault +BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags) +{ + MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), sizeof(T), flags); + req->asid = asid; + + fault = cpu->translateDataReadReq(req); + + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + // Do I ever really need this? -KTL 3/05 + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#if !FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (fault == NoFault) { + fault = cpu->read(req, data, lqIdx); + } else { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. + data = (T)-1; + } + + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + return fault; +} + +template<class Impl> +template<class T> +inline Fault +BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), sizeof(T), flags); + + req->asid = asid; + + fault = cpu->translateDataWriteReq(req); + + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + + /** + * @todo + * Replace the disjoint functional memory with a unified one and remove + * this hack. + */ +#if !FULL_SYSTEM + req->paddr = req->vaddr; +#endif + + if (fault == NoFault) { + fault = cpu->write(req, data, sqIdx); + } + + if (res) { + // always return some result to keep misspeculated paths + // (which will ignore faults) deterministic + *res = (fault == NoFault) ? req->result : 0; + } + + return fault; +} + +#endif // __CPU_BASE_DYN_INST_HH__ diff --git a/src/cpu/cpu_exec_context.cc b/src/cpu/cpu_exec_context.cc new file mode 100644 index 000000000..ec1e94561 --- /dev/null +++ b/src/cpu/cpu_exec_context.cc @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2001-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string> + +#include "arch/isa_traits.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" + +#if FULL_SYSTEM +#include "base/callback.hh" +#include "base/cprintf.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "cpu/profile.hh" +#include "kern/kernel_stats.hh" +#include "sim/serialize.hh" +#include "sim/sim_exit.hh" +#include "arch/stacktrace.hh" +#else +#include "sim/process.hh" +#include "sim/system.hh" +#include "mem/translating_port.hh" +#endif + +using namespace std; + +// constructor +#if FULL_SYSTEM +CPUExecContext::CPUExecContext(BaseCPU *_cpu, int _thread_num, System *_sys, + AlphaITB *_itb, AlphaDTB *_dtb) + : _status(ExecContext::Unallocated), cpu(_cpu), thread_num(_thread_num), + cpu_id(-1), lastActivate(0), lastSuspend(0), system(_sys), itb(_itb), + dtb(_dtb), profile(NULL), quiesceEvent(this), func_exe_inst(0), + storeCondFailures(0) + +{ + proxy = new ProxyExecContext<CPUExecContext>(this); + + regs.clear(); + + if (cpu->params->profile) { + profile = new FunctionProfile(system->kernelSymtab); + Callback *cb = + new MakeCallback<CPUExecContext, + &CPUExecContext::dumpFuncProfile>(this); + registerExitCallback(cb); + } + + // let's fill with a dummy node for now so we don't get a segfault + // on the first cycle when there's no node available. + static ProfileNode dummyNode; + profileNode = &dummyNode; + profilePC = 3; + + Port *mem_port; + physPort = new FunctionalPort(); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(physPort); + physPort->setPeer(mem_port); + + virtPort = new VirtualPort(); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(virtPort); + virtPort->setPeer(mem_port); +} +#else +CPUExecContext::CPUExecContext(BaseCPU *_cpu, int _thread_num, + Process *_process, int _asid, MemObject* memobj) + : _status(ExecContext::Unallocated), + cpu(_cpu), thread_num(_thread_num), cpu_id(-1), lastActivate(0), + lastSuspend(0), process(_process), asid(_asid), + func_exe_inst(0), storeCondFailures(0) +{ + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + port = new TranslatingPort(process->pTable, false); + mem_port = memobj->getPort("functional"); + mem_port->setPeer(port); + port->setPeer(mem_port); + + regs.clear(); + proxy = new ProxyExecContext<CPUExecContext>(this); +} + +CPUExecContext::CPUExecContext(RegFile *regFile) + : cpu(NULL), thread_num(-1), process(NULL), asid(-1), + func_exe_inst(0), storeCondFailures(0) +{ + regs = *regFile; + proxy = new ProxyExecContext<CPUExecContext>(this); +} + +#endif + +CPUExecContext::~CPUExecContext() +{ + delete proxy; +} + +#if FULL_SYSTEM +void +CPUExecContext::dumpFuncProfile() +{ + std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name())); + profile->dump(proxy, *os); +} + +CPUExecContext::EndQuiesceEvent::EndQuiesceEvent(CPUExecContext *_cpuXC) + : Event(&mainEventQueue), cpuXC(_cpuXC) +{ +} + +void +CPUExecContext::EndQuiesceEvent::process() +{ + cpuXC->activate(); +} + +const char* +CPUExecContext::EndQuiesceEvent::description() +{ + return "End Quiesce Event."; +} + +void +CPUExecContext::profileClear() +{ + if (profile) + profile->clear(); +} + +void +CPUExecContext::profileSample() +{ + if (profile) + profile->sample(profileNode, profilePC); +} + +#endif + +void +CPUExecContext::takeOverFrom(ExecContext *oldContext) +{ + // some things should already be set up +#if FULL_SYSTEM + assert(system == oldContext->getSystemPtr()); +#else + assert(process == oldContext->getProcessPtr()); +#endif + + // copy over functional state + _status = oldContext->status(); + copyArchRegs(oldContext); + cpu_id = oldContext->readCpuId(); +#if !FULL_SYSTEM + func_exe_inst = oldContext->readFuncExeInst(); +#endif + + storeCondFailures = 0; + + oldContext->setStatus(ExecContext::Unallocated); +} + +void +CPUExecContext::serialize(ostream &os) +{ + SERIALIZE_ENUM(_status); + regs.serialize(os); + // thread_num and cpu_id are deterministic from the config + SERIALIZE_SCALAR(func_exe_inst); + SERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick = 0; + if (quiesceEvent.scheduled()) + quiesceEndTick = quiesceEvent.when(); + SERIALIZE_SCALAR(quiesceEndTick); + +#endif +} + + +void +CPUExecContext::unserialize(Checkpoint *cp, const std::string §ion) +{ + UNSERIALIZE_ENUM(_status); + regs.unserialize(cp, section); + // thread_num and cpu_id are deterministic from the config + UNSERIALIZE_SCALAR(func_exe_inst); + UNSERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick; + UNSERIALIZE_SCALAR(quiesceEndTick); + if (quiesceEndTick) + quiesceEvent.schedule(quiesceEndTick); +#endif +} + + +void +CPUExecContext::activate(int delay) +{ + if (status() == ExecContext::Active) + return; + + lastActivate = curTick; + + _status = ExecContext::Active; + cpu->activateContext(thread_num, delay); +} + +void +CPUExecContext::suspend() +{ + if (status() == ExecContext::Suspended) + return; + + lastActivate = curTick; + lastSuspend = curTick; +/* +#if FULL_SYSTEM + // Don't change the status from active if there are pending interrupts + if (cpu->check_interrupts()) { + assert(status() == ExecContext::Active); + return; + } +#endif +*/ + _status = ExecContext::Suspended; + cpu->suspendContext(thread_num); +} + +void +CPUExecContext::deallocate() +{ + if (status() == ExecContext::Unallocated) + return; + + _status = ExecContext::Unallocated; + cpu->deallocateContext(thread_num); +} + +void +CPUExecContext::halt() +{ + if (status() == ExecContext::Halted) + return; + + _status = ExecContext::Halted; + cpu->haltContext(thread_num); +} + + +void +CPUExecContext::regStats(const string &name) +{ +} + +void +CPUExecContext::copyArchRegs(ExecContext *xc) +{ + TheISA::copyRegs(xc, proxy); +} + +#if FULL_SYSTEM +VirtualPort* +CPUExecContext::getVirtPort(ExecContext *xc) +{ + if (!xc) + return virtPort; + + VirtualPort *vp; + Port *mem_port; + + vp = new VirtualPort(xc); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(vp); + vp->setPeer(mem_port); + return vp; +} + +void +CPUExecContext::delVirtPort(VirtualPort *vp) +{ +// assert(!vp->nullExecContext()); + delete vp->getPeer(); + delete vp; +} + + +#endif + diff --git a/src/cpu/cpu_exec_context.hh b/src/cpu/cpu_exec_context.hh new file mode 100644 index 000000000..2c06a7b3b --- /dev/null +++ b/src/cpu/cpu_exec_context.hh @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2001-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_CPU_EXEC_CONTEXT_HH__ +#define __CPU_CPU_EXEC_CONTEXT_HH__ + +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "cpu/exec_context.hh" +#include "mem/physical.hh" +#include "mem/request.hh" +#include "sim/byteswap.hh" +#include "sim/eventq.hh" +#include "sim/host.hh" +#include "sim/serialize.hh" + +class BaseCPU; + +#if FULL_SYSTEM + +#include "sim/system.hh" +#include "arch/tlb.hh" + +class FunctionProfile; +class ProfileNode; +class FunctionalPort; +class PhysicalPort; + + +#else // !FULL_SYSTEM + +#include "sim/process.hh" +#include "mem/page_table.hh" +class TranslatingPort; + + +#endif // FULL_SYSTEM + +// +// The CPUExecContext object represents a functional context for +// instruction execution. It incorporates everything required for +// architecture-level functional simulation of a single thread. +// + +class CPUExecContext +{ + protected: + typedef TheISA::RegFile RegFile; + typedef TheISA::MachInst MachInst; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + public: + typedef ExecContext::Status Status; + + private: + Status _status; + + public: + Status status() const { return _status; } + + void setStatus(Status newStatus) { _status = newStatus; } + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + void activate(int delay = 1); + + /// Set the status to Suspended. + void suspend(); + + /// Set the status to Unallocated. + void deallocate(); + + /// Set the status to Halted. + void halt(); + + protected: + RegFile regs; // correct-path register context + + public: + // pointer to CPU associated with this context + BaseCPU *cpu; + + ProxyExecContext<CPUExecContext> *proxy; + + // Current instruction + MachInst inst; + + // Index of hardware thread context on the CPU that this represents. + int thread_num; + + // ID of this context w.r.t. the System or Process object to which + // it belongs. For full-system mode, this is the system CPU ID. + int cpu_id; + + Tick lastActivate; + Tick lastSuspend; + + System *system; + + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; + + /** A functional port outgoing only for functional accesses to physical + * addresses.*/ + FunctionalPort *physPort; + + /** A functional port, outgoing only, for functional accesse to virtual + * addresses. That doen't require execution context information */ + VirtualPort *virtPort; + + FunctionProfile *profile; + ProfileNode *profileNode; + Addr profilePC; + void dumpFuncProfile(); + + /** Event for timing out quiesce instruction */ + struct EndQuiesceEvent : public Event + { + /** A pointer to the execution context that is quiesced */ + CPUExecContext *cpuXC; + + EndQuiesceEvent(CPUExecContext *_cpuXC); + + /** Event process to occur at interrupt*/ + virtual void process(); + + /** Event description */ + virtual const char *description(); + }; + EndQuiesceEvent quiesceEvent; + + Event *getQuiesceEvent() { return &quiesceEvent; } + + Tick readLastActivate() { return lastActivate; } + + Tick readLastSuspend() { return lastSuspend; } + + void profileClear(); + + void profileSample(); + +#else + /// Port that syscalls can use to access memory (provides translation step). + TranslatingPort *port; + + Process *process; + + // Address space ID. Note that this is used for TIMING cache + // simulation only; all functional memory accesses should use + // one of the FunctionalMemory pointers above. + short asid; + +#endif + + /** + * Temporary storage to pass the source address from copy_load to + * copy_store. + * @todo Remove this temporary when we have a better way to do it. + */ + Addr copySrcAddr; + /** + * Temp storage for the physical source address of a copy. + * @todo Remove this temporary when we have a better way to do it. + */ + Addr copySrcPhysAddr; + + + /* + * number of executed instructions, for matching with syscall trace + * points in EIO files. + */ + Counter func_exe_inst; + + // + // Count failed store conditionals so we can warn of apparent + // application deadlock situations. + unsigned storeCondFailures; + + // constructor: initialize context from given process structure +#if FULL_SYSTEM + CPUExecContext(BaseCPU *_cpu, int _thread_num, System *_system, + AlphaITB *_itb, AlphaDTB *_dtb); +#else + CPUExecContext(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, + MemObject *memobj); + // Constructor to use XC to pass reg file around. Not used for anything + // else. + CPUExecContext(RegFile *regFile); +#endif + virtual ~CPUExecContext(); + + virtual void takeOverFrom(ExecContext *oldContext); + + void regStats(const std::string &name); + + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string §ion); + + BaseCPU *getCpuPtr() { return cpu; } + + ExecContext *getProxy() { return proxy; } + + int getThreadNum() { return thread_num; } + +#if FULL_SYSTEM + System *getSystemPtr() { return system; } + + AlphaITB *getITBPtr() { return itb; } + + AlphaDTB *getDTBPtr() { return dtb; } + + int getInstAsid() { return regs.instAsid(); } + int getDataAsid() { return regs.dataAsid(); } + + Fault translateInstReq(RequestPtr &req) + { + return itb->translate(req, proxy); + } + + Fault translateDataReadReq(RequestPtr &req) + { + return dtb->translate(req, proxy, false); + } + + Fault translateDataWriteReq(RequestPtr &req) + { + return dtb->translate(req, proxy, true); + } + + FunctionalPort *getPhysPort() { return physPort; } + + /** Return a virtual port. If no exec context is specified then a static + * port is returned. Otherwise a port is created and returned. It must be + * deleted by deleteVirtPort(). */ + VirtualPort *getVirtPort(ExecContext *xc); + + void delVirtPort(VirtualPort *vp); + +#else + TranslatingPort *getMemPort() { return port; } + + Process *getProcessPtr() { return process; } + + int getInstAsid() { return asid; } + int getDataAsid() { return asid; } + + Fault translateInstReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + + Fault translateDataReadReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + + Fault translateDataWriteReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + +#endif + +/* + template <class T> + Fault read(RequestPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif + + Fault error; + error = mem->prot_read(req->paddr, data, req->size); + data = LittleEndianGuest::gtoh(data); + return error; + } + + template <class T> + Fault write(RequestPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? [RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < system->execContexts.size(); i++){ + xc = system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + +#endif + return mem->prot_write(req->paddr, (T)htog(data), req->size); + } +*/ + virtual bool misspeculating(); + + + MachInst getInst() { return inst; } + + void setInst(MachInst new_inst) + { + inst = new_inst; + } + + Fault instRead(RequestPtr &req) + { + panic("instRead not implemented"); + // return funcPhysMem->read(req, inst); + return NoFault; + } + + void setCpuId(int id) { cpu_id = id; } + + int readCpuId() { return cpu_id; } + + void copyArchRegs(ExecContext *xc); + + // + // New accessors for new decoder. + // + uint64_t readIntReg(int reg_idx) + { + return regs.readIntReg(reg_idx); + } + + FloatReg readFloatReg(int reg_idx, int width) + { + return regs.readFloatReg(reg_idx, width); + } + + FloatReg readFloatReg(int reg_idx) + { + return regs.readFloatReg(reg_idx); + } + + FloatRegBits readFloatRegBits(int reg_idx, int width) + { + return regs.readFloatRegBits(reg_idx, width); + } + + FloatRegBits readFloatRegBits(int reg_idx) + { + return regs.readFloatRegBits(reg_idx); + } + + void setIntReg(int reg_idx, uint64_t val) + { + regs.setIntReg(reg_idx, val); + } + + void setFloatReg(int reg_idx, FloatReg val, int width) + { + regs.setFloatReg(reg_idx, val, width); + } + + void setFloatReg(int reg_idx, FloatReg val) + { + regs.setFloatReg(reg_idx, val); + } + + void setFloatRegBits(int reg_idx, FloatRegBits val, int width) + { + regs.setFloatRegBits(reg_idx, val, width); + } + + void setFloatRegBits(int reg_idx, FloatRegBits val) + { + regs.setFloatRegBits(reg_idx, val); + } + + uint64_t readPC() + { + return regs.readPC(); + } + + void setPC(uint64_t val) + { + regs.setPC(val); + } + + uint64_t readNextPC() + { + return regs.readNextPC(); + } + + void setNextPC(uint64_t val) + { + regs.setNextPC(val); + } + + uint64_t readNextNPC() + { + return regs.readNextNPC(); + } + + void setNextNPC(uint64_t val) + { + regs.setNextNPC(val); + } + + + MiscReg readMiscReg(int misc_reg) + { + return regs.readMiscReg(misc_reg); + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return regs.readMiscRegWithEffect(misc_reg, fault, proxy); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + return regs.setMiscReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return regs.setMiscRegWithEffect(misc_reg, val, proxy); + } + + unsigned readStCondFailures() { return storeCondFailures; } + + void setStCondFailures(unsigned sc_failures) + { storeCondFailures = sc_failures; } + + void clearArchRegs() { regs.clear(); } + +#if FULL_SYSTEM + int readIntrFlag() { return regs.intrflag; } + void setIntrFlag(int val) { regs.intrflag = val; } + Fault hwrei(); + bool inPalMode() { return AlphaISA::PcPAL(regs.readPC()); } + bool simPalCheck(int palFunc); +#endif + +#if !FULL_SYSTEM + TheISA::IntReg getSyscallArg(int i) + { + return regs.readIntReg(TheISA::ArgumentReg0 + i); + } + + // used to shift args for indirect syscall + void setSyscallArg(int i, TheISA::IntReg val) + { + regs.setIntReg(TheISA::ArgumentReg0 + i, val); + } + + void setSyscallReturn(SyscallReturn return_value) + { + TheISA::setSyscallReturn(return_value, ®s); + } + + void syscall(int64_t callnum) + { + process->syscall(callnum, proxy); + } + + Counter readFuncExeInst() { return func_exe_inst; } + + void setFuncExeInst(Counter new_val) { func_exe_inst = new_val; } +#endif + + void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) + { + regs.changeContext(param, val); + } +}; + + +// for non-speculative execution context, spec_mode is always false +inline bool +CPUExecContext::misspeculating() +{ + return false; +} + +#endif // __CPU_CPU_EXEC_CONTEXT_HH__ diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py new file mode 100644 index 000000000..30cbabde1 --- /dev/null +++ b/src/cpu/cpu_models.py @@ -0,0 +1,74 @@ +# Copyright (c) 2003-2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +################ +# CpuModel class +# +# The CpuModel class encapsulates everything the ISA parser needs to +# know about a particular CPU model. + +class CpuModel: + # Dict of available CPU model objects. Accessible as CpuModel.dict. + dict = {} + + # Constructor. Automatically adds models to CpuModel.dict. + def __init__(self, name, filename, includes, strings): + self.name = name + self.filename = filename # filename for output exec code + self.includes = includes # include files needed in exec file + # The 'strings' dict holds all the per-CPU symbols we can + # substitute into templates etc. + self.strings = strings + # Add self to dict + CpuModel.dict[name] = self + + +# +# Define CPU models. +# +# Parameters are: +# - name of model +# - filename for generated ISA execution file +# - includes needed for generated ISA execution file +# - substitution strings for ISA description templates +# + +CpuModel('AtomicSimpleCPU', 'atomic_simple_cpu_exec.cc', + '#include "cpu/simple/atomic.hh"', + { 'CPU_exec_context': 'AtomicSimpleCPU' }) +CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc', + '#include "cpu/simple/timing.hh"', + { 'CPU_exec_context': 'TimingSimpleCPU' }) +CpuModel('FastCPU', 'fast_cpu_exec.cc', + '#include "cpu/fast/cpu.hh"', + { 'CPU_exec_context': 'FastCPU' }) +CpuModel('FullCPU', 'full_cpu_exec.cc', + '#include "encumbered/cpu/full/dyn_inst.hh"', + { 'CPU_exec_context': 'DynInst' }) +CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc', + '#include "cpu/o3/alpha_dyn_inst.hh"', + { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' }) + diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh new file mode 100644 index 000000000..1f26183ab --- /dev/null +++ b/src/cpu/exec_context.hh @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_EXEC_CONTEXT_HH__ +#define __CPU_EXEC_CONTEXT_HH__ + +#include "config/full_system.hh" +#include "mem/request.hh" +#include "sim/faults.hh" +#include "sim/host.hh" +#include "sim/serialize.hh" +#include "sim/byteswap.hh" + +// @todo: Figure out a more architecture independent way to obtain the ITB and +// DTB pointers. +class AlphaDTB; +class AlphaITB; +class BaseCPU; +class Event; +class TranslatingPort; +class FunctionalPort; +class VirtualPort; +class Process; +class System; + +class ExecContext +{ + protected: + typedef TheISA::RegFile RegFile; + typedef TheISA::MachInst MachInst; + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; + public: + enum Status + { + /// Initialized but not running yet. All CPUs start in + /// this state, but most transition to Active on cycle 1. + /// In MP or SMT systems, non-primary contexts will stay + /// in this state until a thread is assigned to them. + Unallocated, + + /// Running. Instructions should be executed only when + /// the context is in this state. + Active, + + /// Temporarily inactive. Entered while waiting for + /// synchronization, etc. + Suspended, + + /// Permanently shut down. Entered when target executes + /// m5exit pseudo-instruction. When all contexts enter + /// this state, the simulation will terminate. + Halted + }; + + virtual ~ExecContext() { }; + + virtual BaseCPU *getCpuPtr() = 0; + + virtual void setCpuId(int id) = 0; + + virtual int readCpuId() = 0; + +#if FULL_SYSTEM + virtual System *getSystemPtr() = 0; + + virtual AlphaITB *getITBPtr() = 0; + + virtual AlphaDTB * getDTBPtr() = 0; + + virtual FunctionalPort *getPhysPort() = 0; + + virtual VirtualPort *getVirtPort(ExecContext *xc = NULL) = 0; + + virtual void delVirtPort(VirtualPort *vp) = 0; +#else + virtual TranslatingPort *getMemPort() = 0; + + virtual Process *getProcessPtr() = 0; +#endif + + virtual Status status() const = 0; + + virtual void setStatus(Status new_status) = 0; + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + virtual void activate(int delay = 1) = 0; + + /// Set the status to Suspended. + virtual void suspend() = 0; + + /// Set the status to Unallocated. + virtual void deallocate() = 0; + + /// Set the status to Halted. + virtual void halt() = 0; + +#if FULL_SYSTEM + virtual void dumpFuncProfile() = 0; +#endif + + virtual void takeOverFrom(ExecContext *old_context) = 0; + + virtual void regStats(const std::string &name) = 0; + + virtual void serialize(std::ostream &os) = 0; + virtual void unserialize(Checkpoint *cp, const std::string §ion) = 0; + +#if FULL_SYSTEM + virtual Event *getQuiesceEvent() = 0; + + // Not necessarily the best location for these... + // Having an extra function just to read these is obnoxious + virtual Tick readLastActivate() = 0; + virtual Tick readLastSuspend() = 0; + + virtual void profileClear() = 0; + virtual void profileSample() = 0; +#endif + + virtual int getThreadNum() = 0; + + virtual int getInstAsid() = 0; + virtual int getDataAsid() = 0; + + virtual Fault translateInstReq(RequestPtr &req) = 0; + + virtual Fault translateDataReadReq(RequestPtr &req) = 0; + + virtual Fault translateDataWriteReq(RequestPtr &req) = 0; + + // Also somewhat obnoxious. Really only used for the TLB fault. + // However, may be quite useful in SPARC. + virtual TheISA::MachInst getInst() = 0; + + virtual void copyArchRegs(ExecContext *xc) = 0; + + virtual void clearArchRegs() = 0; + + // + // New accessors for new decoder. + // + virtual uint64_t readIntReg(int reg_idx) = 0; + + virtual FloatReg readFloatReg(int reg_idx, int width) = 0; + + virtual FloatReg readFloatReg(int reg_idx) = 0; + + virtual FloatRegBits readFloatRegBits(int reg_idx, int width) = 0; + + virtual FloatRegBits readFloatRegBits(int reg_idx) = 0; + + virtual void setIntReg(int reg_idx, uint64_t val) = 0; + + virtual void setFloatReg(int reg_idx, FloatReg val, int width) = 0; + + virtual void setFloatReg(int reg_idx, FloatReg val) = 0; + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val) = 0; + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width) = 0; + + virtual uint64_t readPC() = 0; + + virtual void setPC(uint64_t val) = 0; + + virtual uint64_t readNextPC() = 0; + + virtual void setNextPC(uint64_t val) = 0; + + virtual uint64_t readNextNPC() = 0; + + virtual void setNextNPC(uint64_t val) = 0; + + virtual MiscReg readMiscReg(int misc_reg) = 0; + + virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) = 0; + + virtual Fault setMiscReg(int misc_reg, const MiscReg &val) = 0; + + virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) = 0; + + // Also not necessarily the best location for these two. Hopefully will go + // away once we decide upon where st cond failures goes. + virtual unsigned readStCondFailures() = 0; + + virtual void setStCondFailures(unsigned sc_failures) = 0; + +#if FULL_SYSTEM + virtual int readIntrFlag() = 0; + virtual void setIntrFlag(int val) = 0; + virtual Fault hwrei() = 0; + virtual bool inPalMode() = 0; + virtual bool simPalCheck(int palFunc) = 0; +#endif + + // Only really makes sense for old CPU model. Still could be useful though. + virtual bool misspeculating() = 0; + +#if !FULL_SYSTEM + virtual IntReg getSyscallArg(int i) = 0; + + // used to shift args for indirect syscall + virtual void setSyscallArg(int i, IntReg val) = 0; + + virtual void setSyscallReturn(SyscallReturn return_value) = 0; + + virtual void syscall(int64_t callnum) = 0; + + // Same with st cond failures. + virtual Counter readFuncExeInst() = 0; + + virtual void setFuncExeInst(Counter new_val) = 0; +#endif + + virtual void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) = 0; +}; + +template <class XC> +class ProxyExecContext : public ExecContext +{ + public: + ProxyExecContext(XC *actual_xc) + { actualXC = actual_xc; } + + private: + XC *actualXC; + + public: + + BaseCPU *getCpuPtr() { return actualXC->getCpuPtr(); } + + void setCpuId(int id) { actualXC->setCpuId(id); } + + int readCpuId() { return actualXC->readCpuId(); } + +#if FULL_SYSTEM + System *getSystemPtr() { return actualXC->getSystemPtr(); } + + AlphaITB *getITBPtr() { return actualXC->getITBPtr(); } + + AlphaDTB *getDTBPtr() { return actualXC->getDTBPtr(); } + + FunctionalPort *getPhysPort() { return actualXC->getPhysPort(); } + + VirtualPort *getVirtPort(ExecContext *xc = NULL) { return actualXC->getVirtPort(xc); } + + void delVirtPort(VirtualPort *vp) { return actualXC->delVirtPort(vp); } +#else + TranslatingPort *getMemPort() { return actualXC->getMemPort(); } + + Process *getProcessPtr() { return actualXC->getProcessPtr(); } +#endif + + Status status() const { return actualXC->status(); } + + void setStatus(Status new_status) { actualXC->setStatus(new_status); } + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + void activate(int delay = 1) { actualXC->activate(delay); } + + /// Set the status to Suspended. + void suspend() { actualXC->suspend(); } + + /// Set the status to Unallocated. + void deallocate() { actualXC->deallocate(); } + + /// Set the status to Halted. + void halt() { actualXC->halt(); } + +#if FULL_SYSTEM + void dumpFuncProfile() { actualXC->dumpFuncProfile(); } +#endif + + void takeOverFrom(ExecContext *oldContext) + { actualXC->takeOverFrom(oldContext); } + + void regStats(const std::string &name) { actualXC->regStats(name); } + + void serialize(std::ostream &os) { actualXC->serialize(os); } + void unserialize(Checkpoint *cp, const std::string §ion) + { actualXC->unserialize(cp, section); } + +#if FULL_SYSTEM + Event *getQuiesceEvent() { return actualXC->getQuiesceEvent(); } + + Tick readLastActivate() { return actualXC->readLastActivate(); } + Tick readLastSuspend() { return actualXC->readLastSuspend(); } + + void profileClear() { return actualXC->profileClear(); } + void profileSample() { return actualXC->profileSample(); } +#endif + + int getThreadNum() { return actualXC->getThreadNum(); } + + int getInstAsid() { return actualXC->getInstAsid(); } + int getDataAsid() { return actualXC->getDataAsid(); } + + Fault translateInstReq(RequestPtr &req) + { return actualXC->translateInstReq(req); } + + Fault translateDataReadReq(RequestPtr &req) + { return actualXC->translateDataReadReq(req); } + + Fault translateDataWriteReq(RequestPtr &req) + { return actualXC->translateDataWriteReq(req); } + + // @todo: Do I need this? + MachInst getInst() { return actualXC->getInst(); } + + // @todo: Do I need this? + void copyArchRegs(ExecContext *xc) { actualXC->copyArchRegs(xc); } + + void clearArchRegs() { actualXC->clearArchRegs(); } + + // + // New accessors for new decoder. + // + uint64_t readIntReg(int reg_idx) + { return actualXC->readIntReg(reg_idx); } + + FloatReg readFloatReg(int reg_idx, int width) + { return actualXC->readFloatReg(reg_idx, width); } + + FloatReg readFloatReg(int reg_idx) + { return actualXC->readFloatReg(reg_idx); } + + FloatRegBits readFloatRegBits(int reg_idx, int width) + { return actualXC->readFloatRegBits(reg_idx, width); } + + FloatRegBits readFloatRegBits(int reg_idx) + { return actualXC->readFloatRegBits(reg_idx); } + + void setIntReg(int reg_idx, uint64_t val) + { actualXC->setIntReg(reg_idx, val); } + + void setFloatReg(int reg_idx, FloatReg val, int width) + { actualXC->setFloatReg(reg_idx, val, width); } + + void setFloatReg(int reg_idx, FloatReg val) + { actualXC->setFloatReg(reg_idx, val); } + + void setFloatRegBits(int reg_idx, FloatRegBits val, int width) + { actualXC->setFloatRegBits(reg_idx, val, width); } + + void setFloatRegBits(int reg_idx, FloatRegBits val) + { actualXC->setFloatRegBits(reg_idx, val); } + + uint64_t readPC() { return actualXC->readPC(); } + + void setPC(uint64_t val) { actualXC->setPC(val); } + + uint64_t readNextPC() { return actualXC->readNextPC(); } + + void setNextPC(uint64_t val) { actualXC->setNextPC(val); } + + uint64_t readNextNPC() { return actualXC->readNextNPC(); } + + void setNextNPC(uint64_t val) { actualXC->setNextNPC(val); } + + MiscReg readMiscReg(int misc_reg) + { return actualXC->readMiscReg(misc_reg); } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return actualXC->readMiscRegWithEffect(misc_reg, fault); } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { return actualXC->setMiscReg(misc_reg, val); } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { return actualXC->setMiscRegWithEffect(misc_reg, val); } + + unsigned readStCondFailures() + { return actualXC->readStCondFailures(); } + + void setStCondFailures(unsigned sc_failures) + { actualXC->setStCondFailures(sc_failures); } + +#if FULL_SYSTEM + int readIntrFlag() { return actualXC->readIntrFlag(); } + + void setIntrFlag(int val) { actualXC->setIntrFlag(val); } + + Fault hwrei() { return actualXC->hwrei(); } + + bool inPalMode() { return actualXC->inPalMode(); } + + bool simPalCheck(int palFunc) { return actualXC->simPalCheck(palFunc); } +#endif + + // @todo: Fix this! + bool misspeculating() { return actualXC->misspeculating(); } + +#if !FULL_SYSTEM + IntReg getSyscallArg(int i) { return actualXC->getSyscallArg(i); } + + // used to shift args for indirect syscall + void setSyscallArg(int i, IntReg val) + { actualXC->setSyscallArg(i, val); } + + void setSyscallReturn(SyscallReturn return_value) + { actualXC->setSyscallReturn(return_value); } + + void syscall(int64_t callnum) { actualXC->syscall(callnum); } + + Counter readFuncExeInst() { return actualXC->readFuncExeInst(); } + + void setFuncExeInst(Counter new_val) + { return actualXC->setFuncExeInst(new_val); } +#endif + + void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) + { + actualXC->changeRegFileContext(param, val); + } +}; + +#endif diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc new file mode 100644 index 000000000..0ed3b43c4 --- /dev/null +++ b/src/cpu/exetrace.cc @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <fstream> +#include <iomanip> + +#include "base/loader/symtab.hh" +#include "cpu/base.hh" +#include "cpu/exetrace.hh" +#include "cpu/static_inst.hh" +#include "sim/param.hh" +#include "sim/system.hh" + +using namespace std; + + +//////////////////////////////////////////////////////////////////////// +// +// Methods for the InstRecord object +// + + +void +Trace::InstRecord::dump(ostream &outs) +{ + if (flags[INTEL_FORMAT]) { +#if FULL_SYSTEM + bool is_trace_system = (cpu->system->name() == trace_system); +#else + bool is_trace_system = true; +#endif + if (is_trace_system) { + ccprintf(outs, "%7d ) ", cycle); + outs << "0x" << hex << PC << ":\t"; + if (staticInst->isLoad()) { + outs << "<RD 0x" << hex << addr; + outs << ">"; + } else if (staticInst->isStore()) { + outs << "<WR 0x" << hex << addr; + outs << ">"; + } + outs << endl; + } + } else { + if (flags[PRINT_CYCLE]) + ccprintf(outs, "%7d: ", cycle); + + outs << cpu->name() << " "; + + if (flags[TRACE_MISSPEC]) + outs << (misspeculating ? "-" : "+") << " "; + + if (flags[PRINT_THREAD_NUM]) + outs << "T" << thread << " : "; + + + std::string sym_str; + Addr sym_addr; + if (debugSymbolTable + && debugSymbolTable->findNearestSymbol(PC, sym_str, sym_addr)) { + if (PC != sym_addr) + sym_str += csprintf("+%d", PC - sym_addr); + outs << "@" << sym_str << " : "; + } + else { + outs << "0x" << hex << PC << " : "; + } + + // + // Print decoded instruction + // + +#if defined(__GNUC__) && (__GNUC__ < 3) + // There's a bug in gcc 2.x library that prevents setw() + // from working properly on strings + string mc(staticInst->disassemble(PC, debugSymbolTable)); + while (mc.length() < 26) + mc += " "; + outs << mc; +#else + outs << setw(26) << left << staticInst->disassemble(PC, debugSymbolTable); +#endif + + outs << " : "; + + if (flags[PRINT_OP_CLASS]) { + outs << opClassStrings[staticInst->opClass()] << " : "; + } + + if (flags[PRINT_RESULT_DATA] && data_status != DataInvalid) { + outs << " D="; +#if 0 + if (data_status == DataDouble) + ccprintf(outs, "%f", data.as_double); + else + ccprintf(outs, "%#018x", data.as_int); +#else + ccprintf(outs, "%#018x", data.as_int); +#endif + } + + if (flags[PRINT_EFF_ADDR] && addr_valid) + outs << " A=0x" << hex << addr; + + if (flags[PRINT_INT_REGS] && regs_valid) { + for (int i = 0; i < TheISA::NumIntRegs;) + for (int j = i + 1; i <= j; i++) + ccprintf(outs, "r%02d = %#018x%s", i, + iregs->regs.readReg(i), + ((i == j) ? "\n" : " ")); + outs << "\n"; + } + + if (flags[PRINT_FETCH_SEQ] && fetch_seq_valid) + outs << " FetchSeq=" << dec << fetch_seq; + + if (flags[PRINT_CP_SEQ] && cp_seq_valid) + outs << " CPSeq=" << dec << cp_seq; + + // + // End of line... + // + outs << endl; + } +} + + +vector<bool> Trace::InstRecord::flags(NUM_BITS); +string Trace::InstRecord::trace_system; + +//////////////////////////////////////////////////////////////////////// +// +// Parameter space for per-cycle execution address tracing options. +// Derive from ParamContext so we can override checkParams() function. +// +class ExecutionTraceParamContext : public ParamContext +{ + public: + ExecutionTraceParamContext(const string &_iniSection) + : ParamContext(_iniSection) + { + } + + void checkParams(); // defined at bottom of file +}; + +ExecutionTraceParamContext exeTraceParams("exetrace"); + +Param<bool> exe_trace_spec(&exeTraceParams, "speculative", + "capture speculative instructions", true); + +Param<bool> exe_trace_print_cycle(&exeTraceParams, "print_cycle", + "print cycle number", true); +Param<bool> exe_trace_print_opclass(&exeTraceParams, "print_opclass", + "print op class", true); +Param<bool> exe_trace_print_thread(&exeTraceParams, "print_thread", + "print thread number", true); +Param<bool> exe_trace_print_effaddr(&exeTraceParams, "print_effaddr", + "print effective address", true); +Param<bool> exe_trace_print_data(&exeTraceParams, "print_data", + "print result data", true); +Param<bool> exe_trace_print_iregs(&exeTraceParams, "print_iregs", + "print all integer regs", false); +Param<bool> exe_trace_print_fetchseq(&exeTraceParams, "print_fetchseq", + "print fetch sequence number", false); +Param<bool> exe_trace_print_cp_seq(&exeTraceParams, "print_cpseq", + "print correct-path sequence number", false); +Param<bool> exe_trace_intel_format(&exeTraceParams, "intel_format", + "print trace in intel compatible format", false); +Param<string> exe_trace_system(&exeTraceParams, "trace_system", + "print trace of which system (client or server)", + "client"); + + +// +// Helper function for ExecutionTraceParamContext::checkParams() just +// to get us into the InstRecord namespace +// +void +Trace::InstRecord::setParams() +{ + flags[TRACE_MISSPEC] = exe_trace_spec; + + flags[PRINT_CYCLE] = exe_trace_print_cycle; + flags[PRINT_OP_CLASS] = exe_trace_print_opclass; + flags[PRINT_THREAD_NUM] = exe_trace_print_thread; + flags[PRINT_RESULT_DATA] = exe_trace_print_effaddr; + flags[PRINT_EFF_ADDR] = exe_trace_print_data; + flags[PRINT_INT_REGS] = exe_trace_print_iregs; + flags[PRINT_FETCH_SEQ] = exe_trace_print_fetchseq; + flags[PRINT_CP_SEQ] = exe_trace_print_cp_seq; + flags[INTEL_FORMAT] = exe_trace_intel_format; + trace_system = exe_trace_system; +} + +void +ExecutionTraceParamContext::checkParams() +{ + Trace::InstRecord::setParams(); +} + diff --git a/src/cpu/exetrace.hh b/src/cpu/exetrace.hh new file mode 100644 index 000000000..a26cdc517 --- /dev/null +++ b/src/cpu/exetrace.hh @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EXETRACE_HH__ +#define __EXETRACE_HH__ + +#include <fstream> +#include <vector> + +#include "sim/host.hh" +#include "cpu/inst_seq.hh" // for InstSeqNum +#include "base/trace.hh" +#include "cpu/exec_context.hh" +#include "cpu/static_inst.hh" + +class BaseCPU; + + +namespace Trace { + +class InstRecord : public Record +{ + protected: + typedef TheISA::IntRegFile IntRegFile; + + // The following fields are initialized by the constructor and + // thus guaranteed to be valid. + BaseCPU *cpu; + // need to make this ref-counted so it doesn't go away before we + // dump the record + StaticInstPtr staticInst; + Addr PC; + bool misspeculating; + unsigned thread; + + // The remaining fields are only valid for particular instruction + // types (e.g, addresses for memory ops) or when particular + // options are enabled (e.g., tracing full register contents). + // Each data field has an associated valid flag to indicate + // whether the data field is valid. + Addr addr; + bool addr_valid; + + union { + uint64_t as_int; + double as_double; + } data; + enum { + DataInvalid = 0, + DataInt8 = 1, // set to equal number of bytes + DataInt16 = 2, + DataInt32 = 4, + DataInt64 = 8, + DataDouble = 3 + } data_status; + + InstSeqNum fetch_seq; + bool fetch_seq_valid; + + InstSeqNum cp_seq; + bool cp_seq_valid; + + struct iRegFile { + IntRegFile regs; + }; + iRegFile *iregs; + bool regs_valid; + + public: + InstRecord(Tick _cycle, BaseCPU *_cpu, + const StaticInstPtr &_staticInst, + Addr _pc, bool spec, int _thread) + : Record(_cycle), cpu(_cpu), staticInst(_staticInst), PC(_pc), + misspeculating(spec), thread(_thread) + { + data_status = DataInvalid; + addr_valid = false; + regs_valid = false; + + fetch_seq_valid = false; + cp_seq_valid = false; + } + + virtual ~InstRecord() { } + + virtual void dump(std::ostream &outs); + + void setAddr(Addr a) { addr = a; addr_valid = true; } + + void setData(uint64_t d) { data.as_int = d; data_status = DataInt64; } + void setData(uint32_t d) { data.as_int = d; data_status = DataInt32; } + void setData(uint16_t d) { data.as_int = d; data_status = DataInt16; } + void setData(uint8_t d) { data.as_int = d; data_status = DataInt8; } + + void setData(int64_t d) { setData((uint64_t)d); } + void setData(int32_t d) { setData((uint32_t)d); } + void setData(int16_t d) { setData((uint16_t)d); } + void setData(int8_t d) { setData((uint8_t)d); } + + void setData(double d) { data.as_double = d; data_status = DataDouble; } + + void setFetchSeq(InstSeqNum seq) + { fetch_seq = seq; fetch_seq_valid = true; } + + void setCPSeq(InstSeqNum seq) + { cp_seq = seq; cp_seq_valid = true; } + + void setRegs(const IntRegFile ®s); + + void finalize() { theLog.append(this); } + + enum InstExecFlagBits { + TRACE_MISSPEC = 0, + PRINT_CYCLE, + PRINT_OP_CLASS, + PRINT_THREAD_NUM, + PRINT_RESULT_DATA, + PRINT_EFF_ADDR, + PRINT_INT_REGS, + PRINT_FETCH_SEQ, + PRINT_CP_SEQ, + INTEL_FORMAT, + NUM_BITS + }; + + static std::vector<bool> flags; + static std::string trace_system; + + static void setParams(); + + static bool traceMisspec() { return flags[TRACE_MISSPEC]; } +}; + + +inline void +InstRecord::setRegs(const IntRegFile ®s) +{ + if (!iregs) + iregs = new iRegFile; + + memcpy(&iregs->regs, ®s, sizeof(IntRegFile)); + regs_valid = true; +} + +inline +InstRecord * +getInstRecord(Tick cycle, ExecContext *xc, BaseCPU *cpu, + const StaticInstPtr staticInst, + Addr pc, int thread = 0) +{ + if (DTRACE(InstExec) && + (InstRecord::traceMisspec() || !xc->misspeculating())) { + return new InstRecord(cycle, cpu, staticInst, pc, + xc->misspeculating(), thread); + } + + return NULL; +} + + +} + +#endif // __EXETRACE_HH__ diff --git a/src/cpu/inst_seq.hh b/src/cpu/inst_seq.hh new file mode 100644 index 000000000..8de047af7 --- /dev/null +++ b/src/cpu/inst_seq.hh @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2001, 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __STD_TYPES_HH__ +#define __STD_TYPES_HH__ + +// inst sequence type, used to order instructions in the ready list, +// if this rolls over the ready list order temporarily will get messed +// up, but execution will continue and complete correctly +typedef uint64_t InstSeqNum; + +// inst tag type, used to tag an operation instance in the IQ +typedef unsigned int InstTag; + +#endif // __STD_TYPES_HH__ diff --git a/src/cpu/intr_control.cc b/src/cpu/intr_control.cc new file mode 100644 index 000000000..43e7f654c --- /dev/null +++ b/src/cpu/intr_control.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string> +#include <vector> + +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/intr_control.hh" +#include "sim/builder.hh" +#include "sim/sim_object.hh" + +using namespace std; + +IntrControl::IntrControl(const string &name, BaseCPU *c) + : SimObject(name), cpu(c) +{} + +/* @todo + *Fix the cpu sim object parameter to be a system pointer + *instead, to avoid some extra dereferencing + */ +void +IntrControl::post(int int_num, int index) +{ + std::vector<ExecContext *> &xcvec = cpu->system->execContexts; + BaseCPU *temp = xcvec[0]->getCpuPtr(); + temp->post_interrupt(int_num, index); +} + +void +IntrControl::post(int cpu_id, int int_num, int index) +{ + std::vector<ExecContext *> &xcvec = cpu->system->execContexts; + BaseCPU *temp = xcvec[cpu_id]->getCpuPtr(); + temp->post_interrupt(int_num, index); +} + +void +IntrControl::clear(int int_num, int index) +{ + std::vector<ExecContext *> &xcvec = cpu->system->execContexts; + BaseCPU *temp = xcvec[0]->getCpuPtr(); + temp->clear_interrupt(int_num, index); +} + +void +IntrControl::clear(int cpu_id, int int_num, int index) +{ + std::vector<ExecContext *> &xcvec = cpu->system->execContexts; + BaseCPU *temp = xcvec[cpu_id]->getCpuPtr(); + temp->clear_interrupt(int_num, index); +} + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(IntrControl) + + SimObjectParam<BaseCPU *> cpu; + +END_DECLARE_SIM_OBJECT_PARAMS(IntrControl) + +BEGIN_INIT_SIM_OBJECT_PARAMS(IntrControl) + + INIT_PARAM(cpu, "the cpu") + +END_INIT_SIM_OBJECT_PARAMS(IntrControl) + +CREATE_SIM_OBJECT(IntrControl) +{ + return new IntrControl(getInstanceName(), cpu); +} + +REGISTER_SIM_OBJECT("IntrControl", IntrControl) diff --git a/src/cpu/intr_control.hh b/src/cpu/intr_control.hh new file mode 100644 index 000000000..5ec4e14cb --- /dev/null +++ b/src/cpu/intr_control.hh @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INTR_CONTROL_HH__ +#define __INTR_CONTROL_HH__ + +#include <vector> +#include "base/misc.hh" +#include "cpu/base.hh" +#include "sim/sim_object.hh" +#include "sim/system.hh" + + +class IntrControl : public SimObject +{ + public: + BaseCPU *cpu; + IntrControl(const std::string &name, BaseCPU *c); + + void clear(int int_num, int index = 0); + void post(int int_num, int index = 0); + void clear(int cpu_id, int int_num, int index); + void post(int cpu_id, int int_num, int index); +}; + +#endif // __INTR_CONTROL_HH__ + + + + + + + diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc new file mode 100644 index 000000000..54def1012 --- /dev/null +++ b/src/cpu/memtest/memtest.cc @@ -0,0 +1,440 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// FIX ME: make trackBlkAddr use blocksize from actual cache, not hard coded + +#include <iomanip> +#include <set> +#include <string> +#include <vector> + +#include "base/misc.hh" +#include "base/statistics.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/memtest/memtest.hh" +#include "mem/cache/base_cache.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +using namespace std; +using namespace TheISA; + +int TESTER_ALLOCATOR=0; + +MemTest::MemTest(const string &name, + MemInterface *_cache_interface, + FunctionalMemory *main_mem, + FunctionalMemory *check_mem, + unsigned _memorySize, + unsigned _percentReads, + unsigned _percentCopies, + unsigned _percentUncacheable, + unsigned _progressInterval, + unsigned _percentSourceUnaligned, + unsigned _percentDestUnaligned, + Addr _traceAddr, + Counter _max_loads) + : SimObject(name), + tickEvent(this), + cacheInterface(_cache_interface), + mainMem(main_mem), + checkMem(check_mem), + size(_memorySize), + percentReads(_percentReads), + percentCopies(_percentCopies), + percentUncacheable(_percentUncacheable), + progressInterval(_progressInterval), + nextProgressMessage(_progressInterval), + percentSourceUnaligned(_percentSourceUnaligned), + percentDestUnaligned(percentDestUnaligned), + maxLoads(_max_loads) +{ + vector<string> cmd; + cmd.push_back("/bin/ls"); + vector<string> null_vec; + cpuXC = new CPUExecContext(NULL, 0, mainMem, 0); + + blockSize = cacheInterface->getBlockSize(); + blockAddrMask = blockSize - 1; + traceBlockAddr = blockAddr(_traceAddr); + + //setup data storage with interesting values + uint8_t *data1 = new uint8_t[size]; + uint8_t *data2 = new uint8_t[size]; + uint8_t *data3 = new uint8_t[size]; + memset(data1, 1, size); + memset(data2, 2, size); + memset(data3, 3, size); + curTick = 0; + + baseAddr1 = 0x100000; + baseAddr2 = 0x400000; + uncacheAddr = 0x800000; + + // set up intial memory contents here + mainMem->prot_write(baseAddr1, data1, size); + checkMem->prot_write(baseAddr1, data1, size); + mainMem->prot_write(baseAddr2, data2, size); + checkMem->prot_write(baseAddr2, data2, size); + mainMem->prot_write(uncacheAddr, data3, size); + checkMem->prot_write(uncacheAddr, data3, size); + + delete [] data1; + delete [] data2; + delete [] data3; + + // set up counters + noResponseCycles = 0; + numReads = 0; + tickEvent.schedule(0); + + id = TESTER_ALLOCATOR++; +} + +static void +printData(ostream &os, uint8_t *data, int nbytes) +{ + os << hex << setfill('0'); + // assume little-endian: print bytes from highest address to lowest + for (uint8_t *dp = data + nbytes - 1; dp >= data; --dp) { + os << setw(2) << (unsigned)*dp; + } + os << dec; +} + +void +MemTest::completeRequest(MemReqPtr &req, uint8_t *data) +{ + //Remove the address from the list of outstanding + std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->paddr); + assert(removeAddr != outstandingAddrs.end()); + outstandingAddrs.erase(removeAddr); + + switch (req->cmd) { + case Read: + if (memcmp(req->data, data, req->size) != 0) { + cerr << name() << ": on read of 0x" << hex << req->paddr + << " (0x" << hex << blockAddr(req->paddr) << ")" + << "@ cycle " << dec << curTick + << ", cache returns 0x"; + printData(cerr, req->data, req->size); + cerr << ", expected 0x"; + printData(cerr, data, req->size); + cerr << endl; + fatal(""); + } + + numReads++; + numReadsStat++; + + if (numReads == nextProgressMessage) { + ccprintf(cerr, "%s: completed %d read accesses @%d\n", + name(), numReads, curTick); + nextProgressMessage += progressInterval; + } + + if (numReads >= maxLoads) + SimExit(curTick, "Maximum number of loads reached!"); + break; + + case Write: + numWritesStat++; + break; + + case Copy: + //Also remove dest from outstanding list + removeAddr = outstandingAddrs.find(req->dest); + assert(removeAddr != outstandingAddrs.end()); + outstandingAddrs.erase(removeAddr); + numCopiesStat++; + break; + + default: + panic("invalid command"); + } + + if (blockAddr(req->paddr) == traceBlockAddr) { + cerr << name() << ": completed " + << (req->cmd.isWrite() ? "write" : "read") + << " access of " + << dec << req->size << " bytes at address 0x" + << hex << req->paddr + << " (0x" << hex << blockAddr(req->paddr) << ")" + << ", value = 0x"; + printData(cerr, req->data, req->size); + cerr << " @ cycle " << dec << curTick; + + cerr << endl; + } + + noResponseCycles = 0; + delete [] data; +} + + +void +MemTest::regStats() +{ + using namespace Stats; + + + numReadsStat + .name(name() + ".num_reads") + .desc("number of read accesses completed") + ; + + numWritesStat + .name(name() + ".num_writes") + .desc("number of write accesses completed") + ; + + numCopiesStat + .name(name() + ".num_copies") + .desc("number of copy accesses completed") + ; +} + +void +MemTest::tick() +{ + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + cycles(1)); + + if (++noResponseCycles >= 500000) { + cerr << name() << ": deadlocked at cycle " << curTick << endl; + fatal(""); + } + + if (cacheInterface->isBlocked()) { + return; + } + + //make new request + unsigned cmd = random() % 100; + unsigned offset = random() % size; + unsigned base = random() % 2; + uint64_t data = random(); + unsigned access_size = random() % 4; + unsigned cacheable = random() % 100; + + //If we aren't doing copies, use id as offset, and do a false sharing + //mem tester + if (percentCopies == 0) { + //We can eliminate the lower bits of the offset, and then use the id + //to offset within the blks + offset &= ~63; //Not the low order bits + offset += id; + access_size = 0; + } + + MemReqPtr req = new MemReq(); + + if (cacheable < percentUncacheable) { + req->flags |= UNCACHEABLE; + req->paddr = uncacheAddr + offset; + } else { + req->paddr = ((base) ? baseAddr1 : baseAddr2) + offset; + } + // bool probe = (random() % 2 == 1) && !req->isUncacheable(); + bool probe = false; + + req->size = 1 << access_size; + req->data = new uint8_t[req->size]; + req->paddr &= ~(req->size - 1); + req->time = curTick; + req->xc = cpuXC->getProxy(); + + if (cmd < percentReads) { + // read + + //For now we only allow one outstanding request per addreess per tester + //This means we assume CPU does write forwarding to reads that alias something + //in the cpu store buffer. + if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(req->paddr); + + req->cmd = Read; + uint8_t *result = new uint8_t[8]; + checkMem->access(Read, req->paddr, result, req->size); + if (blockAddr(req->paddr) == traceBlockAddr) { + cerr << name() + << ": initiating read " + << ((probe) ? "probe of " : "access of ") + << dec << req->size << " bytes from addr 0x" + << hex << req->paddr + << " (0x" << hex << blockAddr(req->paddr) << ")" + << " at cycle " + << dec << curTick << endl; + } + if (probe) { + cacheInterface->probeAndUpdate(req); + completeRequest(req, result); + } else { + req->completionEvent = new MemCompleteEvent(req, result, this); + cacheInterface->access(req); + } + } else if (cmd < (100 - percentCopies)){ + // write + + //For now we only allow one outstanding request per addreess per tester + //This means we assume CPU does write forwarding to reads that alias something + //in the cpu store buffer. + if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(req->paddr); + + req->cmd = Write; + memcpy(req->data, &data, req->size); + checkMem->access(Write, req->paddr, req->data, req->size); + if (blockAddr(req->paddr) == traceBlockAddr) { + cerr << name() << ": initiating write " + << ((probe)?"probe of ":"access of ") + << dec << req->size << " bytes (value = 0x"; + printData(cerr, req->data, req->size); + cerr << ") to addr 0x" + << hex << req->paddr + << " (0x" << hex << blockAddr(req->paddr) << ")" + << " at cycle " + << dec << curTick << endl; + } + if (probe) { + cacheInterface->probeAndUpdate(req); + completeRequest(req, NULL); + } else { + req->completionEvent = new MemCompleteEvent(req, NULL, this); + cacheInterface->access(req); + } + } else { + // copy + unsigned source_align = random() % 100; + unsigned dest_align = random() % 100; + unsigned offset2 = random() % size; + + Addr source = ((base) ? baseAddr1 : baseAddr2) + offset; + Addr dest = ((base) ? baseAddr2 : baseAddr1) + offset2; + if (outstandingAddrs.find(source) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(source); + if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(dest); + + if (source_align >= percentSourceUnaligned) { + source = blockAddr(source); + } + if (dest_align >= percentDestUnaligned) { + dest = blockAddr(dest); + } + req->cmd = Copy; + req->flags &= ~UNCACHEABLE; + req->paddr = source; + req->dest = dest; + delete [] req->data; + req->data = new uint8_t[blockSize]; + req->size = blockSize; + if (source == traceBlockAddr || dest == traceBlockAddr) { + cerr << name() + << ": initiating copy of " + << dec << req->size << " bytes from addr 0x" + << hex << source + << " (0x" << hex << blockAddr(source) << ")" + << " to addr 0x" + << hex << dest + << " (0x" << hex << blockAddr(dest) << ")" + << " at cycle " + << dec << curTick << endl; + } + cacheInterface->access(req); + uint8_t result[blockSize]; + checkMem->access(Read, source, &result, blockSize); + checkMem->access(Write, dest, &result, blockSize); + } +} + + +void +MemCompleteEvent::process() +{ + tester->completeRequest(req, data); + delete this; +} + + +const char * +MemCompleteEvent::description() +{ + return "memory access completion"; +} + + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest) + + SimObjectParam<BaseCache *> cache; + SimObjectParam<FunctionalMemory *> main_mem; + SimObjectParam<FunctionalMemory *> check_mem; + Param<unsigned> memory_size; + Param<unsigned> percent_reads; + Param<unsigned> percent_copies; + Param<unsigned> percent_uncacheable; + Param<unsigned> progress_interval; + Param<unsigned> percent_source_unaligned; + Param<unsigned> percent_dest_unaligned; + Param<Addr> trace_addr; + Param<Counter> max_loads; + +END_DECLARE_SIM_OBJECT_PARAMS(MemTest) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) + + INIT_PARAM(cache, "L1 cache"), + INIT_PARAM(main_mem, "hierarchical memory"), + INIT_PARAM(check_mem, "check memory"), + INIT_PARAM(memory_size, "memory size"), + INIT_PARAM(percent_reads, "target read percentage"), + INIT_PARAM(percent_copies, "target copy percentage"), + INIT_PARAM(percent_uncacheable, "target uncacheable percentage"), + INIT_PARAM(progress_interval, "progress report interval (in accesses)"), + INIT_PARAM(percent_source_unaligned, + "percent of copy source address that are unaligned"), + INIT_PARAM(percent_dest_unaligned, + "percent of copy dest address that are unaligned"), + INIT_PARAM(trace_addr, "address to trace"), + INIT_PARAM(max_loads, "terminate when we have reached this load count") + +END_INIT_SIM_OBJECT_PARAMS(MemTest) + + +CREATE_SIM_OBJECT(MemTest) +{ + return new MemTest(getInstanceName(), cache->getInterface(), main_mem, + check_mem, memory_size, percent_reads, percent_copies, + percent_uncacheable, progress_interval, + percent_source_unaligned, percent_dest_unaligned, + trace_addr, max_loads); +} + +REGISTER_SIM_OBJECT("MemTest", MemTest) diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh new file mode 100644 index 000000000..cdb40a26a --- /dev/null +++ b/src/cpu/memtest/memtest.hh @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_MEMTEST_MEMTEST_HH__ +#define __CPU_MEMTEST_MEMTEST_HH__ + +#include <set> + +#include "base/statistics.hh" +#include "mem/functional/functional.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" +#include "sim/sim_exit.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +class ExecContext; +class MemTest : public SimObject +{ + public: + + MemTest(const std::string &name, + MemInterface *_cache_interface, + FunctionalMemory *main_mem, + FunctionalMemory *check_mem, + unsigned _memorySize, + unsigned _percentReads, + unsigned _percentCopies, + unsigned _percentUncacheable, + unsigned _progressInterval, + unsigned _percentSourceUnaligned, + unsigned _percentDestUnaligned, + Addr _traceAddr, + Counter _max_loads); + + // register statistics + virtual void regStats(); + + inline Tick cycles(int numCycles) const { return numCycles; } + + // main simulation loop (one cycle) + void tick(); + + protected: + class TickEvent : public Event + { + private: + MemTest *cpu; + public: + TickEvent(MemTest *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) {} + void process() {cpu->tick();} + virtual const char *description() { return "tick event"; } + }; + + TickEvent tickEvent; + + MemInterface *cacheInterface; + FunctionalMemory *mainMem; + FunctionalMemory *checkMem; + CPUExecContext *cpuXC; + + unsigned size; // size of testing memory region + + unsigned percentReads; // target percentage of read accesses + unsigned percentCopies; // target percentage of copy accesses + unsigned percentUncacheable; + + int id; + + std::set<unsigned> outstandingAddrs; + + unsigned blockSize; + + Addr blockAddrMask; + + Addr blockAddr(Addr addr) + { + return (addr & ~blockAddrMask); + } + + Addr traceBlockAddr; + + Addr baseAddr1; // fix this to option + Addr baseAddr2; // fix this to option + Addr uncacheAddr; + + unsigned progressInterval; // frequency of progress reports + Tick nextProgressMessage; // access # for next progress report + + unsigned percentSourceUnaligned; + unsigned percentDestUnaligned; + + Tick noResponseCycles; + + uint64_t numReads; + uint64_t maxLoads; + Stats::Scalar<> numReadsStat; + Stats::Scalar<> numWritesStat; + Stats::Scalar<> numCopiesStat; + + // called by MemCompleteEvent::process() + void completeRequest(MemReqPtr &req, uint8_t *data); + + friend class MemCompleteEvent; +}; + + +class MemCompleteEvent : public Event +{ + MemReqPtr req; + uint8_t *data; + MemTest *tester; + + public: + + MemCompleteEvent(MemReqPtr &_req, uint8_t *_data, MemTest *_tester) + : Event(&mainEventQueue), + req(_req), data(_data), tester(_tester) + { + } + + void process(); + + virtual const char *description(); +}; + +#endif // __CPU_MEMTEST_MEMTEST_HH__ + + + diff --git a/src/cpu/o3/2bit_local_pred.cc b/src/cpu/o3/2bit_local_pred.cc new file mode 100644 index 000000000..d9744eec7 --- /dev/null +++ b/src/cpu/o3/2bit_local_pred.cc @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" +#include "cpu/o3/2bit_local_pred.hh" + +DefaultBP::DefaultBP(unsigned _localPredictorSize, + unsigned _localCtrBits, + unsigned _instShiftAmt) + : localPredictorSize(_localPredictorSize), + localCtrBits(_localCtrBits), + instShiftAmt(_instShiftAmt) +{ + // Should do checks here to make sure sizes are correct (powers of 2). + + // Setup the index mask. + indexMask = localPredictorSize - 1; + + DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); + + // Setup the array of counters for the local predictor. + localCtrs = new SatCounter[localPredictorSize]; + + for (int i = 0; i < localPredictorSize; ++i) + localCtrs[i].setBits(_localCtrBits); + + DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", + localPredictorSize); + + DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits); + + DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n", + instShiftAmt); +} + +bool +DefaultBP::lookup(Addr &branch_addr) +{ + bool taken; + uint8_t local_prediction; + unsigned local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + assert(local_predictor_idx < localPredictorSize); + + local_prediction = localCtrs[local_predictor_idx].read(); + + DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", + (int)local_prediction); + + taken = getPrediction(local_prediction); + +#if 0 + // Speculative update. + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +#endif + + return taken; +} + +void +DefaultBP::update(Addr &branch_addr, bool taken) +{ + unsigned local_predictor_idx; + + // Update the local predictor. + local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + + assert(local_predictor_idx < localPredictorSize); + + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +} + +inline +bool +DefaultBP::getPrediction(uint8_t &count) +{ + // Get the MSB of the count + return (count >> (localCtrBits - 1)); +} + +inline +unsigned +DefaultBP::getLocalIndex(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & indexMask; +} diff --git a/src/cpu/o3/2bit_local_pred.hh b/src/cpu/o3/2bit_local_pred.hh new file mode 100644 index 000000000..97433e542 --- /dev/null +++ b/src/cpu/o3/2bit_local_pred.hh @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ +#define __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" +#include "cpu/o3/sat_counter.hh" + +class DefaultBP +{ + public: + /** + * Default branch predictor constructor. + */ + DefaultBP(unsigned localPredictorSize, unsigned localCtrBits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, bool taken); + + private: + + /** Returns the taken/not taken prediction given the value of the + * counter. + */ + inline bool getPrediction(uint8_t &count); + + /** Calculates the local index based on the PC. */ + inline unsigned getLocalIndex(Addr &PC); + + /** Array of counters that make up the local predictor. */ + SatCounter *localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Number of bits to shift the PC when calculating index. */ + unsigned instShiftAmt; + + /** Mask to get index bits. */ + unsigned indexMask; +}; + +#endif // __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha_cpu.cc new file mode 100644 index 000000000..7bc90dae6 --- /dev/null +++ b/src/cpu/o3/alpha_cpu.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha_cpu_impl.hh" +#include "cpu/o3/alpha_dyn_inst.hh" + +// Force instantiation of AlphaFullCPU for all the implemntations that are +// needed. Consider merging this and alpha_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (alpha_impl.cc?). +template class AlphaFullCPU<AlphaSimpleImpl>; diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh new file mode 100644 index 000000000..8e1e0f42a --- /dev/null +++ b/src/cpu/o3/alpha_cpu.hh @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Find all the stuff in ExecContext and ev5 that needs to be +// specifically designed for this CPU. + +#ifndef __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ +#define __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ + +#include "cpu/o3/cpu.hh" +#include "arch/isa_traits.hh" +#include "sim/byteswap.hh" + +template <class Impl> +class AlphaFullCPU : public FullO3CPU<Impl> +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef typename Impl::Params Params; + + public: + AlphaFullCPU(Params ¶ms); + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#endif + + public: + void regStats(); + +#if FULL_SYSTEM + //Note that the interrupt stuff from the base CPU might be somewhat + //ISA specific (ie NumInterruptLevels). These functions might not + //be needed in FullCPU though. +// void post_interrupt(int int_num, int index); +// void clear_interrupt(int int_num, int index); +// void clear_interrupts(); + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return NoFault; + } + + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif + + // Later on may want to remove this misc stuff from the regfile and + // have it handled at this level. Might prove to be an issue when + // trying to rename source/destination registers... + MiscReg readMiscReg(int misc_reg) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return 0; + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return NoFault; + } + + // Most of the full system code and syscall emulation is not yet + // implemented. These functions do show what the final interface will + // look like. +#if FULL_SYSTEM + int readIntrFlag(); + void setIntrFlag(int val); + Fault hwrei(); + bool inPalMode() { return AlphaISA::PcPAL(this->regFile.readPC()); } + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + + void trap(Fault fault); + bool simPalCheck(int palFunc); + + void processInterrupts(); +#endif + + +#if !FULL_SYSTEM + // Need to change these into regfile calls that directly set a certain + // register. Actually, these functions should handle most of this + // functionality by themselves; should look up the rename and then + // set the register. + IntReg getSyscallArg(int i) + { + return this->cpuXC->readIntReg(AlphaISA::ArgumentReg0 + i); + } + + // used to shift args for indirect syscall + void setSyscallArg(int i, IntReg val) + { + this->cpuXC->setIntReg(AlphaISA::ArgumentReg0 + i, val); + } + + void setSyscallReturn(int64_t return_value) + { + // check for error condition. Alpha syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + const int RegA3 = 19; // only place this is used + if (return_value >= 0) { + // no error + this->cpuXC->setIntReg(RegA3, 0); + this->cpuXC->setIntReg(AlphaISA::ReturnValueReg, return_value); + } else { + // got an error, return details + this->cpuXC->setIntReg(RegA3, (IntReg) -1); + this->cpuXC->setIntReg(AlphaISA::ReturnValueReg, -return_value); + } + } + + void syscall(short thread_num); + void squashStages(); + +#endif + + void copyToXC(); + void copyFromXC(); + + public: +#if FULL_SYSTEM + bool palShadowEnabled; + + // Not sure this is used anywhere. + void intr_post(RegFile *regs, Fault fault, Addr pc); + // Actually used within exec files. Implement properly. + void swapPALShadow(bool use_shadow); + // Called by CPU constructor. Can implement as I please. + void initCPU(RegFile *regs); + // Called by initCPU. Implement as I please. + void initIPRs(RegFile *regs); + + void halt() { panic("Halt not implemented!\n"); } +#endif + + + template <class T> + Fault read(MemReqPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif + + Fault error; + error = this->mem->read(req, data); + data = gtoh(data); + return error; + } + + template <class T> + Fault read(MemReqPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + template <class T> + Fault write(MemReqPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? [RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < this->system->execContexts.size(); i++){ + xc = this->system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + +#endif + + return this->mem->write(req, (T)htog(data)); + } + + template <class T> + Fault write(MemReqPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + +}; + +#endif // __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc new file mode 100644 index 000000000..6025b8ef2 --- /dev/null +++ b/src/cpu/o3/alpha_cpu_builder.cc @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/inifile.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "cpu/base.hh" +#include "cpu/exetrace.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "mem/base_mem.hh" +#include "mem/cache/base_cache.hh" +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/debug.hh" +#include "sim/host.hh" +#include "sim/process.hh" +#include "sim/sim_events.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#if FULL_SYSTEM +#include "base/remote_gdb.hh" +#include "mem/functional/memory_control.hh" +#include "mem/functional/physical.hh" +#include "sim/system.hh" +#include "arch/tlb.hh" +#include "arch/vtophys.hh" +#else // !FULL_SYSTEM +#include "mem/functional/functional.hh" +#endif // FULL_SYSTEM + +class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl> +{ + public: + DerivAlphaFullCPU(AlphaSimpleParams p) + : AlphaFullCPU<AlphaSimpleImpl>(p) + { } +}; + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + + Param<int> clock; + Param<int> numThreads; + +#if FULL_SYSTEM +SimObjectParam<System *> system; +Param<int> cpu_id; +SimObjectParam<AlphaITB *> itb; +SimObjectParam<AlphaDTB *> dtb; +#else +SimObjectVectorParam<Process *> workload; +#endif // FULL_SYSTEM +SimObjectParam<FunctionalMemory *> mem; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +SimObjectParam<BaseCache *> icache; +SimObjectParam<BaseCache *> dcache; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> issueWidth; +Param<unsigned> executeWidth; +Param<unsigned> executeIntWidth; +Param<unsigned> executeFloatWidth; +Param<unsigned> executeBranchWidth; +Param<unsigned> executeMemoryWidth; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; + +#if 0 +Param<unsigned> localPredictorSize; +Param<unsigned> localPredictorCtrBits; +#endif +Param<unsigned> local_predictor_size; +Param<unsigned> local_ctr_bits; +Param<unsigned> local_history_table_size; +Param<unsigned> local_history_bits; +Param<unsigned> global_predictor_size; +Param<unsigned> global_ctr_bits; +Param<unsigned> global_history_bits; +Param<unsigned> choice_predictor_size; +Param<unsigned> choice_ctr_bits; + +Param<unsigned> BTBEntries; +Param<unsigned> BTBTagSize; + +Param<unsigned> RASSize; + +Param<unsigned> LQEntries; +Param<unsigned> SQEntries; +Param<unsigned> LFSTSize; +Param<unsigned> SSITSize; + +Param<unsigned> numPhysIntRegs; +Param<unsigned> numPhysFloatRegs; +Param<unsigned> numIQEntries; +Param<unsigned> numROBEntries; + +Param<unsigned> instShiftAmt; + +Param<bool> defer_registration; + +Param<bool> function_trace; +Param<Tick> function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + +#if 0 + INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. " + "Must be a power of 2."), + INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"), +#endif + INIT_PARAM(local_predictor_size, "Size of local predictor"), + INIT_PARAM(local_ctr_bits, "Bits per counter"), + INIT_PARAM(local_history_table_size, "Size of local history table"), + INIT_PARAM(local_history_bits, "Bits for the local history"), + INIT_PARAM(global_predictor_size, "Size of global predictor"), + INIT_PARAM(global_ctr_bits, "Bits per counter"), + INIT_PARAM(global_history_bits, "Bits of history"), + INIT_PARAM(choice_predictor_size, "Size of choice predictor"), + INIT_PARAM(choice_ctr_bits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + +CREATE_SIM_OBJECT(DerivAlphaFullCPU) +{ + DerivAlphaFullCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + AlphaSimpleParams params; + + params.clock = clock; + + params.name = getInstanceName(); + params.numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params.system = system; + params.cpu_id = cpu_id; + params.itb = itb; + params.dtb = dtb; +#else + params.workload = workload; +#endif // FULL_SYSTEM + + params.mem = mem; + + params.max_insts_any_thread = max_insts_any_thread; + params.max_insts_all_threads = max_insts_all_threads; + params.max_loads_any_thread = max_loads_any_thread; + params.max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params.icacheInterface = icache ? icache->getInterface() : NULL; + params.dcacheInterface = dcache ? dcache->getInterface() : NULL; + + params.decodeToFetchDelay = decodeToFetchDelay; + params.renameToFetchDelay = renameToFetchDelay; + params.iewToFetchDelay = iewToFetchDelay; + params.commitToFetchDelay = commitToFetchDelay; + params.fetchWidth = fetchWidth; + + params.renameToDecodeDelay = renameToDecodeDelay; + params.iewToDecodeDelay = iewToDecodeDelay; + params.commitToDecodeDelay = commitToDecodeDelay; + params.fetchToDecodeDelay = fetchToDecodeDelay; + params.decodeWidth = decodeWidth; + + params.iewToRenameDelay = iewToRenameDelay; + params.commitToRenameDelay = commitToRenameDelay; + params.decodeToRenameDelay = decodeToRenameDelay; + params.renameWidth = renameWidth; + + params.commitToIEWDelay = commitToIEWDelay; + params.renameToIEWDelay = renameToIEWDelay; + params.issueToExecuteDelay = issueToExecuteDelay; + params.issueWidth = issueWidth; + params.executeWidth = executeWidth; + params.executeIntWidth = executeIntWidth; + params.executeFloatWidth = executeFloatWidth; + params.executeBranchWidth = executeBranchWidth; + params.executeMemoryWidth = executeMemoryWidth; + + params.iewToCommitDelay = iewToCommitDelay; + params.renameToROBDelay = renameToROBDelay; + params.commitWidth = commitWidth; + params.squashWidth = squashWidth; +#if 0 + params.localPredictorSize = localPredictorSize; + params.localPredictorCtrBits = localPredictorCtrBits; +#endif + params.local_predictor_size = local_predictor_size; + params.local_ctr_bits = local_ctr_bits; + params.local_history_table_size = local_history_table_size; + params.local_history_bits = local_history_bits; + params.global_predictor_size = global_predictor_size; + params.global_ctr_bits = global_ctr_bits; + params.global_history_bits = global_history_bits; + params.choice_predictor_size = choice_predictor_size; + params.choice_ctr_bits = choice_ctr_bits; + + params.BTBEntries = BTBEntries; + params.BTBTagSize = BTBTagSize; + + params.RASSize = RASSize; + + params.LQEntries = LQEntries; + params.SQEntries = SQEntries; + params.SSITSize = SSITSize; + params.LFSTSize = LFSTSize; + + params.numPhysIntRegs = numPhysIntRegs; + params.numPhysFloatRegs = numPhysFloatRegs; + params.numIQEntries = numIQEntries; + params.numROBEntries = numROBEntries; + + params.instShiftAmt = 2; + + params.defReg = defer_registration; + + params.functionTrace = function_trace; + params.functionTraceStart = function_trace_start; + + cpu = new DerivAlphaFullCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU) + diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh new file mode 100644 index 000000000..7c4c2b969 --- /dev/null +++ b/src/cpu/o3/alpha_cpu_impl.hh @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/alpha/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "mem/cache/cache.hh" // for dynamic cast +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/comm.hh" + +#if FULL_SYSTEM +#include "arch/alpha/osfpal.hh" +#include "arch/alpha/isa_traits.hh" +#endif + +template <class Impl> +AlphaFullCPU<Impl>::AlphaFullCPU(Params ¶ms) + : FullO3CPU<Impl>(params) +{ + DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); +} + +template <class Impl> +void +AlphaFullCPU<Impl>::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + +#if !FULL_SYSTEM + +// Will probably need to know which thread is calling syscall +// Will need to pass that information in to the DynInst when it is constructed, +// so that this call can be made with the proper thread number. +template <class Impl> +void +AlphaFullCPU<Impl>::syscall(short thread_num) +{ + DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); + + // Commit stage needs to run as well. + this->commit.tick(); + + squashStages(); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->funcExeInst); + + // Copy over all important state to xc once all the unrolling is done. + copyToXC(); + + // This is hardcoded to thread 0 while the CPU is only single threaded. + this->thread[0]->syscall(); + + // Copy over all important state back to CPU. + copyFromXC(); + + // Decrease funcExeInst by one as the normal commit will handle + // incrememnting it. + --(this->funcExeInst); +} + +// This is not a pretty function, and should only be used if it is necessary +// to fake having everything squash all at once (ie for non-full system +// syscalls). Maybe put this at the FullCPU level? +template <class Impl> +void +AlphaFullCPU<Impl>::squashStages() +{ + InstSeqNum rob_head = this->rob.readHeadSeqNum(); + + // Now hack the time buffer to put this sequence number in the places + // where the stages might read it. + for (int i = 0; i < 5; ++i) + { + this->timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; + } + + this->fetch.squash(this->rob.readHeadNextPC()); + this->fetchQueue.advance(); + + this->decode.squash(); + this->decodeQueue.advance(); + + this->rename.squash(); + this->renameQueue.advance(); + this->renameQueue.advance(); + + // Be sure to advance the IEW queues so that the commit stage doesn't + // try to set an instruction as completed at the same time that it + // might be deleting it. + this->iew.squash(); + this->iewQueue.advance(); + this->iewQueue.advance(); + // Needs to tell the LSQ to write back all of its data + this->iew.lsqWriteback(); + + this->rob.squash(rob_head); + this->commit.setSquashing(); + + // Now hack the time buffer to clear the sequence numbers in the places + // where the stages might read it.? + for (int i = 0; i < 5; ++i) + { + this->timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; + } + +} + +#endif // FULL_SYSTEM + +template <class Impl> +void +AlphaFullCPU<Impl>::copyToXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i); + this->cpuXC->setIntReg(i, this->regFile.readIntReg(renamed_reg)); + DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", + renamed_reg, this->regFile.intRegFile[renamed_reg]); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + this->cpuXC->setFloatRegBits(i, + this->regFile.readFloatRegBits(renamed_reg)); + } + + this->cpuXC->setMiscReg(AlphaISA::Fpcr_DepTag, + this->regFile.readMiscReg(AlphaISA::Fpcr_DepTag)); + this->cpuXC->setMiscReg(AlphaISA::Uniq_DepTag, + this->regFile.readMiscReg(AlphaISA::Uniq_DepTag)); + this->cpuXC->setMiscReg(AlphaISA::Lock_Flag_DepTag, + this->regFile.readMiscReg(AlphaISA::Lock_Flag_DepTag)); + this->cpuXC->setMiscReg(AlphaISA::Lock_Addr_DepTag, + this->regFile.readMiscReg(AlphaISA::Lock_Addr_DepTag)); + + this->cpuXC->setPC(this->rob.readHeadPC()); + this->cpuXC->setNextPC(this->cpuXC->readPC()+4); + +#if !FULL_SYSTEM + this->cpuXC->setFuncExeInst(this->funcExeInst); +#endif +} + +// This function will probably mess things up unless the ROB is empty and +// there are no instructions in the pipeline. +template <class Impl> +void +AlphaFullCPU<Impl>::copyFromXC() +{ + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < AlphaISA::NumIntRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i); + + DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, this->regFile.intRegFile[renamed_reg], + this->cpuXC->readIntReg(i)); + + this->regFile.setIntReg(renamed_reg, this->cpuXC->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); + this->regFile.setFloatRegBits(renamed_reg, + this->cpuXC->readFloatRegBits(i)); + } + + // Then loop through the misc registers. + this->regFile.setMiscReg(AlphaISA::Fpcr_DepTag, + this->cpuXC->readMiscReg(AlphaISA::Fpcr_DepTag)); + this->regFile.setMiscReg(AlphaISA::Uniq_DepTag, + this->cpuXC->readMiscReg(AlphaISA::Uniq_DepTag)); + this->regFile.setMiscReg(AlphaISA::Lock_Flag_DepTag, + this->cpuXC->readMiscReg(AlphaISA::Lock_Flag_DepTag)); + this->regFile.setMiscReg(AlphaISA::Lock_Addr_DepTag, + this->cpuXC->readMiscReg(AlphaISA::Lock_Addr_DepTag)); + + // Then finally set the PC and the next PC. +// regFile.pc = cpuXC->regs.pc; +// regFile.npc = cpuXC->regs.npc; +#if !FULL_SYSTEM + this->funcExeInst = this->cpuXC->readFuncExeInst(); +#endif +} + +#if FULL_SYSTEM + +template <class Impl> +int +AlphaFullCPU<Impl>::readIntrFlag() +{ + return this->regFile.readIntrFlag(); +} + +template <class Impl> +void +AlphaFullCPU<Impl>::setIntrFlag(int val) +{ + this->regFile.setIntrFlag(val); +} + +// Can force commit stage to squash and stuff. +template <class Impl> +Fault +AlphaFullCPU<Impl>::hwrei() +{ + if (!inPalMode()) + return new AlphaISA::UnimplementedOpcodeFault; + + this->setNextPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR)); + +// kernelStats.hwrei(); + + if ((this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR) & 1) == 0) +// AlphaISA::swap_palshadow(®s, false); + + this->checkInterrupts = true; + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template <class Impl> +bool +AlphaFullCPU<Impl>::simPalCheck(int palFunc) +{ +// kernelStats.callpal(palFunc); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (this->system->breakpoint()) + return false; + break; + } + + return true; +} + +// Probably shouldn't be able to switch to the trap handler as quickly as +// this. Also needs to get the exception restart address from the commit +// stage. +template <class Impl> +void +AlphaFullCPU<Impl>::trap(Fault fault) +{ +/* // Keep in mind that a trap may be initiated by fetch if there's a TLB + // miss + uint64_t PC = this->commit.readCommitPC(); + + DPRINTF(Fault, "Fault %s\n", fault->name()); + this->recordEvent(csprintf("Fault %s", fault->name())); + + //kernelStats.fault(fault); + + if (fault->isA<ArithmeticFault>()) + panic("Arithmetic traps are unimplemented!"); + + // exception restart address - Get the commit PC + if (!fault->isA<InterruptFault>() || !inPalMode(PC)) + this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, PC); + + if (fault->isA<PalFault>() || fault->isA<ArithmeticFault>()) + // || fault == InterruptFault && !PC_PAL(regs.pc) + { + // traps... skip faulting instruction + AlphaISA::MiscReg ipr_exc_addr = + this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR); + this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, + ipr_exc_addr + 4); + } + + if (!inPalMode(PC)) + swapPALShadow(true); + + this->regFile.setPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_PAL_BASE) + + (dynamic_cast<AlphaFault *>(fault.get()))->vect()); + this->regFile.setNextPC(PC + sizeof(MachInst));*/ +} + +template <class Impl> +void +AlphaFullCPU<Impl>::processInterrupts() +{ + // Check for interrupts here. For now can copy the code that exists + // within isa_fullsys_traits.hh. +} + +// swap_palshadow swaps in the values of the shadow registers and +// swaps them with the values of the physical registers that map to the +// same logical index. +template <class Impl> +void +AlphaFullCPU<Impl>::swapPALShadow(bool use_shadow) +{ + if (palShadowEnabled == use_shadow) + panic("swap_palshadow: wrong PAL shadow state"); + + palShadowEnabled = use_shadow; + + // Will have to lookup in rename map to get physical registers, then + // swap. +} + +#endif // FULL_SYSTEM diff --git a/src/cpu/o3/alpha_dyn_inst.cc b/src/cpu/o3/alpha_dyn_inst.cc new file mode 100644 index 000000000..72ac77d95 --- /dev/null +++ b/src/cpu/o3/alpha_dyn_inst.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst_impl.hh" +#include "cpu/o3/alpha_impl.hh" + +// Force instantiation of AlphaDynInst for all the implementations that +// are needed. +template class AlphaDynInst<AlphaSimpleImpl>; diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha_dyn_inst.hh new file mode 100644 index 000000000..5b8a05e5c --- /dev/null +++ b/src/cpu/o3/alpha_dyn_inst.hh @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_ALPHA_DYN_INST_HH__ +#define __CPU_O3_CPU_ALPHA_DYN_INST_HH__ + +#include "cpu/base_dyn_inst.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/inst_seq.hh" + +/** + * Mostly implementation specific AlphaDynInst. It is templated in case there + * are other implementations that are similar enough to be able to use this + * class without changes. This is mainly useful if there are multiple similar + * CPU implementations of the same ISA. + */ + +template <class Impl> +class AlphaDynInst : public BaseDynInst<Impl> +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::FullCPU FullCPU; + + /** Binary machine instruction type. */ + typedef TheISA::MachInst MachInst; + /** Logical register index type. */ + typedef TheISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef TheISA::IntReg IntReg; + /** Misc register index type. */ + typedef TheISA::MiscReg MiscReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ + AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + AlphaDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction.*/ + Fault execute() + { + return this->fault = this->staticInst->execute(this, this->traceData); + } + + public: + MiscReg readMiscReg(int misc_reg) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return 0; + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return 0; + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return NoFault; + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + // Dummy function for now. + // @todo: Fix this once reg file gets fixed. + return NoFault; + } + +#if FULL_SYSTEM + Fault hwrei(); + int readIntrFlag(); + void setIntrFlag(int val); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + void syscall(); +#endif + + + + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatReg(_srcRegIdx[idx], width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + return this->cpu->readFloatReg(_srcRegIdx[idx]); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx], width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx]); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); + this->instResult.integer = val; + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + this->cpu->setFloatReg(_destRegIdx[idx], val, width); + this->instResult.fp = val; + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + this->cpu->setFloatReg(_destRegIdx[idx], val); + this->instResult.dbl = val; + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val, width); + this->instResult.integer = val; + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val); + this->instResult.integer = val; + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } +}; + +#endif // __CPU_O3_CPU_ALPHA_DYN_INST_HH__ + diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh new file mode 100644 index 000000000..96b7d3430 --- /dev/null +++ b/src/cpu/o3/alpha_dyn_inst_impl.hh @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" + +template <class Impl> +AlphaDynInst<Impl>::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu) +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } + +} + +template <class Impl> +AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr &_staticInst) + : BaseDynInst<Impl>(_staticInst) +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < _staticInst->numDestRegs(); i++) + { + _destRegIdx[i] = _staticInst->destRegIdx(i); + } + + for (int i = 0; i < _staticInst->numSrcRegs(); i++) + { + _srcRegIdx[i] = _staticInst->srcRegIdx(i); + } +} + +#if FULL_SYSTEM +template <class Impl> +Fault +AlphaDynInst<Impl>::hwrei() +{ + return this->cpu->hwrei(); +} + +template <class Impl> +int +AlphaDynInst<Impl>::readIntrFlag() +{ +return this->cpu->readIntrFlag(); +} + +template <class Impl> +void +AlphaDynInst<Impl>::setIntrFlag(int val) +{ + this->cpu->setIntrFlag(val); +} + +template <class Impl> +bool +AlphaDynInst<Impl>::inPalMode() +{ + return this->cpu->inPalMode(); +} + +template <class Impl> +void +AlphaDynInst<Impl>::trap(Fault fault) +{ + this->cpu->trap(fault); +} + +template <class Impl> +bool +AlphaDynInst<Impl>::simPalCheck(int palFunc) +{ + return this->cpu->simPalCheck(palFunc); +} +#else +template <class Impl> +void +AlphaDynInst<Impl>::syscall() +{ + this->cpu->syscall(this->threadNumber); +} +#endif + diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha_impl.hh new file mode 100644 index 000000000..5e39fcb37 --- /dev/null +++ b/src/cpu/o3/alpha_impl.hh @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_ALPHA_IMPL_HH__ +#define __CPU_O3_CPU_ALPHA_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" + +#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/cpu_policy.hh" + +// Forward declarations. +template <class Impl> +class AlphaDynInst; + +template <class Impl> +class AlphaFullCPU; + +/** Implementation specific struct that defines several key things to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific FullCPU, and all of the structs from the time buffers to do + * communication. + * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct AlphaSimpleImpl +{ + /** The type of MachInst. */ + typedef TheISA::MachInst MachInst; + + /** The CPU policy to be used (ie fetch, decode, etc.). */ + typedef SimpleCPUPolicy<AlphaSimpleImpl> CPUPol; + + /** The DynInst to be used. */ + typedef AlphaDynInst<AlphaSimpleImpl> DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr<DynInst> DynInstPtr; + + /** The FullCPU to be used. */ + typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU; + + /** The Params to be passed to each stage. */ + typedef AlphaSimpleParams Params; + + enum { + MaxWidth = 8 + }; +}; + +#endif // __CPU_O3_CPU_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh new file mode 100644 index 000000000..79b0937e3 --- /dev/null +++ b/src/cpu/o3/alpha_params.hh @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_ALPHA_SIMPLE_PARAMS_HH__ +#define __CPU_O3_CPU_ALPHA_SIMPLE_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" + +//Forward declarations +class System; +class AlphaITB; +class AlphaDTB; +class FunctionalMemory; +class Process; +class MemInterface; + +/** + * This file defines the parameters that will be used for the AlphaFullCPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class AlphaSimpleParams : public BaseFullCPU::Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; AlphaDTB *dtb; +#else + std::vector<Process *> workload; + Process *process; +#endif // FULL_SYSTEM + + FunctionalMemory *mem; + + // + // Caches + // + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + // + // Fetch + // + unsigned decodeToFetchDelay; + unsigned renameToFetchDelay; + unsigned iewToFetchDelay; + unsigned commitToFetchDelay; + unsigned fetchWidth; + + // + // Decode + // + unsigned renameToDecodeDelay; + unsigned iewToDecodeDelay; + unsigned commitToDecodeDelay; + unsigned fetchToDecodeDelay; + unsigned decodeWidth; + + // + // Rename + // + unsigned iewToRenameDelay; + unsigned commitToRenameDelay; + unsigned decodeToRenameDelay; + unsigned renameWidth; + + // + // IEW + // + unsigned commitToIEWDelay; + unsigned renameToIEWDelay; + unsigned issueToExecuteDelay; + unsigned issueWidth; + unsigned executeWidth; + unsigned executeIntWidth; + unsigned executeFloatWidth; + unsigned executeBranchWidth; + unsigned executeMemoryWidth; + + // + // Commit + // + unsigned iewToCommitDelay; + unsigned renameToROBDelay; + unsigned commitWidth; + unsigned squashWidth; + + // + // Branch predictor (BP & BTB) + // +/* + unsigned localPredictorSize; + unsigned localPredictorCtrBits; +*/ + + unsigned local_predictor_size; + unsigned local_ctr_bits; + unsigned local_history_table_size; + unsigned local_history_bits; + unsigned global_predictor_size; + unsigned global_ctr_bits; + unsigned global_history_bits; + unsigned choice_predictor_size; + unsigned choice_ctr_bits; + + unsigned BTBEntries; + unsigned BTBTagSize; + + unsigned RASSize; + + // + // Load store queue + // + unsigned LQEntries; + unsigned SQEntries; + + // + // Memory dependence + // + unsigned SSITSize; + unsigned LFSTSize; + + // + // Miscellaneous + // + unsigned numPhysIntRegs; + unsigned numPhysFloatRegs; + unsigned numIQEntries; + unsigned numROBEntries; + + // Probably can get this from somewhere. + unsigned instShiftAmt; + + bool defReg; +}; + +#endif // __CPU_O3_CPU_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc new file mode 100644 index 000000000..85bd6f0a6 --- /dev/null +++ b/src/cpu/o3/bpred_unit.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/bpred_unit_impl.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha_dyn_inst.hh" + +template class TwobitBPredUnit<AlphaSimpleImpl>; diff --git a/src/cpu/o3/bpred_unit.hh b/src/cpu/o3/bpred_unit.hh new file mode 100644 index 000000000..2725684f7 --- /dev/null +++ b/src/cpu/o3/bpred_unit.hh @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BPRED_UNIT_HH__ +#define __BPRED_UNIT_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/inst_seq.hh" + +#include "cpu/o3/2bit_local_pred.hh" +#include "cpu/o3/tournament_pred.hh" +#include "cpu/o3/btb.hh" +#include "cpu/o3/ras.hh" + +#include <list> + +/** + * Basically a wrapper class to hold both the branch predictor + * and the BTB. Right now I'm unsure of the implementation; it would + * be nicer to have something closer to the CPUPolicy or the Impl where + * this is just typedefs, but it forces the upper level stages to be + * aware of the constructors of the BP and the BTB. The nicer thing + * to do is have this templated on the Impl, accept the usual Params + * object, and be able to call the constructors on the BP and BTB. + */ +template<class Impl> +class TwobitBPredUnit +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + TwobitBPredUnit(Params ¶ms); + + void regStats(); + + bool predict(DynInstPtr &inst, Addr &PC); + + void update(const InstSeqNum &done_sn); + + void squash(const InstSeqNum &squashed_sn); + + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, + bool actually_taken); + + bool BPLookup(Addr &inst_PC) + { return BP.lookup(inst_PC); } + + bool BTBValid(Addr &inst_PC) + { return BTB.valid(inst_PC); } + + Addr BTBLookup(Addr &inst_PC) + { return BTB.lookup(inst_PC); } + + // Will want to include global history. + void BPUpdate(Addr &inst_PC, bool taken) + { BP.update(inst_PC, taken); } + + void BTBUpdate(Addr &inst_PC, Addr &target_PC) + { BTB.update(inst_PC, target_PC); } + + private: + struct PredictorHistory { + PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC, + const bool pred_taken) + : seqNum(seq_num), PC(inst_PC), predTaken(pred_taken), + globalHistory(0), usedRAS(0), wasCall(0), RASIndex(0), + RASTarget(0) + { } + + InstSeqNum seqNum; + + Addr PC; + + bool predTaken; + + unsigned globalHistory; + + bool usedRAS; + + bool wasCall; + + unsigned RASIndex; + + Addr RASTarget; + }; + + std::list<PredictorHistory> predHist; + + DefaultBP BP; + + DefaultBTB BTB; + + ReturnAddrStack RAS; + + Stats::Scalar<> lookups; + Stats::Scalar<> condPredicted; + Stats::Scalar<> condIncorrect; + Stats::Scalar<> BTBLookups; + Stats::Scalar<> BTBHits; + Stats::Scalar<> BTBCorrect; + Stats::Scalar<> usedRAS; + Stats::Scalar<> RASIncorrect; +}; + +#endif // __BPRED_UNIT_HH__ diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh new file mode 100644 index 000000000..8d16a0cdf --- /dev/null +++ b/src/cpu/o3/bpred_unit_impl.hh @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/o3/bpred_unit.hh" + +template<class Impl> +TwobitBPredUnit<Impl>::TwobitBPredUnit(Params ¶ms) + : BP(params.local_predictor_size, + params.local_ctr_bits, + params.instShiftAmt), + BTB(params.BTBEntries, + params.BTBTagSize, + params.instShiftAmt), + RAS(params.RASSize) +{ +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::regStats() +{ + lookups + .name(name() + ".BPredUnit.lookups") + .desc("Number of BP lookups") + ; + + condPredicted + .name(name() + ".BPredUnit.condPredicted") + .desc("Number of conditional branches predicted") + ; + + condIncorrect + .name(name() + ".BPredUnit.condIncorrect") + .desc("Number of conditional branches incorrect") + ; + + BTBLookups + .name(name() + ".BPredUnit.BTBLookups") + .desc("Number of BTB lookups") + ; + + BTBHits + .name(name() + ".BPredUnit.BTBHits") + .desc("Number of BTB hits") + ; + + BTBCorrect + .name(name() + ".BPredUnit.BTBCorrect") + .desc("Number of correct BTB predictions (this stat may not " + "work properly.") + ; + + usedRAS + .name(name() + ".BPredUnit.usedRAS") + .desc("Number of times the RAS was used.") + ; + + RASIncorrect + .name(name() + ".BPredUnit.RASInCorrect") + .desc("Number of incorrect RAS predictions.") + ; +} + +template <class Impl> +bool +TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC) +{ + // See if branch predictor predicts taken. + // If so, get its target addr either from the BTB or the RAS. + // Once that's done, speculatively update the predictor? + // Save off record of branch stuff so the RAS can be fixed + // up once it's done. + + using TheISA::MachInst; + + bool pred_taken = false; + Addr target; + + ++lookups; + + if (inst->isUncondCtrl()) { + DPRINTF(Fetch, "BranchPred: Unconditional control.\n"); + pred_taken = true; + } else { + ++condPredicted; + + pred_taken = BPLookup(PC); + + DPRINTF(Fetch, "BranchPred: Branch predictor predicted %i for PC %#x" + "\n", pred_taken, inst->readPC()); + } + + PredictorHistory predict_record(inst->seqNum, PC, pred_taken); + + // Now lookup in the BTB or RAS. + if (pred_taken) { + if (inst->isReturn()) { + ++usedRAS; + + // If it's a function return call, then look up the address + // in the RAS. + target = RAS.top(); + + // Record the top entry of the RAS, and its index. + predict_record.usedRAS = true; + predict_record.RASIndex = RAS.topIdx(); + predict_record.RASTarget = target; + + RAS.pop(); + + DPRINTF(Fetch, "BranchPred: Instruction %#x is a return, RAS " + "predicted target: %#x, RAS index: %i.\n", + inst->readPC(), target, predict_record.RASIndex); + } else { + ++BTBLookups; + + if (inst->isCall()) { + RAS.push(PC+sizeof(MachInst)); + + // Record that it was a call so that the top RAS entry can + // be popped off if the speculation is incorrect. + predict_record.wasCall = true; + + DPRINTF(Fetch, "BranchPred: Instruction %#x was a call, " + "adding %#x to the RAS.\n", + inst->readPC(), PC+sizeof(MachInst)); + } + + if (BTB.valid(PC)) { + ++BTBHits; + + //If it's anything else, use the BTB to get the target addr. + target = BTB.lookup(PC); + + DPRINTF(Fetch, "BranchPred: Instruction %#x predicted target " + "is %#x.\n", inst->readPC(), target); + + } else { + DPRINTF(Fetch, "BranchPred: BTB doesn't have a valid entry." + "\n"); + pred_taken = false; + } + + } + } + + if (pred_taken) { + // Set the PC and the instruction's predicted target. + PC = target; + inst->setPredTarg(target); + } else { + PC = PC + sizeof(MachInst); + inst->setPredTarg(PC); + } + + predHist.push_front(predict_record); + + assert(!predHist.empty()); + + return pred_taken; +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn) +{ + DPRINTF(Fetch, "BranchPred: Commiting branches until sequence number " + "%i.\n", done_sn); + + while (!predHist.empty() && predHist.back().seqNum <= done_sn) { + assert(!predHist.empty()); + + // Update the branch predictor with the correct results of branches. + BP.update(predHist.back().PC, predHist.back().predTaken); + + predHist.pop_back(); + } +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn) +{ + while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { + if (predHist.front().usedRAS) { + DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " + "target: %#x.\n", + predHist.front().RASIndex, + predHist.front().RASTarget); + + RAS.restore(predHist.front().RASIndex, + predHist.front().RASTarget); + } else if (predHist.front().wasCall) { + DPRINTF(Fetch, "BranchPred: Removing speculative entry added " + "to the RAS.\n"); + + RAS.pop(); + } + + predHist.pop_front(); + } +} + +template <class Impl> +void +TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, + const Addr &corr_target, + const bool actually_taken) +{ + // Now that we know that a branch was mispredicted, we need to undo + // all the branches that have been seen up until this branch and + // fix up everything. + + ++condIncorrect; + + DPRINTF(Fetch, "BranchPred: Squashing from sequence number %i, " + "setting target to %#x.\n", + squashed_sn, corr_target); + + while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { + + if (predHist.front().usedRAS) { + DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " + "target: %#x.\n", + predHist.front().RASIndex, + predHist.front().RASTarget); + + RAS.restore(predHist.front().RASIndex, + predHist.front().RASTarget); + } else if (predHist.front().wasCall) { + DPRINTF(Fetch, "BranchPred: Removing speculative entry added " + "to the RAS.\n"); + + RAS.pop(); + } + + predHist.pop_front(); + } + + predHist.front().predTaken = actually_taken; + + if (predHist.front().usedRAS) { + ++RASIncorrect; + } + + BP.update(predHist.front().PC, actually_taken); + + BTB.update(predHist.front().PC, corr_target); +} diff --git a/src/cpu/o3/btb.cc b/src/cpu/o3/btb.cc new file mode 100644 index 000000000..2d39c3856 --- /dev/null +++ b/src/cpu/o3/btb.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/intmath.hh" +#include "base/trace.hh" +#include "cpu/o3/btb.hh" + +using namespace TheISA; + +DefaultBTB::DefaultBTB(unsigned _numEntries, + unsigned _tagBits, + unsigned _instShiftAmt) + : numEntries(_numEntries), + tagBits(_tagBits), + instShiftAmt(_instShiftAmt) +{ + // @todo Check to make sure num_entries is valid (a power of 2) + + DPRINTF(Fetch, "BTB: Creating BTB object.\n"); + + btb = new BTBEntry[numEntries]; + + for (int i = 0; i < numEntries; ++i) + { + btb[i].valid = false; + } + + idxMask = numEntries - 1; + + tagMask = (1 << tagBits) - 1; + + tagShiftAmt = instShiftAmt + floorLog2(numEntries); +} + +inline +unsigned +DefaultBTB::getIndex(const Addr &inst_PC) +{ + // Need to shift PC over by the word offset. + return (inst_PC >> instShiftAmt) & idxMask; +} + +inline +Addr +DefaultBTB::getTag(const Addr &inst_PC) +{ + return (inst_PC >> tagShiftAmt) & tagMask; +} + +bool +DefaultBTB::valid(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + assert(btb_idx < numEntries); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return true; + } else { + return false; + } +} + +// @todo Create some sort of return struct that has both whether or not the +// address is valid, and also the address. For now will just use addr = 0 to +// represent invalid entry. +Addr +DefaultBTB::lookup(const Addr &inst_PC) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + assert(btb_idx < numEntries); + + if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { + return btb[btb_idx].target; + } else { + return 0; + } +} + +void +DefaultBTB::update(const Addr &inst_PC, const Addr &target) +{ + unsigned btb_idx = getIndex(inst_PC); + + assert(btb_idx < numEntries); + + btb[btb_idx].valid = true; + btb[btb_idx].target = target; + btb[btb_idx].tag = getTag(inst_PC); +} diff --git a/src/cpu/o3/btb.hh b/src/cpu/o3/btb.hh new file mode 100644 index 000000000..77bdc32ea --- /dev/null +++ b/src/cpu/o3/btb.hh @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_BTB_HH__ +#define __CPU_O3_CPU_BTB_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" + +class DefaultBTB +{ + private: + struct BTBEntry + { + BTBEntry() + : tag(0), target(0), valid(false) + { + } + + Addr tag; + Addr target; + bool valid; + }; + + public: + DefaultBTB(unsigned numEntries, unsigned tagBits, + unsigned instShiftAmt); + + Addr lookup(const Addr &inst_PC); + + bool valid(const Addr &inst_PC); + + void update(const Addr &inst_PC, const Addr &target_PC); + + private: + inline unsigned getIndex(const Addr &inst_PC); + + inline Addr getTag(const Addr &inst_PC); + + BTBEntry *btb; + + unsigned numEntries; + + unsigned idxMask; + + unsigned tagBits; + + unsigned tagMask; + + unsigned instShiftAmt; + + unsigned tagShiftAmt; +}; + +#endif // __CPU_O3_CPU_BTB_HH__ diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh new file mode 100644 index 000000000..c74c77ddf --- /dev/null +++ b/src/cpu/o3/comm.hh @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_COMM_HH__ +#define __CPU_O3_CPU_COMM_HH__ + +#include <vector> + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" +#include "sim/host.hh" + +// Find better place to put this typedef. +// The impl might be the best place for this. +typedef short int PhysRegIndex; + +template<class Impl> +struct SimpleFetchSimpleDecode { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleDecodeSimpleRename { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleRenameSimpleIEW { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template<class Impl> +struct SimpleIEWSimpleCommit { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; + + bool squash; + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + InstSeqNum squashedSeqNum; +}; + +template<class Impl> +struct IssueStruct { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +struct TimeBufStruct { + struct decodeComm { + bool squash; + bool stall; + bool predIncorrect; + uint64_t branchAddr; + + InstSeqNum doneSeqNum; + + // Might want to package this kind of branch stuff into a single + // struct as it is used pretty frequently. + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + }; + + decodeComm decodeInfo; + + // Rename can't actually tell anything to squash or send a new PC back + // because it doesn't do anything along those lines. But maybe leave + // these fields in here to keep the stages mostly orthagonal. + struct renameComm { + bool squash; + bool stall; + + uint64_t nextPC; + }; + + renameComm renameInfo; + + struct iewComm { + bool stall; + + // Also eventually include skid buffer space. + unsigned freeIQEntries; + }; + + iewComm iewInfo; + + struct commitComm { + bool squash; + bool stall; + unsigned freeROBEntries; + + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + + bool robSquashing; + + // Represents the instruction that has either been retired or + // squashed. Similar to having a single bus that broadcasts the + // retired or squashed sequence number. + InstSeqNum doneSeqNum; + + // Extra bit of information so that the LDSTQ only updates when it + // needs to. + bool commitIsLoad; + + // Communication specifically to the IQ to tell the IQ that it can + // schedule a non-speculative instruction. + InstSeqNum nonSpecSeqNum; + }; + + commitComm commitInfo; +}; + +#endif //__CPU_O3_CPU_COMM_HH__ diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc new file mode 100644 index 000000000..cf33d7f8b --- /dev/null +++ b/src/cpu/o3/commit.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/commit_impl.hh" + +template class SimpleCommit<AlphaSimpleImpl>; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh new file mode 100644 index 000000000..580c1a316 --- /dev/null +++ b/src/cpu/o3/commit.hh @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Maybe have a special method for handling interrupts/traps. +// +// Traps: Have IEW send a signal to commit saying that there's a trap to +// be handled. Have commit send the PC back to the fetch stage, along +// with the current commit PC. Fetch will directly access the IPR and save +// off all the proper stuff. Commit can send out a squash, or something +// close to it. +// Do the same for hwrei(). However, requires that commit be specifically +// built to support that kind of stuff. Probably not horrible to have +// commit support having the CPU tell it to squash the other stages and +// restart at a given address. The IPR register does become an issue. +// Probably not a big deal if the IPR stuff isn't cycle accurate. Can just +// have the original function handle writing to the IPR register. + +#ifndef __CPU_O3_CPU_SIMPLE_COMMIT_HH__ +#define __CPU_O3_CPU_SIMPLE_COMMIT_HH__ + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "mem/memory_interface.hh" + +template<class Impl> +class SimpleCommit +{ + public: + // Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; + + typedef typename CPUPol::ROB ROB; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + + public: + // I don't believe commit can block, so it will only have two + // statuses for now. + // Actually if there's a cache access that needs to block (ie + // uncachable load or just a mem access in commit) then the stage + // may have to wait. + enum Status { + Running, + Idle, + ROBSquashing, + DcacheMissStall, + DcacheMissComplete + }; + + private: + Status _status; + + public: + SimpleCommit(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); + + void setROB(ROB *rob_ptr); + + void tick(); + + void commit(); + + private: + + void commitInsts(); + + bool commitHead(DynInstPtr &head_inst, unsigned inst_num); + + void getInsts(); + + void markCompletedInsts(); + + public: + uint64_t readCommitPC(); + + void setSquashing() { _status = ROBSquashing; } + + private: + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer<TimeStruct>::wire toIEW; + + /** Wire to read information from IEW (for ROB). */ + typename TimeBuffer<TimeStruct>::wire robInfoFromIEW; + + /** IEW instruction queue interface. */ + TimeBuffer<IEWStruct> *iewQueue; + + /** Wire to read information from IEW queue. */ + typename TimeBuffer<IEWStruct>::wire fromIEW; + + /** Rename instruction queue interface, for ROB. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to read information from rename queue. */ + typename TimeBuffer<RenameStruct>::wire fromRename; + + /** ROB interface. */ + ROB *rob; + + /** Pointer to FullCPU. */ + FullCPU *cpu; + + /** Memory interface. Used for d-cache accesses. */ + MemInterface *dcacheInterface; + + private: + /** IEW to Commit delay, in ticks. */ + unsigned iewToCommitDelay; + + /** Rename to ROB delay, in ticks. */ + unsigned renameToROBDelay; + + /** Rename width, in instructions. Used so ROB knows how many + * instructions to get from the rename instruction queue. + */ + unsigned renameWidth; + + /** IEW width, in instructions. Used so ROB knows how many + * instructions to get from the IEW instruction queue. + */ + unsigned iewWidth; + + /** Commit width, in instructions. */ + unsigned commitWidth; + + Stats::Scalar<> commitCommittedInsts; + Stats::Scalar<> commitSquashedInsts; + Stats::Scalar<> commitSquashEvents; + Stats::Scalar<> commitNonSpecStalls; + Stats::Scalar<> commitCommittedBranches; + Stats::Scalar<> commitCommittedLoads; + Stats::Scalar<> commitCommittedMemRefs; + Stats::Scalar<> branchMispredicts; + + Stats::Distribution<> n_committed_dist; +}; + +#endif // __CPU_O3_CPU_SIMPLE_COMMIT_HH__ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh new file mode 100644 index 000000000..e289bc0c0 --- /dev/null +++ b/src/cpu/o3/commit_impl.hh @@ -0,0 +1,502 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/timebuf.hh" +#include "cpu/o3/commit.hh" +#include "cpu/exetrace.hh" + +template <class Impl> +SimpleCommit<Impl>::SimpleCommit(Params ¶ms) + : dcacheInterface(params.dcacheInterface), + iewToCommitDelay(params.iewToCommitDelay), + renameToROBDelay(params.renameToROBDelay), + renameWidth(params.renameWidth), + iewWidth(params.executeWidth), + commitWidth(params.commitWidth) +{ + _status = Idle; +} + +template <class Impl> +void +SimpleCommit<Impl>::regStats() +{ + commitCommittedInsts + .name(name() + ".commitCommittedInsts") + .desc("The number of committed instructions") + .prereq(commitCommittedInsts); + commitSquashedInsts + .name(name() + ".commitSquashedInsts") + .desc("The number of squashed insts skipped by commit") + .prereq(commitSquashedInsts); + commitSquashEvents + .name(name() + ".commitSquashEvents") + .desc("The number of times commit is told to squash") + .prereq(commitSquashEvents); + commitNonSpecStalls + .name(name() + ".commitNonSpecStalls") + .desc("The number of times commit has been forced to stall to " + "communicate backwards") + .prereq(commitNonSpecStalls); + commitCommittedBranches + .name(name() + ".commitCommittedBranches") + .desc("The number of committed branches") + .prereq(commitCommittedBranches); + commitCommittedLoads + .name(name() + ".commitCommittedLoads") + .desc("The number of committed loads") + .prereq(commitCommittedLoads); + commitCommittedMemRefs + .name(name() + ".commitCommittedMemRefs") + .desc("The number of committed memory references") + .prereq(commitCommittedMemRefs); + branchMispredicts + .name(name() + ".branchMispredicts") + .desc("The number of times a branch was mispredicted") + .prereq(branchMispredicts); + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(Stats::pdf) + ; +} + +template <class Impl> +void +SimpleCommit<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template <class Impl> +void +SimpleCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Commit, "Commit: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to send information back to IEW. + toIEW = timeBuffer->getWire(0); + + // Setup wire to read data from IEW (for the ROB). + robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(Commit, "Commit: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to get instructions from rename (for the ROB). + fromRename = renameQueue->getWire(-renameToROBDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) +{ + DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to get instructions from IEW. + fromIEW = iewQueue->getWire(-iewToCommitDelay); +} + +template <class Impl> +void +SimpleCommit<Impl>::setROB(ROB *rob_ptr) +{ + DPRINTF(Commit, "Commit: Setting ROB pointer.\n"); + rob = rob_ptr; +} + +template <class Impl> +void +SimpleCommit<Impl>::tick() +{ + // If the ROB is currently in its squash sequence, then continue + // to squash. In this case, commit does not do anything. Otherwise + // run commit. + if (_status == ROBSquashing) { + if (rob->isDoneSquashing()) { + _status = Running; + } else { + rob->doSquash(); + + // Send back sequence number of tail of ROB, so other stages + // can squash younger instructions. Note that really the only + // stage that this is important for is the IEW stage; other + // stages can just clear all their state as long as selective + // replay isn't used. + toIEW->commitInfo.doneSeqNum = rob->readTailSeqNum(); + toIEW->commitInfo.robSquashing = true; + } + } else { + commit(); + } + + markCompletedInsts(); + + // Writeback number of free ROB entries here. + DPRINTF(Commit, "Commit: ROB has %d free entries.\n", + rob->numFreeEntries()); + toIEW->commitInfo.freeROBEntries = rob->numFreeEntries(); +} + +template <class Impl> +void +SimpleCommit<Impl>::commit() +{ + ////////////////////////////////////// + // Check for interrupts + ////////////////////////////////////// + + // Process interrupts if interrupts are enabled and not in PAL mode. + // Take the PC from commit and write it to the IPR, then squash. The + // interrupt completing will take care of restoring the PC from that value + // in the IPR. Look at IPR[EXC_ADDR]; + // hwrei() is what resets the PC to the place where instruction execution + // beings again. +#if FULL_SYSTEM + if (//checkInterrupts && + cpu->check_interrupts() && + !cpu->inPalMode(readCommitPC())) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. + + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); + } +#endif // FULL_SYSTEM + + //////////////////////////////////// + // Check for squash signal, handle that first + //////////////////////////////////// + + // Want to mainly check if the IEW stage is telling the ROB to squash. + // Should I also check if the commit stage is telling the ROB to squah? + // This might be necessary to keep the same timing between the IQ and + // the ROB... + if (fromIEW->squash) { + DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n"); + + _status = ROBSquashing; + + InstSeqNum squashed_inst = fromIEW->squashedSeqNum; + + rob->squash(squashed_inst); + + // Send back the sequence number of the squashed instruction. + toIEW->commitInfo.doneSeqNum = squashed_inst; + + // Send back the squash signal to tell stages that they should squash. + toIEW->commitInfo.squash = true; + + // Send back the rob squashing signal so other stages know that the + // ROB is in the process of squashing. + toIEW->commitInfo.robSquashing = true; + + toIEW->commitInfo.branchMispredict = fromIEW->branchMispredict; + + toIEW->commitInfo.branchTaken = fromIEW->branchTaken; + + toIEW->commitInfo.nextPC = fromIEW->nextPC; + + toIEW->commitInfo.mispredPC = fromIEW->mispredPC; + + if (toIEW->commitInfo.branchMispredict) { + ++branchMispredicts; + } + } + + if (_status != ROBSquashing) { + // If we're not currently squashing, then get instructions. + getInsts(); + + // Try to commit any instructions. + commitInsts(); + } + + // If the ROB is empty, we can set this stage to idle. Use this + // in the future when the Idle status will actually be utilized. +#if 0 + if (rob->isEmpty()) { + DPRINTF(Commit, "Commit: ROB is empty. Status changed to idle.\n"); + _status = Idle; + // Schedule an event so that commit will actually wake up + // once something gets put in the ROB. + } +#endif +} + +// Loop that goes through as many instructions in the ROB as possible and +// tries to commit them. The actual work for committing is done by the +// commitHead() function. +template <class Impl> +void +SimpleCommit<Impl>::commitInsts() +{ + //////////////////////////////////// + // Handle commit + // Note that commit will be handled prior to the ROB so that the ROB + // only tries to commit instructions it has in this current cycle, and + // not instructions it is writing in during this cycle. + // Can't commit and squash things at the same time... + //////////////////////////////////// + + if (rob->isEmpty()) + return; + + DynInstPtr head_inst = rob->readHeadInst(); + + unsigned num_committed = 0; + + // Commit as many instructions as possible until the commit bandwidth + // limit is reached, or it becomes impossible to commit any more. + while (!rob->isEmpty() && + head_inst->readyToCommit() && + num_committed < commitWidth) + { + DPRINTF(Commit, "Commit: Trying to commit head instruction.\n"); + + // If the head instruction is squashed, it is ready to retire at any + // time. However, we need to avoid updating any other state + // incorrectly if it's already been squashed. + if (head_inst->isSquashed()) { + + DPRINTF(Commit, "Commit: Retiring squashed instruction from " + "ROB.\n"); + + // Tell ROB to retire head instruction. This retires the head + // inst in the ROB without affecting any other stages. + rob->retireHead(); + + ++commitSquashedInsts; + + } else { + // Increment the total number of non-speculative instructions + // executed. + // Hack for now: it really shouldn't happen until after the + // commit is deemed to be successful, but this count is needed + // for syscalls. + cpu->funcExeInst++; + + // Try to commit the head instruction. + bool commit_success = commitHead(head_inst, num_committed); + + // Update what instruction we are looking at if the commit worked. + if (commit_success) { + ++num_committed; + + // Send back which instruction has been committed. + // @todo: Update this later when a wider pipeline is used. + // Hmm, can't really give a pointer here...perhaps the + // sequence number instead (copy). + toIEW->commitInfo.doneSeqNum = head_inst->seqNum; + + ++commitCommittedInsts; + + if (!head_inst->isNop()) { + cpu->instDone(); + } + } else { + break; + } + } + + // Update the pointer to read the next instruction in the ROB. + head_inst = rob->readHeadInst(); + } + + DPRINTF(CommitRate, "%i\n", num_committed); + n_committed_dist.sample(num_committed); +} + +template <class Impl> +bool +SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num) +{ + // Make sure instruction is valid + assert(head_inst); + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!head_inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. + cpu->funcExeInst--; + + if (head_inst->isNonSpeculative()) { + DPRINTF(Commit, "Commit: Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + head_inst->readPC()); + + toIEW->commitInfo.nonSpecSeqNum = head_inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. + head_inst->clearCanCommit(); + + ++commitNonSpecStalls; + + return false; + } else { + panic("Commit: Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Now check if it's one of the special trap or barrier or + // serializing instructions. + if (head_inst->isThreadSync() || + head_inst->isSerializing() || + head_inst->isMemBarrier() || + head_inst->isWriteBarrier() ) + { + // Not handled for now. Mem barriers and write barriers are safe + // to simply let commit as memory accesses only happen once they + // reach the head of commit. Not sure about the other two. + panic("Serializing or barrier instructions" + " are not handled yet.\n"); + } + + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = head_inst->getFault(); + + if (inst_fault != NoFault) { + if (!head_inst->isNop()) { +#if FULL_SYSTEM + cpu->trap(inst_fault); +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + head_inst->PC); +#endif // FULL_SYSTEM + } + } + + // Check if we're really ready to commit. If not then return false. + // I'm pretty sure all instructions should be able to commit if they've + // reached this far. For now leave this in as a check. + if (!rob->isHeadReady()) { + panic("Commit: Unable to commit head instruction!\n"); + return false; + } + + // If it's a branch, then send back branch prediction update info + // to the fetch stage. + // This should be handled in the iew stage if a mispredict happens... + + if (head_inst->isControl()) { + +#if 0 + toIEW->nextPC = head_inst->readPC(); + //Maybe switch over to BTB incorrect. + toIEW->btbMissed = head_inst->btbMiss(); + toIEW->target = head_inst->nextPC; + //Maybe also include global history information. + //This simple version will have no branch prediction however. +#endif + + ++commitCommittedBranches; + } + + // Now that the instruction is going to be committed, finalize its + // trace data. + if (head_inst->traceData) { + head_inst->traceData->finalize(); + } + + //Finally clear the head ROB entry. + rob->retireHead(); + + // Return true to indicate that we have committed an instruction. + return true; +} + +template <class Impl> +void +SimpleCommit<Impl>::getInsts() +{ + ////////////////////////////////////// + // Handle ROB functions + ////////////////////////////////////// + + // Read any issued instructions and place them into the ROB. Do this + // prior to squashing to avoid having instructions in the ROB that + // don't get squashed properly. + int insts_to_process = min((int)renameWidth, fromRename->size); + + for (int inst_num = 0; + inst_num < insts_to_process; + ++inst_num) + { + if (!fromRename->insts[inst_num]->isSquashed()) { + DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", + fromRename->insts[inst_num]->readPC()); + rob->insertInst(fromRename->insts[inst_num]); + } else { + DPRINTF(Commit, "Commit: Instruction %i PC %#x was " + "squashed, skipping.\n", + fromRename->insts[inst_num]->seqNum, + fromRename->insts[inst_num]->readPC()); + } + } +} + +template <class Impl> +void +SimpleCommit<Impl>::markCompletedInsts() +{ + // Grab completed insts out of the IEW instruction queue, and mark + // instructions completed within the ROB. + for (int inst_num = 0; + inst_num < fromIEW->size && fromIEW->insts[inst_num]; + ++inst_num) + { + DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n", + fromIEW->insts[inst_num]->readPC(), + fromIEW->insts[inst_num]->seqNum); + + // Mark the instruction as ready to commit. + fromIEW->insts[inst_num]->setCanCommit(); + } +} + +template <class Impl> +uint64_t +SimpleCommit<Impl>::readCommitPC() +{ + return rob->readHeadPC(); +} diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc new file mode 100644 index 000000000..a268dbc23 --- /dev/null +++ b/src/cpu/o3/cpu.cc @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config/full_system.hh" + +#if FULL_SYSTEM +#include "sim/system.hh" +#else +#include "sim/process.hh" +#endif +#include "sim/root.hh" + +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/cpu.hh" + +using namespace std; + +BaseFullCPU::BaseFullCPU(Params ¶ms) + : BaseCPU(¶ms), cpu_id(0) +{ +} + +template <class Impl> +FullO3CPU<Impl>::TickEvent::TickEvent(FullO3CPU<Impl> *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::TickEvent::process() +{ + cpu->tick(); +} + +template <class Impl> +const char * +FullO3CPU<Impl>::TickEvent::description() +{ + return "FullO3CPU tick event"; +} + +//Call constructor to all the pipeline stages here +template <class Impl> +FullO3CPU<Impl>::FullO3CPU(Params ¶ms) +#if FULL_SYSTEM + : BaseFullCPU(params), +#else + : BaseFullCPU(params), +#endif // FULL_SYSTEM + tickEvent(this), + fetch(params), + decode(params), + rename(params), + iew(params), + commit(params), + + regFile(params.numPhysIntRegs, params.numPhysFloatRegs), + + freeList(TheISA::NumIntRegs, params.numPhysIntRegs, + TheISA::NumFloatRegs, params.numPhysFloatRegs), + + renameMap(TheISA::NumIntRegs, params.numPhysIntRegs, + TheISA::NumFloatRegs, params.numPhysFloatRegs, + TheISA::NumMiscRegs, + TheISA::ZeroReg, + TheISA::ZeroReg + TheISA::NumIntRegs), + + rob(params.numROBEntries, params.squashWidth), + + // What to pass to these time buffers? + // For now just have these time buffers be pretty big. + timeBuffer(5, 5), + fetchQueue(5, 5), + decodeQueue(5, 5), + renameQueue(5, 5), + iewQueue(5, 5), + + cpuXC(NULL), + + globalSeqNum(1), + +#if FULL_SYSTEM + system(params.system), + memCtrl(system->memctrl), + physmem(system->physmem), + itb(params.itb), + dtb(params.dtb), + mem(params.mem), +#else + // Hardcoded for a single thread!! + mem(params.workload[0]->getMemory()), +#endif // FULL_SYSTEM + + icacheInterface(params.icacheInterface), + dcacheInterface(params.dcacheInterface), + deferRegistration(params.defReg), + numInsts(0), + funcExeInst(0) +{ + _status = Idle; + +#if !FULL_SYSTEM + thread.resize(this->number_of_threads); +#endif + + for (int i = 0; i < this->number_of_threads; ++i) { +#if FULL_SYSTEM + assert(i == 0); + thread[i] = new CPUExecContext(this, 0, system, itb, dtb, mem); + system->execContexts[i] = thread[i]->getProxy(); + + execContexts.push_back(system->execContexts[i]); +#else + if (i < params.workload.size()) { + DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " + "process is %#x", + i, params.workload[i]->prog_entry, thread[i]); + thread[i] = new CPUExecContext(this, i, params.workload[i], i); + } + assert(params.workload[i]->getMemory() != NULL); + assert(mem != NULL); + execContexts.push_back(thread[i]->getProxy()); +#endif // !FULL_SYSTEM + } + + // Note that this is a hack so that my code which still uses xc-> will + // still work. I should remove this eventually + cpuXC = thread[0]; + + // The stages also need their CPU pointer setup. However this must be + // done at the upper level CPU because they have pointers to the upper + // level CPU, and not this FullO3CPU. + + // Give each of the stages the time buffer they will use. + fetch.setTimeBuffer(&timeBuffer); + decode.setTimeBuffer(&timeBuffer); + rename.setTimeBuffer(&timeBuffer); + iew.setTimeBuffer(&timeBuffer); + commit.setTimeBuffer(&timeBuffer); + + // Also setup each of the stages' queues. + fetch.setFetchQueue(&fetchQueue); + decode.setFetchQueue(&fetchQueue); + decode.setDecodeQueue(&decodeQueue); + rename.setDecodeQueue(&decodeQueue); + rename.setRenameQueue(&renameQueue); + iew.setRenameQueue(&renameQueue); + iew.setIEWQueue(&iewQueue); + commit.setIEWQueue(&iewQueue); + commit.setRenameQueue(&renameQueue); + + // Setup the rename map for whichever stages need it. + rename.setRenameMap(&renameMap); + iew.setRenameMap(&renameMap); + + // Setup the free list for whichever stages need it. + rename.setFreeList(&freeList); + renameMap.setFreeList(&freeList); + + // Setup the ROB for whichever stages need it. + commit.setROB(&rob); +} + +template <class Impl> +FullO3CPU<Impl>::~FullO3CPU() +{ +} + +template <class Impl> +void +FullO3CPU<Impl>::fullCPURegStats() +{ + // Register any of the FullCPU's stats here. +} + +template <class Impl> +void +FullO3CPU<Impl>::tick() +{ + DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n"); + + //Tick each of the stages if they're actually running. + //Will want to figure out a way to unschedule itself if they're all + //going to be idle for a long time. + fetch.tick(); + + decode.tick(); + + rename.tick(); + + iew.tick(); + + commit.tick(); + + // Now advance the time buffers, unless the stage is stalled. + timeBuffer.advance(); + + fetchQueue.advance(); + decodeQueue.advance(); + renameQueue.advance(); + iewQueue.advance(); + + if (_status == Running && !tickEvent.scheduled()) + tickEvent.schedule(curTick + 1); +} + +template <class Impl> +void +FullO3CPU<Impl>::init() +{ + if(!deferRegistration) + { + this->registerExecContexts(); + + // Need to do a copy of the xc->regs into the CPU's regfile so + // that it can start properly. +#if FULL_SYSTEM + ExecContext *src_xc = system->execContexts[0]; + TheISA::initCPU(src_xc, src_xc->readCpuId()); +#else + ExecContext *src_xc = thread[0]->getProxy(); +#endif + // First loop through the integer registers. + for (int i = 0; i < TheISA::NumIntRegs; ++i) + { + regFile.intRegFile[i] = src_xc->readIntReg(i); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) + { + regFile.floatRegFile.setRegBits(i, src_xc->readRegBits(i)) + } +/* + // Then loop through the misc registers. + regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr; +*/ + // Then finally set the PC and the next PC. + regFile.pc = src_xc->readPC(); + regFile.npc = src_xc->readNextPC(); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::activateContext(int thread_num, int delay) +{ + // Needs to set each stage to running as well. + + scheduleTickEvent(delay); + + _status = Running; +} + +template <class Impl> +void +FullO3CPU<Impl>::suspendContext(int thread_num) +{ + panic("suspendContext unimplemented!"); +} + +template <class Impl> +void +FullO3CPU<Impl>::deallocateContext(int thread_num) +{ + panic("deallocateContext unimplemented!"); +} + +template <class Impl> +void +FullO3CPU<Impl>::haltContext(int thread_num) +{ + panic("haltContext unimplemented!"); +} + +template <class Impl> +void +FullO3CPU<Impl>::switchOut() +{ + panic("FullO3CPU does not have a switch out function.\n"); +} + +template <class Impl> +void +FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + assert(!tickEvent.scheduled()); + + // Set all status's to active, schedule the + // CPU's tick event. + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && _status != Running) { + _status = Running; + tickEvent.schedule(curTick); + } + } +} + +template <class Impl> +InstSeqNum +FullO3CPU<Impl>::getAndIncrementInstSeq() +{ + // Hopefully this works right. + return globalSeqNum++; +} + +template <class Impl> +uint64_t +FullO3CPU<Impl>::readIntReg(int reg_idx) +{ + return regFile.readIntReg(reg_idx); +} + +template <class Impl> +FloatReg +FullO3CPU<Impl>::readFloatReg(int reg_idx, int width) +{ + return regFile.readFloatReg(reg_idx, width); +} + +template <class Impl> +FloatReg +FullO3CPU<Impl>::readFloatReg(int reg_idx) +{ + return regFile.readFloatReg(reg_idx); +} + +template <class Impl> +FloatRegBits +FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width) +{ + return regFile.readFloatRegBits(reg_idx, width); +} + +template <class Impl> +FloatRegBits +FullO3CPU<Impl>::readFloatRegBits(int reg_idx) +{ + return regFile.readFloatRegBits(reg_idx); +} + +template <class Impl> +void +FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val) +{ + regFile.setIntReg(reg_idx, val); +} + +template <class Impl> +void +FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val, int width) +{ + regFile.setFloatReg(reg_idx, val, width); +} + +template <class Impl> +void +FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val) +{ + regFile.setFloatReg(reg_idx, val); +} + +template <class Impl> +void +FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val, int width) +{ + regFile.setFloatRegBits(reg_idx, val, width); +} + +template <class Impl> +void +FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val) +{ + regFile.setFloatRegBits(reg_idx, val); +} + +template <class Impl> +uint64_t +FullO3CPU<Impl>::readPC() +{ + return regFile.readPC(); +} + +template <class Impl> +void +FullO3CPU<Impl>::setNextPC(uint64_t val) +{ + regFile.setNextPC(val); +} + +template <class Impl> +void +FullO3CPU<Impl>::setPC(Addr new_PC) +{ + regFile.setPC(new_PC); +} + +template <class Impl> +void +FullO3CPU<Impl>::addInst(DynInstPtr &inst) +{ + instList.push_back(inst); +} + +template <class Impl> +void +FullO3CPU<Impl>::instDone() +{ + // Keep an instruction count. + numInsts++; + + // Check for instruction-count-based events. + comInstEventQueue[0]->serviceEvents(numInsts); +} + +template <class Impl> +void +FullO3CPU<Impl>::removeBackInst(DynInstPtr &inst) +{ + DynInstPtr inst_to_delete; + + // Walk through the instruction list, removing any instructions + // that were inserted after the given instruction, inst. + while (instList.back() != inst) + { + assert(!instList.empty()); + + // Obtain the pointer to the instruction. + inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.pop_back(); + + // Mark it as squashed. + inst_to_delete->setSquashed(); + } +} + +template <class Impl> +void +FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) +{ + DynInstPtr inst_to_remove; + + // The front instruction should be the same one being asked to be removed. + assert(instList.front() == inst); + + // Remove the front instruction. + inst_to_remove = inst; + instList.pop_front(); + + DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n", + inst_to_remove, inst_to_remove->readPC()); +} + +template <class Impl> +void +FullO3CPU<Impl>::removeInstsNotInROB() +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInstPtr rob_tail = rob.readTailInst(); + + removeBackInst(rob_tail); +} + +template <class Impl> +void +FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num) +{ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + "list.\n"); + + DynInstPtr inst_to_delete; + + while (instList.back()->seqNum > seq_num) { + assert(!instList.empty()); + + // Obtain the pointer to the instruction. + inst_to_delete = instList.back(); + + DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", + inst_to_delete->seqNum, inst_to_delete->readPC()); + + // Remove the instruction from the list. + instList.back() = NULL; + instList.pop_back(); + + // Mark it as squashed. + inst_to_delete->setSquashed(); + } + +} + +template <class Impl> +void +FullO3CPU<Impl>::removeAllInsts() +{ + instList.clear(); +} + +template <class Impl> +void +FullO3CPU<Impl>::dumpInsts() +{ + int num = 0; + typename list<DynInstPtr>::iterator inst_list_it = instList.begin(); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n", + num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, + (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); + inst_list_it++; + ++num; + } +} + +template <class Impl> +void +FullO3CPU<Impl>::wakeDependents(DynInstPtr &inst) +{ + iew.wakeDependents(inst); +} + +// Forward declaration of FullO3CPU. +template class FullO3CPU<AlphaSimpleImpl>; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh new file mode 100644 index 000000000..f7c80e8a1 --- /dev/null +++ b/src/cpu/o3/cpu.hh @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +//Todo: Add in a lot of the functions that are ISA specific. Also define +//the functions that currently exist within the base cpu class. Define +//everything for the simobject stuff so it can be serialized and +//instantiated, add in debugging statements everywhere. Have CPU schedule +//itself properly. Threads! +// Avoid running stages and advancing queues if idle/stalled. + +#ifndef __CPU_O3_CPU_FULL_CPU_HH__ +#define __CPU_O3_CPU_FULL_CPU_HH__ + +#include <iostream> +#include <list> +#include <vector> + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/cpu_policy.hh" +#include "sim/process.hh" + +class ExecContext; +class FunctionalMemory; +class Process; + +class BaseFullCPU : public BaseCPU +{ + //Stuff that's pretty ISA independent will go here. + public: + typedef BaseCPU::Params Params; + +#if FULL_SYSTEM + BaseFullCPU(Params ¶ms); +#else + BaseFullCPU(Params ¶ms); +#endif // FULL_SYSTEM + + protected: + int cpu_id; +}; + +template <class Impl> +class FullO3CPU : public BaseFullCPU +{ + public: + //Put typedefs from the Impl here. + typedef typename Impl::CPUPol CPUPolicy; + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + enum Status { + Running, + Idle, + Halted, + Blocked // ? + }; + + Status _status; + + private: + class TickEvent : public Event + { + private: + FullO3CPU<Impl> *cpu; + + public: + TickEvent(FullO3CPU<Impl> *c); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + public: + FullO3CPU(Params ¶ms); + ~FullO3CPU(); + + void fullCPURegStats(); + + void tick(); + + void init(); + + void activateContext(int thread_num, int delay); + void suspendContext(int thread_num); + void deallocateContext(int thread_num); + void haltContext(int thread_num); + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + + /** Get the current instruction sequence number, and increment it. */ + InstSeqNum getAndIncrementInstSeq(); + +#if FULL_SYSTEM + /** Check if this address is a valid instruction address. */ + bool validInstAddr(Addr addr) { return true; } + + /** Check if this address is a valid data address. */ + bool validDataAddr(Addr addr) { return true; } + + /** Get instruction asid. */ + int getInstAsid() + { return regFile.miscRegs.getInstAsid(); } + + /** Get data asid. */ + int getDataAsid() + { return regFile.miscRegs.getDataAsid(); } +#else + bool validInstAddr(Addr addr) + { return thread[0]->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return thread[0]->validDataAddr(addr); } + + int getInstAsid() { return thread[0]->getInstAsid(); } + int getDataAsid() { return thread[0]->getDataAsid(); } + +#endif + + // + // New accessors for new decoder. + // + uint64_t readIntReg(int reg_idx); + + FloatReg readFloatReg(int reg_idx); + + FloatReg readFloatReg(int reg_idx, int width); + + FloatRegBits readFloatRegBits(int reg_idx); + + FloatRegBits readFloatRegBits(int reg_idx, int width); + + void setIntReg(int reg_idx, uint64_t val); + + void setFloatReg(int reg_idx, FloatReg val, int width); + + void setFloatReg(int reg_idx, FloatReg val, int width); + + void setFloatRegBits(int reg_idx, FloatRegBits val); + + void setFloatRegBits(int reg_idx, FloatRegBits val); + + uint64_t readPC(); + + void setNextPC(uint64_t val); + + void setPC(Addr new_PC); + + /** Function to add instruction onto the head of the list of the + * instructions. Used when new instructions are fetched. + */ + void addInst(DynInstPtr &inst); + + /** Function to tell the CPU that an instruction has completed. */ + void instDone(); + + /** Remove all instructions in back of the given instruction, but leave + * that instruction in the list. This is useful in a squash, when there + * are instructions in this list that don't exist in structures such as + * the ROB. The instruction doesn't have to be the last instruction in + * the list, but will be once this function completes. + * @todo: Remove only up until that inst? Squashed inst is most likely + * valid. + */ + void removeBackInst(DynInstPtr &inst); + + /** Remove an instruction from the front of the list. It is expected + * that there are no instructions in front of it (that is, none are older + * than the instruction being removed). Used when retiring instructions. + * @todo: Remove the argument to this function, and just have it remove + * last instruction once it's verified that commit has the same ordering + * as the instruction list. + */ + void removeFrontInst(DynInstPtr &inst); + + /** Remove all instructions that are not currently in the ROB. */ + void removeInstsNotInROB(); + + /** Remove all instructions younger than the given sequence number. */ + void removeInstsUntil(const InstSeqNum &seq_num); + + /** Remove all instructions from the list. */ + void removeAllInsts(); + + void dumpInsts(); + + /** Basically a wrapper function so that instructions executed at + * commit can tell the instruction queue that they have completed. + * Eventually this hack should be removed. + */ + void wakeDependents(DynInstPtr &inst); + + public: + /** List of all the instructions in flight. */ + list<DynInstPtr> instList; + + //not sure these should be private. + protected: + /** The fetch stage. */ + typename CPUPolicy::Fetch fetch; + + /** The fetch stage's status. */ + typename CPUPolicy::Fetch::Status fetchStatus; + + /** The decode stage. */ + typename CPUPolicy::Decode decode; + + /** The decode stage's status. */ + typename CPUPolicy::Decode::Status decodeStatus; + + /** The dispatch stage. */ + typename CPUPolicy::Rename rename; + + /** The dispatch stage's status. */ + typename CPUPolicy::Rename::Status renameStatus; + + /** The issue/execute/writeback stages. */ + typename CPUPolicy::IEW iew; + + /** The issue/execute/writeback stage's status. */ + typename CPUPolicy::IEW::Status iewStatus; + + /** The commit stage. */ + typename CPUPolicy::Commit commit; + + /** The fetch stage's status. */ + typename CPUPolicy::Commit::Status commitStatus; + + //Might want to just pass these objects in to the constructors of the + //appropriate stage. regFile is in iew, freeList in dispatch, renameMap + //in dispatch, and the rob in commit. + /** The register file. */ + typename CPUPolicy::RegFile regFile; + + /** The free list. */ + typename CPUPolicy::FreeList freeList; + + /** The rename map. */ + typename CPUPolicy::RenameMap renameMap; + + /** The re-order buffer. */ + typename CPUPolicy::ROB rob; + + public: + /** Typedefs from the Impl to get the structs that each of the + * time buffers should use. + */ + typedef typename CPUPolicy::TimeStruct TimeStruct; + + typedef typename CPUPolicy::FetchStruct FetchStruct; + + typedef typename CPUPolicy::DecodeStruct DecodeStruct; + + typedef typename CPUPolicy::RenameStruct RenameStruct; + + typedef typename CPUPolicy::IEWStruct IEWStruct; + + /** The main time buffer to do backwards communication. */ + TimeBuffer<TimeStruct> timeBuffer; + + /** The fetch stage's instruction queue. */ + TimeBuffer<FetchStruct> fetchQueue; + + /** The decode stage's instruction queue. */ + TimeBuffer<DecodeStruct> decodeQueue; + + /** The rename stage's instruction queue. */ + TimeBuffer<RenameStruct> renameQueue; + + /** The IEW stage's instruction queue. */ + TimeBuffer<IEWStruct> iewQueue; + + public: + /** The temporary exec context to support older accessors. */ + CPUExecContext *cpuXC; + + /** Temporary function to get pointer to exec context. */ + ExecContext *xcBase() + { + return thread[0]->getProxy(); + } + + CPUExecContext *cpuXCBase() + { + return thread[0]; + } + + InstSeqNum globalSeqNum; + +#if FULL_SYSTEM + System *system; + + MemoryController *memCtrl; + PhysicalMemory *physmem; + + AlphaITB *itb; + AlphaDTB *dtb; + +// SWContext *swCtx; +#endif + std::vector<CPUExecContext *> thread; + + FunctionalMemory *mem; + + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + bool deferRegistration; + + Counter numInsts; + + Counter funcExeInst; +}; + +#endif diff --git a/src/cpu/o3/cpu_policy.hh b/src/cpu/o3/cpu_policy.hh new file mode 100644 index 000000000..41f06f81b --- /dev/null +++ b/src/cpu/o3/cpu_policy.hh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_CPU_POLICY_HH__ +#define __CPU_O3_CPU_CPU_POLICY_HH__ + +#include "cpu/o3/bpred_unit.hh" +#include "cpu/o3/free_list.hh" +#include "cpu/o3/inst_queue.hh" +#include "cpu/o3/ldstq.hh" +#include "cpu/o3/mem_dep_unit.hh" +#include "cpu/o3/regfile.hh" +#include "cpu/o3/rename_map.hh" +#include "cpu/o3/rob.hh" +#include "cpu/o3/store_set.hh" + +#include "cpu/o3/commit.hh" +#include "cpu/o3/decode.hh" +#include "cpu/o3/fetch.hh" +#include "cpu/o3/iew.hh" +#include "cpu/o3/rename.hh" + +#include "cpu/o3/comm.hh" + +template<class Impl> +struct SimpleCPUPolicy +{ + typedef TwobitBPredUnit<Impl> BPredUnit; + typedef PhysRegFile<Impl> RegFile; + typedef SimpleFreeList FreeList; + typedef SimpleRenameMap RenameMap; + typedef ROB<Impl> ROB; + typedef InstructionQueue<Impl> IQ; + typedef MemDepUnit<StoreSet, Impl> MemDepUnit; + typedef LDSTQ<Impl> LDSTQ; + + typedef SimpleFetch<Impl> Fetch; + typedef SimpleDecode<Impl> Decode; + typedef SimpleRename<Impl> Rename; + typedef SimpleIEW<Impl> IEW; + typedef SimpleCommit<Impl> Commit; + + /** The struct for communication between fetch and decode. */ + typedef SimpleFetchSimpleDecode<Impl> FetchStruct; + + /** The struct for communication between decode and rename. */ + typedef SimpleDecodeSimpleRename<Impl> DecodeStruct; + + /** The struct for communication between rename and IEW. */ + typedef SimpleRenameSimpleIEW<Impl> RenameStruct; + + /** The struct for communication between IEW and commit. */ + typedef SimpleIEWSimpleCommit<Impl> IEWStruct; + + /** The struct for communication within the IEW stage. */ + typedef IssueStruct<Impl> IssueStruct; + + /** The struct for all backwards communication. */ + typedef TimeBufStruct TimeStruct; + +}; + +#endif //__CPU_O3_CPU_CPU_POLICY_HH__ diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc new file mode 100644 index 000000000..290648318 --- /dev/null +++ b/src/cpu/o3/decode.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/decode_impl.hh" + +template class SimpleDecode<AlphaSimpleImpl>; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh new file mode 100644 index 000000000..5b9a0f822 --- /dev/null +++ b/src/cpu/o3/decode.hh @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_SIMPLE_DECODE_HH__ +#define __CPU_O3_CPU_SIMPLE_DECODE_HH__ + +#include <queue> + +#include "base/statistics.hh" +#include "base/timebuf.hh" + +template<class Impl> +class SimpleDecode +{ + private: + // Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; + + // Typedefs from the CPU policy. + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + public: + // The only time decode will become blocked is if dispatch becomes + // blocked, which means IQ or ROB is probably full. + enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking + }; + + private: + // May eventually need statuses on a per thread basis. + Status _status; + + public: + SimpleDecode(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr); + + void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr); + + void tick(); + + void decode(); + + private: + inline bool fetchInstsValid(); + + void block(); + + inline void unblock(); + + void squash(DynInstPtr &inst); + + public: + // Might want to make squash a friend function. + void squash(); + + private: + // Interfaces to objects outside of decode. + /** CPU interface. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get rename's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + // Might not be the best name as not only fetch will read it. + typename TimeBuffer<TimeStruct>::wire toFetch; + + /** Decode instruction queue. */ + TimeBuffer<DecodeStruct> *decodeQueue; + + /** Wire used to write any information heading to rename. */ + typename TimeBuffer<DecodeStruct>::wire toRename; + + /** Fetch instruction queue interface. */ + TimeBuffer<FetchStruct> *fetchQueue; + + /** Wire to get fetch's output from fetch queue. */ + typename TimeBuffer<FetchStruct>::wire fromFetch; + + /** Skid buffer between fetch and decode. */ + std::queue<FetchStruct> skidBuffer; + + //Consider making these unsigned to avoid any confusion. + /** Rename to decode delay, in ticks. */ + unsigned renameToDecodeDelay; + + /** IEW to decode delay, in ticks. */ + unsigned iewToDecodeDelay; + + /** Commit to decode delay, in ticks. */ + unsigned commitToDecodeDelay; + + /** Fetch to decode delay, in ticks. */ + unsigned fetchToDecodeDelay; + + /** The width of decode, in instructions. */ + unsigned decodeWidth; + + /** The instruction that decode is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; + + Stats::Scalar<> decodeIdleCycles; + Stats::Scalar<> decodeBlockedCycles; + Stats::Scalar<> decodeUnblockCycles; + Stats::Scalar<> decodeSquashCycles; + Stats::Scalar<> decodeBranchMispred; + Stats::Scalar<> decodeControlMispred; + Stats::Scalar<> decodeDecodedInsts; + Stats::Scalar<> decodeSquashedInsts; +}; + +#endif // __CPU_O3_CPU_SIMPLE_DECODE_HH__ diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh new file mode 100644 index 000000000..463f0ddac --- /dev/null +++ b/src/cpu/o3/decode_impl.hh @@ -0,0 +1,425 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/decode.hh" + +template<class Impl> +SimpleDecode<Impl>::SimpleDecode(Params ¶ms) + : renameToDecodeDelay(params.renameToDecodeDelay), + iewToDecodeDelay(params.iewToDecodeDelay), + commitToDecodeDelay(params.commitToDecodeDelay), + fetchToDecodeDelay(params.fetchToDecodeDelay), + decodeWidth(params.decodeWidth), + numInst(0) +{ + DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth); + _status = Idle; +} + +template <class Impl> +void +SimpleDecode<Impl>::regStats() +{ + decodeIdleCycles + .name(name() + ".decodeIdleCycles") + .desc("Number of cycles decode is idle") + .prereq(decodeIdleCycles); + decodeBlockedCycles + .name(name() + ".decodeBlockedCycles") + .desc("Number of cycles decode is blocked") + .prereq(decodeBlockedCycles); + decodeUnblockCycles + .name(name() + ".decodeUnblockCycles") + .desc("Number of cycles decode is unblocking") + .prereq(decodeUnblockCycles); + decodeSquashCycles + .name(name() + ".decodeSquashCycles") + .desc("Number of cycles decode is squashing") + .prereq(decodeSquashCycles); + decodeBranchMispred + .name(name() + ".decodeBranchMispred") + .desc("Number of times decode detected a branch misprediction") + .prereq(decodeBranchMispred); + decodeControlMispred + .name(name() + ".decodeControlMispred") + .desc("Number of times decode detected an instruction incorrectly" + " predicted as a control") + .prereq(decodeControlMispred); + decodeDecodedInsts + .name(name() + ".decodeDecodedInsts") + .desc("Number of instructions handled by decode") + .prereq(decodeDecodedInsts); + decodeSquashedInsts + .name(name() + ".decodeSquashedInsts") + .desc("Number of squashed instructions handled by decode") + .prereq(decodeSquashedInsts); +} + +template<class Impl> +void +SimpleDecode<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Decode, "Decode: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template<class Impl> +void +SimpleDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Decode, "Decode: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to write information back to fetch. + toFetch = timeBuffer->getWire(0); + + // Create wires to get information from proper places in time buffer. + fromRename = timeBuffer->getWire(-renameToDecodeDelay); + fromIEW = timeBuffer->getWire(-iewToDecodeDelay); + fromCommit = timeBuffer->getWire(-commitToDecodeDelay); +} + +template<class Impl> +void +SimpleDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr) +{ + DPRINTF(Decode, "Decode: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to write information to proper place in decode queue. + toRename = decodeQueue->getWire(0); +} + +template<class Impl> +void +SimpleDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) +{ + DPRINTF(Decode, "Decode: Setting fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Setup wire to read information from fetch queue. + fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); +} + +template<class Impl> +inline bool +SimpleDecode<Impl>::fetchInstsValid() +{ + return fromFetch->size > 0; +} + +template<class Impl> +void +SimpleDecode<Impl>::block() +{ + DPRINTF(Decode, "Decode: Blocking.\n"); + + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template<class Impl> +inline void +SimpleDecode<Impl>::unblock() +{ + DPRINTF(Decode, "Decode: Unblocking, going to remove " + "instructions from skid buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Finished unblocking.\n"); + _status = Running; + } +} + +// This squash is specifically for when Decode detects a PC-relative branch +// was predicted incorrectly. +template<class Impl> +void +SimpleDecode<Impl>::squash(DynInstPtr &inst) +{ + DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " + "detected at decode.\n"); + Addr new_PC = inst->readNextPC(); + + toFetch->decodeInfo.branchMispredict = true; + toFetch->decodeInfo.doneSeqNum = inst->seqNum; + toFetch->decodeInfo.predIncorrect = true; + toFetch->decodeInfo.squash = true; + toFetch->decodeInfo.nextPC = new_PC; + toFetch->decodeInfo.branchTaken = true; + + // Set status to squashing. + _status = Squashing; + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) { + skidBuffer.pop(); + } + + // Squash instructions up until this one + // Slightly unrealistic! + cpu->removeInstsUntil(inst->seqNum); +} + +template<class Impl> +void +SimpleDecode<Impl>::squash() +{ + DPRINTF(Decode, "Decode: Squashing.\n"); + // Set status to squashing. + _status = Squashing; + + // Maybe advance the time buffer? Not sure what to do in the normal + // case. + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } +} + +template<class Impl> +void +SimpleDecode<Impl>::tick() +{ + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + if (_status != Blocked && _status != Squashing) { + DPRINTF(Decode, "Decode: Not blocked, so attempting to run " + "stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + decode(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + ++decodeUnblockCycles; + + if (fetchInstsValid()) { + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromFetch); + } + + unblock(); + } + } else if (_status == Blocked) { + ++decodeBlockedCycles; + + if (fetchInstsValid()) { + block(); + } + + if (!fromRename->renameInfo.stall && + !fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall) { + DPRINTF(Decode, "Decode: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this + // stage is done unblocking. + toFetch->decodeInfo.stall = true; + } else { + DPRINTF(Decode, "Decode: Still blocked.\n"); + toFetch->decodeInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + } else if (_status == Squashing) { + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + _status = Running; + } else if (fromCommit->commitInfo.squash) { + ++decodeSquashCycles; + + squash(); + } + } +} + +template<class Impl> +void +SimpleDecode<Impl>::decode() +{ + // Check time buffer if being told to squash. + if (fromCommit->commitInfo.squash) { + squash(); + return; + } + + // Check time buffer if being told to stall. + if (fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) { + block(); + return; + } + + // Check fetch queue to see if instructions are available. + // If no available instructions, do nothing, unless this stage is + // currently unblocking. + if (!fetchInstsValid() && _status != Unblocking) { + DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); + // Should I change the status to idle? + ++decodeIdleCycles; + return; + } + + // Might be better to use a base DynInst * instead? + DynInstPtr inst; + + unsigned to_rename_index = 0; + + int insts_available = _status == Unblocking ? + skidBuffer.front().size - numInst : + fromFetch->size; + + // Debug block... +#if 0 + if (insts_available) { + DPRINTF(Decode, "Decode: Instructions available.\n"); + } else { + if (_status == Unblocking && skidBuffer.empty()) { + DPRINTF(Decode, "Decode: No instructions available, skid buffer " + "empty.\n"); + } else if (_status != Unblocking && + !fromFetch->insts[0]) { + DPRINTF(Decode, "Decode: No instructions available, fetch queue " + "empty.\n"); + } else { + panic("Decode: No instructions available, unexpected condition!" + "\n"); + } + } +#endif + + while (insts_available > 0) + { + DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); + + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromFetch->insts[numInst]; + + DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n", + inst->seqNum, inst->readPC()); + + if (inst->isSquashed()) { + DPRINTF(Decode, "Decode: Instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + ++decodeSquashedInsts; + + ++numInst; + --insts_available; + + continue; + } + + + // Also check if instructions have no source registers. Mark + // them as ready to issue at any time. Not sure if this check + // should exist here or at a later stage; however it doesn't matter + // too much for function correctness. + // Isn't this handled by the inst queue? + if (inst->numSrcRegs() == 0) { + inst->setCanIssue(); + } + + // This current instruction is valid, so add it into the decode + // queue. The next instruction may not be valid, so check to + // see if branches were predicted correctly. + toRename->insts[to_rename_index] = inst; + + ++(toRename->size); + + // Ensure that if it was predicted as a branch, it really is a + // branch. + if (inst->predTaken() && !inst->isControl()) { + panic("Instruction predicted as a branch!"); + + ++decodeControlMispred; + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst); + break; + } + + // Go ahead and compute any PC-relative branches. + + if (inst->isDirectCtrl() && inst->isUncondCtrl()) { + + inst->setNextPC(inst->branchTarget()); + + if (inst->mispredicted()) { + ++decodeBranchMispred; + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst); + break; + } + } + + // Normally can check if a direct branch has the right target + // addr (either the immediate, or the branch PC + 4) and redirect + // fetch if it's incorrect. + + // Increment which instruction we're looking at. + ++numInst; + ++to_rename_index; + ++decodeDecodedInsts; + + --insts_available; + } + + numInst = 0; +} diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc new file mode 100644 index 000000000..8ad5e6565 --- /dev/null +++ b/src/cpu/o3/fetch.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/fetch_impl.hh" + +template class SimpleFetch<AlphaSimpleImpl>; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh new file mode 100644 index 000000000..cc64800d9 --- /dev/null +++ b/src/cpu/o3/fetch.hh @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: SMT fetch, +// Add a way to get a stage's current status. + +#ifndef __CPU_O3_CPU_SIMPLE_FETCH_HH__ +#define __CPU_O3_CPU_SIMPLE_FETCH_HH__ + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/pc_event.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" + +/** + * SimpleFetch class to fetch a single instruction each cycle. SimpleFetch + * will stall if there's an Icache miss, but otherwise assumes a one cycle + * Icache hit. + */ + +template <class Impl> +class SimpleFetch +{ + public: + /** Typedefs from Impl. */ + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::BPredUnit BPredUnit; + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + /** Typedefs from ISA. */ + typedef TheISA::MachInst MachInst; + + public: + enum Status { + Running, + Idle, + Squashing, + Blocked, + IcacheMissStall, + IcacheMissComplete + }; + + // May eventually need statuses on a per thread basis. + Status _status; + + bool stalled; + + public: + class CacheCompletionEvent : public Event + { + private: + SimpleFetch *fetch; + + public: + CacheCompletionEvent(SimpleFetch *_fetch); + + virtual void process(); + virtual const char *description(); + }; + + public: + /** SimpleFetch constructor. */ + SimpleFetch(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer); + + void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr); + + void processCacheCompletion(); + + private: + /** + * Looks up in the branch predictor to see if the next PC should be + * either next PC+=MachInst or a branch target. + * @param next_PC Next PC variable passed in by reference. It is + * expected to be set to the current PC; it will be updated with what + * the next PC will be. + * @return Whether or not a branch was predicted as taken. + */ + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + + /** + * Fetches the cache line that contains fetch_PC. Returns any + * fault that happened. Puts the data into the class variable + * cacheData. + * @param fetch_PC The PC address that is being fetched from. + * @return Any fault that occured. + */ + Fault fetchCacheLine(Addr fetch_PC); + + inline void doSquash(const Addr &new_PC); + + void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num); + + public: + // Figure out PC vs next PC and how it should be updated + void squash(const Addr &new_PC); + + void tick(); + + void fetch(); + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = TheISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + private: + /** Pointer to the FullCPU. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get decode's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromDecode; + + /** Wire to get rename's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Internal fetch instruction queue. */ + TimeBuffer<FetchStruct> *fetchQueue; + + //Might be annoying how this name is different than the queue. + /** Wire used to write any information heading to decode. */ + typename TimeBuffer<FetchStruct>::wire toDecode; + + /** Icache interface. */ + MemInterface *icacheInterface; + + /** BPredUnit. */ + BPredUnit branchPred; + + /** Memory request used to access cache. */ + MemReqPtr memReq; + + /** Decode to fetch delay, in ticks. */ + unsigned decodeToFetchDelay; + + /** Rename to fetch delay, in ticks. */ + unsigned renameToFetchDelay; + + /** IEW to fetch delay, in ticks. */ + unsigned iewToFetchDelay; + + /** Commit to fetch delay, in ticks. */ + unsigned commitToFetchDelay; + + /** The width of fetch in instructions. */ + unsigned fetchWidth; + + /** Cache block size. */ + int cacheBlkSize; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + /** The cache line being fetched. */ + uint8_t *cacheData; + + /** Size of instructions. */ + int instSize; + + /** Icache stall statistics. */ + Counter lastIcacheStall; + + Stats::Scalar<> icacheStallCycles; + Stats::Scalar<> fetchedInsts; + Stats::Scalar<> predictedBranches; + Stats::Scalar<> fetchCycles; + Stats::Scalar<> fetchSquashCycles; + Stats::Scalar<> fetchBlockedCycles; + Stats::Scalar<> fetchedCacheLines; + + Stats::Distribution<> fetch_nisn_dist; +}; + +#endif //__CPU_O3_CPU_SIMPLE_FETCH_HH__ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh new file mode 100644 index 000000000..8029fc732 --- /dev/null +++ b/src/cpu/o3/fetch_impl.hh @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Remove this later; used only for debugging. +#define OPCODE(X) (X >> 26) & 0x3f + +#include "arch/isa_traits.hh" +#include "sim/byteswap.hh" +#include "cpu/exetrace.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "cpu/o3/fetch.hh" + +#include "sim/root.hh" + +template<class Impl> +SimpleFetch<Impl>::CacheCompletionEvent +::CacheCompletionEvent(SimpleFetch *_fetch) + : Event(&mainEventQueue), + fetch(_fetch) +{ +} + +template<class Impl> +void +SimpleFetch<Impl>::CacheCompletionEvent::process() +{ + fetch->processCacheCompletion(); +} + +template<class Impl> +const char * +SimpleFetch<Impl>::CacheCompletionEvent::description() +{ + return "SimpleFetch cache completion event"; +} + +template<class Impl> +SimpleFetch<Impl>::SimpleFetch(Params ¶ms) + : icacheInterface(params.icacheInterface), + branchPred(params), + decodeToFetchDelay(params.decodeToFetchDelay), + renameToFetchDelay(params.renameToFetchDelay), + iewToFetchDelay(params.iewToFetchDelay), + commitToFetchDelay(params.commitToFetchDelay), + fetchWidth(params.fetchWidth) +{ + DPRINTF(Fetch, "Fetch: Fetch constructor called\n"); + + // Set status to idle. + _status = Idle; + + // Create a new memory request. + memReq = new MemReq(); + // Not sure of this parameter. I think it should be based on the + // thread number. +#if !FULL_SYSTEM + memReq->asid = 0; +#else + memReq->asid = 0; +#endif // FULL_SYSTEM + memReq->data = new uint8_t[64]; + + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + // Create mask to get rid of offset bits. + cacheBlkMask = (cacheBlkSize - 1); + + // Get the size of an instruction. + instSize = sizeof(MachInst); + + // Create space to store a cache line. + cacheData = new uint8_t[cacheBlkSize]; +} + +template <class Impl> +void +SimpleFetch<Impl>::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".fetchedInsts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + predictedBranches + .name(name() + ".predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); + fetchCycles + .name(name() + ".fetchCycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + fetchSquashCycles + .name(name() + ".fetchSquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + fetchBlockedCycles + .name(name() + ".fetchBlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); + fetchedCacheLines + .name(name() + ".fetchedCacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + + fetch_nisn_dist + .init(/* base value */ 0, + /* last value */ fetchWidth, + /* bucket size */ 1) + .name(name() + ".FETCH:rate_dist") + .desc("Number of instructions fetched each cycle (Total)") + .flags(Stats::pdf) + ; + + branchPred.regStats(); +} + +template<class Impl> +void +SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n"); + cpu = cpu_ptr; + // This line will be removed eventually. + memReq->xc = cpu->xcBase(); +} + +template<class Impl> +void +SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) +{ + DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n"); + timeBuffer = time_buffer; + + // Create wires to get information from proper places in time buffer. + fromDecode = timeBuffer->getWire(-decodeToFetchDelay); + fromRename = timeBuffer->getWire(-renameToFetchDelay); + fromIEW = timeBuffer->getWire(-iewToFetchDelay); + fromCommit = timeBuffer->getWire(-commitToFetchDelay); +} + +template<class Impl> +void +SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr) +{ + DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Create wire to write information to proper place in fetch queue. + toDecode = fetchQueue->getWire(0); +} + +template<class Impl> +void +SimpleFetch<Impl>::processCacheCompletion() +{ + DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n"); + + // Only change the status if it's still waiting on the icache access + // to return. + // Can keep track of how many cache accesses go unused due to + // misspeculation here. + if (_status == IcacheMissStall) + _status = IcacheMissComplete; +} + +template <class Impl> +bool +SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +{ + // Do branch prediction check here. + // A bit of a misnomer...next_PC is actually the current PC until + // this function updates it. + bool predict_taken; + + if (!inst->isControl()) { + next_PC = next_PC + instSize; + inst->setPredTarg(next_PC); + return false; + } + + predict_taken = branchPred.predict(inst, next_PC); + + if (predict_taken) { + ++predictedBranches; + } + + return predict_taken; +} + +template <class Impl> +Fault +SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC) +{ + // Check if the instruction exists within the cache. + // If it does, then proceed on to read the instruction and the rest + // of the instructions in the cache line until either the end of the + // cache line or a predicted taken branch is encountered. + +#if FULL_SYSTEM + // Flag to say whether or not address is physical addr. + unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + + Fault fault = NoFault; + + // Align the fetch PC so it's at the start of a cache block. + fetch_PC = icacheBlockAlignPC(fetch_PC); + + // Setup the memReq to do a read of the first isntruction's address. + // Set the appropriate read size and flags as well. + memReq->cmd = Read; + memReq->reset(fetch_PC, cacheBlkSize, flags); + + // Translate the instruction request. + // Should this function be + // in the CPU class ? Probably...ITB/DTB should exist within the + // CPU. + + fault = cpu->translateInstReq(memReq); + + // In the case of faults, the fetch stage may need to stall and wait + // on what caused the fetch (ITB or Icache miss). + + // If translation was successful, attempt to read the first + // instruction. + if (fault == NoFault) { + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); + fault = cpu->mem->read(memReq, cacheData); + // This read may change when the mem interface changes. + + fetchedCacheLines++; + } + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (icacheInterface && fault == NoFault) { + DPRINTF(Fetch, "Fetch: Doing timing memory access.\n"); + memReq->completionEvent = NULL; + + memReq->time = curTick; + + MemAccessResult result = icacheInterface->access(memReq); + + // If the cache missed (in this model functional and timing + // memories are different), then schedule an event to wake + // up this stage once the cache miss completes. + if (result != MA_HIT && icacheInterface->doEvents()) { + memReq->completionEvent = new CacheCompletionEvent(this); + + // How does current model work as far as individual + // stages scheduling/unscheduling? + // Perhaps have only the main CPU scheduled/unscheduled, + // and have it choose what stages to run appropriately. + + DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n"); + _status = IcacheMissStall; + } + } + + return fault; +} + +template <class Impl> +inline void +SimpleFetch<Impl>::doSquash(const Addr &new_PC) +{ + DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); + + cpu->setNextPC(new_PC + instSize); + cpu->setPC(new_PC); + + // Clear the icache miss if it's outstanding. + if (_status == IcacheMissStall && icacheInterface) { + DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n"); + // @todo: Use an actual thread number here. + icacheInterface->squash(0); + } + + _status = Squashing; + + ++fetchSquashCycles; +} + +template<class Impl> +void +SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC, + const InstSeqNum &seq_num) +{ + DPRINTF(Fetch, "Fetch: Squashing from decode.\n"); + + doSquash(new_PC); + + // Tell the CPU to remove any instructions that are in flight between + // fetch and decode. + cpu->removeInstsUntil(seq_num); +} + +template <class Impl> +void +SimpleFetch<Impl>::squash(const Addr &new_PC) +{ + DPRINTF(Fetch, "Fetch: Squash from commit.\n"); + + doSquash(new_PC); + + // Tell the CPU to remove any instructions that are not in the ROB. + cpu->removeInstsNotInROB(); +} + +template<class Impl> +void +SimpleFetch<Impl>::tick() +{ + // Check squash signals from commit. + if (fromCommit->commitInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from commit.\n"); + + // In any case, squash. + squash(fromCommit->commitInfo.nextPC); + + // Also check if there's a mispredict that happened. + if (fromCommit->commitInfo.branchMispredict) { + branchPred.squash(fromCommit->commitInfo.doneSeqNum, + fromCommit->commitInfo.nextPC, + fromCommit->commitInfo.branchTaken); + } else { + branchPred.squash(fromCommit->commitInfo.doneSeqNum); + } + + return; + } else if (fromCommit->commitInfo.doneSeqNum) { + // Update the branch predictor if it wasn't a squashed instruction + // that was braodcasted. + branchPred.update(fromCommit->commitInfo.doneSeqNum); + } + + // Check ROB squash signals from commit. + if (fromCommit->commitInfo.robSquashing) { + DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); + + // Continue to squash. + _status = Squashing; + + ++fetchSquashCycles; + return; + } + + // Check squash signals from decode. + if (fromDecode->decodeInfo.squash) { + DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " + "from decode.\n"); + + // Update the branch predictor. + if (fromDecode->decodeInfo.branchMispredict) { + branchPred.squash(fromDecode->decodeInfo.doneSeqNum, + fromDecode->decodeInfo.nextPC, + fromDecode->decodeInfo.branchTaken); + } else { + branchPred.squash(fromDecode->decodeInfo.doneSeqNum); + } + + if (_status != Squashing) { + // Squash unless we're already squashing? + squashFromDecode(fromDecode->decodeInfo.nextPC, + fromDecode->decodeInfo.doneSeqNum); + return; + } + } + + // Check if any of the stall signals are high. + if (fromDecode->decodeInfo.stall || + fromRename->renameInfo.stall || + fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) + { + // Block stage, regardless of current status. + + DPRINTF(Fetch, "Fetch: Stalling stage.\n"); + DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " + "Commit: %i\n", + fromDecode->decodeInfo.stall, + fromRename->renameInfo.stall, + fromIEW->iewInfo.stall, + fromCommit->commitInfo.stall); + + _status = Blocked; + + ++fetchBlockedCycles; + return; + } else if (_status == Blocked) { + // Unblock stage if status is currently blocked and none of the + // stall signals are being held high. + _status = Running; + + ++fetchBlockedCycles; + return; + } + + // If fetch has reached this point, then there are no squash signals + // still being held high. Check if fetch is in the squashing state; + // if so, fetch can switch to running. + // Similarly, there are no blocked signals still being held high. + // Check if fetch is in the blocked state; if so, fetch can switch to + // running. + if (_status == Squashing) { + DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n"); + + // Switch status to running + _status = Running; + + ++fetchCycles; + + fetch(); + } else if (_status != IcacheMissStall) { + DPRINTF(Fetch, "Fetch: Running stage.\n"); + + ++fetchCycles; + + fetch(); + } +} + +template<class Impl> +void +SimpleFetch<Impl>::fetch() +{ + ////////////////////////////////////////// + // Start actual fetch + ////////////////////////////////////////// + + // The current PC. + Addr fetch_PC = cpu->readPC(); + + // Fault code for memory access. + Fault fault = NoFault; + + // If returning from the delay of a cache miss, then update the status + // to running, otherwise do the cache access. Possibly move this up + // to tick() function. + if (_status == IcacheMissComplete) { + DPRINTF(Fetch, "Fetch: Icache miss is complete.\n"); + + // Reset the completion event to NULL. + memReq->completionEvent = NULL; + + _status = Running; + } else { + DPRINTF(Fetch, "Fetch: Attempting to translate and read " + "instruction, starting at PC %08p.\n", + fetch_PC); + + fault = fetchCacheLine(fetch_PC); + } + + // If we had a stall due to an icache miss, then return. It'd + // be nicer if this were handled through the kind of fault that + // is returned by the function. + if (_status == IcacheMissStall) { + return; + } + + // As far as timing goes, the CPU will need to send an event through + // the MemReq in order to be woken up once the memory access completes. + // Probably have a status on a per thread basis so each thread can + // block independently and be woken up independently. + + Addr next_PC = fetch_PC; + InstSeqNum inst_seq; + MachInst inst; + unsigned offset = fetch_PC & cacheBlkMask; + unsigned fetched; + + if (fault == NoFault) { + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. + + DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); + + ////////////////////////// + // Fetch first instruction + ////////////////////////// + + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + bool predicted_branch = false; + + for (fetched = 0; + offset < cacheBlkSize && + fetched < fetchWidth && + !predicted_branch; + ++fetched) + { + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - instSize); + + // Get the instruction from the array of the cache line. + inst = gtoh(*reinterpret_cast<MachInst *> + (&cacheData[offset])); + + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, + inst_seq, cpu); + + DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", + inst_seq, instruction->readPC()); + + DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", + OPCODE(inst)); + + instruction->traceData = + Trace::getInstRecord(curTick, cpu->xcBase(), cpu, + instruction->staticInst, + instruction->readPC(), 0); + + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + + // Add instruction to the CPU's list of instructions. + cpu->addInst(instruction); + + // Write the instruction to the first slot in the queue + // that heads to decode. + toDecode->insts[fetched] = instruction; + + toDecode->size++; + + // Increment stat of fetched instructions. + ++fetchedInsts; + + // Move to the next instruction, unless we have a branch. + fetch_PC = next_PC; + + offset+= instSize; + } + + fetch_nisn_dist.sample(fetched); + } + + // Now that fetching is completed, update the PC to signify what the next + // cycle will be. Might want to move this to the beginning of this + // function so that the PC updates at the beginning of everything. + // Or might want to leave setting the PC to the main CPU, with fetch + // only changing the nextPC (will require correct determination of + // next PC). + if (fault == NoFault) { + DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC); + cpu->setPC(next_PC); + cpu->setNextPC(next_PC + instSize); + } else { + // If the issue was an icache miss, then we can just return and + // wait until it is handled. + if (_status == IcacheMissStall) { + return; + } + + // Handle the fault. + // This stage will not be able to continue until all the ROB + // slots are empty, at which point the fault can be handled. + // The only other way it can wake up is if a squash comes along + // and changes the PC. Not sure how to handle that case...perhaps + // have it handled by the upper level CPU class which peeks into the + // time buffer and sees if a squash comes along, in which case it + // changes the status. + + DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n"); + + _status = Blocked; +#if FULL_SYSTEM +// cpu->trap(fault); + // Send a signal to the ROB indicating that there's a trap from the + // fetch stage that needs to be handled. Need to indicate that + // there's a fault, and the fault type. +#else // !FULL_SYSTEM + fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); +#endif // FULL_SYSTEM + } +} diff --git a/src/cpu/o3/free_list.cc b/src/cpu/o3/free_list.cc new file mode 100644 index 000000000..6f0b4be1e --- /dev/null +++ b/src/cpu/o3/free_list.cc @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" + +#include "cpu/o3/free_list.hh" + +SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs) +{ + DPRINTF(FreeList, "FreeList: Creating new free list object.\n"); + + // DEBUG stuff. + freeIntRegsScoreboard.resize(numPhysicalIntRegs); + + freeFloatRegsScoreboard.resize(numPhysicalRegs); + + for (PhysRegIndex i = 0; i < numLogicalIntRegs; ++i) { + freeIntRegsScoreboard[i] = 0; + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. + for (PhysRegIndex i = numLogicalIntRegs; + i < numPhysicalIntRegs; ++i) + { + freeIntRegs.push(i); + + freeIntRegsScoreboard[i] = 1; + } + + for (PhysRegIndex i = 0; i < numPhysicalIntRegs + numLogicalFloatRegs; + ++i) + { + freeFloatRegsScoreboard[i] = 0; + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. Because the + // float registers' indices start where the physical registers end, + // some math must be done to determine where the free registers start. + for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs; + i < numPhysicalRegs; ++i) + { + freeFloatRegs.push(i); + + freeFloatRegsScoreboard[i] = 1; + } +} + diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh new file mode 100644 index 000000000..0b85dba1e --- /dev/null +++ b/src/cpu/o3/free_list.hh @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_FREE_LIST_HH__ +#define __CPU_O3_CPU_FREE_LIST_HH__ + +#include <iostream> +#include <queue> + +#include "arch/isa_traits.hh" +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/o3/comm.hh" + +/** + * FreeList class that simply holds the list of free integer and floating + * point registers. Can request for a free register of either type, and + * also send back free registers of either type. This is a very simple + * class, but it should be sufficient for most implementations. Like all + * other classes, it assumes that the indices for the floating point + * registers starts after the integer registers end. Hence the variable + * numPhysicalIntRegs is logically equivalent to the baseFP dependency. + * Note that + * while this most likely should be called FreeList, the name "FreeList" + * is used in a typedef within the CPU Policy, and therefore no class + * can be named simply "FreeList". + * @todo: Give a better name to the base FP dependency. + */ +class SimpleFreeList +{ + private: + /** The list of free integer registers. */ + std::queue<PhysRegIndex> freeIntRegs; + + /** The list of free floating point registers. */ + std::queue<PhysRegIndex> freeFloatRegs; + + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Total number of physical registers. */ + int numPhysicalRegs; + + /** DEBUG stuff below. */ + std::vector<int> freeIntRegsScoreboard; + + std::vector<bool> freeFloatRegsScoreboard; + + public: + SimpleFreeList(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs); + + inline PhysRegIndex getIntReg(); + + inline PhysRegIndex getFloatReg(); + + inline void addReg(PhysRegIndex freed_reg); + + inline void addIntReg(PhysRegIndex freed_reg); + + inline void addFloatReg(PhysRegIndex freed_reg); + + bool hasFreeIntRegs() + { return !freeIntRegs.empty(); } + + bool hasFreeFloatRegs() + { return !freeFloatRegs.empty(); } + + int numFreeIntRegs() + { return freeIntRegs.size(); } + + int numFreeFloatRegs() + { return freeFloatRegs.size(); } +}; + +inline PhysRegIndex +SimpleFreeList::getIntReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free integer register.\n"); + if (freeIntRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeIntRegs.front(); + + freeIntRegs.pop(); + + // DEBUG + assert(freeIntRegsScoreboard[free_reg]); + freeIntRegsScoreboard[free_reg] = 0; + + return(free_reg); +} + +inline PhysRegIndex +SimpleFreeList::getFloatReg() +{ + DPRINTF(Rename, "FreeList: Trying to get free float register.\n"); + if (freeFloatRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeFloatRegs.front(); + + freeFloatRegs.pop(); + + // DEBUG + assert(freeFloatRegsScoreboard[free_reg]); + freeFloatRegsScoreboard[free_reg] = 0; + + return(free_reg); +} + +inline void +SimpleFreeList::addReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing register %i.\n", freed_reg); + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. + if (freed_reg < numPhysicalIntRegs) { + freeIntRegs.push(freed_reg); + + // DEBUG + assert(freeIntRegsScoreboard[freed_reg] == false); + freeIntRegsScoreboard[freed_reg] = 1; + } else if (freed_reg < numPhysicalRegs) { + freeFloatRegs.push(freed_reg); + + // DEBUG + assert(freeFloatRegsScoreboard[freed_reg] == false); + freeFloatRegsScoreboard[freed_reg] = 1; + } +} + +inline void +SimpleFreeList::addIntReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg); + + // DEBUG + assert(!freeIntRegsScoreboard[freed_reg]); + freeIntRegsScoreboard[freed_reg] = 1; + + freeIntRegs.push(freed_reg); +} + +inline void +SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) +{ + DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg); + + // DEBUG + assert(!freeFloatRegsScoreboard[freed_reg]); + freeFloatRegsScoreboard[freed_reg] = 1; + + freeFloatRegs.push(freed_reg); +} + +#endif // __CPU_O3_CPU_FREE_LIST_HH__ diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc new file mode 100644 index 000000000..45b5610e7 --- /dev/null +++ b/src/cpu/o3/iew.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/iew_impl.hh" +#include "cpu/o3/inst_queue.hh" + +template class SimpleIEW<AlphaSimpleImpl>; diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh new file mode 100644 index 000000000..1e370d4e6 --- /dev/null +++ b/src/cpu/o3/iew.hh @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +//Todo: Update with statuses. +//Need to handle delaying writes to the writeback bus if it's full at the +//given time. + +#ifndef __CPU_O3_CPU_SIMPLE_IEW_HH__ +#define __CPU_O3_CPU_SIMPLE_IEW_HH__ + +#include <queue> + +#include "config/full_system.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/o3/comm.hh" + +template<class Impl> +class SimpleIEW +{ + private: + //Typedefs from Impl + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::IQ IQ; + typedef typename CPUPol::RenameMap RenameMap; + typedef typename CPUPol::LDSTQ LDSTQ; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::IssueStruct IssueStruct; + + friend class Impl::FullCPU; + public: + enum Status { + Running, + Blocked, + Idle, + Squashing, + Unblocking + }; + + private: + Status _status; + Status _issueStatus; + Status _exeStatus; + Status _wbStatus; + + public: + class WritebackEvent : public Event { + private: + DynInstPtr inst; + SimpleIEW<Impl> *iewStage; + + public: + WritebackEvent(DynInstPtr &_inst, SimpleIEW<Impl> *_iew); + + virtual void process(); + virtual const char *description(); + }; + + public: + SimpleIEW(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void squash(); + + void squashDueToBranch(DynInstPtr &inst); + + void squashDueToMem(DynInstPtr &inst); + + void block(); + + inline void unblock(); + + void wakeDependents(DynInstPtr &inst); + + void instToCommit(DynInstPtr &inst); + + private: + void dispatchInsts(); + + void executeInsts(); + + public: + void tick(); + + void iew(); + + //Interfaces to objects inside and outside of IEW. + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer<TimeStruct>::wire toRename; + + /** Rename instruction queue interface. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to get rename's output from rename queue. */ + typename TimeBuffer<RenameStruct>::wire fromRename; + + /** Issue stage queue. */ + TimeBuffer<IssueStruct> issueToExecQueue; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer<IssueStruct>::wire fromIssue; + + /** + * IEW stage time buffer. Holds ROB indices of instructions that + * can be marked as completed. + */ + TimeBuffer<IEWStruct> *iewQueue; + + /** Wire to write infromation heading to commit. */ + typename TimeBuffer<IEWStruct>::wire toCommit; + + //Will need internal queue to hold onto instructions coming from + //the rename stage in case of a stall. + /** Skid buffer between rename and IEW. */ + std::queue<RenameStruct> skidBuffer; + + protected: + /** Instruction queue. */ + IQ instQueue; + + LDSTQ ldstQueue; + +#if !FULL_SYSTEM + public: + void lsqWriteback(); +#endif + + private: + /** Pointer to rename map. Might not want this stage to directly + * access this though... + */ + RenameMap *renameMap; + + /** CPU interface. */ + FullCPU *cpu; + + private: + /** Commit to IEW delay, in ticks. */ + unsigned commitToIEWDelay; + + /** Rename to IEW delay, in ticks. */ + unsigned renameToIEWDelay; + + /** + * Issue to execute delay, in ticks. What this actually represents is + * the amount of time it takes for an instruction to wake up, be + * scheduled, and sent to a FU for execution. + */ + unsigned issueToExecuteDelay; + + /** Width of issue's read path, in instructions. The read path is both + * the skid buffer and the rename instruction queue. + * Note to self: is this really different than issueWidth? + */ + unsigned issueReadWidth; + + /** Width of issue, in instructions. */ + unsigned issueWidth; + + /** Width of execute, in instructions. Might make more sense to break + * down into FP vs int. + */ + unsigned executeWidth; + + /** Number of cycles stage has been squashing. Used so that the stage + * knows when it can start unblocking, which is when the previous stage + * has received the stall signal and clears up its outputs. + */ + unsigned cyclesSquashing; + + Stats::Scalar<> iewIdleCycles; + Stats::Scalar<> iewSquashCycles; + Stats::Scalar<> iewBlockCycles; + Stats::Scalar<> iewUnblockCycles; +// Stats::Scalar<> iewWBInsts; + Stats::Scalar<> iewDispatchedInsts; + Stats::Scalar<> iewDispSquashedInsts; + Stats::Scalar<> iewDispLoadInsts; + Stats::Scalar<> iewDispStoreInsts; + Stats::Scalar<> iewDispNonSpecInsts; + Stats::Scalar<> iewIQFullEvents; + Stats::Scalar<> iewExecutedInsts; + Stats::Scalar<> iewExecLoadInsts; + Stats::Scalar<> iewExecStoreInsts; + Stats::Scalar<> iewExecSquashedInsts; + Stats::Scalar<> memOrderViolationEvents; + Stats::Scalar<> predictedTakenIncorrect; +}; + +#endif // __CPU_O3_CPU_IEW_HH__ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh new file mode 100644 index 000000000..85217dd10 --- /dev/null +++ b/src/cpu/o3/iew_impl.hh @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// @todo: Fix the instantaneous communication among all the stages within +// iew. There's a clear delay between issue and execute, yet backwards +// communication happens simultaneously. +// Update the statuses for each stage. + +#include <queue> + +#include "base/timebuf.hh" +#include "cpu/o3/iew.hh" + +template<class Impl> +SimpleIEW<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, + SimpleIEW<Impl> *_iew) + : Event(&mainEventQueue, CPU_Tick_Pri), inst(_inst), iewStage(_iew) +{ + this->setFlags(Event::AutoDelete); +} + +template<class Impl> +void +SimpleIEW<Impl>::WritebackEvent::process() +{ + DPRINTF(IEW, "IEW: WRITEBACK EVENT!!!!\n"); + + // Need to insert instruction into queue to commit + iewStage->instToCommit(inst); + // Need to execute second half of the instruction, do actual writing to + // registers and such + inst->execute(); +} + +template<class Impl> +const char * +SimpleIEW<Impl>::WritebackEvent::description() +{ + return "LSQ writeback event"; +} + +template<class Impl> +SimpleIEW<Impl>::SimpleIEW(Params ¶ms) + : // Just make this time buffer really big for now + issueToExecQueue(5, 5), + instQueue(params), + ldstQueue(params), + commitToIEWDelay(params.commitToIEWDelay), + renameToIEWDelay(params.renameToIEWDelay), + issueToExecuteDelay(params.issueToExecuteDelay), + issueReadWidth(params.issueWidth), + issueWidth(params.issueWidth), + executeWidth(params.executeWidth) +{ + DPRINTF(IEW, "IEW: executeIntWidth: %i.\n", params.executeIntWidth); + _status = Idle; + _issueStatus = Idle; + _exeStatus = Idle; + _wbStatus = Idle; + + // Setup wire to read instructions coming from issue. + fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay); + + // Instruction queue needs the queue between issue and execute. + instQueue.setIssueToExecuteQueue(&issueToExecQueue); + + ldstQueue.setIEW(this); +} + +template <class Impl> +void +SimpleIEW<Impl>::regStats() +{ + instQueue.regStats(); + + iewIdleCycles + .name(name() + ".iewIdleCycles") + .desc("Number of cycles IEW is idle"); + + iewSquashCycles + .name(name() + ".iewSquashCycles") + .desc("Number of cycles IEW is squashing"); + + iewBlockCycles + .name(name() + ".iewBlockCycles") + .desc("Number of cycles IEW is blocking"); + + iewUnblockCycles + .name(name() + ".iewUnblockCycles") + .desc("Number of cycles IEW is unblocking"); + +// iewWBInsts; + + iewDispatchedInsts + .name(name() + ".iewDispatchedInsts") + .desc("Number of instructions dispatched to IQ"); + + iewDispSquashedInsts + .name(name() + ".iewDispSquashedInsts") + .desc("Number of squashed instructions skipped by dispatch"); + + iewDispLoadInsts + .name(name() + ".iewDispLoadInsts") + .desc("Number of dispatched load instructions"); + + iewDispStoreInsts + .name(name() + ".iewDispStoreInsts") + .desc("Number of dispatched store instructions"); + + iewDispNonSpecInsts + .name(name() + ".iewDispNonSpecInsts") + .desc("Number of dispatched non-speculative instructions"); + + iewIQFullEvents + .name(name() + ".iewIQFullEvents") + .desc("Number of times the IQ has become full, causing a stall"); + + iewExecutedInsts + .name(name() + ".iewExecutedInsts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .name(name() + ".iewExecLoadInsts") + .desc("Number of load instructions executed"); + + iewExecStoreInsts + .name(name() + ".iewExecStoreInsts") + .desc("Number of store instructions executed"); + + iewExecSquashedInsts + .name(name() + ".iewExecSquashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + memOrderViolationEvents + .name(name() + ".memOrderViolationEvents") + .desc("Number of memory order violations"); + + predictedTakenIncorrect + .name(name() + ".predictedTakenIncorrect") + .desc("Number of branches that were predicted taken incorrectly"); +} + +template<class Impl> +void +SimpleIEW<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(IEW, "IEW: Setting CPU pointer.\n"); + cpu = cpu_ptr; + + instQueue.setCPU(cpu_ptr); + ldstQueue.setCPU(cpu_ptr); +} + +template<class Impl> +void +SimpleIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(IEW, "IEW: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from commit. + fromCommit = timeBuffer->getWire(-commitToIEWDelay); + + // Setup wire to write information back to previous stages. + toRename = timeBuffer->getWire(0); + + // Instruction queue also needs main time buffer. + instQueue.setTimeBuffer(tb_ptr); +} + +template<class Impl> +void +SimpleIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to read information from rename queue. + fromRename = renameQueue->getWire(-renameToIEWDelay); +} + +template<class Impl> +void +SimpleIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr) +{ + DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to write instructions to commit. + toCommit = iewQueue->getWire(0); +} + +template<class Impl> +void +SimpleIEW<Impl>::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(IEW, "IEW: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template<class Impl> +void +SimpleIEW<Impl>::squash() +{ + DPRINTF(IEW, "IEW: Squashing all instructions.\n"); + _status = Squashing; + + // Tell the IQ to start squashing. + instQueue.squash(); + + // Tell the LDSTQ to start squashing. + ldstQueue.squash(fromCommit->commitInfo.doneSeqNum); +} + +template<class Impl> +void +SimpleIEW<Impl>::squashDueToBranch(DynInstPtr &inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toCommit->squash = true; + // Also send PC update information back to prior stages. + toCommit->squashedSeqNum = inst->seqNum; + toCommit->mispredPC = inst->readPC(); + toCommit->nextPC = inst->readNextPC(); + toCommit->branchMispredict = true; + // Prediction was incorrect, so send back inverse. + toCommit->branchTaken = inst->readNextPC() != + (inst->readPC() + sizeof(TheISA::MachInst)); +} + +template<class Impl> +void +SimpleIEW<Impl>::squashDueToMem(DynInstPtr &inst) +{ + DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", + inst->PC); + // Perhaps leave the squashing up to the ROB stage to tell it when to + // squash? + _status = Squashing; + + // Tell rename to squash through the time buffer. + toCommit->squash = true; + // Also send PC update information back to prior stages. + toCommit->squashedSeqNum = inst->seqNum; + toCommit->nextPC = inst->readNextPC(); +} + +template<class Impl> +void +SimpleIEW<Impl>::block() +{ + DPRINTF(IEW, "IEW: Blocking.\n"); + // Set the status to Blocked. + _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromRename); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template<class Impl> +inline void +SimpleIEW<Impl>::unblock() +{ + // Check if there's information in the skid buffer. If there is, then + // set status to unblocking, otherwise set it directly to running. + DPRINTF(IEW, "IEW: Reading instructions out of the skid " + "buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toRename->iewInfo.stall = true; + } else { + DPRINTF(IEW, "IEW: Stage is done unblocking.\n"); + _status = Running; + } +} + +template<class Impl> +void +SimpleIEW<Impl>::wakeDependents(DynInstPtr &inst) +{ + instQueue.wakeDependents(inst); +} + + +template<class Impl> +void +SimpleIEW<Impl>::instToCommit(DynInstPtr &inst) +{ + +} + +template <class Impl> +void +SimpleIEW<Impl>::dispatchInsts() +{ + //////////////////////////////////////// + // DISPATCH/ISSUE stage + //////////////////////////////////////// + + //Put into its own function? + //Add instructions to IQ if there are any instructions there + + // Check if there are any instructions coming from rename, and we're. + // not squashing. + if (fromRename->size > 0) { + int insts_to_add = fromRename->size; + + // Loop through the instructions, putting them in the instruction + // queue. + for (int inst_num = 0; inst_num < insts_to_add; ++inst_num) + { + DynInstPtr inst = fromRename->insts[inst_num]; + + // Make sure there's a valid instruction there. + assert(inst); + + DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", + inst->readPC()); + + // Be sure to mark these instructions as ready so that the + // commit stage can go ahead and execute them, and mark + // them as issued so the IQ doesn't reprocess them. + if (inst->isSquashed()) { + ++iewDispSquashedInsts; + continue; + } else if (instQueue.isFull()) { + DPRINTF(IEW, "IEW: Issue: IQ has become full.\n"); + // Call function to start blocking. + block(); + // Tell previous stage to stall. + toRename->iewInfo.stall = true; + + ++iewIQFullEvents; + break; + } else if (inst->isLoad()) { + DPRINTF(IEW, "IEW: Issue: Memory instruction " + "encountered, adding to LDSTQ.\n"); + + // Reserve a spot in the load store queue for this + // memory access. + ldstQueue.insertLoad(inst); + + ++iewDispLoadInsts; + } else if (inst->isStore()) { + ldstQueue.insertStore(inst); + + ++iewDispStoreInsts; + } else if (inst->isNonSpeculative()) { + DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " + "encountered, skipping.\n"); + + // Same hack as with stores. + inst->setCanCommit(); + + // Specificall insert it as nonspeculative. + instQueue.insertNonSpec(inst); + + ++iewDispNonSpecInsts; + + continue; + } else if (inst->isNop()) { + DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " + ", skipping.\n"); + + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + + continue; + } else if (inst->isExecuted()) { + assert(0 && "Instruction shouldn't be executed.\n"); + DPRINTF(IEW, "IEW: Issue: Executed branch encountered, " + "skipping.\n"); + + inst->setIssued(); + inst->setCanCommit(); + + instQueue.advanceTail(inst); + + continue; + } + + // If the instruction queue is not full, then add the + // instruction. + instQueue.insert(fromRename->insts[inst_num]); + + ++iewDispatchedInsts; + } + } +} + +template <class Impl> +void +SimpleIEW<Impl>::executeInsts() +{ + //////////////////////////////////////// + //EXECUTE/WRITEBACK stage + //////////////////////////////////////// + + //Put into its own function? + //Similarly should probably have separate execution for int vs FP. + // Above comment is handled by the issue queue only issuing a valid + // mix of int/fp instructions. + //Actually okay to just have one execution, buuuuuut will need + //somewhere that defines the execution latency of all instructions. + // @todo: Move to the FU pool used in the current full cpu. + + int fu_usage = 0; + bool fetch_redirect = false; + int inst_slot = 0; + int time_slot = 0; + + // Execute/writeback any instructions that are available. + for (int inst_num = 0; + fu_usage < executeWidth && /* Haven't exceeded available FU's. */ + inst_num < issueWidth && + fromIssue->insts[inst_num]; + ++inst_num) { + + DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); + + // Get instruction from issue's queue. + DynInstPtr inst = fromIssue->insts[inst_num]; + + DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + toCommit->insts[inst_num] = inst; + + ++iewExecSquashedInsts; + + continue; + } + + inst->setExecuted(); + + // If an instruction is executed, then count it towards FU usage. + ++fu_usage; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef()) { + DPRINTF(IEW, "IEW: Execute: Calculating address for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + ldstQueue.executeLoad(inst); + + ++iewExecLoadInsts; + } else if (inst->isStore()) { + ldstQueue.executeStore(inst); + + ++iewExecStoreInsts; + } else { + panic("IEW: Unexpected memory type!\n"); + } + + } else { + inst->execute(); + + ++iewExecutedInsts; + } + + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? + // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + (*iewQueue)[time_slot].insts[inst_slot]; + while ((*iewQueue)[time_slot].insts[inst_slot]) { + if (inst_slot < issueWidth) { + ++inst_slot; + } else { + ++time_slot; + inst_slot = 0; + } + + assert(time_slot < 5); + } + + // May actually have to work this out, especially with loads and stores + + // Add finished instruction to queue to commit. + (*iewQueue)[time_slot].insts[inst_slot] = inst; + (*iewQueue)[time_slot].size++; + + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. + // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. + if (!fetch_redirect) { + if (inst->mispredicted()) { + fetch_redirect = true; + + DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); + DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squashDueToBranch(inst); + + if (inst->predTaken()) { + predictedTakenIncorrect++; + } + } else if (ldstQueue.violation()) { + fetch_redirect = true; + + // Get the DynInst that caused the violation. + DynInstPtr violator = ldstQueue.getMemDepViolator(); + + DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Tell the instruction queue that a violation has occured. + instQueue.violation(inst, violator); + + // Squash. + squashDueToMem(inst); + + ++memOrderViolationEvents; + } + } + } +} + +template<class Impl> +void +SimpleIEW<Impl>::tick() +{ + // Considering putting all the state-determining stuff in this section. + + // Try to fill up issue queue with as many instructions as bandwidth + // allows. + // Decode should try to execute as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + + // Check if the stage is in a running status. + if (_status != Blocked && _status != Squashing) { + DPRINTF(IEW, "IEW: Status is not blocked, attempting to run " + "stage.\n"); + iew(); + + // If it's currently unblocking, check to see if it should switch + // to running. + if (_status == Unblocking) { + unblock(); + + ++iewUnblockCycles; + } + } else if (_status == Squashing) { + + DPRINTF(IEW, "IEW: Still squashing.\n"); + + // Check if stage should remain squashing. Stop squashing if the + // squash signal clears. + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + DPRINTF(IEW, "IEW: Done squashing, changing status to " + "running.\n"); + + _status = Running; + instQueue.stopSquash(); + } else { + instQueue.doSquash(); + } + + ++iewSquashCycles; + } else if (_status == Blocked) { + // Continue to tell previous stage to stall. + toRename->iewInfo.stall = true; + + // Check if possible stall conditions have cleared. + if (!fromCommit->commitInfo.stall && + !instQueue.isFull()) { + DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n"); + _status = Unblocking; + } + + // If there's still instructions coming from rename, continue to + // put them on the skid buffer. + if (fromRename->size == 0) { + block(); + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + } + + ++iewBlockCycles; + } + + // @todo: Maybe put these at the beginning, so if it's idle it can + // return early. + // Write back number of free IQ entries here. + toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); + + ldstQueue.writebackStores(); + + // Check the committed load/store signals to see if there's a load + // or store to commit. Also check if it's being told to execute a + // nonspeculative instruction. + // This is pretty inefficient... + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum); + ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum); + } + + if (fromCommit->commitInfo.nonSpecSeqNum != 0) { + instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum); + } + + DPRINTF(IEW, "IEW: IQ has %i free entries.\n", + instQueue.numFreeEntries()); +} + +template<class Impl> +void +SimpleIEW<Impl>::iew() +{ + // Might want to put all state checks in the tick() function. + // Check if being told to stall from commit. + if (fromCommit->commitInfo.stall) { + block(); + return; + } else if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + // Also check if commit is telling this stage to squash. + squash(); + return; + } + + dispatchInsts(); + + // Have the instruction queue try to schedule any ready instructions. + instQueue.scheduleReadyInsts(); + + executeInsts(); + + // Loop through the head of the time buffer and wake any dependents. + // These instructions are about to write back. In the simple model + // this loop can really happen within the previous loop, but when + // instructions have actual latencies, this loop must be separate. + // Also mark scoreboard that this instruction is finally complete. + // Either have IEW have direct access to rename map, or have this as + // part of backwards communication. + for (int inst_num = 0; inst_num < issueWidth && + toCommit->insts[inst_num]; inst_num++) + { + DynInstPtr inst = toCommit->insts[inst_num]; + + DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n", + inst->readPC()); + + if(!inst->isSquashed()) { + instQueue.wakeDependents(inst); + + for (int i = 0; i < inst->numDestRegs(); i++) + { + renameMap->markAsReady(inst->renamedDestRegIdx(i)); + } + } + } + + // Also should advance its own time buffers if the stage ran. + // Not the best place for it, but this works (hopefully). + issueToExecQueue.advance(); +} + +#if !FULL_SYSTEM +template<class Impl> +void +SimpleIEW<Impl>::lsqWriteback() +{ + ldstQueue.writebackAllInsts(); +} +#endif diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc new file mode 100644 index 000000000..2ff2282b4 --- /dev/null +++ b/src/cpu/o3/inst_queue.cc @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. +template class InstructionQueue<AlphaSimpleImpl>; + +template<> +unsigned +InstructionQueue<AlphaSimpleImpl>::DependencyEntry::mem_alloc_counter = 0; diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh new file mode 100644 index 000000000..43fe96c49 --- /dev/null +++ b/src/cpu/o3/inst_queue.hh @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_INST_QUEUE_HH__ +#define __CPU_O3_CPU_INST_QUEUE_HH__ + +#include <list> +#include <map> +#include <queue> +#include <vector> + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "sim/host.hh" + +/** + * A standard instruction queue class. It holds ready instructions, in + * order, in seperate priority queues to facilitate the scheduling of + * instructions. The IQ uses a separate linked list to track dependencies. + * Similar to the rename map and the free list, it expects that + * floating point registers have their indices start after the integer + * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer + * and 96-191 are fp). This remains true even for both logical and + * physical register indices. + */ +template <class Impl> +class InstructionQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; + + // Typedef of iterator through the list of instructions. Might be + // better to untie this from the FullCPU or pass its information to + // the stages. + typedef typename std::list<DynInstPtr>::iterator ListIt; + + /** + * Struct for comparing entries to be added to the priority queue. This + * gives reverse ordering to the instructions in terms of sequence + * numbers: the instructions with smaller sequence numbers (and hence + * are older) will be at the top of the priority queue. + */ + struct pqCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + /** + * Struct for comparing entries to be added to the set. This gives + * standard ordering in terms of sequence numbers. + */ + struct setCompare + { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum < rhs->seqNum; + } + }; + + typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare> + ReadyInstQueue; + + InstructionQueue(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu); + + void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + unsigned numFreeEntries(); + + bool isFull(); + + void insert(DynInstPtr &new_inst); + + void insertNonSpec(DynInstPtr &new_inst); + + void advanceTail(DynInstPtr &inst); + + void scheduleReadyInsts(); + + void scheduleNonSpec(const InstSeqNum &inst); + + void wakeDependents(DynInstPtr &completed_inst); + + void violation(DynInstPtr &store, DynInstPtr &faulting_load); + + // Change this to take in the sequence number + void squash(); + + void doSquash(); + + void stopSquash(); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit; + + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer<IssueStruct> *issueToExecuteQueue; + + /** The backwards time buffer. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + enum InstList { + Int, + Float, + Branch, + Memory, + Misc, + Squashed, + None + }; + + /** List of ready int instructions. Used to keep track of the order in + * which instructions should issue. + */ + ReadyInstQueue readyIntInsts; + + /** List of ready floating point instructions. */ + ReadyInstQueue readyFloatInsts; + + /** List of ready branch instructions. */ + ReadyInstQueue readyBranchInsts; + + /** List of ready miscellaneous instructions. */ + ReadyInstQueue readyMiscInsts; + + /** List of squashed instructions (which are still valid and in IQ). + * Implemented using a priority queue; the entries must contain both + * the IQ index and sequence number of each instruction so that + * ordering based on sequence numbers can be used. + */ + ReadyInstQueue squashedInsts; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is most efficient to be + * able to search by the sequence number alone. + */ + std::map<InstSeqNum, DynInstPtr> nonSpecInsts; + + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. */ + unsigned numEntries; + + /** The number of integer instructions that can be issued in one + * cycle. + */ + unsigned intWidth; + + /** The number of floating point instructions that can be issued + * in one cycle. + */ + unsigned floatWidth; + + /** The number of branches that can be issued in one cycle. */ + unsigned branchWidth; + + /** The number of memory instructions that can be issued in one cycle. */ + unsigned memoryWidth; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; + + //The number of physical registers in the CPU. + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; + + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + + ////////////////////////////////// + // Variables needed for squashing + ////////////////////////////////// + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Iterator that points to the youngest instruction in the IQ. */ + ListIt tail; + + /** Iterator that points to the last instruction that has been squashed. + * This will not be valid unless the IQ is in the process of squashing. + */ + ListIt squashIt; + + /////////////////////////////////// + // Dependency graph stuff + /////////////////////////////////// + + class DependencyEntry + { + public: + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; + + //This function, and perhaps this whole class, stand out a little + //bit as they don't fit a classification well. I want access + //to the underlying structure of the linked list, yet at + //the same time it feels like this should be something abstracted + //away. So for now it will sit here, within the IQ, until + //a better implementation is decided upon. + // This function probably shouldn't be within the entry... + void insert(DynInstPtr &new_inst); + + void remove(DynInstPtr &inst_to_remove); + + // Debug variable, remove when done testing. + static unsigned mem_alloc_counter; + }; + + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DependencyEntry *dependGraph; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. It + * is basically a secondary scoreboard, and should pretty much mirror + * the scoreboard that exists in the rename map. + */ + vector<bool> regScoreboard; + + bool addToDependents(DynInstPtr &new_inst); + void insertDependency(DynInstPtr &new_inst); + void createDependency(DynInstPtr &new_inst); + + void addIfReady(DynInstPtr &inst); + + private: + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); + + /** Debugging function to dump out the dependency graph. + */ + void dumpDependGraph(); + + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful sideaffects. + */ + void dumpLists(); + + Stats::Scalar<> iqInstsAdded; + Stats::Scalar<> iqNonSpecInstsAdded; +// Stats::Scalar<> iqIntInstsAdded; + Stats::Scalar<> iqIntInstsIssued; +// Stats::Scalar<> iqFloatInstsAdded; + Stats::Scalar<> iqFloatInstsIssued; +// Stats::Scalar<> iqBranchInstsAdded; + Stats::Scalar<> iqBranchInstsIssued; +// Stats::Scalar<> iqMemInstsAdded; + Stats::Scalar<> iqMemInstsIssued; +// Stats::Scalar<> iqMiscInstsAdded; + Stats::Scalar<> iqMiscInstsIssued; + Stats::Scalar<> iqSquashedInstsIssued; + Stats::Scalar<> iqLoopSquashStalls; + Stats::Scalar<> iqSquashedInstsExamined; + Stats::Scalar<> iqSquashedOperandsExamined; + Stats::Scalar<> iqSquashedNonSpecRemoved; + +}; + +#endif //__CPU_O3_CPU_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh new file mode 100644 index 000000000..048dc7c00 --- /dev/null +++ b/src/cpu/o3/inst_queue_impl.hh @@ -0,0 +1,1136 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: +// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// it; either do in reverse order, or have added instructions put into a +// different ready queue that, in scheduleRreadyInsts(), gets put onto the +// normal ready queue. This would however give only a one cycle delay, +// but probably is more flexible to actually add in a delay parameter than +// just running it backwards. + +#include <limits> +#include <vector> + +#include "sim/root.hh" + +#include "cpu/o3/inst_queue.hh" + +// Either compile error or max int due to sign extension. +// Hack to avoid compile warnings. +const InstSeqNum MaxInstSeqNum = std::numeric_limits<InstSeqNum>::max(); + +template <class Impl> +InstructionQueue<Impl>::InstructionQueue(Params ¶ms) + : memDepUnit(params), + numEntries(params.numIQEntries), + intWidth(params.executeIntWidth), + floatWidth(params.executeFloatWidth), + branchWidth(params.executeBranchWidth), + memoryWidth(params.executeMemoryWidth), + totalWidth(params.issueWidth), + numPhysIntRegs(params.numPhysIntRegs), + numPhysFloatRegs(params.numPhysFloatRegs), + commitToIEWDelay(params.commitToIEWDelay) +{ + // Initialize the number of free IQ entries. + freeEntries = numEntries; + + // Set the number of physical registers as the number of int + float + numPhysRegs = numPhysIntRegs + numPhysFloatRegs; + + DPRINTF(IQ, "IQ: There are %i physical registers.\n", numPhysRegs); + + //Create an entry for each physical register within the + //dependency graph. + dependGraph = new DependencyEntry[numPhysRegs]; + + // Resize the register scoreboard. + regScoreboard.resize(numPhysRegs); + + // Initialize all the head pointers to point to NULL, and all the + // entries as unready. + // Note that in actuality, the registers corresponding to the logical + // registers start off as ready. However this doesn't matter for the + // IQ as the instruction should have been correctly told if those + // registers are ready in rename. Thus it can all be initialized as + // unready. + for (int i = 0; i < numPhysRegs; ++i) + { + dependGraph[i].next = NULL; + dependGraph[i].inst = NULL; + regScoreboard[i] = false; + } + +} + +template <class Impl> +void +InstructionQueue<Impl>::regStats() +{ + iqInstsAdded + .name(name() + ".iqInstsAdded") + .desc("Number of instructions added to the IQ (excludes non-spec)") + .prereq(iqInstsAdded); + + iqNonSpecInstsAdded + .name(name() + ".iqNonSpecInstsAdded") + .desc("Number of non-speculative instructions added to the IQ") + .prereq(iqNonSpecInstsAdded); + +// iqIntInstsAdded; + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + +// iqFloatInstsAdded; + + iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float instructions issued") + .prereq(iqFloatInstsIssued); + +// iqBranchInstsAdded; + + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + +// iqMemInstsAdded; + + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + +// iqMiscInstsAdded; + + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqSquashedInstsIssued + .name(name() + ".iqSquashedInstsIssued") + .desc("Number of squashed instructions issued") + .prereq(iqSquashedInstsIssued); + + iqLoopSquashStalls + .name(name() + ".iqLoopSquashStalls") + .desc("Number of times issue loop had to restart due to squashed " + "inst; mainly for profiling") + .prereq(iqLoopSquashStalls); + + iqSquashedInstsExamined + .name(name() + ".iqSquashedInstsExamined") + .desc("Number of squashed instructions iterated over during squash;" + " mainly for profiling") + .prereq(iqSquashedInstsExamined); + + iqSquashedOperandsExamined + .name(name() + ".iqSquashedOperandsExamined") + .desc("Number of squashed operands that are examined and possibly " + "removed from graph") + .prereq(iqSquashedOperandsExamined); + + iqSquashedNonSpecRemoved + .name(name() + ".iqSquashedNonSpecRemoved") + .desc("Number of squashed non-spec instructions that were removed") + .prereq(iqSquashedNonSpecRemoved); + + // Tell mem dependence unit to reg stats as well. + memDepUnit.regStats(); +} + +template <class Impl> +void +InstructionQueue<Impl>::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + tail = cpu->instList.begin(); +} + +template <class Impl> +void +InstructionQueue<Impl>::setIssueToExecuteQueue( + TimeBuffer<IssueStruct> *i2e_ptr) +{ + DPRINTF(IQ, "IQ: Set the issue to execute queue.\n"); + issueToExecuteQueue = i2e_ptr; +} + +template <class Impl> +void +InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(IQ, "IQ: Set the time buffer.\n"); + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + +template <class Impl> +unsigned +InstructionQueue<Impl>::numFreeEntries() +{ + return freeEntries; +} + +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. +template <class Impl> +bool +InstructionQueue<Impl>::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + +template <class Impl> +void +InstructionQueue<Impl>::insert(DynInstPtr &new_inst) +{ + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != new_inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + + // Decrease the number of free entries. + --freeEntries; + + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (new_inst->isMemRef()) { + memDepUnit.insert(new_inst); + // Uh..forgot to look it up and put it on the proper dependency list + // if the instruction should not go yet. + } else { + // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); + } + + ++iqInstsAdded; + + assert(freeEntries == (numEntries - countInsts())); +} + +template <class Impl> +void +InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst) +{ + nonSpecInsts[inst->seqNum] = inst; + + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + // Decrease the number of free entries. + --freeEntries; + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (inst->isMemRef()) { + memDepUnit.insertNonSpec(inst); + } + + ++iqNonSpecInstsAdded; +} + +// Slightly hack function to advance the tail iterator in the case that +// the IEW stage issues an instruction that is not added to the IQ. This +// is needed in case a long chain of such instructions occurs. +// I don't think this is used anymore. +template <class Impl> +void +InstructionQueue<Impl>::advanceTail(DynInstPtr &inst) +{ + // Make sure the instruction is valid + assert(inst); + + DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", + inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + // If the IQ currently has nothing in it, then there's a possibility + // that the tail iterator is invalid (might have been pointing at an + // instruction that was retired). Reset the tail iterator. + if (freeEntries == numEntries) { + tail = cpu->instList.begin(); + } + + // Move the tail iterator. Instructions may not have been issued + // to the IQ, so we may have to increment the iterator more than once. + while ((*tail) != inst) { + tail++; + + // Make sure the tail iterator points at something legal. + assert(tail != cpu->instList.end()); + } + + assert(freeEntries <= numEntries); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); +} + +// Need to make sure the number of float and integer instructions +// issued does not exceed the total issue bandwidth. +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +template <class Impl> +void +InstructionQueue<Impl>::scheduleReadyInsts() +{ + DPRINTF(IQ, "IQ: Attempting to schedule ready instructions from " + "the IQ.\n"); + + int int_issued = 0; + int float_issued = 0; + int branch_issued = 0; + int memory_issued = 0; + int squashed_issued = 0; + int total_issued = 0; + + IssueStruct *i2e_info = issueToExecuteQueue->access(0); + + bool insts_available = !readyBranchInsts.empty() || + !readyIntInsts.empty() || + !readyFloatInsts.empty() || + !memDepUnit.empty() || + !readyMiscInsts.empty() || + !squashedInsts.empty(); + + // Note: Requires a globally defined constant. + InstSeqNum oldest_inst = MaxInstSeqNum; + InstList list_with_oldest = None; + + // Temporary values. + DynInstPtr int_head_inst; + DynInstPtr float_head_inst; + DynInstPtr branch_head_inst; + DynInstPtr mem_head_inst; + DynInstPtr misc_head_inst; + DynInstPtr squashed_head_inst; + + // Somewhat nasty code to look at all of the lists where issuable + // instructions are located, and choose the oldest instruction among + // those lists. Consider a rewrite in the future. + while (insts_available && total_issued < totalWidth) + { + // Set this to false. Each if-block is required to set it to true + // if there were instructions available this check. This will cause + // this loop to run once more than necessary, but avoids extra calls. + insts_available = false; + + oldest_inst = MaxInstSeqNum; + + list_with_oldest = None; + + if (!readyIntInsts.empty() && + int_issued < intWidth) { + + insts_available = true; + + int_head_inst = readyIntInsts.top(); + + if (int_head_inst->isSquashed()) { + readyIntInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } + + oldest_inst = int_head_inst->seqNum; + + list_with_oldest = Int; + } + + if (!readyFloatInsts.empty() && + float_issued < floatWidth) { + + insts_available = true; + + float_head_inst = readyFloatInsts.top(); + + if (float_head_inst->isSquashed()) { + readyFloatInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (float_head_inst->seqNum < oldest_inst) { + oldest_inst = float_head_inst->seqNum; + + list_with_oldest = Float; + } + } + + if (!readyBranchInsts.empty() && + branch_issued < branchWidth) { + + insts_available = true; + + branch_head_inst = readyBranchInsts.top(); + + if (branch_head_inst->isSquashed()) { + readyBranchInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (branch_head_inst->seqNum < oldest_inst) { + oldest_inst = branch_head_inst->seqNum; + + list_with_oldest = Branch; + } + + } + + if (!memDepUnit.empty() && + memory_issued < memoryWidth) { + + insts_available = true; + + mem_head_inst = memDepUnit.top(); + + if (mem_head_inst->isSquashed()) { + memDepUnit.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (mem_head_inst->seqNum < oldest_inst) { + oldest_inst = mem_head_inst->seqNum; + + list_with_oldest = Memory; + } + } + + if (!readyMiscInsts.empty()) { + + insts_available = true; + + misc_head_inst = readyMiscInsts.top(); + + if (misc_head_inst->isSquashed()) { + readyMiscInsts.pop(); + + ++iqLoopSquashStalls; + + continue; + } else if (misc_head_inst->seqNum < oldest_inst) { + oldest_inst = misc_head_inst->seqNum; + + list_with_oldest = Misc; + } + } + + if (!squashedInsts.empty()) { + + insts_available = true; + + squashed_head_inst = squashedInsts.top(); + + if (squashed_head_inst->seqNum < oldest_inst) { + list_with_oldest = Squashed; + } + + } + + DynInstPtr issuing_inst = NULL; + + switch (list_with_oldest) { + case None: + DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing " + "inst is %#x.\n", issuing_inst); + break; + + case Int: + issuing_inst = int_head_inst; + readyIntInsts.pop(); + ++int_issued; + DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Float: + issuing_inst = float_head_inst; + readyFloatInsts.pop(); + ++float_issued; + DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Branch: + issuing_inst = branch_head_inst; + readyBranchInsts.pop(); + ++branch_issued; + DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Memory: + issuing_inst = mem_head_inst; + + memDepUnit.pop(); + ++memory_issued; + DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Misc: + issuing_inst = misc_head_inst; + readyMiscInsts.pop(); + + ++iqMiscInstsIssued; + + DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n", + issuing_inst->readPC()); + break; + + case Squashed: + assert(0 && "Squashed insts should not issue any more!"); + squashedInsts.pop(); + // Set the squashed instruction as able to commit so that commit + // can just drop it from the ROB. This is a bit faked. + ++squashed_issued; + ++freeEntries; + + DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n", + squashed_head_inst->readPC()); + break; + } + + if (list_with_oldest != None && list_with_oldest != Squashed) { + i2e_info->insts[total_issued] = issuing_inst; + i2e_info->size++; + + issuing_inst->setIssued(); + + ++freeEntries; + ++total_issued; + } + + assert(freeEntries == (numEntries - countInsts())); + } + + iqIntInstsIssued += int_issued; + iqFloatInstsIssued += float_issued; + iqBranchInstsIssued += branch_issued; + iqMemInstsIssued += memory_issued; + iqSquashedInstsIssued += squashed_issued; +} + +template <class Impl> +void +InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst) +{ + DPRINTF(IQ, "IQ: Marking nonspeculative instruction with sequence " + "number %i as ready to execute.\n", inst); + + non_spec_it_t inst_it = nonSpecInsts.find(inst); + + assert(inst_it != nonSpecInsts.end()); + + // Mark this instruction as ready to issue. + (*inst_it).second->setCanIssue(); + + // Now schedule the instruction. + if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); + } else { + memDepUnit.nonSpecInstReady((*inst_it).second); + } + + nonSpecInsts.erase(inst_it); +} + +template <class Impl> +void +InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) +{ + DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); + //Look at the physical destination register of the DynInst + //and look it up on the dependency graph. Then mark as ready + //any instructions within the instruction queue. + DependencyEntry *curr; + + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. + + if (completed_inst->isMemRef()) { + memDepUnit.wakeDependents(completed_inst); + } + + for (int dest_reg_idx = 0; + dest_reg_idx < completed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle this. + if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + while (dependGraph[dest_reg].next) { + + curr = dependGraph[dest_reg].next; + + DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. + curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + dependGraph[dest_reg].next = curr->next; + + DependencyEntry::mem_alloc_counter--; + + curr->inst = NULL; + + delete curr; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg] = true; + } +} + +template <class Impl> +void +InstructionQueue<Impl>::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ + memDepUnit.violation(store, faulting_load); +} + +template <class Impl> +void +InstructionQueue<Impl>::squash() +{ + DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n"); + + // Read instruction sequence number of last instruction out of the + // time buffer. + squashedSeqNum = fromCommit->commitInfo.doneSeqNum; + + // Setup the squash iterator to point to the tail. + squashIt = tail; + + // Call doSquash if there are insts in the IQ + if (freeEntries != numEntries) { + doSquash(); + } + + // Also tell the memory dependence unit to squash. + memDepUnit.squash(squashedSeqNum); +} + +template <class Impl> +void +InstructionQueue<Impl>::doSquash() +{ + // Make sure the squash iterator isn't pointing to nothing. + assert(squashIt != cpu->instList.end()); + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum != 0); + + DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); + + // Squash any instructions younger than the squashed sequence number + // given. + while ((*squashIt)->seqNum > squashedSeqNum) { + DynInstPtr squashed_inst = (*squashIt); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (!squashed_inst->isIssued() && + !squashed_inst->isSquashedInIQ()) { + + // Remove the instruction from the dependency list. + // Hack for now: These below don't add themselves to the + // dependency list, so don't try to remove them. + if (!squashed_inst->isNonSpeculative()/* && + !squashed_inst->isStore()*/ + ) { + + for (int src_reg_idx = 0; + src_reg_idx < squashed_inst->numSrcRegs(); + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Only remove it from the dependency graph if it was + // placed there in the first place. + // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + + ++iqSquashedOperandsExamined; + } + + // Might want to remove producers as well. + } else { + nonSpecInsts[squashed_inst->seqNum] = NULL; + + nonSpecInsts.erase(squashed_inst->seqNum); + + ++iqSquashedNonSpecRemoved; + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + +// squashedInsts.push(squashed_inst); + squashed_inst->setIssued(); + squashed_inst->setCanCommit(); + + ++freeEntries; + + DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + + --squashIt; + ++iqSquashedInstsExamined; + } + + assert(freeEntries <= numEntries); + + if (freeEntries == numEntries) { + tail = cpu->instList.end(); + } + +} + +template <class Impl> +void +InstructionQueue<Impl>::stopSquash() +{ + // Clear up the squash variables to ensure that squashing doesn't + // get called improperly. + squashedSeqNum = 0; + + squashIt = cpu->instList.end(); +} + +template <class Impl> +void +InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + this->next = new_entry; + + ++mem_alloc_counter; +} + +template <class Impl> +void +InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove) +{ + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + // Find the instruction to remove within the dependency linked list. + while(curr->inst != inst_to_remove) + { + prev = curr; + curr = curr->next; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + --mem_alloc_counter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template <class Impl> +bool +InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + + dependGraph[src_reg].insert(new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. + new_inst->markSrcRegReady(); + } + } + } + + return return_val; +} + +template <class Impl> +void +InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst) +{ + //Actually nothing really needs to be marked when an + //instruction becomes the producer of a register's value, + //but for convenience a ptr to the producing instruction will + //be placed in the head node of the dependency links. + int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. + if (dest_reg >= numPhysRegs) { + continue; + } + + dependGraph[dest_reg].inst = new_inst; + + if (dependGraph[dest_reg].next) { + dumpDependGraph(); + panic("IQ: Dependency graph not empty!"); + } + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} + +template <class Impl> +void +InstructionQueue<Impl>::addIfReady(DynInstPtr &inst) +{ + //If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + + //Add the instruction to the proper ready list. + if (inst->isControl()) { + + DPRINTF(IQ, "IQ: Branch instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyBranchInsts.push(inst); + + } else if (inst->isMemRef()) { + + DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n"); + + // Message to the mem dependence unit that this instruction has + // its registers ready. + + memDepUnit.regsReady(inst); + +#if 0 + if (memDepUnit.readyToIssue(inst)) { + DPRINTF(IQ, "IQ: Memory instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyMemInsts.push(inst); + } else { + // Make dependent on the store. + // Will need some way to get the store instruction it should + // be dependent upon; then when the store issues it can + // put the instruction on the ready list. + // Yet another tree? + assert(0 && "Instruction has no way to actually issue"); + } +#endif + + } else if (inst->isInteger()) { + + DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyIntInsts.push(inst); + + } else if (inst->isFloating()) { + + DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " + "putting it onto the ready list, PC %#x.\n", + inst->readPC()); + readyFloatInsts.push(inst); + + } else { + DPRINTF(IQ, "IQ: Miscellaneous instruction is ready to issue, " + "putting it onto the ready list, PC %#x..\n", + inst->readPC()); + + readyMiscInsts.push(inst); + } + } +} + +/* + * Caution, this function must not be called prior to tail being updated at + * least once, otherwise it will fail the assertion. This is because + * instList.begin() actually changes upon the insertion of an element into the + * list when the list is empty. + */ +template <class Impl> +int +InstructionQueue<Impl>::countInsts() +{ + ListIt count_it = cpu->instList.begin(); + int total_insts = 0; + + if (tail == cpu->instList.end()) + return 0; + + while (count_it != tail) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } + + ++count_it; + + assert(count_it != cpu->instList.end()); + } + + // Need to count the tail iterator as well. + if (count_it != cpu->instList.end() && + (*count_it) && + !(*count_it)->isIssued()) { + ++total_insts; + } + + return total_insts; +} + +template <class Impl> +void +InstructionQueue<Impl>::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x consumer: ", i, + curr->inst->readPC()); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x ", curr->inst->readPC()); + } + + cprintf("\n"); + } +} + +template <class Impl> +void +InstructionQueue<Impl>::dumpLists() +{ + cprintf("Ready integer list size: %i\n", readyIntInsts.size()); + + cprintf("Ready float list size: %i\n", readyFloatInsts.size()); + + cprintf("Ready branch list size: %i\n", readyBranchInsts.size()); + + cprintf("Ready misc list size: %i\n", readyMiscInsts.size()); + + cprintf("Squashed list size: %i\n", squashedInsts.size()); + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + non_spec_it_t non_spec_it = nonSpecInsts.begin(); + + cprintf("Non speculative list: "); + + while (non_spec_it != nonSpecInsts.end()) { + cprintf("%#x ", (*non_spec_it).second->readPC()); + ++non_spec_it; + } + + cprintf("\n"); + +} diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc new file mode 100644 index 000000000..9c1e7f9d8 --- /dev/null +++ b/src/cpu/o3/mem_dep_unit.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/store_set.hh" +#include "cpu/o3/mem_dep_unit_impl.hh" + +// Force instantation of memory dependency unit using store sets and +// AlphaSimpleImpl. +template class MemDepUnit<StoreSet, AlphaSimpleImpl>; diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh new file mode 100644 index 000000000..ca63577a1 --- /dev/null +++ b/src/cpu/o3/mem_dep_unit.hh @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_MEM_DEP_UNIT_HH__ +#define __CPU_O3_CPU_MEM_DEP_UNIT_HH__ + +#include <map> +#include <set> + +#include "base/statistics.hh" +#include "cpu/inst_seq.hh" + +/** + * Memory dependency unit class. This holds the memory dependence predictor. + * As memory operations are issued to the IQ, they are also issued to this + * unit, which then looks up the prediction as to what they are dependent + * upon. This unit must be checked prior to a memory operation being able + * to issue. Although this is templated, it's somewhat hard to make a generic + * memory dependence unit. This one is mostly for store sets; it will be + * quite limited in what other memory dependence predictions it can also + * utilize. Thus this class should be most likely be rewritten for other + * dependence prediction schemes. + */ +template <class MemDepPred, class Impl> +class MemDepUnit { + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + MemDepUnit(Params ¶ms); + + void regStats(); + + void insert(DynInstPtr &inst); + + void insertNonSpec(DynInstPtr &inst); + + // Will want to make this operation relatively fast. Right now it + // is somewhat slow. + DynInstPtr &top(); + + void pop(); + + void regsReady(DynInstPtr &inst); + + void nonSpecInstReady(DynInstPtr &inst); + + void issue(DynInstPtr &inst); + + void wakeDependents(DynInstPtr &inst); + + void squash(const InstSeqNum &squashed_num); + + void violation(DynInstPtr &store_inst, DynInstPtr &violating_load); + + inline bool empty() + { return readyInsts.empty(); } + + private: + typedef typename std::set<InstSeqNum>::iterator sn_it_t; + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator dyn_it_t; + + // Forward declarations so that the following two typedefs work. + class Dependency; + class ltDependency; + + typedef typename std::set<Dependency, ltDependency>::iterator dep_it_t; + typedef typename std::map<InstSeqNum, vector<dep_it_t> >::iterator + sd_it_t; + + struct Dependency { + Dependency(const InstSeqNum &_seqNum) + : seqNum(_seqNum), regsReady(0), memDepReady(0) + { } + + Dependency(const InstSeqNum &_seqNum, bool _regsReady, + bool _memDepReady) + : seqNum(_seqNum), regsReady(_regsReady), + memDepReady(_memDepReady) + { } + + InstSeqNum seqNum; + mutable bool regsReady; + mutable bool memDepReady; + mutable sd_it_t storeDep; + }; + + struct ltDependency { + bool operator() (const Dependency &lhs, const Dependency &rhs) + { + return lhs.seqNum < rhs.seqNum; + } + }; + + inline void moveToReady(dep_it_t &woken_inst); + + /** List of instructions that have passed through rename, yet are still + * waiting on either a memory dependence to resolve or source registers to + * become available before they can issue. + */ + std::set<Dependency, ltDependency> waitingInsts; + + /** List of instructions that have all their predicted memory dependences + * resolved and their source registers ready. + */ + std::set<InstSeqNum> readyInsts; + + // Change this to hold a vector of iterators, which will point to the + // entry of the waiting instructions. + /** List of stores' sequence numbers, each of which has a vector of + * iterators. The iterators point to the appropriate node within + * waitingInsts that has the depenendent instruction. + */ + std::map<InstSeqNum, vector<dep_it_t> > storeDependents; + + // For now will implement this as a map...hash table might not be too + // bad, or could move to something that mimics the current dependency + // graph. + std::map<InstSeqNum, DynInstPtr> memInsts; + + // Iterator pointer to the top instruction which has is ready. + // Is set by the top() call. + dyn_it_t topInst; + + /** The memory dependence predictor. It is accessed upon new + * instructions being added to the IQ, and responds by telling + * this unit what instruction the newly added instruction is dependent + * upon. + */ + MemDepPred depPred; + + Stats::Scalar<> insertedLoads; + Stats::Scalar<> insertedStores; + Stats::Scalar<> conflictingLoads; + Stats::Scalar<> conflictingStores; +}; + +#endif // __CPU_O3_CPU_MEM_DEP_UNIT_HH__ diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh new file mode 100644 index 000000000..296db4c4e --- /dev/null +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <map> + +#include "cpu/o3/mem_dep_unit.hh" + +template <class MemDepPred, class Impl> +MemDepUnit<MemDepPred, Impl>::MemDepUnit(Params ¶ms) + : depPred(params.SSITSize, params.LFSTSize) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n"); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::regStats() +{ + insertedLoads + .name(name() + ".memDep.insertedLoads") + .desc("Number of loads inserted to the mem dependence unit."); + + insertedStores + .name(name() + ".memDep.insertedStores") + .desc("Number of stores inserted to the mem dependence unit."); + + conflictingLoads + .name(name() + ".memDep.conflictingLoads") + .desc("Number of conflicting loads."); + + conflictingStores + .name(name() + ".memDep.conflictingStores") + .desc("Number of conflicting stores."); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency unresolved_dependencies(inst_seq_num); + + InstSeqNum producing_store = depPred.checkInst(inst->readPC()); + + if (producing_store == 0 || + storeDependents.find(producing_store) == storeDependents.end()) { + + DPRINTF(MemDepUnit, "MemDepUnit: No dependency for inst PC " + "%#x.\n", inst->readPC()); + + unresolved_dependencies.storeDep = storeDependents.end(); + + if (inst->readyToIssue()) { + readyInsts.insert(inst_seq_num); + } else { + unresolved_dependencies.memDepReady = true; + + waitingInsts.insert(unresolved_dependencies); + } + } else { + DPRINTF(MemDepUnit, "MemDepUnit: Adding to dependency list; " + "inst PC %#x is dependent on seq num %i.\n", + inst->readPC(), producing_store); + + if (inst->readyToIssue()) { + unresolved_dependencies.regsReady = true; + } + + // Find the store that this instruction is dependent on. + sd_it_t store_loc = storeDependents.find(producing_store); + + assert(store_loc != storeDependents.end()); + + // Record the location of the store that this instruction is + // dependent on. + unresolved_dependencies.storeDep = store_loc; + + // If it's not already ready, then add it to the renamed + // list and the dependencies. + dep_it_t inst_loc = + (waitingInsts.insert(unresolved_dependencies)).first; + + // Add this instruction to the list of dependents. + (*store_loc).second.push_back(inst_loc); + + assert(!(*store_loc).second.empty()); + + if (inst->isLoad()) { + ++conflictingLoads; + } else { + ++conflictingStores; + } + } + + if (inst->isStore()) { + DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", + inst->readPC()); + + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(storeDependents.find(inst_seq_num) == storeDependents.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + storeDependents[inst_seq_num]; + + assert(storeDependents.size() != 0); + + ++insertedStores; + + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("MemDepUnit: Unknown type! (most likely a barrier)."); + } + + memInsts[inst_seq_num] = inst; +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst) +{ + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency non_spec_inst(inst_seq_num); + + non_spec_inst.storeDep = storeDependents.end(); + + waitingInsts.insert(non_spec_inst); + + // Might want to turn this part into an inline function or something. + // It's shared between both insert functions. + if (inst->isStore()) { + DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", + inst->readPC()); + + depPred.insertStore(inst->readPC(), inst_seq_num); + + // Make sure this store isn't already in this list. + assert(storeDependents.find(inst_seq_num) == storeDependents.end()); + + // Put a dependency entry in at the store's sequence number. + // Uh, not sure how this works...I want to create an entry but + // I don't have anything to put into the value yet. + storeDependents[inst_seq_num]; + + assert(storeDependents.size() != 0); + + ++insertedStores; + + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("MemDepUnit: Unknown type! (most likely a barrier)."); + } + + memInsts[inst_seq_num] = inst; +} + +template <class MemDepPred, class Impl> +typename Impl::DynInstPtr & +MemDepUnit<MemDepPred, Impl>::top() +{ + topInst = memInsts.find( (*readyInsts.begin()) ); + + DPRINTF(MemDepUnit, "MemDepUnit: Top instruction is PC %#x.\n", + (*topInst).second->readPC()); + + return (*topInst).second; +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::pop() +{ + DPRINTF(MemDepUnit, "MemDepUnit: Removing instruction PC %#x.\n", + (*topInst).second->readPC()); + + wakeDependents((*topInst).second); + + issue((*topInst).second); + + memInsts.erase(topInst); + + topInst = memInsts.end(); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Marking registers as ready for " + "instruction PC %#x.\n", + inst->readPC()); + + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency inst_to_find(inst_seq_num); + + dep_it_t waiting_inst = waitingInsts.find(inst_to_find); + + assert(waiting_inst != waitingInsts.end()); + + if ((*waiting_inst).memDepReady) { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction has its memory " + "dependencies resolved, adding it to the ready list.\n"); + + moveToReady(waiting_inst); + } else { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction still waiting on " + "memory dependency.\n"); + + (*waiting_inst).regsReady = true; + } +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Marking non speculative " + "instruction PC %#x as ready.\n", + inst->readPC()); + + InstSeqNum inst_seq_num = inst->seqNum; + + Dependency inst_to_find(inst_seq_num); + + dep_it_t waiting_inst = waitingInsts.find(inst_to_find); + + assert(waiting_inst != waitingInsts.end()); + + moveToReady(waiting_inst); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst) +{ + assert(readyInsts.find(inst->seqNum) != readyInsts.end()); + + DPRINTF(MemDepUnit, "MemDepUnit: Issuing instruction PC %#x.\n", + inst->readPC()); + + // Remove the instruction from the ready list. + readyInsts.erase(inst->seqNum); + + depPred.issued(inst->readPC(), inst->seqNum, inst->isStore()); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst) +{ + // Only stores have dependents. + if (!inst->isStore()) { + return; + } + + // Wake any dependencies. + sd_it_t sd_it = storeDependents.find(inst->seqNum); + + // If there's no entry, then return. Really there should only be + // no entry if the instruction is a load. + if (sd_it == storeDependents.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Instruction PC %#x, sequence " + "number %i has no dependents.\n", + inst->readPC(), inst->seqNum); + + return; + } + + for (int i = 0; i < (*sd_it).second.size(); ++i ) { + dep_it_t woken_inst = (*sd_it).second[i]; + + DPRINTF(MemDepUnit, "MemDepUnit: Waking up a dependent inst, " + "sequence number %i.\n", + (*woken_inst).seqNum); +#if 0 + // Should we have reached instructions that are actually squashed, + // there will be no more useful instructions in this dependency + // list. Break out early. + if (waitingInsts.find(woken_inst) == waitingInsts.end()) { + DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x " + "are squashed, starting at SN %i. Breaking early.\n", + inst->readPC(), woken_inst); + break; + } +#endif + + if ((*woken_inst).regsReady) { + moveToReady(woken_inst); + } else { + (*woken_inst).memDepReady = true; + } + } + + storeDependents.erase(sd_it); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num) +{ + + if (!waitingInsts.empty()) { + dep_it_t waiting_it = waitingInsts.end(); + + --waiting_it; + + // Remove entries from the renamed list as long as we haven't reached + // the end and the entries continue to be younger than the squashed. + while (!waitingInsts.empty() && + (*waiting_it).seqNum > squashed_num) + { + if (!(*waiting_it).memDepReady && + (*waiting_it).storeDep != storeDependents.end()) { + sd_it_t sd_it = (*waiting_it).storeDep; + + // Make sure the iterator that the store has pointing + // back is actually to this instruction. + assert((*sd_it).second.back() == waiting_it); + + // Now remove this from the store's list of dependent + // instructions. + (*sd_it).second.pop_back(); + } + + waitingInsts.erase(waiting_it--); + } + } + + if (!readyInsts.empty()) { + sn_it_t ready_it = readyInsts.end(); + + --ready_it; + + // Same for the ready list. + while (!readyInsts.empty() && + (*ready_it) > squashed_num) + { + readyInsts.erase(ready_it--); + } + } + + if (!storeDependents.empty()) { + sd_it_t dep_it = storeDependents.end(); + + --dep_it; + + // Same for the dependencies list. + while (!storeDependents.empty() && + (*dep_it).first > squashed_num) + { + // This store's list of dependent instructions should be empty. + assert((*dep_it).second.empty()); + + storeDependents.erase(dep_it--); + } + } + + // Tell the dependency predictor to squash as well. + depPred.squash(squashed_num); +} + +template <class MemDepPred, class Impl> +void +MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst, + DynInstPtr &violating_load) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Passing violating PCs to store sets," + " load: %#x, store: %#x\n", violating_load->readPC(), + store_inst->readPC()); + // Tell the memory dependence unit of the violation. + depPred.violation(violating_load->readPC(), store_inst->readPC()); +} + +template <class MemDepPred, class Impl> +inline void +MemDepUnit<MemDepPred, Impl>::moveToReady(dep_it_t &woken_inst) +{ + DPRINTF(MemDepUnit, "MemDepUnit: Adding instruction sequence number %i " + "to the ready list.\n", (*woken_inst).seqNum); + + // Add it to the ready list. + readyInsts.insert((*woken_inst).seqNum); + + // Remove it from the waiting instructions. + waitingInsts.erase(woken_inst); +} diff --git a/src/cpu/o3/ras.cc b/src/cpu/o3/ras.cc new file mode 100644 index 000000000..0a7d6ca63 --- /dev/null +++ b/src/cpu/o3/ras.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/ras.hh" + +ReturnAddrStack::ReturnAddrStack(unsigned _numEntries) + : numEntries(_numEntries), usedEntries(0), + tos(0) +{ + addrStack = new Addr[numEntries]; + + for (int i = 0; i < numEntries; ++i) + addrStack[i] = 0; +} + +void +ReturnAddrStack::push(const Addr &return_addr) +{ + incrTos(); + + addrStack[tos] = return_addr; + + if (usedEntries != numEntries) { + ++usedEntries; + } +} + +void +ReturnAddrStack::pop() +{ + // Not sure it's possible to really track usedEntries properly. +// assert(usedEntries > 0); + + if (usedEntries > 0) { + --usedEntries; + } + + decrTos(); +} + +void +ReturnAddrStack::restore(unsigned top_entry_idx, + const Addr &restored_target) +{ + tos = top_entry_idx; + + addrStack[tos] = restored_target; +} diff --git a/src/cpu/o3/ras.hh b/src/cpu/o3/ras.hh new file mode 100644 index 000000000..46d98181e --- /dev/null +++ b/src/cpu/o3/ras.hh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_RAS_HH__ +#define __CPU_O3_CPU_RAS_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" + +class ReturnAddrStack +{ + public: + ReturnAddrStack(unsigned numEntries); + + Addr top() + { return addrStack[tos]; } + + unsigned topIdx() + { return tos; } + + void push(const Addr &return_addr); + + void pop(); + + void restore(unsigned top_entry_idx, const Addr &restored_target); + + private: + inline void incrTos() + { if (++tos == numEntries) tos = 0; } + + inline void decrTos() + { tos = (tos == 0 ? numEntries - 1 : tos - 1); } + + Addr *addrStack; + + unsigned numEntries; + + unsigned usedEntries; + + unsigned tos; +}; + +#endif // __CPU_O3_CPU_RAS_HH__ diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh new file mode 100644 index 000000000..a5cfa8f3c --- /dev/null +++ b/src/cpu/o3/regfile.hh @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_REGFILE_HH__ +#define __CPU_O3_CPU_REGFILE_HH__ + +// @todo: Destructor + +#include "arch/isa_traits.hh" +#include "arch/faults.hh" +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/o3/comm.hh" + +#if FULL_SYSTEM +#include "kern/kernel_stats.hh" + +#endif + +// This really only depends on the ISA, and not the Impl. It might be nicer +// to see if I can make it depend on nothing... +// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, +// and should go in the AlphaFullCPU. + +template <class Impl> +class PhysRegFile +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; + + //Note that most of the definitions of the IntReg, FloatReg, etc. exist + //within the Impl/ISA class and not within this PhysRegFile class. + + //Will need some way to allow stuff like swap_palshadow to access the + //correct registers. Might require code changes to swap_palshadow and + //other execution contexts. + + //Will make these registers public for now, but they probably should + //be private eventually with some accessor functions. + public: + typedef typename Impl::FullCPU FullCPU; + + PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs); + + //Everything below should be pretty well identical to the normal + //register file that exists within AlphaISA class. + //The duplication is unfortunate but it's better than having + //different ways to access certain registers. + + //Add these in later when everything else is in place +// void serialize(std::ostream &os); +// void unserialize(Checkpoint *cp, const std::string §ion); + + uint64_t readIntReg(PhysRegIndex reg_idx) + { + assert(reg_idx < numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Access to int register %i, has data " + "%i\n", int(reg_idx), intRegFile[reg_idx]); + return intRegFile[reg_idx]; + } + + FloatReg readFloatReg(PhysRegIndex reg_idx, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatReg floatReg = floatRegFile.readReg(reg_idx, width); + + DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has " + "data %8.8d\n", int(reg_idx), (double)floatReg); + + return floatReg; + } + + FloatReg readFloatReg(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatReg floatReg = floatRegFile.readReg(reg_idx); + + DPRINTF(IEW, "RegFile: Access to float register %i, has " + "data %8.8d\n", int(reg_idx), (double)floatReg); + + return floatReg; + } + + FloatRegBits readFloatRegBits(PhysRegIndex reg_idx, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width); + + DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, " + "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); + + return floatRegBits; + } + + FloatRegBits readFloatRegBits(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx); + + DPRINTF(IEW, "RegFile: Access to float register %i as int, " + "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); + + return floatRegBits; + } + + void setIntReg(PhysRegIndex reg_idx, uint64_t val) + { + assert(reg_idx < numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n", + int(reg_idx), val); + + intRegFile[reg_idx] = val; + } + + void setFloatReg(PhysRegIndex reg_idx, FloatReg val, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8d\n", + int(reg_idx), (double)val); + + floatRegFile.setReg(reg_idx, val, width); + } + + void setFloatReg(PhysRegIndex reg_idx, FloatReg val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8d\n", + int(reg_idx), (double)val); + + floatRegFile.setReg(reg_idx, val); + } + + void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), (uint64_t)val); + + floatRegFile.setRegBits(reg_idx, val, width); + } + + void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), (uint64_t)val); + + floatRegFile.setRegBits(reg_idx, val); + } + + uint64_t readPC() + { + return pc; + } + + void setPC(uint64_t val) + { + pc = val; + } + + void setNextPC(uint64_t val) + { + npc = val; + } + + //Consider leaving this stuff and below in some implementation specific + //file as opposed to the general register file. Or have a derived class. + MiscReg readMiscReg(int misc_reg) + { + // Dummy function for now. + // @todo: Fix this once proxy XC is used. + return 0; + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + // Dummy function for now. + // @todo: Fix this once proxy XC is used. + return NoFault; + } + +#if FULL_SYSTEM + int readIntrFlag() { return intrflag; } + void setIntrFlag(int val) { intrflag = val; } +#endif + + // These should be private eventually, but will be public for now + // so that I can hack around the initregs issue. + public: + /** (signed) integer register file. */ + IntReg *intRegFile; + + /** Floating point register file. */ + FloatReg *floatRegFile; + + /** Miscellaneous register file. */ + MiscRegFile miscRegs; + + /** Program counter. */ + Addr pc; + + /** Next-cycle program counter. */ + Addr npc; + +#if FULL_SYSTEM + private: + // This is ISA specifc stuff; remove it eventually once ISAImpl is used +// IntReg palregs[NumIntRegs]; // PAL shadow registers + int intrflag; // interrupt flag + bool pal_shadow; // using pal_shadow registers +#endif + + private: + FullCPU *cpu; + + public: + void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } + + unsigned numPhysicalIntRegs; + unsigned numPhysicalFloatRegs; +}; + +template <class Impl> +PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs) + : numPhysicalIntRegs(_numPhysicalIntRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs) +{ + intRegFile = new IntReg[numPhysicalIntRegs]; + floatRegFile = new FloatReg[numPhysicalFloatRegs]; + + memset(intRegFile, 0, sizeof(*intRegFile)); + memset(floatRegFile, 0, sizeof(*floatRegFile)); +} + +#endif // __CPU_O3_CPU_REGFILE_HH__ diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc new file mode 100644 index 000000000..6e9ee23da --- /dev/null +++ b/src/cpu/o3/rename.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/rename_impl.hh" + +template class SimpleRename<AlphaSimpleImpl>; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh new file mode 100644 index 000000000..07b442964 --- /dev/null +++ b/src/cpu/o3/rename.hh @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: +// Fix up trap and barrier handling. +// May want to have different statuses to differentiate the different stall +// conditions. + +#ifndef __CPU_O3_CPU_SIMPLE_RENAME_HH__ +#define __CPU_O3_CPU_SIMPLE_RENAME_HH__ + +#include <list> + +#include "base/statistics.hh" +#include "base/timebuf.hh" + +// Will need rename maps for both the int reg file and fp reg file. +// Or change rename map class to handle both. (RegFile handles both.) +template<class Impl> +class SimpleRename +{ + public: + // Typedefs from the Impl. + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + // Typedefs from the CPUPol + typedef typename CPUPol::FreeList FreeList; + typedef typename CPUPol::RenameMap RenameMap; + + // Typedefs from the ISA. + typedef TheISA::RegIndex RegIndex; + + public: + // Rename will block if ROB becomes full or issue queue becomes full, + // or there are no free registers to rename to. + // Only case where rename squashes is if IEW squashes. + enum Status { + Running, + Idle, + Squashing, + Blocked, + Unblocking, + BarrierStall + }; + + private: + Status _status; + + public: + SimpleRename(Params ¶ms); + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr); + + void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr); + + void setRenameMap(RenameMap *rm_ptr); + + void setFreeList(FreeList *fl_ptr); + + void dumpHistory(); + + void tick(); + + void rename(); + + void squash(); + + private: + void block(); + + inline void unblock(); + + void doSquash(); + + void removeFromHistory(InstSeqNum inst_seq_num); + + inline void renameSrcRegs(DynInstPtr &inst); + + inline void renameDestRegs(DynInstPtr &inst); + + inline int calcFreeROBEntries(); + + inline int calcFreeIQEntries(); + + /** Holds the previous information for each rename. + * Note that often times the inst may have been deleted, so only access + * the pointer for the address and do not dereference it. + */ + struct RenameHistory { + RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg, + PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg) + : instSeqNum(_instSeqNum), archReg(_archReg), + newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg), + placeHolder(false) + { + } + + /** Constructor used specifically for cases where a place holder + * rename history entry is being made. + */ + RenameHistory(InstSeqNum _instSeqNum) + : instSeqNum(_instSeqNum), archReg(0), newPhysReg(0), + prevPhysReg(0), placeHolder(true) + { + } + + InstSeqNum instSeqNum; + RegIndex archReg; + PhysRegIndex newPhysReg; + PhysRegIndex prevPhysReg; + bool placeHolder; + }; + + std::list<RenameHistory> historyBuffer; + + /** CPU interface. */ + FullCPU *cpu; + + // Interfaces to objects outside of rename. + /** Time buffer interface. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to get IEW's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromIEW; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Wire to write infromation heading to previous stages. */ + // Might not be the best name as not only decode will read it. + typename TimeBuffer<TimeStruct>::wire toDecode; + + /** Rename instruction queue. */ + TimeBuffer<RenameStruct> *renameQueue; + + /** Wire to write any information heading to IEW. */ + typename TimeBuffer<RenameStruct>::wire toIEW; + + /** Decode instruction queue interface. */ + TimeBuffer<DecodeStruct> *decodeQueue; + + /** Wire to get decode's output from decode queue. */ + typename TimeBuffer<DecodeStruct>::wire fromDecode; + + /** Skid buffer between rename and decode. */ + std::queue<DecodeStruct> skidBuffer; + + /** Rename map interface. */ + SimpleRenameMap *renameMap; + + /** Free list interface. */ + FreeList *freeList; + + /** Delay between iew and rename, in ticks. */ + int iewToRenameDelay; + + /** Delay between decode and rename, in ticks. */ + int decodeToRenameDelay; + + /** Delay between commit and rename, in ticks. */ + unsigned commitToRenameDelay; + + /** Rename width, in instructions. */ + unsigned renameWidth; + + /** Commit width, in instructions. Used so rename knows how many + * instructions might have freed registers in the previous cycle. + */ + unsigned commitWidth; + + /** The instruction that rename is currently on. It needs to have + * persistent state so that when a stall occurs in the middle of a + * group of instructions, it can restart at the proper instruction. + */ + unsigned numInst; + + Stats::Scalar<> renameSquashCycles; + Stats::Scalar<> renameIdleCycles; + Stats::Scalar<> renameBlockCycles; + Stats::Scalar<> renameUnblockCycles; + Stats::Scalar<> renameRenamedInsts; + Stats::Scalar<> renameSquashedInsts; + Stats::Scalar<> renameROBFullEvents; + Stats::Scalar<> renameIQFullEvents; + Stats::Scalar<> renameFullRegistersEvents; + Stats::Scalar<> renameRenamedOperands; + Stats::Scalar<> renameRenameLookups; + Stats::Scalar<> renameHBPlaceHolders; + Stats::Scalar<> renameCommittedMaps; + Stats::Scalar<> renameUndoneMaps; + Stats::Scalar<> renameValidUndoneMaps; +}; + +#endif // __CPU_O3_CPU_SIMPLE_RENAME_HH__ diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh new file mode 100644 index 000000000..2068b36ab --- /dev/null +++ b/src/cpu/o3/rename_impl.hh @@ -0,0 +1,754 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <list> + +#include "config/full_system.hh" +#include "cpu/o3/rename.hh" + +template <class Impl> +SimpleRename<Impl>::SimpleRename(Params ¶ms) + : iewToRenameDelay(params.iewToRenameDelay), + decodeToRenameDelay(params.decodeToRenameDelay), + commitToRenameDelay(params.commitToRenameDelay), + renameWidth(params.renameWidth), + commitWidth(params.commitWidth), + numInst(0) +{ + _status = Idle; +} + +template <class Impl> +void +SimpleRename<Impl>::regStats() +{ + renameSquashCycles + .name(name() + ".renameSquashCycles") + .desc("Number of cycles rename is squashing") + .prereq(renameSquashCycles); + renameIdleCycles + .name(name() + ".renameIdleCycles") + .desc("Number of cycles rename is idle") + .prereq(renameIdleCycles); + renameBlockCycles + .name(name() + ".renameBlockCycles") + .desc("Number of cycles rename is blocking") + .prereq(renameBlockCycles); + renameUnblockCycles + .name(name() + ".renameUnblockCycles") + .desc("Number of cycles rename is unblocking") + .prereq(renameUnblockCycles); + renameRenamedInsts + .name(name() + ".renameRenamedInsts") + .desc("Number of instructions processed by rename") + .prereq(renameRenamedInsts); + renameSquashedInsts + .name(name() + ".renameSquashedInsts") + .desc("Number of squashed instructions processed by rename") + .prereq(renameSquashedInsts); + renameROBFullEvents + .name(name() + ".renameROBFullEvents") + .desc("Number of times rename has considered the ROB 'full'") + .prereq(renameROBFullEvents); + renameIQFullEvents + .name(name() + ".renameIQFullEvents") + .desc("Number of times rename has considered the IQ 'full'") + .prereq(renameIQFullEvents); + renameFullRegistersEvents + .name(name() + ".renameFullRegisterEvents") + .desc("Number of times there has been no free registers") + .prereq(renameFullRegistersEvents); + renameRenamedOperands + .name(name() + ".renameRenamedOperands") + .desc("Number of destination operands rename has renamed") + .prereq(renameRenamedOperands); + renameRenameLookups + .name(name() + ".renameRenameLookups") + .desc("Number of register rename lookups that rename has made") + .prereq(renameRenameLookups); + renameHBPlaceHolders + .name(name() + ".renameHBPlaceHolders") + .desc("Number of place holders added to the history buffer") + .prereq(renameHBPlaceHolders); + renameCommittedMaps + .name(name() + ".renameCommittedMaps") + .desc("Number of HB maps that are committed") + .prereq(renameCommittedMaps); + renameUndoneMaps + .name(name() + ".renameUndoneMaps") + .desc("Number of HB maps that are undone due to squashing") + .prereq(renameUndoneMaps); + renameValidUndoneMaps + .name(name() + ".renameValidUndoneMaps") + .desc("Number of HB maps that are undone, and are not place holders") + .prereq(renameValidUndoneMaps); +} + +template <class Impl> +void +SimpleRename<Impl>::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Rename, "Rename: Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr) +{ + DPRINTF(Rename, "Rename: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from IEW stage. + fromIEW = timeBuffer->getWire(-iewToRenameDelay); + + // Setup wire to read infromation from time buffer, from commit stage. + fromCommit = timeBuffer->getWire(-commitToRenameDelay); + + // Setup wire to write information to previous stages. + toDecode = timeBuffer->getWire(0); +} + +template <class Impl> +void +SimpleRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to write information to future stages. + toIEW = renameQueue->getWire(0); +} + +template <class Impl> +void +SimpleRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr) +{ + DPRINTF(Rename, "Rename: Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to get information from decode. + fromDecode = decodeQueue->getWire(-decodeToRenameDelay); +} + +template <class Impl> +void +SimpleRename<Impl>::setRenameMap(RenameMap *rm_ptr) +{ + DPRINTF(Rename, "Rename: Setting rename map pointer.\n"); + renameMap = rm_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::setFreeList(FreeList *fl_ptr) +{ + DPRINTF(Rename, "Rename: Setting free list pointer.\n"); + freeList = fl_ptr; +} + +template <class Impl> +void +SimpleRename<Impl>::dumpHistory() +{ + typename list<RenameHistory>::iterator buf_it = historyBuffer.begin(); + + while (buf_it != historyBuffer.end()) + { + cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys " + "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg, + (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg); + + buf_it++; + } +} + +template <class Impl> +void +SimpleRename<Impl>::block() +{ + DPRINTF(Rename, "Rename: Blocking.\n"); + // Set status to Blocked. + _status = Blocked; + + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template <class Impl> +inline void +SimpleRename<Impl>::unblock() +{ + DPRINTF(Rename, "Rename: Read instructions out of skid buffer this " + "cycle.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toDecode->renameInfo.stall = true; + } else { + DPRINTF(Rename, "Rename: Done unblocking.\n"); + _status = Running; + } +} + +template <class Impl> +void +SimpleRename<Impl>::doSquash() +{ + typename list<RenameHistory>::iterator hb_it = historyBuffer.begin(); + + InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; + +#if FULL_SYSTEM + assert(!historyBuffer.empty()); +#else + // After a syscall squashes everything, the history buffer may be empty + // but the ROB may still be squashing instructions. + if (historyBuffer.empty()) { + return; + } +#endif // FULL_SYSTEM + + // Go through the most recent instructions, undoing the mappings + // they did and freeing up the registers. + while ((*hb_it).instSeqNum > squashed_seq_num) + { + assert(hb_it != historyBuffer.end()); + + DPRINTF(Rename, "Rename: Removing history entry with sequence " + "number %i.\n", (*hb_it).instSeqNum); + + // If it's not simply a place holder, then add the registers. + if (!(*hb_it).placeHolder) { + // Tell the rename map to set the architected register to the + // previous physical register that it was renamed to. + renameMap->setEntry(hb_it->archReg, hb_it->prevPhysReg); + + // Put the renamed physical register back on the free list. + freeList->addReg(hb_it->newPhysReg); + + ++renameValidUndoneMaps; + } + + historyBuffer.erase(hb_it++); + + ++renameUndoneMaps; + } +} + +template <class Impl> +void +SimpleRename<Impl>::squash() +{ + DPRINTF(Rename, "Rename: Squashing instructions.\n"); + // Set the status to Squashing. + _status = Squashing; + + numInst = 0; + + // Clear the skid buffer in case it has any data in it. + while (!skidBuffer.empty()) + { + skidBuffer.pop(); + } + + doSquash(); +} + +template<class Impl> +void +SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num) +{ + DPRINTF(Rename, "Rename: Removing a committed instruction from the " + "history buffer, until sequence number %lli.\n", inst_seq_num); + typename list<RenameHistory>::iterator hb_it = historyBuffer.end(); + + --hb_it; + + if (hb_it->instSeqNum > inst_seq_num) { + DPRINTF(Rename, "Rename: Old sequence number encountered. Ensure " + "that a syscall happened recently.\n"); + return; + } + + while ((*hb_it).instSeqNum != inst_seq_num) + { + // Make sure we haven't gone off the end of the list. + assert(hb_it != historyBuffer.end()); + + // In theory instructions at the end of the history buffer + // should be older than the instruction being removed, which + // means they will have a lower sequence number. Also the + // instruction being removed from the history really should + // be the last instruction in the list, as it is the instruction + // that was just committed that is being removed. + assert(hb_it->instSeqNum < inst_seq_num); + DPRINTF(Rename, "Rename: Freeing up older rename of reg %i, sequence" + " number %i.\n", + (*hb_it).prevPhysReg, (*hb_it).instSeqNum); + + if (!(*hb_it).placeHolder) { + freeList->addReg((*hb_it).prevPhysReg); + ++renameCommittedMaps; + } + + historyBuffer.erase(hb_it--); + } + + // Finally free up the previous register of the finished instruction + // itself. + if (!(*hb_it).placeHolder) { + freeList->addReg(hb_it->prevPhysReg); + ++renameCommittedMaps; + } + + historyBuffer.erase(hb_it); +} + +template <class Impl> +inline void +SimpleRename<Impl>::renameSrcRegs(DynInstPtr &inst) +{ + unsigned num_src_regs = inst->numSrcRegs(); + + // Get the architectual register numbers from the source and + // destination operands, and redirect them to the right register. + // Will need to mark dependencies though. + for (int src_idx = 0; src_idx < num_src_regs; src_idx++) + { + RegIndex src_reg = inst->srcRegIdx(src_idx); + + // Look up the source registers to get the phys. register they've + // been renamed to, and set the sources to those registers. + PhysRegIndex renamed_reg = renameMap->lookup(src_reg); + + DPRINTF(Rename, "Rename: Looking up arch reg %i, got " + "physical reg %i.\n", (int)src_reg, (int)renamed_reg); + + inst->renameSrcReg(src_idx, renamed_reg); + + // Either incorporate it into the info passed back, + // or make another function call to see if that register is + // ready or not. + if (renameMap->isReady(renamed_reg)) { + DPRINTF(Rename, "Rename: Register is ready.\n"); + + inst->markSrcRegReady(src_idx); + } + + ++renameRenameLookups; + } +} + +template <class Impl> +inline void +SimpleRename<Impl>::renameDestRegs(DynInstPtr &inst) +{ + typename SimpleRenameMap::RenameInfo rename_result; + + unsigned num_dest_regs = inst->numDestRegs(); + + // If it's an instruction with no destination registers, then put + // a placeholder within the history buffer. It might be better + // to not put it in the history buffer at all (other than branches, + // which always need at least a place holder), and differentiate + // between instructions with and without destination registers + // when getting from commit the instructions that committed. + if (num_dest_regs == 0) { + RenameHistory hb_entry(inst->seqNum); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding placeholder instruction to " + "history buffer, sequence number %lli.\n", + inst->seqNum); + + ++renameHBPlaceHolders; + } else { + + // Rename the destination registers. + for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) + { + RegIndex dest_reg = inst->destRegIdx(dest_idx); + + // Get the physical register that the destination will be + // renamed to. + rename_result = renameMap->rename(dest_reg); + + DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " + "reg %i.\n", (int)dest_reg, + (int)rename_result.first); + + // Record the rename information so that a history can be kept. + RenameHistory hb_entry(inst->seqNum, dest_reg, + rename_result.first, + rename_result.second); + + historyBuffer.push_front(hb_entry); + + DPRINTF(Rename, "Rename: Adding instruction to history buffer, " + "sequence number %lli.\n", + (*historyBuffer.begin()).instSeqNum); + + // Tell the instruction to rename the appropriate destination + // register (dest_idx) to the new physical register + // (rename_result.first), and record the previous physical + // register that the same logical register was renamed to + // (rename_result.second). + inst->renameDestReg(dest_idx, + rename_result.first, + rename_result.second); + + ++renameRenamedOperands; + } + } +} + +template <class Impl> +inline int +SimpleRename<Impl>::calcFreeROBEntries() +{ + return fromCommit->commitInfo.freeROBEntries - + renameWidth * iewToRenameDelay; +} + +template <class Impl> +inline int +SimpleRename<Impl>::calcFreeIQEntries() +{ + return fromIEW->iewInfo.freeIQEntries - renameWidth * iewToRenameDelay; +} + +template<class Impl> +void +SimpleRename<Impl>::tick() +{ + // Rename will need to try to rename as many instructions as it + // has bandwidth, unless it is blocked. + + // Check if _status is BarrierStall. If so, then check if the number + // of free ROB entries is equal to the number of total ROB entries. + // Once equal then wake this stage up. Set status to unblocking maybe. + + if (_status != Blocked && _status != Squashing) { + DPRINTF(Rename, "Rename: Status is not blocked, will attempt to " + "run stage.\n"); + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(_status == Unblocking ? !skidBuffer.empty() : 1); + + rename(); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + if (_status == Unblocking) { + ++renameUnblockCycles; + + if (fromDecode->size > 0) { + // Add the current inputs onto the skid buffer, so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromDecode); + } + + unblock(); + } + } else if (_status == Blocked) { + ++renameBlockCycles; + + // If stage is blocked and still receiving valid instructions, + // make sure to store them in the skid buffer. + if (fromDecode->size > 0) { + + block(); + + // Continue to tell previous stage to stall. + toDecode->renameInfo.stall = true; + } + + if (!fromIEW->iewInfo.stall && + !fromCommit->commitInfo.stall && + calcFreeROBEntries() > 0 && + calcFreeIQEntries() > 0 && + renameMap->numFreeEntries() > 0) { + + // Need to be sure to check all blocking conditions above. + // If they have cleared, then start unblocking. + DPRINTF(Rename, "Rename: Stall signals cleared, going to " + "unblock.\n"); + _status = Unblocking; + + // Continue to tell previous stage to block until this stage + // is done unblocking. + toDecode->renameInfo.stall = true; + } else { + // Otherwise no conditions have changed. Tell previous + // stage to continue blocking. + toDecode->renameInfo.stall = true; + } + + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + squash(); + return; + } + } else if (_status == Squashing) { + ++renameSquashCycles; + + if (fromCommit->commitInfo.squash) { + squash(); + } else if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { + + DPRINTF(Rename, "Rename: Done squashing, going to running.\n"); + _status = Running; + rename(); + } else { + doSquash(); + } + } + + // Ugly code, revamp all of the tick() functions eventually. + if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) { +#if !FULL_SYSTEM + if (!fromCommit->commitInfo.squash) { + removeFromHistory(fromCommit->commitInfo.doneSeqNum); + } +#else + removeFromHistory(fromCommit->commitInfo.doneSeqNum); +#endif + } + +} + +template<class Impl> +void +SimpleRename<Impl>::rename() +{ + // Check if any of the stages ahead of rename are telling rename + // to squash. The squash() function will also take care of fixing up + // the rename map and the free list. + if (fromCommit->commitInfo.squash || + fromCommit->commitInfo.robSquashing) { + DPRINTF(Rename, "Rename: Receiving signal from Commit to squash.\n"); + squash(); + return; + } + + // Check if time buffer is telling this stage to stall. + if (fromIEW->iewInfo.stall || + fromCommit->commitInfo.stall) { + DPRINTF(Rename, "Rename: Receiving signal from IEW/Commit to " + "stall.\n"); + block(); + return; + } + + // Check if the current status is squashing. If so, set its status + // to running and resume execution the next cycle. + if (_status == Squashing) { + DPRINTF(Rename, "Rename: Done squashing.\n"); + _status = Running; + return; + } + + // Check the decode queue to see if instructions are available. + // If there are no available instructions to rename, then do nothing. + // Or, if the stage is currently unblocking, then go ahead and run it. + if (fromDecode->size == 0 && _status != Unblocking) { + DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n"); + // Should I change status to idle? + return; + } + + //////////////////////////////////// + // Actual rename part. + //////////////////////////////////// + + DynInstPtr inst; + + // If we're unblocking, then we may be in the middle of an instruction + // group. Subtract off numInst to get the proper number of instructions + // left. + int insts_available = _status == Unblocking ? + skidBuffer.front().size - numInst : + fromDecode->size; + + bool block_this_cycle = false; + + // Will have to do a different calculation for the number of free + // entries. Number of free entries recorded on this cycle - + // renameWidth * renameToDecodeDelay + int free_rob_entries = calcFreeROBEntries(); + int free_iq_entries = calcFreeIQEntries(); + int min_iq_rob = min(free_rob_entries, free_iq_entries); + + unsigned to_iew_index = 0; + + // Check if there's any space left. + if (min_iq_rob <= 0) { + DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ " + "entries.\n" + "Rename: ROB has %d free entries.\n" + "Rename: IQ has %d free entries.\n", + free_rob_entries, + free_iq_entries); + block(); + // Tell previous stage to stall. + toDecode->renameInfo.stall = true; + + if (free_rob_entries <= 0) { + ++renameROBFullEvents; + } else { + ++renameIQFullEvents; + } + + return; + } else if (min_iq_rob < insts_available) { + DPRINTF(Rename, "Rename: Will have to block this cycle. Only " + "%i insts can be renamed due to IQ/ROB limits.\n", + min_iq_rob); + + insts_available = min_iq_rob; + + block_this_cycle = true; + + if (free_rob_entries < free_iq_entries) { + ++renameROBFullEvents; + } else { + ++renameIQFullEvents; + } + } + + while (insts_available > 0) { + DPRINTF(Rename, "Rename: Sending instructions to iew.\n"); + + // Get the next instruction either from the skid buffer or the + // decode queue. + inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : + fromDecode->insts[numInst]; + + if (inst->isSquashed()) { + DPRINTF(Rename, "Rename: instruction %i with PC %#x is " + "squashed, skipping.\n", + inst->seqNum, inst->readPC()); + + // Go to the next instruction. + ++numInst; + + ++renameSquashedInsts; + + // Decrement how many instructions are available. + --insts_available; + + continue; + } + + DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n", + inst->seqNum, inst->readPC()); + + // If it's a trap instruction, then it needs to wait here within + // rename until the ROB is empty. Needs a way to detect that the + // ROB is empty. Maybe an event? + // Would be nice if it could be avoided putting this into a + // specific stage and instead just put it into the AlphaFullCPU. + // Might not really be feasible though... + // (EXCB, TRAPB) + if (inst->isSerializing()) { + panic("Rename: Serializing instruction encountered.\n"); + DPRINTF(Rename, "Rename: Serializing instruction " + "encountered.\n"); + + // Change status over to BarrierStall so that other stages know + // what this is blocked on. + _status = BarrierStall; + + block_this_cycle = true; + + break; + } + + // Check here to make sure there are enough destination registers + // to rename to. Otherwise block. + if (renameMap->numFreeEntries() < inst->numDestRegs()) + { + DPRINTF(Rename, "Rename: Blocking due to lack of free " + "physical registers to rename to.\n"); + // Need some sort of event based on a register being freed. + + block_this_cycle = true; + + ++renameFullRegistersEvents; + + break; + } + + renameSrcRegs(inst); + + renameDestRegs(inst); + + // Put instruction in rename queue. + toIEW->insts[to_iew_index] = inst; + ++(toIEW->size); + + // Decrease the number of free ROB and IQ entries. + --free_rob_entries; + --free_iq_entries; + + // Increment which instruction we're on. + ++to_iew_index; + ++numInst; + + ++renameRenamedInsts; + + // Decrement how many instructions are available. + --insts_available; + } + + // Check if there's any instructions left that haven't yet been renamed. + // If so then block. + if (block_this_cycle) { + block(); + + toDecode->renameInfo.stall = true; + } else { + // If we had a successful rename and didn't have to exit early, then + // reset numInst so it will refer to the correct instruction on next + // run. + numInst = 0; + } +} diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc new file mode 100644 index 000000000..10963f7de --- /dev/null +++ b/src/cpu/o3/rename_map.cc @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <vector> + +#include "cpu/o3/rename_map.hh" + +using namespace std; + +// Todo: Consider making functions inline. Avoid having things that are +// using the zero register or misc registers from adding on the registers +// to the free list. Possibly remove the direct communication between +// this and the freelist. Considering making inline bool functions that +// determine if the register is a logical int, logical fp, physical int, +// physical fp, etc. + +SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numMiscRegs(_numMiscRegs), + intZeroReg(_intZeroReg), + floatZeroReg(_floatZeroReg) +{ + DPRINTF(Rename, "Rename: Creating rename map. Phys: %i / %i, Float: " + "%i / %i.\n", numLogicalIntRegs, numPhysicalIntRegs, + numLogicalFloatRegs, numPhysicalFloatRegs); + + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + + //Create the rename maps, and their scoreboards. + intRenameMap = new RenameEntry[numLogicalIntRegs]; + floatRenameMap = new RenameEntry[numLogicalRegs]; + + // Should combine this into one scoreboard. + intScoreboard.resize(numPhysicalIntRegs); + floatScoreboard.resize(numPhysicalRegs); + miscScoreboard.resize(numPhysicalRegs + numMiscRegs); + + // Initialize the entries in the integer rename map to point to the + // physical registers of the same index, and consider each register + // ready until the first rename occurs. + for (RegIndex index = 0; index < numLogicalIntRegs; ++index) + { + intRenameMap[index].physical_reg = index; + intScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numLogicalIntRegs; + index < numPhysicalIntRegs; + ++index) + { + intScoreboard[index] = 0; + } + + int float_reg_idx = numPhysicalIntRegs; + + // Initialize the entries in the floating point rename map to point to + // the physical registers of the same index, and consider each register + // ready until the first rename occurs. + // Although the index refers purely to architected registers, because + // the floating reg indices come after the integer reg indices, they + // may exceed the size of a normal RegIndex (short). + for (PhysRegIndex index = numLogicalIntRegs; + index < numLogicalRegs; ++index) + { + floatRenameMap[index].physical_reg = float_reg_idx++; + } + + for (PhysRegIndex index = numPhysicalIntRegs; + index < numPhysicalIntRegs + numLogicalFloatRegs; ++index) + { + floatScoreboard[index] = 1; + } + + // Initialize the rest of the physical registers (the ones that don't + // directly map to a logical register) as unready. + for (PhysRegIndex index = numPhysicalIntRegs + numLogicalFloatRegs; + index < numPhysicalRegs; + ++index) + { + floatScoreboard[index] = 0; + } + + // Initialize the entries in the misc register scoreboard to be ready. + for (PhysRegIndex index = numPhysicalRegs; + index < numPhysicalRegs + numMiscRegs; ++index) + { + miscScoreboard[index] = 1; + } +} + +SimpleRenameMap::~SimpleRenameMap() +{ + // Delete the rename maps as they were allocated with new. + delete [] intRenameMap; + delete [] floatRenameMap; +} + +void +SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) +{ + //Setup the interface to the freelist. + freeList = fl_ptr; +} + + +// Don't allow this stage to fault; force that check to the rename stage. +// Simply ask to rename a logical register and get back a new physical +// register index. +SimpleRenameMap::RenameInfo +SimpleRenameMap::rename(RegIndex arch_reg) +{ + PhysRegIndex renamed_reg; + PhysRegIndex prev_reg; + + if (arch_reg < numLogicalIntRegs) { + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = intRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != intZeroReg) { + // Get a free physical register to rename to. + renamed_reg = freeList->getIntReg(); + + // Update the integer rename map. + intRenameMap[arch_reg].physical_reg = renamed_reg; + + assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs); + + // Mark register as not ready. + intScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = intZeroReg; + } + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base offset for floating point registers. +// arch_reg = arch_reg - numLogicalIntRegs; + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = floatRenameMap[arch_reg].physical_reg; + + // If it's not referencing the zero register, then mark the register + // as not ready. + if (arch_reg != floatZeroReg) { + // Get a free floating point register to rename to. + renamed_reg = freeList->getFloatReg(); + + // Update the floating point rename map. + floatRenameMap[arch_reg].physical_reg = renamed_reg; + + assert(renamed_reg < numPhysicalRegs && + renamed_reg >= numPhysicalIntRegs); + + // Mark register as not ready. + floatScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = floatZeroReg; + } + } else { + // Subtract off the base offset for miscellaneous registers. + arch_reg = arch_reg - numLogicalRegs; + + // No renaming happens to the misc. registers. They are simply the + // registers that come after all the physical registers; thus + // take the base architected register and add the physical registers + // to it. + renamed_reg = arch_reg + numPhysicalRegs; + + // Set the previous register to the same register; mainly it must be + // known that the prev reg was outside the range of normal registers + // so the free list can avoid adding it. + prev_reg = renamed_reg; + + assert(renamed_reg < numPhysicalRegs + numMiscRegs); + + miscScoreboard[renamed_reg] = false; + } + + return RenameInfo(renamed_reg, prev_reg); +} + +//Perhaps give this a pair as a return value, of the physical register +//and whether or not it's ready. +PhysRegIndex +SimpleRenameMap::lookup(RegIndex arch_reg) +{ + if (arch_reg < numLogicalIntRegs) { + return intRenameMap[arch_reg].physical_reg; + } else if (arch_reg < numLogicalRegs) { + // Subtract off the base FP offset. +// arch_reg = arch_reg - numLogicalIntRegs; + + return floatRenameMap[arch_reg].physical_reg; + } else { + // Subtract off the misc registers offset. + arch_reg = arch_reg - numLogicalRegs; + + // Misc. regs don't rename, so simply add the base arch reg to + // the number of physical registers. + return numPhysicalRegs + arch_reg; + } +} + +bool +SimpleRenameMap::isReady(PhysRegIndex phys_reg) +{ + if (phys_reg < numPhysicalIntRegs) { + return intScoreboard[phys_reg]; + } else if (phys_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. +// phys_reg = phys_reg - numPhysicalIntRegs; + + return floatScoreboard[phys_reg]; + } else { + // Subtract off the misc registers offset. +// phys_reg = phys_reg - numPhysicalRegs; + + return miscScoreboard[phys_reg]; + } +} + +// In this implementation the miscellaneous registers do not actually rename, +// so this function does not allow you to try to change their mappings. +void +SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) +{ + if (arch_reg < numLogicalIntRegs) { + DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n", + (int)arch_reg, renamed_reg); + + intRenameMap[arch_reg].physical_reg = renamed_reg; + } else { + assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); + + DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", + (int)arch_reg - numLogicalIntRegs, renamed_reg); + + floatRenameMap[arch_reg].physical_reg = renamed_reg; + } +} + +void +SimpleRenameMap::squash(vector<RegIndex> freed_regs, + vector<UnmapInfo> unmaps) +{ + panic("Not sure this function should be called."); + + // Not sure the rename map should be able to access the free list + // like this. + while (!freed_regs.empty()) { + RegIndex free_register = freed_regs.back(); + + if (free_register < numPhysicalIntRegs) { + freeList->addIntReg(free_register); + } else { + // Subtract off the base FP dependence tag. + free_register = free_register - numPhysicalIntRegs; + freeList->addFloatReg(free_register); + } + + freed_regs.pop_back(); + } + + // Take unmap info and roll back the rename map. +} + +void +SimpleRenameMap::markAsReady(PhysRegIndex ready_reg) +{ + DPRINTF(Rename, "Rename map: Marking register %i as ready.\n", + (int)ready_reg); + + if (ready_reg < numPhysicalIntRegs) { + assert(ready_reg >= 0); + + intScoreboard[ready_reg] = 1; + } else if (ready_reg < numPhysicalRegs) { + + // Subtract off the base FP offset. +// ready_reg = ready_reg - numPhysicalIntRegs; + + floatScoreboard[ready_reg] = 1; + } else { + //Subtract off the misc registers offset. +// ready_reg = ready_reg - numPhysicalRegs; + + miscScoreboard[ready_reg] = 1; + } +} + +int +SimpleRenameMap::numFreeEntries() +{ + int free_int_regs = freeList->numFreeIntRegs(); + int free_float_regs = freeList->numFreeFloatRegs(); + + if (free_int_regs < free_float_regs) { + return free_int_regs; + } else { + return free_float_regs; + } +} diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh new file mode 100644 index 000000000..57be4a64a --- /dev/null +++ b/src/cpu/o3/rename_map.hh @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Create destructor. +// Have it so that there's a more meaningful name given to the variable +// that marks the beginning of the FP registers. + +#ifndef __CPU_O3_CPU_RENAME_MAP_HH__ +#define __CPU_O3_CPU_RENAME_MAP_HH__ + +#include <iostream> +#include <utility> +#include <vector> + +#include "cpu/o3/free_list.hh" +//For RegIndex +#include "arch/isa_traits.hh" + +class SimpleRenameMap +{ + protected: + typedef TheISA::RegIndex RegIndex; + public: + /** + * Pair of a logical register and a physical register. Tells the + * previous mapping of a logical register to a physical register. + * Used to roll back the rename map to a previous state. + */ + typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo; + + /** + * Pair of a physical register and a physical register. Used to + * return the physical register that a logical register has been + * renamed to, and the previous physical register that the same + * logical register was previously mapped to. + */ + typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo; + + public: + //Constructor + SimpleRenameMap(unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + RegIndex _intZeroReg, + RegIndex _floatZeroReg); + + /** Destructor. */ + ~SimpleRenameMap(); + + void setFreeList(SimpleFreeList *fl_ptr); + + //Tell rename map to get a free physical register for a given + //architected register. Not sure it should have a return value, + //but perhaps it should have some sort of fault in case there are + //no free registers. + RenameInfo rename(RegIndex arch_reg); + + PhysRegIndex lookup(RegIndex phys_reg); + + bool isReady(PhysRegIndex arch_reg); + + /** + * Marks the given register as ready, meaning that its value has been + * calculated and written to the register file. + * @param ready_reg The index of the physical register that is now ready. + */ + void markAsReady(PhysRegIndex ready_reg); + + void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); + + void squash(std::vector<RegIndex> freed_regs, + std::vector<UnmapInfo> unmaps); + + int numFreeEntries(); + + private: + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Number of miscellaneous registers. */ + int numMiscRegs; + + /** Number of logical integer + float registers. */ + int numLogicalRegs; + + /** Number of physical integer + float registers. */ + int numPhysicalRegs; + + /** The integer zero register. This implementation assumes it is always + * zero and never can be anything else. + */ + RegIndex intZeroReg; + + /** The floating point zero register. This implementation assumes it is + * always zero and never can be anything else. + */ + RegIndex floatZeroReg; + + class RenameEntry + { + public: + PhysRegIndex physical_reg; + bool valid; + + RenameEntry() + : physical_reg(0), valid(false) + { } + }; + + /** Integer rename map. */ + RenameEntry *intRenameMap; + + /** Floating point rename map. */ + RenameEntry *floatRenameMap; + + /** Free list interface. */ + SimpleFreeList *freeList; + + // Might want to make all these scoreboards into one large scoreboard. + + /** Scoreboard of physical integer registers, saying whether or not they + * are ready. + */ + std::vector<bool> intScoreboard; + + /** Scoreboard of physical floating registers, saying whether or not they + * are ready. + */ + std::vector<bool> floatScoreboard; + + /** Scoreboard of miscellaneous registers, saying whether or not they + * are ready. + */ + std::vector<bool> miscScoreboard; +}; + +#endif //__CPU_O3_CPU_RENAME_MAP_HH__ diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc new file mode 100644 index 000000000..c10f782fd --- /dev/null +++ b/src/cpu/o3/rob.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/rob_impl.hh" + +// Force instantiation of InstructionQueue. +template class ROB<AlphaSimpleImpl>; diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh new file mode 100644 index 000000000..1185564ad --- /dev/null +++ b/src/cpu/o3/rob.hh @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Probably add in support for scheduling events (more than one as +// well) on the case of the ROB being empty or full. Considering tracking +// free entries instead of insts in ROB. Differentiate between squashing +// all instructions after the instruction, and all instructions after *and* +// including that instruction. + +#ifndef __CPU_O3_CPU_ROB_HH__ +#define __CPU_O3_CPU_ROB_HH__ + +#include <utility> +#include <vector> + +/** + * ROB class. Uses the instruction list that exists within the CPU to + * represent the ROB. This class doesn't contain that list, but instead + * a pointer to the CPU to get access to the list. The ROB, in this first + * implementation, is largely what drives squashing. + */ +template <class Impl> +class ROB +{ + protected: + typedef TheISA::RegIndex RegIndex; + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + + typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo_t; + typedef typename list<DynInstPtr>::iterator InstIt_t; + + public: + /** ROB constructor. + * @param _numEntries Number of entries in ROB. + * @param _squashWidth Number of instructions that can be squashed in a + * single cycle. + */ + ROB(unsigned _numEntries, unsigned _squashWidth); + + /** Function to set the CPU pointer, necessary due to which object the ROB + * is created within. + * @param cpu_ptr Pointer to the implementation specific full CPU object. + */ + void setCPU(FullCPU *cpu_ptr); + + /** Function to insert an instruction into the ROB. The parameter inst is + * not truly required, but is useful for checking correctness. Note + * that whatever calls this function must ensure that there is enough + * space within the ROB for the new instruction. + * @param inst The instruction being inserted into the ROB. + * @todo Remove the parameter once correctness is ensured. + */ + void insertInst(DynInstPtr &inst); + + /** Returns pointer to the head instruction within the ROB. There is + * no guarantee as to the return value if the ROB is empty. + * @retval Pointer to the DynInst that is at the head of the ROB. + */ + DynInstPtr readHeadInst() { return cpu->instList.front(); } + + DynInstPtr readTailInst() { return (*tail); } + + void retireHead(); + + bool isHeadReady(); + + unsigned numFreeEntries(); + + bool isFull() + { return numInstsInROB == numEntries; } + + bool isEmpty() + { return numInstsInROB == 0; } + + void doSquash(); + + void squash(InstSeqNum squash_num); + + uint64_t readHeadPC(); + + uint64_t readHeadNextPC(); + + InstSeqNum readHeadSeqNum(); + + uint64_t readTailPC(); + + InstSeqNum readTailSeqNum(); + + /** Checks if the ROB is still in the process of squashing instructions. + * @retval Whether or not the ROB is done squashing. + */ + bool isDoneSquashing() const { return doneSquashing; } + + /** This is more of a debugging function than anything. Use + * numInstsInROB to get the instructions in the ROB unless you are + * double checking that variable. + */ + int countInsts(); + + private: + + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Number of instructions in the ROB. */ + unsigned numEntries; + + /** Number of instructions that can be squashed in a single cycle. */ + unsigned squashWidth; + + /** Iterator pointing to the instruction which is the last instruction + * in the ROB. This may at times be invalid (ie when the ROB is empty), + * however it should never be incorrect. + */ + InstIt_t tail; + + /** Iterator used for walking through the list of instructions when + * squashing. Used so that there is persistent state between cycles; + * when squashing, the instructions are marked as squashed but not + * immediately removed, meaning the tail iterator remains the same before + * and after a squash. + * This will always be set to cpu->instList.end() if it is invalid. + */ + InstIt_t squashIt; + + /** Number of instructions in the ROB. */ + int numInstsInROB; + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Is the ROB done squashing. */ + bool doneSquashing; +}; + +#endif //__CPU_O3_CPU_ROB_HH__ diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh new file mode 100644 index 000000000..e7a5671d9 --- /dev/null +++ b/src/cpu/o3/rob_impl.hh @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_ROB_IMPL_HH__ +#define __CPU_O3_CPU_ROB_IMPL_HH__ + +#include "config/full_system.hh" +#include "cpu/o3/rob.hh" + +template <class Impl> +ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth) + : numEntries(_numEntries), + squashWidth(_squashWidth), + numInstsInROB(0), + squashedSeqNum(0) +{ + doneSquashing = true; +} + +template <class Impl> +void +ROB<Impl>::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + // Set the tail to the beginning of the CPU instruction list so that + // upon the first instruction being inserted into the ROB, the tail + // iterator can simply be incremented. + tail = cpu->instList.begin(); + + // Set the squash iterator to the end of the instruction list. + squashIt = cpu->instList.end(); +} + +template <class Impl> +int +ROB<Impl>::countInsts() +{ + // Start at 1; if the tail matches cpu->instList.begin(), then there is + // one inst in the ROB. + int return_val = 1; + + // There are quite a few special cases. Do not use this function other + // than for debugging purposes. + if (cpu->instList.begin() == cpu->instList.end()) { + // In this case there are no instructions in the list. The ROB + // must be empty. + return 0; + } else if (tail == cpu->instList.end()) { + // In this case, the tail is not yet pointing to anything valid. + // The ROB must be empty. + return 0; + } + + // Iterate through the ROB from the head to the tail, counting the + // entries. + for (InstIt_t i = cpu->instList.begin(); i != tail; ++i) + { + assert(i != cpu->instList.end()); + ++return_val; + } + + return return_val; + + // Because the head won't be tracked properly until the ROB gets the + // first instruction, and any time that the ROB is empty and has not + // yet gotten the instruction, this function doesn't work. +// return numInstsInROB; +} + +template <class Impl> +void +ROB<Impl>::insertInst(DynInstPtr &inst) +{ + // Make sure we have the right number of instructions. + assert(numInstsInROB == countInsts()); + // Make sure the instruction is valid. + assert(inst); + + DPRINTF(ROB, "ROB: Adding inst PC %#x to the ROB.\n", inst->readPC()); + + // If the ROB is full then exit. + assert(numInstsInROB != numEntries); + + ++numInstsInROB; + + // Increment the tail iterator, moving it one instruction back. + // There is a special case if the ROB was empty prior to this insertion, + // in which case the tail will be pointing at instList.end(). If that + // happens, then reset the tail to the beginning of the list. + if (tail != cpu->instList.end()) { + ++tail; + } else { + tail = cpu->instList.begin(); + } + + // Make sure the tail iterator is actually pointing at the instruction + // added. + assert((*tail) == inst); + + DPRINTF(ROB, "ROB: Now has %d instructions.\n", numInstsInROB); + +} + +// Whatever calls this function needs to ensure that it properly frees up +// registers prior to this function. +template <class Impl> +void +ROB<Impl>::retireHead() +{ + assert(numInstsInROB == countInsts()); + assert(numInstsInROB > 0); + + // Get the head ROB instruction. + DynInstPtr head_inst = cpu->instList.front(); + + // Make certain this can retire. + assert(head_inst->readyToCommit()); + + DPRINTF(ROB, "ROB: Retiring head instruction of the ROB, " + "instruction PC %#x, seq num %i\n", head_inst->readPC(), + head_inst->seqNum); + + // Keep track of how many instructions are in the ROB. + --numInstsInROB; + + // Tell CPU to remove the instruction from the list of instructions. + // A special case is needed if the instruction being retired is the + // only instruction in the ROB; otherwise the tail iterator will become + // invalidated. + cpu->removeFrontInst(head_inst); + + if (numInstsInROB == 0) { + tail = cpu->instList.end(); + } +} + +template <class Impl> +bool +ROB<Impl>::isHeadReady() +{ + if (numInstsInROB != 0) { + return cpu->instList.front()->readyToCommit(); + } + + return false; +} + +template <class Impl> +unsigned +ROB<Impl>::numFreeEntries() +{ + assert(numInstsInROB == countInsts()); + + return numEntries - numInstsInROB; +} + +template <class Impl> +void +ROB<Impl>::doSquash() +{ + DPRINTF(ROB, "ROB: Squashing instructions.\n"); + + assert(squashIt != cpu->instList.end()); + + for (int numSquashed = 0; + numSquashed < squashWidth && (*squashIt)->seqNum != squashedSeqNum; + ++numSquashed) + { + // Ensure that the instruction is younger. + assert((*squashIt)->seqNum > squashedSeqNum); + + DPRINTF(ROB, "ROB: Squashing instruction PC %#x, seq num %i.\n", + (*squashIt)->readPC(), (*squashIt)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*squashIt)->setSquashed(); + + (*squashIt)->setCanCommit(); + + // Special case for when squashing due to a syscall. It's possible + // that the squash happened after the head instruction was already + // committed, meaning that (*squashIt)->seqNum != squashedSeqNum + // will never be false. Normally the squash would never be able + // to go past the head of the ROB; in this case it might, so it + // must be handled otherwise it will segfault. +#if !FULL_SYSTEM + if (squashIt == cpu->instList.begin()) { + DPRINTF(ROB, "ROB: Reached head of instruction list while " + "squashing.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + + return; + } +#endif + + // Move the tail iterator to the next instruction. + squashIt--; + } + + + // Check if ROB is done squashing. + if ((*squashIt)->seqNum == squashedSeqNum) { + DPRINTF(ROB, "ROB: Done squashing instructions.\n"); + + squashIt = cpu->instList.end(); + + doneSquashing = true; + } +} + +template <class Impl> +void +ROB<Impl>::squash(InstSeqNum squash_num) +{ + DPRINTF(ROB, "ROB: Starting to squash within the ROB.\n"); + doneSquashing = false; + + squashedSeqNum = squash_num; + + assert(tail != cpu->instList.end()); + + squashIt = tail; + + doSquash(); +} + +template <class Impl> +uint64_t +ROB<Impl>::readHeadPC() +{ + assert(numInstsInROB == countInsts()); + + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->readPC(); +} + +template <class Impl> +uint64_t +ROB<Impl>::readHeadNextPC() +{ + assert(numInstsInROB == countInsts()); + + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->readNextPC(); +} + +template <class Impl> +InstSeqNum +ROB<Impl>::readHeadSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + DynInstPtr head_inst = cpu->instList.front(); + + return head_inst->seqNum; +} + +template <class Impl> +uint64_t +ROB<Impl>::readTailPC() +{ + assert(numInstsInROB == countInsts()); + + assert(tail != cpu->instList.end()); + + return (*tail)->readPC(); +} + +template <class Impl> +InstSeqNum +ROB<Impl>::readTailSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + return (*tail)->seqNum; +} + +#endif // __CPU_O3_CPU_ROB_IMPL_HH__ diff --git a/src/cpu/o3/sat_counter.cc b/src/cpu/o3/sat_counter.cc new file mode 100644 index 000000000..d20fff650 --- /dev/null +++ b/src/cpu/o3/sat_counter.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/misc.hh" +#include "cpu/o3/sat_counter.hh" + +SatCounter::SatCounter() + : maxVal(0), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits) + : maxVal((1 << bits) - 1), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits, unsigned initial_val) + : maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + panic("BP: Initial counter value exceeds max size."); + } +} + +void +SatCounter::setBits(unsigned bits) +{ + maxVal = (1 << bits) - 1; +} + +void +SatCounter::increment() +{ + if(counter < maxVal) { + ++counter; + } +} + +void +SatCounter::decrement() +{ + if(counter > 0) { + --counter; + } +} diff --git a/src/cpu/o3/sat_counter.hh b/src/cpu/o3/sat_counter.hh new file mode 100644 index 000000000..b7cfe6423 --- /dev/null +++ b/src/cpu/o3/sat_counter.hh @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_SAT_COUNTER_HH__ +#define __CPU_O3_CPU_SAT_COUNTER_HH__ + +#include "sim/host.hh" + +/** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ +class SatCounter +{ + public: + /** + * Constructor for the counter. + */ + SatCounter(); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits); + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ + SatCounter(unsigned bits, unsigned initial_val); + + /** + * Sets the number of bits. + */ + void setBits(unsigned bits); + + /** + * Increments the counter's current value. + */ + void increment(); + + /** + * Decrements the counter's current value. + */ + void decrement(); + + /** + * Read the counter's value. + */ + const uint8_t read() const + { + return counter; + } + + private: + uint8_t maxVal; + uint8_t counter; +}; + +#endif // __CPU_O3_CPU_SAT_COUNTER_HH__ diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc new file mode 100644 index 000000000..11023f4a8 --- /dev/null +++ b/src/cpu/o3/store_set.cc @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" +#include "cpu/o3/store_set.hh" + +StoreSet::StoreSet(int _SSIT_size, int _LFST_size) + : SSIT_size(_SSIT_size), LFST_size(_LFST_size) +{ + DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); + DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", + SSIT_size, LFST_size); + + SSIT = new SSID[SSIT_size]; + + validSSIT.resize(SSIT_size); + + for (int i = 0; i < SSIT_size; ++i) + validSSIT[i] = false; + + LFST = new InstSeqNum[LFST_size]; + + validLFST.resize(LFST_size); + + SSCounters = new int[LFST_size]; + + for (int i = 0; i < LFST_size; ++i) + { + validLFST[i] = false; + SSCounters[i] = 0; + } + + index_mask = SSIT_size - 1; + + offset_bits = 2; +} + +void +StoreSet::violation(Addr store_PC, Addr load_PC) +{ + int load_index = calcIndex(load_PC); + int store_index = calcIndex(store_PC); + + assert(load_index < SSIT_size && store_index < SSIT_size); + + bool valid_load_SSID = validSSIT[load_index]; + bool valid_store_SSID = validSSIT[store_index]; + + if (!valid_load_SSID && !valid_store_SSID) { + // Calculate a new SSID here. + SSID new_set = calcSSID(load_PC); + + validSSIT[load_index] = true; + + SSIT[load_index] = new_set; + + validSSIT[store_index] = true; + + SSIT[store_index] = new_set; + + assert(new_set < LFST_size); + + SSCounters[new_set]++; + + + DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid " + "storeset, creating a new one: %i for load %#x, store %#x\n", + new_set, load_PC, store_PC); + } else if (valid_load_SSID && !valid_store_SSID) { + SSID load_SSID = SSIT[load_index]; + + validSSIT[store_index] = true; + + SSIT[store_index] = load_SSID; + + assert(load_SSID < LFST_size); + + SSCounters[load_SSID]++; + + DPRINTF(StoreSet, "StoreSet: Load had a valid store set. Adding " + "store to that set: %i for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); + } else if (!valid_load_SSID && valid_store_SSID) { + SSID store_SSID = SSIT[store_index]; + + validSSIT[load_index] = true; + + SSIT[load_index] = store_SSID; + + // Because we are having a load point to an already existing set, + // the size of the store set is not incremented. + + DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for " + "load %#x, store %#x\n", + store_SSID, load_PC, store_PC); + } else { + SSID load_SSID = SSIT[load_index]; + SSID store_SSID = SSIT[store_index]; + + assert(load_SSID < LFST_size && store_SSID < LFST_size); + + int load_SS_size = SSCounters[load_SSID]; + int store_SS_size = SSCounters[store_SSID]; + + // If the load has the bigger store set, then assign the store + // to the same store set as the load. Otherwise vice-versa. + if (load_SS_size > store_SS_size) { + SSIT[store_index] = load_SSID; + + SSCounters[load_SSID]++; + SSCounters[store_SSID]--; + + DPRINTF(StoreSet, "StoreSet: Load had bigger store set: %i; " + "for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); + } else { + SSIT[load_index] = store_SSID; + + SSCounters[store_SSID]++; + SSCounters[load_SSID]--; + + DPRINTF(StoreSet, "StoreSet: Store had bigger store set: %i; " + "for load %#x, store %#x\n", + store_SSID, load_PC, store_PC); + } + } +} + +void +StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num) +{ + // Does nothing. + return; +} + +void +StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num) +{ + int index = calcIndex(store_PC); + + int store_SSID; + + assert(index < SSIT_size); + + if (!validSSIT[index]) { + // Do nothing if there's no valid entry. + return; + } else { + store_SSID = SSIT[index]; + + assert(store_SSID < LFST_size); + + // Update the last store that was fetched with the current one. + LFST[store_SSID] = store_seq_num; + + validLFST[store_SSID] = 1; + + DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n", + store_PC, store_SSID); + } +} + +InstSeqNum +StoreSet::checkInst(Addr PC) +{ + int index = calcIndex(PC); + + int inst_SSID; + + assert(index < SSIT_size); + + if (!validSSIT[index]) { + DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n", + PC, index); + + // Return 0 if there's no valid entry. + return 0; + } else { + inst_SSID = SSIT[index]; + + assert(inst_SSID < LFST_size); + + if (!validLFST[inst_SSID]) { + + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no " + "dependency\n", PC, index, inst_SSID); + + return 0; + } else { + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST " + "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]); + + return LFST[inst_SSID]; + } + } +} + +void +StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) +{ + // This only is updated upon a store being issued. + if (!is_store) { + return; + } + + int index = calcIndex(issued_PC); + + int store_SSID; + + assert(index < SSIT_size); + + // Make sure the SSIT still has a valid entry for the issued store. + if (!validSSIT[index]) { + return; + } + + store_SSID = SSIT[index]; + + assert(store_SSID < LFST_size); + + // If the last fetched store in the store set refers to the store that + // was just issued, then invalidate the entry. + if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) { + DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n"); + validLFST[store_SSID] = false; + } +} + +void +StoreSet::squash(InstSeqNum squashed_num) +{ + // Not really sure how to do this well. + // Generally this is small enough that it should be okay; short circuit + // evaluation should take care of invalid entries. + + DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n", + squashed_num); + + for (int i = 0; i < LFST_size; ++i) { + if (validLFST[i] && LFST[i] < squashed_num) { + validLFST[i] = false; + } + } +} + +void +StoreSet::clear() +{ + for (int i = 0; i < SSIT_size; ++i) { + validSSIT[i] = false; + } + + for (int i = 0; i < LFST_size; ++i) { + validLFST[i] = false; + } +} + diff --git a/src/cpu/o3/store_set.hh b/src/cpu/o3/store_set.hh new file mode 100644 index 000000000..5a885d838 --- /dev/null +++ b/src/cpu/o3/store_set.hh @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_STORE_SET_HH__ +#define __CPU_O3_CPU_STORE_SET_HH__ + +#include <vector> + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" + +class StoreSet +{ + public: + typedef unsigned SSID; + + public: + StoreSet(int SSIT_size, int LFST_size); + + void violation(Addr store_PC, Addr load_PC); + + void insertLoad(Addr load_PC, InstSeqNum load_seq_num); + + void insertStore(Addr store_PC, InstSeqNum store_seq_num); + + InstSeqNum checkInst(Addr PC); + + void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); + + void squash(InstSeqNum squashed_num); + + void clear(); + + private: + inline int calcIndex(Addr PC) + { return (PC >> offset_bits) & index_mask; } + + inline SSID calcSSID(Addr PC) + { return ((PC ^ (PC >> 10)) % LFST_size); } + + SSID *SSIT; + + std::vector<bool> validSSIT; + + InstSeqNum *LFST; + + std::vector<bool> validLFST; + + int *SSCounters; + + int SSIT_size; + + int LFST_size; + + int index_mask; + + // HACK: Hardcoded for now. + int offset_bits; +}; + +#endif // __CPU_O3_CPU_STORE_SET_HH__ diff --git a/src/cpu/o3/tournament_pred.cc b/src/cpu/o3/tournament_pred.cc new file mode 100644 index 000000000..3fb580510 --- /dev/null +++ b/src/cpu/o3/tournament_pred.cc @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/tournament_pred.hh" + +TournamentBP::TournamentBP(unsigned _local_predictor_size, + unsigned _local_ctr_bits, + unsigned _local_history_table_size, + unsigned _local_history_bits, + unsigned _global_predictor_size, + unsigned _global_ctr_bits, + unsigned _global_history_bits, + unsigned _choice_predictor_size, + unsigned _choice_ctr_bits, + unsigned _instShiftAmt) + : localPredictorSize(_local_predictor_size), + localCtrBits(_local_ctr_bits), + localHistoryTableSize(_local_history_table_size), + localHistoryBits(_local_history_bits), + globalPredictorSize(_global_predictor_size), + globalCtrBits(_global_ctr_bits), + globalHistoryBits(_global_history_bits), + choicePredictorSize(_global_predictor_size), + choiceCtrBits(_choice_ctr_bits), + instShiftAmt(_instShiftAmt) +{ + //Should do checks here to make sure sizes are correct (powers of 2) + + //Setup the array of counters for the local predictor + localCtrs = new SatCounter[localPredictorSize]; + + for (int i = 0; i < localPredictorSize; ++i) + localCtrs[i].setBits(localCtrBits); + + //Setup the history table for the local table + localHistoryTable = new unsigned[localHistoryTableSize]; + + for (int i = 0; i < localHistoryTableSize; ++i) + localHistoryTable[i] = 0; + + // Setup the local history mask + localHistoryMask = (1 << localHistoryBits) - 1; + + //Setup the array of counters for the global predictor + globalCtrs = new SatCounter[globalPredictorSize]; + + for (int i = 0; i < globalPredictorSize; ++i) + globalCtrs[i].setBits(globalCtrBits); + + //Clear the global history + globalHistory = 0; + // Setup the global history mask + globalHistoryMask = (1 << globalHistoryBits) - 1; + + //Setup the array of counters for the choice predictor + choiceCtrs = new SatCounter[choicePredictorSize]; + + for (int i = 0; i < choicePredictorSize; ++i) + choiceCtrs[i].setBits(choiceCtrBits); + + threshold = (1 << (localCtrBits - 1)) - 1; + threshold = threshold / 2; +} + +inline +unsigned +TournamentBP::calcLocHistIdx(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1); +} + +inline +void +TournamentBP::updateHistoriesTaken(unsigned local_history_idx) +{ + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1) | 1; +} + +inline +void +TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx) +{ + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1); +} + +bool +TournamentBP::lookup(Addr &branch_addr) +{ + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + + uint8_t global_prediction; + uint8_t choice_prediction; + + //Lookup in the local predictor to get its branch prediction + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = localHistoryTable[local_history_idx] + & localHistoryMask; + local_prediction = localCtrs[local_predictor_idx].read(); + + //Lookup in the global predictor to get its branch prediction + global_prediction = globalCtrs[globalHistory].read(); + + //Lookup in the choice predictor to see which one to use + choice_prediction = choiceCtrs[globalHistory].read(); + + //@todo Put a threshold value in for the three predictors that can + // be set through the constructor (so this isn't hard coded). + //Also should put some of this code into functions. + if (choice_prediction > threshold) { + if (global_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].increment(); + localCtrs[local_history_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].decrement(); + localCtrs[local_history_idx].decrement(); + + return false; + } + } else { + if (local_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].increment(); + localCtrs[local_history_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].decrement(); + localCtrs[local_history_idx].decrement(); + + return false; + } + } +} + +// Update the branch predictor if it predicted a branch wrong. +void +TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) +{ + + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + bool local_pred_taken; + + uint8_t global_prediction; + bool global_pred_taken; + + // Load the correct global history into the register. + globalHistory = correct_gh; + + // Get the local predictor's current prediction, remove the incorrect + // update, and update the local predictor + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = localHistoryTable[local_history_idx]; + local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask; + + local_prediction = localCtrs[local_predictor_idx].read(); + local_pred_taken = local_prediction > threshold; + + //Get the global predictor's current prediction, and update the + //global predictor + global_prediction = globalCtrs[globalHistory].read(); + global_pred_taken = global_prediction > threshold; + + //Update the choice predictor to tell it which one was correct + if (local_pred_taken != global_pred_taken) { + //If the local prediction matches the actual outcome, decerement + //the counter. Otherwise increment the counter. + if (local_pred_taken == taken) { + choiceCtrs[globalHistory].decrement(); + } else { + choiceCtrs[globalHistory].increment(); + } + } + + if (taken) { + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); + + localCtrs[local_predictor_idx].increment(); + globalCtrs[globalHistory].increment(); + + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] |= 1; + } + else { + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); + + localCtrs[local_predictor_idx].decrement(); + globalCtrs[globalHistory].decrement(); + + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] &= ~1; + } +} diff --git a/src/cpu/o3/tournament_pred.hh b/src/cpu/o3/tournament_pred.hh new file mode 100644 index 000000000..cb93c2f67 --- /dev/null +++ b/src/cpu/o3/tournament_pred.hh @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_CPU_TOURNAMENT_PRED_HH__ +#define __CPU_O3_CPU_TOURNAMENT_PRED_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" +#include "cpu/o3/sat_counter.hh" + +class TournamentBP +{ + public: + /** + * Default branch predictor constructor. + */ + TournamentBP(unsigned local_predictor_size, + unsigned local_ctr_bits, + unsigned local_history_table_size, + unsigned local_history_bits, + unsigned global_predictor_size, + unsigned global_history_bits, + unsigned global_ctr_bits, + unsigned choice_predictor_size, + unsigned choice_ctr_bits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, unsigned global_history, bool taken); + + inline unsigned readGlobalHist() { return globalHistory; } + + private: + + inline bool getPrediction(uint8_t &count); + + inline unsigned calcLocHistIdx(Addr &branch_addr); + + inline void updateHistoriesTaken(unsigned local_history_idx); + + inline void updateHistoriesNotTaken(unsigned local_history_idx); + + /** Local counters. */ + SatCounter *localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Array of local history table entries. */ + unsigned *localHistoryTable; + + /** Size of the local history table. */ + unsigned localHistoryTableSize; + + /** Number of bits for each entry of the local history table. + * @todo Doesn't this come from the size of the local predictor? + */ + unsigned localHistoryBits; + + /** Mask to get the proper local history. */ + unsigned localHistoryMask; + + + /** Array of counters that make up the global predictor. */ + SatCounter *globalCtrs; + + /** Size of the global predictor. */ + unsigned globalPredictorSize; + + /** Number of bits of the global predictor's counters. */ + unsigned globalCtrBits; + + /** Global history register. */ + unsigned globalHistory; + + /** Number of bits for the global history. */ + unsigned globalHistoryBits; + + /** Mask to get the proper global history. */ + unsigned globalHistoryMask; + + + /** Array of counters that make up the choice predictor. */ + SatCounter *choiceCtrs; + + /** Size of the choice predictor (identical to the global predictor). */ + unsigned choicePredictorSize; + + /** Number of bits of the choice predictor's counters. */ + unsigned choiceCtrBits; + + /** Number of bits to shift the instruction over to get rid of the word + * offset. + */ + unsigned instShiftAmt; + + /** Threshold for the counter value; above the threshold is taken, + * equal to or below the threshold is not taken. + */ + unsigned threshold; +}; + +#endif // __CPU_O3_CPU_TOURNAMENT_PRED_HH__ diff --git a/src/cpu/op_class.cc b/src/cpu/op_class.cc new file mode 100644 index 000000000..00136ded5 --- /dev/null +++ b/src/cpu/op_class.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/op_class.hh" + +/** OpClass enum -> description string */ +const char * +opClassStrings[Num_OpClasses] = +{ + "(null)", + "IntAlu", + "IntMult", + "IntDiv", + "FloatAdd", + "FloatCmp", + "FloatCvt", + "FloatMult", + "FloatDiv", + "FloatSqrt", + "MemRead", + "MemWrite", + "IprAccess", + "InstPrefetch" +}; + diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh new file mode 100644 index 000000000..cdb40a0fb --- /dev/null +++ b/src/cpu/op_class.hh @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU__OP_CLASS_HH__ +#define __CPU__OP_CLASS_HH__ + +/** + * @file + * Definition of operation classes. + */ + +/** + * Instruction operation classes. These classes are used for + * assigning instructions to functional units. + */ +enum OpClass { + No_OpClass = 0, ///< Instruction does not use a functional unit + IntAluOp, ///< Integer ALU operaton (add/sub/logical) + IntMultOp, ///< Integer multiply + IntDivOp, ///< Integer divide + FloatAddOp, ///< Floating point add/subtract + FloatCmpOp, ///< Floating point comparison + FloatCvtOp, ///< Floating point<->integer conversion + FloatMultOp, ///< Floating point multiply + FloatDivOp, ///< Floating point divide + FloatSqrtOp, ///< Floating point square root + MemReadOp, ///< Memory read port + MemWriteOp, ///< Memory write port + IprAccessOp, ///< Internal Processor Register read/write port + InstPrefetchOp, ///< Instruction prefetch port (on I-cache) + Num_OpClasses ///< Total number of operation classes +}; + +/** + * Array mapping OpClass enum values to strings. Defined in op_class.cc. + */ +extern const char *opClassStrings[]; + +#endif // __CPU__OP_CLASS_HH__ diff --git a/src/cpu/ozone/cpu.cc b/src/cpu/ozone/cpu.cc new file mode 100644 index 000000000..cbeca9d3b --- /dev/null +++ b/src/cpu/ozone/cpu.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ooo_cpu/ooo_cpu_impl.hh" +#include "cpu/ooo_cpu/ooo_dyn_inst.hh" +#include "cpu/ooo_cpu/ooo_impl.hh" + +template class OoOCPU<OoOImpl>; diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh new file mode 100644 index 000000000..fa849bb09 --- /dev/null +++ b/src/cpu/ozone/cpu.hh @@ -0,0 +1,638 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OOO_CPU_OOO_CPU_HH__ +#define __CPU_OOO_CPU_OOO_CPU_HH__ + +#include "base/statistics.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "encumbered/cpu/full/fu_pool.hh" +#include "cpu/ooo_cpu/ea_list.hh" +#include "cpu/pc_event.hh" +#include "cpu/static_inst.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" + +// forward declarations +#if FULL_SYSTEM +class Processor; +class AlphaITB; +class AlphaDTB; +class PhysicalMemory; + +class RemoteGDB; +class GDBListener; + +#else + +class Process; + +#endif // FULL_SYSTEM + +class Checkpoint; +class MemInterface; + +namespace Trace { + class InstRecord; +} + +/** + * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with + * simple out-of-order capabilities added to it. It is still a 1 CPI machine + * (?), but is capable of handling cache misses. Basically it models having + * a ROB/IQ by only allowing a certain amount of instructions to execute while + * the cache miss is outstanding. + */ + +template <class Impl> +class OoOCPU : public BaseCPU +{ + private: + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + + public: + // main simulation loop (one cycle) + void tick(); + + private: + struct TickEvent : public Event + { + OoOCPU *cpu; + int width; + + TickEvent(OoOCPU *c, int w); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + tickEvent.reschedule(curTick + delay); + else if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + delay); + } + + /// Unschedule tick event, regardless of its current state. + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + private: + Trace::InstRecord *traceData; + + template<typename T> + void trace_data(T data); + + public: + // + enum Status { + Running, + Idle, + IcacheMiss, + IcacheMissComplete, + DcacheMissStall, + SwitchedOut + }; + + private: + Status _status; + + public: + void post_interrupt(int int_num, int index); + + void zero_fill_64(Addr addr) { + static int warned = 0; + if (!warned) { + warn ("WH64 is not implemented"); + warned = 1; + } + }; + + struct Params : public BaseCPU::Params + { + MemInterface *icache_interface; + MemInterface *dcache_interface; + int width; +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; + FunctionalMemory *mem; +#else + Process *process; +#endif + int issueWidth; + }; + + OoOCPU(Params *params); + + virtual ~OoOCPU(); + + void init(); + + private: + void copyFromXC(); + + public: + // execution context + ExecContext *xc; + + void switchOut(); + void takeOverFrom(BaseCPU *oldCPU); + +#if FULL_SYSTEM + Addr dbg_vtophys(Addr addr); + + bool interval_stats; +#endif + + // L1 instruction cache + MemInterface *icacheInterface; + + // L1 data cache + MemInterface *dcacheInterface; + + FuncUnitPool *fuPool; + + // Refcounted pointer to the one memory request. + MemReqPtr cacheMemReq; + + class ICacheCompletionEvent : public Event + { + private: + OoOCPU *cpu; + + public: + ICacheCompletionEvent(OoOCPU *_cpu); + + virtual void process(); + virtual const char *description(); + }; + + // Will need to create a cache completion event upon any memory miss. + ICacheCompletionEvent iCacheCompletionEvent; + + class DCacheCompletionEvent; + + typedef typename + std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt; + + class DCacheCompletionEvent : public Event + { + private: + OoOCPU *cpu; + DynInstPtr inst; + DCacheCompEventIt dcceIt; + + public: + DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst, + DCacheCompEventIt &_dcceIt); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + protected: + std::list<DCacheCompletionEvent> dCacheCompList; + DCacheCompEventIt dcceIt; + + private: + Status status() const { return _status; } + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + virtual void deallocateContext(int thread_num); + virtual void haltContext(int thread_num); + + // statistics + virtual void regStats(); + virtual void resetStats(); + + // number of simulated instructions + Counter numInst; + Counter startNumInst; + Stats::Scalar<> numInsts; + + virtual Counter totalInstructions() const + { + return numInst - startNumInst; + } + + // number of simulated memory references + Stats::Scalar<> numMemRefs; + + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + // number of idle cycles + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; + + // number of cycles stalled for I-cache misses + Stats::Scalar<> icacheStallCycles; + Counter lastIcacheStall; + + // number of cycles stalled for D-cache misses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + void processICacheCompletion(); + + public: + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#if FULL_SYSTEM + bool validInstAddr(Addr addr) { return true; } + bool validDataAddr(Addr addr) { return true; } + int getInstAsid() { return xc->regs.instAsid(); } + int getDataAsid() { return xc->regs.dataAsid(); } + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + bool validInstAddr(Addr addr) + { return xc->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return xc->validDataAddr(addr); } + + int getInstAsid() { return xc->asid; } + int getDataAsid() { return xc->asid; } + + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return NoFault; + } + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif + + template <class T> + Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res, DynInstPtr inst); + + void prefetch(Addr addr, unsigned flags) + { + // need to do this... + } + + void writeHint(Addr addr, int size, unsigned flags) + { + // need to do this... + } + + Fault copySrcTranslate(Addr src); + + Fault copy(Addr dest); + + private: + bool executeInst(DynInstPtr &inst); + + void renameInst(DynInstPtr &inst); + + void addInst(DynInstPtr &inst); + + void commitHeadInst(); + + bool getOneInst(); + + Fault fetchCacheLine(); + + InstSeqNum getAndIncrementInstSeq(); + + bool ambigMemAddr; + + private: + InstSeqNum globalSeqNum; + + DynInstPtr renameTable[TheISA::TotalNumRegs]; + DynInstPtr commitTable[TheISA::TotalNumRegs]; + + // Might need a table of the shadow registers as well. +#if FULL_SYSTEM + DynInstPtr palShadowTable[TheISA::NumIntRegs]; +#endif + + public: + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + // In the OoO case these shouldn't read from the XC but rather from the + // rename table of DynInsts. Also these likely shouldn't be called very + // often, other than when adding things into the xc during say a syscall. + + uint64_t readIntReg(StaticInst *si, int idx) + { + return xc->readIntReg(si->srcRegIdx(idx)); + } + + FloatReg readFloatReg(StaticInst *si, int idx, width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatReg(reg_idx, width); + } + + FloatReg readFloatReg(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatReg(reg_idx); + } + + FloatRegBits readFloatRegBits(StaticInst *si, int idx, int width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegBits(reg_idx, width); + } + + FloatRegBits readFloatRegBits(StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return xc->readFloatRegBits(reg_idx); + } + + void setIntReg(StaticInst *si, int idx, uint64_t val) + { + xc->setIntReg(si->destRegIdx(idx), val); + } + + void setFloatReg(StaticInst *si, int idx, FloatReg val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatReg(reg_idx, val, width); + } + + void setFloatReg(StaticInst *si, int idx, FloatReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatReg(reg_idx, val); + } + + void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegBits(reg_idx, val, width); + } + + void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + xc->setFloatRegBits(reg_idx, val); + } + + uint64_t readPC() { return PC; } + void setNextPC(Addr val) { nextPC = val; } + + private: + Addr PC; + Addr nextPC; + + unsigned issueWidth; + + bool fetchRedirExcp; + bool fetchRedirBranch; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + unsigned cacheBlkSize; + + Addr cacheBlkPC; + + /** The cache line being fetched. */ + uint8_t *cacheData; + + protected: + bool cacheBlkValid; + + private: + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = TheISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + unsigned instSize; + + // ROB tracking stuff. + DynInstPtr robHeadPtr; + DynInstPtr robTailPtr; + unsigned robSize; + unsigned robInsts; + + // List of outstanding EA instructions. + protected: + EAList eaList; + + public: + void branchToTarget(Addr val) + { + if (!fetchRedirExcp) { + fetchRedirBranch = true; + PC = val; + } + } + + // ISA stuff: + uint64_t readUniq() { return xc->readUniq(); } + void setUniq(uint64_t val) { xc->setUniq(val); } + + uint64_t readFpcr() { return xc->readFpcr(); } + void setFpcr(uint64_t val) { xc->setFpcr(val); } + +#if FULL_SYSTEM + uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); } + Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); } + Fault hwrei() { return xc->hwrei(); } + int readIntrFlag() { return xc->readIntrFlag(); } + void setIntrFlag(int val) { xc->setIntrFlag(val); } + bool inPalMode() { return xc->inPalMode(); } + void trap(Fault fault) { fault->invoke(xc); } + bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); } +#else + void syscall() { xc->syscall(); } +#endif + + ExecContext *xcBase() { return xc; } +}; + + +// precise architected memory state accessor macros +template <class Impl> +template <class T> +Fault +OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) +{ + MemReqPtr readReq = new MemReq(); + readReq->xc = xc; + readReq->asid = 0; + readReq->data = new uint8_t[64]; + + readReq->reset(addr, sizeof(T), flags); + + // translate to physical address - This might be an ISA impl call + Fault fault = translateDataReadReq(readReq); + + // do functional access + if (fault == NoFault) + fault = xc->mem->read(readReq, data); +#if 0 + if (traceData) { + traceData->setAddr(addr); + if (fault == NoFault) + traceData->setData(data); + } +#endif + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + readReq->cmd = Read; + readReq->completionEvent = NULL; + readReq->time = curTick; + /*MemAccessResult result = */dcacheInterface->access(readReq); + + if (dcacheInterface->doEvents()) { + readReq->completionEvent = new DCacheCompletionEvent(this, inst, + dcceIt); + } + } + + if (!dcacheInterface && (readReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); + + return fault; +} + +template <class Impl> +template <class T> +Fault +OoOCPU<Impl>::write(T data, Addr addr, unsigned flags, + uint64_t *res, DynInstPtr inst) +{ + MemReqPtr writeReq = new MemReq(); + writeReq->xc = xc; + writeReq->asid = 0; + writeReq->data = new uint8_t[64]; + +#if 0 + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } +#endif + + writeReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = translateDataWriteReq(writeReq); + + // do functional access + if (fault == NoFault) + fault = xc->write(writeReq, data); + + if (fault == NoFault && dcacheInterface) { + writeReq->cmd = Write; + memcpy(writeReq->data,(uint8_t *)&data,writeReq->size); + writeReq->completionEvent = NULL; + writeReq->time = curTick; + /*MemAccessResult result = */dcacheInterface->access(writeReq); + + if (dcacheInterface->doEvents()) { + writeReq->completionEvent = new DCacheCompletionEvent(this, inst, + dcceIt); + } + } + + if (res && (fault == NoFault)) + *res = writeReq->result; + + if (!dcacheInterface && (writeReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); + + return fault; +} + + +#endif // __CPU_OOO_CPU_OOO_CPU_HH__ diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh new file mode 100644 index 000000000..e7ed3cfe0 --- /dev/null +++ b/src/cpu/ozone/cpu_impl.hh @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OOO_CPU_OOO_IMPL_HH__ +#define __CPU_OOO_CPU_OOO_IMPL_HH__ + +#include "arch/isa_traits.hh" + +template <class Impl> +class OoOCPU; + +template <class Impl> +class OoODynInst; + +struct OoOImpl { + typedef AlphaISA ISA; + typedef OoOCPU<OoOImpl> OoOCPU; + typedef OoOCPU FullCPU; + typedef OoODynInst<OoOImpl> DynInst; + typedef RefCountingPtr<DynInst> DynInstPtr; +}; + +#endif // __CPU_OOO_CPU_OOO_IMPL_HH__ diff --git a/src/cpu/ozone/ea_list.cc b/src/cpu/ozone/ea_list.cc new file mode 100644 index 000000000..6114a0ca1 --- /dev/null +++ b/src/cpu/ozone/ea_list.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ooo_cpu/ea_list.hh" + +void +EAList::addAddr(const InstSeqNum &new_sn, const Addr &new_ea) +{ + instEA newEA(new_sn, new_ea); + + eaList.push_back(newEA); +} + +void +EAList::clearAddr(const InstSeqNum &sn_to_clear, const Addr &ea_to_clear) +{ + eaListIt list_it = eaList.begin(); + + while (list_it != eaList.end() && (*list_it).first != sn_to_clear) { + assert((*list_it).second == ea_to_clear); + } +} + +bool +EAList::checkConflict(const InstSeqNum &check_sn, const Addr &check_ea) const +{ + const constEAListIt list_it = eaList.begin(); + + while (list_it != eaList.end() && (*list_it).first < check_sn) { + if ((*list_it).second == check_ea) { + return true; + } + } + + return false; +} + +void +EAList::clear() +{ + eaList.clear(); +} + +void +EAList::commit(const InstSeqNum &commit_sn) +{ + while (!eaList.empty() && eaList.front().first <= commit_sn) { + eaList.pop_front(); + } +} diff --git a/src/cpu/ozone/ea_list.hh b/src/cpu/ozone/ea_list.hh new file mode 100644 index 000000000..c0eee4bb8 --- /dev/null +++ b/src/cpu/ozone/ea_list.hh @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_EA_LIST_HH__ +#define __CPU_EA_LIST_HH__ + +#include <list> +#include <utility> + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" + +/** + * Simple class to hold onto a list of pairs, each pair having a memory + * instruction's sequence number and effective addr. This list can be used + * for memory disambiguation. However, if I ever want to forward results, I + * may have to use a list that holds DynInstPtrs. Hence this may change in + * the future. + */ +class EAList { + private: + typedef std::pair<InstSeqNum, Addr> instEA; + typedef std::list<instEA>::iterator eaListIt; + typedef std::list<instEA>::const_iterator constEAListIt; + + std::list<instEA> eaList; + + public: + EAList() { } + ~EAList() { } + + void addAddr(const InstSeqNum &new_sn, const Addr &new_ea); + + void clearAddr(const InstSeqNum &sn_to_clear, const Addr &ea_to_clear); + + /** Checks if any instructions older than check_sn have a conflicting + * address with check_ea. Note that this function does not handle the + * sequence number rolling over. + */ + bool checkConflict(const InstSeqNum &check_sn, const Addr &check_ea) const; + + void clear(); + + void commit(const InstSeqNum &commit_sn); +}; + +#endif // __CPU_EA_LIST_HH__ diff --git a/src/cpu/pc_event.cc b/src/cpu/pc_event.cc new file mode 100644 index 000000000..050bf1a88 --- /dev/null +++ b/src/cpu/pc_event.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <algorithm> +#include <map> +#include <string> +#include <utility> + +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/pc_event.hh" +#include "sim/debug.hh" +#include "sim/root.hh" +#include "sim/system.hh" + +using namespace std; + +PCEventQueue::PCEventQueue() +{} + +PCEventQueue::~PCEventQueue() +{} + +bool +PCEventQueue::remove(PCEvent *event) +{ + int removed = 0; + range_t range = equal_range(event); + for (iterator i = range.first; i != range.second; ++i) { + if (*i == event) { + DPRINTF(PCEvent, "PC based event removed at %#x: %s\n", + event->pc(), event->descr()); + pc_map.erase(i); + ++removed; + } + } + + return removed > 0; +} + +bool +PCEventQueue::schedule(PCEvent *event) +{ + pc_map.push_back(event); + sort(pc_map.begin(), pc_map.end(), MapCompare()); + + DPRINTF(PCEvent, "PC based event scheduled for %#x: %s\n", + event->pc(), event->descr()); + + return true; +} + +bool +PCEventQueue::doService(ExecContext *xc) +{ + Addr pc = xc->readPC() & ~0x3; + int serviced = 0; + range_t range = equal_range(pc); + for (iterator i = range.first; i != range.second; ++i) { + // Make sure that the pc wasn't changed as the side effect of + // another event. This for example, prevents two invocations + // of the SkipFuncEvent. Maybe we should have separate PC + // event queues for each processor? + if (pc != (xc->readPC() & ~0x3)) + continue; + + DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n", + (*i)->pc(), (*i)->descr()); + + (*i)->process(xc); + ++serviced; + } + + return serviced > 0; +} + +void +PCEventQueue::dump() const +{ + const_iterator i = pc_map.begin(); + const_iterator e = pc_map.end(); + + for (; i != e; ++i) + cprintf("%d: event at %#x: %s\n", curTick, (*i)->pc(), + (*i)->descr()); +} + +PCEventQueue::range_t +PCEventQueue::equal_range(Addr pc) +{ + return std::equal_range(pc_map.begin(), pc_map.end(), pc, MapCompare()); +} + +BreakPCEvent::BreakPCEvent(PCEventQueue *q, const std::string &desc, Addr addr, + bool del) + : PCEvent(q, desc, addr), remove(del) +{ +} + +void +BreakPCEvent::process(ExecContext *xc) +{ + StringWrap name(xc->getCpuPtr()->name() + ".break_event"); + DPRINTFN("break event %s triggered\n", descr()); + debug_break(); + if (remove) + delete this; +} + +#if FULL_SYSTEM +extern "C" +void +sched_break_pc_sys(System *sys, Addr addr) +{ + new BreakPCEvent(&sys->pcEventQueue, "debug break", addr, true); +} + +extern "C" +void +sched_break_pc(Addr addr) +{ + for (vector<System *>::iterator sysi = System::systemList.begin(); + sysi != System::systemList.end(); ++sysi) { + sched_break_pc_sys(*sysi, addr); + } + +} +#endif diff --git a/src/cpu/pc_event.hh b/src/cpu/pc_event.hh new file mode 100644 index 000000000..32b7f3ef5 --- /dev/null +++ b/src/cpu/pc_event.hh @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PC_EVENT_HH__ +#define __PC_EVENT_HH__ + +#include <vector> + +#include "base/misc.hh" + +class ExecContext; +class PCEventQueue; + +class PCEvent +{ + protected: + std::string description; + PCEventQueue *queue; + Addr evpc; + + public: + PCEvent(PCEventQueue *q, const std::string &desc, Addr pc); + + virtual ~PCEvent() { if (queue) remove(); } + + // for DPRINTF + virtual const std::string name() const { return description; } + + std::string descr() const { return description; } + Addr pc() const { return evpc; } + + bool remove(); + virtual void process(ExecContext *xc) = 0; +}; + +class PCEventQueue +{ + protected: + typedef PCEvent * record_t; + class MapCompare { + public: + bool operator()(const record_t &l, const record_t &r) const { + return l->pc() < r->pc(); + } + bool operator()(const record_t &l, Addr pc) const { + return l->pc() < pc; + } + bool operator()(Addr pc, const record_t &r) const { + return pc < r->pc(); + } + }; + typedef std::vector<record_t> map_t; + + public: + typedef map_t::iterator iterator; + typedef map_t::const_iterator const_iterator; + + protected: + typedef std::pair<iterator, iterator> range_t; + typedef std::pair<const_iterator, const_iterator> const_range_t; + + protected: + map_t pc_map; + + bool doService(ExecContext *xc); + + public: + PCEventQueue(); + ~PCEventQueue(); + + bool remove(PCEvent *event); + bool schedule(PCEvent *event); + bool service(ExecContext *xc) + { + if (pc_map.empty()) + return false; + + return doService(xc); + } + + range_t equal_range(Addr pc); + range_t equal_range(PCEvent *event) { return equal_range(event->pc()); } + + void dump() const; +}; + + +inline +PCEvent::PCEvent(PCEventQueue *q, const std::string &desc, Addr pc) + : description(desc), queue(q), evpc(pc) +{ + queue->schedule(this); +} + +inline bool +PCEvent::remove() +{ + if (!queue) + panic("cannot remove an uninitialized event;"); + + return queue->remove(this); +} + +class BreakPCEvent : public PCEvent +{ + protected: + bool remove; + + public: + BreakPCEvent(PCEventQueue *q, const std::string &desc, Addr addr, + bool del = false); + virtual void process(ExecContext *xc); +}; + +#endif // __PC_EVENT_HH__ diff --git a/src/cpu/profile.cc b/src/cpu/profile.cc new file mode 100644 index 000000000..fe3458b61 --- /dev/null +++ b/src/cpu/profile.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string> + +#include "base/bitfield.hh" +#include "base/callback.hh" +#include "base/statistics.hh" +#include "base/trace.hh" +#include "base/loader/symtab.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/profile.hh" + +using namespace std; + +ProfileNode::ProfileNode() + : count(0) +{ } + +void +ProfileNode::dump(const string &symbol, uint64_t id, const SymbolTable *symtab, + ostream &os) const +{ + ccprintf(os, "%#x %s %d ", id, symbol, count); + ChildList::const_iterator i, end = children.end(); + for (i = children.begin(); i != end; ++i) { + const ProfileNode *node = i->second; + ccprintf(os, "%#x ", (intptr_t)node); + } + + ccprintf(os, "\n"); + + for (i = children.begin(); i != end; ++i) { + Addr addr = i->first; + string symbol; + if (addr == 1) + symbol = "user"; + else if (addr == 2) + symbol = "console"; + else if (addr == 3) + symbol = "unknown"; + else if (!symtab->findSymbol(addr, symbol)) + panic("could not find symbol for address %#x\n", addr); + + const ProfileNode *node = i->second; + node->dump(symbol, (intptr_t)node, symtab, os); + } +} + +void +ProfileNode::clear() +{ + count = 0; + ChildList::iterator i, end = children.end(); + for (i = children.begin(); i != end; ++i) + i->second->clear(); +} + +FunctionProfile::FunctionProfile(const SymbolTable *_symtab) + : reset(0), symtab(_symtab) +{ + reset = new MakeCallback<FunctionProfile, &FunctionProfile::clear>(this); + Stats::registerResetCallback(reset); +} + +FunctionProfile::~FunctionProfile() +{ + if (reset) + delete reset; +} + +ProfileNode * +FunctionProfile::consume(const vector<Addr> &stack) +{ + ProfileNode *current = ⊤ + for (int i = 0, size = stack.size(); i < size; ++i) { + ProfileNode *&ptr = current->children[stack[size - i - 1]]; + if (ptr == NULL) + ptr = new ProfileNode; + + current = ptr; + } + + return current; +} + +void +FunctionProfile::clear() +{ + top.clear(); + pc_count.clear(); +} + +void +FunctionProfile::dump(ExecContext *xc, ostream &os) const +{ + ccprintf(os, ">>>PC data\n"); + map<Addr, Counter>::const_iterator i, end = pc_count.end(); + for (i = pc_count.begin(); i != end; ++i) { + Addr pc = i->first; + Counter count = i->second; + + std::string symbol; + if (pc == 1) + ccprintf(os, "user %d\n", count); + else if (symtab->findSymbol(pc, symbol) && !symbol.empty()) + ccprintf(os, "%s %d\n", symbol, count); + else + ccprintf(os, "%#x %d\n", pc, count); + } + + ccprintf(os, ">>>function data\n"); + top.dump("top", 0, symtab, os); +} + +void +FunctionProfile::sample(ProfileNode *node, Addr pc) +{ + node->count++; + + Addr symaddr; + if (symtab->findNearestAddr(pc, symaddr)) { + pc_count[symaddr]++; + } else { + // record PC even if we don't have a symbol to avoid + // silently biasing the histogram + pc_count[pc]++; + } +} diff --git a/src/cpu/profile.hh b/src/cpu/profile.hh new file mode 100644 index 000000000..d55c9eec9 --- /dev/null +++ b/src/cpu/profile.hh @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_PROFILE_HH__ +#define __CPU_PROFILE_HH__ + +#include <map> + +#include "cpu/static_inst.hh" +#include "sim/host.hh" +#include "arch/stacktrace.hh" + +class ExecContext; + +class ProfileNode +{ + private: + friend class FunctionProfile; + + typedef std::map<Addr, ProfileNode *> ChildList; + ChildList children; + + public: + Counter count; + + public: + ProfileNode(); + + void dump(const std::string &symbol, uint64_t id, + const SymbolTable *symtab, std::ostream &os) const; + void clear(); +}; + +class Callback; +class FunctionProfile +{ + private: + Callback *reset; + const SymbolTable *symtab; + ProfileNode top; + std::map<Addr, Counter> pc_count; + StackTrace trace; + + public: + FunctionProfile(const SymbolTable *symtab); + ~FunctionProfile(); + + ProfileNode *consume(ExecContext *xc, StaticInstPtr inst); + ProfileNode *consume(const std::vector<Addr> &stack); + void clear(); + void dump(ExecContext *xc, std::ostream &out) const; + void sample(ProfileNode *node, Addr pc); +}; + +inline ProfileNode * +FunctionProfile::consume(ExecContext *xc, StaticInstPtr inst) +{ + if (!trace.trace(xc, inst)) + return NULL; + trace.dprintf(); + return consume(trace.getstack()); +} + +#endif // __CPU_PROFILE_HH__ diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc new file mode 100644 index 000000000..e9422b9c0 --- /dev/null +++ b/src/cpu/simple/atomic.cc @@ -0,0 +1,560 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/utility.hh" +#include "cpu/exetrace.hh" +#include "cpu/simple/atomic.hh" +#include "mem/packet_impl.hh" +#include "sim/builder.hh" + +using namespace std; +using namespace TheISA; + +AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + + +void +AtomicSimpleCPU::TickEvent::process() +{ + cpu->tick(); +} + +const char * +AtomicSimpleCPU::TickEvent::description() +{ + return "AtomicSimpleCPU tick event"; +} + + +void +AtomicSimpleCPU::init() +{ + //Create Memory Ports (conect them up) + Port *mem_dport = mem->getPort(""); + dcachePort.setPeer(mem_dport); + mem_dport->setPeer(&dcachePort); + + Port *mem_iport = mem->getPort(""); + icachePort.setPeer(mem_iport); + mem_iport->setPeer(&icachePort); + + BaseCPU::init(); +#if FULL_SYSTEM + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + + // initialize CPU, including PC + TheISA::initCPU(xc, xc->readCpuId()); + } +#endif +} + +bool +AtomicSimpleCPU::CpuPort::recvTiming(Packet *pkt) +{ + panic("AtomicSimpleCPU doesn't expect recvAtomic callback!"); + return true; +} + +Tick +AtomicSimpleCPU::CpuPort::recvAtomic(Packet *pkt) +{ + panic("AtomicSimpleCPU doesn't expect recvAtomic callback!"); + return curTick; +} + +void +AtomicSimpleCPU::CpuPort::recvFunctional(Packet *pkt) +{ + panic("AtomicSimpleCPU doesn't expect recvFunctional callback!"); +} + +void +AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); +} + +Packet * +AtomicSimpleCPU::CpuPort::recvRetry() +{ + panic("AtomicSimpleCPU doesn't expect recvRetry callback!"); + return NULL; +} + + +AtomicSimpleCPU::AtomicSimpleCPU(Params *p) + : BaseSimpleCPU(p), tickEvent(this), + width(p->width), simulate_stalls(p->simulate_stalls), + icachePort(this), dcachePort(this) +{ + _status = Idle; + + ifetch_req = new Request(true); + ifetch_req->setAsid(0); + // @todo fix me and get the real cpu iD!!! + ifetch_req->setCpuNum(0); + ifetch_req->setSize(sizeof(MachInst)); + ifetch_pkt = new Packet; + ifetch_pkt->cmd = Read; + ifetch_pkt->dataStatic(&inst); + ifetch_pkt->req = ifetch_req; + ifetch_pkt->size = sizeof(MachInst); + ifetch_pkt->dest = Packet::Broadcast; + + data_read_req = new Request(true); + // @todo fix me and get the real cpu iD!!! + data_read_req->setCpuNum(0); + data_read_req->setAsid(0); + data_read_pkt = new Packet; + data_read_pkt->cmd = Read; + data_read_pkt->dataStatic(&dataReg); + data_read_pkt->req = data_read_req; + data_read_pkt->dest = Packet::Broadcast; + + data_write_req = new Request(true); + // @todo fix me and get the real cpu iD!!! + data_write_req->setCpuNum(0); + data_write_req->setAsid(0); + data_write_pkt = new Packet; + data_write_pkt->cmd = Write; + data_write_pkt->req = data_write_req; + data_write_pkt->dest = Packet::Broadcast; +} + + +AtomicSimpleCPU::~AtomicSimpleCPU() +{ +} + +void +AtomicSimpleCPU::serialize(ostream &os) +{ + BaseSimpleCPU::serialize(os); + SERIALIZE_ENUM(_status); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); +} + +void +AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) +{ + BaseSimpleCPU::unserialize(cp, section); + UNSERIALIZE_ENUM(_status); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); +} + +void +AtomicSimpleCPU::switchOut(Sampler *s) +{ + sampler = s; + if (status() == Running) { + _status = SwitchedOut; + + tickEvent.squash(); + } + sampler->signalSwitched(); +} + + +void +AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + assert(!tickEvent.scheduled()); + + // if any of this CPU's ExecContexts are active, mark the CPU as + // running and schedule its tick event. + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && _status != Running) { + _status = Running; + tickEvent.schedule(curTick); + break; + } + } +} + + +void +AtomicSimpleCPU::activateContext(int thread_num, int delay) +{ + assert(thread_num == 0); + assert(cpuXC); + + assert(_status == Idle); + assert(!tickEvent.scheduled()); + + notIdleFraction++; + tickEvent.schedule(curTick + cycles(delay)); + _status = Running; +} + + +void +AtomicSimpleCPU::suspendContext(int thread_num) +{ + assert(thread_num == 0); + assert(cpuXC); + + assert(_status == Running); + + // tick event may not be scheduled if this gets called from inside + // an instruction's execution, e.g. "quiesce" + if (tickEvent.scheduled()) + tickEvent.deschedule(); + + notIdleFraction--; + _status = Idle; +} + + +template <class T> +Fault +AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) +{ + data_read_req->setVaddr(addr); + data_read_req->setSize(sizeof(T)); + data_read_req->setFlags(flags); + data_read_req->setTime(curTick); + + if (traceData) { + traceData->setAddr(addr); + } + + // translate to physical address + Fault fault = cpuXC->translateDataReadReq(data_read_req); + + // Now do the access. + if (fault == NoFault) { + data_read_pkt->reset(); + data_read_pkt->addr = data_read_req->getPaddr(); + data_read_pkt->size = sizeof(T); + + dcache_complete = dcachePort.sendAtomic(data_read_pkt); + dcache_access = true; + + assert(data_read_pkt->result == Success); + data = data_read_pkt->get<T>(); + + } + + // This will need a new way to tell if it has a dcache attached. + if (data_read_req->getFlags() & UNCACHEABLE) + recordEvent("Uncached Read"); + + return fault; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template +Fault +AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); + +template +Fault +AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); + +template +Fault +AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); + +template +Fault +AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags) +{ + return read(addr, *(uint64_t*)&data, flags); +} + +template<> +Fault +AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags) +{ + return read(addr, *(uint32_t*)&data, flags); +} + + +template<> +Fault +AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) +{ + return read(addr, (uint32_t&)data, flags); +} + + +template <class T> +Fault +AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + data_write_req->setVaddr(addr); + data_write_req->setTime(curTick); + data_write_req->setSize(sizeof(T)); + data_write_req->setFlags(flags); + + if (traceData) { + traceData->setAddr(addr); + } + + // translate to physical address + Fault fault = cpuXC->translateDataWriteReq(data_write_req); + + // Now do the access. + if (fault == NoFault) { + data_write_pkt->reset(); + data = htog(data); + data_write_pkt->dataStatic(&data); + data_write_pkt->addr = data_write_req->getPaddr(); + data_write_pkt->size = sizeof(T); + + dcache_complete = dcachePort.sendAtomic(data_write_pkt); + dcache_access = true; + + assert(data_write_pkt->result == Success); + + if (res && data_write_req->getFlags() & LOCKED) { + *res = data_write_req->getScResult(); + } + } + + // This will need a new way to tell if it's hooked up to a cache or not. + if (data_write_req->getFlags() & UNCACHEABLE) + recordEvent("Uncached Write"); + + // If the write needs to have a fault on the access, consider calling + // changeStatus() and changing it to "bad addr write" or something. + return fault; +} + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template +Fault +AtomicSimpleCPU::write(uint64_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +AtomicSimpleCPU::write(uint32_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +AtomicSimpleCPU::write(uint16_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +AtomicSimpleCPU::write(uint8_t data, Addr addr, + unsigned flags, uint64_t *res); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint64_t*)&data, addr, flags, res); +} + +template<> +Fault +AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint32_t*)&data, addr, flags, res); +} + + +template<> +Fault +AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) +{ + return write((uint32_t)data, addr, flags, res); +} + + +void +AtomicSimpleCPU::tick() +{ + Tick latency = cycles(1); // instruction takes one cycle by default + + for (int i = 0; i < width; ++i) { + numCycles++; + + checkForInterrupts(); + + ifetch_req->resetMin(); + ifetch_pkt->reset(); + Fault fault = setupFetchPacket(ifetch_pkt); + + if (fault == NoFault) { + Tick icache_complete = icachePort.sendAtomic(ifetch_pkt); + // ifetch_req is initialized to read the instruction directly + // into the CPU object's inst field. + + dcache_access = false; // assume no dcache access + preExecute(); + fault = curStaticInst->execute(this, traceData); + postExecute(); + + if (traceData) { + traceData->finalize(); + } + + if (simulate_stalls) { + // This calculation assumes that the icache and dcache + // access latencies are always a multiple of the CPU's + // cycle time. If not, the next tick event may get + // scheduled at a non-integer multiple of the CPU + // cycle time. + Tick icache_stall = icache_complete - curTick - cycles(1); + Tick dcache_stall = + dcache_access ? dcache_complete - curTick - cycles(1) : 0; + latency += icache_stall + dcache_stall; + } + + } + + advancePC(fault); + } + + if (_status != Idle) + tickEvent.schedule(curTick + latency); +} + + +//////////////////////////////////////////////////////////////////////// +// +// AtomicSimpleCPU Simulation Object +// +BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) + + Param<Counter> max_insts_any_thread; + Param<Counter> max_insts_all_threads; + Param<Counter> max_loads_any_thread; + Param<Counter> max_loads_all_threads; + SimObjectParam<MemObject *> mem; + +#if FULL_SYSTEM + SimObjectParam<AlphaITB *> itb; + SimObjectParam<AlphaDTB *> dtb; + SimObjectParam<System *> system; + Param<int> cpu_id; + Param<Tick> profile; +#else + SimObjectParam<Process *> workload; +#endif // FULL_SYSTEM + + Param<int> clock; + + Param<bool> defer_registration; + Param<int> width; + Param<bool> function_trace; + Param<Tick> function_trace_start; + Param<bool> simulate_stalls; + +END_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) + + INIT_PARAM(max_insts_any_thread, + "terminate when any thread reaches this inst count"), + INIT_PARAM(max_insts_all_threads, + "terminate when all threads have reached this inst count"), + INIT_PARAM(max_loads_any_thread, + "terminate when any thread reaches this load count"), + INIT_PARAM(max_loads_all_threads, + "terminate when all threads have reached this load count"), + INIT_PARAM(mem, "memory"), + +#if FULL_SYSTEM + INIT_PARAM(itb, "Instruction TLB"), + INIT_PARAM(dtb, "Data TLB"), + INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(profile, ""), +#else + INIT_PARAM(workload, "processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + INIT_PARAM(width, "cpu width"), + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace"), + INIT_PARAM(simulate_stalls, "Simulate cache stall cycles") + +END_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) + + +CREATE_SIM_OBJECT(AtomicSimpleCPU) +{ + AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params(); + params->name = getInstanceName(); + params->numberOfThreads = 1; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + params->deferRegistration = defer_registration; + params->clock = clock; + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + params->width = width; + params->simulate_stalls = simulate_stalls; + params->mem = mem; + +#if FULL_SYSTEM + params->itb = itb; + params->dtb = dtb; + params->system = system; + params->cpu_id = cpu_id; + params->profile = profile; +#else + params->process = workload; +#endif + + AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params); + return cpu; +} + +REGISTER_SIM_OBJECT("AtomicSimpleCPU", AtomicSimpleCPU) + diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh new file mode 100644 index 000000000..d0ba085f0 --- /dev/null +++ b/src/cpu/simple/atomic.hh @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_SIMPLE_ATOMIC_HH__ +#define __CPU_SIMPLE_ATOMIC_HH__ + +#include "cpu/simple/base.hh" + +class AtomicSimpleCPU : public BaseSimpleCPU +{ + public: + + struct Params : public BaseSimpleCPU::Params { + int width; + bool simulate_stalls; + }; + + AtomicSimpleCPU(Params *params); + virtual ~AtomicSimpleCPU(); + + virtual void init(); + + public: + // + enum Status { + Running, + Idle, + SwitchedOut + }; + + protected: + Status _status; + + Status status() const { return _status; } + + private: + + struct TickEvent : public Event + { + AtomicSimpleCPU *cpu; + + TickEvent(AtomicSimpleCPU *c); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + const int width; + const bool simulate_stalls; + + // main simulation loop (one cycle) + void tick(); + + class CpuPort : public Port + { + + AtomicSimpleCPU *cpu; + + public: + + CpuPort(AtomicSimpleCPU *_cpu) + : cpu(_cpu) + { } + + protected: + + virtual bool recvTiming(Packet *pkt); + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual Packet *recvRetry(); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + }; + + CpuPort icachePort; + CpuPort dcachePort; + + Request *ifetch_req; + Packet *ifetch_pkt; + Request *data_read_req; + Packet *data_read_pkt; + Request *data_write_req; + Packet *data_write_pkt; + + bool dcache_access; + Tick dcache_complete; + + public: + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + void switchOut(Sampler *s); + void takeOverFrom(BaseCPU *oldCPU); + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); +}; + +#endif // __CPU_SIMPLE_ATOMIC_HH__ diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc new file mode 100644 index 000000000..30c002ed5 --- /dev/null +++ b/src/cpu/simple/base.cc @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/utility.hh" +#include "base/cprintf.hh" +#include "base/inifile.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "base/pollevent.hh" +#include "base/range.hh" +#include "base/stats/events.hh" +#include "base/trace.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "cpu/profile.hh" +#include "cpu/sampler/sampler.hh" +#include "cpu/simple/base.hh" +#include "cpu/smt.hh" +#include "cpu/static_inst.hh" +#include "kern/kernel_stats.hh" +#include "mem/packet_impl.hh" +#include "sim/byteswap.hh" +#include "sim/builder.hh" +#include "sim/debug.hh" +#include "sim/host.hh" +#include "sim/sim_events.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#if FULL_SYSTEM +#include "base/remote_gdb.hh" +#include "sim/system.hh" +#include "arch/tlb.hh" +#include "arch/stacktrace.hh" +#include "arch/vtophys.hh" +#else // !FULL_SYSTEM +#include "mem/mem_object.hh" +#endif // FULL_SYSTEM + +using namespace std; +using namespace TheISA; + +BaseSimpleCPU::BaseSimpleCPU(Params *p) + : BaseCPU(p), mem(p->mem), cpuXC(NULL) +{ +#if FULL_SYSTEM + cpuXC = new CPUExecContext(this, 0, p->system, p->itb, p->dtb); +#else + cpuXC = new CPUExecContext(this, /* thread_num */ 0, p->process, + /* asid */ 0, mem); +#endif // !FULL_SYSTEM + + xcProxy = cpuXC->getProxy(); + + numInst = 0; + startNumInst = 0; + numLoad = 0; + startNumLoad = 0; + lastIcacheStall = 0; + lastDcacheStall = 0; + + execContexts.push_back(xcProxy); +} + +BaseSimpleCPU::~BaseSimpleCPU() +{ +} + +void +BaseSimpleCPU::deallocateContext(int thread_num) +{ + // for now, these are equivalent + suspendContext(thread_num); +} + + +void +BaseSimpleCPU::haltContext(int thread_num) +{ + // for now, these are equivalent + suspendContext(thread_num); +} + + +void +BaseSimpleCPU::regStats() +{ + using namespace Stats; + + BaseCPU::regStats(); + + numInsts + .name(name() + ".num_insts") + .desc("Number of instructions executed") + ; + + numMemRefs + .name(name() + ".num_refs") + .desc("Number of memory references") + ; + + notIdleFraction + .name(name() + ".not_idle_fraction") + .desc("Percentage of non-idle cycles") + ; + + idleFraction + .name(name() + ".idle_fraction") + .desc("Percentage of idle cycles") + ; + + icacheStallCycles + .name(name() + ".icache_stall_cycles") + .desc("ICache total stall cycles") + .prereq(icacheStallCycles) + ; + + dcacheStallCycles + .name(name() + ".dcache_stall_cycles") + .desc("DCache total stall cycles") + .prereq(dcacheStallCycles) + ; + + icacheRetryCycles + .name(name() + ".icache_retry_cycles") + .desc("ICache total retry cycles") + .prereq(icacheRetryCycles) + ; + + dcacheRetryCycles + .name(name() + ".dcache_retry_cycles") + .desc("DCache total retry cycles") + .prereq(dcacheRetryCycles) + ; + + idleFraction = constant(1.0) - notIdleFraction; +} + +void +BaseSimpleCPU::resetStats() +{ + startNumInst = numInst; + // notIdleFraction = (_status != Idle); +} + +void +BaseSimpleCPU::serialize(ostream &os) +{ + BaseCPU::serialize(os); + SERIALIZE_SCALAR(inst); + nameOut(os, csprintf("%s.xc", name())); + cpuXC->serialize(os); +} + +void +BaseSimpleCPU::unserialize(Checkpoint *cp, const string §ion) +{ + BaseCPU::unserialize(cp, section); + UNSERIALIZE_SCALAR(inst); + cpuXC->unserialize(cp, csprintf("%s.xc", section)); +} + +void +change_thread_state(int thread_number, int activate, int priority) +{ +} + +Fault +BaseSimpleCPU::copySrcTranslate(Addr src) +{ +#if 0 + static bool no_warn = true; + int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; + // Only support block sizes of 64 atm. + assert(blk_size == 64); + int offset = src & (blk_size - 1); + + // Make sure block doesn't span page + if (no_warn && + (src & PageMask) != ((src + blk_size) & PageMask) && + (src >> 40) != 0xfffffc) { + warn("Copied block source spans pages %x.", src); + no_warn = false; + } + + memReq->reset(src & ~(blk_size - 1), blk_size); + + // translate to physical address + Fault fault = cpuXC->translateDataReadReq(req); + + if (fault == NoFault) { + cpuXC->copySrcAddr = src; + cpuXC->copySrcPhysAddr = memReq->paddr + offset; + } else { + assert(!fault->isAlignmentFault()); + + cpuXC->copySrcAddr = 0; + cpuXC->copySrcPhysAddr = 0; + } + return fault; +#else + return NoFault; +#endif +} + +Fault +BaseSimpleCPU::copy(Addr dest) +{ +#if 0 + static bool no_warn = true; + int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; + // Only support block sizes of 64 atm. + assert(blk_size == 64); + uint8_t data[blk_size]; + //assert(cpuXC->copySrcAddr); + int offset = dest & (blk_size - 1); + + // Make sure block doesn't span page + if (no_warn && + (dest & PageMask) != ((dest + blk_size) & PageMask) && + (dest >> 40) != 0xfffffc) { + no_warn = false; + warn("Copied block destination spans pages %x. ", dest); + } + + memReq->reset(dest & ~(blk_size -1), blk_size); + // translate to physical address + Fault fault = cpuXC->translateDataWriteReq(req); + + if (fault == NoFault) { + Addr dest_addr = memReq->paddr + offset; + // Need to read straight from memory since we have more than 8 bytes. + memReq->paddr = cpuXC->copySrcPhysAddr; + cpuXC->mem->read(memReq, data); + memReq->paddr = dest_addr; + cpuXC->mem->write(memReq, data); + if (dcacheInterface) { + memReq->cmd = Copy; + memReq->completionEvent = NULL; + memReq->paddr = cpuXC->copySrcPhysAddr; + memReq->dest = dest_addr; + memReq->size = 64; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + dcacheInterface->access(memReq); + } + } + else + assert(!fault->isAlignmentFault()); + + return fault; +#else + panic("copy not implemented"); + return NoFault; +#endif +} + +#if FULL_SYSTEM +Addr +BaseSimpleCPU::dbg_vtophys(Addr addr) +{ + return vtophys(xcProxy, addr); +} +#endif // FULL_SYSTEM + +#if FULL_SYSTEM +void +BaseSimpleCPU::post_interrupt(int int_num, int index) +{ + BaseCPU::post_interrupt(int_num, index); + + if (cpuXC->status() == ExecContext::Suspended) { + DPRINTF(IPI,"Suspended Processor awoke\n"); + cpuXC->activate(); + } +} +#endif // FULL_SYSTEM + +void +BaseSimpleCPU::checkForInterrupts() +{ +#if FULL_SYSTEM + if (checkInterrupts && check_interrupts() && !cpuXC->inPalMode()) { + int ipl = 0; + int summary = 0; + checkInterrupts = false; + + if (cpuXC->readMiscReg(IPR_SIRR)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (cpuXC->readMiscReg(IPR_SIRR) & (ULL(1) << i)) { + // See table 4-19 of 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = cpuXC->cpu->intr_status(); + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + + if (cpuXC->readMiscReg(IPR_ASTRR)) + panic("asynchronous traps not implemented\n"); + + if (ipl && ipl > cpuXC->readMiscReg(IPR_IPLR)) { + cpuXC->setMiscReg(IPR_ISR, summary); + cpuXC->setMiscReg(IPR_INTID, ipl); + + Fault(new InterruptFault)->invoke(xcProxy); + + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + cpuXC->readMiscReg(IPR_IPLR), ipl, summary); + } + } +#endif +} + + +Fault +BaseSimpleCPU::setupFetchPacket(Packet *ifetch_pkt) +{ + // Try to fetch an instruction + + // set up memory request for instruction fetch + + DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",cpuXC->readPC(), + cpuXC->readNextPC(),cpuXC->readNextNPC()); + + Request *ifetch_req = ifetch_pkt->req; + ifetch_req->setVaddr(cpuXC->readPC() & ~3); + ifetch_req->setTime(curTick); +#if FULL_SYSTEM + ifetch_req->setFlags((cpuXC->readPC() & 1) ? PHYSICAL : 0); +#else + ifetch_req->setFlags(0); +#endif + + Fault fault = cpuXC->translateInstReq(ifetch_req); + + if (fault == NoFault) { + ifetch_pkt->addr = ifetch_req->getPaddr(); + } + + return fault; +} + + +void +BaseSimpleCPU::preExecute() +{ + // maintain $r0 semantics + cpuXC->setIntReg(ZeroReg, 0); +#if THE_ISA == ALPHA_ISA + cpuXC->setFloatReg(ZeroReg, 0.0); +#endif // ALPHA_ISA + + // keep an instruction count + numInst++; + numInsts++; + + cpuXC->func_exe_inst++; + + // check for instruction-count-based events + comInstEventQueue[0]->serviceEvents(numInst); + + // decode the instruction + inst = gtoh(inst); + curStaticInst = StaticInst::decode(makeExtMI(inst, cpuXC->readPC())); + + traceData = Trace::getInstRecord(curTick, xcProxy, this, curStaticInst, + cpuXC->readPC()); + + DPRINTF(Decode,"Decode: Decoded %s instruction (opcode: 0x%x): 0x%x\n", + curStaticInst->getName(), curStaticInst->getOpcode(), + curStaticInst->machInst); + +#if FULL_SYSTEM + cpuXC->setInst(inst); +#endif // FULL_SYSTEM +} + +void +BaseSimpleCPU::postExecute() +{ +#if FULL_SYSTEM + if (system->kernelBinning->fnbin) { + assert(kernelStats); + system->kernelBinning->execute(xcProxy, inst); + } + + if (cpuXC->profile) { + bool usermode = + (cpuXC->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0; + cpuXC->profilePC = usermode ? 1 : cpuXC->readPC(); + ProfileNode *node = cpuXC->profile->consume(xcProxy, inst); + if (node) + cpuXC->profileNode = node; + } +#endif + + if (curStaticInst->isMemRef()) { + numMemRefs++; + } + + if (curStaticInst->isLoad()) { + ++numLoad; + comLoadEventQueue[0]->serviceEvents(numLoad); + } + + traceFunctions(cpuXC->readPC()); +} + + +void +BaseSimpleCPU::advancePC(Fault fault) +{ + if (fault != NoFault) { +#if FULL_SYSTEM + fault->invoke(xcProxy); +#else // !FULL_SYSTEM + fatal("fault (%s) detected @ PC %08p", fault->name(), cpuXC->readPC()); +#endif // FULL_SYSTEM + } + else { + // go to the next instruction + cpuXC->setPC(cpuXC->readNextPC()); +#if THE_ISA == ALPHA_ISA + cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst)); +#else + cpuXC->setNextPC(cpuXC->readNextNPC()); + cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst)); +#endif + + } + +#if FULL_SYSTEM + Addr oldpc; + do { + oldpc = cpuXC->readPC(); + system->pcEventQueue.service(xcProxy); + } while (oldpc != cpuXC->readPC()); +#endif +} + diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh new file mode 100644 index 000000000..4c0e6f3c7 --- /dev/null +++ b/src/cpu/simple/base.hh @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_SIMPLE_BASE_HH__ +#define __CPU_SIMPLE_BASE_HH__ + +#include "base/statistics.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/pc_event.hh" +#include "cpu/sampler/sampler.hh" +#include "cpu/static_inst.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +#include "mem/request.hh" +#include "sim/eventq.hh" + +// forward declarations +#if FULL_SYSTEM +class Processor; +class AlphaITB; +class AlphaDTB; +class MemObject; + +class RemoteGDB; +class GDBListener; + +#else + +class Process; + +#endif // FULL_SYSTEM + +class ExecContext; +class Checkpoint; + +namespace Trace { + class InstRecord; +} + + +class BaseSimpleCPU : public BaseCPU +{ + protected: + typedef TheISA::MachInst MachInst; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + + MemObject *mem; + + protected: + Trace::InstRecord *traceData; + + public: + void post_interrupt(int int_num, int index); + + void zero_fill_64(Addr addr) { + static int warned = 0; + if (!warned) { + warn ("WH64 is not implemented"); + warned = 1; + } + }; + + public: + struct Params : public BaseCPU::Params + { + MemObject *mem; +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#else + Process *process; +#endif + }; + BaseSimpleCPU(Params *params); + virtual ~BaseSimpleCPU(); + + public: + // execution context + CPUExecContext *cpuXC; + + ExecContext *xcProxy; + +#if FULL_SYSTEM + Addr dbg_vtophys(Addr addr); + + bool interval_stats; +#endif + + // current instruction + MachInst inst; + + // Static data storage + TheISA::IntReg dataReg; + + // Pointer to the sampler that is telling us to switchover. + // Used to signal the completion of the pipe drain and schedule + // the next switchover + Sampler *sampler; + + StaticInstPtr curStaticInst; + + void checkForInterrupts(); + Fault setupFetchPacket(Packet *ifetch_pkt); + void preExecute(); + void postExecute(); + void advancePC(Fault fault); + + virtual void deallocateContext(int thread_num); + virtual void haltContext(int thread_num); + + // statistics + virtual void regStats(); + virtual void resetStats(); + + // number of simulated instructions + Counter numInst; + Counter startNumInst; + Stats::Scalar<> numInsts; + + virtual Counter totalInstructions() const + { + return numInst - startNumInst; + } + + // number of simulated memory references + Stats::Scalar<> numMemRefs; + + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + // number of idle cycles + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; + + // number of cycles stalled for I-cache responses + Stats::Scalar<> icacheStallCycles; + Counter lastIcacheStall; + + // number of cycles stalled for I-cache retries + Stats::Scalar<> icacheRetryCycles; + Counter lastIcacheRetry; + + // number of cycles stalled for D-cache responses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + // number of cycles stalled for D-cache retries + Stats::Scalar<> dcacheRetryCycles; + Counter lastDcacheRetry; + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + // These functions are only used in CPU models that split + // effective address computation from the actual memory access. + void setEA(Addr EA) { panic("BaseSimpleCPU::setEA() not implemented\n"); } + Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n"); } + + void prefetch(Addr addr, unsigned flags) + { + // need to do this... + } + + void writeHint(Addr addr, int size, unsigned flags) + { + // need to do this... + } + + Fault copySrcTranslate(Addr src); + + Fault copy(Addr dest); + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return cpuXC->readIntReg(si->srcRegIdx(idx)); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatReg(reg_idx, width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatReg(reg_idx); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatRegBits(reg_idx, width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatRegBits(reg_idx); + } + + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + cpuXC->setIntReg(si->destRegIdx(idx), val); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatReg(reg_idx, val, width); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatReg(reg_idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatRegBits(reg_idx, val, width); + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatRegBits(reg_idx, val); + } + + uint64_t readPC() { return cpuXC->readPC(); } + uint64_t readNextPC() { return cpuXC->readNextPC(); } + uint64_t readNextNPC() { return cpuXC->readNextNPC(); } + + void setPC(uint64_t val) { cpuXC->setPC(val); } + void setNextPC(uint64_t val) { cpuXC->setNextPC(val); } + void setNextNPC(uint64_t val) { cpuXC->setNextNPC(val); } + + MiscReg readMiscReg(int misc_reg) + { + return cpuXC->readMiscReg(misc_reg); + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return cpuXC->readMiscRegWithEffect(misc_reg, fault); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + return cpuXC->setMiscReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return cpuXC->setMiscRegWithEffect(misc_reg, val); + } + +#if FULL_SYSTEM + Fault hwrei() { return cpuXC->hwrei(); } + int readIntrFlag() { return cpuXC->readIntrFlag(); } + void setIntrFlag(int val) { cpuXC->setIntrFlag(val); } + bool inPalMode() { return cpuXC->inPalMode(); } + void ev5_trap(Fault fault) { fault->invoke(xcProxy); } + bool simPalCheck(int palFunc) { return cpuXC->simPalCheck(palFunc); } +#else + void syscall(int64_t callnum) { cpuXC->syscall(callnum); } +#endif + + bool misspeculating() { return cpuXC->misspeculating(); } + ExecContext *xcBase() { return xcProxy; } +}; + +#endif // __CPU_SIMPLE_BASE_HH__ diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc new file mode 100644 index 000000000..70b88c4b1 --- /dev/null +++ b/src/cpu/simple/timing.cc @@ -0,0 +1,570 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/utility.hh" +#include "cpu/exetrace.hh" +#include "cpu/simple/timing.hh" +#include "mem/packet_impl.hh" +#include "sim/builder.hh" + +using namespace std; +using namespace TheISA; + + +void +TimingSimpleCPU::init() +{ + //Create Memory Ports (conect them up) + Port *mem_dport = mem->getPort(""); + dcachePort.setPeer(mem_dport); + mem_dport->setPeer(&dcachePort); + + Port *mem_iport = mem->getPort(""); + icachePort.setPeer(mem_iport); + mem_iport->setPeer(&icachePort); + + BaseCPU::init(); +#if FULL_SYSTEM + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + + // initialize CPU, including PC + TheISA::initCPU(xc, xc->readCpuId()); + } +#endif +} + +Tick +TimingSimpleCPU::CpuPort::recvAtomic(Packet *pkt) +{ + panic("TimingSimpleCPU doesn't expect recvAtomic callback!"); + return curTick; +} + +void +TimingSimpleCPU::CpuPort::recvFunctional(Packet *pkt) +{ + panic("TimingSimpleCPU doesn't expect recvFunctional callback!"); +} + +void +TimingSimpleCPU::CpuPort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("TimingSimpleCPU doesn't expect recvStatusChange callback!"); +} + +TimingSimpleCPU::TimingSimpleCPU(Params *p) + : BaseSimpleCPU(p), icachePort(this), dcachePort(this) +{ + _status = Idle; + ifetch_pkt = dcache_pkt = NULL; +} + + +TimingSimpleCPU::~TimingSimpleCPU() +{ +} + +void +TimingSimpleCPU::serialize(ostream &os) +{ + BaseSimpleCPU::serialize(os); + SERIALIZE_ENUM(_status); +} + +void +TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) +{ + BaseSimpleCPU::unserialize(cp, section); + UNSERIALIZE_ENUM(_status); +} + +void +TimingSimpleCPU::switchOut(Sampler *s) +{ + sampler = s; + if (status() == Running) { + _status = SwitchedOut; + } + sampler->signalSwitched(); +} + + +void +TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + // if any of this CPU's ExecContexts are active, mark the CPU as + // running and schedule its tick event. + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && _status != Running) { + _status = Running; + break; + } + } +} + + +void +TimingSimpleCPU::activateContext(int thread_num, int delay) +{ + assert(thread_num == 0); + assert(cpuXC); + + assert(_status == Idle); + + notIdleFraction++; + _status = Running; + // kick things off by initiating the fetch of the next instruction + Event *e = + new EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch>(this, true); + e->schedule(curTick + cycles(delay)); +} + + +void +TimingSimpleCPU::suspendContext(int thread_num) +{ + assert(thread_num == 0); + assert(cpuXC); + + panic("TimingSimpleCPU::suspendContext not implemented"); + + assert(_status == Running); + + notIdleFraction--; + _status = Idle; +} + + +template <class T> +Fault +TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) +{ + Request *data_read_req = new Request(true); + + data_read_req->setVaddr(addr); + data_read_req->setSize(sizeof(T)); + data_read_req->setFlags(flags); + data_read_req->setTime(curTick); + + if (traceData) { + traceData->setAddr(data_read_req->getVaddr()); + } + + // translate to physical address + Fault fault = cpuXC->translateDataReadReq(data_read_req); + + // Now do the access. + if (fault == NoFault) { + Packet *data_read_pkt = new Packet; + data_read_pkt->cmd = Read; + data_read_pkt->req = data_read_req; + data_read_pkt->dataDynamic<T>(new T); + data_read_pkt->addr = data_read_req->getPaddr(); + data_read_pkt->size = sizeof(T); + data_read_pkt->dest = Packet::Broadcast; + + if (!dcachePort.sendTiming(data_read_pkt)) { + _status = DcacheRetry; + dcache_pkt = data_read_pkt; + } else { + _status = DcacheWaitResponse; + dcache_pkt = NULL; + } + } + + // This will need a new way to tell if it has a dcache attached. + if (data_read_req->getFlags() & UNCACHEABLE) + recordEvent("Uncached Read"); + + return fault; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template +Fault +TimingSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); + +template +Fault +TimingSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); + +template +Fault +TimingSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); + +template +Fault +TimingSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +TimingSimpleCPU::read(Addr addr, double &data, unsigned flags) +{ + return read(addr, *(uint64_t*)&data, flags); +} + +template<> +Fault +TimingSimpleCPU::read(Addr addr, float &data, unsigned flags) +{ + return read(addr, *(uint32_t*)&data, flags); +} + + +template<> +Fault +TimingSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) +{ + return read(addr, (uint32_t&)data, flags); +} + + +template <class T> +Fault +TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + Request *data_write_req = new Request(true); + data_write_req->setVaddr(addr); + data_write_req->setTime(curTick); + data_write_req->setSize(sizeof(T)); + data_write_req->setFlags(flags); + + // translate to physical address + Fault fault = cpuXC->translateDataWriteReq(data_write_req); + // Now do the access. + if (fault == NoFault) { + Packet *data_write_pkt = new Packet; + data_write_pkt->cmd = Write; + data_write_pkt->req = data_write_req; + data_write_pkt->allocate(); + data_write_pkt->size = sizeof(T); + data_write_pkt->set(data); + data_write_pkt->addr = data_write_req->getPaddr(); + data_write_pkt->dest = Packet::Broadcast; + + if (!dcachePort.sendTiming(data_write_pkt)) { + _status = DcacheRetry; + dcache_pkt = data_write_pkt; + } else { + _status = DcacheWaitResponse; + dcache_pkt = NULL; + } + } + + // This will need a new way to tell if it's hooked up to a cache or not. + if (data_write_req->getFlags() & UNCACHEABLE) + recordEvent("Uncached Write"); + + // If the write needs to have a fault on the access, consider calling + // changeStatus() and changing it to "bad addr write" or something. + return fault; +} + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template +Fault +TimingSimpleCPU::write(uint64_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +TimingSimpleCPU::write(uint32_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +TimingSimpleCPU::write(uint16_t data, Addr addr, + unsigned flags, uint64_t *res); + +template +Fault +TimingSimpleCPU::write(uint8_t data, Addr addr, + unsigned flags, uint64_t *res); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +TimingSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint64_t*)&data, addr, flags, res); +} + +template<> +Fault +TimingSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint32_t*)&data, addr, flags, res); +} + + +template<> +Fault +TimingSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) +{ + return write((uint32_t)data, addr, flags, res); +} + + +void +TimingSimpleCPU::fetch() +{ + checkForInterrupts(); + + Request *ifetch_req = new Request(true); + ifetch_req->setSize(sizeof(MachInst)); + + ifetch_pkt = new Packet; + ifetch_pkt->cmd = Read; + ifetch_pkt->dataStatic(&inst); + ifetch_pkt->req = ifetch_req; + ifetch_pkt->size = sizeof(MachInst); + ifetch_pkt->dest = Packet::Broadcast; + + Fault fault = setupFetchPacket(ifetch_pkt); + if (fault == NoFault) { + if (!icachePort.sendTiming(ifetch_pkt)) { + // Need to wait for retry + _status = IcacheRetry; + } else { + // Need to wait for cache to respond + _status = IcacheWaitResponse; + // ownership of packet transferred to memory system + ifetch_pkt = NULL; + } + } else { + panic("TimingSimpleCPU fetch fault handling not implemented"); + } +} + + +void +TimingSimpleCPU::completeInst(Fault fault) +{ + postExecute(); + + if (traceData) { + traceData->finalize(); + } + + advancePC(fault); + + if (_status == Running) { + // kick off fetch of next instruction... callback from icache + // response will cause that instruction to be executed, + // keeping the CPU running. + fetch(); + } +} + + +void +TimingSimpleCPU::completeIfetch() +{ + // received a response from the icache: execute the received + // instruction + assert(_status == IcacheWaitResponse); + _status = Running; + preExecute(); + if (curStaticInst->isMemRef()) { + // load or store: just send to dcache + Fault fault = curStaticInst->initiateAcc(this, traceData); + assert(fault == NoFault); + assert(_status == DcacheWaitResponse); + // instruction will complete in dcache response callback + } else { + // non-memory instruction: execute completely now + Fault fault = curStaticInst->execute(this, traceData); + completeInst(fault); + } +} + + +bool +TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt) +{ + cpu->completeIfetch(); + return true; +} + +Packet * +TimingSimpleCPU::IcachePort::recvRetry() +{ + // we shouldn't get a retry unless we have a packet that we're + // waiting to transmit + assert(cpu->ifetch_pkt != NULL); + assert(cpu->_status == IcacheRetry); + cpu->_status = IcacheWaitResponse; + Packet *tmp = cpu->ifetch_pkt; + cpu->ifetch_pkt = NULL; + return tmp; +} + +void +TimingSimpleCPU::completeDataAccess(Packet *pkt) +{ + // received a response from the dcache: complete the load or store + // instruction + assert(pkt->result == Success); + assert(_status == DcacheWaitResponse); + _status = Running; + + Fault fault = curStaticInst->completeAcc(pkt, this, traceData); + + completeInst(fault); +} + + + +bool +TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) +{ + cpu->completeDataAccess(pkt); + return true; +} + +Packet * +TimingSimpleCPU::DcachePort::recvRetry() +{ + // we shouldn't get a retry unless we have a packet that we're + // waiting to transmit + assert(cpu->dcache_pkt != NULL); + assert(cpu->_status == DcacheRetry); + cpu->_status = DcacheWaitResponse; + Packet *tmp = cpu->dcache_pkt; + cpu->dcache_pkt = NULL; + return tmp; +} + + +//////////////////////////////////////////////////////////////////////// +// +// TimingSimpleCPU Simulation Object +// +BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) + + Param<Counter> max_insts_any_thread; + Param<Counter> max_insts_all_threads; + Param<Counter> max_loads_any_thread; + Param<Counter> max_loads_all_threads; + SimObjectParam<MemObject *> mem; + +#if FULL_SYSTEM + SimObjectParam<AlphaITB *> itb; + SimObjectParam<AlphaDTB *> dtb; + SimObjectParam<System *> system; + Param<int> cpu_id; + Param<Tick> profile; +#else + SimObjectParam<Process *> workload; +#endif // FULL_SYSTEM + + Param<int> clock; + + Param<bool> defer_registration; + Param<int> width; + Param<bool> function_trace; + Param<Tick> function_trace_start; + Param<bool> simulate_stalls; + +END_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) + + INIT_PARAM(max_insts_any_thread, + "terminate when any thread reaches this inst count"), + INIT_PARAM(max_insts_all_threads, + "terminate when all threads have reached this inst count"), + INIT_PARAM(max_loads_any_thread, + "terminate when any thread reaches this load count"), + INIT_PARAM(max_loads_all_threads, + "terminate when all threads have reached this load count"), + INIT_PARAM(mem, "memory"), + +#if FULL_SYSTEM + INIT_PARAM(itb, "Instruction TLB"), + INIT_PARAM(dtb, "Data TLB"), + INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(profile, ""), +#else + INIT_PARAM(workload, "processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + INIT_PARAM(width, "cpu width"), + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace"), + INIT_PARAM(simulate_stalls, "Simulate cache stall cycles") + +END_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) + + +CREATE_SIM_OBJECT(TimingSimpleCPU) +{ + TimingSimpleCPU::Params *params = new TimingSimpleCPU::Params(); + params->name = getInstanceName(); + params->numberOfThreads = 1; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + params->deferRegistration = defer_registration; + params->clock = clock; + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + params->mem = mem; + +#if FULL_SYSTEM + params->itb = itb; + params->dtb = dtb; + params->system = system; + params->cpu_id = cpu_id; + params->profile = profile; +#else + params->process = workload; +#endif + + TimingSimpleCPU *cpu = new TimingSimpleCPU(params); + return cpu; +} + +REGISTER_SIM_OBJECT("TimingSimpleCPU", TimingSimpleCPU) + diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh new file mode 100644 index 000000000..83be025d9 --- /dev/null +++ b/src/cpu/simple/timing.hh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_SIMPLE_TIMING_HH__ +#define __CPU_SIMPLE_TIMING_HH__ + +#include "cpu/simple/base.hh" + +class TimingSimpleCPU : public BaseSimpleCPU +{ + public: + + struct Params : public BaseSimpleCPU::Params { + }; + + TimingSimpleCPU(Params *params); + virtual ~TimingSimpleCPU(); + + virtual void init(); + + public: + // + enum Status { + Idle, + Running, + IcacheRetry, + IcacheWaitResponse, + IcacheWaitSwitch, + DcacheRetry, + DcacheWaitResponse, + DcacheWaitSwitch, + SwitchedOut + }; + + protected: + Status _status; + + Status status() const { return _status; } + + private: + + class CpuPort : public Port + { + protected: + TimingSimpleCPU *cpu; + + public: + + CpuPort(TimingSimpleCPU *_cpu) + : cpu(_cpu) + { } + + protected: + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + }; + + class IcachePort : public CpuPort + { + public: + + IcachePort(TimingSimpleCPU *_cpu) + : CpuPort(_cpu) + { } + + protected: + + virtual bool recvTiming(Packet *pkt); + + virtual Packet *recvRetry(); + }; + + class DcachePort : public CpuPort + { + public: + + DcachePort(TimingSimpleCPU *_cpu) + : CpuPort(_cpu) + { } + + protected: + + virtual bool recvTiming(Packet *pkt); + + virtual Packet *recvRetry(); + }; + + IcachePort icachePort; + DcachePort dcachePort; + + Packet *ifetch_pkt; + Packet *dcache_pkt; + + public: + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + void switchOut(Sampler *s); + void takeOverFrom(BaseCPU *oldCPU); + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + void fetch(); + void completeInst(Fault fault); + void completeIfetch(); + void completeDataAccess(Packet *); +}; + +#endif // __CPU_SIMPLE_TIMING_HH__ diff --git a/src/cpu/smt.hh b/src/cpu/smt.hh new file mode 100644 index 000000000..9c52abf95 --- /dev/null +++ b/src/cpu/smt.hh @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Defines SMT_MAX_THREADS. + */ + +#ifndef __SMT_HH__ +#define __SMT_HH__ + +#ifndef SMT_MAX_THREADS +/** The number of TPUs in any processor. */ +#define SMT_MAX_THREADS 4 +#endif + +/** + * The maximum number of active threads across all cpus. Used to + * initialize per-thread statistics in the cache. + * + * NB: Be careful to only use it once all the CPUs that you care about + * have been initialized + */ +extern int maxThreadsPerCPU; + +/** + * Changes the status and priority of the thread with the given number. + * @param thread_number The thread to change. + * @param activate The new active status. + * @param priority The new priority. + */ +void change_thread_state(int thread_number, int activate, int priority); + +#endif // __SMT_HH__ diff --git a/src/cpu/static_inst.cc b/src/cpu/static_inst.cc new file mode 100644 index 000000000..c307dc6fc --- /dev/null +++ b/src/cpu/static_inst.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <iostream> +#include "cpu/static_inst.hh" +#include "sim/root.hh" + +StaticInstPtr StaticInst::nullStaticInstPtr; + +// Define the decode cache hash map. +StaticInst::DecodeCache StaticInst::decodeCache; + +void +StaticInst::dumpDecodeCacheStats() +{ + using namespace std; + + cerr << "Decode hash table stats @ " << curTick << ":" << endl; + cerr << "\tnum entries = " << decodeCache.size() << endl; + cerr << "\tnum buckets = " << decodeCache.bucket_count() << endl; + vector<int> hist(100, 0); + int max_hist = 0; + for (int i = 0; i < decodeCache.bucket_count(); ++i) { + int count = decodeCache.elems_in_bucket(i); + if (count > max_hist) + max_hist = count; + hist[count]++; + } + for (int i = 0; i <= max_hist; ++i) { + cerr << "\tbuckets of size " << i << " = " << hist[i] << endl; + } +} + +bool +StaticInst::hasBranchTarget(Addr pc, ExecContext *xc, Addr &tgt) const +{ + if (isDirectCtrl()) { + tgt = branchTarget(pc); + return true; + } + + if (isIndirectCtrl()) { + tgt = branchTarget(xc); + return true; + } + + return false; +} + diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh new file mode 100644 index 000000000..33c9144fb --- /dev/null +++ b/src/cpu/static_inst.hh @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_STATIC_INST_HH__ +#define __CPU_STATIC_INST_HH__ + +#include <bitset> +#include <string> + +#include "base/hashmap.hh" +#include "base/misc.hh" +#include "base/refcnt.hh" +#include "cpu/op_class.hh" +#include "sim/host.hh" +#include "arch/isa_traits.hh" + +// forward declarations +struct AlphaSimpleImpl; +class ExecContext; +class DynInst; +class Packet; + +template <class Impl> +class AlphaDynInst; + +class FastCPU; +class AtomicSimpleCPU; +class TimingSimpleCPU; +class InorderCPU; +class SymbolTable; + +namespace Trace { + class InstRecord; +} + +/** + * Base, ISA-independent static instruction class. + * + * The main component of this class is the vector of flags and the + * associated methods for reading them. Any object that can rely + * solely on these flags can process instructions without being + * recompiled for multiple ISAs. + */ +class StaticInstBase : public RefCounted +{ + protected: + + /// Set of boolean static instruction properties. + /// + /// Notes: + /// - The IsInteger and IsFloating flags are based on the class of + /// registers accessed by the instruction. Although most + /// instructions will have exactly one of these two flags set, it + /// is possible for an instruction to have neither (e.g., direct + /// unconditional branches, memory barriers) or both (e.g., an + /// FP/int conversion). + /// - If IsMemRef is set, then exactly one of IsLoad or IsStore + /// will be set. + /// - If IsControl is set, then exactly one of IsDirectControl or + /// IsIndirect Control will be set, and exactly one of + /// IsCondControl or IsUncondControl will be set. + /// - IsSerializing, IsMemBarrier, and IsWriteBarrier are + /// implemented as flags since in the current model there's no + /// other way for instructions to inject behavior into the + /// pipeline outside of fetch. Once we go to an exec-in-exec CPU + /// model we should be able to get rid of these flags and + /// implement this behavior via the execute() methods. + /// + enum Flags { + IsNop, ///< Is a no-op (no effect at all). + + IsInteger, ///< References integer regs. + IsFloating, ///< References FP regs. + + IsMemRef, ///< References memory (load, store, or prefetch). + IsLoad, ///< Reads from memory (load or prefetch). + IsStore, ///< Writes to memory. + IsInstPrefetch, ///< Instruction-cache prefetch. + IsDataPrefetch, ///< Data-cache prefetch. + IsCopy, ///< Fast Cache block copy + + IsControl, ///< Control transfer instruction. + IsDirectControl, ///< PC relative control transfer. + IsIndirectControl, ///< Register indirect control transfer. + IsCondControl, ///< Conditional control transfer. + IsUncondControl, ///< Unconditional control transfer. + IsCall, ///< Subroutine call. + IsReturn, ///< Subroutine return. + + IsCondDelaySlot,///< Conditional Delay-Slot Instruction + + IsThreadSync, ///< Thread synchronization operation. + + IsSerializing, ///< Serializes pipeline: won't execute until all + /// older instructions have committed. + IsSerializeBefore, + IsSerializeAfter, + IsMemBarrier, ///< Is a memory barrier + IsWriteBarrier, ///< Is a write barrier + + IsNonSpeculative, ///< Should not be executed speculatively + + NumFlags + }; + + /// Flag values for this instruction. + std::bitset<NumFlags> flags; + + /// See opClass(). + OpClass _opClass; + + /// See numSrcRegs(). + int8_t _numSrcRegs; + + /// See numDestRegs(). + int8_t _numDestRegs; + + /// The following are used to track physical register usage + /// for machines with separate int & FP reg files. + //@{ + int8_t _numFPDestRegs; + int8_t _numIntDestRegs; + //@} + + /// Constructor. + /// It's important to initialize everything here to a sane + /// default, since the decoder generally only overrides + /// the fields that are meaningful for the particular + /// instruction. + StaticInstBase(OpClass __opClass) + : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0), + _numFPDestRegs(0), _numIntDestRegs(0) + { + } + + public: + + /// @name Register information. + /// The sum of numFPDestRegs() and numIntDestRegs() equals + /// numDestRegs(). The former two functions are used to track + /// physical register usage for machines with separate int & FP + /// reg files. + //@{ + /// Number of source registers. + int8_t numSrcRegs() const { return _numSrcRegs; } + /// Number of destination registers. + int8_t numDestRegs() const { return _numDestRegs; } + /// Number of floating-point destination regs. + int8_t numFPDestRegs() const { return _numFPDestRegs; } + /// Number of integer destination regs. + int8_t numIntDestRegs() const { return _numIntDestRegs; } + //@} + + /// @name Flag accessors. + /// These functions are used to access the values of the various + /// instruction property flags. See StaticInstBase::Flags for descriptions + /// of the individual flags. + //@{ + + bool isNop() const { return flags[IsNop]; } + + bool isMemRef() const { return flags[IsMemRef]; } + bool isLoad() const { return flags[IsLoad]; } + bool isStore() const { return flags[IsStore]; } + bool isInstPrefetch() const { return flags[IsInstPrefetch]; } + bool isDataPrefetch() const { return flags[IsDataPrefetch]; } + bool isCopy() const { return flags[IsCopy];} + + bool isInteger() const { return flags[IsInteger]; } + bool isFloating() const { return flags[IsFloating]; } + + bool isControl() const { return flags[IsControl]; } + bool isCall() const { return flags[IsCall]; } + bool isReturn() const { return flags[IsReturn]; } + bool isDirectCtrl() const { return flags[IsDirectControl]; } + bool isIndirectCtrl() const { return flags[IsIndirectControl]; } + bool isCondCtrl() const { return flags[IsCondControl]; } + bool isUncondCtrl() const { return flags[IsUncondControl]; } + + bool isThreadSync() const { return flags[IsThreadSync]; } + bool isSerializing() const { return flags[IsSerializing] || + flags[IsSerializeBefore] || + flags[IsSerializeAfter]; } + bool isSerializeBefore() const { return flags[IsSerializeBefore]; } + bool isSerializeAfter() const { return flags[IsSerializeAfter]; } + bool isMemBarrier() const { return flags[IsMemBarrier]; } + bool isWriteBarrier() const { return flags[IsWriteBarrier]; } + bool isNonSpeculative() const { return flags[IsNonSpeculative]; } + //@} + + /// Operation class. Used to select appropriate function unit in issue. + OpClass opClass() const { return _opClass; } +}; + + +// forward declaration +class StaticInstPtr; + +/** + * Generic yet ISA-dependent static instruction class. + * + * This class builds on StaticInstBase, defining fields and interfaces + * that are generic across all ISAs but that differ in details + * according to the specific ISA being used. + */ +class StaticInst : public StaticInstBase +{ + public: + + /// Binary machine instruction type. + typedef TheISA::MachInst MachInst; + /// Binary extended machine instruction type. + typedef TheISA::ExtMachInst ExtMachInst; + /// Logical register index type. + typedef TheISA::RegIndex RegIndex; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + + /// Return logical index (architectural reg num) of i'th destination reg. + /// Only the entries from 0 through numDestRegs()-1 are valid. + RegIndex destRegIdx(int i) const { return _destRegIdx[i]; } + + /// Return logical index (architectural reg num) of i'th source reg. + /// Only the entries from 0 through numSrcRegs()-1 are valid. + RegIndex srcRegIdx(int i) const { return _srcRegIdx[i]; } + + /// Pointer to a statically allocated "null" instruction object. + /// Used to give eaCompInst() and memAccInst() something to return + /// when called on non-memory instructions. + static StaticInstPtr nullStaticInstPtr; + + /** + * Memory references only: returns "fake" instruction representing + * the effective address part of the memory operation. Used to + * obtain the dependence info (numSrcRegs and srcRegIdx[]) for + * just the EA computation. + */ + virtual const + StaticInstPtr &eaCompInst() const { return nullStaticInstPtr; } + + /** + * Memory references only: returns "fake" instruction representing + * the memory access part of the memory operation. Used to + * obtain the dependence info (numSrcRegs and srcRegIdx[]) for + * just the memory access (not the EA computation). + */ + virtual const + StaticInstPtr &memAccInst() const { return nullStaticInstPtr; } + + /// The binary machine instruction. + const ExtMachInst machInst; + + protected: + + /// See destRegIdx(). + RegIndex _destRegIdx[MaxInstDestRegs]; + /// See srcRegIdx(). + RegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** + * Base mnemonic (e.g., "add"). Used by generateDisassembly() + * methods. Also useful to readily identify instructions from + * within the debugger when #cachedDisassembly has not been + * initialized. + */ + const char *mnemonic; + + /** + * String representation of disassembly (lazily evaluated via + * disassemble()). + */ + mutable std::string *cachedDisassembly; + + /** + * Internal function to generate disassembly string. + */ + virtual std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const = 0; + + /// Constructor. + StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) + : StaticInstBase(__opClass), + machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) + { + } + + public: + + virtual ~StaticInst() + { + if (cachedDisassembly) + delete cachedDisassembly; + } + +/** + * The execute() signatures are auto-generated by scons based on the + * set of CPU models we are compiling in today. + */ +#include "cpu/static_inst_exec_sigs.hh" + + /** + * Return the target address for a PC-relative branch. + * Invalid if not a PC-relative branch (i.e. isDirectCtrl() + * should be true). + */ + virtual Addr branchTarget(Addr branchPC) const + { + panic("StaticInst::branchTarget() called on instruction " + "that is not a PC-relative branch."); + } + + /** + * Return the target address for an indirect branch (jump). The + * register value is read from the supplied execution context, so + * the result is valid only if the execution context is about to + * execute the branch in question. Invalid if not an indirect + * branch (i.e. isIndirectCtrl() should be true). + */ + virtual Addr branchTarget(ExecContext *xc) const + { + panic("StaticInst::branchTarget() called on instruction " + "that is not an indirect branch."); + } + + /** + * Return true if the instruction is a control transfer, and if so, + * return the target address as well. + */ + bool hasBranchTarget(Addr pc, ExecContext *xc, Addr &tgt) const; + + /** + * Return string representation of disassembled instruction. + * The default version of this function will call the internal + * virtual generateDisassembly() function to get the string, + * then cache it in #cachedDisassembly. If the disassembly + * should not be cached, this function should be overridden directly. + */ + virtual const std::string &disassemble(Addr pc, + const SymbolTable *symtab = 0) const + { + if (!cachedDisassembly) + cachedDisassembly = + new std::string(generateDisassembly(pc, symtab)); + + return *cachedDisassembly; + } + + /// Decoded instruction cache type. + /// For now we're using a generic hash_map; this seems to work + /// pretty well. + typedef m5::hash_map<ExtMachInst, StaticInstPtr> DecodeCache; + + /// A cache of decoded instruction objects. + static DecodeCache decodeCache; + + /** + * Dump some basic stats on the decode cache hash map. + * Only gets called if DECODE_CACHE_HASH_STATS is defined. + */ + static void dumpDecodeCacheStats(); + + /// Decode a machine instruction. + /// @param mach_inst The binary instruction to decode. + /// @retval A pointer to the corresponding StaticInst object. + //This is defined as inline below. + static StaticInstPtr decode(ExtMachInst mach_inst); + + //MIPS Decoder Debug Functions + int getOpcode() { return (machInst & 0xFC000000) >> 26 ; }//31..26 + int getRs() { return (machInst & 0x03E00000) >> 21; } //25...21 + int getRt() { return (machInst & 0x001F0000) >> 16; } //20...16 + int getRd() { return (machInst & 0x0000F800) >> 11; } //15...11 + int getImm() { return (machInst & 0x0000FFFF); } //15...0 + int getFunction(){ return (machInst & 0x0000003F); }//5...0 + int getBranch(){ return (machInst & 0x0000FFFF); }//15...0 + int getJump(){ return (machInst & 0x03FFFFFF); }//5...0 + int getHint(){ return (machInst & 0x000007C0) >> 6; } //10...6 + std::string getName() { return mnemonic; } +}; + +typedef RefCountingPtr<StaticInstBase> StaticInstBasePtr; + +/// Reference-counted pointer to a StaticInst object. +/// This type should be used instead of "StaticInst *" so that +/// StaticInst objects can be properly reference-counted. +class StaticInstPtr : public RefCountingPtr<StaticInst> +{ + public: + /// Constructor. + StaticInstPtr() + : RefCountingPtr<StaticInst>() + { + } + + /// Conversion from "StaticInst *". + StaticInstPtr(StaticInst *p) + : RefCountingPtr<StaticInst>(p) + { + } + + /// Copy constructor. + StaticInstPtr(const StaticInstPtr &r) + : RefCountingPtr<StaticInst>(r) + { + } + + /// Construct directly from machine instruction. + /// Calls StaticInst::decode(). + StaticInstPtr(TheISA::ExtMachInst mach_inst) + : RefCountingPtr<StaticInst>(StaticInst::decode(mach_inst)) + { + } + + /// Convert to pointer to StaticInstBase class. + operator const StaticInstBasePtr() + { + return this->get(); + } +}; + +inline StaticInstPtr +StaticInst::decode(StaticInst::ExtMachInst mach_inst) +{ +#ifdef DECODE_CACHE_HASH_STATS + // Simple stats on decode hash_map. Turns out the default + // hash function is as good as anything I could come up with. + const int dump_every_n = 10000000; + static int decodes_til_dump = dump_every_n; + + if (--decodes_til_dump == 0) { + dumpDecodeCacheStats(); + decodes_til_dump = dump_every_n; + } +#endif + + DecodeCache::iterator iter = decodeCache.find(mach_inst); + if (iter != decodeCache.end()) { + return iter->second; + } + + StaticInstPtr si = TheISA::decodeInst(mach_inst); + decodeCache[mach_inst] = si; + return si; +} + +#endif // __CPU_STATIC_INST_HH__ diff --git a/src/cpu/trace/opt_cpu.cc b/src/cpu/trace/opt_cpu.cc new file mode 100644 index 000000000..6cd23b0dd --- /dev/null +++ b/src/cpu/trace/opt_cpu.cc @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a memory trace CPU object for optimal caches. Uses a memory + * trace to access a fully associative cache with optimal replacement. + */ + +#include <algorithm> // For heap functions. + +#include "cpu/trace/opt_cpu.hh" +#include "cpu/trace/reader/mem_trace_reader.hh" + +#include "sim/builder.hh" +#include "sim/sim_events.hh" + +using namespace std; + +OptCPU::OptCPU(const string &name, + MemTraceReader *_trace, + int block_size, + int cache_size, + int _assoc) + : SimObject(name), tickEvent(this), trace(_trace), + numBlks(cache_size/block_size), assoc(_assoc), numSets(numBlks/assoc), + setMask(numSets - 1) +{ + int log_block_size = 0; + int tmp_block_size = block_size; + while (tmp_block_size > 1) { + ++log_block_size; + tmp_block_size = tmp_block_size >> 1; + } + assert(1<<log_block_size == block_size); + MemReqPtr req; + trace->getNextReq(req); + refInfo.resize(numSets); + while (req) { + RefInfo temp; + temp.addr = req->paddr >> log_block_size; + int set = temp.addr & setMask; + refInfo[set].push_back(temp); + trace->getNextReq(req); + } + + // Initialize top level of lookup table. + lookupTable.resize(16); + + // Annotate references with next ref time. + for (int k = 0; k < numSets; ++k) { + for (RefIndex i = refInfo[k].size() - 1; i >= 0; --i) { + Addr addr = refInfo[k][i].addr; + initTable(addr, InfiniteRef); + refInfo[k][i].nextRefTime = lookupValue(addr); + setValue(addr, i); + } + } + + // Reset the lookup table + for (int j = 0; j < 16; ++j) { + if (lookupTable[j].size() == (1<<16)) { + for (int k = 0; k < (1<<16); ++k) { + if (lookupTable[j][k].size() == (1<<16)) { + for (int l = 0; l < (1<<16); ++l) { + lookupTable[j][k][l] = -1; + } + } + } + } + } + + tickEvent.schedule(0); + + hits = 0; + misses = 0; +} + +void +OptCPU::processSet(int set) +{ + // Initialize cache + int blks_in_cache = 0; + RefIndex i = 0; + cacheHeap.clear(); + cacheHeap.resize(assoc); + + while (blks_in_cache < assoc) { + RefIndex cache_index = lookupValue(refInfo[set][i].addr); + if (cache_index == -1) { + // First reference to this block + misses++; + cache_index = blks_in_cache++; + setValue(refInfo[set][i].addr, cache_index); + } else { + hits++; + } + // update cache heap to most recent reference + cacheHeap[cache_index] = i; + if (++i >= refInfo[set].size()) { + return; + } + } + for (int start = assoc/2; start >= 0; --start) { + heapify(set,start); + } + //verifyHeap(set,0); + + for (; i < refInfo[set].size(); ++i) { + RefIndex cache_index = lookupValue(refInfo[set][i].addr); + if (cache_index == -1) { + // miss + misses++; + // replace from cacheHeap[0] + // mark replaced block as absent + setValue(refInfo[set][cacheHeap[0]].addr, -1); + setValue(refInfo[set][i].addr, 0); + cacheHeap[0] = i; + heapify(set, 0); + // Make sure its in the cache + assert(lookupValue(refInfo[set][i].addr) != -1); + } else { + // hit + hits++; + assert(refInfo[set][cacheHeap[cache_index]].addr == + refInfo[set][i].addr); + assert(refInfo[set][cacheHeap[cache_index]].nextRefTime == i); + assert(heapLeft(cache_index) >= assoc); + + cacheHeap[cache_index] = i; + processRankIncrease(set, cache_index); + assert(lookupValue(refInfo[set][i].addr) != -1); + } + } +} +void +OptCPU::tick() +{ + // Do opt simulation + + int references = 0; + for (int set = 0; set < numSets; ++set) { + if (!refInfo[set].empty()) { + processSet(set); + } + references += refInfo[set].size(); + } + // exit; + fprintf(stderr,"sys.cpu.misses %d #opt cache misses\n",misses); + fprintf(stderr,"sys.cpu.hits %d #opt cache hits\n", hits); + fprintf(stderr,"sys.cpu.accesses %d #opt cache acceses\n", references); + new SimExitEvent("Finshed Memory Trace"); +} + +void +OptCPU::initTable(Addr addr, RefIndex index) +{ + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + assert(l1_index == addr >> 32); + if (lookupTable[l1_index].size() != (1<<16)) { + lookupTable[l1_index].resize(1<<16); + } + if (lookupTable[l1_index][l2_index].size() != (1<<16)) { + lookupTable[l1_index][l2_index].resize(1<<16, index); + } +} + +OptCPU::TickEvent::TickEvent(OptCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +void +OptCPU::TickEvent::process() +{ + cpu->tick(); +} + +const char * +OptCPU::TickEvent::description() +{ + return "OptCPU tick event"; +} + + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(OptCPU) + + SimObjectParam<MemTraceReader *> data_trace; + Param<int> size; + Param<int> block_size; +Param<int> assoc; + +END_DECLARE_SIM_OBJECT_PARAMS(OptCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(OptCPU) + + INIT_PARAM_DFLT(data_trace, "memory trace", NULL), + INIT_PARAM(size, "cache size"), + INIT_PARAM(block_size, "block size"), + INIT_PARAM(assoc,"associativity") + +END_INIT_SIM_OBJECT_PARAMS(OptCPU) + +CREATE_SIM_OBJECT(OptCPU) +{ + return new OptCPU(getInstanceName(), + data_trace, + block_size, + size, + assoc); +} + +REGISTER_SIM_OBJECT("OptCPU", OptCPU) diff --git a/src/cpu/trace/opt_cpu.hh b/src/cpu/trace/opt_cpu.hh new file mode 100644 index 000000000..f81691733 --- /dev/null +++ b/src/cpu/trace/opt_cpu.hh @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace CPU object for optimal caches. Uses a memory + * trace to access a fully associative cache with optimal replacement. + */ + +#ifndef __CPU_TRACE_OPT_CPU_HH__ +#define __CPU_TRACE_OPT_CPU_HH__ + +#include <vector> + +#include "mem/mem_req.hh" // for MemReqPtr +#include "sim/eventq.hh" // for Event +#include "sim/sim_object.hh" + +// Forward Declaration +class MemTraceReader; + +/** + * A CPU object to simulate a fully-associative cache with optimal replacement. + */ +class OptCPU : public SimObject +{ + private: + typedef int RefIndex; + + typedef std::vector<RefIndex> L3Table; + typedef std::vector<L3Table> L2Table; + typedef std::vector<L2Table> L1Table; + + /** + * Event to call OptCPU::tick + */ + class TickEvent : public Event + { + private: + /** The associated CPU */ + OptCPU *cpu; + + public: + /** + * Construct this event; + */ + TickEvent(OptCPU *c); + + /** + * Call the tick function. + */ + void process(); + + /** + * Return a string description of this event. + */ + const char *description(); + }; + + TickEvent tickEvent; + + class RefInfo + { + public: + RefIndex nextRefTime; + Addr addr; + }; + + /** Reference Information, per set. */ + std::vector<std::vector<RefInfo> > refInfo; + + /** Lookup table to track blocks in the cache heap */ + L1Table lookupTable; + + /** + * Return the correct value in the lookup table. + */ + RefIndex lookupValue(Addr addr) + { + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + int l3_index = addr & 0xffff; + assert(l1_index == addr >> 32); + return lookupTable[l1_index][l2_index][l3_index]; + } + + /** + * Set the value in the lookup table. + */ + void setValue(Addr addr, RefIndex index) + { + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + int l3_index = addr & 0xffff; + assert(l1_index == addr >> 32); + lookupTable[l1_index][l2_index][l3_index]=index; + } + + /** + * Initialize the lookup table to the given value. + */ + void initTable(Addr addr, RefIndex index); + + void heapSwap(int set, int a, int b) { + RefIndex tmp = cacheHeap[a]; + cacheHeap[a] = cacheHeap[b]; + cacheHeap[b] = tmp; + + setValue(refInfo[set][cacheHeap[a]].addr, a); + setValue(refInfo[set][cacheHeap[b]].addr, b); + } + + int heapLeft(int index) { return index + index + 1; } + int heapRight(int index) { return index + index + 2; } + int heapParent(int index) { return (index - 1) >> 1; } + + RefIndex heapRank(int set, int index) { + return refInfo[set][cacheHeap[index]].nextRefTime; + } + + void heapify(int set, int start){ + int left = heapLeft(start); + int right = heapRight(start); + int max = start; + if (left < assoc && heapRank(set, left) > heapRank(set, start)) { + max = left; + } + if (right < assoc && heapRank(set, right) > heapRank(set, max)) { + max = right; + } + + if (max != start) { + heapSwap(set, start, max); + heapify(set, max); + } + } + + void verifyHeap(int set, int start) { + int left = heapLeft(start); + int right = heapRight(start); + + if (left < assoc) { + assert(heapRank(set, start) >= heapRank(set, left)); + verifyHeap(set, left); + } + if (right < assoc) { + assert(heapRank(set, start) >= heapRank(set, right)); + verifyHeap(set, right); + } + } + + void processRankIncrease(int set, int start) { + int parent = heapParent(start); + while (start > 0 && heapRank(set,parent) < heapRank(set,start)) { + heapSwap(set, parent, start); + start = parent; + parent = heapParent(start); + } + } + + void processSet(int set); + + static const RefIndex InfiniteRef = 0x7fffffff; + + /** Memory reference trace. */ + MemTraceReader *trace; + + /** Cache heap for replacement. */ + std::vector<RefIndex> cacheHeap; + + /** The number of blocks in the cache. */ + const int numBlks; + + const int assoc; + const int numSets; + const int setMask; + + + int misses; + int hits; + + public: + /** + * Construct a OptCPU object. + */ + OptCPU(const std::string &name, + MemTraceReader *_trace, + int block_size, + int cache_size, + int assoc); + + /** + * Perform the optimal replacement simulation. + */ + void tick(); +}; + +#endif // __CPU_TRACE_OPT_CPU_HH__ diff --git a/src/cpu/trace/reader/ibm_reader.cc b/src/cpu/trace/reader/ibm_reader.cc new file mode 100644 index 000000000..420101b63 --- /dev/null +++ b/src/cpu/trace/reader/ibm_reader.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a IBM memory trace format reader. + */ +#include <sstream> + +#include "cpu/trace/reader/ibm_reader.hh" +#include "sim/builder.hh" +#include "base/misc.hh" // for fatal + +using namespace std; + +IBMReader::IBMReader(const string &name, const string &filename) + : MemTraceReader(name) +{ + if (strcmp((filename.c_str() + filename.length() -3), ".gz") == 0) { + // Compressed file, need to use a pipe to gzip. + stringstream buf; + buf << "gzip -d -c " << filename << endl; + trace = popen(buf.str().c_str(), "r"); + } else { + trace = fopen(filename.c_str(), "rb"); + } + if (!trace) { + fatal("Can't open file %s", filename); + } +} + +Tick +IBMReader::getNextReq(MemReqPtr &req) +{ + MemReqPtr tmp_req; + + int c = getc(trace); + if (c != EOF) { + tmp_req = new MemReq(); + //int cpu_id = (c & 0xf0) >> 4; + int type = c & 0x0f; + // We have L1 miss traces, so all accesses are 128 bytes + tmp_req->size = 128; + + tmp_req->paddr = 0; + for (int i = 2; i >= 0; --i) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of file"); + } + tmp_req->paddr |= ((c & 0xff) << (8 * i)); + } + tmp_req->paddr = tmp_req->paddr << 7; + + switch(type) { + case IBM_COND_EXCLUSIVE_FETCH: + case IBM_READ_ONLY_FETCH: + tmp_req->cmd = Read; + break; + case IBM_EXCLUSIVE_FETCH: + case IBM_FETCH_NO_DATA: + tmp_req->cmd = Write; + break; + case IBM_INST_FETCH: + tmp_req->cmd = Read; + break; + default: + fatal("Unknown trace entry type."); + } + + } + req = tmp_req; + return 0; +} + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(IBMReader) + + Param<string> filename; + +END_DECLARE_SIM_OBJECT_PARAMS(IBMReader) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(IBMReader) + + INIT_PARAM(filename, "trace file") + +END_INIT_SIM_OBJECT_PARAMS(IBMReader) + + +CREATE_SIM_OBJECT(IBMReader) +{ + return new IBMReader(getInstanceName(), filename); +} + +REGISTER_SIM_OBJECT("IBMReader", IBMReader) diff --git a/src/cpu/trace/reader/ibm_reader.hh b/src/cpu/trace/reader/ibm_reader.hh new file mode 100644 index 000000000..ce29206a2 --- /dev/null +++ b/src/cpu/trace/reader/ibm_reader.hh @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a IBM memory trace format reader. + */ + +#ifndef __IBM_READER_HH__ +#define __IBM_READER_HH__ + +#include <stdio.h> +#include "cpu/trace/reader/mem_trace_reader.hh" +#include "mem/mem_req.hh" + +/** + * A memory trace reader for the IBM memory trace format. + */ +class IBMReader : public MemTraceReader +{ + /** IBM trace file. */ + FILE* trace; + + enum IBMType { + IBM_INST_FETCH, + IBM_READ_ONLY_FETCH, + IBM_COND_EXCLUSIVE_FETCH, + IBM_EXCLUSIVE_FETCH, + IBM_FETCH_NO_DATA + }; + + public: + /** + * Construct an IBMReader. + */ + IBMReader(const std::string &name, const std::string &filename); + + /** + * Read the next request from the trace. Returns the request in the + * provided MemReqPtr and the cycle of the request in the return value. + * @param req Return the next request from the trace. + * @return IBM traces don't store timing information, return 0 + */ + virtual Tick getNextReq(MemReqPtr &req); +}; + +#endif //__IBM_READER_HH__ + diff --git a/src/cpu/trace/reader/itx_reader.cc b/src/cpu/trace/reader/itx_reader.cc new file mode 100644 index 000000000..39ba27393 --- /dev/null +++ b/src/cpu/trace/reader/itx_reader.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a Intel ITX memory trace format reader. + */ +#include <sstream> + +#include "cpu/trace/reader/itx_reader.hh" +#include "sim/builder.hh" +#include "base/misc.hh" // for fatal + +using namespace std; + +ITXReader::ITXReader(const string &name, const string &filename) + : MemTraceReader(name) +{ + if (strcmp((filename.c_str() + filename.length() -3), ".gz") == 0) { + // Compressed file, need to use a pipe to gzip. + stringstream buf; + buf << "gzip -d -c " << filename << endl; + trace = popen(buf.str().c_str(), "r"); + } else { + trace = fopen(filename.c_str(), "rb"); + } + if (!trace) { + fatal("Can't open file %s", filename); + } + traceFormat = 0; + int c; + for (int i = 0; i < 4; ++i) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of trace file."); + } + traceFormat |= (c & 0xff) << (8 * i); + } + if (traceFormat > 2) + fatal("Invalid trace format."); +} + +Tick +ITXReader::getNextReq(MemReqPtr &req) +{ + MemReqPtr tmp_req = new MemReq(); + bool phys_val; + do { + int c = getc(trace); + if (c != EOF) { + // Decode first byte + // phys_val<1> | type <2:0> | size <3:0> + phys_val = c & 0x80; + tmp_req->size = (c & 0x0f) + 1; + int type = (c & 0x70) >> 4; + + // Could be a compressed instruction entry, expand if necessary + if (type == ITXCodeComp) { + if (traceFormat != 2) { + fatal("Compressed code entry in non CompCode trace."); + } + if (!codeVirtValid) { + fatal("Corrupt CodeComp entry."); + } + + tmp_req->vaddr = codeVirtAddr; + codeVirtAddr += tmp_req->size; + if (phys_val) { + if (!codePhysValid) { + fatal("Corrupt CodeComp entry."); + } + tmp_req->paddr = codePhysAddr; + if (((tmp_req->paddr & 0xfff) + tmp_req->size) & ~0xfff) { + // Crossed page boundary, next physical address is + // invalid + codePhysValid = false; + } else { + codePhysAddr += tmp_req->size; + } + assert(tmp_req->paddr >> 36 == 0); + } else { + codePhysValid = false; + } + type = ITXCode; + tmp_req->cmd = Read; + } else { + // Normal entry + tmp_req->vaddr = 0; + for (int i = 0; i < 4; ++i) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of trace file."); + } + tmp_req->vaddr |= (c & 0xff) << (8 * i); + } + if (type == ITXCode) { + codeVirtAddr = tmp_req->vaddr + tmp_req->size; + codeVirtValid = true; + } + tmp_req->paddr = 0; + if (phys_val) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of trace file."); + } + // Get the page offset from the virtual address. + tmp_req->paddr = tmp_req->vaddr & 0xfff; + tmp_req->paddr |= (c & 0xf0) << 8; + tmp_req->paddr |= (Addr)(c & 0x0f) << 32; + for (int i = 2; i < 4; ++i) { + c = getc(trace); + if (c == EOF) { + fatal("Unexpected end of trace file."); + } + tmp_req->paddr |= (Addr)(c & 0xff) << (8 * i); + } + if (type == ITXCode) { + if (((tmp_req->paddr & 0xfff) + tmp_req->size) + & ~0xfff) { + // Crossing the page boundary, next physical + // address isn't valid + codePhysValid = false; + } else { + codePhysAddr = tmp_req->paddr + tmp_req->size; + codePhysValid = true; + } + } + assert(tmp_req->paddr >> 36 == 0); + } else if (type == ITXCode) { + codePhysValid = false; + } + switch(type) { + case ITXRead: + tmp_req->cmd = Read; + break; + case ITXWrite: + tmp_req->cmd = Write; + break; + case ITXWriteback: + tmp_req->cmd = Writeback; + break; + case ITXCode: + tmp_req->cmd = Read; + tmp_req->flags |= INST_READ; + break; + default: + fatal("Unknown ITX type"); + } + } + } else { + // EOF need to return a null request + MemReqPtr null_req; + req = null_req; + return 0; + } + } while (!phys_val); + req = tmp_req; + assert(!req || (req->paddr >> 36) == 0); + return 0; +} + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(ITXReader) + + Param<string> filename; + +END_DECLARE_SIM_OBJECT_PARAMS(ITXReader) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(ITXReader) + + INIT_PARAM(filename, "trace file") + +END_INIT_SIM_OBJECT_PARAMS(ITXReader) + + +CREATE_SIM_OBJECT(ITXReader) +{ + return new ITXReader(getInstanceName(), filename); +} + +REGISTER_SIM_OBJECT("ITXReader", ITXReader) diff --git a/src/cpu/trace/reader/itx_reader.hh b/src/cpu/trace/reader/itx_reader.hh new file mode 100644 index 000000000..a16a08085 --- /dev/null +++ b/src/cpu/trace/reader/itx_reader.hh @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a Intel ITX memory trace format reader. + */ + +#ifndef __ITX_READER_HH__ +#define __ITX_READER_HH__ + +#include <stdio.h> +#include <string> + +#include "cpu/trace/reader/mem_trace_reader.hh" +#include "mem/mem_req.hh" + + +/** + * A memory trace reader for the Intel ITX memory trace format. + */ +class ITXReader : public MemTraceReader +{ + private: + /** Trace file. */ + FILE *trace; + + bool codeVirtValid; + Addr codeVirtAddr; + bool codePhysValid; + Addr codePhysAddr; + + int traceFormat; + + enum ITXType { + ITXRead, + ITXWrite, + ITXWriteback, + ITXCode, + ITXCodeComp + }; + + public: + /** + * Construct an ITXReader. + */ + ITXReader(const std::string &name, const std::string &filename); + + /** + * Read the next request from the trace. Returns the request in the + * provided MemReqPtr and the cycle of the request in the return value. + * @param req Return the next request from the trace. + * @return ITX traces don't store timing information, return 0 + */ + virtual Tick getNextReq(MemReqPtr &req); +}; + +#endif //__ITX_READER_HH__ + diff --git a/src/cpu/trace/reader/m5_reader.cc b/src/cpu/trace/reader/m5_reader.cc new file mode 100644 index 000000000..ce44672f2 --- /dev/null +++ b/src/cpu/trace/reader/m5_reader.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace reader for a M5 memory trace. + */ + +#include "cpu/trace/reader/m5_reader.hh" +#include "mem/trace/m5_format.hh" +#include "mem/mem_cmd.hh" +#include "sim/builder.hh" + +using namespace std; + +M5Reader::M5Reader(const string &name, const string &filename) + : MemTraceReader(name) +{ + traceFile.open(filename.c_str(), ios::binary); +} + +Tick +M5Reader::getNextReq(MemReqPtr &req) +{ + M5Format ref; + + MemReqPtr tmp_req; + // Need to read EOF char before eof() will return true. + traceFile.read((char*) &ref, sizeof(ref)); + if (!traceFile.eof()) { + //traceFile.read((char*) &ref, sizeof(ref)); +#ifndef NDEBUG + int gcount = traceFile.gcount(); + assert(gcount != 0 || traceFile.eof()); + assert(gcount == sizeof(ref)); + assert(ref.cmd < 12); +#endif + tmp_req = new MemReq(); + tmp_req->paddr = ref.paddr; + tmp_req->asid = ref.asid; + // Assume asid == thread_num + tmp_req->thread_num = ref.asid; + tmp_req->cmd = (MemCmdEnum)ref.cmd; + tmp_req->size = ref.size; + tmp_req->dest = ref.dest; + } else { + ref.cycle = 0; + } + req = tmp_req; + return ref.cycle; +} + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(M5Reader) + + Param<string> filename; + +END_DECLARE_SIM_OBJECT_PARAMS(M5Reader) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(M5Reader) + + INIT_PARAM(filename, "trace file") + +END_INIT_SIM_OBJECT_PARAMS(M5Reader) + + +CREATE_SIM_OBJECT(M5Reader) +{ + return new M5Reader(getInstanceName(), filename); +} + +REGISTER_SIM_OBJECT("M5Reader", M5Reader) diff --git a/src/cpu/trace/reader/m5_reader.hh b/src/cpu/trace/reader/m5_reader.hh new file mode 100644 index 000000000..974a83ffa --- /dev/null +++ b/src/cpu/trace/reader/m5_reader.hh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a memory trace reader for a M5 memory trace. + */ + +#ifndef __M5_READER_HH__ +#define __M5_READER_HH__ + +#include <fstream> + +#include "cpu/trace/reader/mem_trace_reader.hh" + +/** + * A memory trace reader for an M5 memory trace. @sa M5Writer. + */ +class M5Reader : public MemTraceReader +{ + /** The traceFile. */ + std::ifstream traceFile; + + std::string fn; + + public: + /** + * Construct an M5 memory trace reader. + */ + M5Reader(const std::string &name, const std::string &filename); + + + /** + * Read the next request from the trace. Returns the request in the + * provided MemReqPtr and the cycle of the request in the return value. + * @param req Return the next request from the trace. + * @return The cycle the reference was started. + */ + virtual Tick getNextReq(MemReqPtr &req); +}; + +#endif // __M5_READER_HH__ diff --git a/src/cpu/trace/reader/mem_trace_reader.cc b/src/cpu/trace/reader/mem_trace_reader.cc new file mode 100644 index 000000000..769f0be27 --- /dev/null +++ b/src/cpu/trace/reader/mem_trace_reader.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * SimObject Declaration of pure virtual MemTraceReader class. + */ + +#include "cpu/trace/reader/mem_trace_reader.hh" +#include "sim/param.hh" + +DEFINE_SIM_OBJECT_CLASS_NAME("MemTraceReader", MemTraceReader); diff --git a/src/cpu/trace/reader/mem_trace_reader.hh b/src/cpu/trace/reader/mem_trace_reader.hh new file mode 100644 index 000000000..b433cdbdd --- /dev/null +++ b/src/cpu/trace/reader/mem_trace_reader.hh @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Definitions for a pure virtual interface to a memory trace reader. + */ + +#ifndef __MEM_TRACE_READER_HH__ +#define __MEM_TRACE_READER_HH__ + +#include "sim/sim_object.hh" +#include "mem/mem_req.hh" // For MemReqPtr + +/** + * Pure virtual base class for memory trace readers. + */ +class MemTraceReader : public SimObject +{ + public: + /** Construct this MemoryTrace reader. */ + MemTraceReader(const std::string &name) : SimObject(name) {} + + /** + * Read the next request from the trace. Returns the request in the + * provided MemReqPtr and the cycle of the request in the return value. + * @param req Return the next request from the trace. + * @return The cycle of the request, 0 if none in trace. + */ + virtual Tick getNextReq(MemReqPtr &req) = 0; +}; + +#endif //__MEM_TRACE_READER_HH__ diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc new file mode 100644 index 000000000..20d0a567f --- /dev/null +++ b/src/cpu/trace/trace_cpu.cc @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace CPU object. Uses a memory trace to drive the + * provided memory hierarchy. + */ + +#include <algorithm> // For min + +#include "cpu/trace/trace_cpu.hh" +#include "cpu/trace/reader/mem_trace_reader.hh" +#include "mem/base_mem.hh" // For PARAM constructor +#include "mem/mem_interface.hh" +#include "sim/builder.hh" +#include "sim/sim_events.hh" + +using namespace std; + +TraceCPU::TraceCPU(const string &name, + MemInterface *icache_interface, + MemInterface *dcache_interface, + MemTraceReader *data_trace) + : SimObject(name), icacheInterface(icache_interface), + dcacheInterface(dcache_interface), + dataTrace(data_trace), outstandingRequests(0), tickEvent(this) +{ + assert(dcacheInterface); + nextCycle = dataTrace->getNextReq(nextReq); + tickEvent.schedule(0); +} + +void +TraceCPU::tick() +{ + assert(outstandingRequests >= 0); + assert(outstandingRequests < 1000); + int instReqs = 0; + int dataReqs = 0; + + while (nextReq && curTick >= nextCycle) { + assert(nextReq->thread_num < 4 && "Not enough threads"); + if (nextReq->isInstRead() && icacheInterface) { + if (icacheInterface->isBlocked()) + break; + + nextReq->time = curTick; + if (nextReq->cmd == Squash) { + icacheInterface->squash(nextReq->asid); + } else { + ++instReqs; + if (icacheInterface->doEvents()) { + nextReq->completionEvent = + new TraceCompleteEvent(nextReq, this); + icacheInterface->access(nextReq); + } else { + icacheInterface->access(nextReq); + completeRequest(nextReq); + } + } + } else { + if (dcacheInterface->isBlocked()) + break; + + ++dataReqs; + nextReq->time = curTick; + if (dcacheInterface->doEvents()) { + nextReq->completionEvent = + new TraceCompleteEvent(nextReq, this); + dcacheInterface->access(nextReq); + } else { + dcacheInterface->access(nextReq); + completeRequest(nextReq); + } + + } + nextCycle = dataTrace->getNextReq(nextReq); + } + + if (!nextReq) { + // No more requests to send. Finish trailing events and exit. + if (mainEventQueue.empty()) { + new SimExitEvent("Finshed Memory Trace"); + } else { + tickEvent.schedule(mainEventQueue.nextEventTime() + cycles(1)); + } + } else { + tickEvent.schedule(max(curTick + cycles(1), nextCycle)); + } +} + +void +TraceCPU::completeRequest(MemReqPtr& req) +{ +} + +void +TraceCompleteEvent::process() +{ + tester->completeRequest(req); +} + +const char * +TraceCompleteEvent::description() +{ + return "trace access complete"; +} + +TraceCPU::TickEvent::TickEvent(TraceCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +void +TraceCPU::TickEvent::process() +{ + cpu->tick(); +} + +const char * +TraceCPU::TickEvent::description() +{ + return "TraceCPU tick event"; +} + + + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(TraceCPU) + + SimObjectParam<BaseMem *> icache; + SimObjectParam<BaseMem *> dcache; + SimObjectParam<MemTraceReader *> data_trace; + +END_DECLARE_SIM_OBJECT_PARAMS(TraceCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(TraceCPU) + + INIT_PARAM_DFLT(icache, "instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "data cache", NULL), + INIT_PARAM_DFLT(data_trace, "data trace", NULL) + +END_INIT_SIM_OBJECT_PARAMS(TraceCPU) + +CREATE_SIM_OBJECT(TraceCPU) +{ + return new TraceCPU(getInstanceName(), + (icache) ? icache->getInterface() : NULL, + (dcache) ? dcache->getInterface() : NULL, + data_trace); +} + +REGISTER_SIM_OBJECT("TraceCPU", TraceCPU) + diff --git a/src/cpu/trace/trace_cpu.hh b/src/cpu/trace/trace_cpu.hh new file mode 100644 index 000000000..69ca35321 --- /dev/null +++ b/src/cpu/trace/trace_cpu.hh @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace CPU object. Uses a memory trace to drive the + * provided memory hierarchy. + */ + +#ifndef __CPU_TRACE_TRACE_CPU_HH__ +#define __CPU_TRACE_TRACE_CPU_HH__ + +#include <string> + +#include "mem/mem_req.hh" // for MemReqPtr +#include "sim/eventq.hh" // for Event +#include "sim/sim_object.hh" + +// Forward declaration. +class MemInterface; +class MemTraceReader; + +/** + * A cpu object for running memory traces through a memory hierarchy. + */ +class TraceCPU : public SimObject +{ + private: + /** Interface for instruction trace requests, if any. */ + MemInterface *icacheInterface; + /** Interface for data trace requests, if any. */ + MemInterface *dcacheInterface; + + /** Data reference trace. */ + MemTraceReader *dataTrace; + + /** Number of outstanding requests. */ + int outstandingRequests; + + /** Cycle of the next request, 0 if not available. */ + Tick nextCycle; + + /** Next request. */ + MemReqPtr nextReq; + + /** + * Event to call the TraceCPU::tick + */ + class TickEvent : public Event + { + private: + /** The associated CPU */ + TraceCPU *cpu; + + public: + /** + * Construct this event; + */ + TickEvent(TraceCPU *c); + + /** + * Call the tick function. + */ + void process(); + + /** + * Return a string description of this event. + */ + const char *description(); + }; + + TickEvent tickEvent; + + public: + /** + * Construct a TraceCPU object. + */ + TraceCPU(const std::string &name, + MemInterface *icache_interface, + MemInterface *dcache_interface, + MemTraceReader *data_trace); + + inline Tick cycles(int numCycles) { return numCycles; } + + /** + * Perform all the accesses for one cycle. + */ + void tick(); + + /** + * Handle a completed memory request. + */ + void completeRequest(MemReqPtr &req); +}; + +class TraceCompleteEvent : public Event +{ + MemReqPtr req; + TraceCPU *tester; + + public: + + TraceCompleteEvent(MemReqPtr &_req, TraceCPU *_tester) + : Event(&mainEventQueue), req(_req), tester(_tester) + { + setFlags(AutoDelete); + } + + void process(); + + virtual const char *description(); +}; + +#endif // __CPU_TRACE_TRACE_CPU_HH__ + |