diff options
author | Kevin Lim <ktlim@umich.edu> | 2006-05-30 14:17:41 -0400 |
---|---|---|
committer | Kevin Lim <ktlim@umich.edu> | 2006-05-30 14:17:41 -0400 |
commit | 4a5b51b516853c9fcaabc44caacdd7e8e93dc0ef (patch) | |
tree | 4b7d92408a2b74a16ae6f7b4167ded00079355ef /src/cpu/o3/cpu.cc | |
parent | d308055afc1ace1f321b76e8a85a9a45165da2ce (diff) | |
parent | f1fab2a4469d6cb2e55ebac15da02f8c1fcb7055 (diff) | |
download | gem5-4a5b51b516853c9fcaabc44caacdd7e8e93dc0ef.tar.xz |
Merge ktlim@zizzer:/bk/m5
into zamp.eecs.umich.edu:/z/ktlim2/clean/newmem
SConstruct:
src/SConscript:
src/arch/SConscript:
src/arch/alpha/faults.cc:
src/arch/alpha/tlb.cc:
src/base/traceflags.py:
src/cpu/SConscript:
src/cpu/base.cc:
src/cpu/base.hh:
src/cpu/base_dyn_inst.cc:
src/cpu/cpu_exec_context.cc:
src/cpu/cpu_exec_context.hh:
src/cpu/exec_context.hh:
src/cpu/o3/alpha_cpu.hh:
src/cpu/o3/alpha_cpu_impl.hh:
src/cpu/o3/alpha_dyn_inst.hh:
src/cpu/o3/cpu.cc:
src/cpu/o3/cpu.hh:
src/cpu/o3/regfile.hh:
src/cpu/ozone/cpu.hh:
src/cpu/simple/base.cc:
src/cpu/base_dyn_inst.hh:
src/cpu/o3/2bit_local_pred.cc:
src/cpu/o3/2bit_local_pred.hh:
src/cpu/o3/alpha_cpu.cc:
src/cpu/o3/alpha_cpu_builder.cc:
src/cpu/o3/alpha_dyn_inst.cc:
src/cpu/o3/alpha_dyn_inst_impl.hh:
src/cpu/o3/alpha_impl.hh:
src/cpu/o3/alpha_params.hh:
src/cpu/o3/bpred_unit.cc:
src/cpu/o3/bpred_unit.hh:
src/cpu/o3/bpred_unit_impl.hh:
src/cpu/o3/btb.cc:
src/cpu/o3/btb.hh:
src/cpu/o3/comm.hh:
src/cpu/o3/commit.cc:
src/cpu/o3/commit.hh:
src/cpu/o3/commit_impl.hh:
src/cpu/o3/cpu_policy.hh:
src/cpu/o3/decode.cc:
src/cpu/o3/decode.hh:
src/cpu/o3/decode_impl.hh:
src/cpu/o3/fetch.cc:
src/cpu/o3/fetch.hh:
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/free_list.cc:
src/cpu/o3/free_list.hh:
src/cpu/o3/iew.cc:
src/cpu/o3/iew.hh:
src/cpu/o3/iew_impl.hh:
src/cpu/o3/inst_queue.cc:
src/cpu/o3/inst_queue.hh:
src/cpu/o3/inst_queue_impl.hh:
src/cpu/o3/mem_dep_unit.cc:
src/cpu/o3/mem_dep_unit.hh:
src/cpu/o3/mem_dep_unit_impl.hh:
src/cpu/o3/ras.cc:
src/cpu/o3/ras.hh:
src/cpu/o3/rename.cc:
src/cpu/o3/rename.hh:
src/cpu/o3/rename_impl.hh:
src/cpu/o3/rename_map.cc:
src/cpu/o3/rename_map.hh:
src/cpu/o3/rob.cc:
src/cpu/o3/rob.hh:
src/cpu/o3/rob_impl.hh:
src/cpu/o3/sat_counter.cc:
src/cpu/o3/sat_counter.hh:
src/cpu/o3/store_set.cc:
src/cpu/o3/store_set.hh:
src/cpu/o3/tournament_pred.cc:
src/cpu/o3/tournament_pred.hh:
Hand merges.
--HG--
rename : build/SConstruct => SConstruct
rename : SConscript => src/SConscript
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/pal.isa => src/arch/alpha/isa/pal.isa
rename : base/traceflags.py => src/base/traceflags.py
rename : cpu/SConscript => src/cpu/SConscript
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.cc => src/cpu/base_dyn_inst.cc
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/cpu_exec_context.cc => src/cpu/cpu_exec_context.cc
rename : cpu/cpu_exec_context.hh => src/cpu/cpu_exec_context.hh
rename : cpu/cpu_models.py => src/cpu/cpu_models.py
rename : cpu/exec_context.hh => src/cpu/exec_context.hh
rename : cpu/exetrace.cc => src/cpu/exetrace.cc
rename : cpu/exetrace.hh => src/cpu/exetrace.hh
rename : cpu/inst_seq.hh => src/cpu/inst_seq.hh
rename : cpu/o3/2bit_local_pred.cc => src/cpu/o3/2bit_local_pred.cc
rename : cpu/o3/2bit_local_pred.hh => src/cpu/o3/2bit_local_pred.hh
rename : cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha_cpu.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha_cpu_builder.cc
rename : cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha_cpu_impl.hh
rename : cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha_dyn_inst.hh
rename : cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha_dyn_inst_impl.hh
rename : cpu/o3/alpha_impl.hh => src/cpu/o3/alpha_impl.hh
rename : cpu/o3/alpha_params.hh => src/cpu/o3/alpha_params.hh
rename : cpu/o3/bpred_unit.cc => src/cpu/o3/bpred_unit.cc
rename : cpu/o3/bpred_unit.hh => src/cpu/o3/bpred_unit.hh
rename : cpu/o3/bpred_unit_impl.hh => src/cpu/o3/bpred_unit_impl.hh
rename : cpu/o3/btb.cc => src/cpu/o3/btb.cc
rename : cpu/o3/btb.hh => src/cpu/o3/btb.hh
rename : cpu/o3/comm.hh => src/cpu/o3/comm.hh
rename : cpu/o3/commit.cc => src/cpu/o3/commit.cc
rename : cpu/o3/commit.hh => src/cpu/o3/commit.hh
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/cpu.hh => src/cpu/o3/cpu.hh
rename : cpu/o3/cpu_policy.hh => src/cpu/o3/cpu_policy.hh
rename : cpu/o3/decode.cc => src/cpu/o3/decode.cc
rename : cpu/o3/decode.hh => src/cpu/o3/decode.hh
rename : cpu/o3/decode_impl.hh => src/cpu/o3/decode_impl.hh
rename : cpu/o3/fetch.cc => src/cpu/o3/fetch.cc
rename : cpu/o3/fetch.hh => src/cpu/o3/fetch.hh
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/free_list.cc => src/cpu/o3/free_list.cc
rename : cpu/o3/free_list.hh => src/cpu/o3/free_list.hh
rename : cpu/o3/iew.cc => src/cpu/o3/iew.cc
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.cc => src/cpu/o3/inst_queue.cc
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/mem_dep_unit.cc => src/cpu/o3/mem_dep_unit.cc
rename : cpu/o3/mem_dep_unit.hh => src/cpu/o3/mem_dep_unit.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/ras.cc => src/cpu/o3/ras.cc
rename : cpu/o3/ras.hh => src/cpu/o3/ras.hh
rename : cpu/o3/regfile.hh => src/cpu/o3/regfile.hh
rename : cpu/o3/rename.cc => src/cpu/o3/rename.cc
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/rename_map.cc => src/cpu/o3/rename_map.cc
rename : cpu/o3/rename_map.hh => src/cpu/o3/rename_map.hh
rename : cpu/o3/rob.hh => src/cpu/o3/rob.hh
rename : cpu/o3/rob_impl.hh => src/cpu/o3/rob_impl.hh
rename : cpu/o3/sat_counter.hh => src/cpu/o3/sat_counter.hh
rename : cpu/o3/store_set.cc => src/cpu/o3/store_set.cc
rename : cpu/o3/store_set.hh => src/cpu/o3/store_set.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/ozone/cpu.cc => src/cpu/ozone/cpu.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/static_inst.hh => src/cpu/static_inst.hh
rename : kern/system_events.cc => src/kern/system_events.cc
rename : kern/tru64/tru64.hh => src/kern/tru64/tru64.hh
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/AlphaFullCPU.py
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
extra : convert_revision : ff351fc0e3a7c0f23e59fdbec33d8209eb9280be
Diffstat (limited to 'src/cpu/o3/cpu.cc')
-rw-r--r-- | src/cpu/o3/cpu.cc | 994 |
1 files changed, 812 insertions, 182 deletions
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index a268dbc23..ed02a845b 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,19 +33,29 @@ #else #include "sim/process.hh" #endif -#include "sim/root.hh" +#include "cpu/activity.hh" +#include "cpu/checker/cpu.hh" #include "cpu/cpu_exec_context.hh" #include "cpu/exec_context.hh" #include "cpu/o3/alpha_dyn_inst.hh" #include "cpu/o3/alpha_impl.hh" #include "cpu/o3/cpu.hh" +#include "sim/root.hh" +#include "sim/stat_control.hh" + using namespace std; -BaseFullCPU::BaseFullCPU(Params ¶ms) - : BaseCPU(¶ms), cpu_id(0) +BaseFullCPU::BaseFullCPU(Params *params) + : BaseCPU(params), cpu_id(0) +{ +} + +void +BaseFullCPU::regStats() { + BaseCPU::regStats(); } template <class Impl> @@ -68,97 +78,88 @@ FullO3CPU<Impl>::TickEvent::description() return "FullO3CPU tick event"; } -//Call constructor to all the pipeline stages here template <class Impl> -FullO3CPU<Impl>::FullO3CPU(Params ¶ms) -#if FULL_SYSTEM - : BaseFullCPU(params), -#else +FullO3CPU<Impl>::FullO3CPU(Params *params) : BaseFullCPU(params), -#endif // FULL_SYSTEM tickEvent(this), + removeInstsThisCycle(false), fetch(params), decode(params), rename(params), iew(params), commit(params), - regFile(params.numPhysIntRegs, params.numPhysFloatRegs), + regFile(params->numPhysIntRegs, params->numPhysFloatRegs), - freeList(TheISA::NumIntRegs, params.numPhysIntRegs, - TheISA::NumFloatRegs, params.numPhysFloatRegs), + freeList(params->numberOfThreads,//number of activeThreads + TheISA::NumIntRegs, params->numPhysIntRegs, + TheISA::NumFloatRegs, params->numPhysFloatRegs), - renameMap(TheISA::NumIntRegs, params.numPhysIntRegs, - TheISA::NumFloatRegs, params.numPhysFloatRegs, - TheISA::NumMiscRegs, - TheISA::ZeroReg, - TheISA::ZeroReg + TheISA::NumIntRegs), + rob(params->numROBEntries, params->squashWidth, + params->smtROBPolicy, params->smtROBThreshold, + params->numberOfThreads), - rob(params.numROBEntries, params.squashWidth), + scoreboard(params->numberOfThreads,//number of activeThreads + TheISA::NumIntRegs, params->numPhysIntRegs, + TheISA::NumFloatRegs, params->numPhysFloatRegs, + TheISA::NumMiscRegs * number_of_threads, + TheISA::ZeroReg), - // What to pass to these time buffers? // For now just have these time buffers be pretty big. + // @todo: Make these time buffer sizes parameters or derived + // from latencies timeBuffer(5, 5), fetchQueue(5, 5), decodeQueue(5, 5), renameQueue(5, 5), iewQueue(5, 5), - - cpuXC(NULL), + activityRec(NumStages, 10, params->activity), globalSeqNum(1), #if FULL_SYSTEM - system(params.system), + system(params->system), memCtrl(system->memctrl), physmem(system->physmem), - itb(params.itb), - dtb(params.dtb), - mem(params.mem), + mem(params->mem), #else - // Hardcoded for a single thread!! - mem(params.workload[0]->getMemory()), +// pTable(params->pTable), + mem(params->workload[0]->getMemory()), #endif // FULL_SYSTEM - - icacheInterface(params.icacheInterface), - dcacheInterface(params.dcacheInterface), - deferRegistration(params.defReg), - numInsts(0), - funcExeInst(0) + switchCount(0), + icacheInterface(params->icacheInterface), + dcacheInterface(params->dcacheInterface), + deferRegistration(params->deferRegistration), + numThreads(number_of_threads) { _status = Idle; -#if !FULL_SYSTEM - thread.resize(this->number_of_threads); -#endif - - for (int i = 0; i < this->number_of_threads; ++i) { + if (params->checker) { + BaseCPU *temp_checker = params->checker; + checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker); + checker->setMemory(mem); #if FULL_SYSTEM - assert(i == 0); - thread[i] = new CPUExecContext(this, 0, system, itb, dtb, mem); - system->execContexts[i] = thread[i]->getProxy(); - - execContexts.push_back(system->execContexts[i]); -#else - if (i < params.workload.size()) { - DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " - "process is %#x", - i, params.workload[i]->prog_entry, thread[i]); - thread[i] = new CPUExecContext(this, i, params.workload[i], i); - } - assert(params.workload[i]->getMemory() != NULL); - assert(mem != NULL); - execContexts.push_back(thread[i]->getProxy()); -#endif // !FULL_SYSTEM + checker->setSystem(params->system); +#endif + } else { + checker = NULL; } - // Note that this is a hack so that my code which still uses xc-> will - // still work. I should remove this eventually - cpuXC = thread[0]; +#if !FULL_SYSTEM + thread.resize(number_of_threads); + tids.resize(number_of_threads); +#endif - // The stages also need their CPU pointer setup. However this must be - // done at the upper level CPU because they have pointers to the upper - // level CPU, and not this FullO3CPU. + // The stages also need their CPU pointer setup. However this + // must be done at the upper level CPU because they have pointers + // to the upper level CPU, and not this FullO3CPU. + + // Set up Pointers to the activeThreads list for each stage + fetch.setActiveThreads(&activeThreads); + decode.setActiveThreads(&activeThreads); + rename.setActiveThreads(&activeThreads); + iew.setActiveThreads(&activeThreads); + commit.setActiveThreads(&activeThreads); // Give each of the stages the time buffer they will use. fetch.setTimeBuffer(&timeBuffer); @@ -170,6 +171,7 @@ FullO3CPU<Impl>::FullO3CPU(Params ¶ms) // Also setup each of the stages' queues. fetch.setFetchQueue(&fetchQueue); decode.setFetchQueue(&fetchQueue); + commit.setFetchQueue(&fetchQueue); decode.setDecodeQueue(&decodeQueue); rename.setDecodeQueue(&decodeQueue); rename.setRenameQueue(&renameQueue); @@ -178,16 +180,85 @@ FullO3CPU<Impl>::FullO3CPU(Params ¶ms) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); + commit.setFetchStage(&fetch); + commit.setIEWStage(&iew); + rename.setIEWStage(&iew); + rename.setCommitStage(&commit); + +#if !FULL_SYSTEM + int active_threads = params->workload.size(); +#else + int active_threads = 1; +#endif + + //Make Sure That this a Valid Architeture + assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); + assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); + + rename.setScoreboard(&scoreboard); + iew.setScoreboard(&scoreboard); + // Setup the rename map for whichever stages need it. - rename.setRenameMap(&renameMap); - iew.setRenameMap(&renameMap); + PhysRegIndex lreg_idx = 0; + PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs + + for (int tid=0; tid < numThreads; tid++) { + bool bindRegs = (tid <= active_threads - 1); + + commitRenameMap[tid].init(TheISA::NumIntRegs, + params->numPhysIntRegs, + lreg_idx, //Index for Logical. Regs + + TheISA::NumFloatRegs, + params->numPhysFloatRegs, + freg_idx, //Index for Float Regs + + TheISA::NumMiscRegs, + + TheISA::ZeroReg, + TheISA::ZeroReg, + + tid, + false); + + renameMap[tid].init(TheISA::NumIntRegs, + params->numPhysIntRegs, + lreg_idx, //Index for Logical. Regs + + TheISA::NumFloatRegs, + params->numPhysFloatRegs, + freg_idx, //Index for Float Regs - // Setup the free list for whichever stages need it. + TheISA::NumMiscRegs, + + TheISA::ZeroReg, + TheISA::ZeroReg, + + tid, + bindRegs); + } + + rename.setRenameMap(renameMap); + commit.setRenameMap(commitRenameMap); + + // Give renameMap & rename stage access to the freeList; + for (int i=0; i < numThreads; i++) { + renameMap[i].setFreeList(&freeList); + } rename.setFreeList(&freeList); - renameMap.setFreeList(&freeList); + + // Setup the page table for whichever stages need it. +#if !FULL_SYSTEM +// fetch.setPageTable(pTable); +// iew.setPageTable(pTable); +#endif // Setup the ROB for whichever stages need it. commit.setROB(&rob); + + lastRunningCycle = curTick; + + contextSwitch = false; } template <class Impl> @@ -199,7 +270,58 @@ template <class Impl> void FullO3CPU<Impl>::fullCPURegStats() { + BaseFullCPU::regStats(); + // Register any of the FullCPU's stats here. + timesIdled + .name(name() + ".timesIdled") + .desc("Number of times that the entire CPU went into an idle state and" + " unscheduled itself") + .prereq(timesIdled); + + idleCycles + .name(name() + ".idleCycles") + .desc("Total number of cycles that the CPU has spent unscheduled due " + "to idling") + .prereq(idleCycles); + + // Number of Instructions simulated + // -------------------------------- + // Should probably be in Base CPU but need templated + // MaxThreads so put in here instead + committedInsts + .init(numThreads) + .name(name() + ".committedInsts") + .desc("Number of Instructions Simulated"); + + totalCommittedInsts + .name(name() + ".committedInsts_total") + .desc("Number of Instructions Simulated"); + + cpi + .name(name() + ".cpi") + .desc("CPI: Cycles Per Instruction") + .precision(6); + cpi = simTicks / committedInsts; + + totalCpi + .name(name() + ".cpi_total") + .desc("CPI: Total CPI of All Threads") + .precision(6); + totalCpi = simTicks / totalCommittedInsts; + + ipc + .name(name() + ".ipc") + .desc("IPC: Instructions Per Cycle") + .precision(6); + ipc = committedInsts / simTicks; + + totalIpc + .name(name() + ".ipc_total") + .desc("IPC: Total IPC of All Threads") + .precision(6); + totalIpc = totalCommittedInsts / simTicks; + } template <class Impl> @@ -208,9 +330,11 @@ FullO3CPU<Impl>::tick() { DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n"); - //Tick each of the stages if they're actually running. - //Will want to figure out a way to unschedule itself if they're all - //going to be idle for a long time. + ++numCycles; + +// activity = false; + + //Tick each of the stages fetch.tick(); decode.tick(); @@ -221,7 +345,11 @@ FullO3CPU<Impl>::tick() commit.tick(); - // Now advance the time buffers, unless the stage is stalled. +#if !FULL_SYSTEM + doContextSwitch(); +#endif + + // Now advance the time buffers timeBuffer.advance(); fetchQueue.advance(); @@ -229,99 +357,396 @@ FullO3CPU<Impl>::tick() renameQueue.advance(); iewQueue.advance(); - if (_status == Running && !tickEvent.scheduled()) - tickEvent.schedule(curTick + 1); + activityRec.advance(); + + if (removeInstsThisCycle) { + cleanUpRemovedInsts(); + } + + if (!tickEvent.scheduled()) { + if (_status == SwitchedOut) { + // increment stat + lastRunningCycle = curTick; + } else if (!activityRec.active()) { + lastRunningCycle = curTick; + timesIdled++; + } else { + tickEvent.schedule(curTick + cycles(1)); + } + } + +#if !FULL_SYSTEM + updateThreadPriority(); +#endif + } template <class Impl> void FullO3CPU<Impl>::init() { - if(!deferRegistration) - { - this->registerExecContexts(); + if (!deferRegistration) { + registerExecContexts(); + } - // Need to do a copy of the xc->regs into the CPU's regfile so - // that it can start properly. + // Set inSyscall so that the CPU doesn't squash when initially + // setting up registers. + for (int i = 0; i < number_of_threads; ++i) + thread[i]->inSyscall = true; + + for (int tid=0; tid < number_of_threads; tid++) { #if FULL_SYSTEM - ExecContext *src_xc = system->execContexts[0]; - TheISA::initCPU(src_xc, src_xc->readCpuId()); + ExecContext *src_xc = execContexts[tid]; #else - ExecContext *src_xc = thread[0]->getProxy(); + ExecContext *src_xc = thread[tid]->getXCProxy(); #endif - // First loop through the integer registers. - for (int i = 0; i < TheISA::NumIntRegs; ++i) - { - regFile.intRegFile[i] = src_xc->readIntReg(i); + // Threads start in the Suspended State + if (src_xc->status() != ExecContext::Suspended) { + continue; } - // Then loop through the floating point registers. - for (int i = 0; i < TheISA::NumFloatRegs; ++i) - { - regFile.floatRegFile.setRegBits(i, src_xc->readRegBits(i)) - } -/* - // Then loop through the misc registers. - regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr; - regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq; - regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag; - regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr; -*/ - // Then finally set the PC and the next PC. - regFile.pc = src_xc->readPC(); - regFile.npc = src_xc->readNextPC(); +#if FULL_SYSTEM + TheISA::initCPU(src_xc, src_xc->readCpuId()); +#endif + } + + // Clear inSyscall. + for (int i = 0; i < number_of_threads; ++i) + thread[i]->inSyscall = false; + + // Initialize stages. + fetch.initStage(); + iew.initStage(); + rename.initStage(); + commit.initStage(); + + commit.setThreads(thread); +} + +template <class Impl> +void +FullO3CPU<Impl>::insertThread(unsigned tid) +{ + DPRINTF(FullCPU,"[tid:%i] Initializing thread data"); + // Will change now that the PC and thread state is internal to the CPU + // and not in the CPUExecContext. +#if 0 +#if FULL_SYSTEM + ExecContext *src_xc = system->execContexts[tid]; +#else + CPUExecContext *src_xc = thread[tid]; +#endif + + //Bind Int Regs to Rename Map + for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { + PhysRegIndex phys_reg = freeList.getIntReg(); + + renameMap[tid].setEntry(ireg,phys_reg); + scoreboard.setReg(phys_reg); + } + + //Bind Float Regs to Rename Map + for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { + PhysRegIndex phys_reg = freeList.getFloatReg(); + + renameMap[tid].setEntry(freg,phys_reg); + scoreboard.setReg(phys_reg); + } + + //Copy Thread Data Into RegFile + this->copyFromXC(tid); + + //Set PC/NPC + regFile.pc[tid] = src_xc->readPC(); + regFile.npc[tid] = src_xc->readNextPC(); + + src_xc->setStatus(ExecContext::Active); + + activateContext(tid,1); + + //Reset ROB/IQ/LSQ Entries + commit.rob->resetEntries(); + iew.resetEntries(); +#endif +} + +template <class Impl> +void +FullO3CPU<Impl>::removeThread(unsigned tid) +{ + DPRINTF(FullCPU,"[tid:%i] Removing thread data"); +#if 0 + //Unbind Int Regs from Rename Map + for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { + PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); + + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + + //Unbind Float Regs from Rename Map + for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { + PhysRegIndex phys_reg = renameMap[tid].lookup(freg); + + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + + //Copy Thread Data From RegFile + /* Fix Me: + * Do we really need to do this if we are removing a thread + * in the sense that it's finished (exiting)? If the thread is just + * being suspended we might... + */ +// this->copyToXC(tid); + + //Squash Throughout Pipeline + fetch.squash(0,tid); + decode.squash(tid); + rename.squash(tid); + + assert(iew.ldstQueue.getCount(tid) == 0); + + //Reset ROB/IQ/LSQ Entries + if (activeThreads.size() >= 1) { + commit.rob->resetEntries(); + iew.resetEntries(); + } +#endif +} + + +template <class Impl> +void +FullO3CPU<Impl>::activateWhenReady(int tid) +{ + DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming" + "(e.g. PhysRegs/ROB/IQ/LSQ) \n", + tid); + + bool ready = true; + + if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) { + DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + "Phys. Int. Regs.\n", + tid); + ready = false; + } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) { + DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + "Phys. Float. Regs.\n", + tid); + ready = false; + } else if (commit.rob->numFreeEntries() >= + commit.rob->entryAmount(activeThreads.size() + 1)) { + DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + "ROB entries.\n", + tid); + ready = false; + } else if (iew.instQueue.numFreeEntries() >= + iew.instQueue.entryAmount(activeThreads.size() + 1)) { + DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + "IQ entries.\n", + tid); + ready = false; + } else if (iew.ldstQueue.numFreeEntries() >= + iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { + DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + "LSQ entries.\n", + tid); + ready = false; + } + + if (ready) { + insertThread(tid); + + contextSwitch = false; + + cpuWaitList.remove(tid); + } else { + suspendContext(tid); + + //blocks fetch + contextSwitch = true; + + //do waitlist + cpuWaitList.push_back(tid); } } template <class Impl> void -FullO3CPU<Impl>::activateContext(int thread_num, int delay) +FullO3CPU<Impl>::activateContext(int tid, int delay) { // Needs to set each stage to running as well. + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + //May Need to Re-code this if the delay variable is the + //delay needed for thread to activate + DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", + tid); + + activeThreads.push_back(tid); + } + + assert(_status == Idle || _status == SwitchedOut); scheduleTickEvent(delay); + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + _status = Running; } template <class Impl> void -FullO3CPU<Impl>::suspendContext(int thread_num) +FullO3CPU<Impl>::suspendContext(int tid) +{ + DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid); + unscheduleTickEvent(); + _status = Idle; +/* + //Remove From Active List, if Active + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive != activeThreads.end()) { + DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(isActive); + } +*/ +} + +template <class Impl> +void +FullO3CPU<Impl>::deallocateContext(int tid) { - panic("suspendContext unimplemented!"); + DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid); +/* + //Remove From Active List, if Active + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive != activeThreads.end()) { + DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(isActive); + + removeThread(tid); + } +*/ } template <class Impl> void -FullO3CPU<Impl>::deallocateContext(int thread_num) +FullO3CPU<Impl>::haltContext(int tid) { - panic("deallocateContext unimplemented!"); + DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid); +/* + //Remove From Active List, if Active + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive != activeThreads.end()) { + DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(isActive); + + removeThread(tid); + } +*/ } template <class Impl> void -FullO3CPU<Impl>::haltContext(int thread_num) +FullO3CPU<Impl>::switchOut(Sampler *_sampler) { - panic("haltContext unimplemented!"); + sampler = _sampler; + switchCount = 0; + fetch.switchOut(); + decode.switchOut(); + rename.switchOut(); + iew.switchOut(); + commit.switchOut(); + + // Wake the CPU and record activity so everything can drain out if + // the CPU is currently idle. + wakeCPU(); + activityRec.activity(); } template <class Impl> void -FullO3CPU<Impl>::switchOut() +FullO3CPU<Impl>::signalSwitched() { - panic("FullO3CPU does not have a switch out function.\n"); + if (++switchCount == NumStages) { + fetch.doSwitchOut(); + rename.doSwitchOut(); + commit.doSwitchOut(); + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); + } + + if (checker) + checker->switchOut(sampler); + + if (tickEvent.scheduled()) + tickEvent.squash(); + sampler->signalSwitched(); + _status = SwitchedOut; + } + assert(switchCount <= 5); } template <class Impl> void FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) { + // Flush out any old data from the time buffers. + for (int i = 0; i < 10; ++i) { + timeBuffer.advance(); + fetchQueue.advance(); + decodeQueue.advance(); + renameQueue.advance(); + iewQueue.advance(); + } + + activityRec.reset(); + BaseCPU::takeOverFrom(oldCPU); + fetch.takeOverFrom(); + decode.takeOverFrom(); + rename.takeOverFrom(); + iew.takeOverFrom(); + commit.takeOverFrom(); + assert(!tickEvent.scheduled()); - // Set all status's to active, schedule the - // CPU's tick event. + // @todo: Figure out how to properly select the tid to put onto + // the active threads list. + int tid = 0; + + list<unsigned>::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + //May Need to Re-code this if the delay variable is the delay + //needed for thread to activate + DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", + tid); + + activeThreads.push_back(tid); + } + + // Set all statuses to active, schedule the CPU's tick event. + // @todo: Fix up statuses so this is handled properly for (int i = 0; i < execContexts.size(); ++i) { ExecContext *xc = execContexts[i]; if (xc->status() == ExecContext::Active && _status != Running) { @@ -329,14 +754,8 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) tickEvent.schedule(curTick); } } -} - -template <class Impl> -InstSeqNum -FullO3CPU<Impl>::getAndIncrementInstSeq() -{ - // Hopefully this works right. - return globalSeqNum++; + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); } template <class Impl> @@ -363,7 +782,6 @@ FullO3CPU<Impl>::readFloatReg(int reg_idx) template <class Impl> FloatRegBits FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width) -{ return regFile.readFloatRegBits(reg_idx, width); } @@ -411,156 +829,368 @@ FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val) template <class Impl> uint64_t -FullO3CPU<Impl>::readPC() +FullO3CPU<Impl>::readArchIntReg(int reg_idx, unsigned tid) +{ + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + + return regFile.readIntReg(phys_reg); +} + +template <class Impl> +float +FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid) +{ + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); + + return regFile.readFloatRegSingle(phys_reg); +} + +template <class Impl> +double +FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid) +{ + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); + + return regFile.readFloatRegDouble(phys_reg); +} + +template <class Impl> +uint64_t +FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid) { - return regFile.readPC(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); + + return regFile.readFloatRegInt(phys_reg); } template <class Impl> void -FullO3CPU<Impl>::setNextPC(uint64_t val) +FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, unsigned tid) { - regFile.setNextPC(val); + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + + regFile.setIntReg(phys_reg, val); } template <class Impl> void -FullO3CPU<Impl>::setPC(Addr new_PC) +FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) { - regFile.setPC(new_PC); + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + + regFile.setFloatRegSingle(phys_reg, val); } template <class Impl> void -FullO3CPU<Impl>::addInst(DynInstPtr &inst) +FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) { - instList.push_back(inst); + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + + regFile.setFloatRegDouble(phys_reg, val); } template <class Impl> void -FullO3CPU<Impl>::instDone() +FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) { - // Keep an instruction count. - numInsts++; + PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); - // Check for instruction-count-based events. - comInstEventQueue[0]->serviceEvents(numInsts); + regFile.setFloatRegInt(phys_reg, val); +} + +template <class Impl> +uint64_t +FullO3CPU<Impl>::readPC(unsigned tid) +{ + return commit.readPC(tid); } template <class Impl> void -FullO3CPU<Impl>::removeBackInst(DynInstPtr &inst) +FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid) { - DynInstPtr inst_to_delete; + commit.setPC(new_PC, tid); +} - // Walk through the instruction list, removing any instructions - // that were inserted after the given instruction, inst. - while (instList.back() != inst) - { - assert(!instList.empty()); +template <class Impl> +uint64_t +FullO3CPU<Impl>::readNextPC(unsigned tid) +{ + return commit.readNextPC(tid); +} + +template <class Impl> +void +FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid) +{ + commit.setNextPC(val, tid); +} + +template <class Impl> +typename FullO3CPU<Impl>::ListIt +FullO3CPU<Impl>::addInst(DynInstPtr &inst) +{ + instList.push_back(inst); - // Obtain the pointer to the instruction. - inst_to_delete = instList.back(); + return --(instList.end()); +} - DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", - inst_to_delete->seqNum, inst_to_delete->readPC()); +template <class Impl> +void +FullO3CPU<Impl>::instDone(unsigned tid) +{ + // Keep an instruction count. + thread[tid]->numInst++; + thread[tid]->numInsts++; + committedInsts[tid]++; + totalCommittedInsts++; - // Remove the instruction from the list. - instList.pop_back(); + // Check for instruction-count-based events. + comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); +} - // Mark it as squashed. - inst_to_delete->setSquashed(); - } +template <class Impl> +void +FullO3CPU<Impl>::addToRemoveList(DynInstPtr &inst) +{ + removeInstsThisCycle = true; + + removeList.push(inst->getInstListIt()); } template <class Impl> void FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst) { - DynInstPtr inst_to_remove; + DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x " + "[sn:%lli]\n", + inst->threadNumber, inst->readPC(), inst->seqNum); - // The front instruction should be the same one being asked to be removed. - assert(instList.front() == inst); + removeInstsThisCycle = true; // Remove the front instruction. - inst_to_remove = inst; - instList.pop_front(); - - DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n", - inst_to_remove, inst_to_remove->readPC()); + removeList.push(inst->getInstListIt()); } template <class Impl> void -FullO3CPU<Impl>::removeInstsNotInROB() +FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid) { - DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " - "list.\n"); + DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction" + " list.\n", tid); + + ListIt end_it; + + bool rob_empty = false; + + if (instList.empty()) { + return; + } else if (rob.isEmpty(/*tid*/)) { + DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n"); + end_it = instList.begin(); + rob_empty = true; + } else { + end_it = (rob.readTailInst(tid))->getInstListIt(); + DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n"); + } + + removeInstsThisCycle = true; + + ListIt inst_it = instList.end(); + + inst_it--; + + // Walk through the instruction list, removing any instructions + // that were inserted after the given instruction iterator, end_it. + while (inst_it != end_it) { + assert(!instList.empty()); - DynInstPtr rob_tail = rob.readTailInst(); + squashInstIt(inst_it, tid); - removeBackInst(rob_tail); + inst_it--; + } + + // If the ROB was empty, then we actually need to remove the first + // instruction as well. + if (rob_empty) { + squashInstIt(inst_it, tid); + } } template <class Impl> void -FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num) +FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, + unsigned tid) { + assert(!instList.empty()); + + removeInstsThisCycle = true; + + ListIt inst_iter = instList.end(); + + inst_iter--; + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " - "list.\n"); + "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", + tid, seq_num, (*inst_iter)->seqNum); - DynInstPtr inst_to_delete; + while ((*inst_iter)->seqNum > seq_num) { - while (instList.back()->seqNum > seq_num) { - assert(!instList.empty()); + bool break_loop = (inst_iter == instList.begin()); - // Obtain the pointer to the instruction. - inst_to_delete = instList.back(); + squashInstIt(inst_iter, tid); - DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", - inst_to_delete->seqNum, inst_to_delete->readPC()); + inst_iter--; - // Remove the instruction from the list. - instList.back() = NULL; - instList.pop_back(); + if (break_loop) + break; + } +} + +template <class Impl> +inline void +FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid) +{ + if ((*instIt)->threadNumber == tid) { + DPRINTF(FullCPU, "FullCPU: Squashing instruction, " + "[tid:%i] [sn:%lli] PC %#x\n", + (*instIt)->threadNumber, + (*instIt)->seqNum, + (*instIt)->readPC()); // Mark it as squashed. - inst_to_delete->setSquashed(); - } + (*instIt)->setSquashed(); + // @todo: Formulate a consistent method for deleting + // instructions from the instruction list + // Remove the instruction from the list. + removeList.push(instIt); + } } template <class Impl> void +FullO3CPU<Impl>::cleanUpRemovedInsts() +{ + while (!removeList.empty()) { + DPRINTF(FullCPU, "FullCPU: Removing instruction, " + "[tid:%i] [sn:%lli] PC %#x\n", + (*removeList.front())->threadNumber, + (*removeList.front())->seqNum, + (*removeList.front())->readPC()); + + instList.erase(removeList.front()); + + removeList.pop(); + } + + removeInstsThisCycle = false; +} +/* +template <class Impl> +void FullO3CPU<Impl>::removeAllInsts() { instList.clear(); } - +*/ template <class Impl> void FullO3CPU<Impl>::dumpInsts() { int num = 0; - typename list<DynInstPtr>::iterator inst_list_it = instList.begin(); - while (inst_list_it != instList.end()) - { - cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n", - num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, - (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); + ListIt inst_list_it = instList.begin(); + + cprintf("Dumping Instruction List\n"); + + while (inst_list_it != instList.end()) { + cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" + "Squashed:%i\n\n", + num, (*inst_list_it)->readPC(), (*inst_list_it)->threadNumber, + (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); inst_list_it++; ++num; } } - +/* template <class Impl> void FullO3CPU<Impl>::wakeDependents(DynInstPtr &inst) { iew.wakeDependents(inst); } +*/ +template <class Impl> +void +FullO3CPU<Impl>::wakeCPU() +{ + if (activityRec.active() || tickEvent.scheduled()) { + DPRINTF(Activity, "CPU already running.\n"); + return; + } + + DPRINTF(Activity, "Waking up CPU\n"); + + idleCycles += (curTick - 1) - lastRunningCycle; + + tickEvent.schedule(curTick); +} + +template <class Impl> +int +FullO3CPU<Impl>::getFreeTid() +{ + for (int i=0; i < numThreads; i++) { + if (!tids[i]) { + tids[i] = true; + return i; + } + } + + return -1; +} + +template <class Impl> +void +FullO3CPU<Impl>::doContextSwitch() +{ + if (contextSwitch) { + + //ADD CODE TO DEACTIVE THREAD HERE (???) + + for (int tid=0; tid < cpuWaitList.size(); tid++) { + activateWhenReady(tid); + } + + if (cpuWaitList.size() == 0) + contextSwitch = true; + } +} + +template <class Impl> +void +FullO3CPU<Impl>::updateThreadPriority() +{ + if (activeThreads.size() > 1) + { + //DEFAULT TO ROUND ROBIN SCHEME + //e.g. Move highest priority to end of thread list + list<unsigned>::iterator list_begin = activeThreads.begin(); + list<unsigned>::iterator list_end = activeThreads.end(); + + unsigned high_thread = *list_begin; + + activeThreads.erase(list_begin); + + activeThreads.push_back(high_thread); + } +} // Forward declaration of FullO3CPU. template class FullO3CPU<AlphaSimpleImpl>; |