From 582a0148b441fe9f4a6f977094c5ce6bf7ab6313 Mon Sep 17 00:00:00 2001 From: Mitch Hayenga Date: Wed, 30 Sep 2015 11:14:19 -0500 Subject: config,cpu: Add SMT support to Atomic and Timing CPUs Adds SMT support to the "simple" CPU models so that they can be used with other SMT-supported CPUs. Example usage: this enables the TimingSimpleCPU to be used to warmup caches before swapping to detailed mode with the in-order or out-of-order based CPU models. --- src/cpu/simple/atomic.cc | 146 ++++++----- src/cpu/simple/atomic.hh | 9 +- src/cpu/simple/base.cc | 556 ++++++++++++++++++++++++----------------- src/cpu/simple/base.hh | 351 ++------------------------ src/cpu/simple/exec_context.hh | 416 ++++++++++++++++++++++++++++++ src/cpu/simple/timing.cc | 168 ++++++++----- src/cpu/simple/timing.hh | 9 +- 7 files changed, 967 insertions(+), 688 deletions(-) create mode 100644 src/cpu/simple/exec_context.hh (limited to 'src/cpu/simple') diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 3777ddee9..6690c1da6 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -1,6 +1,6 @@ /* * Copyright 2014 Google, Inc. - * Copyright (c) 2012-2013 ARM Limited + * Copyright (c) 2012-2013,2015 ARM Limited * All rights reserved. 
* * The license below extends only to copyright in the software and shall @@ -84,24 +84,11 @@ AtomicSimpleCPU::TickEvent::description() const void AtomicSimpleCPU::init() { - BaseCPU::init(); + BaseSimpleCPU::init(); - // Initialise the ThreadContext's memory proxies - tcBase()->initMemProxies(tcBase()); - - if (FullSystem && !params()->switched_out) { - ThreadID size = threadContexts.size(); - for (ThreadID i = 0; i < size; ++i) { - ThreadContext *tc = threadContexts[i]; - // initialize CPU, including PC - TheISA::initCPU(tc, tc->contextId()); - } - } - - // Atomic doesn't do MT right now, so contextId == threadId - ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT - data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too - data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too + ifetch_req.setThreadContext(_cpuId, 0); + data_read_req.setThreadContext(_cpuId, 0); + data_write_req.setThreadContext(_cpuId, 0); } AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p) @@ -131,12 +118,13 @@ AtomicSimpleCPU::drain() return DrainState::Drained; if (!isDrained()) { - DPRINTF(Drain, "Requesting drain: %s\n", pcState()); + DPRINTF(Drain, "Requesting drain.\n"); return DrainState::Draining; } else { if (tickEvent.scheduled()) deschedule(tickEvent); + activeThreads.clear(); DPRINTF(Drain, "Not executing microcode, no need to drain.\n"); return DrainState::Drained; } @@ -153,16 +141,22 @@ AtomicSimpleCPU::drainResume() verifyMemoryMode(); assert(!threadContexts.empty()); - if (threadContexts.size() > 1) - fatal("The atomic CPU only supports one thread.\n"); - if (thread->status() == ThreadContext::Active) { - schedule(tickEvent, nextCycle()); - _status = BaseSimpleCPU::Running; - notIdleFraction = 1; - } else { - _status = BaseSimpleCPU::Idle; - notIdleFraction = 0; + _status = BaseSimpleCPU::Idle; + + for (ThreadID tid = 0; tid < numThreads; tid++) { + if (threadInfo[tid]->thread->status() == ThreadContext::Active) { + 
threadInfo[tid]->notIdleFraction = 1; + activeThreads.push_back(tid); + _status = BaseSimpleCPU::Running; + + // Tick if any threads active + if (!tickEvent.scheduled()) { + schedule(tickEvent, nextCycle()); + } + } else { + threadInfo[tid]->notIdleFraction = 0; + } } } @@ -172,7 +166,7 @@ AtomicSimpleCPU::tryCompleteDrain() if (drainState() != DrainState::Draining) return false; - DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState()); + DPRINTF(Drain, "tryCompleteDrain.\n"); if (!isDrained()) return false; @@ -201,10 +195,6 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) // The tick event should have been descheduled by drain() assert(!tickEvent.scheduled()); - - ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT - data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too - data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too } void @@ -221,20 +211,23 @@ AtomicSimpleCPU::activateContext(ThreadID thread_num) { DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num); - assert(thread_num == 0); - assert(thread); - - assert(_status == Idle); - assert(!tickEvent.scheduled()); + assert(thread_num < numThreads); - notIdleFraction = 1; - Cycles delta = ticksToCycles(thread->lastActivate - thread->lastSuspend); + threadInfo[thread_num]->notIdleFraction = 1; + Cycles delta = ticksToCycles(threadInfo[thread_num]->thread->lastActivate - + threadInfo[thread_num]->thread->lastSuspend); numCycles += delta; ppCycles->notify(delta); - //Make sure ticks are still on multiples of cycles - schedule(tickEvent, clockEdge(Cycles(0))); + if (!tickEvent.scheduled()) { + //Make sure ticks are still on multiples of cycles + schedule(tickEvent, clockEdge(Cycles(0))); + } _status = BaseSimpleCPU::Running; + if (std::find(activeThreads.begin(), activeThreads.end(), thread_num) + == activeThreads.end()) { + activeThreads.push_back(thread_num); + } } @@ -243,21 +236,24 @@ AtomicSimpleCPU::suspendContext(ThreadID thread_num) { DPRINTF(SimpleCPU, 
"SuspendContext %d\n", thread_num); - assert(thread_num == 0); - assert(thread); + assert(thread_num < numThreads); + activeThreads.remove(thread_num); if (_status == Idle) return; assert(_status == BaseSimpleCPU::Running); - // tick event may not be scheduled if this gets called from inside - // an instruction's execution, e.g. "quiesce" - if (tickEvent.scheduled()) - deschedule(tickEvent); + threadInfo[thread_num]->notIdleFraction = 0; + + if (activeThreads.empty()) { + _status = Idle; + + if (tickEvent.scheduled()) { + deschedule(tickEvent); + } + } - notIdleFraction = 0; - _status = Idle; } @@ -269,7 +265,7 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt) // X86 ISA: Snooping an invalidation for monitor/mwait AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner); - if(cpu->getAddrMonitor()->doMonitor(pkt)) { + if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { cpu->wakeup(); } @@ -277,7 +273,9 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt) if (pkt->isInvalidate()) { DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n", pkt->getAddr()); - TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask); + for (auto &t_info : cpu->threadInfo) { + TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask); + } } return 0; @@ -291,7 +289,7 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt) // X86 ISA: Snooping an invalidation for monitor/mwait AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner); - if(cpu->getAddrMonitor()->doMonitor(pkt)) { + if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { cpu->wakeup(); } @@ -299,7 +297,9 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt) if (pkt->isInvalidate()) { DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n", pkt->getAddr()); - TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask); + for (auto &t_info : cpu->threadInfo) { + TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask); + } } } @@ -307,6 +307,9 @@ Fault 
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size, unsigned flags) { + SimpleExecContext& t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + // use the CPU's statically allocated read request and packet objects Request *req = &data_read_req; @@ -330,7 +333,8 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr()); // translate to physical address - Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read); + Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), + BaseTLB::Read); // Now do the access. if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) { @@ -370,6 +374,7 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, assert(!locked); locked = true; } + return fault; } @@ -391,7 +396,8 @@ Fault AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr, unsigned flags, uint64_t *res) { - + SimpleExecContext& t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; static uint8_t zero_array[64] = {}; if (data == NULL) { @@ -424,7 +430,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr()); // translate to physical address - Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write); + Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write); // Now do the access. 
if (fault == NoFault) { @@ -477,6 +483,8 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, assert(locked); locked = false; } + + if (fault != NoFault && req->isPrefetch()) { return NoFault; } else { @@ -503,6 +511,19 @@ AtomicSimpleCPU::tick() { DPRINTF(SimpleCPU, "Tick\n"); + // Change thread if multi-threaded + swapActiveThread(); + + // Set memory request ids to current thread + if (numThreads > 1) { + ifetch_req.setThreadContext(_cpuId, curThread); + data_read_req.setThreadContext(_cpuId, curThread); + data_write_req.setThreadContext(_cpuId, curThread); + } + + SimpleExecContext& t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + Tick latency = 0; for (int i = 0; i < width || locked; ++i) { @@ -529,7 +550,7 @@ AtomicSimpleCPU::tick() if (needToFetch) { ifetch_req.taskId(taskId()); setupFetchRequest(&ifetch_req); - fault = thread->itb->translateAtomic(&ifetch_req, tc, + fault = thread->itb->translateAtomic(&ifetch_req, thread->getTC(), BaseTLB::Execute); } @@ -565,7 +586,7 @@ AtomicSimpleCPU::tick() preExecute(); if (curStaticInst) { - fault = curStaticInst->execute(this, traceData); + fault = curStaticInst->execute(&t_info, traceData); // keep an instruction count if (fault == NoFault) { @@ -601,7 +622,7 @@ AtomicSimpleCPU::tick() } } - if(fault != NoFault || !stayAtPC) + if(fault != NoFault || !t_info.stayAtPC) advancePC(fault); } @@ -613,7 +634,7 @@ AtomicSimpleCPU::tick() latency = clockPeriod(); if (_status != Idle) - schedule(tickEvent, curTick() + latency); + reschedule(tickEvent, curTick() + latency, true); } void @@ -638,8 +659,5 @@ AtomicSimpleCPU::printAddr(Addr a) AtomicSimpleCPU * AtomicSimpleCPUParams::create() { - numThreads = 1; - if (!FullSystem && workload.size() != 1) - panic("only one workload allowed"); return new AtomicSimpleCPU(this); } diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 3f587e671..76ee9f897 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -1,5 +1,5
@@ /* - * Copyright (c) 2012-2013 ARM Limited + * Copyright (c) 2012-2013,2015 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #define __CPU_SIMPLE_ATOMIC_HH__ #include "cpu/simple/base.hh" +#include "cpu/simple/exec_context.hh" #include "params/AtomicSimpleCPU.hh" #include "sim/probe/probe.hh" @@ -96,9 +97,11 @@ class AtomicSimpleCPU : public BaseSimpleCPU * */ bool isDrained() { - return microPC() == 0 && + SimpleExecContext &t_info = *threadInfo[curThread]; + + return t_info.thread->microPC() == 0 && !locked && - !stayAtPC; + !t_info.stayAtPC; } /** diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 2751a346c..673cadd77 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2012,2015 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -62,6 +62,7 @@ #include "cpu/exetrace.hh" #include "cpu/pred/bpred_unit.hh" #include "cpu/profile.hh" +#include "cpu/simple/exec_context.hh" #include "cpu/simple_thread.hh" #include "cpu/smt.hh" #include "cpu/static_inst.hh" @@ -87,46 +88,121 @@ using namespace TheISA; BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p) : BaseCPU(p), + curThread(0), branchPred(p->branchPred), - traceData(NULL), thread(NULL), _status(Idle), interval_stats(false), - inst() + traceData(NULL), + inst(), + _status(Idle) { - if (FullSystem) - thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb, - p->isa[0]); - else - thread = new SimpleThread(this, /* thread_num */ 0, p->system, - p->workload[0], p->itb, p->dtb, p->isa[0]); - - thread->setStatus(ThreadContext::Halted); + SimpleThread *thread; - tc = thread->getTC(); + for (unsigned i = 0; i < numThreads; i++) { + if (FullSystem) { + thread = new SimpleThread(this, i, p->system, + p->itb, p->dtb, p->isa[i]); + } else { + thread = new SimpleThread(this, i, p->system, 
p->workload[i], + p->itb, p->dtb, p->isa[i]); + } + threadInfo.push_back(new SimpleExecContext(this, thread)); + ThreadContext *tc = thread->getTC(); + threadContexts.push_back(tc); + } if (p->checker) { + if (numThreads != 1) + fatal("Checker currently does not support SMT"); + BaseCPU *temp_checker = p->checker; checker = dynamic_cast<CheckerCPU *>(temp_checker); checker->setSystem(p->system); // Manipulate thread context - ThreadContext *cpu_tc = tc; - tc = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker); + ThreadContext *cpu_tc = threadContexts[0]; + threadContexts[0] = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker); } else { checker = NULL; } +} + +void +BaseSimpleCPU::init() +{ + BaseCPU::init(); + + for (auto tc : threadContexts) { + // Initialise the ThreadContext's memory proxies + tc->initMemProxies(tc); + + if (FullSystem && !params()->switched_out) { + // initialize CPU, including PC + TheISA::initCPU(tc, tc->contextId()); + } + } +} + +void +BaseSimpleCPU::checkPcEventQueue() +{ + Addr oldpc, pc = threadInfo[curThread]->thread->instAddr(); + do { + oldpc = pc; + system->pcEventQueue.service(threadContexts[curThread]); + pc = threadInfo[curThread]->thread->instAddr(); + } while (oldpc != pc); +} + +void +BaseSimpleCPU::swapActiveThread() +{ + if (numThreads > 1) { + if ((!curStaticInst || !curStaticInst->isDelayedCommit()) && + !threadInfo[curThread]->stayAtPC) { + // Swap active threads + if (!activeThreads.empty()) { + curThread = activeThreads.front(); + activeThreads.pop_front(); + activeThreads.push_back(curThread); + } + } + } +} + +void +BaseSimpleCPU::countInst() +{ + SimpleExecContext& t_info = *threadInfo[curThread]; + + if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { + t_info.numInst++; + t_info.numInsts++; + } + t_info.numOp++; + t_info.numOps++; - numInst = 0; - startNumInst = 0; - numOp = 0; - startNumOp = 0; - numLoad = 0; - startNumLoad = 0; - lastIcacheStall = 0; - lastDcacheStall = 0; + system->totalNumInsts++; +
t_info.thread->funcExeInst++; +} + +Counter +BaseSimpleCPU::totalInsts() const +{ + Counter total_inst = 0; + for (auto& t_info : threadInfo) { + total_inst += t_info->numInst; + } - threadContexts.push_back(tc); + return total_inst; +} +Counter +BaseSimpleCPU::totalOps() const +{ + Counter total_op = 0; + for (auto& t_info : threadInfo) { + total_op += t_info->numOp; + } - fetchOffset = 0; - stayAtPC = false; + return total_op; } BaseSimpleCPU::~BaseSimpleCPU() @@ -148,177 +224,184 @@ BaseSimpleCPU::regStats() BaseCPU::regStats(); - numInsts - .name(name() + ".committedInsts") - .desc("Number of instructions committed") - ; - - numOps - .name(name() + ".committedOps") - .desc("Number of ops (including micro ops) committed") - ; - - numIntAluAccesses - .name(name() + ".num_int_alu_accesses") - .desc("Number of integer alu accesses") - ; - - numFpAluAccesses - .name(name() + ".num_fp_alu_accesses") - .desc("Number of float alu accesses") - ; - - numCallsReturns - .name(name() + ".num_func_calls") - .desc("number of times a function call or return occured") - ; - - numCondCtrlInsts - .name(name() + ".num_conditional_control_insts") - .desc("number of instructions that are conditional controls") - ; - - numIntInsts - .name(name() + ".num_int_insts") - .desc("number of integer instructions") - ; - - numFpInsts - .name(name() + ".num_fp_insts") - .desc("number of float instructions") - ; - - numIntRegReads - .name(name() + ".num_int_register_reads") - .desc("number of times the integer registers were read") - ; - - numIntRegWrites - .name(name() + ".num_int_register_writes") - .desc("number of times the integer registers were written") - ; - - numFpRegReads - .name(name() + ".num_fp_register_reads") - .desc("number of times the floating registers were read") - ; - - numFpRegWrites - .name(name() + ".num_fp_register_writes") - .desc("number of times the floating registers were written") - ; - - numCCRegReads - .name(name() + ".num_cc_register_reads") - .desc("number of 
times the CC registers were read") - .flags(nozero) - ; - - numCCRegWrites - .name(name() + ".num_cc_register_writes") - .desc("number of times the CC registers were written") - .flags(nozero) - ; - - numMemRefs - .name(name()+".num_mem_refs") - .desc("number of memory refs") - ; - - numStoreInsts - .name(name() + ".num_store_insts") - .desc("Number of store instructions") - ; - - numLoadInsts - .name(name() + ".num_load_insts") - .desc("Number of load instructions") - ; - - notIdleFraction - .name(name() + ".not_idle_fraction") - .desc("Percentage of non-idle cycles") - ; - - idleFraction - .name(name() + ".idle_fraction") - .desc("Percentage of idle cycles") - ; - - numBusyCycles - .name(name() + ".num_busy_cycles") - .desc("Number of busy cycles") - ; - - numIdleCycles - .name(name()+".num_idle_cycles") - .desc("Number of idle cycles") - ; - - icacheStallCycles - .name(name() + ".icache_stall_cycles") - .desc("ICache total stall cycles") - .prereq(icacheStallCycles) - ; - - dcacheStallCycles - .name(name() + ".dcache_stall_cycles") - .desc("DCache total stall cycles") - .prereq(dcacheStallCycles) - ; - - statExecutedInstType - .init(Enums::Num_OpClass) - .name(name() + ".op_class") - .desc("Class of executed instruction") - .flags(total | pdf | dist) - ; - for (unsigned i = 0; i < Num_OpClasses; ++i) { - statExecutedInstType.subname(i, Enums::OpClassStrings[i]); - } + for (ThreadID tid = 0; tid < numThreads; tid++) { + SimpleExecContext& t_info = *threadInfo[tid]; + + std::string thread_str = name(); + if (numThreads > 1) + thread_str += ".thread" + std::to_string(tid); + + t_info.numInsts + .name(thread_str + ".committedInsts") + .desc("Number of instructions committed") + ; + + t_info.numOps + .name(thread_str + ".committedOps") + .desc("Number of ops (including micro ops) committed") + ; + + t_info.numIntAluAccesses + .name(thread_str + ".num_int_alu_accesses") + .desc("Number of integer alu accesses") + ; + + t_info.numFpAluAccesses + .name(thread_str + 
".num_fp_alu_accesses") + .desc("Number of float alu accesses") + ; + + t_info.numCallsReturns + .name(thread_str + ".num_func_calls") + .desc("number of times a function call or return occured") + ; + + t_info.numCondCtrlInsts + .name(thread_str + ".num_conditional_control_insts") + .desc("number of instructions that are conditional controls") + ; + + t_info.numIntInsts + .name(thread_str + ".num_int_insts") + .desc("number of integer instructions") + ; + + t_info.numFpInsts + .name(thread_str + ".num_fp_insts") + .desc("number of float instructions") + ; + + t_info.numIntRegReads + .name(thread_str + ".num_int_register_reads") + .desc("number of times the integer registers were read") + ; + + t_info.numIntRegWrites + .name(thread_str + ".num_int_register_writes") + .desc("number of times the integer registers were written") + ; + + t_info.numFpRegReads + .name(thread_str + ".num_fp_register_reads") + .desc("number of times the floating registers were read") + ; + + t_info.numFpRegWrites + .name(thread_str + ".num_fp_register_writes") + .desc("number of times the floating registers were written") + ; + + t_info.numCCRegReads + .name(thread_str + ".num_cc_register_reads") + .desc("number of times the CC registers were read") + .flags(nozero) + ; + + t_info.numCCRegWrites + .name(thread_str + ".num_cc_register_writes") + .desc("number of times the CC registers were written") + .flags(nozero) + ; + + t_info.numMemRefs + .name(thread_str + ".num_mem_refs") + .desc("number of memory refs") + ; + + t_info.numStoreInsts + .name(thread_str + ".num_store_insts") + .desc("Number of store instructions") + ; + + t_info.numLoadInsts + .name(thread_str + ".num_load_insts") + .desc("Number of load instructions") + ; + + t_info.notIdleFraction + .name(thread_str + ".not_idle_fraction") + .desc("Percentage of non-idle cycles") + ; + + t_info.idleFraction + .name(thread_str + ".idle_fraction") + .desc("Percentage of idle cycles") + ; + + t_info.numBusyCycles + .name(thread_str + 
".num_busy_cycles") + .desc("Number of busy cycles") + ; + + t_info.numIdleCycles + .name(thread_str + ".num_idle_cycles") + .desc("Number of idle cycles") + ; + + t_info.icacheStallCycles + .name(thread_str + ".icache_stall_cycles") + .desc("ICache total stall cycles") + .prereq(t_info.icacheStallCycles) + ; + + t_info.dcacheStallCycles + .name(thread_str + ".dcache_stall_cycles") + .desc("DCache total stall cycles") + .prereq(t_info.dcacheStallCycles) + ; + + t_info.statExecutedInstType + .init(Enums::Num_OpClass) + .name(thread_str + ".op_class") + .desc("Class of executed instruction") + .flags(total | pdf | dist) + ; + + for (unsigned i = 0; i < Num_OpClasses; ++i) { + t_info.statExecutedInstType.subname(i, Enums::OpClassStrings[i]); + } - idleFraction = constant(1.0) - notIdleFraction; - numIdleCycles = idleFraction * numCycles; - numBusyCycles = (notIdleFraction)*numCycles; + t_info.idleFraction = constant(1.0) - t_info.notIdleFraction; + t_info.numIdleCycles = t_info.idleFraction * numCycles; + t_info.numBusyCycles = t_info.notIdleFraction * numCycles; - numBranches - .name(name() + ".Branches") - .desc("Number of branches fetched") - .prereq(numBranches); + t_info.numBranches + .name(thread_str + ".Branches") + .desc("Number of branches fetched") + .prereq(t_info.numBranches); - numPredictedBranches - .name(name() + ".predictedBranches") - .desc("Number of branches predicted as taken") - .prereq(numPredictedBranches); + t_info.numPredictedBranches + .name(thread_str + ".predictedBranches") + .desc("Number of branches predicted as taken") + .prereq(t_info.numPredictedBranches); - numBranchMispred - .name(name() + ".BranchMispred") - .desc("Number of branch mispredictions") - .prereq(numBranchMispred); + t_info.numBranchMispred + .name(thread_str + ".BranchMispred") + .desc("Number of branch mispredictions") + .prereq(t_info.numBranchMispred); + } } void BaseSimpleCPU::resetStats() { -// startNumInst = numInst; - notIdleFraction = (_status != Idle); + for 
(auto &thread_info : threadInfo) { + thread_info->notIdleFraction = (_status != Idle); + } } void BaseSimpleCPU::serializeThread(CheckpointOut &cp, ThreadID tid) const { assert(_status == Idle || _status == Running); - assert(tid == 0); - thread->serialize(cp); + threadInfo[tid]->thread->serialize(cp); } void BaseSimpleCPU::unserializeThread(CheckpointIn &cp, ThreadID tid) { - if (tid != 0) - fatal("Trying to load more than one thread into a SimpleCPU\n"); - thread->unserialize(cp); + threadInfo[tid]->thread->unserialize(cp); } void @@ -329,29 +412,34 @@ change_thread_state(ThreadID tid, int activate, int priority) Addr BaseSimpleCPU::dbg_vtophys(Addr addr) { - return vtophys(tc, addr); + return vtophys(threadContexts[curThread], addr); } void BaseSimpleCPU::wakeup() { - getAddrMonitor()->gotWakeup = true; + getCpuAddrMonitor()->gotWakeup = true; - if (thread->status() != ThreadContext::Suspended) - return; - - DPRINTF(Quiesce,"Suspended Processor awoke\n"); - thread->activate(); + for (ThreadID tid = 0; tid < numThreads; tid++) { + if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) { + DPRINTF(Quiesce,"Suspended Processor awoke\n"); + threadInfo[tid]->thread->activate(); + } + } } void BaseSimpleCPU::checkForInterrupts() { + SimpleExecContext&t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + ThreadContext* tc = thread->getTC(); + if (checkInterrupts(tc)) { Fault interrupt = interrupts->getInterrupt(tc); if (interrupt != NoFault) { - fetchOffset = 0; + t_info.fetchOffset = 0; interrupts->updateIntrInfo(tc); interrupt->invoke(tc); thread->decoder.reset(); @@ -363,12 +451,15 @@ BaseSimpleCPU::checkForInterrupts() void BaseSimpleCPU::setupFetchRequest(Request *req) { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + Addr instAddr = thread->instAddr(); // set up memory request for instruction fetch DPRINTF(Fetch, "Fetch: PC:%08p\n", instAddr); - Addr fetchPC = (instAddr & PCMask) + 
fetchOffset; + Addr fetchPC = (instAddr & PCMask) + t_info.fetchOffset; req->setVirt(0, fetchPC, sizeof(MachInst), Request::INST_FETCH, instMasterId(), instAddr); } @@ -377,6 +468,9 @@ BaseSimpleCPU::setupFetchRequest(Request *req) void BaseSimpleCPU::preExecute() { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + // maintain $r0 semantics thread->setIntReg(ZeroReg, 0); #if THE_ISA == ALPHA_ISA @@ -384,7 +478,7 @@ BaseSimpleCPU::preExecute() #endif // ALPHA_ISA // check for instruction-count-based events - comInstEventQueue[0]->serviceEvents(numInst); + comInstEventQueue[curThread]->serviceEvents(t_info.numInst); system->instEventQueue.serviceEvents(system->totalNumInsts); // decode the instruction @@ -393,7 +487,7 @@ BaseSimpleCPU::preExecute() TheISA::PCState pcState = thread->pcState(); if (isRomMicroPC(pcState.microPC())) { - stayAtPC = false; + t_info.stayAtPC = false; curStaticInst = microcodeRom.fetchMicroop(pcState.microPC(), curMacroStaticInst); } else if (!curMacroStaticInst) { @@ -404,7 +498,7 @@ BaseSimpleCPU::preExecute() //Predecode, ie bundle up an ExtMachInst //If more fetch data is needed, pass it in. - Addr fetchPC = (pcState.instAddr() & PCMask) + fetchOffset; + Addr fetchPC = (pcState.instAddr() & PCMask) + t_info.fetchOffset; //if(decoder->needMoreBytes()) decoder->moreBytes(pcState, fetchPC, inst); //else @@ -414,18 +508,19 @@ BaseSimpleCPU::preExecute() //fetch beyond the MachInst at the current pc. 
instPtr = decoder->decode(pcState); if (instPtr) { - stayAtPC = false; + t_info.stayAtPC = false; thread->pcState(pcState); } else { - stayAtPC = true; - fetchOffset += sizeof(MachInst); + t_info.stayAtPC = true; + t_info.fetchOffset += sizeof(MachInst); } //If we decoded an instruction and it's microcoded, start pulling //out micro ops if (instPtr && instPtr->isMacroop()) { curMacroStaticInst = instPtr; - curStaticInst = curMacroStaticInst->fetchMicroop(pcState.microPC()); + curStaticInst = + curMacroStaticInst->fetchMicroop(pcState.microPC()); } else { curStaticInst = instPtr; } @@ -437,7 +532,7 @@ BaseSimpleCPU::preExecute() //If we decoded an instruction this "tick", record information about it. if (curStaticInst) { #if TRACING_ON - traceData = tracer->getInstRecord(curTick(), tc, + traceData = tracer->getInstRecord(curTick(), thread->getTC(), curStaticInst, thread->pcState(), curMacroStaticInst); DPRINTF(Decode,"Decode: Decoded %s instruction: %#x\n", @@ -445,86 +540,91 @@ BaseSimpleCPU::preExecute() #endif // TRACING_ON } - if (branchPred && curStaticInst && curStaticInst->isControl()) { + if (branchPred && curStaticInst && + curStaticInst->isControl()) { // Use a fake sequence number since we only have one // instruction in flight at the same time. 
const InstSeqNum cur_sn(0); - const ThreadID tid(0); - pred_pc = thread->pcState(); + t_info.predPC = thread->pcState(); const bool predict_taken( - branchPred->predict(curStaticInst, cur_sn, pred_pc, tid)); + branchPred->predict(curStaticInst, cur_sn, t_info.predPC, + curThread)); if (predict_taken) - ++numPredictedBranches; + ++t_info.numPredictedBranches; } } void BaseSimpleCPU::postExecute() { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + assert(curStaticInst); - TheISA::PCState pc = tc->pcState(); + TheISA::PCState pc = threadContexts[curThread]->pcState(); Addr instAddr = pc.instAddr(); if (FullSystem && thread->profile) { - bool usermode = TheISA::inUserMode(tc); + bool usermode = TheISA::inUserMode(threadContexts[curThread]); thread->profilePC = usermode ? 1 : instAddr; - ProfileNode *node = thread->profile->consume(tc, curStaticInst); + ProfileNode *node = thread->profile->consume(threadContexts[curThread], + curStaticInst); if (node) thread->profileNode = node; } if (curStaticInst->isMemRef()) { - numMemRefs++; + t_info.numMemRefs++; } if (curStaticInst->isLoad()) { - ++numLoad; - comLoadEventQueue[0]->serviceEvents(numLoad); + ++t_info.numLoad; + comLoadEventQueue[curThread]->serviceEvents(t_info.numLoad); } if (CPA::available()) { - CPA::cpa()->swAutoBegin(tc, pc.nextInstAddr()); + CPA::cpa()->swAutoBegin(threadContexts[curThread], pc.nextInstAddr()); } if (curStaticInst->isControl()) { - ++numBranches; + ++t_info.numBranches; } /* Power model statistics */ //integer alu accesses if (curStaticInst->isInteger()){ - numIntAluAccesses++; - numIntInsts++; + t_info.numIntAluAccesses++; + t_info.numIntInsts++; } //float alu accesses if (curStaticInst->isFloating()){ - numFpAluAccesses++; - numFpInsts++; + t_info.numFpAluAccesses++; + t_info.numFpInsts++; } - + //number of function calls/returns to get window accesses if (curStaticInst->isCall() || curStaticInst->isReturn()){ - numCallsReturns++; + 
t_info.numCallsReturns++; } - + //the number of branch predictions that will be made if (curStaticInst->isCondCtrl()){ - numCondCtrlInsts++; + t_info.numCondCtrlInsts++; } - + //result bus acceses if (curStaticInst->isLoad()){ - numLoadInsts++; + t_info.numLoadInsts++; } - + if (curStaticInst->isStore()){ - numStoreInsts++; + t_info.numStoreInsts++; } /* End power model statistics */ - statExecutedInstType[curStaticInst->opClass()]++; + t_info.statExecutedInstType[curStaticInst->opClass()]++; if (FullSystem) traceFunctions(instAddr); @@ -542,13 +642,16 @@ BaseSimpleCPU::postExecute() void BaseSimpleCPU::advancePC(const Fault &fault) { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + const bool branching(thread->pcState().branching()); //Since we're moving to a new pc, zero out the offset - fetchOffset = 0; + t_info.fetchOffset = 0; if (fault != NoFault) { curMacroStaticInst = StaticInst::nullStaticInstPtr; - fault->invoke(tc, curStaticInst); + fault->invoke(threadContexts[curThread], curStaticInst); thread->decoder.reset(); } else { if (curStaticInst) { @@ -564,16 +667,14 @@ BaseSimpleCPU::advancePC(const Fault &fault) // Use a fake sequence number since we only have one // instruction in flight at the same time. 
const InstSeqNum cur_sn(0); - const ThreadID tid(0); - if (pred_pc == thread->pcState()) { + if (t_info.predPC == thread->pcState()) { // Correctly predicted branch - branchPred->update(cur_sn, tid); + branchPred->update(cur_sn, curThread); } else { // Mis-predicted branch - branchPred->squash(cur_sn, pcState(), - branching, tid); - ++numBranchMispred; + branchPred->squash(cur_sn, thread->pcState(), branching, curThread); + ++t_info.numBranchMispred; } } } @@ -582,5 +683,6 @@ void BaseSimpleCPU::startup() { BaseCPU::startup(); - thread->startup(); + for (auto& t_info : threadInfo) + t_info->thread->startup(); } diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 2f7247010..c108cb986 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2011-2012,2015 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -79,57 +79,35 @@ namespace Trace { struct BaseSimpleCPUParams; class BPredUnit; +class SimpleExecContext; -class BaseSimpleCPU : public BaseCPU, public ExecContext +class BaseSimpleCPU : public BaseCPU { protected: - typedef TheISA::MiscReg MiscReg; - typedef TheISA::FloatReg FloatReg; - typedef TheISA::FloatRegBits FloatRegBits; - typedef TheISA::CCReg CCReg; - + ThreadID curThread; BPredUnit *branchPred; - protected: - Trace::InstRecord *traceData; - - inline void checkPcEventQueue() { - Addr oldpc, pc = thread->instAddr(); - do { - oldpc = pc; - system->pcEventQueue.service(tc); - pc = thread->instAddr(); - } while (oldpc != pc); - } - - public: - void wakeup(); - - void zero_fill_64(Addr addr) { - static int warned = 0; - if (!warned) { - warn ("WH64 is not implemented"); - warned = 1; - } - }; + void checkPcEventQueue(); + void swapActiveThread(); public: BaseSimpleCPU(BaseSimpleCPUParams *params); virtual ~BaseSimpleCPU(); - + void wakeup(); + virtual void init(); public: - /** SimpleThread object, provides all the 
architectural state. */ - SimpleThread *thread; + Trace::InstRecord *traceData; + CheckerCPU *checker; - /** ThreadContext object, provides an interface for external - * objects to modify this thread's state. - */ - ThreadContext *tc; + std::vector threadInfo; + std::list activeThreads; - CheckerCPU *checker; + /** Current instruction */ + TheISA::MachInst inst; + StaticInstPtr curStaticInst; + StaticInstPtr curMacroStaticInst; protected: - enum Status { Idle, Running, @@ -147,22 +125,8 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext Status _status; public: - Addr dbg_vtophys(Addr addr); - bool interval_stats; - - // current instruction - TheISA::MachInst inst; - - StaticInstPtr curStaticInst; - StaticInstPtr curMacroStaticInst; - - //This is the offset from the current pc that fetch should be performed at - Addr fetchOffset; - //This flag says to stay at the current pc. This is useful for - //instructions which go beyond MachInst boundaries. - bool stayAtPC; void checkForInterrupts(); void setupFetchRequest(Request *req); @@ -178,289 +142,20 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext virtual void startup(); - // number of simulated instructions - Counter numInst; - Counter startNumInst; - Stats::Scalar numInsts; - Counter numOp; - Counter startNumOp; - Stats::Scalar numOps; - - void countInst() - { - if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { - numInst++; - numInsts++; - } - numOp++; - numOps++; - - system->totalNumInsts++; - thread->funcExeInst++; - } - - virtual Counter totalInsts() const - { - return numInst - startNumInst; - } - - virtual Counter totalOps() const - { - return numOp - startNumOp; - } - - //number of integer alu accesses - Stats::Scalar numIntAluAccesses; - - //number of float alu accesses - Stats::Scalar numFpAluAccesses; - - //number of function calls/returns - Stats::Scalar numCallsReturns; - - //conditional control instructions; - Stats::Scalar numCondCtrlInsts; - - //number of int 
instructions - Stats::Scalar numIntInsts; - - //number of float instructions - Stats::Scalar numFpInsts; - - //number of integer register file accesses - Stats::Scalar numIntRegReads; - Stats::Scalar numIntRegWrites; - - //number of float register file accesses - Stats::Scalar numFpRegReads; - Stats::Scalar numFpRegWrites; - - //number of condition code register file accesses - Stats::Scalar numCCRegReads; - Stats::Scalar numCCRegWrites; + virtual Fault readMem(Addr addr, uint8_t* data, unsigned size, + unsigned flags) = 0; - // number of simulated memory references - Stats::Scalar numMemRefs; - Stats::Scalar numLoadInsts; - Stats::Scalar numStoreInsts; + virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr, + unsigned flags, uint64_t* res) = 0; - // number of idle cycles - Stats::Formula numIdleCycles; - - // number of busy cycles - Stats::Formula numBusyCycles; - - // number of simulated loads - Counter numLoad; - Counter startNumLoad; - - // number of idle cycles - Stats::Average notIdleFraction; - Stats::Formula idleFraction; - - // number of cycles stalled for I-cache responses - Stats::Scalar icacheStallCycles; - Counter lastIcacheStall; - - // number of cycles stalled for D-cache responses - Stats::Scalar dcacheStallCycles; - Counter lastDcacheStall; - - /// @{ - /// Total number of branches fetched - Stats::Scalar numBranches; - /// Number of branches predicted as taken - Stats::Scalar numPredictedBranches; - /// Number of misprediced branches - Stats::Scalar numBranchMispred; - /// @} - - // instruction mix histogram by OpClass - Stats::Vector statExecutedInstType; + void countInst(); + virtual Counter totalInsts() const; + virtual Counter totalOps() const; void serializeThread(CheckpointOut &cp, ThreadID tid) const M5_ATTR_OVERRIDE; void unserializeThread(CheckpointIn &cp, ThreadID tid) M5_ATTR_OVERRIDE; - // These functions are only used in CPU models that split - // effective address computation from the actual memory access. 
- void setEA(Addr EA) { panic("BaseSimpleCPU::setEA() not implemented\n"); } - Addr getEA() const { panic("BaseSimpleCPU::getEA() not implemented\n"); } - - // The register accessor methods provide the index of the - // instruction's operand (e.g., 0 or 1), not the architectural - // register index, to simplify the implementation of register - // renaming. We find the architectural register index by indexing - // into the instruction's own operand index table. Note that a - // raw pointer to the StaticInst is provided instead of a - // ref-counted StaticInstPtr to redice overhead. This is fine as - // long as these methods don't copy the pointer into any long-term - // storage (which is pretty hard to imagine they would have reason - // to do). - - IntReg readIntRegOperand(const StaticInst *si, int idx) - { - numIntRegReads++; - return thread->readIntReg(si->srcRegIdx(idx)); - } - - FloatReg readFloatRegOperand(const StaticInst *si, int idx) - { - numFpRegReads++; - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base; - return thread->readFloatReg(reg_idx); - } - - FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx) - { - numFpRegReads++; - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base; - return thread->readFloatRegBits(reg_idx); - } - - CCReg readCCRegOperand(const StaticInst *si, int idx) - { - numCCRegReads++; - int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; - return thread->readCCReg(reg_idx); - } - - void setIntRegOperand(const StaticInst *si, int idx, IntReg val) - { - numIntRegWrites++; - thread->setIntReg(si->destRegIdx(idx), val); - } - - void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val) - { - numFpRegWrites++; - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base; - thread->setFloatReg(reg_idx, val); - } - - void setFloatRegOperandBits(const StaticInst *si, int idx, - FloatRegBits val) - { - numFpRegWrites++; - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base; - 
thread->setFloatRegBits(reg_idx, val); - } - - void setCCRegOperand(const StaticInst *si, int idx, CCReg val) - { - numCCRegWrites++; - int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; - thread->setCCReg(reg_idx, val); - } - - bool readPredicate() { return thread->readPredicate(); } - void setPredicate(bool val) - { - thread->setPredicate(val); - if (traceData) { - traceData->setPredicate(val); - } - } - TheISA::PCState pcState() const { return thread->pcState(); } - void pcState(const TheISA::PCState &val) { thread->pcState(val); } - Addr instAddr() { return thread->instAddr(); } - Addr nextInstAddr() { return thread->nextInstAddr(); } - MicroPC microPC() { return thread->microPC(); } - - MiscReg readMiscRegNoEffect(int misc_reg) const - { - return thread->readMiscRegNoEffect(misc_reg); - } - - MiscReg readMiscReg(int misc_reg) - { - numIntRegReads++; - return thread->readMiscReg(misc_reg); - } - - void setMiscReg(int misc_reg, const MiscReg &val) - { - numIntRegWrites++; - return thread->setMiscReg(misc_reg, val); - } - - MiscReg readMiscRegOperand(const StaticInst *si, int idx) - { - numIntRegReads++; - int reg_idx = si->srcRegIdx(idx) - TheISA::Misc_Reg_Base; - return thread->readMiscReg(reg_idx); - } - - void setMiscRegOperand( - const StaticInst *si, int idx, const MiscReg &val) - { - numIntRegWrites++; - int reg_idx = si->destRegIdx(idx) - TheISA::Misc_Reg_Base; - return thread->setMiscReg(reg_idx, val); - } - - void demapPage(Addr vaddr, uint64_t asn) - { - thread->demapPage(vaddr, asn); - } - - void demapInstPage(Addr vaddr, uint64_t asn) - { - thread->demapInstPage(vaddr, asn); - } - - void demapDataPage(Addr vaddr, uint64_t asn) - { - thread->demapDataPage(vaddr, asn); - } - - unsigned int readStCondFailures() const { - return thread->readStCondFailures(); - } - - void setStCondFailures(unsigned int sc_failures) { - thread->setStCondFailures(sc_failures); - } - - MiscReg readRegOtherThread(int regIdx, ThreadID tid = InvalidThreadID) - { - 
panic("Simple CPU models do not support multithreaded " - "register access.\n"); - } - - void setRegOtherThread(int regIdx, MiscReg val, - ThreadID tid = InvalidThreadID) - { - panic("Simple CPU models do not support multithreaded " - "register access.\n"); - } - - //Fault CacheOp(uint8_t Op, Addr EA); - - Fault hwrei() { return thread->hwrei(); } - bool simPalCheck(int palFunc) { return thread->simPalCheck(palFunc); } - - void - syscall(int64_t callnum) - { - if (FullSystem) - panic("Syscall emulation isn't available in FS mode.\n"); - - thread->syscall(callnum); - } - - ThreadContext *tcBase() { return tc; } - - private: - TheISA::PCState pred_pc; - - public: - // monitor/mwait funtions - void armMonitor(Addr address) { BaseCPU::armMonitor(address); } - bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); } - void mwaitAtomic(ThreadContext *tc) - { return BaseCPU::mwaitAtomic(tc, thread->dtb); } - AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); } }; #endif // __CPU_SIMPLE_BASE_HH__ diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh new file mode 100644 index 000000000..f474cc358 --- /dev/null +++ b/src/cpu/simple/exec_context.hh @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2014-2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Kevin Lim + * Andreas Sandberg + * Mitch Hayenga + */ + +#ifndef __CPU_SIMPLE_EXEC_CONTEXT_HH__ +#define __CPU_SIMPLE_EXEC_CONTEXT_HH__ + +#include "arch/registers.hh" +#include "base/types.hh" +#include "config/the_isa.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/simple/base.hh" +#include "cpu/static_inst_fwd.hh" +#include "cpu/translation.hh" + +class BaseSimpleCPU; + +class SimpleExecContext : public ExecContext { + protected: + typedef TheISA::MiscReg MiscReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::CCReg CCReg; + + public: + BaseSimpleCPU *cpu; + SimpleThread* thread; + + // This is the offset from the current pc that fetch should be performed + Addr fetchOffset; + // This flag says to stay at the current pc. This is useful for + // instructions which go beyond MachInst boundaries. + bool stayAtPC; + + // Branch prediction + TheISA::PCState predPC; + + /** PER-THREAD STATS */ + + // Number of simulated instructions + Counter numInst; + Stats::Scalar numInsts; + Counter numOp; + Stats::Scalar numOps; + + // Number of integer alu accesses + Stats::Scalar numIntAluAccesses; + + // Number of float alu accesses + Stats::Scalar numFpAluAccesses; + + // Number of function calls/returns + Stats::Scalar numCallsReturns; + + // Conditional control instructions; + Stats::Scalar numCondCtrlInsts; + + // Number of int instructions + Stats::Scalar numIntInsts; + + // Number of float instructions + Stats::Scalar numFpInsts; + + // Number of integer register file accesses + Stats::Scalar numIntRegReads; + Stats::Scalar numIntRegWrites; + + // Number of float register file accesses + Stats::Scalar numFpRegReads; + Stats::Scalar numFpRegWrites; + + // Number of condition code register file accesses + Stats::Scalar numCCRegReads; + Stats::Scalar numCCRegWrites; + + // Number of simulated memory references + Stats::Scalar numMemRefs; + Stats::Scalar numLoadInsts; + 
Stats::Scalar numStoreInsts; + + // Number of idle cycles + Stats::Formula numIdleCycles; + + // Number of busy cycles + Stats::Formula numBusyCycles; + + // Number of simulated loads + Counter numLoad; + + // Number of idle cycles + Stats::Average notIdleFraction; + Stats::Formula idleFraction; + + // Number of cycles stalled for I-cache responses + Stats::Scalar icacheStallCycles; + Counter lastIcacheStall; + + // Number of cycles stalled for D-cache responses + Stats::Scalar dcacheStallCycles; + Counter lastDcacheStall; + + /// @{ + /// Total number of branches fetched + Stats::Scalar numBranches; + /// Number of branches predicted as taken + Stats::Scalar numPredictedBranches; + /// Number of misprediced branches + Stats::Scalar numBranchMispred; + /// @} + + // Instruction mix histogram by OpClass + Stats::Vector statExecutedInstType; + + public: + /** Constructor */ + SimpleExecContext(BaseSimpleCPU* _cpu, SimpleThread* _thread) + : cpu(_cpu), thread(_thread), fetchOffset(0), stayAtPC(false), + numInst(0), numOp(0), numLoad(0), lastIcacheStall(0), lastDcacheStall(0) + { } + + /** Reads an integer register. */ + IntReg readIntRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE + { + numIntRegReads++; + return thread->readIntReg(si->srcRegIdx(idx)); + } + + /** Sets an integer register to a value. */ + void setIntRegOperand(const StaticInst *si, int idx, IntReg val) + M5_ATTR_OVERRIDE + { + numIntRegWrites++; + thread->setIntReg(si->destRegIdx(idx), val); + } + + /** Reads a floating point register of single register width. */ + FloatReg readFloatRegOperand(const StaticInst *si, int idx) + M5_ATTR_OVERRIDE + { + numFpRegReads++; + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base; + return thread->readFloatReg(reg_idx); + } + + /** Reads a floating point register in its binary format, instead + * of by value. 
*/ + FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx) + M5_ATTR_OVERRIDE + { + numFpRegReads++; + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base; + return thread->readFloatRegBits(reg_idx); + } + + /** Sets a floating point register of single width to a value. */ + void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val) + M5_ATTR_OVERRIDE + { + numFpRegWrites++; + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base; + thread->setFloatReg(reg_idx, val); + } + + /** Sets the bits of a floating point register of single width + * to a binary value. */ + void setFloatRegOperandBits(const StaticInst *si, int idx, + FloatRegBits val) M5_ATTR_OVERRIDE + { + numFpRegWrites++; + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base; + thread->setFloatRegBits(reg_idx, val); + } + + CCReg readCCRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE + { + numCCRegReads++; + int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; + return thread->readCCReg(reg_idx); + } + + void setCCRegOperand(const StaticInst *si, int idx, CCReg val) + M5_ATTR_OVERRIDE + { + numCCRegWrites++; + int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; + thread->setCCReg(reg_idx, val); + } + + MiscReg readMiscRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE + { + numIntRegReads++; + int reg_idx = si->srcRegIdx(idx) - TheISA::Misc_Reg_Base; + return thread->readMiscReg(reg_idx); + } + + void setMiscRegOperand(const StaticInst *si, int idx, const MiscReg &val) + M5_ATTR_OVERRIDE + { + numIntRegWrites++; + int reg_idx = si->destRegIdx(idx) - TheISA::Misc_Reg_Base; + thread->setMiscReg(reg_idx, val); + } + + /** + * Reads a miscellaneous register, handling any architectural + * side effects due to reading that register. 
+ */ + MiscReg readMiscReg(int misc_reg) M5_ATTR_OVERRIDE + { + numIntRegReads++; + return thread->readMiscReg(misc_reg); + } + + /** + * Sets a miscellaneous register, handling any architectural + * side effects due to writing that register. + */ + void setMiscReg(int misc_reg, const MiscReg &val) M5_ATTR_OVERRIDE + { + numIntRegWrites++; + thread->setMiscReg(misc_reg, val); + } + + PCState pcState() const M5_ATTR_OVERRIDE + { + return thread->pcState(); + } + + void pcState(const PCState &val) M5_ATTR_OVERRIDE + { + thread->pcState(val); + } + + + /** + * Record the effective address of the instruction. + * + * @note Only valid for memory ops. + */ + void setEA(Addr EA) M5_ATTR_OVERRIDE + { panic("BaseSimpleCPU::setEA() not implemented\n"); } + + /** + * Get the effective address of the instruction. + * + * @note Only valid for memory ops. + */ + Addr getEA() const M5_ATTR_OVERRIDE + { panic("BaseSimpleCPU::getEA() not implemented\n"); } + + Fault readMem(Addr addr, uint8_t *data, unsigned int size, + unsigned int flags) M5_ATTR_OVERRIDE + { + return cpu->readMem(addr, data, size, flags); + } + + Fault writeMem(uint8_t *data, unsigned int size, Addr addr, + unsigned int flags, uint64_t *res) M5_ATTR_OVERRIDE + { + return cpu->writeMem(data, size, addr, flags, res); + } + + /** + * Sets the number of consecutive store conditional failures. + */ + void setStCondFailures(unsigned int sc_failures) M5_ATTR_OVERRIDE + { + thread->setStCondFailures(sc_failures); + } + + /** + * Returns the number of consecutive store conditional failures. + */ + unsigned int readStCondFailures() const M5_ATTR_OVERRIDE + { + return thread->readStCondFailures(); + } + + /** + * Executes a syscall specified by the callnum. + */ + void syscall(int64_t callnum) M5_ATTR_OVERRIDE + { + if (FullSystem) + panic("Syscall emulation isn't available in FS mode."); + + thread->syscall(callnum); + } + + /** Returns a pointer to the ThreadContext. 
*/ + ThreadContext *tcBase() M5_ATTR_OVERRIDE + { + return thread->getTC(); + } + + /** + * Somewhat Alpha-specific function that handles returning from an + * error or interrupt. + */ + Fault hwrei() M5_ATTR_OVERRIDE + { + return thread->hwrei(); + } + + /** + * Check for special simulator handling of specific PAL calls. If + * return value is false, actual PAL call will be suppressed. + */ + bool simPalCheck(int palFunc) M5_ATTR_OVERRIDE + { + return thread->simPalCheck(palFunc); + } + + bool readPredicate() M5_ATTR_OVERRIDE + { + return thread->readPredicate(); + } + + void setPredicate(bool val) M5_ATTR_OVERRIDE + { + thread->setPredicate(val); + + if (cpu->traceData) { + cpu->traceData->setPredicate(val); + } + } + + /** + * Invalidate a page in the DTLB and ITLB. + */ + void demapPage(Addr vaddr, uint64_t asn) M5_ATTR_OVERRIDE + { + thread->demapPage(vaddr, asn); + } + + void armMonitor(Addr address) M5_ATTR_OVERRIDE + { + cpu->armMonitor(address); + } + + bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE + { + return cpu->mwait(pkt); + } + + void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE + { + cpu->mwaitAtomic(tc, thread->dtb); + } + + AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE + { + return cpu->getCpuAddrMonitor(); + } + +#if THE_ISA == MIPS_ISA + MiscReg readRegOtherThread(int regIdx, ThreadID tid = InvalidThreadID) + M5_ATTR_OVERRIDE + { + panic("Simple CPU models do not support multithreaded " + "register access."); + } + + void setRegOtherThread(int regIdx, MiscReg val, + ThreadID tid = InvalidThreadID) M5_ATTR_OVERRIDE + { + panic("Simple CPU models do not support multithreaded " + "register access."); + } + +#endif + +}; + +#endif // __CPU_EXEC_CONTEXT_HH__ diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 5dc042f1e..487da36ea 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -1,6 +1,6 @@ /* * Copyright 2014 Google, Inc. 
- * Copyright (c) 2010-2013 ARM Limited + * Copyright (c) 2010-2013,2015 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -67,18 +67,7 @@ using namespace TheISA; void TimingSimpleCPU::init() { - BaseCPU::init(); - - // Initialise the ThreadContext's memory proxies - tcBase()->initMemProxies(tcBase()); - - if (FullSystem && !params()->switched_out) { - for (int i = 0; i < threadContexts.size(); ++i) { - ThreadContext *tc = threadContexts[i]; - // initialize CPU, including PC - TheISA::initCPU(tc, _cpuId); - } - } + BaseSimpleCPU::init(); } void @@ -111,9 +100,10 @@ TimingSimpleCPU::drain() if (_status == Idle || (_status == BaseSimpleCPU::Running && isDrained())) { DPRINTF(Drain, "No need to drain.\n"); + activeThreads.clear(); return DrainState::Drained; } else { - DPRINTF(Drain, "Requesting drain: %s\n", pcState()); + DPRINTF(Drain, "Requesting drain.\n"); // The fetch event can become descheduled if a drain didn't // succeed on the first attempt. 
We need to reschedule it if @@ -136,17 +126,27 @@ TimingSimpleCPU::drainResume() verifyMemoryMode(); assert(!threadContexts.empty()); - if (threadContexts.size() > 1) - fatal("The timing CPU only supports one thread.\n"); - if (thread->status() == ThreadContext::Active) { - schedule(fetchEvent, nextCycle()); - _status = BaseSimpleCPU::Running; - notIdleFraction = 1; - } else { - _status = BaseSimpleCPU::Idle; - notIdleFraction = 0; + _status = BaseSimpleCPU::Idle; + + for (ThreadID tid = 0; tid < numThreads; tid++) { + if (threadInfo[tid]->thread->status() == ThreadContext::Active) { + threadInfo[tid]->notIdleFraction = 1; + + activeThreads.push_back(tid); + + _status = BaseSimpleCPU::Running; + + // Fetch if any threads active + if (!fetchEvent.scheduled()) { + schedule(fetchEvent, nextCycle()); + } + } else { + threadInfo[tid]->notIdleFraction = 0; + } } + + system->totalNumInsts = 0; } bool @@ -155,7 +155,7 @@ TimingSimpleCPU::tryCompleteDrain() if (drainState() != DrainState::Draining) return false; - DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState()); + DPRINTF(Drain, "tryCompleteDrain.\n"); if (!isDrained()) return false; @@ -168,12 +168,15 @@ TimingSimpleCPU::tryCompleteDrain() void TimingSimpleCPU::switchOut() { + SimpleExecContext& t_info = *threadInfo[curThread]; + M5_VAR_USED SimpleThread* thread = t_info.thread; + BaseSimpleCPU::switchOut(); assert(!fetchEvent.scheduled()); assert(_status == BaseSimpleCPU::Running || _status == Idle); - assert(!stayAtPC); - assert(microPC() == 0); + assert(!t_info.stayAtPC); + assert(thread->microPC() == 0); updateCycleCounts(); } @@ -201,16 +204,20 @@ TimingSimpleCPU::activateContext(ThreadID thread_num) { DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num); - assert(thread_num == 0); - assert(thread); - - assert(_status == Idle); + assert(thread_num < numThreads); - notIdleFraction = 1; - _status = BaseSimpleCPU::Running; + threadInfo[thread_num]->notIdleFraction = 1; + if (_status == BaseSimpleCPU::Idle) + 
_status = BaseSimpleCPU::Running; // kick things off by initiating the fetch of the next instruction - schedule(fetchEvent, clockEdge(Cycles(0))); + if (!fetchEvent.scheduled()) + schedule(fetchEvent, clockEdge(Cycles(0))); + + if (std::find(activeThreads.begin(), activeThreads.end(), thread_num) + == activeThreads.end()) { + activeThreads.push_back(thread_num); + } } @@ -219,24 +226,31 @@ TimingSimpleCPU::suspendContext(ThreadID thread_num) { DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num); - assert(thread_num == 0); - assert(thread); + assert(thread_num < numThreads); + activeThreads.remove(thread_num); if (_status == Idle) return; assert(_status == BaseSimpleCPU::Running); - // just change status to Idle... if status != Running, - // completeInst() will not initiate fetch of next instruction. + threadInfo[thread_num]->notIdleFraction = 0; - notIdleFraction = 0; - _status = Idle; + if (activeThreads.empty()) { + _status = Idle; + + if (fetchEvent.scheduled()) { + deschedule(fetchEvent); + } + } } bool TimingSimpleCPU::handleReadPacket(PacketPtr pkt) { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + RequestPtr req = pkt->req; // We're about the issues a locked load, so tell the monitor @@ -264,6 +278,9 @@ void TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read) { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + PacketPtr pkt = buildPacket(req, read); pkt->dataDynamic(data); if (req->getFlags().isSet(Request::NO_ACCESS)) { @@ -389,9 +406,12 @@ Fault TimingSimpleCPU::readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags) { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + Fault fault; const int asid = 0; - const ThreadID tid = 0; + const ThreadID tid = curThread; const Addr pc = thread->instAddr(); unsigned block_size = cacheLineSize(); BaseTLB::Mode mode = BaseTLB::Read; @@ -400,7 
+420,8 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data, traceData->setMem(addr, size, flags); RequestPtr req = new Request(asid, addr, size, - flags, dataMasterId(), pc, _cpuId, tid); + flags, dataMasterId(), pc, + thread->contextId(), tid); req->taskId(taskId()); @@ -421,14 +442,14 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data, DataTranslation *trans2 = new DataTranslation(this, state, 1); - thread->dtb->translateTiming(req1, tc, trans1, mode); - thread->dtb->translateTiming(req2, tc, trans2, mode); + thread->dtb->translateTiming(req1, thread->getTC(), trans1, mode); + thread->dtb->translateTiming(req2, thread->getTC(), trans2, mode); } else { WholeTranslationState *state = new WholeTranslationState(req, new uint8_t[size], NULL, mode); DataTranslation *translation = new DataTranslation(this, state); - thread->dtb->translateTiming(req, tc, translation, mode); + thread->dtb->translateTiming(req, thread->getTC(), translation, mode); } return NoFault; @@ -437,6 +458,9 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data, bool TimingSimpleCPU::handleWritePacket() { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + RequestPtr req = dcache_pkt->req; if (req->isMmappedIpr()) { Cycles delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt); @@ -457,9 +481,12 @@ Fault TimingSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr, unsigned flags, uint64_t *res) { + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + uint8_t *newData = new uint8_t[size]; const int asid = 0; - const ThreadID tid = 0; + const ThreadID tid = curThread; const Addr pc = thread->instAddr(); unsigned block_size = cacheLineSize(); BaseTLB::Mode mode = BaseTLB::Write; @@ -476,7 +503,8 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size, traceData->setMem(addr, size, flags); RequestPtr req = new Request(asid, addr, size, - flags, dataMasterId(), pc, _cpuId, tid); + flags, dataMasterId(), 
pc, + thread->contextId(), tid); req->taskId(taskId()); @@ -496,14 +524,14 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size, DataTranslation *trans2 = new DataTranslation(this, state, 1); - thread->dtb->translateTiming(req1, tc, trans1, mode); - thread->dtb->translateTiming(req2, tc, trans2, mode); + thread->dtb->translateTiming(req1, thread->getTC(), trans1, mode); + thread->dtb->translateTiming(req2, thread->getTC(), trans2, mode); } else { WholeTranslationState *state = new WholeTranslationState(req, newData, res, mode); DataTranslation *translation = new DataTranslation(this, state); - thread->dtb->translateTiming(req, tc, translation, mode); + thread->dtb->translateTiming(req, thread->getTC(), translation, mode); } // Translation faults will be returned via finishTranslation() @@ -540,6 +568,12 @@ TimingSimpleCPU::finishTranslation(WholeTranslationState *state) void TimingSimpleCPU::fetch() { + // Change thread if multi-threaded + swapActiveThread(); + + SimpleExecContext &t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + DPRINTF(SimpleCPU, "Fetch\n"); if (!curStaticInst || !curStaticInst->isDelayedCommit()) { @@ -552,17 +586,18 @@ TimingSimpleCPU::fetch() return; TheISA::PCState pcState = thread->pcState(); - bool needToFetch = !isRomMicroPC(pcState.microPC()) && !curMacroStaticInst; + bool needToFetch = !isRomMicroPC(pcState.microPC()) && + !curMacroStaticInst; if (needToFetch) { _status = BaseSimpleCPU::Running; Request *ifetch_req = new Request(); ifetch_req->taskId(taskId()); - ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0); + ifetch_req->setThreadContext(thread->contextId(), curThread); setupFetchRequest(ifetch_req); DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr()); - thread->itb->translateTiming(ifetch_req, tc, &fetchTranslation, - BaseTLB::Execute); + thread->itb->translateTiming(ifetch_req, thread->getTC(), + &fetchTranslation, BaseTLB::Execute); } else { _status = IcacheWaitResponse; 
completeIfetch(NULL); @@ -607,6 +642,8 @@ TimingSimpleCPU::sendFetch(const Fault &fault, RequestPtr req, void TimingSimpleCPU::advanceInst(const Fault &fault) { + SimpleExecContext &t_info = *threadInfo[curThread]; + if (_status == Faulting) return; @@ -619,7 +656,7 @@ TimingSimpleCPU::advanceInst(const Fault &fault) } - if (!stayAtPC) + if (!t_info.stayAtPC) advancePC(fault); if (tryCompleteDrain()) @@ -637,6 +674,8 @@ TimingSimpleCPU::advanceInst(const Fault &fault) void TimingSimpleCPU::completeIfetch(PacketPtr pkt) { + SimpleExecContext& t_info = *threadInfo[curThread]; + DPRINTF(SimpleCPU, "Complete ICache Fetch for addr %#x\n", pkt ? pkt->getAddr() : 0); @@ -656,7 +695,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) preExecute(); if (curStaticInst && curStaticInst->isMemRef()) { // load or store: just send to dcache - Fault fault = curStaticInst->initiateAcc(this, traceData); + Fault fault = curStaticInst->initiateAcc(&t_info, traceData); // If we're not running now the instruction will complete in a dcache // response callback or the instruction faulted and has started an @@ -677,7 +716,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) } } else if (curStaticInst) { // non-memory instruction: execute completely now - Fault fault = curStaticInst->execute(this, traceData); + Fault fault = curStaticInst->execute(&t_info, traceData); // keep an instruction count if (fault == NoFault) @@ -690,7 +729,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) postExecute(); // @todo remove me after debugging with legion done if (curStaticInst && (!curStaticInst->isMicroop() || - curStaticInst->isFirstMicroop())) + curStaticInst->isFirstMicroop())) instCnt++; advanceInst(fault); } else { @@ -776,7 +815,8 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) _status = BaseSimpleCPU::Running; - Fault fault = curStaticInst->completeAcc(pkt, this, traceData); + Fault fault = curStaticInst->completeAcc(pkt, threadInfo[curThread], + traceData); // keep an instruction count 
if (fault == NoFault) @@ -810,17 +850,20 @@ void TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt) { // X86 ISA: Snooping an invalidation for monitor/mwait - if(cpu->getAddrMonitor()->doMonitor(pkt)) { + if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { cpu->wakeup(); } - TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask); + + for (auto &t_info : cpu->threadInfo) { + TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask); + } } void TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt) { // X86 ISA: Snooping an invalidation for monitor/mwait - if(cpu->getAddrMonitor()->doMonitor(pkt)) { + if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) { cpu->wakeup(); } } @@ -930,8 +973,5 @@ TimingSimpleCPU::printAddr(Addr a) TimingSimpleCPU * TimingSimpleCPUParams::create() { - numThreads = 1; - if (!FullSystem && workload.size() != 1) - panic("only one workload allowed"); return new TimingSimpleCPU(this); } diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index b6a1da4e2..d409ac5d2 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 ARM Limited + * Copyright (c) 2012-2013,2015 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #define __CPU_SIMPLE_TIMING_HH__ #include "cpu/simple/base.hh" +#include "cpu/simple/exec_context.hh" #include "cpu/translation.hh" #include "params/TimingSimpleCPU.hh" @@ -342,7 +343,11 @@ class TimingSimpleCPU : public BaseSimpleCPU * */ bool isDrained() { - return microPC() == 0 && !stayAtPC && !fetchEvent.scheduled(); + SimpleExecContext& t_info = *threadInfo[curThread]; + SimpleThread* thread = t_info.thread; + + return thread->microPC() == 0 && !t_info.stayAtPC && + !fetchEvent.scheduled(); } /** -- cgit v1.2.3