From be28d96510e0e722db83b26f1a12d3f5de979b32 Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Wed, 6 Apr 2016 19:43:31 +0100 Subject: Revert power patch sets with unexpected interactions The following patches had unexpected interactions with the current upstream code and have been reverted for now: e07fd01651f3: power: Add support for power models 831c7f2f9e39: power: Low-power idle power state for idle CPUs 4f749e00b667: power: Add power states to ClockedObject Signed-off-by: Andreas Sandberg --HG-- extra : amend_source : 0b6fb073c6bbc24be533ec431eb51fbf1b269508 --- src/cpu/base.cc | 27 ----- src/cpu/base.hh | 9 +- src/cpu/base_dyn_inst.hh | 4 +- src/cpu/checker/cpu.cc | 4 +- src/cpu/checker/cpu_impl.hh | 3 +- src/cpu/kvm/base.cc | 2 +- src/cpu/kvm/x86_cpu.cc | 2 +- src/cpu/minor/cpu.cc | 4 - src/cpu/minor/fetch1.cc | 3 +- src/cpu/minor/lsq.cc | 7 +- src/cpu/o3/cpu.cc | 4 - src/cpu/o3/fetch_impl.hh | 6 +- src/cpu/o3/lsq.hh | 4 +- src/cpu/o3/lsq_impl.hh | 3 +- src/cpu/pred/2bit_local.cc | 9 +- src/cpu/pred/2bit_local.hh | 13 +- src/cpu/pred/BranchPredictor.py | 10 -- src/cpu/pred/SConscript | 2 - src/cpu/pred/bi_mode.cc | 49 ++++---- src/cpu/pred/bi_mode.hh | 18 ++- src/cpu/pred/bpred_unit.cc | 146 ++++++----------------- src/cpu/pred/bpred_unit.hh | 38 ++---- src/cpu/pred/btb.cc | 18 ++- src/cpu/pred/btb.hh | 7 +- src/cpu/pred/indirect.cc | 185 ----------------------------- src/cpu/pred/indirect.hh | 97 --------------- src/cpu/pred/tournament.cc | 69 +++++------ src/cpu/pred/tournament.hh | 22 ++-- src/cpu/simple/atomic.cc | 15 +-- src/cpu/simple/timing.cc | 18 +-- src/cpu/testers/memtest/memtest.cc | 2 +- src/cpu/testers/networktest/networktest.cc | 4 +- src/cpu/testers/rubytest/Check.cc | 6 +- src/cpu/trace/trace_cpu.cc | 4 +- 34 files changed, 181 insertions(+), 633 deletions(-) delete mode 100644 src/cpu/pred/indirect.cc delete mode 100644 src/cpu/pred/indirect.hh (limited to 'src/cpu') diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 0f9fe49ea..624843f42 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -64,7 +64,6 @@ #include "debug/SyscallVerbose.hh" #include "mem/page_table.hh" #include "params/BaseCPU.hh" -#include "sim/clocked_object.hh" #include "sim/full_system.hh" #include "sim/process.hh" #include "sim/sim_events.hh" @@ -356,11 +355,6 @@ BaseCPU::startup() if (params()->progress_interval) { new CPUProgressEvent(this, params()->progress_interval); } - - // Assumption CPU start to operate instantaneously without any latency - if (ClockedObject::pwrState() == Enums::PwrState::UNDEFINED) - ClockedObject::pwrState(Enums::PwrState::ON); - } ProbePoints::PMUUPtr @@ -478,27 +472,6 @@ BaseCPU::findContext(ThreadContext *tc) return 0; } -void -BaseCPU::activateContext(ThreadID thread_num) -{ - // For any active thread running, update CPU power state to active (ON) - ClockedObject::pwrState(Enums::PwrState::ON); -} - -void -BaseCPU::suspendContext(ThreadID thread_num) -{ - // Check if all threads are suspended - for (auto t : threadContexts) { - if (t->status() != ThreadContext::Suspended) { - return; - } - } - - // All CPU threads suspended, enter lower power state for the CPU - ClockedObject::pwrState(Enums::PwrState::CLK_GATED); -} - void BaseCPU::switchOut() { diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 6622339e0..438c38812 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -279,11 +279,10 @@ class BaseCPU : public MemObject Trace::InstTracer * getTracer() { return tracer; } /// Notify the CPU that the indicated context is now active. - virtual void activateContext(ThreadID thread_num); + virtual void activateContext(ThreadID thread_num) {} /// Notify the CPU that the indicated context is now suspended. - /// Check if possible to enter a lower power state - virtual void suspendContext(ThreadID thread_num); + virtual void suspendContext(ThreadID thread_num) {} /// Notify the CPU that the indicated context is now halted. virtual void haltContext(ThreadID thread_num) {} @@ -297,10 +296,6 @@ class BaseCPU : public MemObject /// Get the number of thread contexts available unsigned numContexts() { return threadContexts.size(); } - /// Convert ContextID to threadID - ThreadID contextToThread(ContextID cid) - { return static_cast(cid - threadContexts[0]->contextId()); } - public: typedef BaseCPUParams Params; const Params *params() const diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index e846f6790..031337aec 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -886,7 +886,7 @@ BaseDynInst::initiateMemRead(Addr addr, unsigned size, unsigned flags) sreqHigh = savedSreqHigh; } else { req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(), - thread->contextId()); + thread->contextId(), threadNumber); req->taskId(cpu->taskId()); @@ -942,7 +942,7 @@ BaseDynInst::writeMem(uint8_t *data, unsigned size, sreqHigh = savedSreqHigh; } else { req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(), - thread->contextId()); + thread->contextId(), threadNumber); req->taskId(cpu->taskId()); diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index 4d5919cdf..ac476e5f4 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -155,7 +155,7 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags) // Need to account for multiple accesses like the Atomic and TimingSimple while (1) { memReq = new Request(0, addr, size, flags, masterId, - thread->pcState().instAddr(), tc->contextId()); + thread->pcState().instAddr(), tc->contextId(), 0); // translate to physical address fault = dtb->translateFunctional(memReq, tc, BaseTLB::Read); @@ -243,7 +243,7 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size, // Need to account for a multiple access like Atomic and Timing CPUs while (1) { memReq = new Request(0, addr, size, flags, masterId, - thread->pcState().instAddr(), tc->contextId()); + thread->pcState().instAddr(), tc->contextId(), 0); // translate to physical address fault = dtb->translateFunctional(memReq, tc, BaseTLB::Write); diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 5d5900aae..289861521 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -248,7 +248,8 @@ Checker::verify(DynInstPtr &completed_inst) sizeof(MachInst), 0, masterId, - fetch_PC, thread->contextId()); + fetch_PC, thread->contextId(), + unverifiedInst->threadNumber); memReq->setVirt(0, fetch_PC, sizeof(MachInst), Request::INST_FETCH, masterId, thread->instAddr()); diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc index 0670f61c6..bf4d68603 100644 --- a/src/cpu/kvm/base.cc +++ b/src/cpu/kvm/base.cc @@ -1027,7 +1027,7 @@ BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write) syncThreadContext(); Request mmio_req(paddr, size, Request::UNCACHEABLE, dataMasterId()); - mmio_req.setContext(tc->contextId()); + mmio_req.setThreadContext(tc->contextId(), 0); // Some architectures do need to massage physical addresses a bit // before they are inserted into the memory system. This enables // APIC accesses on x86 and m5ops where supported through a MMIO diff --git a/src/cpu/kvm/x86_cpu.cc b/src/cpu/kvm/x86_cpu.cc index 9e9115ef5..c6c874dc4 100644 --- a/src/cpu/kvm/x86_cpu.cc +++ b/src/cpu/kvm/x86_cpu.cc @@ -1346,7 +1346,7 @@ X86KvmCPU::handleKvmExitIO() Request io_req(pAddr, kvm_run.io.size, Request::UNCACHEABLE, dataMasterId()); - io_req.setContext(tc->contextId()); + io_req.setThreadContext(tc->contextId(), 0); const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq); // Temporarily lock and migrate to the event queue of the diff --git a/src/cpu/minor/cpu.cc b/src/cpu/minor/cpu.cc index a707c6045..cd39a8b93 100644 --- a/src/cpu/minor/cpu.cc +++ b/src/cpu/minor/cpu.cc @@ -287,8 +287,6 @@ MinorCPU::activateContext(ThreadID thread_id) threads[thread_id]->activate(); wakeupOnEvent(Minor::Pipeline::CPUStageId); pipeline->wakeupFetch(); - - BaseCPU::activateContext(thread_id); } void @@ -297,8 +295,6 @@ MinorCPU::suspendContext(ThreadID thread_id) DPRINTF(MinorCPU, "SuspendContext %d\n", thread_id); threads[thread_id]->suspend(); - - BaseCPU::suspendContext(thread_id); } void diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc index d19d7b042..84aaf02f5 100644 --- a/src/cpu/minor/fetch1.cc +++ b/src/cpu/minor/fetch1.cc @@ -135,7 +135,8 @@ Fetch1::fetchLine() "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n", request_id, aligned_pc, pc, line_offset, request_size); - request->request.setContext(cpu.threads[0]->getTC()->contextId()); + request->request.setThreadContext(cpu.threads[0]->getTC()->contextId(), + /* thread id */ 0); request->request.setVirt(0 /* asid */, aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(), /* I've no idea why we need the PC, but give it */ diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc index b5c0bc974..e0c5796c8 100644 --- a/src/cpu/minor/lsq.cc +++ b/src/cpu/minor/lsq.cc @@ -422,7 +422,7 @@ LSQ::SplitDataRequest::makeFragmentRequests() Request *fragment = new Request(); - fragment->setContext(request.contextId()); + fragment->setThreadContext(request.contextId(), /* thread id */ 0); fragment->setVirt(0 /* asid */, fragment_addr, fragment_size, request.getFlags(), request.masterId(), @@ -1070,8 +1070,7 @@ LSQ::tryToSend(LSQRequestPtr request) if (request->request.isMmappedIpr()) { ThreadContext *thread = - cpu.getContext(cpu.contextToThread( - request->request.contextId())); + cpu.getContext(request->request.threadId()); if (request->isLoad) { DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst)); @@ -1503,7 +1502,7 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, inst->traceData->setMem(addr, size, flags); int cid = cpu.threads[inst->id.threadId]->getTC()->contextId(); - request->request.setContext(cid); + request->request.setThreadContext(cid, /* thread id */ 0); request->request.setVirt(0 /* asid */, addr, size, flags, cpu.dataMasterId(), /* I've no idea why we need the PC, but give it */ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 73174e4a9..79ad705bf 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -735,8 +735,6 @@ FullO3CPU::activateContext(ThreadID tid) lastActivatedCycle = curTick(); _status = Running; - - BaseCPU::activateContext(tid); } } @@ -757,8 +755,6 @@ FullO3CPU::suspendContext(ThreadID tid) } DPRINTF(Quiesce, "Suspending Context\n"); - - BaseCPU::suspendContext(tid); } template diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 3b29d87d4..4b1479bcb 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -378,7 +378,7 @@ template void DefaultFetch::processCacheCompletion(PacketPtr pkt) { - ThreadID tid = cpu->contextToThread(pkt->req->contextId()); + ThreadID tid = pkt->req->threadId(); DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid); assert(!cpu->switchedOut()); @@ -622,7 +622,7 @@ DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) RequestPtr mem_req = new Request(tid, fetchBufferBlockPC, fetchBufferSize, Request::INST_FETCH, cpu->instMasterId(), pc, - cpu->thread[tid]->contextId()); + cpu->thread[tid]->contextId(), tid); mem_req->taskId(cpu->taskId()); @@ -640,7 +640,7 @@ template void DefaultFetch::finishTranslation(const Fault &fault, RequestPtr mem_req) { - ThreadID tid = cpu->contextToThread(mem_req->contextId()); + ThreadID tid = mem_req->threadId(); Addr fetchBufferBlockPC = mem_req->getVaddr(); assert(!cpu->switchedOut()); diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 6bc9b3d73..dcd676221 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -334,7 +334,7 @@ Fault LSQ::read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, int load_idx) { - ThreadID tid = cpu->contextToThread(req->contextId()); + ThreadID tid = req->threadId(); return thread[tid].read(req, sreqLow, sreqHigh, load_idx); } @@ -344,7 +344,7 @@ Fault LSQ::write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, uint8_t *data, int store_idx) { - ThreadID tid = cpu->contextToThread(req->contextId()); + ThreadID tid = req->threadId(); return thread[tid].write(req, sreqLow, sreqHigh, data, store_idx); } diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 9080907fe..06467243d 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -347,8 +347,7 @@ LSQ::recvTimingResp(PacketPtr pkt) DPRINTF(LSQ, "Got error packet back for address: %#X\n", pkt->getAddr()); - thread[cpu->contextToThread(pkt->req->contextId())] - .completeDataAccess(pkt); + thread[pkt->req->threadId()].completeDataAccess(pkt); if (pkt->isInvalidate()) { // This response also contains an invalidate; e.g. this can be the case diff --git a/src/cpu/pred/2bit_local.cc b/src/cpu/pred/2bit_local.cc index 9e1c781c5..36ca1593e 100644 --- a/src/cpu/pred/2bit_local.cc +++ b/src/cpu/pred/2bit_local.cc @@ -78,7 +78,7 @@ LocalBP::reset() } void -LocalBP::btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history) +LocalBP::btbUpdate(Addr branch_addr, void * &bp_history) { // Place holder for a function that is called to update predictor history when // a BTB entry is invalid or not found. @@ -86,7 +86,7 @@ LocalBP::btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history) bool -LocalBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history) +LocalBP::lookup(Addr branch_addr, void * &bp_history) { bool taken; uint8_t counter_val; @@ -117,8 +117,7 @@ LocalBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history) } void -LocalBP::update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed) +LocalBP::update(Addr branch_addr, bool taken, void *bp_history, bool squashed) { assert(bp_history == NULL); unsigned local_predictor_idx; @@ -153,7 +152,7 @@ LocalBP::getLocalIndex(Addr &branch_addr) } void -LocalBP::uncondBranch(ThreadID tid, Addr pc, void *&bp_history) +LocalBP::uncondBranch(Addr pc, void *&bp_history) { } diff --git a/src/cpu/pred/2bit_local.hh b/src/cpu/pred/2bit_local.hh index e3f87491b..61e2dc24e 100644 --- a/src/cpu/pred/2bit_local.hh +++ b/src/cpu/pred/2bit_local.hh @@ -66,7 +66,7 @@ class LocalBP : public BPredUnit */ LocalBP(const LocalBPParams *params); - virtual void uncondBranch(ThreadID tid, Addr pc, void * &bp_history); + virtual void uncondBranch(Addr pc, void * &bp_history); /** * Looks up the given address in the branch predictor and returns @@ -75,7 +75,7 @@ class LocalBP : public BPredUnit * @param bp_history Pointer to any bp history state. * @return Whether or not the branch is taken. */ - bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history); + bool lookup(Addr branch_addr, void * &bp_history); /** * Updates the branch predictor to Not Taken if a BTB entry is @@ -84,20 +84,19 @@ class LocalBP : public BPredUnit * @param bp_history Pointer to any bp history state. * @return Whether or not the branch is taken. */ - void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history); + void btbUpdate(Addr branch_addr, void * &bp_history); /** * Updates the branch predictor with the actual result of a branch. * @param branch_addr The address of the branch to update. * @param taken Whether or not the branch was taken. */ - void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed); + void update(Addr branch_addr, bool taken, void *bp_history, bool squashed); - void retireSquashed(ThreadID tid, void *bp_history) + void retireSquashed(void *bp_history) { assert(bp_history == NULL); } - void squash(ThreadID tid, void *bp_history) + void squash(void *bp_history) { assert(bp_history == NULL); } void reset(); diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 2d7d0d0e2..5c52fb65e 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -42,16 +42,6 @@ class BranchPredictor(SimObject): RASSize = Param.Unsigned(16, "RAS size") instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by") - useIndirect = Param.Bool(True, "Use indirect branch predictor") - indirectHashGHR = Param.Bool(True, "Hash branch predictor GHR") - indirectHashTargets = Param.Bool(True, "Hash path history targets") - indirectSets = Param.Unsigned(256, "Cache sets for indirect predictor") - indirectWays = Param.Unsigned(2, "Ways for indirect predictor") - indirectTagSize = Param.Unsigned(16, "Indirect target cache tag bits") - indirectPathLength = Param.Unsigned(3, - "Previous indirect targets to use for path history") - - class LocalBP(BranchPredictor): type = 'LocalBP' diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index dca5e8d88..1bf94712d 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -35,11 +35,9 @@ if env['TARGET_ISA'] == 'null': SimObject('BranchPredictor.py') -DebugFlag('Indirect') Source('bpred_unit.cc') Source('2bit_local.cc') Source('btb.cc') -Source('indirect.cc') Source('ras.cc') Source('tournament.cc') Source ('bi_mode.cc') diff --git a/src/cpu/pred/bi_mode.cc b/src/cpu/pred/bi_mode.cc index 48e798d96..c2a41cd4d 100644 --- a/src/cpu/pred/bi_mode.cc +++ b/src/cpu/pred/bi_mode.cc @@ -38,7 +38,7 @@ BiModeBP::BiModeBP(const BiModeBPParams *params) : BPredUnit(params), - globalHistoryReg(params->numThreads, 0), + globalHistoryReg(0), globalHistoryBits(ceilLog2(params->globalPredictorSize)), choicePredictorSize(params->choicePredictorSize), choiceCtrBits(params->choiceCtrBits), @@ -77,23 +77,23 @@ BiModeBP::BiModeBP(const BiModeBPParams *params) * chooses the taken array and the taken array predicts taken. */ void -BiModeBP::uncondBranch(ThreadID tid, Addr pc, void * &bpHistory) +BiModeBP::uncondBranch(Addr pc, void * &bpHistory) { BPHistory *history = new BPHistory; - history->globalHistoryReg = globalHistoryReg[tid]; + history->globalHistoryReg = globalHistoryReg; history->takenUsed = true; history->takenPred = true; history->notTakenPred = true; history->finalPred = true; bpHistory = static_cast(history); - updateGlobalHistReg(tid, true); + updateGlobalHistReg(true); } void -BiModeBP::squash(ThreadID tid, void *bpHistory) +BiModeBP::squash(void *bpHistory) { BPHistory *history = static_cast(bpHistory); - globalHistoryReg[tid] = history->globalHistoryReg; + globalHistoryReg = history->globalHistoryReg; delete history; } @@ -108,12 +108,12 @@ BiModeBP::squash(ThreadID tid, void *bpHistory) * direction predictors for the final branch prediction. */ bool -BiModeBP::lookup(ThreadID tid, Addr branchAddr, void * &bpHistory) +BiModeBP::lookup(Addr branchAddr, void * &bpHistory) { unsigned choiceHistoryIdx = ((branchAddr >> instShiftAmt) & choiceHistoryMask); unsigned globalHistoryIdx = (((branchAddr >> instShiftAmt) - ^ globalHistoryReg[tid]) + ^ globalHistoryReg) & globalHistoryMask); assert(choiceHistoryIdx < choicePredictorSize); @@ -128,7 +128,7 @@ BiModeBP::lookup(ThreadID tid, Addr branchAddr, void * &bpHistory) bool finalPrediction; BPHistory *history = new BPHistory; - history->globalHistoryReg = globalHistoryReg[tid]; + history->globalHistoryReg = globalHistoryReg; history->takenUsed = choicePrediction; history->takenPred = takenGHBPrediction; history->notTakenPred = notTakenGHBPrediction; @@ -141,15 +141,15 @@ BiModeBP::lookup(ThreadID tid, Addr branchAddr, void * &bpHistory) history->finalPred = finalPrediction; bpHistory = static_cast(history); - updateGlobalHistReg(tid, finalPrediction); + updateGlobalHistReg(finalPrediction); return finalPrediction; } void -BiModeBP::btbUpdate(ThreadID tid, Addr branchAddr, void * &bpHistory) +BiModeBP::btbUpdate(Addr branchAddr, void * &bpHistory) { - globalHistoryReg[tid] &= (historyRegisterMask & ~ULL(1)); + globalHistoryReg &= (historyRegisterMask & ~ULL(1)); } /* Only the selected direction predictor will be updated with the final @@ -159,8 +159,7 @@ BiModeBP::btbUpdate(ThreadID tid, Addr branchAddr, void * &bpHistory) * the direction predictors makes a correct final prediction. */ void -BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory, - bool squashed) +BiModeBP::update(Addr branchAddr, bool taken, void *bpHistory, bool squashed) { if (bpHistory) { BPHistory *history = static_cast(bpHistory); @@ -219,11 +218,11 @@ BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory, if (squashed) { if (taken) { - globalHistoryReg[tid] = (history->globalHistoryReg << 1) | 1; + globalHistoryReg = (history->globalHistoryReg << 1) | 1; } else { - globalHistoryReg[tid] = (history->globalHistoryReg << 1); + globalHistoryReg = (history->globalHistoryReg << 1); } - globalHistoryReg[tid] &= historyRegisterMask; + globalHistoryReg &= historyRegisterMask; } else { delete history; } @@ -231,24 +230,18 @@ BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory, } void -BiModeBP::retireSquashed(ThreadID tid, void *bp_history) +BiModeBP::retireSquashed(void *bp_history) { BPHistory *history = static_cast(bp_history); delete history; } -unsigned -BiModeBP::getGHR(ThreadID tid, void *bp_history) const -{ - return static_cast(bp_history)->globalHistoryReg; -} - void -BiModeBP::updateGlobalHistReg(ThreadID tid, bool taken) +BiModeBP::updateGlobalHistReg(bool taken) { - globalHistoryReg[tid] = taken ? (globalHistoryReg[tid] << 1) | 1 : - (globalHistoryReg[tid] << 1); - globalHistoryReg[tid] &= historyRegisterMask; + globalHistoryReg = taken ? (globalHistoryReg << 1) | 1 : + (globalHistoryReg << 1); + globalHistoryReg &= historyRegisterMask; } BiModeBP* diff --git a/src/cpu/pred/bi_mode.hh b/src/cpu/pred/bi_mode.hh index 96b3b2ef7..da7c49f46 100644 --- a/src/cpu/pred/bi_mode.hh +++ b/src/cpu/pred/bi_mode.hh @@ -57,17 +57,15 @@ class BiModeBP : public BPredUnit { public: BiModeBP(const BiModeBPParams *params); - void uncondBranch(ThreadID tid, Addr pc, void * &bp_history); - void squash(ThreadID tid, void *bp_history); - bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history); - void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history); - void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed); - void retireSquashed(ThreadID tid, void *bp_history); - unsigned getGHR(ThreadID tid, void *bp_history) const; + void uncondBranch(Addr pc, void * &bp_history); + void squash(void *bp_history); + bool lookup(Addr branch_addr, void * &bp_history); + void btbUpdate(Addr branch_addr, void * &bp_history); + void update(Addr branch_addr, bool taken, void *bp_history, bool squashed); + void retireSquashed(void *bp_history); private: - void updateGlobalHistReg(ThreadID tid, bool taken); + void updateGlobalHistReg(bool taken); struct BPHistory { unsigned globalHistoryReg; @@ -96,7 +94,7 @@ class BiModeBP : public BPredUnit // not-taken direction predictors std::vector notTakenCounters; - std::vector globalHistoryReg; + unsigned globalHistoryReg; unsigned globalHistoryBits; unsigned historyRegisterMask; diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc index 04a05eaa4..8bb84f836 100644 --- a/src/cpu/pred/bpred_unit.cc +++ b/src/cpu/pred/bpred_unit.cc @@ -59,18 +59,8 @@ BPredUnit::BPredUnit(const Params *params) predHist(numThreads), BTB(params->BTBEntries, params->BTBTagSize, - params->instShiftAmt, - params->numThreads), + params->instShiftAmt), RAS(numThreads), - useIndirect(params->useIndirect), - iPred(params->indirectHashGHR, - params->indirectHashTargets, - params->indirectSets, - params->indirectWays, - params->indirectTagSize, - params->indirectPathLength, - params->instShiftAmt, - params->numThreads), instShiftAmt(params->instShiftAmt) { for (auto& r : RAS) @@ -126,27 +116,6 @@ BPredUnit::regStats() .name(name() + ".RASInCorrect") .desc("Number of incorrect RAS predictions.") ; - - indirectLookups - .name(name() + ".indirectLookups") - .desc("Number of indirect predictor lookups.") - ; - - indirectHits - .name(name() + ".indirectHits") - .desc("Number of indirect target hits.") - ; - - indirectMisses - .name(name() + ".indirectMisses") - .desc("Number of indirect misses.") - ; - - indirectMispredicted - .name(name() + "indirectMispredcited") - .desc("Number of mispredicted indirect branches.") - ; - } ProbePoints::PMUUPtr @@ -195,10 +164,10 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, DPRINTF(Branch, "[tid:%i]: Unconditional control.\n", tid); pred_taken = true; // Tell the BP there was an unconditional branch. - uncondBranch(tid, pc.instAddr(), bp_history); + uncondBranch(pc.instAddr(), bp_history); } else { ++condPredicted; - pred_taken = lookup(tid, pc.instAddr(), bp_history); + pred_taken = lookup(pc.instAddr(), bp_history); DPRINTF(Branch, "[tid:%i]: [sn:%i] Branch predictor" " predicted %i for PC %s\n", tid, seqNum, pred_taken, pc); @@ -246,59 +215,31 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, tid, pc, pc, RAS[tid].topIdx()); } - if (inst->isDirectCtrl() || !useIndirect) { - // Check BTB on direct branches - if (BTB.valid(pc.instAddr(), tid)) { - ++BTBHits; - - // If it's not a return, use the BTB to get target addr. - target = BTB.lookup(pc.instAddr(), tid); - - DPRINTF(Branch, "[tid:%i]: Instruction %s predicted" - " target is %s.\n", tid, pc, target); - - } else { - DPRINTF(Branch, "[tid:%i]: BTB doesn't have a " - "valid entry.\n",tid); - pred_taken = false; - // The Direction of the branch predictor is altered - // because the BTB did not have an entry - // The predictor needs to be updated accordingly - if (!inst->isCall() && !inst->isReturn()) { - btbUpdate(tid, pc.instAddr(), bp_history); - DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate" - " called for %s\n", tid, seqNum, pc); - } else if (inst->isCall() && !inst->isUncondCtrl()) { - RAS[tid].pop(); - predict_record.pushedRAS = false; - } - TheISA::advancePC(target, inst); - } + if (BTB.valid(pc.instAddr(), tid)) { + ++BTBHits; + + // If it's not a return, use the BTB to get the target addr. + target = BTB.lookup(pc.instAddr(), tid); + + DPRINTF(Branch, "[tid:%i]: Instruction %s predicted" + " target is %s.\n", tid, pc, target); + } else { - predict_record.wasIndirect = true; - ++indirectLookups; - //Consult indirect predictor on indirect control - if (iPred.lookup(pc.instAddr(), getGHR(tid, bp_history), - target, tid)) { - // Indirect predictor hit - ++indirectHits; - DPRINTF(Branch, "[tid:%i]: Instruction %s predicted " - "indirect target is %s.\n", tid, pc, target); - } else { - ++indirectMisses; - pred_taken = false; - DPRINTF(Branch, "[tid:%i]: Instruction %s no indirect " - "target.\n", tid, pc); - if (!inst->isCall() && !inst->isReturn()) { - - } else if (inst->isCall() && !inst->isUncondCtrl()) { - RAS[tid].pop(); - predict_record.pushedRAS = false; - } - TheISA::advancePC(target, inst); + DPRINTF(Branch, "[tid:%i]: BTB doesn't have a " + "valid entry.\n",tid); + pred_taken = false; + // The Direction of the branch predictor is altered because the + // BTB did not have an entry + // The predictor needs to be updated accordingly + if (!inst->isCall() && !inst->isReturn()) { + btbUpdate(pc.instAddr(), bp_history); + DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate" + " called for %s\n", tid, seqNum, pc); + } else if (inst->isCall() && !inst->isUncondCtrl()) { + RAS[tid].pop(); + predict_record.pushedRAS = false; } - iPred.recordIndirect(pc.instAddr(), target.instAddr(), seqNum, - tid); + TheISA::advancePC(target, inst); } } } else { @@ -346,7 +287,7 @@ BPredUnit::predictInOrder(const StaticInstPtr &inst, const InstSeqNum &seqNum, DPRINTF(Branch, "[tid:%i] Unconditional control.\n", tid); pred_taken = true; // Tell the BP there was an unconditional branch. - uncondBranch(tid, instPC.instAddr(), bp_history); + uncondBranch(instPC.instAddr(), bp_history); if (inst->isReturn() && RAS[tid].empty()) { DPRINTF(Branch, "[tid:%i] RAS is empty, predicting " @@ -356,7 +297,7 @@ BPredUnit::predictInOrder(const StaticInstPtr &inst, const InstSeqNum &seqNum, } else { ++condPredicted; - pred_taken = lookup(tid, predPC.instAddr(), bp_history); + pred_taken = lookup(predPC.instAddr(), bp_history); } PredictorHistory predict_record(seqNum, predPC.instAddr(), pred_taken, @@ -446,16 +387,14 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) DPRINTF(Branch, "[tid:%i]: Committing branches until " "[sn:%lli].\n", tid, done_sn); - iPred.commit(done_sn, tid); while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { // Update the branch predictor with the correct results. if (!predHist[tid].back().wasSquashed) { - update(tid, predHist[tid].back().pc, - predHist[tid].back().predTaken, - predHist[tid].back().bpHistory, false); + update(predHist[tid].back().pc, predHist[tid].back().predTaken, + predHist[tid].back().bpHistory, false); } else { - retireSquashed(tid, predHist[tid].back().bpHistory); + retireSquashed(predHist[tid].back().bpHistory); } predHist[tid].pop_back(); @@ -467,7 +406,6 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid) { History &pred_hist = predHist[tid]; - iPred.squash(squashed_sn, tid); while (!pred_hist.empty() && pred_hist.front().seqNum > squashed_sn) { if (pred_hist.front().usedRAS) { @@ -486,7 +424,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid) } // This call should delete the bpHistory. - squash(tid, pred_hist.front().bpHistory); + squash(pred_hist.front().bpHistory); DPRINTF(Branch, "[tid:%i]: Removing history for [sn:%i] " "PC %s.\n", tid, pred_hist.front().seqNum, @@ -546,14 +484,9 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, if ((*hist_it).usedRAS) { ++RASIncorrect; - DPRINTF(Branch, "[tid:%i]: Incorrect RAS [sn:%i]\n", - tid, hist_it->seqNum); } - // Have to get GHR here because the update deletes bpHistory - unsigned ghr = getGHR(tid, hist_it->bpHistory); - - update(tid, (*hist_it).pc, actually_taken, + update((*hist_it).pc, actually_taken, pred_hist.front().bpHistory, true); hist_it->wasSquashed = true; @@ -565,15 +498,12 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, RAS[tid].pop(); hist_it->usedRAS = true; } - if (hist_it->wasIndirect) { - ++indirectMispredicted; - iPred.recordTarget(hist_it->seqNum, ghr, corrTarget, tid); - } else { - DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]" - " PC: %s\n", tid,hist_it->seqNum, hist_it->pc); - BTB.update((*hist_it).pc, corrTarget, tid); - } + DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]" + " PC: %s\n", tid,hist_it->seqNum, hist_it->pc); + + BTB.update((*hist_it).pc, corrTarget, tid); + } else { //Actually not Taken if (hist_it->usedRAS) { diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh index c5e73f59d..bef8cb949 100644 --- a/src/cpu/pred/bpred_unit.hh +++ b/src/cpu/pred/bpred_unit.hh @@ -52,7 +52,6 @@ #include "base/statistics.hh" #include "base/types.hh" #include "cpu/pred/btb.hh" -#include "cpu/pred/indirect.hh" #include "cpu/pred/ras.hh" #include "cpu/inst_seq.hh" #include "cpu/static_inst.hh" @@ -98,7 +97,7 @@ class BPredUnit : public SimObject TheISA::PCState &predPC, ThreadID tid); // @todo: Rename this function. - virtual void uncondBranch(ThreadID tid, Addr pc, void * &bp_history) = 0; + virtual void uncondBranch(Addr pc, void * &bp_history) = 0; /** * Tells the branch predictor to commit any updates until the given @@ -133,7 +132,7 @@ class BPredUnit : public SimObject * @param bp_history Pointer to the history object. The predictor * will need to update any state and delete the object. */ - virtual void squash(ThreadID tid, void *bp_history) = 0; + virtual void squash(void *bp_history) = 0; /** * Looks up a given PC in the BP to see if it is taken or not taken. @@ -142,7 +141,7 @@ class BPredUnit : public SimObject * has the branch predictor state associated with the lookup. * @return Whether the branch is taken or not taken. */ - virtual bool lookup(ThreadID tid, Addr instPC, void * &bp_history) = 0; + virtual bool lookup(Addr instPC, void * &bp_history) = 0; /** * If a branch is not taken, because the BTB address is invalid or missing, @@ -152,7 +151,7 @@ class BPredUnit : public SimObject * @param bp_history Pointer that will be set to an object that * has the branch predictor state associated with the lookup. */ - virtual void btbUpdate(ThreadID tid, Addr instPC, void * &bp_history) = 0; + virtual void btbUpdate(Addr instPC, void * &bp_history) = 0; /** * Looks up a given PC in the BTB to see if a matching entry exists. @@ -180,15 +179,15 @@ class BPredUnit : public SimObject * squash operation. * @todo Make this update flexible enough to handle a global predictor. */ - virtual void update(ThreadID tid, Addr instPC, bool taken, - void *bp_history, bool squashed) = 0; + virtual void update(Addr instPC, bool taken, void *bp_history, + bool squashed) = 0; /** * Deletes the associated history with a branch, performs no predictor * updates. Used for branches that mispredict and update tables but * are still speculative and later retire. * @param bp_history History to delete associated with this predictor */ - virtual void retireSquashed(ThreadID tid, void *bp_history) = 0; + virtual void retireSquashed(void *bp_history) = 0; /** * Updates the BTB with the target of a branch. @@ -198,9 +197,6 @@ class BPredUnit : public SimObject void BTBUpdate(Addr instPC, const TheISA::PCState &target) { BTB.update(instPC, target, 0); } - - virtual unsigned getGHR(ThreadID tid, void* bp_history) const { return 0; } - void dump(); private: @@ -214,7 +210,7 @@ class BPredUnit : public SimObject ThreadID _tid) : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0), RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0), - wasCall(0), wasReturn(0), wasSquashed(0), wasIndirect(0) + wasCall(0), wasReturn(0), wasSquashed(0) {} bool operator==(const PredictorHistory &entry) const { @@ -259,9 +255,6 @@ class BPredUnit : public SimObject /** Whether this instruction has already mispredicted/updated bp */ bool wasSquashed; - - /** Wether this instruction was an indirect branch */ - bool wasIndirect; }; typedef std::deque History; @@ -283,12 +276,6 @@ class BPredUnit : public SimObject /** The per-thread return address stack. */ std::vector RAS; - /** Option to disable indirect predictor. */ - const bool useIndirect; - - /** The indirect target predictor. */ - IndirectPredictor iPred; - /** Stat for number of BP lookups. */ Stats::Scalar lookups; /** Stat for number of conditional branches predicted. */ @@ -308,15 +295,6 @@ class BPredUnit : public SimObject /** Stat for number of times the RAS is incorrect. */ Stats::Scalar RASIncorrect; - /** Stat for the number of indirect target lookups.*/ - Stats::Scalar indirectLookups; - /** Stat for the number of indirect target hits.*/ - Stats::Scalar indirectHits; - /** Stat for the number of indirect target misses.*/ - Stats::Scalar indirectMisses; - /** Stat for the number of indirect target mispredictions.*/ - Stats::Scalar indirectMispredicted; - protected: /** Number of bits to shift instructions by for predictor addresses. */ const unsigned instShiftAmt; diff --git a/src/cpu/pred/btb.cc b/src/cpu/pred/btb.cc index c7ef1959f..393e52ccf 100644 --- a/src/cpu/pred/btb.cc +++ b/src/cpu/pred/btb.cc @@ -35,12 +35,10 @@ DefaultBTB::DefaultBTB(unsigned _numEntries, unsigned _tagBits, - unsigned _instShiftAmt, - unsigned _num_threads) + unsigned _instShiftAmt) : numEntries(_numEntries), tagBits(_tagBits), - instShiftAmt(_instShiftAmt), - log2NumThreads(floorLog2(_num_threads)) + instShiftAmt(_instShiftAmt) { DPRINTF(Fetch, "BTB: Creating BTB object.\n"); @@ -71,12 +69,10 @@ DefaultBTB::reset() inline unsigned -DefaultBTB::getIndex(Addr instPC, ThreadID tid) +DefaultBTB::getIndex(Addr instPC) { // Need to shift PC over by the word offset. - return ((instPC >> instShiftAmt) - ^ (tid << (tagShiftAmt - instShiftAmt - log2NumThreads))) - & idxMask; + return (instPC >> instShiftAmt) & idxMask; } inline @@ -89,7 +85,7 @@ DefaultBTB::getTag(Addr instPC) bool DefaultBTB::valid(Addr instPC, ThreadID tid) { - unsigned btb_idx = getIndex(instPC, tid); + unsigned btb_idx = getIndex(instPC); Addr inst_tag = getTag(instPC); @@ -110,7 +106,7 @@ DefaultBTB::valid(Addr instPC, ThreadID tid) TheISA::PCState DefaultBTB::lookup(Addr instPC, ThreadID tid) { - unsigned btb_idx = getIndex(instPC, tid); + unsigned btb_idx = getIndex(instPC); Addr inst_tag = getTag(instPC); @@ -128,7 +124,7 @@ DefaultBTB::lookup(Addr instPC, ThreadID tid) void DefaultBTB::update(Addr instPC, const TheISA::PCState &target, ThreadID tid) { - unsigned btb_idx = getIndex(instPC, tid); + unsigned btb_idx = getIndex(instPC); assert(btb_idx < numEntries); diff --git a/src/cpu/pred/btb.hh b/src/cpu/pred/btb.hh index 209bbdb49..3a773e40d 100644 --- a/src/cpu/pred/btb.hh +++ b/src/cpu/pred/btb.hh @@ -66,7 +66,7 @@ class DefaultBTB * @param instShiftAmt Offset amount for instructions to ignore alignment. */ DefaultBTB(unsigned numEntries, unsigned tagBits, - unsigned instShiftAmt, unsigned numThreads); + unsigned instShiftAmt); void reset(); @@ -97,7 +97,7 @@ class DefaultBTB * @param inst_PC The branch to look up. * @return Returns the index into the BTB. */ - inline unsigned getIndex(Addr instPC, ThreadID tid); + inline unsigned getIndex(Addr instPC); /** Returns the tag bits of a given address. * @param inst_PC The branch's address. @@ -125,9 +125,6 @@ class DefaultBTB /** Number of bits to shift PC when calculating tag. */ unsigned tagShiftAmt; - - /** Log2 NumThreads used for hashing threadid */ - unsigned log2NumThreads; }; #endif // __CPU_PRED_BTB_HH__ diff --git a/src/cpu/pred/indirect.cc b/src/cpu/pred/indirect.cc deleted file mode 100644 index a8934d55e..000000000 --- a/src/cpu/pred/indirect.cc +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2014 ARM Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Mitch Hayenga - */ - -#include "cpu/pred/indirect.hh" - -#include "base/intmath.hh" -#include "debug/Indirect.hh" - -IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets, - unsigned num_sets, unsigned num_ways, - unsigned tag_bits, unsigned path_len, unsigned inst_shift, - unsigned num_threads) - : hashGHR(hash_ghr), hashTargets(hash_targets), - numSets(num_sets), numWays(num_ways), tagBits(tag_bits), - pathLength(path_len), instShift(inst_shift) -{ - if (!isPowerOf2(numSets)) { - panic("Indirect predictor requires power of 2 number of sets"); - } - - threadInfo.resize(num_threads); - - targetCache.resize(numSets); - for (unsigned i = 0; i < numSets; i++) { - targetCache[i].resize(numWays); - } -} - -bool -IndirectPredictor::lookup(Addr br_addr, unsigned ghr, TheISA::PCState& target, - ThreadID tid) -{ - Addr set_index = getSetIndex(br_addr, ghr, tid); - Addr tag = getTag(br_addr); - - assert(set_index < numSets); - - DPRINTF(Indirect, "Looking up %x (set:%d)\n", br_addr, set_index); - const auto &iset = targetCache[set_index]; - for (auto way = iset.begin(); way != iset.end(); ++way) { - if (way->tag == tag) { - DPRINTF(Indirect, "Hit %x (target:%s)\n", br_addr, way->target); - target = way->target; - return true; - } - } - DPRINTF(Indirect, "Miss %x\n", br_addr); - return false; -} - -void -IndirectPredictor::recordIndirect(Addr br_addr, Addr tgt_addr, - InstSeqNum seq_num, ThreadID tid) -{ - DPRINTF(Indirect, "Recording %x seq:%d\n", br_addr, seq_num); - HistoryEntry entry(br_addr, tgt_addr, seq_num); - threadInfo[tid].pathHist.push_back(entry); -} - -void -IndirectPredictor::commit(InstSeqNum seq_num, ThreadID tid) -{ - DPRINTF(Indirect, "Committing seq:%d\n", seq_num); - ThreadInfo &t_info = threadInfo[tid]; - - if (t_info.pathHist.empty()) return; - - if (t_info.headHistEntry < t_info.pathHist.size() && - t_info.pathHist[t_info.headHistEntry].seqNum <= seq_num) { - if (t_info.headHistEntry >= pathLength) { - t_info.pathHist.pop_front(); - } else { - ++t_info.headHistEntry; - } - } -} - -void -IndirectPredictor::squash(InstSeqNum seq_num, ThreadID tid) -{ - DPRINTF(Indirect, "Squashing seq:%d\n", seq_num); - ThreadInfo &t_info = threadInfo[tid]; - auto squash_itr = t_info.pathHist.begin(); - while (squash_itr != t_info.pathHist.end()) { - if (squash_itr->seqNum > seq_num) { - break; - } - ++squash_itr; - } - if (squash_itr != t_info.pathHist.end()) { - DPRINTF(Indirect, "Squashing series starting with sn:%d\n", - squash_itr->seqNum); - } - t_info.pathHist.erase(squash_itr, t_info.pathHist.end()); -} - - -void -IndirectPredictor::recordTarget(InstSeqNum seq_num, unsigned ghr, - const TheISA::PCState& target, ThreadID tid) -{ - ThreadInfo &t_info = threadInfo[tid]; - - // Should have just squashed so this branch should be the oldest - auto hist_entry = *(t_info.pathHist.rbegin()); - // Temporarily pop it off the history so we can calculate the set - t_info.pathHist.pop_back(); - Addr set_index = getSetIndex(hist_entry.pcAddr, ghr, tid); - Addr tag = getTag(hist_entry.pcAddr); - hist_entry.targetAddr = target.instAddr(); - t_info.pathHist.push_back(hist_entry); - - assert(set_index < numSets); - - auto &iset = targetCache[set_index]; - for (auto way = iset.begin(); way != iset.end(); ++way) { - if (way->tag == tag) { - DPRINTF(Indirect, "Updating Target (seq: %d br:%x set:%d target:" - "%s)\n", seq_num, hist_entry.pcAddr, set_index, target); - way->target = target; - return; - } - } - - DPRINTF(Indirect, "Allocating Target (seq: %d br:%x set:%d target:%s)\n", - seq_num, hist_entry.pcAddr, set_index, target); - // Did not find entry, random replacement - auto &way = iset[rand() % numWays]; - way.tag = tag; - way.target = target; -} - - -inline Addr -IndirectPredictor::getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid) -{ - ThreadInfo &t_info = threadInfo[tid]; - - Addr hash = br_addr >> instShift; - if (hashGHR) { - hash ^= ghr; - } - if (hashTargets) { - unsigned hash_shift = floorLog2(numSets) / pathLength; - for (int i = t_info.pathHist.size()-1, p = 0; - i >= 0 && p < pathLength; i--, p++) { - hash ^= (t_info.pathHist[i].targetAddr >> - (instShift + p*hash_shift)); - } - } - return hash & (numSets-1); -} - -inline Addr -IndirectPredictor::getTag(Addr br_addr) -{ - return (br_addr >> instShift) & ((0x1< - -#include "arch/isa_traits.hh" -#include "config/the_isa.hh" -#include "cpu/inst_seq.hh" - -class IndirectPredictor -{ - public: - IndirectPredictor(bool hash_ghr, bool hash_targets, - unsigned num_sets, unsigned num_ways, - unsigned tag_bits, unsigned path_len, - unsigned inst_shift, unsigned num_threads); - bool lookup(Addr br_addr, unsigned ghr, TheISA::PCState& br_target, - ThreadID tid); - void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num, - ThreadID tid); - void commit(InstSeqNum seq_num, ThreadID tid); - void squash(InstSeqNum seq_num, ThreadID tid); - void recordTarget(InstSeqNum seq_num, unsigned ghr, - const TheISA::PCState& target, ThreadID tid); - - private: - const bool hashGHR; - const bool hashTargets; - const unsigned numSets; - const unsigned numWays; - const unsigned tagBits; - const unsigned pathLength; - const unsigned instShift; - - struct IPredEntry - { - IPredEntry() : tag(0), target(0) { } - Addr tag; - TheISA::PCState target; - }; - - std::vector > targetCache; - - Addr getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid); - Addr getTag(Addr br_addr); - - struct HistoryEntry - { - HistoryEntry(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num) - : pcAddr(br_addr), targetAddr(tgt_addr), seqNum(seq_num) { } - Addr pcAddr; - Addr targetAddr; - InstSeqNum seqNum; - }; - - - struct ThreadInfo { - ThreadInfo() : headHistEntry(0) { } - - std::deque pathHist; - unsigned headHistEntry; - }; - - std::vector threadInfo; -}; - -#endif // __CPU_PRED_INDIRECT_HH__ diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc index 319606871..ea6be46fa 100644 --- a/src/cpu/pred/tournament.cc +++ b/src/cpu/pred/tournament.cc @@ -52,7 +52,6 @@ TournamentBP::TournamentBP(const TournamentBPParams *params) localHistoryBits(ceilLog2(params->localPredictorSize)), globalPredictorSize(params->globalPredictorSize), globalCtrBits(params->globalCtrBits), - globalHistory(params->numThreads, 0), globalHistoryBits( ceilLog2(params->globalPredictorSize) > ceilLog2(params->choicePredictorSize) ? @@ -93,6 +92,8 @@ TournamentBP::TournamentBP(const TournamentBPParams *params) for (int i = 0; i < globalPredictorSize; ++i) globalCtrs[i].setBits(globalCtrBits); + //Clear the global history + globalHistory = 0; // Set up the global history mask // this is equivalent to mask(log2(globalPredictorSize) globalHistoryMask = globalPredictorSize - 1; @@ -144,18 +145,18 @@ TournamentBP::calcLocHistIdx(Addr &branch_addr) inline void -TournamentBP::updateGlobalHistTaken(ThreadID tid) +TournamentBP::updateGlobalHistTaken() { - globalHistory[tid] = (globalHistory[tid] << 1) | 1; - globalHistory[tid] = globalHistory[tid] & historyRegisterMask; + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & historyRegisterMask; } inline void -TournamentBP::updateGlobalHistNotTaken(ThreadID tid) +TournamentBP::updateGlobalHistNotTaken() { - globalHistory[tid] = (globalHistory[tid] << 1); - globalHistory[tid] = globalHistory[tid] & historyRegisterMask; + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & historyRegisterMask; } inline @@ -176,18 +177,18 @@ TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx) void -TournamentBP::btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history) +TournamentBP::btbUpdate(Addr branch_addr, void * &bp_history) { unsigned local_history_idx = calcLocHistIdx(branch_addr); //Update Global History to Not Taken (clear LSB) - globalHistory[tid] &= (historyRegisterMask & ~ULL(1)); + globalHistory &= (historyRegisterMask & ~ULL(1)); //Update Local History to Not Taken localHistoryTable[local_history_idx] = localHistoryTable[local_history_idx] & (localPredictorMask & ~ULL(1)); } bool -TournamentBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history) +TournamentBP::lookup(Addr branch_addr, void * &bp_history) { bool local_prediction; unsigned local_history_idx; @@ -203,16 +204,16 @@ TournamentBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history) local_prediction = localCtrs[local_predictor_idx].read() > localThreshold; //Lookup in the global predictor to get its branch prediction - global_prediction = globalThreshold < - globalCtrs[globalHistory[tid] & globalHistoryMask].read(); + global_prediction = + globalCtrs[globalHistory & globalHistoryMask].read() > globalThreshold; //Lookup in the choice predictor to see which one to use - choice_prediction = choiceThreshold < - choiceCtrs[globalHistory[tid] & choiceHistoryMask].read(); + choice_prediction = + choiceCtrs[globalHistory & choiceHistoryMask].read() > choiceThreshold; // Create BPHistory and pass it back to be recorded. BPHistory *history = new BPHistory; - history->globalHistory = globalHistory[tid]; + history->globalHistory = globalHistory; history->localPredTaken = local_prediction; history->globalPredTaken = global_prediction; history->globalUsed = choice_prediction; @@ -226,21 +227,21 @@ TournamentBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history) // all histories. if (choice_prediction) { if (global_prediction) { - updateGlobalHistTaken(tid); + updateGlobalHistTaken(); updateLocalHistTaken(local_history_idx); return true; } else { - updateGlobalHistNotTaken(tid); + updateGlobalHistNotTaken(); updateLocalHistNotTaken(local_history_idx); return false; } } else { if (local_prediction) { - updateGlobalHistTaken(tid); + updateGlobalHistTaken(); updateLocalHistTaken(local_history_idx); return true; } else { - updateGlobalHistNotTaken(tid); + updateGlobalHistNotTaken(); updateLocalHistNotTaken(local_history_idx); return false; } @@ -248,11 +249,11 @@ TournamentBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history) } void -TournamentBP::uncondBranch(ThreadID tid, Addr pc, void * &bp_history) +TournamentBP::uncondBranch(Addr pc, void * &bp_history) { // Create BPHistory and pass it back to be recorded. BPHistory *history = new BPHistory; - history->globalHistory = globalHistory[tid]; + history->globalHistory = globalHistory; history->localPredTaken = true; history->globalPredTaken = true; history->globalUsed = true; @@ -260,12 +261,12 @@ TournamentBP::uncondBranch(ThreadID tid, Addr pc, void * &bp_history) history->localHistory = invalidPredictorIndex; bp_history = static_cast(history); - updateGlobalHistTaken(tid); + updateGlobalHistTaken(); } void -TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken, - void *bp_history, bool squashed) +TournamentBP::update(Addr branch_addr, bool taken, void *bp_history, + bool squashed) { unsigned local_history_idx; unsigned local_predictor_idx M5_VAR_USED; @@ -331,15 +332,15 @@ TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken, } if (squashed) { if (taken) { - globalHistory[tid] = (history->globalHistory << 1) | 1; - globalHistory[tid] = globalHistory[tid] & historyRegisterMask; + globalHistory = (history->globalHistory << 1) | 1; + globalHistory = globalHistory & historyRegisterMask; if (old_local_pred_valid) { localHistoryTable[local_history_idx] = (history->localHistory << 1) | 1; } } else { - globalHistory[tid] = (history->globalHistory << 1); - globalHistory[tid] = globalHistory[tid] & historyRegisterMask; + globalHistory = (history->globalHistory << 1); + globalHistory = globalHistory & historyRegisterMask; if (old_local_pred_valid) { localHistoryTable[local_history_idx] = history->localHistory << 1; @@ -358,19 +359,19 @@ TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken, } void -TournamentBP::retireSquashed(ThreadID tid, void *bp_history) +TournamentBP::retireSquashed(void *bp_history) { BPHistory *history = static_cast(bp_history); delete history; } void -TournamentBP::squash(ThreadID tid, void *bp_history) +TournamentBP::squash(void *bp_history) { BPHistory *history = static_cast(bp_history); // Restore global history to state prior to this branch. - globalHistory[tid] = history->globalHistory; + globalHistory = history->globalHistory; // Restore local history if (history->localHistoryIdx != invalidPredictorIndex) { @@ -387,12 +388,6 @@ TournamentBPParams::create() return new TournamentBP(this); } -unsigned -TournamentBP::getGHR(ThreadID tid, void *bp_history) const -{ - return static_cast(bp_history)->globalHistory; -} - #ifdef DEBUG int TournamentBP::BPHistory::newCount = 0; diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh index 3aa17e030..8fb5b515c 100644 --- a/src/cpu/pred/tournament.hh +++ b/src/cpu/pred/tournament.hh @@ -77,7 +77,7 @@ class TournamentBP : public BPredUnit * @param bp_history Pointer that will be set to the BPHistory object. * @return Whether or not the branch is taken. */ - bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history); + bool lookup(Addr branch_addr, void * &bp_history); /** * Records that there was an unconditional branch, and modifies @@ -85,7 +85,7 @@ class TournamentBP : public BPredUnit * global history stored in it. * @param bp_history Pointer that will be set to the BPHistory object. */ - void uncondBranch(ThreadID tid, Addr pc, void * &bp_history); + void uncondBranch(Addr pc, void * &bp_history); /** * Updates the branch predictor to Not Taken if a BTB entry is * invalid or not found. @@ -93,7 +93,7 @@ class TournamentBP : public BPredUnit * @param bp_history Pointer to any bp history state. * @return Whether or not the branch is taken. */ - void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history); + void btbUpdate(Addr branch_addr, void * &bp_history); /** * Updates the branch predictor with the actual result of a branch. * @param branch_addr The address of the branch to update. @@ -103,19 +103,19 @@ class TournamentBP : public BPredUnit * @param squashed is set when this function is called during a squash * operation. */ - void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history, - bool squashed); + void update(Addr branch_addr, bool taken, void *bp_history, bool squashed); - void retireSquashed(ThreadID tid, void *bp_history); + void retireSquashed(void *bp_history); /** * Restores the global branch history on a squash. * @param bp_history Pointer to the BPHistory object that has the * previous global branch history in it. */ - void squash(ThreadID tid, void *bp_history); + void squash(void *bp_history); - unsigned getGHR(ThreadID tid, void *bp_history) const; + /** Returns the global history. */ + inline unsigned readGlobalHist() { return globalHistory; } private: /** @@ -132,10 +132,10 @@ class TournamentBP : public BPredUnit inline unsigned calcLocHistIdx(Addr &branch_addr); /** Updates global history as taken. */ - inline void updateGlobalHistTaken(ThreadID tid); + inline void updateGlobalHistTaken(); /** Updates global history as not taken. */ - inline void updateGlobalHistNotTaken(ThreadID tid); + inline void updateGlobalHistNotTaken(); /** * Updates local histories as taken. @@ -209,7 +209,7 @@ class TournamentBP : public BPredUnit /** Global history register. Contains as much history as specified by * globalHistoryBits. Actual number of bits used is determined by * globalHistoryMask and choiceHistoryMask. */ - std::vector globalHistory; + unsigned globalHistory; /** Number of bits for the global history. Determines maximum number of entries in global and choice predictor tables. */ diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index a8e97f14c..f3e14d401 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -87,9 +87,9 @@ AtomicSimpleCPU::init() BaseSimpleCPU::init(); int cid = threadContexts[0]->contextId(); - ifetch_req.setContext(cid); - data_read_req.setContext(cid); - data_write_req.setContext(cid); + ifetch_req.setThreadContext(cid, 0); + data_read_req.setThreadContext(cid, 0); + data_write_req.setThreadContext(cid, 0); } AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p) @@ -247,8 +247,6 @@ AtomicSimpleCPU::activateContext(ThreadID thread_num) == activeThreads.end()) { activeThreads.push_back(thread_num); } - - BaseCPU::activateContext(thread_num); } @@ -275,7 +273,6 @@ AtomicSimpleCPU::suspendContext(ThreadID thread_num) } } - BaseCPU::suspendContext(thread_num); } @@ -557,9 +554,9 @@ AtomicSimpleCPU::tick() if (numThreads > 1) { ContextID cid = threadContexts[curThread]->contextId(); - ifetch_req.setContext(cid); - data_read_req.setContext(cid); - data_write_req.setContext(cid); + ifetch_req.setThreadContext(cid, curThread); + data_read_req.setThreadContext(cid, curThread); + data_write_req.setThreadContext(cid, curThread); } SimpleExecContext& t_info = *threadInfo[curThread]; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 515d6b23c..43f4eb9f4 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -218,8 +218,6 @@ TimingSimpleCPU::activateContext(ThreadID thread_num) == activeThreads.end()) { activeThreads.push_back(thread_num); } - - BaseCPU::activateContext(thread_num); } @@ -245,8 +243,6 @@ TimingSimpleCPU::suspendContext(ThreadID thread_num) deschedule(fetchEvent); } } - - BaseCPU::suspendContext(thread_num); } bool @@ -423,6 +419,7 @@ TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size, unsigned flags) Fault fault; const int asid = 0; + const ThreadID tid = curThread; const Addr pc = thread->instAddr(); unsigned block_size = cacheLineSize(); BaseTLB::Mode mode = BaseTLB::Read; @@ -430,8 +427,9 @@ TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size, unsigned flags) if (traceData) traceData->setMem(addr, size, flags); - RequestPtr req = new Request(asid, addr, size, flags, dataMasterId(), pc, - thread->contextId()); + RequestPtr req = new Request(asid, addr, size, + flags, dataMasterId(), pc, + thread->contextId(), tid); req->taskId(taskId()); @@ -496,6 +494,7 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size, uint8_t *newData = new uint8_t[size]; const int asid = 0; + const ThreadID tid = curThread; const Addr pc = thread->instAddr(); unsigned block_size = cacheLineSize(); BaseTLB::Mode mode = BaseTLB::Write; @@ -511,8 +510,9 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size, if (traceData) traceData->setMem(addr, size, flags); - RequestPtr req = new Request(asid, addr, size, flags, dataMasterId(), pc, - thread->contextId()); + RequestPtr req = new Request(asid, addr, size, + flags, dataMasterId(), pc, + thread->contextId(), tid); req->taskId(taskId()); @@ -614,7 +614,7 @@ TimingSimpleCPU::fetch() _status = BaseSimpleCPU::Running; Request *ifetch_req = new Request(); ifetch_req->taskId(taskId()); - ifetch_req->setContext(thread->contextId()); + ifetch_req->setThreadContext(thread->contextId(), curThread); setupFetchRequest(ifetch_req); DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr()); thread->itb->translateTiming(ifetch_req, thread->getTC(), diff --git a/src/cpu/testers/memtest/memtest.cc b/src/cpu/testers/memtest/memtest.cc index c2c721bcf..223532088 100644 --- a/src/cpu/testers/memtest/memtest.cc +++ b/src/cpu/testers/memtest/memtest.cc @@ -245,7 +245,7 @@ MemTest::tick() bool do_functional = (random_mt.random(0, 100) < percentFunctional) && !uncacheable; Request *req = new Request(paddr, 1, flags, masterId); - req->setContext(id); + req->setThreadContext(id, 0); outstandingAddrs.insert(paddr); diff --git a/src/cpu/testers/networktest/networktest.cc b/src/cpu/testers/networktest/networktest.cc index 6ad26077c..79a563f28 100644 --- a/src/cpu/testers/networktest/networktest.cc +++ b/src/cpu/testers/networktest/networktest.cc @@ -243,7 +243,7 @@ NetworkTest::generatePkt() // generate packet for virtual network 1 requestType = MemCmd::ReadReq; flags.set(Request::INST_FETCH); - req = new Request(0, 0x0, access_size, flags, masterId, 0x0, 0); + req = new Request(0, 0x0, access_size, flags, masterId, 0x0, 0, 0); req->setPaddr(paddr); } else { // if (randomReqType == 2) // generate packet for virtual network 2 @@ -251,7 +251,7 @@ NetworkTest::generatePkt() req = new Request(paddr, access_size, flags, masterId); } - req->setContext(id); + req->setThreadContext(id,0); //No need to do functional simulation //We just do timing simulation of the network diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc index c869bd728..c8e7816c3 100644 --- a/src/cpu/testers/rubytest/Check.cc +++ b/src/cpu/testers/rubytest/Check.cc @@ -107,7 +107,7 @@ Check::initiatePrefetch() // Prefetches are assumed to be 0 sized Request *req = new Request(m_address, 0, flags, m_tester_ptr->masterId(), curTick(), m_pc); - req->setContext(index); + req->setThreadContext(index, 0); PacketPtr pkt = new Packet(req, cmd); // despite the oddity of the 0 size (questionable if this should @@ -180,7 +180,7 @@ Check::initiateAction() Request *req = new Request(writeAddr, 1, flags, m_tester_ptr->masterId(), curTick(), m_pc); - req->setContext(index); + req->setThreadContext(index, 0); Packet::Command cmd; // 1 out of 8 chance, issue an atomic rather than a write @@ -245,7 +245,7 @@ Check::initiateCheck() Request *req = new Request(m_address, CHECK_SIZE, flags, m_tester_ptr->masterId(), curTick(), m_pc); - req->setContext(index); + req->setThreadContext(index, 0); PacketPtr pkt = new Packet(req, MemCmd::ReadReq); uint8_t *dataArray = new uint8_t[CHECK_SIZE]; pkt->dataDynamic(dataArray); diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc index e81a79818..d6aa9aaeb 100644 --- a/src/cpu/trace/trace_cpu.cc +++ b/src/cpu/trace/trace_cpu.cc @@ -627,7 +627,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) // Create a request and the packet containing request Request* req = new Request(node_ptr->physAddr, node_ptr->size, node_ptr->flags, masterID, node_ptr->seqNum, - ContextID(0)); + ContextID(0), ThreadID(0)); req->setPC(node_ptr->pc); // If virtual address is valid, set the asid and virtual address fields // of the request. @@ -1123,7 +1123,7 @@ TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd, req->setPC(pc); // If this is not done it triggers assert in L1 cache for invalid contextId - req->setContext(ContextID(0)); + req->setThreadContext(ContextID(0), ThreadID(0)); // Embed it in a packet PacketPtr pkt = new Packet(req, cmd); -- cgit v1.2.3