From 52383ca7cc2b4698109b71a968cde16e9f7dc6e0 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 16 May 2006 14:09:04 -0400 Subject: Sampler updates. cpu/ozone/cpu.hh: Updates for sampler. cpu/ozone/cpu_impl.hh: Updates for sampler, checker. cpu/ozone/inorder_back_end.hh: Sampler updates. Also support old memory system. --HG-- extra : convert_revision : 33ebe38e4c08d49c6af84032b819533b784b4fe8 --- cpu/ozone/cpu.hh | 8 ++- cpu/ozone/cpu_impl.hh | 122 +++++++++++++----------------------------- cpu/ozone/front_end.hh | 2 + cpu/ozone/front_end_impl.hh | 10 ++++ cpu/ozone/inorder_back_end.hh | 44 ++++++++++++--- cpu/ozone/lw_back_end.hh | 3 +- cpu/ozone/lw_back_end_impl.hh | 19 +++++++ cpu/ozone/lw_lsq_impl.hh | 9 +++- 8 files changed, 122 insertions(+), 95 deletions(-) diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index eec8902d8..1d522b2fa 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -64,6 +64,7 @@ class Process; #endif // FULL_SYSTEM class Checkpoint; +class EndQuiesceEvent; class MemInterface; namespace Trace { @@ -149,7 +150,7 @@ class OzoneCPU : public BaseCPU void unserialize(Checkpoint *cp, const std::string &section); #if FULL_SYSTEM - Event *getQuiesceEvent(); + EndQuiesceEvent *getQuiesceEvent(); Tick readLastActivate(); Tick readLastSuspend(); @@ -330,8 +331,13 @@ class OzoneCPU : public BaseCPU int cpuId; void switchOut(Sampler *sampler); + void signalSwitched(); void takeOverFrom(BaseCPU *oldCPU); + Sampler *sampler; + + int switchCount; + #if FULL_SYSTEM Addr dbg_vtophys(Addr addr); diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 4f3fdf521..b085f077f 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -329,15 +329,30 @@ OzoneCPU::copyToXC() */ template void -OzoneCPU::switchOut(Sampler *sampler) +OzoneCPU::switchOut(Sampler *_sampler) { + sampler = _sampler; + switchCount = 0; // Front end needs state from back end, so switch out the back end first.
backEnd->switchOut(); frontEnd->switchOut(); - _status = SwitchedOut; - if (tickEvent.scheduled()) - tickEvent.squash(); - sampler->signalSwitched(); +} + +template +void +OzoneCPU::signalSwitched() +{ + if (++switchCount == 2) { + backEnd->doSwitchOut(); + frontEnd->doSwitchOut(); + if (checker) + checker->switchOut(sampler); + _status = SwitchedOut; + if (tickEvent.scheduled()) + tickEvent.squash(); + sampler->signalSwitched(); + } + assert(switchCount <= 2); } template @@ -366,6 +381,11 @@ OzoneCPU::takeOverFrom(BaseCPU *oldCPU) tickEvent.schedule(curTick); } } + // Nothing running, change status to reflect that we're no longer + // switched out. + if (_status == SwitchedOut) { + _status = Idle; + } } template @@ -666,83 +686,6 @@ OzoneCPU::tick() thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]-> setDoubleResult(0.0); - // General code flow: - // Check for any interrupts. Handle them if I do have one. - // Check if I have a need to fetch a new cache block. Either a bit could be - // set by functions indicating that I need to fetch a new block, or I could - // hang onto the last PC of the last cache block I fetched and compare the - // current PC to that. Setting a bit seems nicer but may be more error - // prone. - // Scan through the IQ to figure out if there's anything I can issue/execute - // Might need something close to the FU Pools to tell what instructions - // I can issue. How to handle loads and stores vs other insts? - // Extremely slow way: find first inst that can possibly issue; if it's a - // load or a store, then iterate through load/store queue. - // If I can't find instructions to execute and I've got room in the IQ - // (which is just a counter), then grab a few instructions out of the cache - // line buffer until I either run out or can execute up until my limit. 
- - numCycles++; - - traceData = NULL; - -// Fault fault = NoFault; - -#if 0 // FULL_SYSTEM - if (checkInterrupts && check_interrupts() && !inPalMode() && - status() != IcacheMissComplete) { - int ipl = 0; - int summary = 0; - checkInterrupts = false; - - if (readMiscReg(IPR_SIRR)) { - for (int i = INTLEVEL_SOFTWARE_MIN; - i < INTLEVEL_SOFTWARE_MAX; i++) { - if (readMiscReg(IPR_SIRR) & (ULL(1) << i)) { - // See table 4-19 of 21164 hardware reference - ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; - summary |= (ULL(1) << i); - } - } - } - - // Is this method so that if the interrupts are switched over from - // another CPU they'll still be handled? -// uint64_t interrupts = cpuXC->cpu->intr_status(); - uint64_t interrupts = intr_status(); - for (int i = INTLEVEL_EXTERNAL_MIN; - i < INTLEVEL_EXTERNAL_MAX; i++) { - if (interrupts & (ULL(1) << i)) { - // See table 4-19 of 21164 hardware reference - ipl = i; - summary |= (ULL(1) << i); - } - } - - if (readMiscReg(IPR_ASTRR)) - panic("asynchronous traps not implemented\n"); - - if (ipl && ipl > readMiscReg(IPR_IPLR)) { - setMiscReg(IPR_ISR, summary); - setMiscReg(IPR_INTID, ipl); - - Fault(new InterruptFault)->invoke(xc); - - DPRINTF(Flow, "Interrupt! 
IPLR=%d ipl=%d summary=%x\n", - readMiscReg(IPR_IPLR), ipl, summary); - } - } -#endif - - // Make call to ISA to ensure 0 register semantics...actually because the - // DynInsts will generally be the register file, this should only have to - // happen when the xc is actually written to (during a syscall or something) - // maintain $r0 semantics -// assert(renameTable[ZeroReg]->readIntResult() == 0); -#ifdef TARGET_ALPHA -// assert(renameTable[ZeroReg]->readDoubleResult() == 0); -#endif // TARGET_ALPHA - comm.advance(); frontEnd->tick(); backEnd->tick(); @@ -876,8 +819,8 @@ OzoneCPU::processInterrupts() thread.setMiscReg(IPR_INTID, ipl); // @todo: Make this more transparent if (checker) { - checkerXC->setMiscReg(IPR_ISR, summary); - checkerXC->setMiscReg(IPR_INTID, ipl); + checker->cpuXCBase()->setMiscReg(IPR_ISR, summary); + checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl); } Fault fault = new InterruptFault; fault->invoke(thread.getXCProxy()); @@ -993,6 +936,15 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) setFuncExeInst(old_context->readFuncExeInst()); #endif + EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); + if (other_quiesce) { + // Point the quiesce event's XC at this XC so that it wakes up + // the proper CPU. 
+ other_quiesce->xc = this; + } + if (thread->quiesceEvent) { + thread->quiesceEvent->xc = this; + } // storeCondFailures = 0; cpu->lockFlag = false; @@ -1016,7 +968,7 @@ OzoneCPU::OzoneXC::unserialize(Checkpoint *cp, const std::string &section) #if FULL_SYSTEM template -Event * +EndQuiesceEvent * OzoneCPU::OzoneXC::getQuiesceEvent() { return thread->quiesceEvent; diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index 188925ae5..f9db9ea5c 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -68,6 +68,8 @@ class FrontEnd void switchOut(); + void doSwitchOut(); + void takeOverFrom(ExecContext *old_xc = NULL); bool isSwitchedOut() { return switchedOut; } diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index a3eb809d0..8ae9ec696 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -240,6 +240,9 @@ template void FrontEnd::tick() { + if (switchedOut) + return; + // @todo: Maybe I want to just have direct communication... if (fromCommit->doneSeqNum) { branchPred.update(fromCommit->doneSeqNum, 0); @@ -828,6 +831,13 @@ void FrontEnd::switchOut() { switchedOut = true; + cpu->signalSwitched(); +} + +template +void +FrontEnd::doSwitchOut() +{ memReq = NULL; squash(0, 0); instBuffer.clear(); diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh index 6519b79e5..4039d8384 100644 --- a/cpu/ozone/inorder_back_end.hh +++ b/cpu/ozone/inorder_back_end.hh @@ -97,6 +97,10 @@ class InorderBackEnd Addr commitPC; + void switchOut() { panic("Not implemented!"); } + void doSwitchOut() { panic("Not implemented!"); } + void takeOverFrom(ExecContext *old_xc = NULL) { panic("Not implemented!"); } + public: FullCPU *cpu; @@ -330,14 +334,17 @@ InorderBackEnd::read(MemReqPtr &req, T &data, int load_idx) // translate to physical address // Fault fault = cpu->translateDataReadReq(req); + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64];
+ req->flags &= ~INST_READ; + Fault fault = cpu->read(req, data); + memcpy(req->data, &data, sizeof(T)); // if we have a cache, do cache access too if (dcacheInterface) { - req->cmd = Read; - req->completionEvent = NULL; - req->data = new uint8_t[64]; - req->time = curTick; - req->flags &= ~INST_READ; MemAccessResult result = dcacheInterface->access(req); // Ugly hack to get an event scheduled *only* if the access is @@ -372,6 +379,30 @@ InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) // translate to physical address // Fault fault = cpu->translateDataWriteReq(req); + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&data, req->size); + + switch(req->size) { + case 1: + cpu->write(req, (uint8_t &)data); + break; + case 2: + cpu->write(req, (uint16_t &)data); + break; + case 4: + cpu->write(req, (uint32_t &)data); + break; + case 8: + cpu->write(req, (uint64_t &)data); + break; + default: + panic("Unexpected store size!\n"); + } + if (dcacheInterface) { req->cmd = Write; req->data = new uint8_t[64]; @@ -395,7 +426,7 @@ InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) } } - +/* if (req->flags & LOCKED) { if (req->flags & UNCACHEABLE) { // Don't update result register (see stq_c in isa_desc) @@ -404,6 +435,7 @@ InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) req->result = 1; } } +*/ /* if (res && (fault == NoFault)) *res = req->result; diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh index 028fdaf8c..770b66ad5 100644 --- a/cpu/ozone/lw_back_end.hh +++ b/cpu/ozone/lw_back_end.hh @@ -187,7 +187,7 @@ class LWBackEnd void instToCommit(DynInstPtr &inst); void switchOut(); - + void doSwitchOut(); void takeOverFrom(ExecContext *old_xc = NULL); bool isSwitchedOut() { return switchedOut; } @@ -314,6 +314,7 @@ class LWBackEnd bool fetchHasFault; bool switchedOut; + bool switchPending; DynInstPtr memBarrier; diff 
--git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index d4829629d..a82dd5b70 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -192,6 +192,7 @@ LWBackEnd::LWBackEnd(Params *params) numWaitingMemOps = 0; waitingInsts = 0; switchedOut = false; + switchPending = false; // IQ.setBE(this); LSQ.setBE(this); @@ -631,6 +632,11 @@ LWBackEnd::tick() { DPRINTF(BE, "Ticking back end\n"); + if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) { + cpu->signalSwitched(); + return; + } + ROB_count[0]+= numInsts; wbCycle = 0; @@ -682,6 +688,7 @@ LWBackEnd::tick() assert(numInsts == instList.size()); assert(waitingInsts == waitingList.size()); assert(numWaitingMemOps == waitingMemOps.size()); + assert(!switchedOut); #endif } @@ -1440,12 +1447,24 @@ LWBackEnd::fetchFault(Fault &fault) template void LWBackEnd::switchOut() +{ + switchPending = true; +} + +template +void +LWBackEnd::doSwitchOut() { switchedOut = true; + switchPending = false; // Need to get rid of all committed, non-speculative state and write it // to memory/XC. In this case this is stores that have committed and not // yet written back. + assert(robEmpty()); + assert(!LSQ.hasStoresToWB()); + LSQ.switchOut(); + squash(0); } diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh index 9b7e48f96..fdf6bff07 100644 --- a/cpu/ozone/lw_lsq_impl.hh +++ b/cpu/ozone/lw_lsq_impl.hh @@ -791,6 +791,8 @@ template void OzoneLWLSQ::switchOut() { +// assert(loads == 0); + assert(storesToWB == 0); switchedOut = true; SQIt sq_it = --(storeQueue.end()); while (storesToWB > 0 && @@ -810,9 +812,12 @@ OzoneLWLSQ::switchOut() // Store conditionals don't complete until *after* they have written // back. If it's here and not yet sent to memory, then don't bother // as it's not part of committed state. 
- if (inst->isDataPrefetch() || (*sq_it).committed || - (*sq_it).req->flags & LOCKED) { + if (inst->isDataPrefetch() || (*sq_it).committed) { + sq_it--; + continue; + } else if ((*sq_it).req->flags & LOCKED) { sq_it--; + assert(!(*sq_it).canWB || ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); continue; } -- cgit v1.2.3