From b0e609d5cf6961bb9b3f12065659e1c42c13ef06 Mon Sep 17 00:00:00 2001 From: Iru Cai Date: Sun, 12 May 2019 14:34:21 +0800 Subject: only spec load when hit --- src/cpu/o3/iew_impl.hh | 2 +- src/cpu/o3/inst_queue_impl.hh | 2 - src/cpu/o3/lsq.hh | 8 - src/cpu/o3/lsq_impl.hh | 23 -- src/cpu/o3/lsq_unit.hh | 94 ------ src/cpu/o3/lsq_unit_impl.hh | 453 ++--------------------------- src/mem/protocol/MESI_Two_Level-L1cache.sm | 9 +- src/mem/ruby/system/Sequencer.cc | 123 +------- src/mem/ruby/system/Sequencer.hh | 4 - 9 files changed, 34 insertions(+), 684 deletions(-) diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 5b67e4c3c..6af8b4563 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1193,7 +1193,7 @@ DefaultIEW::executeInsts() // if we successfully commit sth, then we need to activate the stage or somehow // problems happen when interacting with squash // NOTE: we always send validations before execute load requests - ldstQueue.exposeLoads(); + // ldstQueue.exposeLoads(); // Uncomment this if you want to see all available instructions. // @todo This doesn't actually work anymore, we should fix it. diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 980f29b35..0a6d309fe 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1184,11 +1184,9 @@ InstructionQueue::getDeferredMemInstToExecute() // for both (2, 3) we need to restart the translation if ( (*it)->translationCompleted() || ((*it)->onlyWaitForFence() && !(*it)->fenceDelay()) - || ((*it)->onlyWaitForExpose() && (*it)->readyToExpose()) || (*it)->isSquashed()) { DynInstPtr mem_inst = std::move(*it); mem_inst->onlyWaitForFence(false); - mem_inst->onlyWaitForExpose(false); deferredMemInsts.erase(it); return mem_inst; } diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index e5c35a3a6..0e18aa145 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -135,14 +135,6 @@ class LSQ { void writebackStores(ThreadID tid); - /** [mengjia] - * Attempts to validate loads until all cache ports are used or the - * interface becomes blocked. - */ - int exposeLoads(); - /** Same as above, but only for one thread. */ - int exposeLoads(ThreadID tid); - /** [mengjia] * attempt to update FenceDelay state for load insts */ diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 24066cd4b..b6742070e 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -292,29 +292,6 @@ LSQ::writebackStores() } } -// [mengjia] -template -int -LSQ::exposeLoads() -{ - list::iterator threads = activeThreads->begin(); - list::iterator end = activeThreads->end(); - - int exposedLoads = 0; - while (threads != end) { - ThreadID tid = *threads++; - - if (numLoadsToVLD(tid) > 0) { - DPRINTF(Writeback,"[tid:%i] Validate loads. %i loads " - "available for Validate.\n", tid, numLoadsToVLD(tid)); - } - - exposedLoads += thread[tid].exposeLoads(); - } - return exposedLoads; -} - - // [mengjia] template void diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 1c8b98f2e..711447f31 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -141,12 +141,6 @@ class LSQUnit { */ void checkSnoop(PacketPtr pkt); - // [InvisiSpec] check whether current request will hit in the - // spec buffer or not - int checkSpecBuffHit(const RequestPtr req, const int req_idx); - void setSpecBuffState(const RequestPtr req); - - bool checkPrevLoadsExecuted(const int req_idx); /** Executes a load instruction. */ Fault executeLoad(const DynInstPtr &inst); @@ -165,9 +159,6 @@ class LSQUnit { /** Writes back stores. */ void writebackStores(); - /** [mengjia] Validate loads. */ - int exposeLoads(); - /** [mengjia] Update Visbible State. * In the mode defence relying on fence: setup fenceDelay state. * In the mode defence relying on invisibleSpec: @@ -257,9 +248,6 @@ class LSQUnit { /** Writes back the instruction, sending it to IEW. */ void writeback(const DynInstPtr &inst, PacketPtr pkt); - // [InvisiSpec] complete Validates - void completeValidate(DynInstPtr &inst, PacketPtr pkt); - /** Writes back a store that couldn't be completed the previous cycle. */ void writebackPendingStore(); @@ -872,46 +860,12 @@ LSQUnit::read(const RequestPtr &req, fst_data_pkt = data_pkt; fst_data_pkt->setFirst(); - if (sendSpecRead){ - int src_idx = checkSpecBuffHit(req, load_idx); - if (src_idx != -1) { - if (allowSpecBuffHit){ - data_pkt->setOnlyAccessSpecBuff(); - } - data_pkt->srcIdx = src_idx; - specBuffHits++; - }else{ - specBuffMisses++; - } - } fst_data_pkt->reqIdx = load_idx; } else { // Create the split packets. if(sendSpecRead){ - fst_data_pkt = Packet::createReadSpec(sreqLow); - int fst_src_idx = checkSpecBuffHit(sreqLow, load_idx); - if ( fst_src_idx != -1 ) { - if (allowSpecBuffHit){ - fst_data_pkt->setOnlyAccessSpecBuff(); - } - fst_data_pkt->srcIdx = fst_src_idx; - specBuffHits++; - } else { - specBuffMisses++; - } - snd_data_pkt = Packet::createReadSpec(sreqHigh); - int snd_src_idx = checkSpecBuffHit(sreqHigh, load_idx); - if ( snd_src_idx != -1 ) { - if (allowSpecBuffHit){ - snd_data_pkt->setOnlyAccessSpecBuff(); - } - snd_data_pkt->srcIdx = snd_src_idx; - specBuffHits++; - } else { - specBuffMisses++; - } }else{ fst_data_pkt = Packet::createRead(sreqLow); snd_data_pkt = Packet::createRead(sreqHigh); @@ -1002,58 +956,10 @@ LSQUnit::read(const RequestPtr &req, // Set everything ready for expose/validation after the read is // successfully sent out if(sendSpecRead){ // sending actual request - - // [mengjia] Here we set the needExposeOnly flag - if (needsTSO && !load_inst->isDataPrefetch()){ - // need to check whether previous load_instructions specComplete or not - if ( checkPrevLoadsExecuted(load_idx) ){ - load_inst->needExposeOnly(true); - DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as " - "needExposeOnly\n", - load_inst->pcState(), load_inst->seqNum); - } else { - DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as " - "needValidation\n", - load_inst->pcState(), load_inst->seqNum); - } - }else{ - //if RC, always only need expose - load_inst->needExposeOnly(true); - DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as needExposeOnly\n", - load_inst->pcState(), load_inst->seqNum); - } - - load_inst->needPostFetch(true); assert(!req->isMmappedIpr()); - //save expose requestPtr - if (TheISA::HasUnalignedMemAcc && sreqLow) { - load_inst->postSreqLow = std::make_shared(*sreqLow); - load_inst->postSreqHigh = std::make_shared(*sreqHigh); - load_inst->postReq = nullptr; - DPRINTF(LSQUnit, "created validation/expose" - " request for inst [sn:%lli]" - " reqLow=%#x, reqHigh=%#x\n", - load_inst->seqNum, - load_inst->postSreqLow->getVaddr(), - load_inst->postSreqHigh->getVaddr()); - }else{ - load_inst->postReq = std::make_shared(*req); - load_inst->postSreqLow = nullptr; - load_inst->postSreqHigh = nullptr; - DPRINTF(LSQUnit, "created validation/expose" - " request for inst [sn:%lli]" - " req=%#x\n", - load_inst->seqNum, load_inst->postReq->getVaddr()); - } } else { load_inst->setExposeCompleted(); load_inst->needPostFetch(false); - if (TheISA::HasUnalignedMemAcc && sreqLow) { - setSpecBuffState(sreqLow); - setSpecBuffState(sreqHigh); - } else { - setSpecBuffState(req); - } } return NoFault; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 56870b5a3..ebc963d5b 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -117,8 +117,6 @@ LSQUnit::completeDataAccess(PacketPtr pkt) } else { inst->setL1HitLow(); } - } else if (!pkt->isSpec()) { - setSpecBuffState(pkt->req); } // If this is a split access, wait until all packets are received. @@ -126,6 +124,12 @@ LSQUnit::completeDataAccess(PacketPtr pkt) return; } + if ( pkt->isSpec() && pkt->isRead() && (!pkt->isL1Hit()) ) { + DPRINTF(LSQUnit, "spec load miss for inst [sn:%lli], fence it.\n", + inst->seqNum); + inst->fenceDelay(true); + } + assert(!cpu->switchedOut()); if (!inst->isSquashed()) { if (!state->noWB) { @@ -147,7 +151,7 @@ LSQUnit::completeDataAccess(PacketPtr pkt) } if (pkt->isValidate() || pkt->isExpose()) { - completeValidate(inst, pkt); + assert(false); } } @@ -628,103 +632,6 @@ LSQUnit::checkSnoop(PacketPtr pkt) return; } -template -bool -LSQUnit::checkPrevLoadsExecuted(int req_idx) -{ - int load_idx = loadHead; - while (load_idx != req_idx){ - if (!loadQueue[load_idx]->isExecuted()){ - // if at least on load ahead of current load - // does not finish spec access, - // then return false - return false; - } - incrLdIdx(load_idx); - } - - //if all executed, return true - return true; -} - -template -void -LSQUnit::setSpecBuffState(RequestPtr expose_req) -{ - Addr req_eff_addr1 = expose_req->getPaddr() & cacheBlockMask; - - int load_idx = loadHead; - while (load_idx != loadTail){ - DynInstPtr ld_inst = loadQueue[load_idx]; - if (ld_inst->effAddrValid()){ - - Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask; - Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask; - if (ld_eff_addr1 == req_eff_addr1){ - ld_inst->setSpecBuffObsoleteLow(); - } else if (ld_eff_addr2 == req_eff_addr1){ - ld_inst->setSpecBuffObsoleteHigh(); - } - } - incrLdIdx(load_idx); - } -} - - -template -int -LSQUnit::checkSpecBuffHit(RequestPtr req, int req_idx) -{ - - Addr req_eff_addr1 = req->getPaddr() & cacheBlockMask; - //Addr req_eff_addr2 = (req->getPaddr() + req->getSize()-1) & cacheBlockMask; - // the req should be within the same cache line - //assert (req_eff_addr1 == req_eff_addr2); - assert (!loadQueue[req_idx]->isExecuted()); - - int load_idx = loadHead; - - while (load_idx != loadTail){ - DynInstPtr ld_inst = loadQueue[load_idx]; - if (ld_inst->effAddrValid()){ - Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask; - Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask; - - if ((req_eff_addr1 == ld_eff_addr1 && ld_inst->isL1HitLow()) - || (req_eff_addr1 == ld_eff_addr2 && ld_inst->isL1HitHigh())){ - return -1; - //already in L1, do not copy from buffer - } else { - - if (ld_inst->isExecuted() && ld_inst->needPostFetch() - && !ld_inst->isSquashed() && ld_inst->fault==NoFault){ - if (req_eff_addr1 == ld_eff_addr1 && !ld_inst->isL1HitLow() - && !ld_inst->isSpecBuffObsoleteLow()){ - DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] " - "and [sn:%lli] (low) at address %#x\n", - loadQueue[req_idx]->seqNum, ld_inst->seqNum, - req_eff_addr1); - return load_idx; - } else if ( ld_eff_addr2 !=0 && - req_eff_addr1 == ld_eff_addr2 && !ld_inst->isL1HitHigh() - && !ld_inst->isSpecBuffObsoleteHigh()){ - DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] " - "and [sn:%lli] (high) at address %#x\n", - loadQueue[req_idx]->seqNum, ld_inst->seqNum, - req_eff_addr1); - return load_idx; - } - } - } - } - incrLdIdx(load_idx); - } - - return -1; -} - - - template Fault LSQUnit::checkViolations(int load_idx, const DynInstPtr &inst) @@ -1064,6 +971,7 @@ LSQUnit::updateVisibleState() } } inst->readyToExpose(true); + inst->fenceDelay(false); } else { if (!useIFT) { if (inst->readyToExpose()){ @@ -1097,10 +1005,10 @@ LSQUnit::updateVisibleState() } else { DPRINTF(LSQUnit, "load inst [sn:%lli] %s is an unsafe speculated load, but source registers are not tainted.\n", inst->seqNum, inst->pcState()); inst->readyToExpose(true); + inst->fenceDelay(false); } } } - inst->fenceDelay(false); } else { inst->readyToExpose(true); inst->fenceDelay(false); @@ -1109,281 +1017,6 @@ LSQUnit::updateVisibleState() } } -// [InvisiSpec] validate loads -template -int -LSQUnit::exposeLoads() -{ - if(!isInvisibleSpec){ - assert(loadsToVLD==0 - && "request validation on Non invisible Spec mode"); - } - - int old_loadsToVLD = loadsToVLD; - - DPRINTF(LSQUnit, "starting exposeLoads(): loadsToVLD = %d\n", loadsToVLD); - - // [InvisiSpec] Note: - // need to iterate from the head every time - // since the load can be exposed out-of-order - int loadVLDIdx = loadHead; - - while (loadsToVLD > 0 && - loadVLDIdx != loadTail && - loadQueue[loadVLDIdx]) { - - if (loadQueue[loadVLDIdx]->isSquashed()){ - incrLdIdx(loadVLDIdx); - continue; - } - // skip the loads that either do not need to expose - // or exposed already - if(!loadQueue[loadVLDIdx]->needPostFetch() - || loadQueue[loadVLDIdx]->isExposeSent() ){ - incrLdIdx(loadVLDIdx); - continue; - } - - DynInstPtr load_inst = loadQueue[loadVLDIdx]; - if (loadQueue[loadVLDIdx]->fault!=NoFault){ - //load is executed, so it wait for expose complete - //to send it to commit, regardless of whether it is ready - //to expose - load_inst->setExposeCompleted(); - load_inst->setExposeSent(); - loadsToVLD--; - if (load_inst->isExecuted()){ - DPRINTF(LSQUnit, "Execute finished and gets violation fault." - "Send inst [sn:%lli] to commit stage.\n", - load_inst->seqNum); - iewStage->instToCommit(load_inst); - iewStage->activityThisCycle(); - } - incrLdIdx(loadVLDIdx); - continue; - } - - // skip the loads that need expose but - // are not ready - if (loadQueue[loadVLDIdx]->needPostFetch() - && !loadQueue[loadVLDIdx]->readyToExpose()){ - incrLdIdx(loadVLDIdx); - continue; - } - - assert(loadQueue[loadVLDIdx]->needPostFetch() - && loadQueue[loadVLDIdx]->readyToExpose() ); - - assert(!load_inst->isCommitted()); - - - RequestPtr req = load_inst->postReq; - RequestPtr sreqLow = load_inst->postSreqLow; - RequestPtr sreqHigh = load_inst->postSreqHigh; - - // we should not have both req and sreqLow not NULL - assert( !(req && sreqLow)); - - if (req) { - DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]" - " PC= %s. req=%#x\n", - load_inst->seqNum, load_inst->pcState(), - req->getVaddr()); - } else { - DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]" - " PC= %s. reqLow=%#x, reqHigh=%#x\n", - load_inst->seqNum, load_inst->pcState(), - load_inst->postSreqLow->getVaddr(), - load_inst->postSreqHigh->getVaddr()); - } - - bool split = false; - if (TheISA::HasUnalignedMemAcc && sreqLow) { - split = true; - } else { - assert(req); - } - - if (load_inst->isL1HitLow() && (!split || load_inst->isL1HitHigh()) ) { - load_inst->setExposeCompleted(); - load_inst->setExposeSent(); - --loadsToVLD; - incrLdIdx(loadVLDIdx); - iewStage->instToCommit(load_inst); - iewStage->activityThisCycle(); - continue; - } - - PacketPtr data_pkt = NULL; - PacketPtr snd_data_pkt = NULL; - - LSQSenderState *state = new LSQSenderState; - state->isLoad = false; - state->idx = loadVLDIdx; - state->inst = load_inst; - state->noWB = true; - - bool onlyExpose = false; - if (!split) { - if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){ - data_pkt = Packet::createExpose(req); - onlyExpose = true; - }else { - data_pkt = Packet::createValidate(req); - if (!load_inst->vldData) - load_inst->vldData = new uint8_t[1]; - data_pkt->dataStatic(load_inst->vldData); - } - data_pkt->senderState = state; - data_pkt->setFirst(); - data_pkt->reqIdx = loadVLDIdx; - DPRINTF(LSQUnit, "contextid = %d\n", req->contextId()); - } else { - // allocate memory if we need at least one validation - if (!load_inst->needExposeOnly() && - (!load_inst->isL1HitLow() || !load_inst->isL1HitHigh())){ - if (!load_inst->vldData) - load_inst->vldData = new uint8_t[2]; - } else { - onlyExpose = true; - } - - // Create the split packets. - first one - if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){ - data_pkt = Packet::createExpose(sreqLow); - }else{ - data_pkt = Packet::createValidate(sreqLow); - assert(load_inst->vldData); - data_pkt->dataStatic(load_inst->vldData); - } - - // Create the split packets. - second one - if (load_inst->needExposeOnly() || load_inst->isL1HitHigh()){ - snd_data_pkt = Packet::createExpose(sreqHigh); - } else { - snd_data_pkt = Packet::createValidate(sreqHigh); - assert(load_inst->vldData); - snd_data_pkt->dataStatic(&(load_inst->vldData[1])); - } - - data_pkt->senderState = state; - data_pkt->setFirst(); - snd_data_pkt->senderState = state; - data_pkt->reqIdx = loadVLDIdx; - snd_data_pkt->reqIdx = loadVLDIdx; - - data_pkt->isSplit = true; - snd_data_pkt->isSplit = true; - state->isSplit = true; - state->outstanding = 2; - state->mainPkt = data_pkt; - - DPRINTF(LSQUnit, "contextid = %d, %d\n", - sreqLow->contextId(), sreqHigh->contextId()); - req = sreqLow; - } - - assert(!req->isStrictlyOrdered()); - assert(!req->isMmappedIpr()); - - DPRINTF(LSQUnit, "D-Cache: Validating/Exposing load idx:%i PC:%s " - "to Addr:%#x, data:%#x [sn:%lli]\n", - loadVLDIdx, load_inst->pcState(), - //FIXME: resultData not memData - req->getPaddr(), (int)*(load_inst->memData), - load_inst->seqNum); - - bool successful_expose = true; - bool completedFirst = false; - - if (!dcachePort->sendTimingReq(data_pkt)){ - DPRINTF(IEW, "D-Cache became blocked when " - "validating [sn:%lli], will retry later\n", - load_inst->seqNum); - successful_expose = false; - } else { - if (split) { - // If split, try to send the second packet too - completedFirst = true; - assert(snd_data_pkt); - - if (!dcachePort->sendTimingReq(snd_data_pkt)){ - state->complete(); - state->cacheBlocked = true; - successful_expose = false; - DPRINTF(IEW, "D-Cache became blocked when validating" - " [sn:%lli] second packet, will retry later\n", - load_inst->seqNum); - } - } - } - - if (!successful_expose){ - if (!split) { - delete state; - delete data_pkt; - }else{ - if (!completedFirst){ - delete state; - delete data_pkt; - delete snd_data_pkt; - } else { - delete snd_data_pkt; - } - } - //cpu->wakeCPU(); // This will cause issue(wrong activity count and affects the memory transactions - ++lsqCacheBlocked; - break; - } else { - // if all the packets we sent out is expose, - // we assume the expose is alreay completed - if (onlyExpose) { - load_inst->setExposeCompleted(); - numExposes++; - } else { - numValidates++; - } - if (load_inst->needExposeOnly()){ - numConvertedExposes++; - } - if (load_inst->isExecuted() && load_inst->isExposeCompleted() - && !load_inst->isSquashed()){ - DPRINTF(LSQUnit, "Expose finished. Execution done." - "Send inst [sn:%lli] to commit stage.\n", - load_inst->seqNum); - iewStage->instToCommit(load_inst); - iewStage->activityThisCycle(); - } else{ - DPRINTF(LSQUnit, "Need validation or execution not finishes." - "Need to wait for readResp/validateResp " - "for inst [sn:%lli].\n", - load_inst->seqNum); - } - - load_inst->setExposeSent(); - --loadsToVLD; - incrLdIdx(loadVLDIdx); - if (!split){ - setSpecBuffState(req); - } else { - setSpecBuffState(sreqLow); - setSpecBuffState(sreqHigh); - } - } - } - - DPRINTF(LSQUnit, "Send validate/expose for %d insts. loadsToVLD=%d" - ". loadHead=%d. loadTail=%d.\n", - old_loadsToVLD-loadsToVLD, loadsToVLD, loadHead, - loadTail); - - assert(loads>=0 && loadsToVLD >= 0); - - return old_loadsToVLD-loadsToVLD; -} - - - template void @@ -1614,12 +1247,6 @@ LSQUnit::squash(const InstSeqNum &squashed_num) stallingLoadIdx = 0; } - if (loadQueue[load_idx]->needPostFetch() && - loadQueue[load_idx]->readyToExpose() && - !loadQueue[load_idx]->isExposeSent()){ - loadsToVLD --; - } - // Clear the smart pointer to make sure it is decremented. loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; @@ -1723,53 +1350,6 @@ LSQUnit::storePostSend(PacketPtr pkt) incrStIdx(storeWBIdx); } - - -template -void -LSQUnit::completeValidate(DynInstPtr &inst, PacketPtr pkt) -{ - iewStage->wakeCPU(); - // if instruction fault, no need to check value, - // return directly - //assert(!inst->needExposeOnly()); - if (inst->isExposeCompleted() || inst->isSquashed()){ - //assert(inst->fault != NoFault); - //Already sent to commit, do nothing - return; - } - //Check validation result - bool validation_fail = false; - if (!inst->isL1HitLow() && inst->vldData[0]==0) { - validation_fail = true; - } else { - if (pkt->isSplit && !inst->isL1HitHigh() - && inst->vldData[1]==0){ - validation_fail = true; - } - } - if (validation_fail){ - // Mark the load for re-execution - inst->fault = std::make_shared(); - inst->validationFail(true); - DPRINTF(LSQUnit, "Validation failed.\n", - inst->seqNum); - } - - inst->setExposeCompleted(); - if ( inst->isExecuted() && inst->isExposeCompleted() ){ - DPRINTF(LSQUnit, "Validation finished. Execution done." - "Send inst [sn:%lli] to commit stage.\n", - inst->seqNum); - iewStage->instToCommit(inst); - iewStage->activityThisCycle(); - } else{ - DPRINTF(LSQUnit, "Validation done. Execution not finishes." - "Need to wait for readResp for inst [sn:%lli].\n", - inst->seqNum); - } -} - template void LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt) @@ -1788,7 +1368,11 @@ LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt) inst->fault==NoFault) && "in this case, we will put it into ROB twice."); - if (!inst->isExecuted()) { + if (inst->fenceDelay()) { + DPRINTF(LSQUnit, "To write back a fence delayed spec load [sn:%lli].\n", inst->seqNum); + inst->onlyWaitForFence(true); + iewStage->instQueue.deferMemInst(inst); + } else if (!inst->isExecuted()) { inst->setExecuted(); if (inst->fault == NoFault) { @@ -1832,11 +1416,9 @@ LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt) "on write back path"); // check whether the instruction can be committed - if ( !inst->isExposeCompleted() && inst->needPostFetch() ){ - DPRINTF(LSQUnit, "Expose not finished. " - "Wait until expose completion" - " to send inst [sn:%lli] to commit stage\n", inst->seqNum); - }else{ + if ( inst->fenceDelay() ) { + DPRINTF(LSQUnit, "inst [sn:%lli] misses in spec load.\n", inst->seqNum); + } else { DPRINTF(LSQUnit, "Expose and execution both finished. " "Send inst [sn:%lli] to commit stage\n", inst->seqNum); iewStage->instToCommit(inst); @@ -1927,7 +1509,6 @@ LSQUnit::sendStore(PacketPtr data_pkt) retryPkt = data_pkt; return false; } - setSpecBuffState(data_pkt->req); return true; } diff --git a/src/mem/protocol/MESI_Two_Level-L1cache.sm b/src/mem/protocol/MESI_Two_Level-L1cache.sm index f5feb7e23..8496fda61 100644 --- a/src/mem/protocol/MESI_Two_Level-L1cache.sm +++ b/src/mem/protocol/MESI_Two_Level-L1cache.sm @@ -981,6 +981,12 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") sequencer.readCallback(address, cache_entry.DataBlk); } + action(h_spec_load_miss, "hsm", + desc="Notify sequencer the spec load misses.") + { + sequencer.readCallback(address, cache_entry.DataBlk, true); + } + action(h_ifetch_hit, "hi", desc="Notify sequencer the instruction fetch completed.") { assert(is_valid(cache_entry)); @@ -1222,8 +1228,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } transition({NP,I}, SpecLoad, IX) { - iw_allocateTBEWithoutCacheEntry; - as_issueGETSPEC; + h_spec_load_miss; k_popMandatoryQueue; } diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 4a8e5ae02..090030f08 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -56,9 +56,7 @@ RubySequencerParams::create() Sequencer::Sequencer(const Params *p) : RubyPort(p), m_IncompleteTimes(MachineType_NUM), - deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check"), - m_specBuf(33), - specBufferHitEvent([this]{ specBufferHitCallback(); }, "Sequencer spec buffer hit") + deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check") { m_outstanding_count = 0; @@ -429,18 +427,6 @@ Sequencer::writeCallback(Addr address, DataBlock& data, initialRequestTime, forwardRequestTime, firstResponseTime); } -bool Sequencer::updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress) { - uint8_t idx = pkt->reqIdx; - SBE& sbe = m_specBuf[idx]; - int blkIdx = pkt->isFirst() ? 0 : 1; - SBB& sbb = sbe.blocks[blkIdx]; - if (makeLineAddress(sbb.reqAddress) == dataAddress) { - sbb.data = data; - return true; - } - return false; -} - // [InvisiSpec] Called by Ruby to send a response to CPU. void Sequencer::readCallback(Addr address, DataBlock& data, @@ -466,71 +452,19 @@ Sequencer::readCallback(Addr address, DataBlock& data, PacketPtr pkt = request->pkt; if (pkt->isSpec()) { - assert(!pkt->onlyAccessSpecBuff()); DPRINTFR(SpecBuffer, "%10s SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr())); - updateSBB(pkt, data, address); if (!externalHit) { pkt->setL1Hit(); } - } else if (pkt->isExpose()) { - DPRINTFR(SpecBuffer, "%10s EXPOSE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr())); - } else if (pkt->isValidate()) { - DPRINTFR(SpecBuffer, "%10s VALIDATE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr())); - uint8_t idx = pkt->reqIdx; - SBE& sbe = m_specBuf[idx]; - int blkIdx = pkt->isFirst() ? 0 : 1; - SBB& sbb = sbe.blocks[blkIdx]; - assert(makeLineAddress(sbb.reqAddress) == address); - if (!memcmp(sbb.data.getData(getOffset(pkt->getAddr()), pkt->getSize()), data.getData(getOffset(pkt->getAddr()), pkt->getSize()), pkt->getSize())) { - *(pkt->getPtr()) = 1; - } else { - // std::ostringstream os; - // sbb.data.print(os); - // DPRINTFR(SpecBufferValidate, "%s\n", os.str()); - // os.str(""); - // data.print(os); - // DPRINTFR(SpecBufferValidate, "%s\n", os.str()); - *(pkt->getPtr()) = 0; - } } - for (auto& dependentPkt : request->dependentSpecRequests) { - assert(!dependentPkt->onlyAccessSpecBuff()); - DPRINTFR(SpecBuffer, "%10s Merged SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), dependentPkt->reqIdx, dependentPkt->isFirst()? 0 : 1, printAddress(dependentPkt->getAddr())); - assert(dependentPkt->isSpec()); - updateSBB(dependentPkt, data, address); - if (!externalHit) { - dependentPkt->setL1Hit(); - } - memcpy(dependentPkt->getPtr(), - data.getData(getOffset(dependentPkt->getAddr()), dependentPkt->getSize()), - dependentPkt->getSize()); - ruby_hit_callback(dependentPkt); - } + assert(!pkt->isExpose()); + assert(!pkt->isValidate()); hitCallback(request, data, true, mach, externalHit, initialRequestTime, forwardRequestTime, firstResponseTime); } -void -Sequencer::specBufferHitCallback() -{ - assert(m_specRequestQueue.size()); - while (m_specRequestQueue.size()) { - auto specReq = m_specRequestQueue.front(); - if (specReq.second <= curTick()) { - PacketPtr pkt = specReq.first; - assert(pkt->onlyAccessSpecBuff()); - DPRINTFR(SpecBuffer, "%10s SB Hit Callback (idx=%d, addr=%#x)\n", curTick(), pkt->reqIdx, printAddress(pkt->getAddr())); - ruby_hit_callback(pkt); - m_specRequestQueue.pop(); - } else { - schedule(specBufferHitEvent, specReq.second); - break; - } - } -} - // [InvisiSpec] Response on the way from Ruby to CPU void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, @@ -567,7 +501,9 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, data.setData(pkt->getConstPtr(), getOffset(request_address), pkt->getSize()); } else if (!pkt->isFlush() && !pkt->isExpose() && !pkt->isValidate()) { - if ((type == RubyRequestType_LD) || + if (type == RubyRequestType_SPEC_LD && externalHit) { + DPRINTF(RubySequencer, "spec load miss!\n"); + } else if ((type == RubyRequestType_LD) || (type == RubyRequestType_SPEC_LD) || (type == RubyRequestType_IFETCH) || (type == RubyRequestType_RMW_Read) || @@ -642,51 +578,10 @@ Sequencer::makeRequest(PacketPtr pkt) if (pkt->isSpec()) { assert(pkt->cmd == MemCmd::ReadSpecReq); assert(pkt->isSplit || pkt->isFirst()); - uint8_t idx = pkt->reqIdx; - SBE& sbe = m_specBuf[idx]; - sbe.isSplit = pkt->isSplit; - int blkIdx = pkt->isFirst() ? 0 : 1; - SBB& sbb = sbe.blocks[blkIdx]; - sbb.reqAddress = pkt->getAddr(); - sbb.reqSize = pkt->getSize(); - if (pkt->onlyAccessSpecBuff()) { - int srcIdx = pkt->srcIdx; - SBE& srcEntry = m_specBuf[srcIdx]; - if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[0].reqAddress)) { - sbb.data = srcEntry.blocks[0].data; - } else if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[1].reqAddress)) { - sbb.data = srcEntry.blocks[1].data; - } else { - fatal("Requested address %#x is not present in the spec buffer\n", printAddress(sbb.reqAddress)); - } - memcpy(pkt->getPtr(), - sbb.data.getData(getOffset(sbb.reqAddress), sbb.reqSize), - sbb.reqSize); - m_specRequestQueue.push({pkt, curTick()}); - DPRINTFR(SpecBuffer, "%10s SB Hit (idx=%d, addr=%#x) on (srcIdx=%d)\n", curTick(), idx, printAddress(sbb.reqAddress), srcIdx); - if (!specBufferHitEvent.scheduled()) { - schedule(specBufferHitEvent, clockEdge(Cycles(1))); - } - return RequestStatus_Issued; - } else { - // assert it is not in the buffer - primary_type = secondary_type = RubyRequestType_SPEC_LD; - } + // assert it is not in the buffer + primary_type = secondary_type = RubyRequestType_SPEC_LD; } else if (pkt->isExpose() || pkt->isValidate()) { - assert(pkt->cmd == MemCmd::ExposeReq || pkt->cmd == MemCmd::ValidateReq); - assert(pkt->isSplit || pkt->isFirst()); - uint8_t idx = pkt->reqIdx; - SBE& sbe = m_specBuf[idx]; - sbe.isSplit = pkt->isSplit; - int blkIdx = pkt->isFirst() ? 0 : 1; - SBB& sbb = sbe.blocks[blkIdx]; - if (sbb.reqAddress != pkt->getAddr()) { - fatal("sbb.reqAddress != pkt->getAddr: %#x != %#x\n", printAddress(sbb.reqAddress), printAddress(pkt->getAddr())); - } - if (sbb.reqSize != pkt->getSize()) { - fatal("sbb.reqSize != pkt->getSize(): %d != %d\n", sbb.reqSize, pkt->getSize()); - } - primary_type = secondary_type = RubyRequestType_EXPOSE; + assert(false); } else if (pkt->isLLSC()) { // // Alpha LL/SC instructions need to be handled carefully by the cache diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 66ff92777..8e1f08a48 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -256,10 +256,6 @@ class Sequencer : public RubyPort std::vector m_IncompleteTimes; EventFunctionWrapper deadlockCheckEvent; - - std::vector m_specBuf; - std::queue> m_specRequestQueue; - EventFunctionWrapper specBufferHitEvent; }; inline std::ostream& -- cgit v1.2.3