From b0e609d5cf6961bb9b3f12065659e1c42c13ef06 Mon Sep 17 00:00:00 2001 From: Iru Cai Date: Sun, 12 May 2019 14:34:21 +0800 Subject: only spec load when hit --- src/cpu/o3/lsq_unit_impl.hh | 453 ++------------------------------------------ 1 file changed, 17 insertions(+), 436 deletions(-) (limited to 'src/cpu/o3/lsq_unit_impl.hh') diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 56870b5a3..ebc963d5b 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -117,8 +117,6 @@ LSQUnit::completeDataAccess(PacketPtr pkt) } else { inst->setL1HitLow(); } - } else if (!pkt->isSpec()) { - setSpecBuffState(pkt->req); } // If this is a split access, wait until all packets are received. @@ -126,6 +124,12 @@ LSQUnit::completeDataAccess(PacketPtr pkt) return; } + if ( pkt->isSpec() && pkt->isRead() && (!pkt->isL1Hit()) ) { + DPRINTF(LSQUnit, "spec load miss for inst [sn:%lli], fence it.\n", + inst->seqNum); + inst->fenceDelay(true); + } + assert(!cpu->switchedOut()); if (!inst->isSquashed()) { if (!state->noWB) { @@ -147,7 +151,7 @@ LSQUnit::completeDataAccess(PacketPtr pkt) } if (pkt->isValidate() || pkt->isExpose()) { - completeValidate(inst, pkt); + assert(false); } } @@ -628,103 +632,6 @@ LSQUnit::checkSnoop(PacketPtr pkt) return; } -template -bool -LSQUnit::checkPrevLoadsExecuted(int req_idx) -{ - int load_idx = loadHead; - while (load_idx != req_idx){ - if (!loadQueue[load_idx]->isExecuted()){ - // if at least on load ahead of current load - // does not finish spec access, - // then return false - return false; - } - incrLdIdx(load_idx); - } - - //if all executed, return true - return true; -} - -template -void -LSQUnit::setSpecBuffState(RequestPtr expose_req) -{ - Addr req_eff_addr1 = expose_req->getPaddr() & cacheBlockMask; - - int load_idx = loadHead; - while (load_idx != loadTail){ - DynInstPtr ld_inst = loadQueue[load_idx]; - if (ld_inst->effAddrValid()){ - - Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask; - Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask; - if (ld_eff_addr1 == req_eff_addr1){ - ld_inst->setSpecBuffObsoleteLow(); - } else if (ld_eff_addr2 == req_eff_addr1){ - ld_inst->setSpecBuffObsoleteHigh(); - } - } - incrLdIdx(load_idx); - } -} - - -template -int -LSQUnit::checkSpecBuffHit(RequestPtr req, int req_idx) -{ - - Addr req_eff_addr1 = req->getPaddr() & cacheBlockMask; - //Addr req_eff_addr2 = (req->getPaddr() + req->getSize()-1) & cacheBlockMask; - // the req should be within the same cache line - //assert (req_eff_addr1 == req_eff_addr2); - assert (!loadQueue[req_idx]->isExecuted()); - - int load_idx = loadHead; - - while (load_idx != loadTail){ - DynInstPtr ld_inst = loadQueue[load_idx]; - if (ld_inst->effAddrValid()){ - Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask; - Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask; - - if ((req_eff_addr1 == ld_eff_addr1 && ld_inst->isL1HitLow()) - || (req_eff_addr1 == ld_eff_addr2 && ld_inst->isL1HitHigh())){ - return -1; - //already in L1, do not copy from buffer - } else { - - if (ld_inst->isExecuted() && ld_inst->needPostFetch() - && !ld_inst->isSquashed() && ld_inst->fault==NoFault){ - if (req_eff_addr1 == ld_eff_addr1 && !ld_inst->isL1HitLow() - && !ld_inst->isSpecBuffObsoleteLow()){ - DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] " - "and [sn:%lli] (low) at address %#x\n", - loadQueue[req_idx]->seqNum, ld_inst->seqNum, - req_eff_addr1); - return load_idx; - } else if ( ld_eff_addr2 !=0 && - req_eff_addr1 == ld_eff_addr2 && !ld_inst->isL1HitHigh() - && !ld_inst->isSpecBuffObsoleteHigh()){ - DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] " - "and [sn:%lli] (high) at address %#x\n", - loadQueue[req_idx]->seqNum, ld_inst->seqNum, - req_eff_addr1); - return load_idx; - } - } - } - } - incrLdIdx(load_idx); - } - - return -1; -} - - - template Fault LSQUnit::checkViolations(int load_idx, const DynInstPtr &inst) @@ -1064,6 +971,7 @@ LSQUnit::updateVisibleState() } } inst->readyToExpose(true); + inst->fenceDelay(false); } else { if (!useIFT) { if (inst->readyToExpose()){ @@ -1097,10 +1005,10 @@ LSQUnit::updateVisibleState() } else { DPRINTF(LSQUnit, "load inst [sn:%lli] %s is an unsafe speculated load, but source registers are not tainted.\n", inst->seqNum, inst->pcState()); inst->readyToExpose(true); + inst->fenceDelay(false); } } } - inst->fenceDelay(false); } else { inst->readyToExpose(true); inst->fenceDelay(false); @@ -1109,281 +1017,6 @@ LSQUnit::updateVisibleState() } } -// [InvisiSpec] validate loads -template -int -LSQUnit::exposeLoads() -{ - if(!isInvisibleSpec){ - assert(loadsToVLD==0 - && "request validation on Non invisible Spec mode"); - } - - int old_loadsToVLD = loadsToVLD; - - DPRINTF(LSQUnit, "starting exposeLoads(): loadsToVLD = %d\n", loadsToVLD); - - // [InvisiSpec] Note: - // need to iterate from the head every time - // since the load can be exposed out-of-order - int loadVLDIdx = loadHead; - - while (loadsToVLD > 0 && - loadVLDIdx != loadTail && - loadQueue[loadVLDIdx]) { - - if (loadQueue[loadVLDIdx]->isSquashed()){ - incrLdIdx(loadVLDIdx); - continue; - } - // skip the loads that either do not need to expose - // or exposed already - if(!loadQueue[loadVLDIdx]->needPostFetch() - || loadQueue[loadVLDIdx]->isExposeSent() ){ - incrLdIdx(loadVLDIdx); - continue; - } - - DynInstPtr load_inst = loadQueue[loadVLDIdx]; - if (loadQueue[loadVLDIdx]->fault!=NoFault){ - //load is executed, so it wait for expose complete - //to send it to commit, regardless of whether it is ready - //to expose - load_inst->setExposeCompleted(); - load_inst->setExposeSent(); - loadsToVLD--; - if (load_inst->isExecuted()){ - DPRINTF(LSQUnit, "Execute finished and gets violation fault." - "Send inst [sn:%lli] to commit stage.\n", - load_inst->seqNum); - iewStage->instToCommit(load_inst); - iewStage->activityThisCycle(); - } - incrLdIdx(loadVLDIdx); - continue; - } - - // skip the loads that need expose but - // are not ready - if (loadQueue[loadVLDIdx]->needPostFetch() - && !loadQueue[loadVLDIdx]->readyToExpose()){ - incrLdIdx(loadVLDIdx); - continue; - } - - assert(loadQueue[loadVLDIdx]->needPostFetch() - && loadQueue[loadVLDIdx]->readyToExpose() ); - - assert(!load_inst->isCommitted()); - - - RequestPtr req = load_inst->postReq; - RequestPtr sreqLow = load_inst->postSreqLow; - RequestPtr sreqHigh = load_inst->postSreqHigh; - - // we should not have both req and sreqLow not NULL - assert( !(req && sreqLow)); - - if (req) { - DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]" - " PC= %s. req=%#x\n", - load_inst->seqNum, load_inst->pcState(), - req->getVaddr()); - } else { - DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]" - " PC= %s. reqLow=%#x, reqHigh=%#x\n", - load_inst->seqNum, load_inst->pcState(), - load_inst->postSreqLow->getVaddr(), - load_inst->postSreqHigh->getVaddr()); - } - - bool split = false; - if (TheISA::HasUnalignedMemAcc && sreqLow) { - split = true; - } else { - assert(req); - } - - if (load_inst->isL1HitLow() && (!split || load_inst->isL1HitHigh()) ) { - load_inst->setExposeCompleted(); - load_inst->setExposeSent(); - --loadsToVLD; - incrLdIdx(loadVLDIdx); - iewStage->instToCommit(load_inst); - iewStage->activityThisCycle(); - continue; - } - - PacketPtr data_pkt = NULL; - PacketPtr snd_data_pkt = NULL; - - LSQSenderState *state = new LSQSenderState; - state->isLoad = false; - state->idx = loadVLDIdx; - state->inst = load_inst; - state->noWB = true; - - bool onlyExpose = false; - if (!split) { - if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){ - data_pkt = Packet::createExpose(req); - onlyExpose = true; - }else { - data_pkt = Packet::createValidate(req); - if (!load_inst->vldData) - load_inst->vldData = new uint8_t[1]; - data_pkt->dataStatic(load_inst->vldData); - } - data_pkt->senderState = state; - data_pkt->setFirst(); - data_pkt->reqIdx = loadVLDIdx; - DPRINTF(LSQUnit, "contextid = %d\n", req->contextId()); - } else { - // allocate memory if we need at least one validation - if (!load_inst->needExposeOnly() && - (!load_inst->isL1HitLow() || !load_inst->isL1HitHigh())){ - if (!load_inst->vldData) - load_inst->vldData = new uint8_t[2]; - } else { - onlyExpose = true; - } - - // Create the split packets. - first one - if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){ - data_pkt = Packet::createExpose(sreqLow); - }else{ - data_pkt = Packet::createValidate(sreqLow); - assert(load_inst->vldData); - data_pkt->dataStatic(load_inst->vldData); - } - - // Create the split packets. - second one - if (load_inst->needExposeOnly() || load_inst->isL1HitHigh()){ - snd_data_pkt = Packet::createExpose(sreqHigh); - } else { - snd_data_pkt = Packet::createValidate(sreqHigh); - assert(load_inst->vldData); - snd_data_pkt->dataStatic(&(load_inst->vldData[1])); - } - - data_pkt->senderState = state; - data_pkt->setFirst(); - snd_data_pkt->senderState = state; - data_pkt->reqIdx = loadVLDIdx; - snd_data_pkt->reqIdx = loadVLDIdx; - - data_pkt->isSplit = true; - snd_data_pkt->isSplit = true; - state->isSplit = true; - state->outstanding = 2; - state->mainPkt = data_pkt; - - DPRINTF(LSQUnit, "contextid = %d, %d\n", - sreqLow->contextId(), sreqHigh->contextId()); - req = sreqLow; - } - - assert(!req->isStrictlyOrdered()); - assert(!req->isMmappedIpr()); - - DPRINTF(LSQUnit, "D-Cache: Validating/Exposing load idx:%i PC:%s " - "to Addr:%#x, data:%#x [sn:%lli]\n", - loadVLDIdx, load_inst->pcState(), - //FIXME: resultData not memData - req->getPaddr(), (int)*(load_inst->memData), - load_inst->seqNum); - - bool successful_expose = true; - bool completedFirst = false; - - if (!dcachePort->sendTimingReq(data_pkt)){ - DPRINTF(IEW, "D-Cache became blocked when " - "validating [sn:%lli], will retry later\n", - load_inst->seqNum); - successful_expose = false; - } else { - if (split) { - // If split, try to send the second packet too - completedFirst = true; - assert(snd_data_pkt); - - if (!dcachePort->sendTimingReq(snd_data_pkt)){ - state->complete(); - state->cacheBlocked = true; - successful_expose = false; - DPRINTF(IEW, "D-Cache became blocked when validating" - " [sn:%lli] second packet, will retry later\n", - load_inst->seqNum); - } - } - } - - if (!successful_expose){ - if (!split) { - delete state; - delete data_pkt; - }else{ - if (!completedFirst){ - delete state; - delete data_pkt; - delete snd_data_pkt; - } else { - delete snd_data_pkt; - } - } - //cpu->wakeCPU(); // This will cause issue(wrong activity count and affects the memory transactions - ++lsqCacheBlocked; - break; - } else { - // if all the packets we sent out is expose, - // we assume the expose is alreay completed - if (onlyExpose) { - load_inst->setExposeCompleted(); - numExposes++; - } else { - numValidates++; - } - if (load_inst->needExposeOnly()){ - numConvertedExposes++; - } - if (load_inst->isExecuted() && load_inst->isExposeCompleted() - && !load_inst->isSquashed()){ - DPRINTF(LSQUnit, "Expose finished. Execution done." - "Send inst [sn:%lli] to commit stage.\n", - load_inst->seqNum); - iewStage->instToCommit(load_inst); - iewStage->activityThisCycle(); - } else{ - DPRINTF(LSQUnit, "Need validation or execution not finishes." - "Need to wait for readResp/validateResp " - "for inst [sn:%lli].\n", - load_inst->seqNum); - } - - load_inst->setExposeSent(); - --loadsToVLD; - incrLdIdx(loadVLDIdx); - if (!split){ - setSpecBuffState(req); - } else { - setSpecBuffState(sreqLow); - setSpecBuffState(sreqHigh); - } - } - } - - DPRINTF(LSQUnit, "Send validate/expose for %d insts. loadsToVLD=%d" - ". loadHead=%d. loadTail=%d.\n", - old_loadsToVLD-loadsToVLD, loadsToVLD, loadHead, - loadTail); - - assert(loads>=0 && loadsToVLD >= 0); - - return old_loadsToVLD-loadsToVLD; -} - - - template void @@ -1614,12 +1247,6 @@ LSQUnit::squash(const InstSeqNum &squashed_num) stallingLoadIdx = 0; } - if (loadQueue[load_idx]->needPostFetch() && - loadQueue[load_idx]->readyToExpose() && - !loadQueue[load_idx]->isExposeSent()){ - loadsToVLD --; - } - // Clear the smart pointer to make sure it is decremented. loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; @@ -1723,53 +1350,6 @@ LSQUnit::storePostSend(PacketPtr pkt) incrStIdx(storeWBIdx); } - - -template -void -LSQUnit::completeValidate(DynInstPtr &inst, PacketPtr pkt) -{ - iewStage->wakeCPU(); - // if instruction fault, no need to check value, - // return directly - //assert(!inst->needExposeOnly()); - if (inst->isExposeCompleted() || inst->isSquashed()){ - //assert(inst->fault != NoFault); - //Already sent to commit, do nothing - return; - } - //Check validation result - bool validation_fail = false; - if (!inst->isL1HitLow() && inst->vldData[0]==0) { - validation_fail = true; - } else { - if (pkt->isSplit && !inst->isL1HitHigh() - && inst->vldData[1]==0){ - validation_fail = true; - } - } - if (validation_fail){ - // Mark the load for re-execution - inst->fault = std::make_shared(); - inst->validationFail(true); - DPRINTF(LSQUnit, "Validation failed.\n", - inst->seqNum); - } - - inst->setExposeCompleted(); - if ( inst->isExecuted() && inst->isExposeCompleted() ){ - DPRINTF(LSQUnit, "Validation finished. Execution done." - "Send inst [sn:%lli] to commit stage.\n", - inst->seqNum); - iewStage->instToCommit(inst); - iewStage->activityThisCycle(); - } else{ - DPRINTF(LSQUnit, "Validation done. Execution not finishes." - "Need to wait for readResp for inst [sn:%lli].\n", - inst->seqNum); - } -} - template void LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt) @@ -1788,7 +1368,11 @@ LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt) inst->fault==NoFault) && "in this case, we will put it into ROB twice."); - if (!inst->isExecuted()) { + if (inst->fenceDelay()) { + DPRINTF(LSQUnit, "To write back a fence delayed spec load [sn:%lli].\n", inst->seqNum); + inst->onlyWaitForFence(true); + iewStage->instQueue.deferMemInst(inst); + } else if (!inst->isExecuted()) { inst->setExecuted(); if (inst->fault == NoFault) { @@ -1832,11 +1416,9 @@ LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt) "on write back path"); // check whether the instruction can be committed - if ( !inst->isExposeCompleted() && inst->needPostFetch() ){ - DPRINTF(LSQUnit, "Expose not finished. " - "Wait until expose completion" - " to send inst [sn:%lli] to commit stage\n", inst->seqNum); - }else{ + if ( inst->fenceDelay() ) { + DPRINTF(LSQUnit, "inst [sn:%lli] misses in spec load.\n", inst->seqNum); + } else { DPRINTF(LSQUnit, "Expose and execution both finished. " "Send inst [sn:%lli] to commit stage\n", inst->seqNum); iewStage->instToCommit(inst); @@ -1927,7 +1509,6 @@ LSQUnit::sendStore(PacketPtr data_pkt) retryPkt = data_pkt; return false; } - setSpecBuffState(data_pkt->req); return true; } -- cgit v1.2.3