summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Iru Cai <mytbk920423@gmail.com> 2019-05-12 14:34:21 +0800
committer: Iru Cai <mytbk920423@gmail.com> 2019-05-12 14:34:21 +0800
commit: b0e609d5cf6961bb9b3f12065659e1c42c13ef06 (patch)
tree: d03553831a09a99902b8cf1f631f4e684f433425
parent: 2b62fec3590024a7ce82ef5d4647397d37ed37eb (diff)
download: gem5-b0e609d5cf6961bb9b3f12065659e1c42c13ef06.tar.xz
only spec load when hit
-rw-r--r-- src/cpu/o3/iew_impl.hh 2
-rw-r--r-- src/cpu/o3/inst_queue_impl.hh 2
-rw-r--r-- src/cpu/o3/lsq.hh 8
-rw-r--r-- src/cpu/o3/lsq_impl.hh 23
-rw-r--r-- src/cpu/o3/lsq_unit.hh 94
-rw-r--r-- src/cpu/o3/lsq_unit_impl.hh 453
-rw-r--r-- src/mem/protocol/MESI_Two_Level-L1cache.sm 9
-rw-r--r-- src/mem/ruby/system/Sequencer.cc 123
-rw-r--r-- src/mem/ruby/system/Sequencer.hh 4
9 files changed, 34 insertions, 684 deletions
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 5b67e4c3c..6af8b4563 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1193,7 +1193,7 @@ DefaultIEW<Impl>::executeInsts()
// if we successfully commit sth, then we need to activate the stage or somehow
// problems happen when interacting with squash
// NOTE: we always send validations before execute load requests
- ldstQueue.exposeLoads();
+ // ldstQueue.exposeLoads();
// Uncomment this if you want to see all available instructions.
// @todo This doesn't actually work anymore, we should fix it.
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 980f29b35..0a6d309fe 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1184,11 +1184,9 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
// for both (2, 3) we need to restart the translation
if ( (*it)->translationCompleted()
|| ((*it)->onlyWaitForFence() && !(*it)->fenceDelay())
- || ((*it)->onlyWaitForExpose() && (*it)->readyToExpose())
|| (*it)->isSquashed()) {
DynInstPtr mem_inst = std::move(*it);
mem_inst->onlyWaitForFence(false);
- mem_inst->onlyWaitForExpose(false);
deferredMemInsts.erase(it);
return mem_inst;
}
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index e5c35a3a6..0e18aa145 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -136,14 +136,6 @@ class LSQ {
/** [mengjia]
- * Attempts to validate loads until all cache ports are used or the
- * interface becomes blocked.
- */
- int exposeLoads();
- /** Same as above, but only for one thread. */
- int exposeLoads(ThreadID tid);
-
- /** [mengjia]
* attempt to update FenceDelay state for load insts
*/
void updateVisibleState();
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 24066cd4b..b6742070e 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -294,29 +294,6 @@ LSQ<Impl>::writebackStores()
// [mengjia]
template<class Impl>
-int
-LSQ<Impl>::exposeLoads()
-{
- list<ThreadID>::iterator threads = activeThreads->begin();
- list<ThreadID>::iterator end = activeThreads->end();
-
- int exposedLoads = 0;
- while (threads != end) {
- ThreadID tid = *threads++;
-
- if (numLoadsToVLD(tid) > 0) {
- DPRINTF(Writeback,"[tid:%i] Validate loads. %i loads "
- "available for Validate.\n", tid, numLoadsToVLD(tid));
- }
-
- exposedLoads += thread[tid].exposeLoads();
- }
- return exposedLoads;
-}
-
-
-// [mengjia]
-template<class Impl>
void
LSQ<Impl>::updateVisibleState()
{
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 1c8b98f2e..711447f31 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -141,12 +141,6 @@ class LSQUnit {
*/
void checkSnoop(PacketPtr pkt);
- // [InvisiSpec] check whether current request will hit in the
- // spec buffer or not
- int checkSpecBuffHit(const RequestPtr req, const int req_idx);
- void setSpecBuffState(const RequestPtr req);
-
- bool checkPrevLoadsExecuted(const int req_idx);
/** Executes a load instruction. */
Fault executeLoad(const DynInstPtr &inst);
@@ -165,9 +159,6 @@ class LSQUnit {
/** Writes back stores. */
void writebackStores();
- /** [mengjia] Validate loads. */
- int exposeLoads();
-
/** [mengjia] Update Visbible State.
* In the mode defence relying on fence: setup fenceDelay state.
* In the mode defence relying on invisibleSpec:
@@ -257,9 +248,6 @@ class LSQUnit {
/** Writes back the instruction, sending it to IEW. */
void writeback(const DynInstPtr &inst, PacketPtr pkt);
- // [InvisiSpec] complete Validates
- void completeValidate(DynInstPtr &inst, PacketPtr pkt);
-
/** Writes back a store that couldn't be completed the previous cycle. */
void writebackPendingStore();
@@ -872,46 +860,12 @@ LSQUnit<Impl>::read(const RequestPtr &req,
fst_data_pkt = data_pkt;
fst_data_pkt->setFirst();
- if (sendSpecRead){
- int src_idx = checkSpecBuffHit(req, load_idx);
- if (src_idx != -1) {
- if (allowSpecBuffHit){
- data_pkt->setOnlyAccessSpecBuff();
- }
- data_pkt->srcIdx = src_idx;
- specBuffHits++;
- }else{
- specBuffMisses++;
- }
- }
fst_data_pkt->reqIdx = load_idx;
} else {
// Create the split packets.
if(sendSpecRead){
-
fst_data_pkt = Packet::createReadSpec(sreqLow);
- int fst_src_idx = checkSpecBuffHit(sreqLow, load_idx);
- if ( fst_src_idx != -1 ) {
- if (allowSpecBuffHit){
- fst_data_pkt->setOnlyAccessSpecBuff();
- }
- fst_data_pkt->srcIdx = fst_src_idx;
- specBuffHits++;
- } else {
- specBuffMisses++;
- }
-
snd_data_pkt = Packet::createReadSpec(sreqHigh);
- int snd_src_idx = checkSpecBuffHit(sreqHigh, load_idx);
- if ( snd_src_idx != -1 ) {
- if (allowSpecBuffHit){
- snd_data_pkt->setOnlyAccessSpecBuff();
- }
- snd_data_pkt->srcIdx = snd_src_idx;
- specBuffHits++;
- } else {
- specBuffMisses++;
- }
}else{
fst_data_pkt = Packet::createRead(sreqLow);
snd_data_pkt = Packet::createRead(sreqHigh);
@@ -1002,58 +956,10 @@ LSQUnit<Impl>::read(const RequestPtr &req,
// Set everything ready for expose/validation after the read is
// successfully sent out
if(sendSpecRead){ // sending actual request
-
- // [mengjia] Here we set the needExposeOnly flag
- if (needsTSO && !load_inst->isDataPrefetch()){
- // need to check whether previous load_instructions specComplete or not
- if ( checkPrevLoadsExecuted(load_idx) ){
- load_inst->needExposeOnly(true);
- DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as "
- "needExposeOnly\n",
- load_inst->pcState(), load_inst->seqNum);
- } else {
- DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as "
- "needValidation\n",
- load_inst->pcState(), load_inst->seqNum);
- }
- }else{
- //if RC, always only need expose
- load_inst->needExposeOnly(true);
- DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as needExposeOnly\n",
- load_inst->pcState(), load_inst->seqNum);
- }
-
- load_inst->needPostFetch(true);
assert(!req->isMmappedIpr());
- //save expose requestPtr
- if (TheISA::HasUnalignedMemAcc && sreqLow) {
- load_inst->postSreqLow = std::make_shared<Request>(*sreqLow);
- load_inst->postSreqHigh = std::make_shared<Request>(*sreqHigh);
- load_inst->postReq = nullptr;
- DPRINTF(LSQUnit, "created validation/expose"
- " request for inst [sn:%lli]"
- " reqLow=%#x, reqHigh=%#x\n",
- load_inst->seqNum,
- load_inst->postSreqLow->getVaddr(),
- load_inst->postSreqHigh->getVaddr());
- }else{
- load_inst->postReq = std::make_shared<Request>(*req);
- load_inst->postSreqLow = nullptr;
- load_inst->postSreqHigh = nullptr;
- DPRINTF(LSQUnit, "created validation/expose"
- " request for inst [sn:%lli]"
- " req=%#x\n",
- load_inst->seqNum, load_inst->postReq->getVaddr());
- }
} else {
load_inst->setExposeCompleted();
load_inst->needPostFetch(false);
- if (TheISA::HasUnalignedMemAcc && sreqLow) {
- setSpecBuffState(sreqLow);
- setSpecBuffState(sreqHigh);
- } else {
- setSpecBuffState(req);
- }
}
return NoFault;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 56870b5a3..ebc963d5b 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -117,8 +117,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
} else {
inst->setL1HitLow();
}
- } else if (!pkt->isSpec()) {
- setSpecBuffState(pkt->req);
}
// If this is a split access, wait until all packets are received.
@@ -126,6 +124,12 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
return;
}
+ if ( pkt->isSpec() && pkt->isRead() && (!pkt->isL1Hit()) ) {
+ DPRINTF(LSQUnit, "spec load miss for inst [sn:%lli], fence it.\n",
+ inst->seqNum);
+ inst->fenceDelay(true);
+ }
+
assert(!cpu->switchedOut());
if (!inst->isSquashed()) {
if (!state->noWB) {
@@ -147,7 +151,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
if (pkt->isValidate() || pkt->isExpose()) {
- completeValidate(inst, pkt);
+ assert(false);
}
}
@@ -629,103 +633,6 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
}
template <class Impl>
-bool
-LSQUnit<Impl>::checkPrevLoadsExecuted(int req_idx)
-{
- int load_idx = loadHead;
- while (load_idx != req_idx){
- if (!loadQueue[load_idx]->isExecuted()){
- // if at least on load ahead of current load
- // does not finish spec access,
- // then return false
- return false;
- }
- incrLdIdx(load_idx);
- }
-
- //if all executed, return true
- return true;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::setSpecBuffState(RequestPtr expose_req)
-{
- Addr req_eff_addr1 = expose_req->getPaddr() & cacheBlockMask;
-
- int load_idx = loadHead;
- while (load_idx != loadTail){
- DynInstPtr ld_inst = loadQueue[load_idx];
- if (ld_inst->effAddrValid()){
-
- Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask;
- Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask;
- if (ld_eff_addr1 == req_eff_addr1){
- ld_inst->setSpecBuffObsoleteLow();
- } else if (ld_eff_addr2 == req_eff_addr1){
- ld_inst->setSpecBuffObsoleteHigh();
- }
- }
- incrLdIdx(load_idx);
- }
-}
-
-
-template <class Impl>
-int
-LSQUnit<Impl>::checkSpecBuffHit(RequestPtr req, int req_idx)
-{
-
- Addr req_eff_addr1 = req->getPaddr() & cacheBlockMask;
- //Addr req_eff_addr2 = (req->getPaddr() + req->getSize()-1) & cacheBlockMask;
- // the req should be within the same cache line
- //assert (req_eff_addr1 == req_eff_addr2);
- assert (!loadQueue[req_idx]->isExecuted());
-
- int load_idx = loadHead;
-
- while (load_idx != loadTail){
- DynInstPtr ld_inst = loadQueue[load_idx];
- if (ld_inst->effAddrValid()){
- Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask;
- Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask;
-
- if ((req_eff_addr1 == ld_eff_addr1 && ld_inst->isL1HitLow())
- || (req_eff_addr1 == ld_eff_addr2 && ld_inst->isL1HitHigh())){
- return -1;
- //already in L1, do not copy from buffer
- } else {
-
- if (ld_inst->isExecuted() && ld_inst->needPostFetch()
- && !ld_inst->isSquashed() && ld_inst->fault==NoFault){
- if (req_eff_addr1 == ld_eff_addr1 && !ld_inst->isL1HitLow()
- && !ld_inst->isSpecBuffObsoleteLow()){
- DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] "
- "and [sn:%lli] (low) at address %#x\n",
- loadQueue[req_idx]->seqNum, ld_inst->seqNum,
- req_eff_addr1);
- return load_idx;
- } else if ( ld_eff_addr2 !=0 &&
- req_eff_addr1 == ld_eff_addr2 && !ld_inst->isL1HitHigh()
- && !ld_inst->isSpecBuffObsoleteHigh()){
- DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] "
- "and [sn:%lli] (high) at address %#x\n",
- loadQueue[req_idx]->seqNum, ld_inst->seqNum,
- req_eff_addr1);
- return load_idx;
- }
- }
- }
- }
- incrLdIdx(load_idx);
- }
-
- return -1;
-}
-
-
-
-template <class Impl>
Fault
LSQUnit<Impl>::checkViolations(int load_idx, const DynInstPtr &inst)
{
@@ -1064,6 +971,7 @@ LSQUnit<Impl>::updateVisibleState()
}
}
inst->readyToExpose(true);
+ inst->fenceDelay(false);
} else {
if (!useIFT) {
if (inst->readyToExpose()){
@@ -1097,10 +1005,10 @@ LSQUnit<Impl>::updateVisibleState()
} else {
DPRINTF(LSQUnit, "load inst [sn:%lli] %s is an unsafe speculated load, but source registers are not tainted.\n", inst->seqNum, inst->pcState());
inst->readyToExpose(true);
+ inst->fenceDelay(false);
}
}
}
- inst->fenceDelay(false);
} else {
inst->readyToExpose(true);
inst->fenceDelay(false);
@@ -1109,281 +1017,6 @@ LSQUnit<Impl>::updateVisibleState()
}
}
-// [InvisiSpec] validate loads
-template <class Impl>
-int
-LSQUnit<Impl>::exposeLoads()
-{
- if(!isInvisibleSpec){
- assert(loadsToVLD==0
- && "request validation on Non invisible Spec mode");
- }
-
- int old_loadsToVLD = loadsToVLD;
-
- DPRINTF(LSQUnit, "starting exposeLoads(): loadsToVLD = %d\n", loadsToVLD);
-
- // [InvisiSpec] Note:
- // need to iterate from the head every time
- // since the load can be exposed out-of-order
- int loadVLDIdx = loadHead;
-
- while (loadsToVLD > 0 &&
- loadVLDIdx != loadTail &&
- loadQueue[loadVLDIdx]) {
-
- if (loadQueue[loadVLDIdx]->isSquashed()){
- incrLdIdx(loadVLDIdx);
- continue;
- }
- // skip the loads that either do not need to expose
- // or exposed already
- if(!loadQueue[loadVLDIdx]->needPostFetch()
- || loadQueue[loadVLDIdx]->isExposeSent() ){
- incrLdIdx(loadVLDIdx);
- continue;
- }
-
- DynInstPtr load_inst = loadQueue[loadVLDIdx];
- if (loadQueue[loadVLDIdx]->fault!=NoFault){
- //load is executed, so it wait for expose complete
- //to send it to commit, regardless of whether it is ready
- //to expose
- load_inst->setExposeCompleted();
- load_inst->setExposeSent();
- loadsToVLD--;
- if (load_inst->isExecuted()){
- DPRINTF(LSQUnit, "Execute finished and gets violation fault."
- "Send inst [sn:%lli] to commit stage.\n",
- load_inst->seqNum);
- iewStage->instToCommit(load_inst);
- iewStage->activityThisCycle();
- }
- incrLdIdx(loadVLDIdx);
- continue;
- }
-
- // skip the loads that need expose but
- // are not ready
- if (loadQueue[loadVLDIdx]->needPostFetch()
- && !loadQueue[loadVLDIdx]->readyToExpose()){
- incrLdIdx(loadVLDIdx);
- continue;
- }
-
- assert(loadQueue[loadVLDIdx]->needPostFetch()
- && loadQueue[loadVLDIdx]->readyToExpose() );
-
- assert(!load_inst->isCommitted());
-
-
- RequestPtr req = load_inst->postReq;
- RequestPtr sreqLow = load_inst->postSreqLow;
- RequestPtr sreqHigh = load_inst->postSreqHigh;
-
- // we should not have both req and sreqLow not NULL
- assert( !(req && sreqLow));
-
- if (req) {
- DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]"
- " PC= %s. req=%#x\n",
- load_inst->seqNum, load_inst->pcState(),
- req->getVaddr());
- } else {
- DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]"
- " PC= %s. reqLow=%#x, reqHigh=%#x\n",
- load_inst->seqNum, load_inst->pcState(),
- load_inst->postSreqLow->getVaddr(),
- load_inst->postSreqHigh->getVaddr());
- }
-
- bool split = false;
- if (TheISA::HasUnalignedMemAcc && sreqLow) {
- split = true;
- } else {
- assert(req);
- }
-
- if (load_inst->isL1HitLow() && (!split || load_inst->isL1HitHigh()) ) {
- load_inst->setExposeCompleted();
- load_inst->setExposeSent();
- --loadsToVLD;
- incrLdIdx(loadVLDIdx);
- iewStage->instToCommit(load_inst);
- iewStage->activityThisCycle();
- continue;
- }
-
- PacketPtr data_pkt = NULL;
- PacketPtr snd_data_pkt = NULL;
-
- LSQSenderState *state = new LSQSenderState;
- state->isLoad = false;
- state->idx = loadVLDIdx;
- state->inst = load_inst;
- state->noWB = true;
-
- bool onlyExpose = false;
- if (!split) {
- if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){
- data_pkt = Packet::createExpose(req);
- onlyExpose = true;
- }else {
- data_pkt = Packet::createValidate(req);
- if (!load_inst->vldData)
- load_inst->vldData = new uint8_t[1];
- data_pkt->dataStatic(load_inst->vldData);
- }
- data_pkt->senderState = state;
- data_pkt->setFirst();
- data_pkt->reqIdx = loadVLDIdx;
- DPRINTF(LSQUnit, "contextid = %d\n", req->contextId());
- } else {
- // allocate memory if we need at least one validation
- if (!load_inst->needExposeOnly() &&
- (!load_inst->isL1HitLow() || !load_inst->isL1HitHigh())){
- if (!load_inst->vldData)
- load_inst->vldData = new uint8_t[2];
- } else {
- onlyExpose = true;
- }
-
- // Create the split packets. - first one
- if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){
- data_pkt = Packet::createExpose(sreqLow);
- }else{
- data_pkt = Packet::createValidate(sreqLow);
- assert(load_inst->vldData);
- data_pkt->dataStatic(load_inst->vldData);
- }
-
- // Create the split packets. - second one
- if (load_inst->needExposeOnly() || load_inst->isL1HitHigh()){
- snd_data_pkt = Packet::createExpose(sreqHigh);
- } else {
- snd_data_pkt = Packet::createValidate(sreqHigh);
- assert(load_inst->vldData);
- snd_data_pkt->dataStatic(&(load_inst->vldData[1]));
- }
-
- data_pkt->senderState = state;
- data_pkt->setFirst();
- snd_data_pkt->senderState = state;
- data_pkt->reqIdx = loadVLDIdx;
- snd_data_pkt->reqIdx = loadVLDIdx;
-
- data_pkt->isSplit = true;
- snd_data_pkt->isSplit = true;
- state->isSplit = true;
- state->outstanding = 2;
- state->mainPkt = data_pkt;
-
- DPRINTF(LSQUnit, "contextid = %d, %d\n",
- sreqLow->contextId(), sreqHigh->contextId());
- req = sreqLow;
- }
-
- assert(!req->isStrictlyOrdered());
- assert(!req->isMmappedIpr());
-
- DPRINTF(LSQUnit, "D-Cache: Validating/Exposing load idx:%i PC:%s "
- "to Addr:%#x, data:%#x [sn:%lli]\n",
- loadVLDIdx, load_inst->pcState(),
- //FIXME: resultData not memData
- req->getPaddr(), (int)*(load_inst->memData),
- load_inst->seqNum);
-
- bool successful_expose = true;
- bool completedFirst = false;
-
- if (!dcachePort->sendTimingReq(data_pkt)){
- DPRINTF(IEW, "D-Cache became blocked when "
- "validating [sn:%lli], will retry later\n",
- load_inst->seqNum);
- successful_expose = false;
- } else {
- if (split) {
- // If split, try to send the second packet too
- completedFirst = true;
- assert(snd_data_pkt);
-
- if (!dcachePort->sendTimingReq(snd_data_pkt)){
- state->complete();
- state->cacheBlocked = true;
- successful_expose = false;
- DPRINTF(IEW, "D-Cache became blocked when validating"
- " [sn:%lli] second packet, will retry later\n",
- load_inst->seqNum);
- }
- }
- }
-
- if (!successful_expose){
- if (!split) {
- delete state;
- delete data_pkt;
- }else{
- if (!completedFirst){
- delete state;
- delete data_pkt;
- delete snd_data_pkt;
- } else {
- delete snd_data_pkt;
- }
- }
- //cpu->wakeCPU(); // This will cause issue(wrong activity count and affects the memory transactions
- ++lsqCacheBlocked;
- break;
- } else {
- // if all the packets we sent out is expose,
- // we assume the expose is alreay completed
- if (onlyExpose) {
- load_inst->setExposeCompleted();
- numExposes++;
- } else {
- numValidates++;
- }
- if (load_inst->needExposeOnly()){
- numConvertedExposes++;
- }
- if (load_inst->isExecuted() && load_inst->isExposeCompleted()
- && !load_inst->isSquashed()){
- DPRINTF(LSQUnit, "Expose finished. Execution done."
- "Send inst [sn:%lli] to commit stage.\n",
- load_inst->seqNum);
- iewStage->instToCommit(load_inst);
- iewStage->activityThisCycle();
- } else{
- DPRINTF(LSQUnit, "Need validation or execution not finishes."
- "Need to wait for readResp/validateResp "
- "for inst [sn:%lli].\n",
- load_inst->seqNum);
- }
-
- load_inst->setExposeSent();
- --loadsToVLD;
- incrLdIdx(loadVLDIdx);
- if (!split){
- setSpecBuffState(req);
- } else {
- setSpecBuffState(sreqLow);
- setSpecBuffState(sreqHigh);
- }
- }
- }
-
- DPRINTF(LSQUnit, "Send validate/expose for %d insts. loadsToVLD=%d"
- ". loadHead=%d. loadTail=%d.\n",
- old_loadsToVLD-loadsToVLD, loadsToVLD, loadHead,
- loadTail);
-
- assert(loads>=0 && loadsToVLD >= 0);
-
- return old_loadsToVLD-loadsToVLD;
-}
-
-
-
template <class Impl>
void
@@ -1614,12 +1247,6 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
stallingLoadIdx = 0;
}
- if (loadQueue[load_idx]->needPostFetch() &&
- loadQueue[load_idx]->readyToExpose() &&
- !loadQueue[load_idx]->isExposeSent()){
- loadsToVLD --;
- }
-
// Clear the smart pointer to make sure it is decremented.
loadQueue[load_idx]->setSquashed();
loadQueue[load_idx] = NULL;
@@ -1723,53 +1350,6 @@ LSQUnit<Impl>::storePostSend(PacketPtr pkt)
incrStIdx(storeWBIdx);
}
-
-
-template <class Impl>
-void
-LSQUnit<Impl>::completeValidate(DynInstPtr &inst, PacketPtr pkt)
-{
- iewStage->wakeCPU();
- // if instruction fault, no need to check value,
- // return directly
- //assert(!inst->needExposeOnly());
- if (inst->isExposeCompleted() || inst->isSquashed()){
- //assert(inst->fault != NoFault);
- //Already sent to commit, do nothing
- return;
- }
- //Check validation result
- bool validation_fail = false;
- if (!inst->isL1HitLow() && inst->vldData[0]==0) {
- validation_fail = true;
- } else {
- if (pkt->isSplit && !inst->isL1HitHigh()
- && inst->vldData[1]==0){
- validation_fail = true;
- }
- }
- if (validation_fail){
- // Mark the load for re-execution
- inst->fault = std::make_shared<ReExec>();
- inst->validationFail(true);
- DPRINTF(LSQUnit, "Validation failed.\n",
- inst->seqNum);
- }
-
- inst->setExposeCompleted();
- if ( inst->isExecuted() && inst->isExposeCompleted() ){
- DPRINTF(LSQUnit, "Validation finished. Execution done."
- "Send inst [sn:%lli] to commit stage.\n",
- inst->seqNum);
- iewStage->instToCommit(inst);
- iewStage->activityThisCycle();
- } else{
- DPRINTF(LSQUnit, "Validation done. Execution not finishes."
- "Need to wait for readResp for inst [sn:%lli].\n",
- inst->seqNum);
- }
-}
-
template <class Impl>
void
LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
@@ -1788,7 +1368,11 @@ LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
inst->fault==NoFault) &&
"in this case, we will put it into ROB twice.");
- if (!inst->isExecuted()) {
+ if (inst->fenceDelay()) {
+ DPRINTF(LSQUnit, "To write back a fence delayed spec load [sn:%lli].\n", inst->seqNum);
+ inst->onlyWaitForFence(true);
+ iewStage->instQueue.deferMemInst(inst);
+ } else if (!inst->isExecuted()) {
inst->setExecuted();
if (inst->fault == NoFault) {
@@ -1832,11 +1416,9 @@ LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
"on write back path");
// check whether the instruction can be committed
- if ( !inst->isExposeCompleted() && inst->needPostFetch() ){
- DPRINTF(LSQUnit, "Expose not finished. "
- "Wait until expose completion"
- " to send inst [sn:%lli] to commit stage\n", inst->seqNum);
- }else{
+ if ( inst->fenceDelay() ) {
+ DPRINTF(LSQUnit, "inst [sn:%lli] misses in spec load.\n", inst->seqNum);
+ } else {
DPRINTF(LSQUnit, "Expose and execution both finished. "
"Send inst [sn:%lli] to commit stage\n", inst->seqNum);
iewStage->instToCommit(inst);
@@ -1927,7 +1509,6 @@ LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
retryPkt = data_pkt;
return false;
}
- setSpecBuffState(data_pkt->req);
return true;
}
diff --git a/src/mem/protocol/MESI_Two_Level-L1cache.sm b/src/mem/protocol/MESI_Two_Level-L1cache.sm
index f5feb7e23..8496fda61 100644
--- a/src/mem/protocol/MESI_Two_Level-L1cache.sm
+++ b/src/mem/protocol/MESI_Two_Level-L1cache.sm
@@ -981,6 +981,12 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
sequencer.readCallback(address, cache_entry.DataBlk);
}
+ action(h_spec_load_miss, "hsm",
+ desc="Notify sequencer the spec load misses.")
+ {
+ sequencer.readCallback(address, cache_entry.DataBlk, true);
+ }
+
action(h_ifetch_hit, "hi", desc="Notify sequencer the instruction fetch completed.")
{
assert(is_valid(cache_entry));
@@ -1222,8 +1228,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
transition({NP,I}, SpecLoad, IX) {
- iw_allocateTBEWithoutCacheEntry;
- as_issueGETSPEC;
+ h_spec_load_miss;
k_popMandatoryQueue;
}
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 4a8e5ae02..090030f08 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -56,9 +56,7 @@ RubySequencerParams::create()
Sequencer::Sequencer(const Params *p)
: RubyPort(p), m_IncompleteTimes(MachineType_NUM),
- deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check"),
- m_specBuf(33),
- specBufferHitEvent([this]{ specBufferHitCallback(); }, "Sequencer spec buffer hit")
+ deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check")
{
m_outstanding_count = 0;
@@ -429,18 +427,6 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
initialRequestTime, forwardRequestTime, firstResponseTime);
}
-bool Sequencer::updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress) {
- uint8_t idx = pkt->reqIdx;
- SBE& sbe = m_specBuf[idx];
- int blkIdx = pkt->isFirst() ? 0 : 1;
- SBB& sbb = sbe.blocks[blkIdx];
- if (makeLineAddress(sbb.reqAddress) == dataAddress) {
- sbb.data = data;
- return true;
- }
- return false;
-}
-
// [InvisiSpec] Called by Ruby to send a response to CPU.
void
Sequencer::readCallback(Addr address, DataBlock& data,
@@ -466,71 +452,19 @@ Sequencer::readCallback(Addr address, DataBlock& data,
PacketPtr pkt = request->pkt;
if (pkt->isSpec()) {
- assert(!pkt->onlyAccessSpecBuff());
DPRINTFR(SpecBuffer, "%10s SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
- updateSBB(pkt, data, address);
if (!externalHit) {
pkt->setL1Hit();
}
- } else if (pkt->isExpose()) {
- DPRINTFR(SpecBuffer, "%10s EXPOSE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
- } else if (pkt->isValidate()) {
- DPRINTFR(SpecBuffer, "%10s VALIDATE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
- uint8_t idx = pkt->reqIdx;
- SBE& sbe = m_specBuf[idx];
- int blkIdx = pkt->isFirst() ? 0 : 1;
- SBB& sbb = sbe.blocks[blkIdx];
- assert(makeLineAddress(sbb.reqAddress) == address);
- if (!memcmp(sbb.data.getData(getOffset(pkt->getAddr()), pkt->getSize()), data.getData(getOffset(pkt->getAddr()), pkt->getSize()), pkt->getSize())) {
- *(pkt->getPtr<uint8_t>()) = 1;
- } else {
- // std::ostringstream os;
- // sbb.data.print(os);
- // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
- // os.str("");
- // data.print(os);
- // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
- *(pkt->getPtr<uint8_t>()) = 0;
- }
}
- for (auto& dependentPkt : request->dependentSpecRequests) {
- assert(!dependentPkt->onlyAccessSpecBuff());
- DPRINTFR(SpecBuffer, "%10s Merged SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), dependentPkt->reqIdx, dependentPkt->isFirst()? 0 : 1, printAddress(dependentPkt->getAddr()));
- assert(dependentPkt->isSpec());
- updateSBB(dependentPkt, data, address);
- if (!externalHit) {
- dependentPkt->setL1Hit();
- }
- memcpy(dependentPkt->getPtr<uint8_t>(),
- data.getData(getOffset(dependentPkt->getAddr()), dependentPkt->getSize()),
- dependentPkt->getSize());
- ruby_hit_callback(dependentPkt);
- }
+ assert(!pkt->isExpose());
+ assert(!pkt->isValidate());
hitCallback(request, data, true, mach, externalHit,
initialRequestTime, forwardRequestTime, firstResponseTime);
}
-void
-Sequencer::specBufferHitCallback()
-{
- assert(m_specRequestQueue.size());
- while (m_specRequestQueue.size()) {
- auto specReq = m_specRequestQueue.front();
- if (specReq.second <= curTick()) {
- PacketPtr pkt = specReq.first;
- assert(pkt->onlyAccessSpecBuff());
- DPRINTFR(SpecBuffer, "%10s SB Hit Callback (idx=%d, addr=%#x)\n", curTick(), pkt->reqIdx, printAddress(pkt->getAddr()));
- ruby_hit_callback(pkt);
- m_specRequestQueue.pop();
- } else {
- schedule(specBufferHitEvent, specReq.second);
- break;
- }
- }
-}
-
// [InvisiSpec] Response on the way from Ruby to CPU
void
Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
@@ -567,7 +501,9 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
data.setData(pkt->getConstPtr<uint8_t>(),
getOffset(request_address), pkt->getSize());
} else if (!pkt->isFlush() && !pkt->isExpose() && !pkt->isValidate()) {
- if ((type == RubyRequestType_LD) ||
+ if (type == RubyRequestType_SPEC_LD && externalHit) {
+ DPRINTF(RubySequencer, "spec load miss!\n");
+ } else if ((type == RubyRequestType_LD) ||
(type == RubyRequestType_SPEC_LD) ||
(type == RubyRequestType_IFETCH) ||
(type == RubyRequestType_RMW_Read) ||
@@ -642,51 +578,10 @@ Sequencer::makeRequest(PacketPtr pkt)
if (pkt->isSpec()) {
assert(pkt->cmd == MemCmd::ReadSpecReq);
assert(pkt->isSplit || pkt->isFirst());
- uint8_t idx = pkt->reqIdx;
- SBE& sbe = m_specBuf[idx];
- sbe.isSplit = pkt->isSplit;
- int blkIdx = pkt->isFirst() ? 0 : 1;
- SBB& sbb = sbe.blocks[blkIdx];
- sbb.reqAddress = pkt->getAddr();
- sbb.reqSize = pkt->getSize();
- if (pkt->onlyAccessSpecBuff()) {
- int srcIdx = pkt->srcIdx;
- SBE& srcEntry = m_specBuf[srcIdx];
- if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[0].reqAddress)) {
- sbb.data = srcEntry.blocks[0].data;
- } else if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[1].reqAddress)) {
- sbb.data = srcEntry.blocks[1].data;
- } else {
- fatal("Requested address %#x is not present in the spec buffer\n", printAddress(sbb.reqAddress));
- }
- memcpy(pkt->getPtr<uint8_t>(),
- sbb.data.getData(getOffset(sbb.reqAddress), sbb.reqSize),
- sbb.reqSize);
- m_specRequestQueue.push({pkt, curTick()});
- DPRINTFR(SpecBuffer, "%10s SB Hit (idx=%d, addr=%#x) on (srcIdx=%d)\n", curTick(), idx, printAddress(sbb.reqAddress), srcIdx);
- if (!specBufferHitEvent.scheduled()) {
- schedule(specBufferHitEvent, clockEdge(Cycles(1)));
- }
- return RequestStatus_Issued;
- } else {
- // assert it is not in the buffer
- primary_type = secondary_type = RubyRequestType_SPEC_LD;
- }
+ // assert it is not in the buffer
+ primary_type = secondary_type = RubyRequestType_SPEC_LD;
} else if (pkt->isExpose() || pkt->isValidate()) {
- assert(pkt->cmd == MemCmd::ExposeReq || pkt->cmd == MemCmd::ValidateReq);
- assert(pkt->isSplit || pkt->isFirst());
- uint8_t idx = pkt->reqIdx;
- SBE& sbe = m_specBuf[idx];
- sbe.isSplit = pkt->isSplit;
- int blkIdx = pkt->isFirst() ? 0 : 1;
- SBB& sbb = sbe.blocks[blkIdx];
- if (sbb.reqAddress != pkt->getAddr()) {
- fatal("sbb.reqAddress != pkt->getAddr: %#x != %#x\n", printAddress(sbb.reqAddress), printAddress(pkt->getAddr()));
- }
- if (sbb.reqSize != pkt->getSize()) {
- fatal("sbb.reqSize != pkt->getSize(): %d != %d\n", sbb.reqSize, pkt->getSize());
- }
- primary_type = secondary_type = RubyRequestType_EXPOSE;
+ assert(false);
} else if (pkt->isLLSC()) {
//
// Alpha LL/SC instructions need to be handled carefully by the cache
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index 66ff92777..8e1f08a48 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -256,10 +256,6 @@ class Sequencer : public RubyPort
std::vector<Stats::Counter> m_IncompleteTimes;
EventFunctionWrapper deadlockCheckEvent;
-
- std::vector<SBE> m_specBuf;
- std::queue<std::pair<PacketPtr, Tick>> m_specRequestQueue;
- EventFunctionWrapper specBufferHitEvent;
};
inline std::ostream&