From b0e609d5cf6961bb9b3f12065659e1c42c13ef06 Mon Sep 17 00:00:00 2001
From: Iru Cai <mytbk920423@gmail.com>
Date: Sun, 12 May 2019 14:34:21 +0800
Subject: Complete spec loads only on L1 hit; fence-delay on miss

---
 src/cpu/o3/iew_impl.hh                     |   2 +-
 src/cpu/o3/inst_queue_impl.hh              |   2 -
 src/cpu/o3/lsq.hh                          |   8 -
 src/cpu/o3/lsq_impl.hh                     |  23 --
 src/cpu/o3/lsq_unit.hh                     |  94 ------
 src/cpu/o3/lsq_unit_impl.hh                | 453 ++---------------------------
 src/mem/protocol/MESI_Two_Level-L1cache.sm |   9 +-
 src/mem/ruby/system/Sequencer.cc           | 123 +-------
 src/mem/ruby/system/Sequencer.hh           |   4 -
 9 files changed, 34 insertions(+), 684 deletions(-)
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 5b67e4c3c..6af8b4563 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1193,7 +1193,7 @@ DefaultIEW<Impl>::executeInsts()
     // if we successfully commit sth, then we need to activate the stage or somehow
     // problems happen when interacting with squash
     // NOTE: we always send validations before execute load requests 
-    ldstQueue.exposeLoads();
+    // ldstQueue.exposeLoads() removed: loads are no longer exposed/validated.
     
     // Uncomment this if you want to see all available instructions.
     // @todo This doesn't actually work anymore, we should fix it.
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 980f29b35..0a6d309fe 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1184,11 +1184,9 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
         // for both (2, 3) we need to restart the translation
         if ( (*it)->translationCompleted() 
                 || ((*it)->onlyWaitForFence() && !(*it)->fenceDelay())
-                || ((*it)->onlyWaitForExpose() && (*it)->readyToExpose())
                 || (*it)->isSquashed()) {
             DynInstPtr mem_inst = std::move(*it);
             mem_inst->onlyWaitForFence(false);
-            mem_inst->onlyWaitForExpose(false);
             deferredMemInsts.erase(it);
             return mem_inst;
         }
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index e5c35a3a6..0e18aa145 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -135,14 +135,6 @@ class LSQ {
     void writebackStores(ThreadID tid);
 
 
-    /** [mengjia]
-     * Attempts to validate loads until all cache ports are used or the
-     * interface becomes blocked.
-     */
-    int exposeLoads();
-    /** Same as above, but only for one thread. */
-    int exposeLoads(ThreadID tid);
-
     /** [mengjia]
      * attempt to update FenceDelay state for load insts
      */
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 24066cd4b..b6742070e 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -292,29 +292,6 @@ LSQ<Impl>::writebackStores()
     }
 }
 
-// [mengjia]
-template<class Impl>
-int 
-LSQ<Impl>::exposeLoads()
-{
-    list<ThreadID>::iterator threads = activeThreads->begin();
-    list<ThreadID>::iterator end = activeThreads->end();
-
-    int exposedLoads = 0;
-    while (threads != end) {
-        ThreadID tid = *threads++;
-
-        if (numLoadsToVLD(tid) > 0) {
-            DPRINTF(Writeback,"[tid:%i] Validate loads. %i loads "
-                "available for Validate.\n", tid, numLoadsToVLD(tid));
-        }
-
-        exposedLoads += thread[tid].exposeLoads();
-    }
-    return exposedLoads;
-}
-
-
 // [mengjia]
 template<class Impl>
 void
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 1c8b98f2e..711447f31 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -141,12 +141,6 @@ class LSQUnit {
      */
     void checkSnoop(PacketPtr pkt);
 
-    // [InvisiSpec] check whether current request will hit in the
-    // spec buffer or not
-    int checkSpecBuffHit(const RequestPtr req, const int req_idx);
-    void setSpecBuffState(const RequestPtr req);
-
-    bool checkPrevLoadsExecuted(const int req_idx);
     /** Executes a load instruction. */
     Fault executeLoad(const DynInstPtr &inst);
 
@@ -165,9 +159,6 @@ class LSQUnit {
     /** Writes back stores. */
     void writebackStores();
 
-    /** [mengjia] Validate loads. */
-    int exposeLoads();
-
     /** [mengjia] Update Visbible State.
      * In the mode defence relying on fence: setup fenceDelay state.
      * In the mode defence relying on invisibleSpec:
@@ -257,9 +248,6 @@ class LSQUnit {
     /** Writes back the instruction, sending it to IEW. */
     void writeback(const DynInstPtr &inst, PacketPtr pkt);
 
-    // [InvisiSpec] complete Validates
-    void completeValidate(DynInstPtr &inst, PacketPtr pkt);
-
     /** Writes back a store that couldn't be completed the previous cycle. */
     void writebackPendingStore();
 
@@ -872,46 +860,12 @@ LSQUnit<Impl>::read(const RequestPtr &req,
         fst_data_pkt = data_pkt;
 
         fst_data_pkt->setFirst();
-        if (sendSpecRead){
-            int src_idx = checkSpecBuffHit(req, load_idx);
-            if (src_idx != -1) {
-                if (allowSpecBuffHit){
-                    data_pkt->setOnlyAccessSpecBuff();
-                }
-                data_pkt->srcIdx = src_idx;
-                specBuffHits++;
-            }else{
-                specBuffMisses++;
-            }
-        }
         fst_data_pkt->reqIdx = load_idx;
     } else {
         // Create the split packets.
         if(sendSpecRead){
-
             fst_data_pkt = Packet::createReadSpec(sreqLow);
-            int fst_src_idx = checkSpecBuffHit(sreqLow, load_idx);
-            if ( fst_src_idx != -1 ) {
-                if (allowSpecBuffHit){
-                    fst_data_pkt->setOnlyAccessSpecBuff();
-                }
-                fst_data_pkt->srcIdx = fst_src_idx;
-                specBuffHits++;
-            } else {
-                specBuffMisses++;
-            }
-
             snd_data_pkt = Packet::createReadSpec(sreqHigh);
-            int snd_src_idx = checkSpecBuffHit(sreqHigh, load_idx);
-            if ( snd_src_idx != -1 ) {
-                if (allowSpecBuffHit){
-                    snd_data_pkt->setOnlyAccessSpecBuff();
-                }
-                snd_data_pkt->srcIdx = snd_src_idx;
-                specBuffHits++;
-            } else {
-                specBuffMisses++;
-            }
         }else{
             fst_data_pkt = Packet::createRead(sreqLow);
             snd_data_pkt = Packet::createRead(sreqHigh);
@@ -1002,58 +956,10 @@ LSQUnit<Impl>::read(const RequestPtr &req,
     // Set everything ready for expose/validation after the read is
     // successfully sent out
     if(sendSpecRead){ // sending actual request
-
-            // [mengjia] Here we set the needExposeOnly flag
-            if (needsTSO && !load_inst->isDataPrefetch()){
-                // need to check whether previous load_instructions specComplete or not
-                if ( checkPrevLoadsExecuted(load_idx) ){
-                    load_inst->needExposeOnly(true);
-                    DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as "
-                            "needExposeOnly\n",
-                        load_inst->pcState(), load_inst->seqNum);
-                } else {
-                    DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as "
-                            "needValidation\n",
-                        load_inst->pcState(), load_inst->seqNum);
-                }
-            }else{
-                //if RC, always only need expose
-                load_inst->needExposeOnly(true);
-                DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as needExposeOnly\n",
-                    load_inst->pcState(), load_inst->seqNum);
-            }
-
-            load_inst->needPostFetch(true);
             assert(!req->isMmappedIpr());
-            //save expose requestPtr
-            if (TheISA::HasUnalignedMemAcc && sreqLow) {
-                load_inst->postSreqLow = std::make_shared<Request>(*sreqLow);
-                load_inst->postSreqHigh = std::make_shared<Request>(*sreqHigh);
-                load_inst->postReq = nullptr;
-                DPRINTF(LSQUnit, "created validation/expose"
-                        " request for inst [sn:%lli]"
-                        " reqLow=%#x, reqHigh=%#x\n",
-                        load_inst->seqNum,
-                        load_inst->postSreqLow->getVaddr(),
-                        load_inst->postSreqHigh->getVaddr());
-            }else{
-                load_inst->postReq = std::make_shared<Request>(*req);
-                load_inst->postSreqLow = nullptr;
-                load_inst->postSreqHigh = nullptr;
-                DPRINTF(LSQUnit, "created validation/expose"
-                        " request for inst [sn:%lli]"
-                        " req=%#x\n",
-                        load_inst->seqNum, load_inst->postReq->getVaddr());
-            }
     } else {
         load_inst->setExposeCompleted();
         load_inst->needPostFetch(false);
-        if (TheISA::HasUnalignedMemAcc && sreqLow) {
-            setSpecBuffState(sreqLow);
-            setSpecBuffState(sreqHigh);
-        } else {
-            setSpecBuffState(req);
-        }
     }
 
     return NoFault;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 56870b5a3..ebc963d5b 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -117,8 +117,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
         } else {
             inst->setL1HitLow();
         }
-    } else if (!pkt->isSpec()) {
-        setSpecBuffState(pkt->req);
     }
 
     // If this is a split access, wait until all packets are received.
@@ -126,6 +124,12 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
         return;
     }
 
+    if ( pkt->isSpec() && pkt->isRead() && (!pkt->isL1Hit()) ) {
+        DPRINTF(LSQUnit, "spec load miss for inst [sn:%lli], fence it.\n",
+                inst->seqNum);
+        inst->fenceDelay(true);
+    }
+
     assert(!cpu->switchedOut());
     if (!inst->isSquashed()) {
         if (!state->noWB) {
@@ -147,7 +151,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
         }
 
         if (pkt->isValidate() || pkt->isExpose()) {
-            completeValidate(inst, pkt);
+          assert(false);
         }
     }
 
@@ -628,103 +632,6 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
     return;
 }
 
-template <class Impl>
-bool
-LSQUnit<Impl>::checkPrevLoadsExecuted(int req_idx)
-{
-    int load_idx = loadHead;
-    while (load_idx != req_idx){
-        if (!loadQueue[load_idx]->isExecuted()){
-            // if at least on load ahead of current load
-            // does not finish spec access,
-            // then return false
-            return false;
-        }
-        incrLdIdx(load_idx);
-    }
-
-    //if all executed, return true
-    return true;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::setSpecBuffState(RequestPtr expose_req)
-{
-    Addr req_eff_addr1 = expose_req->getPaddr() & cacheBlockMask;
-
-    int load_idx = loadHead;
-    while (load_idx != loadTail){
-        DynInstPtr ld_inst = loadQueue[load_idx];
-        if (ld_inst->effAddrValid()){
-
-            Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask;
-            Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask;
-            if (ld_eff_addr1 == req_eff_addr1){
-                ld_inst->setSpecBuffObsoleteLow();
-            } else if (ld_eff_addr2 == req_eff_addr1){
-                ld_inst->setSpecBuffObsoleteHigh();
-            }
-        }
-        incrLdIdx(load_idx);
-    }
-}
-
-
-template <class Impl>
-int
-LSQUnit<Impl>::checkSpecBuffHit(RequestPtr req, int req_idx)
-{
-
-    Addr req_eff_addr1 = req->getPaddr() & cacheBlockMask;
-    //Addr req_eff_addr2 = (req->getPaddr() + req->getSize()-1) & cacheBlockMask;
-    // the req should be within the same cache line
-    //assert (req_eff_addr1 == req_eff_addr2);
-    assert (!loadQueue[req_idx]->isExecuted());
-
-    int load_idx = loadHead;
-
-    while (load_idx != loadTail){
-        DynInstPtr ld_inst = loadQueue[load_idx];
-        if (ld_inst->effAddrValid()){
-            Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask;
-            Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask;
-
-            if ((req_eff_addr1 == ld_eff_addr1 && ld_inst->isL1HitLow())
-                || (req_eff_addr1 == ld_eff_addr2 && ld_inst->isL1HitHigh())){
-                return -1;
-                //already in L1, do not copy from buffer
-            } else {
-
-                if (ld_inst->isExecuted() && ld_inst->needPostFetch()
-                    && !ld_inst->isSquashed() && ld_inst->fault==NoFault){
-                    if (req_eff_addr1 == ld_eff_addr1 && !ld_inst->isL1HitLow()
-                            && !ld_inst->isSpecBuffObsoleteLow()){
-                        DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] "
-                            "and [sn:%lli] (low) at address %#x\n",
-                            loadQueue[req_idx]->seqNum, ld_inst->seqNum,
-                            req_eff_addr1);
-                        return load_idx;
-                    } else if ( ld_eff_addr2 !=0  &&
-                        req_eff_addr1 == ld_eff_addr2 && !ld_inst->isL1HitHigh()
-                        && !ld_inst->isSpecBuffObsoleteHigh()){
-                        DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] "
-                            "and [sn:%lli] (high) at address %#x\n",
-                            loadQueue[req_idx]->seqNum, ld_inst->seqNum,
-                            req_eff_addr1);
-                        return load_idx;
-                    }
-                }
-            }
-        }
-        incrLdIdx(load_idx);
-    }
-
-    return -1;
-}
-
-
-
 template <class Impl>
 Fault
 LSQUnit<Impl>::checkViolations(int load_idx, const DynInstPtr &inst)
@@ -1064,6 +971,7 @@ LSQUnit<Impl>::updateVisibleState()
                     }
                 }
                 inst->readyToExpose(true);
+                inst->fenceDelay(false);
             } else {
                 if (!useIFT) {
                   if (inst->readyToExpose()){
@@ -1097,10 +1005,10 @@ LSQUnit<Impl>::updateVisibleState()
                   } else {
                     DPRINTF(LSQUnit, "load inst [sn:%lli] %s is an unsafe speculated load, but source registers are not tainted.\n", inst->seqNum, inst->pcState());
                     inst->readyToExpose(true);
+                    inst->fenceDelay(false);
                   }
                 }
             }
-            inst->fenceDelay(false);
         } else {
             inst->readyToExpose(true);
             inst->fenceDelay(false);
@@ -1109,281 +1017,6 @@ LSQUnit<Impl>::updateVisibleState()
     }
 }
 
-// [InvisiSpec] validate loads
-template <class Impl>
-int
-LSQUnit<Impl>::exposeLoads()
-{
-    if(!isInvisibleSpec){
-        assert(loadsToVLD==0
-            && "request validation on Non invisible Spec mode");
-    }
-
-    int old_loadsToVLD = loadsToVLD;
-
-    DPRINTF(LSQUnit, "starting exposeLoads(): loadsToVLD = %d\n", loadsToVLD);
-
-    // [InvisiSpec] Note:
-    // need to iterate from the head every time
-    // since the load can be exposed out-of-order
-    int loadVLDIdx = loadHead;
-
-    while (loadsToVLD > 0 &&
-        loadVLDIdx != loadTail &&
-        loadQueue[loadVLDIdx]) {
-
-        if (loadQueue[loadVLDIdx]->isSquashed()){
-            incrLdIdx(loadVLDIdx);
-            continue;
-        }
-        // skip the loads that either do not need to expose
-        // or exposed already
-        if(!loadQueue[loadVLDIdx]->needPostFetch()
-                || loadQueue[loadVLDIdx]->isExposeSent() ){
-            incrLdIdx(loadVLDIdx);
-            continue;
-        }
-
-        DynInstPtr load_inst = loadQueue[loadVLDIdx];
-        if (loadQueue[loadVLDIdx]->fault!=NoFault){
-            //load is executed, so it wait for expose complete
-            //to send it to commit, regardless of whether it is ready
-            //to expose
-            load_inst->setExposeCompleted();
-            load_inst->setExposeSent();
-            loadsToVLD--;
-            if (load_inst->isExecuted()){
-                DPRINTF(LSQUnit, "Execute finished and gets violation fault."
-                    "Send inst [sn:%lli] to commit stage.\n",
-                    load_inst->seqNum);
-                    iewStage->instToCommit(load_inst);
-                    iewStage->activityThisCycle();
-            }
-            incrLdIdx(loadVLDIdx);
-            continue;
-        }
-
-        // skip the loads that need expose but
-        // are not ready
-        if (loadQueue[loadVLDIdx]->needPostFetch()
-                && !loadQueue[loadVLDIdx]->readyToExpose()){
-            incrLdIdx(loadVLDIdx);
-            continue;
-        }
-
-        assert(loadQueue[loadVLDIdx]->needPostFetch()
-                && loadQueue[loadVLDIdx]->readyToExpose() );
-
-        assert(!load_inst->isCommitted());
-
-
-        RequestPtr req = load_inst->postReq;
-        RequestPtr sreqLow = load_inst->postSreqLow;
-        RequestPtr sreqHigh = load_inst->postSreqHigh;
-
-        // we should not have both req and sreqLow not NULL
-        assert( !(req && sreqLow));
-
-        if (req) {
-          DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]"
-                  " PC= %s. req=%#x\n",
-                  load_inst->seqNum, load_inst->pcState(),
-                  req->getVaddr());
-        } else {
-          DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]"
-                  " PC= %s. reqLow=%#x, reqHigh=%#x\n",
-                  load_inst->seqNum, load_inst->pcState(),
-                  load_inst->postSreqLow->getVaddr(),
-                  load_inst->postSreqHigh->getVaddr());
-        }
-
-        bool split = false;
-        if (TheISA::HasUnalignedMemAcc && sreqLow) {
-            split = true;
-        } else {
-            assert(req);
-        }
-
-        if (load_inst->isL1HitLow() && (!split || load_inst->isL1HitHigh()) ) {
-            load_inst->setExposeCompleted();
-            load_inst->setExposeSent();
-            --loadsToVLD;
-            incrLdIdx(loadVLDIdx);
-            iewStage->instToCommit(load_inst);
-            iewStage->activityThisCycle();
-            continue;
-        }
-
-        PacketPtr data_pkt = NULL;
-        PacketPtr snd_data_pkt = NULL;
-
-        LSQSenderState *state = new LSQSenderState;
-        state->isLoad = false;
-        state->idx = loadVLDIdx;
-        state->inst = load_inst;
-        state->noWB = true;
-
-        bool onlyExpose = false;
-        if (!split) {
-            if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){
-                data_pkt = Packet::createExpose(req);
-                onlyExpose = true;
-            }else {
-                data_pkt = Packet::createValidate(req);
-                if (!load_inst->vldData)
-                    load_inst->vldData = new uint8_t[1];
-                data_pkt->dataStatic(load_inst->vldData);
-            }
-            data_pkt->senderState = state;
-            data_pkt->setFirst();
-            data_pkt->reqIdx = loadVLDIdx;
-            DPRINTF(LSQUnit, "contextid = %d\n", req->contextId());
-        } else {
-            // allocate memory if we need at least one validation
-            if (!load_inst->needExposeOnly() &&
-                (!load_inst->isL1HitLow() || !load_inst->isL1HitHigh())){
-                if (!load_inst->vldData)
-                    load_inst->vldData = new uint8_t[2];
-            } else {
-                onlyExpose = true;
-            }
-
-            // Create the split packets. - first one
-            if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){
-                data_pkt = Packet::createExpose(sreqLow);
-            }else{
-                data_pkt = Packet::createValidate(sreqLow);
-                assert(load_inst->vldData);
-                data_pkt->dataStatic(load_inst->vldData);
-            }
-
-            // Create the split packets. - second one
-            if (load_inst->needExposeOnly() || load_inst->isL1HitHigh()){
-                snd_data_pkt = Packet::createExpose(sreqHigh);
-            } else {
-                snd_data_pkt = Packet::createValidate(sreqHigh);
-                assert(load_inst->vldData);
-                snd_data_pkt->dataStatic(&(load_inst->vldData[1]));
-            }
-
-            data_pkt->senderState = state;
-            data_pkt->setFirst();
-            snd_data_pkt->senderState = state;
-            data_pkt->reqIdx = loadVLDIdx;
-            snd_data_pkt->reqIdx = loadVLDIdx;
-
-            data_pkt->isSplit = true;
-            snd_data_pkt->isSplit = true;
-            state->isSplit = true;
-            state->outstanding = 2;
-            state->mainPkt = data_pkt;
-
-            DPRINTF(LSQUnit, "contextid = %d, %d\n",
-                    sreqLow->contextId(), sreqHigh->contextId());
-            req = sreqLow;
-        }
-
-        assert(!req->isStrictlyOrdered());
-        assert(!req->isMmappedIpr());
-
-        DPRINTF(LSQUnit, "D-Cache: Validating/Exposing load idx:%i PC:%s "
-                "to Addr:%#x, data:%#x [sn:%lli]\n",
-                loadVLDIdx, load_inst->pcState(),
-                //FIXME: resultData not memData
-                req->getPaddr(), (int)*(load_inst->memData),
-                load_inst->seqNum);
-
-        bool successful_expose = true;
-        bool completedFirst = false;
-
-        if (!dcachePort->sendTimingReq(data_pkt)){
-            DPRINTF(IEW, "D-Cache became blocked when "
-                "validating [sn:%lli], will retry later\n",
-                load_inst->seqNum);
-            successful_expose = false;
-        } else {
-            if (split) {
-                // If split, try to send the second packet too
-                completedFirst = true;
-                assert(snd_data_pkt);
-
-                if (!dcachePort->sendTimingReq(snd_data_pkt)){
-                    state->complete();
-                    state->cacheBlocked = true;
-                    successful_expose = false;
-                    DPRINTF(IEW, "D-Cache became blocked when validating"
-                        " [sn:%lli] second packet, will retry later\n",
-                        load_inst->seqNum);
-                }
-            }
-        }
-
-        if (!successful_expose){
-            if (!split) {
-                delete state;
-                delete data_pkt;
-            }else{
-                if (!completedFirst){
-                    delete state;
-                    delete data_pkt;
-                    delete snd_data_pkt;
-                } else {
-                    delete snd_data_pkt;
-                }
-            }
-            //cpu->wakeCPU();  // This will cause issue(wrong activity count and affects the memory transactions
-            ++lsqCacheBlocked;
-            break;
-        } else {
-            // if all the packets we sent out is expose,
-            // we assume the expose is alreay completed
-            if (onlyExpose) {
-                load_inst->setExposeCompleted();
-                numExposes++;
-            } else {
-                numValidates++;
-            }
-            if (load_inst->needExposeOnly()){
-                numConvertedExposes++;
-            }
-            if (load_inst->isExecuted() && load_inst->isExposeCompleted()
-                    && !load_inst->isSquashed()){
-                DPRINTF(LSQUnit, "Expose finished. Execution done."
-                    "Send inst [sn:%lli] to commit stage.\n",
-                    load_inst->seqNum);
-                    iewStage->instToCommit(load_inst);
-                    iewStage->activityThisCycle();
-            } else{
-                DPRINTF(LSQUnit, "Need validation or execution not finishes."
-                    "Need to wait for readResp/validateResp "
-                    "for inst [sn:%lli].\n",
-                    load_inst->seqNum);
-            }
-
-            load_inst->setExposeSent();
-            --loadsToVLD;
-            incrLdIdx(loadVLDIdx);
-            if (!split){
-                setSpecBuffState(req);
-            } else {
-                setSpecBuffState(sreqLow);
-                setSpecBuffState(sreqHigh);
-            }
-        }
-    }
-
-    DPRINTF(LSQUnit, "Send validate/expose for %d insts. loadsToVLD=%d"
-            ". loadHead=%d. loadTail=%d.\n",
-            old_loadsToVLD-loadsToVLD, loadsToVLD, loadHead,
-            loadTail);
-
-    assert(loads>=0 && loadsToVLD >= 0);
-
-    return old_loadsToVLD-loadsToVLD;
-}
-
-
-
 
 template <class Impl>
 void
@@ -1614,12 +1247,6 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
             stallingLoadIdx = 0;
         }
 
-        if (loadQueue[load_idx]->needPostFetch() &&
-                loadQueue[load_idx]->readyToExpose() &&
-                !loadQueue[load_idx]->isExposeSent()){
-            loadsToVLD --;
-        }
-
         // Clear the smart pointer to make sure it is decremented.
         loadQueue[load_idx]->setSquashed();
         loadQueue[load_idx] = NULL;
@@ -1723,53 +1350,6 @@ LSQUnit<Impl>::storePostSend(PacketPtr pkt)
     incrStIdx(storeWBIdx);
 }
 
-
-
-template <class Impl>
-void
-LSQUnit<Impl>::completeValidate(DynInstPtr &inst, PacketPtr pkt)
-{
-    iewStage->wakeCPU();
-    // if instruction fault, no need to check value,
-    // return directly
-    //assert(!inst->needExposeOnly());
-    if (inst->isExposeCompleted() || inst->isSquashed()){
-        //assert(inst->fault != NoFault);
-        //Already sent to commit, do nothing
-        return;
-    }
-    //Check validation result
-    bool validation_fail = false;
-    if (!inst->isL1HitLow() && inst->vldData[0]==0) {
-        validation_fail = true;
-    } else {
-        if (pkt->isSplit && !inst->isL1HitHigh()
-            && inst->vldData[1]==0){
-            validation_fail = true;
-        }
-    }
-    if (validation_fail){
-        // Mark the load for re-execution
-        inst->fault = std::make_shared<ReExec>();
-        inst->validationFail(true);
-        DPRINTF(LSQUnit, "Validation failed.\n",
-        inst->seqNum);
-    }
-
-    inst->setExposeCompleted();
-    if ( inst->isExecuted() && inst->isExposeCompleted() ){
-        DPRINTF(LSQUnit, "Validation finished. Execution done."
-            "Send inst [sn:%lli] to commit stage.\n",
-            inst->seqNum);
-            iewStage->instToCommit(inst);
-            iewStage->activityThisCycle();
-    } else{
-        DPRINTF(LSQUnit, "Validation done. Execution not finishes."
-            "Need to wait for readResp for inst [sn:%lli].\n",
-            inst->seqNum);
-    }
-}
-
 template <class Impl>
 void
 LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
@@ -1788,7 +1368,11 @@ LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
                 inst->fault==NoFault) &&
             "in this case, we will put it into ROB twice.");
 
-    if (!inst->isExecuted()) {
+    if (inst->fenceDelay()) {
+        DPRINTF(LSQUnit, "To write back a fence delayed spec load [sn:%lli].\n", inst->seqNum);
+        inst->onlyWaitForFence(true);
+        iewStage->instQueue.deferMemInst(inst);
+    } else if (!inst->isExecuted()) {
         inst->setExecuted();
 
         if (inst->fault == NoFault) {
@@ -1832,11 +1416,9 @@ LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
                 "on write back path");
 
         // check whether the instruction can be committed
-        if ( !inst->isExposeCompleted() && inst->needPostFetch() ){
-            DPRINTF(LSQUnit, "Expose not finished. "
-                "Wait until expose completion"
-                " to send inst [sn:%lli] to commit stage\n", inst->seqNum);
-        }else{
+        if ( inst->fenceDelay() ) {
+            DPRINTF(LSQUnit, "inst [sn:%lli] misses in spec load.\n", inst->seqNum);
+        } else {
             DPRINTF(LSQUnit, "Expose and execution both finished. "
                 "Send inst [sn:%lli] to commit stage\n", inst->seqNum);
             iewStage->instToCommit(inst);
@@ -1927,7 +1509,6 @@ LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
         retryPkt = data_pkt;
         return false;
     }
-    setSpecBuffState(data_pkt->req);
     return true;
 }
 
diff --git a/src/mem/protocol/MESI_Two_Level-L1cache.sm b/src/mem/protocol/MESI_Two_Level-L1cache.sm
index f5feb7e23..8496fda61 100644
--- a/src/mem/protocol/MESI_Two_Level-L1cache.sm
+++ b/src/mem/protocol/MESI_Two_Level-L1cache.sm
@@ -981,6 +981,12 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
     sequencer.readCallback(address, cache_entry.DataBlk);
   }
 
+  action(h_spec_load_miss, "hsm",
+         desc="Notify sequencer the spec load misses.")
+  {  // NOTE(review): run by the {NP,I} SpecLoad transition -- confirm cache_entry is valid in NP.
+    sequencer.readCallback(address, cache_entry.DataBlk, true);  // 3rd arg presumably externalHit (no L1-hit flag set) -- TODO confirm
+  }
+
   action(h_ifetch_hit, "hi", desc="Notify sequencer the instruction fetch completed.")
   {
     assert(is_valid(cache_entry));
@@ -1222,8 +1228,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
   }
 
   transition({NP,I}, SpecLoad, IX) {
-    iw_allocateTBEWithoutCacheEntry;
-    as_issueGETSPEC;
+    h_spec_load_miss;
     k_popMandatoryQueue;
   }
 
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 4a8e5ae02..090030f08 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -56,9 +56,7 @@ RubySequencerParams::create()
 
 Sequencer::Sequencer(const Params *p)
     : RubyPort(p), m_IncompleteTimes(MachineType_NUM),
-      deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check"),
-      m_specBuf(33),
-      specBufferHitEvent([this]{ specBufferHitCallback(); }, "Sequencer spec buffer hit")
+      deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check")
 {
     m_outstanding_count = 0;
 
@@ -429,18 +427,6 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
                 initialRequestTime, forwardRequestTime, firstResponseTime);
 }
 
-bool Sequencer::updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress) {
-    uint8_t idx = pkt->reqIdx;
-    SBE& sbe = m_specBuf[idx];
-    int blkIdx = pkt->isFirst() ? 0 : 1;
-    SBB& sbb = sbe.blocks[blkIdx];
-    if (makeLineAddress(sbb.reqAddress) == dataAddress) {
-        sbb.data = data;
-        return true;
-    }
-    return false;
-}
-
 // [InvisiSpec] Called by Ruby to send a response to CPU.
 void
 Sequencer::readCallback(Addr address, DataBlock& data,
@@ -466,71 +452,19 @@ Sequencer::readCallback(Addr address, DataBlock& data,
     
     PacketPtr pkt = request->pkt;
     if (pkt->isSpec()) {
-        assert(!pkt->onlyAccessSpecBuff());
         DPRINTFR(SpecBuffer, "%10s SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
-        updateSBB(pkt, data, address);
         if (!externalHit) {
             pkt->setL1Hit();
         }
-    } else if (pkt->isExpose()) {
-        DPRINTFR(SpecBuffer, "%10s EXPOSE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
-    } else if (pkt->isValidate()) {
-        DPRINTFR(SpecBuffer, "%10s VALIDATE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
-        uint8_t idx = pkt->reqIdx;
-        SBE& sbe = m_specBuf[idx];
-        int blkIdx = pkt->isFirst() ? 0 : 1;
-        SBB& sbb = sbe.blocks[blkIdx];
-        assert(makeLineAddress(sbb.reqAddress) == address);
-        if (!memcmp(sbb.data.getData(getOffset(pkt->getAddr()), pkt->getSize()), data.getData(getOffset(pkt->getAddr()), pkt->getSize()), pkt->getSize())) {
-            *(pkt->getPtr<uint8_t>()) = 1;
-        } else {
-            // std::ostringstream os;
-            // sbb.data.print(os);
-            // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
-            // os.str("");
-            // data.print(os);
-            // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
-            *(pkt->getPtr<uint8_t>()) = 0;
-        }
     }
 
-    for (auto& dependentPkt : request->dependentSpecRequests) {
-        assert(!dependentPkt->onlyAccessSpecBuff());
-        DPRINTFR(SpecBuffer, "%10s Merged SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), dependentPkt->reqIdx, dependentPkt->isFirst()? 0 : 1, printAddress(dependentPkt->getAddr()));
-        assert(dependentPkt->isSpec());
-        updateSBB(dependentPkt, data, address);
-        if (!externalHit) {
-            dependentPkt->setL1Hit();
-        }
-        memcpy(dependentPkt->getPtr<uint8_t>(),
-               data.getData(getOffset(dependentPkt->getAddr()), dependentPkt->getSize()),
-               dependentPkt->getSize());
-        ruby_hit_callback(dependentPkt);
-    }
+    assert(!pkt->isExpose());
+    assert(!pkt->isValidate());
 
     hitCallback(request, data, true, mach, externalHit,
                 initialRequestTime, forwardRequestTime, firstResponseTime);
 }
 
-void
-Sequencer::specBufferHitCallback()
-{
-    assert(m_specRequestQueue.size());
-    while (m_specRequestQueue.size()) {
-        auto specReq = m_specRequestQueue.front();
-        if (specReq.second <= curTick()) {
-            PacketPtr pkt = specReq.first;
-            assert(pkt->onlyAccessSpecBuff());
-            DPRINTFR(SpecBuffer, "%10s SB Hit Callback (idx=%d, addr=%#x)\n", curTick(), pkt->reqIdx, printAddress(pkt->getAddr()));
-            ruby_hit_callback(pkt);
-            m_specRequestQueue.pop();
-        } else {
-            schedule(specBufferHitEvent, specReq.second);
-            break;
-        }
-    }
-}
-
 // [InvisiSpec] Response on the way from Ruby to CPU
 void
 Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
@@ -567,7 +501,9 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
         data.setData(pkt->getConstPtr<uint8_t>(),
                      getOffset(request_address), pkt->getSize());
     } else if (!pkt->isFlush() && !pkt->isExpose() && !pkt->isValidate()) {
-        if ((type == RubyRequestType_LD) ||
+        if (type == RubyRequestType_SPEC_LD && externalHit) {
+            DPRINTF(RubySequencer, "spec load miss!\n");
+        } else if ((type == RubyRequestType_LD) ||
             (type == RubyRequestType_SPEC_LD) ||
             (type == RubyRequestType_IFETCH) ||
             (type == RubyRequestType_RMW_Read) ||
@@ -642,51 +578,10 @@ Sequencer::makeRequest(PacketPtr pkt)
     if (pkt->isSpec()) {
         assert(pkt->cmd == MemCmd::ReadSpecReq);
         assert(pkt->isSplit || pkt->isFirst());
-        uint8_t idx = pkt->reqIdx;
-        SBE& sbe = m_specBuf[idx];
-        sbe.isSplit = pkt->isSplit;
-        int blkIdx = pkt->isFirst() ? 0 : 1;
-        SBB& sbb = sbe.blocks[blkIdx];
-        sbb.reqAddress = pkt->getAddr();
-        sbb.reqSize = pkt->getSize();
-        if (pkt->onlyAccessSpecBuff()) {
-            int srcIdx = pkt->srcIdx;
-            SBE& srcEntry = m_specBuf[srcIdx];
-            if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[0].reqAddress)) {
-                sbb.data = srcEntry.blocks[0].data;
-            } else if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[1].reqAddress)) {
-                sbb.data = srcEntry.blocks[1].data;
-            } else {
-                fatal("Requested address %#x is not present in the spec buffer\n", printAddress(sbb.reqAddress));
-            }
-            memcpy(pkt->getPtr<uint8_t>(),
-                   sbb.data.getData(getOffset(sbb.reqAddress), sbb.reqSize),
-                   sbb.reqSize);
-            m_specRequestQueue.push({pkt, curTick()});
-            DPRINTFR(SpecBuffer, "%10s SB Hit (idx=%d, addr=%#x) on (srcIdx=%d)\n", curTick(), idx, printAddress(sbb.reqAddress), srcIdx);
-            if (!specBufferHitEvent.scheduled()) {
-                schedule(specBufferHitEvent, clockEdge(Cycles(1)));
-            }
-            return RequestStatus_Issued;
-        } else {
-            // assert it is not in the buffer
-            primary_type = secondary_type = RubyRequestType_SPEC_LD;
-        }
+        // The spec buffer is gone: every speculative load is sent to Ruby as SPEC_LD.
+        primary_type = secondary_type = RubyRequestType_SPEC_LD;
     } else if (pkt->isExpose() || pkt->isValidate()) {
-        assert(pkt->cmd == MemCmd::ExposeReq || pkt->cmd == MemCmd::ValidateReq);
-        assert(pkt->isSplit || pkt->isFirst());
-        uint8_t idx = pkt->reqIdx;
-        SBE& sbe = m_specBuf[idx];
-        sbe.isSplit = pkt->isSplit;
-        int blkIdx = pkt->isFirst() ? 0 : 1;
-        SBB& sbb = sbe.blocks[blkIdx];
-        if (sbb.reqAddress != pkt->getAddr()) {
-            fatal("sbb.reqAddress != pkt->getAddr: %#x != %#x\n", printAddress(sbb.reqAddress), printAddress(pkt->getAddr()));
-        }
-        if (sbb.reqSize != pkt->getSize()) {
-            fatal("sbb.reqSize != pkt->getSize(): %d != %d\n", sbb.reqSize, pkt->getSize());
-        }
-        primary_type = secondary_type = RubyRequestType_EXPOSE;
+        panic("Expose/Validate requests are no longer supported by the Sequencer\n");  // assert(false) vanishes when NDEBUG is defined; fail loudly in every build instead
     } else if (pkt->isLLSC()) {
         //
         // Alpha LL/SC instructions need to be handled carefully by the cache
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index 66ff92777..8e1f08a48 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -256,10 +256,6 @@ class Sequencer : public RubyPort
     std::vector<Stats::Counter> m_IncompleteTimes;
 
     EventFunctionWrapper deadlockCheckEvent;
-
-    std::vector<SBE> m_specBuf;
-    std::queue<std::pair<PacketPtr, Tick>> m_specRequestQueue;
-    EventFunctionWrapper specBufferHitEvent;
 };
 
 inline std::ostream&
-- 
cgit v1.2.3