10 files changed, 324 insertions, 14 deletions
diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript
index be52c02d0..e6d45a419 100644
--- a/src/mem/ruby/SConscript
+++ b/src/mem/ruby/SConscript
@@ -59,6 +59,9 @@ DebugFlag('RubySystem')
 DebugFlag('RubyTester')
 DebugFlag('RubyStats')
 DebugFlag('RubyResourceStalls')
+DebugFlag('SpecBuffer')
+DebugFlag('SpecBufferValidate')
+DebugFlag('MemSpecBuffer')
 
 CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester',
     'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache',
diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc
index 57834f2e2..7d4d71eb3 100644
--- a/src/mem/ruby/network/Network.cc
+++ b/src/mem/ruby/network/Network.cc
@@ -144,12 +144,18 @@ Network::MessageSizeType_to_int(MessageSizeType size_type)
       case MessageSizeType_Unblock_Control:
       case MessageSizeType_Persistent_Control:
       case MessageSizeType_Completion_Control:
+      case MessageSizeType_SPECLD_Control:
+      case MessageSizeType_SPECLD_Request_Control:
+      case MessageSizeType_EXPOSE_Control:
+      case MessageSizeType_EXPOSE_Request_Control:
         return m_control_msg_size;
       case MessageSizeType_Data:
       case MessageSizeType_Response_Data:
       case MessageSizeType_ResponseLocal_Data:
       case MessageSizeType_ResponseL2hit_Data:
       case MessageSizeType_Writeback_Data:
+      case MessageSizeType_SPECLD_Data:
+      case MessageSizeType_EXPOSE_Data:
         return m_data_msg_size;
       default:
         panic("Invalid range for type MessageSizeType");
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index de7e03dd7..a4ba1fe07 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -41,6 +41,7 @@
 #include "mem/ruby/slicc_interface/AbstractController.hh"
 
 #include "debug/RubyQueue.hh"
+#include "debug/MemSpecBuffer.hh"
 #include "mem/protocol/MemoryMsg.hh"
 #include "mem/ruby/network/Network.hh"
 #include "mem/ruby/system/GPUCoalescer.hh"
@@ -96,6 +97,14 @@ AbstractController::regStats()
         .name(name() + ".fully_busy_cycles")
         .desc("cycles for which number of transistions == max transitions")
         .flags(Stats::nozero);
+    m_expose_hits
+        .name(name() + ".expose_hits")
+        .desc("number of expose hits at LLC spec buffer")
+        .flags(Stats::nozero);
+    m_expose_misses
+        .name(name() + ".expose_misses")
+        .desc("number of expose misses at LLC spec buffer")
+        .flags(Stats::nozero);
 }
 
 void
@@ -238,8 +247,67 @@ AbstractController::getMasterPort(const std::string &if_name,
 
 void
 AbstractController::queueMemoryRead(const MachineID &id, Addr addr,
-                                    Cycles latency)
+                                    Cycles latency, MachineID origin, int idx, int type)
 {
+    int coreId = origin.num;
+    int sbeId = idx;
+    // type 0: non-spec 1: spec 2: expose
+    // DPRINTFR(MemSpecBuffer, "%10s MemRead (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+    // if idx == -1, it is a write request which cannot be spec or expose.
+    assert(!(type != 0 && sbeId == -1));
+    assert(sbeId >= -1 && sbeId <= 65);
+    assert(coreId < 8);
+    assert(type >=0 && type <= 2);
+    if (type == 0) {
+        for (int c = 0; c < 8; ++c) {
+            for (int i = 0; i < 66; ++i) {
+                if (m_specBuf[c][i].address == addr) {
+                    DPRINTFR(MemSpecBuffer, "%10s Cleared by Read (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+                    m_specBuf[c][i].address = 0;
+                    m_specBuf[c][i].data.clear();
+                }
+            }
+        }
+    } else if (type == 1) {
+
+    } else if (type == 2) {
+        if (m_specBuf[coreId][sbeId].address == addr) {
+            DPRINTFR(MemSpecBuffer, "%10s Expose Hit (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+            ++m_expose_hits;
+            assert(getMemoryQueue());
+            std::shared_ptr<MemoryMsg> msg = std::make_shared<MemoryMsg>(clockEdge());
+            (*msg).m_addr = addr;
+            (*msg).m_Sender = m_machineID;
+            (*msg).m_OriginalRequestorMachId = id;
+            (*msg).m_Type = MemoryRequestType_MEMORY_READ;
+            (*msg).m_MessageSize = MessageSizeType_Response_Data;
+            (*msg).m_DataBlk = m_specBuf[coreId][sbeId].data;
+            getMemoryQueue()->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)));
+            for (int c = 0; c < 8; ++c) {
+                for (int i = 0; i < 66; ++i) {
+                    if (m_specBuf[c][i].address == addr) {
+                        DPRINTFR(MemSpecBuffer, "%10s Cleared by Expose Hit (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+                        m_specBuf[c][i].address = 0;
+                        m_specBuf[c][i].data.clear();
+                    }
+                }
+            }
+            return;
+        } else {
+            DPRINTFR(MemSpecBuffer, "%10s Expose Miss (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+            ++m_expose_misses;
+            for (int c = 0; c < 8; ++c) {
+                for (int i = 0; i < 66; ++i) {
+                    if (m_specBuf[c][i].address == addr) {
+                        DPRINTFR(MemSpecBuffer, "%10s Cleared by Expose Miss (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+                        m_specBuf[c][i].address = 0;
+                        m_specBuf[c][i].data.clear();
+                    }
+                }
+            }
+        }
+    }
+    
     RequestPtr req = std::make_shared<Request>(
         addr, RubySystem::getBlockSizeBytes(), 0, m_masterId);
 
@@ -248,6 +316,9 @@ AbstractController::queueMemoryRead(const MachineID &id, Addr addr,
     pkt->dataDynamic(newData);
 
     SenderState *s = new SenderState(id);
+    s->type = type;
+    s->coreId = coreId;
+    s->sbeId = sbeId;
     pkt->pushSenderState(s);
 
     // Use functional rather than timing accesses during warmup
@@ -339,6 +410,9 @@ AbstractController::recvTimingResp(PacketPtr pkt)
 
     SenderState *s = dynamic_cast<SenderState *>(pkt->senderState);
     (*msg).m_OriginalRequestorMachId = s->id;
+    int type = s->type;
+    int coreId = s->coreId;
+    int sbeId = s->sbeId;
     delete s;
 
     if (pkt->isRead()) {
@@ -348,6 +422,12 @@ AbstractController::recvTimingResp(PacketPtr pkt)
         // Copy data from the packet
         (*msg).m_DataBlk.setData(pkt->getPtr<uint8_t>(), 0,
                                  RubySystem::getBlockSizeBytes());
+        if (type == 1) {
+            DPRINTFR(MemSpecBuffer, "%10s Updated by ReadSpec (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(pkt->getAddr()));
+            m_specBuf[coreId][sbeId].address = pkt->getAddr();
+            m_specBuf[coreId][sbeId].data.setData(pkt->getPtr<uint8_t>(), 0,
+                                                  RubySystem::getBlockSizeBytes());
+        }
     } else if (pkt->isWrite()) {
         (*msg).m_Type = MemoryRequestType_MEMORY_WB;
         (*msg).m_MessageSize = MessageSizeType_Writeback_Control;
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 35cd3d2a5..b65a511d0 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -129,7 +129,7 @@ class AbstractController : public MemObject, public Consumer
     BaseMasterPort& getMasterPort(const std::string& if_name,
                                   PortID idx = InvalidPortID);
 
-    void queueMemoryRead(const MachineID &id, Addr addr, Cycles latency);
+    void queueMemoryRead(const MachineID &id, Addr addr, Cycles latency, MachineID origin, int idx, int type);
     void queueMemoryWrite(const MachineID &id, Addr addr, Cycles latency,
                           const DataBlock &block);
     void queueMemoryWritePartial(const MachineID &id, Addr addr, Cycles latency,
@@ -199,6 +199,8 @@ class AbstractController : public MemObject, public Consumer
     //! Counter for the number of cycles when the transitions carried out
     //! were equal to the maximum allowed
     Stats::Scalar m_fully_busy_cycles;
+    Stats::Scalar m_expose_hits;
+    Stats::Scalar m_expose_misses;
 
     //! Histogram for profiling delay for the messages this controller
     //! cares for
@@ -250,6 +252,9 @@ class AbstractController : public MemObject, public Consumer
     {
         // Id of the machine from which the request originated.
         MachineID id;
+        int type;
+        int coreId;
+        int sbeId;
 
         SenderState(MachineID _id) : id(_id)
         {}
@@ -258,6 +263,14 @@ class AbstractController : public MemObject, public Consumer
   private:
     /** The address range to which the controller responds on the CPU side. */
     const AddrRangeList addrRanges;
+
+    struct SBE  // one slot of this controller's spec buffer (m_specBuf)
+    {
+      Addr address = 0;  // 0 == empty; clear paths in queueMemoryRead write 0
+      DataBlock data;    // data captured in recvTimingResp for a type-1 read
+    };
+
+    SBE m_specBuf[8][66];
 };
 
 #endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index 6c84f3823..2fc4c9f98 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -60,6 +60,7 @@ class RubyRequest : public Message
     int m_wfid;
     HSAScope m_scope;
     HSASegment m_segment;
+    int m_idx;
 
 
     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -82,6 +83,11 @@ class RubyRequest : public Message
           m_segment(_segment)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        if (_pkt->reqIdx == -1) {
+            m_idx = _pkt->reqIdx;
+        } else {
+            m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+        }
     }
 
     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -109,6 +115,11 @@ class RubyRequest : public Message
           m_segment(_segment)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        if (_pkt->reqIdx == -1) {
+            m_idx = _pkt->reqIdx;
+        } else {
+            m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+        }
     }
 
     RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -137,6 +148,11 @@ class RubyRequest : public Message
           m_segment(_segment)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        if (_pkt->reqIdx == -1) {
+            m_idx = _pkt->reqIdx;
+        } else {
+            m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+        }
     }
 
 
diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc
index 8d99c90aa..dc5898bea 100644
--- a/src/mem/ruby/structures/CacheMemory.cc
+++ b/src/mem/ruby/structures/CacheMemory.cc
@@ -176,7 +176,9 @@ CacheMemory::tryCacheAccess(Addr address, RubyRequestType type,
             return true;
         }
         if ((entry->m_Permission == AccessPermission_Read_Only) &&
-            (type == RubyRequestType_LD || type == RubyRequestType_IFETCH)) {
+            (type == RubyRequestType_LD ||
+             type == RubyRequestType_IFETCH ||
+             type == RubyRequestType_SPEC_LD)) {
             return true;
         }
         // The line must not be accessible
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 84a70c0f1..15013e056 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -246,6 +246,7 @@ RubyPort::PioSlavePort::recvAtomic(PacketPtr pkt)
     panic("Could not find address in Ruby PIO address ranges!\n");
 }
 
+// [InvisiSpec] Request on the way from CPU to Ruby
 bool
 RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
 {
@@ -429,6 +430,7 @@ RubyPort::MemSlavePort::recvFunctional(PacketPtr pkt)
     }
 }
 
+// [InvisiSpec] On the way from Ruby to CPU
 void
 RubyPort::ruby_hit_callback(PacketPtr pkt)
 {
@@ -512,6 +514,7 @@ RubyPort::drain()
     }
 }
 
+// [InvisiSpec] Still on the way from Ruby to CPU
 void
 RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
 {
@@ -545,7 +548,7 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
     }
 
     // Flush, acquire, release requests don't access physical memory
-    if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
+    if (pkt->isFlush() || pkt->isExpose() || pkt->cmd == MemCmd::MemFenceReq) {
         accessPhysMem = false;
     }
 
@@ -572,6 +575,7 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
         // Ruby protocol.
         schedTimingResp(pkt, curTick());
     } else {
+        // [InvisiSpec] Delete the packet if a response is not required
         delete pkt;
     }
 
@@ -602,7 +606,7 @@ RubyPort::MemSlavePort::isPhysMemAddress(Addr addr) const
 }
 
 void
-RubyPort::ruby_eviction_callback(Addr address)
+RubyPort::ruby_eviction_callback(Addr address, bool external)
 {
     DPRINTF(RubyPort, "Sending invalidations.\n");
     // Allocate the invalidate request and packet on the stack, as it is
@@ -615,6 +619,9 @@ RubyPort::ruby_eviction_callback(Addr address)
     // Use a single packet to signal all snooping ports of the invalidation.
     // This assumes that snooping ports do NOT modify the packet/request
     Packet pkt(request, MemCmd::InvalidateReq);
+    if (external) {
+        pkt.setExternalEviction();
+    }
     for (CpuPortIter p = slave_ports.begin(); p != slave_ports.end(); ++p) {
         // check if the connected master port is snooping
         if ((*p)->isSnooping()) {
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 146443282..9c0200829 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -172,7 +172,7 @@ class RubyPort : public MemObject
     void trySendRetries();
     void ruby_hit_callback(PacketPtr pkt);
     void testDrainComplete();
-    void ruby_eviction_callback(Addr address);
+    void ruby_eviction_callback(Addr address, bool external);
 
     /**
      * Called by the PIO port when receiving a timing response.
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index f30369710..5a11d3165 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -36,6 +36,8 @@
 #include "debug/ProtocolTrace.hh"
 #include "debug/RubySequencer.hh"
 #include "debug/RubyStats.hh"
+#include "debug/SpecBuffer.hh"
+#include "debug/SpecBufferValidate.hh"
 #include "mem/packet.hh"
 #include "mem/protocol/PrefetchBit.hh"
 #include "mem/protocol/RubyAccessMode.hh"
@@ -54,7 +56,9 @@ RubySequencerParams::create()
 
 Sequencer::Sequencer(const Params *p)
     : RubyPort(p), m_IncompleteTimes(MachineType_NUM),
-      deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check")
+      deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check"),
+      m_specBuf(33),
+      specBufferHitEvent([this]{ specBufferHitCallback(); }, "Sequencer spec buffer hit")
 {
     m_outstanding_count = 0;
 
@@ -160,6 +164,7 @@ void Sequencer::resetStats()
     }
 }
 
+// [InvisiSpec] Request on the way from CPU to Ruby
 // Insert the request on the correct request table.  Return true if
 // the entry was already present.
 RequestStatus
@@ -190,6 +195,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
     RequestTable::value_type default_entry(line_addr,
                                            (SequencerRequest*) NULL);
 
+    // [InvisiSpec] If store
     if ((request_type == RubyRequestType_ST) ||
         (request_type == RubyRequestType_RMW_Read) ||
         (request_type == RubyRequestType_RMW_Write) ||
@@ -217,6 +223,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
           m_store_waiting_on_store++;
           return RequestStatus_Aliased;
         }
+    // [InvisiSpec] If load
     } else {
         // Check if there is any outstanding write request for the same
         // cache line.
@@ -232,6 +239,16 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
             RequestTable::iterator i = r.first;
             i->second = new SequencerRequest(pkt, request_type, curCycle());
             m_outstanding_count++;
+        } else if (request_type == RubyRequestType_SPEC_LD) {
+            auto i = m_readRequestTable.find(line_addr);
+            if (i->second->m_type == RubyRequestType_SPEC_LD) {
+                DPRINTFR(SpecBuffer, "%10s Merging (idx=%d-%d, addr=%#x) with %d\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()), i->second->pkt->reqIdx);
+                i->second->dependentSpecRequests.push_back(pkt);
+                return RequestStatus_Merged;
+            } else {
+                m_load_waiting_on_load++;
+                return RequestStatus_Aliased;
+            }
         } else {
             // There is an outstanding read request for the cache line
             m_load_waiting_on_load++;
@@ -412,6 +429,19 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
                 initialRequestTime, forwardRequestTime, firstResponseTime);
 }
 
+bool Sequencer::updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress) {
+    // Store `data` in the spec-buffer block picked by reqIdx / isFirst();
+    // returns false, storing nothing, if that block is for another line.
+    const uint8_t entryIdx = pkt->reqIdx;
+    SBB& block = m_specBuf[entryIdx].blocks[pkt->isFirst() ? 0 : 1];
+    if (makeLineAddress(block.reqAddress) != dataAddress) {
+        return false;
+    }
+    block.data = data;
+    return true;
+}
+
+// [InvisiSpec] Called by Ruby to send a response to CPU.
 void
 Sequencer::readCallback(Addr address, DataBlock& data,
                         bool externalHit, const MachineType mach,
@@ -430,13 +460,79 @@ Sequencer::readCallback(Addr address, DataBlock& data,
     markRemoved();
 
     assert((request->m_type == RubyRequestType_LD) ||
+           (request->m_type == RubyRequestType_SPEC_LD) ||
+           (request->m_type == RubyRequestType_EXPOSE) ||
            (request->m_type == RubyRequestType_IFETCH));
+    
+    PacketPtr pkt = request->pkt;
+    if (pkt->isSpec()) {
+        assert(!pkt->onlyAccessSpecBuff());
+        DPRINTFR(SpecBuffer, "%10s SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+        updateSBB(pkt, data, address);
+        if (!externalHit) {
+            pkt->setL1Hit();
+        }
+    } else if (pkt->isExpose()) {
+        DPRINTFR(SpecBuffer, "%10s EXPOSE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+    } else if (pkt->isValidate()) {
+        DPRINTFR(SpecBuffer, "%10s VALIDATE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+        uint8_t idx = pkt->reqIdx;
+        SBE& sbe = m_specBuf[idx];
+        int blkIdx = pkt->isFirst() ? 0 : 1;
+        SBB& sbb = sbe.blocks[blkIdx];
+        assert(makeLineAddress(sbb.reqAddress) == address);
+        if (!memcmp(sbb.data.getData(getOffset(pkt->getAddr()), pkt->getSize()), data.getData(getOffset(pkt->getAddr()), pkt->getSize()), pkt->getSize())) {
+            *(pkt->getPtr<uint8_t>()) = 1;
+        } else {
+            // std::ostringstream os;
+            // sbb.data.print(os);
+            // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
+            // os.str("");
+            // data.print(os);
+            // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
+            *(pkt->getPtr<uint8_t>()) = 0;
+        }
+    }
+
+    for (auto& dependentPkt : request->dependentSpecRequests) {
+        assert(!dependentPkt->onlyAccessSpecBuff());
+        DPRINTFR(SpecBuffer, "%10s Merged SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), dependentPkt->reqIdx, dependentPkt->isFirst()? 0 : 1, printAddress(dependentPkt->getAddr()));
+        assert(dependentPkt->isSpec());
+        updateSBB(dependentPkt, data, address);
+        if (!externalHit) {
+            dependentPkt->setL1Hit();
+        }
+        memcpy(dependentPkt->getPtr<uint8_t>(),
+               data.getData(getOffset(dependentPkt->getAddr()), dependentPkt->getSize()),
+               dependentPkt->getSize());
+        ruby_hit_callback(dependentPkt);
+    }
 
     hitCallback(request, data, true, mach, externalHit,
                 initialRequestTime, forwardRequestTime, firstResponseTime);
 }
 
 void
+Sequencer::specBufferHitCallback()
+{
+    assert(m_specRequestQueue.size());
+    // Drain ready hits in FIFO order; re-arm the event at the first pending one.
+    while (!m_specRequestQueue.empty()) {
+        const auto head = m_specRequestQueue.front();
+        if (head.second > curTick()) {
+            schedule(specBufferHitEvent, head.second);
+            break;
+        }
+        PacketPtr pkt = head.first;
+        assert(pkt->onlyAccessSpecBuff());
+        DPRINTFR(SpecBuffer, "%10s SB Hit Callback (idx=%d, addr=%#x)\n", curTick(), pkt->reqIdx, printAddress(pkt->getAddr()));
+        ruby_hit_callback(pkt);
+        m_specRequestQueue.pop();
+    }
+}
+
+// [InvisiSpec] Response on the way from Ruby to CPU
+void
 Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
                        bool llscSuccess,
                        const MachineType mach, const bool externalHit,
@@ -470,8 +566,9 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
     if (RubySystem::getWarmupEnabled()) {
         data.setData(pkt->getConstPtr<uint8_t>(),
                      getOffset(request_address), pkt->getSize());
-    } else if (!pkt->isFlush()) {
+    } else if (!pkt->isFlush() && !pkt->isExpose() && !pkt->isValidate()) {
         if ((type == RubyRequestType_LD) ||
+            (type == RubyRequestType_SPEC_LD) ||
             (type == RubyRequestType_IFETCH) ||
             (type == RubyRequestType_RMW_Read) ||
             (type == RubyRequestType_Locked_RMW_Read) ||
@@ -533,6 +630,7 @@ Sequencer::empty() const
     return m_writeRequestTable.empty() && m_readRequestTable.empty();
 }
 
+// [InvisiSpec] Request on the way from CPU to Ruby
 RequestStatus
 Sequencer::makeRequest(PacketPtr pkt)
 {
@@ -543,7 +641,56 @@ Sequencer::makeRequest(PacketPtr pkt)
     RubyRequestType primary_type = RubyRequestType_NULL;
     RubyRequestType secondary_type = RubyRequestType_NULL;
 
-    if (pkt->isLLSC()) {
+    // [InvisiSpec] Handle new requests
+    if (pkt->isSpec()) {
+        assert(pkt->cmd == MemCmd::ReadSpecReq);
+        assert(pkt->isSplit || pkt->isFirst());
+        uint8_t idx = pkt->reqIdx;
+        SBE& sbe = m_specBuf[idx];
+        sbe.isSplit = pkt->isSplit;
+        int blkIdx = pkt->isFirst() ? 0 : 1;
+        SBB& sbb = sbe.blocks[blkIdx];
+        sbb.reqAddress = pkt->getAddr();
+        sbb.reqSize = pkt->getSize();
+        if (pkt->onlyAccessSpecBuff()) {
+            int srcIdx = pkt->srcIdx;
+            SBE& srcEntry = m_specBuf[srcIdx];
+            if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[0].reqAddress)) {
+                sbb.data = srcEntry.blocks[0].data;
+            } else if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[1].reqAddress)) {
+                sbb.data = srcEntry.blocks[1].data;
+            } else {
+                fatal("Requested address %#x is not present in the spec buffer\n", printAddress(sbb.reqAddress));
+            }
+            memcpy(pkt->getPtr<uint8_t>(),
+                   sbb.data.getData(getOffset(sbb.reqAddress), sbb.reqSize),
+                   sbb.reqSize);
+            m_specRequestQueue.push({pkt, curTick()});
+            DPRINTFR(SpecBuffer, "%10s SB Hit (idx=%d, addr=%#x) on (srcIdx=%d)\n", curTick(), idx, printAddress(sbb.reqAddress), srcIdx);
+            if (!specBufferHitEvent.scheduled()) {
+                schedule(specBufferHitEvent, clockEdge(Cycles(1)));
+            }
+            return RequestStatus_Issued;
+        } else {
+            // assert it is not in the buffer
+            primary_type = secondary_type = RubyRequestType_SPEC_LD;
+        }
+    } else if (pkt->isExpose() || pkt->isValidate()) {
+        assert(pkt->cmd == MemCmd::ExposeReq || pkt->cmd == MemCmd::ValidateReq);
+        assert(pkt->isSplit || pkt->isFirst());
+        uint8_t idx = pkt->reqIdx;
+        SBE& sbe = m_specBuf[idx];
+        sbe.isSplit = pkt->isSplit;
+        int blkIdx = pkt->isFirst() ? 0 : 1;
+        SBB& sbb = sbe.blocks[blkIdx];
+        if (sbb.reqAddress != pkt->getAddr()) {
+            fatal("sbb.reqAddress != pkt->getAddr: %#x != %#x\n", printAddress(sbb.reqAddress), printAddress(pkt->getAddr()));
+        }
+        if (sbb.reqSize != pkt->getSize()) {
+            fatal("sbb.reqSize != pkt->getSize(): %d != %d\n", sbb.reqSize, pkt->getSize());
+        }
+        primary_type = secondary_type = RubyRequestType_EXPOSE;
+    } else if (pkt->isLLSC()) {
         //
         // Alpha LL/SC instructions need to be handled carefully by the cache
         // coherence protocol to ensure they follow the proper semantics. In
@@ -614,8 +761,22 @@ Sequencer::makeRequest(PacketPtr pkt)
     }
 
     RequestStatus status = insertRequest(pkt, primary_type);
-    if (status != RequestStatus_Ready)
+    if (status == RequestStatus_Merged) {
+        return RequestStatus_Issued;
+    } else if (status != RequestStatus_Ready) {
         return status;
+    }
+
+    if (pkt->isSpec()) {
+        DPRINTFR(SpecBuffer, "%10s Issuing SPEC_LD (idx=%d-%d, addr=%#x)\n",
+                 curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+    } else if (pkt->isExpose()) {
+        DPRINTFR(SpecBuffer, "%10s Issuing EXPOSE (idx=%d-%d, addr=%#x)\n",
+                 curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+    } else if (pkt->isValidate()) {
+        DPRINTFR(SpecBuffer, "%10s Issuing VALIDATE (idx=%d-%d, addr=%#x)\n",
+                 curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+    }
 
     issueRequest(pkt, secondary_type);
 
@@ -642,7 +803,7 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
     // requests do not
     std::shared_ptr<RubyRequest> msg =
         std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
-                                      pkt->isFlush() ?
+                                      pkt->isFlush() || pkt->isExpose() ?
                                       nullptr : pkt->getPtr<uint8_t>(),
                                       pkt->getSize(), pc, secondary_type,
                                       RubyAccessMode_Supervisor, pkt,
@@ -716,9 +877,9 @@ Sequencer::recordRequestType(SequencerRequestType requestType) {
 
 
 void
-Sequencer::evictionCallback(Addr address)
+Sequencer::evictionCallback(Addr address, bool external)
 {
-    ruby_eviction_callback(address);
+    ruby_eviction_callback(address, external);  // forward flag to RubyPort
 }
 
 void
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index fcfa8ad86..66ff92777 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -31,6 +31,7 @@
 
 #include <iostream>
 #include <unordered_map>
+#include <queue>
 
 #include "mem/protocol/MachineType.hh"
 #include "mem/protocol/RubyRequestType.hh"
@@ -45,6 +46,7 @@ struct SequencerRequest
     PacketPtr pkt;
     RubyRequestType m_type;
     Cycles issue_time;
+    std::vector<PacketPtr> dependentSpecRequests;
 
     SequencerRequest(PacketPtr _pkt, RubyRequestType _m_type,
                      Cycles _issue_time)
@@ -54,6 +56,19 @@ struct SequencerRequest
 
 std::ostream& operator<<(std::ostream& out, const SequencerRequest& obj);
 
+struct SBB // SpecBufferBlock: one of the two blocks of a spec-buffer entry
+{
+  Addr reqAddress;   // request address, taken from pkt->getAddr()
+  unsigned reqSize;  // request size, taken from pkt->getSize()
+  DataBlock data;    // block data stored by the Sequencer for this request
+};
+
+struct SBE // SpecBufferEntry: Sequencer indexes these by the packet's reqIdx
+{
+  bool isSplit;      // copied from pkt->isSplit in Sequencer::makeRequest
+  SBB blocks[2];     // [0] is used when pkt->isFirst(), [1] otherwise
+};
+
 class Sequencer : public RubyPort
 {
   public:
@@ -83,6 +98,9 @@ class Sequencer : public RubyPort
                       const Cycles forwardRequestTime = Cycles(0),
                       const Cycles firstResponseTime = Cycles(0));
 
+    void specBufferHitCallback();
+    bool updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress);
+
     RequestStatus makeRequest(PacketPtr pkt);
     bool empty() const;
     int outstandingCount() const { return m_outstanding_count; }
@@ -97,7 +115,7 @@ class Sequencer : public RubyPort
     void checkCoherence(Addr address);
 
     void markRemoved();
-    void evictionCallback(Addr address);
+    void evictionCallback(Addr address, bool external);
     void invalidateSC(Addr address);
     int coreId() const { return m_coreId; }
 
@@ -238,6 +256,10 @@ class Sequencer : public RubyPort
     std::vector<Stats::Counter> m_IncompleteTimes;
 
     EventFunctionWrapper deadlockCheckEvent;
+
+    std::vector<SBE> m_specBuf;
+    std::queue<std::pair<PacketPtr, Tick>> m_specRequestQueue;
+    EventFunctionWrapper specBufferHitEvent;
 };
 
 inline std::ostream&