summaryrefslogtreecommitdiff
path: root/src/mem
diff options
context:
space:
mode:
Diffstat (limited to 'src/mem')
-rw-r--r--src/mem/packet.cc19
-rw-r--r--src/mem/packet.hh113
-rw-r--r--src/mem/port.cc1
-rw-r--r--src/mem/protocol/MESI_Two_Level-L1cache.sm264
-rw-r--r--src/mem/protocol/MESI_Two_Level-L2cache.sm279
-rw-r--r--src/mem/protocol/MESI_Two_Level-dir.sm89
-rw-r--r--src/mem/protocol/MESI_Two_Level-msg.sm6
-rw-r--r--src/mem/protocol/RubySlicc_Defines.sm2
-rw-r--r--src/mem/protocol/RubySlicc_Exports.sm12
-rw-r--r--src/mem/protocol/RubySlicc_Types.sm3
-rw-r--r--src/mem/request.hh4
-rw-r--r--src/mem/ruby/SConscript3
-rw-r--r--src/mem/ruby/network/Network.cc6
-rw-r--r--src/mem/ruby/slicc_interface/AbstractController.cc82
-rw-r--r--src/mem/ruby/slicc_interface/AbstractController.hh15
-rw-r--r--src/mem/ruby/slicc_interface/RubyRequest.hh16
-rw-r--r--src/mem/ruby/structures/CacheMemory.cc4
-rw-r--r--src/mem/ruby/system/RubyPort.cc11
-rw-r--r--src/mem/ruby/system/RubyPort.hh2
-rw-r--r--src/mem/ruby/system/Sequencer.cc175
-rw-r--r--src/mem/ruby/system/Sequencer.hh24
21 files changed, 1078 insertions, 52 deletions
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index ffda3d5af..f664878e2 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -219,7 +219,24 @@ MemCmd::commandInfo[] =
InvalidateResp, "InvalidateReq" },
/* Invalidation Response */
{ SET2(IsInvalidate, IsResponse),
- InvalidCmd, "InvalidateResp" }
+ InvalidCmd, "InvalidateResp" },
+ /* [InvisiSpec] New command info */
+ { SET4(IsRead, IsRequest, NeedsResponse, IsSpec),
+ ReadSpecResp, "ReadSpecReq" },
+ { SET4(IsRead, IsResponse, HasData, IsSpec),
+ InvalidCmd, "ReadSpecResp" },
+ { SET4(IsRead, IsRequest, NeedsResponse, IsValidate),
+ ValidateResp, "ValidateReq" },
+ { SET4(IsRead, IsResponse, HasData, IsValidate),
+ InvalidCmd, "ValidateResp" },
+ { SET4(IsRead, IsRequest, NeedsResponse, IsExpose),
+ ExposeResp, "ExposeReq" },
+ { SET3(IsRead, IsResponse, IsExpose),
+ InvalidCmd, "ExposeResp" },
+ { SET3(IsRequest, NeedsResponse, IsSpecFlush),
+ SpecFlushResp, "SpecFlushReq" },
+ { SET2(IsResponse, IsSpecFlush),
+ InvalidCmd, "SpecFlushResp" }
};
bool
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 66625b382..026cdabfc 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -133,6 +133,15 @@ class MemCmd
FlushReq, //request for a cache flush
InvalidateReq, // request for address to be invalidated
InvalidateResp,
+ /* [InvisiSpec] New commands */
+ ReadSpecReq,
+ ReadSpecResp,
+ ValidateReq,
+ ValidateResp,
+ ExposeReq,
+ ExposeResp,
+ SpecFlushReq,
+ SpecFlushResp,
NUM_MEM_CMDS
};
@@ -160,6 +169,11 @@ class MemCmd
IsPrint, //!< Print state matching address (for debugging)
IsFlush, //!< Flush the address from caches
FromCache, //!< Request originated from a caching agent
+ /* [InvisiSpec] New attributes */
+ IsSpec, //!< Speculatively issued
+ IsValidate,
+ IsExpose,
+ IsSpecFlush,
NUM_COMMAND_ATTRIBUTES
};
@@ -226,6 +240,12 @@ class MemCmd
bool isPrint() const { return testCmdAttrib(IsPrint); }
bool isFlush() const { return testCmdAttrib(IsFlush); }
+ /// [InvisiSpec] InvisiSpec attributes
+ bool isSpec() const { return testCmdAttrib(IsSpec); }
+ bool isValidate() const { return testCmdAttrib(IsValidate); }
+ bool isExpose() const { return testCmdAttrib(IsExpose); }
+ bool isSpecFlush() const { return testCmdAttrib(IsSpecFlush); }
+
Command
responseCommand() const
{
@@ -306,7 +326,17 @@ class Packet : public Printable
// Signal block present to squash prefetch and cache evict packets
// through express snoop flag
- BLOCK_CACHED = 0x00010000
+ BLOCK_CACHED = 0x00010000,
+
+ // [InvisiSpec] ReadSpecReq was L1 hit.
+ L1_HIT = 0x00020000,
+
+ // [InvisiSpec] this packet is the first one of split packets
+ // maximum split is 2
+ FIRST_IN_SPLIT = 0x00040000,
+ ONLY_ACCESS_SPEC_BUFF = 0x00080000,
+
+ EXTERNAL_EVICTION = 0x00100000,
};
Flags flags;
@@ -376,6 +406,12 @@ class Packet : public Printable
*/
uint32_t payloadDelay;
+ //[InvisiSpec] indicate the source buffer entry
+ //if the load should get data from specbuffer
+ int srcIdx;
+ int reqIdx;
+ bool isSplit;
+
/**
* A virtual base opaque structure used to hold state associated
* with the packet (e.g., an MSHR), specific to a MemObject that
@@ -549,6 +585,45 @@ class Packet : public Printable
bool isPrint() const { return cmd.isPrint(); }
bool isFlush() const { return cmd.isFlush(); }
+ /// [InvisiSpec] InvisiSpec flags
+ bool isSpec() const { return cmd.isSpec(); }
+ bool isValidate() const { return cmd.isValidate(); }
+ bool isExpose() const { return cmd.isExpose(); }
+ bool isSpecFlush() const { return cmd.isSpecFlush(); }
+ bool isL1Hit() const { return flags.isSet(L1_HIT); }
+ bool isExternalEviction() const { return flags.isSet(EXTERNAL_EVICTION); }
+ // [InvisiSpec] Check whether it is the first in split packets
+ bool isFirst() const { return flags.isSet(FIRST_IN_SPLIT); }
+ bool onlyAccessSpecBuff() const
+ { return flags.isSet(ONLY_ACCESS_SPEC_BUFF); }
+
+ void setL1Hit()
+ {
+ assert(isSpec());
+ assert(!flags.isSet(L1_HIT));
+ flags.set(L1_HIT);
+ }
+
+ void setExternalEviction()
+ {
+ assert(!flags.isSet(EXTERNAL_EVICTION));
+ flags.set(EXTERNAL_EVICTION);
+ }
+
+ void setOnlyAccessSpecBuff()
+ {
+ assert(isSpec());
+ assert(!flags.isSet(ONLY_ACCESS_SPEC_BUFF));
+ flags.set(ONLY_ACCESS_SPEC_BUFF);
+ }
+
+ void setFirst()
+ {
+ //assert(isSpec());
+ assert(!flags.isSet(FIRST_IN_SPLIT));
+ flags.set(FIRST_IN_SPLIT);
+ }
+
//@{
/// Snoop flags
/**
@@ -748,7 +823,8 @@ class Packet : public Printable
Packet(const RequestPtr _req, MemCmd _cmd)
: cmd(_cmd), id((PacketId)_req), req(_req), data(nullptr), addr(0),
_isSecure(false), size(0), headerDelay(0), snoopDelay(0),
- payloadDelay(0), senderState(NULL)
+ payloadDelay(0), srcIdx(-1), reqIdx(-1), isSplit(false),
+ senderState(NULL)
{
if (req->hasPaddr()) {
addr = req->getPaddr();
@@ -769,7 +845,8 @@ class Packet : public Printable
Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize, PacketId _id = 0)
: cmd(_cmd), id(_id ? _id : (PacketId)_req), req(_req), data(nullptr),
addr(0), _isSecure(false), headerDelay(0), snoopDelay(0),
- payloadDelay(0), senderState(NULL)
+ payloadDelay(0), srcIdx(-1), reqIdx(-1), isSplit(false),
+ senderState(NULL)
{
if (req->hasPaddr()) {
addr = req->getPaddr() & ~(_blkSize - 1);
@@ -795,6 +872,9 @@ class Packet : public Printable
headerDelay(pkt->headerDelay),
snoopDelay(0),
payloadDelay(pkt->payloadDelay),
+ srcIdx(pkt->srcIdx),
+ reqIdx(pkt->reqIdx),
+ isSplit(pkt->isSplit),
senderState(pkt->senderState)
{
if (!clear_flags)
@@ -869,6 +949,33 @@ class Packet : public Printable
}
/**
+ * [InvisiSpec] Methods that return Packets for InvisiSpec.
+ */
+ static PacketPtr
+ createReadSpec(const RequestPtr req)
+ {
+ return new Packet(req, MemCmd::ReadSpecReq);
+ }
+
+ static PacketPtr
+ createValidate(const RequestPtr req)
+ {
+ return new Packet(req, MemCmd::ValidateReq);
+ }
+
+ static PacketPtr
+ createExpose(const RequestPtr req)
+ {
+ return new Packet(req, MemCmd::ExposeReq);
+ }
+
+ static PacketPtr
+ createSpecFlush(const RequestPtr req)
+ {
+ return new Packet(req, MemCmd::SpecFlushReq);
+ }
+
+ /**
* clean up packet variables
*/
~Packet()
diff --git a/src/mem/port.cc b/src/mem/port.cc
index 756eb8bdd..924a2188c 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -176,6 +176,7 @@ MasterPort::sendFunctional(PacketPtr pkt)
return _slavePort->recvFunctional(pkt);
}
+// [InvisiSpec] Request from CPU to Ruby
bool
MasterPort::sendTimingReq(PacketPtr pkt)
{
diff --git a/src/mem/protocol/MESI_Two_Level-L1cache.sm b/src/mem/protocol/MESI_Two_Level-L1cache.sm
index 87684ce10..846af7da5 100644
--- a/src/mem/protocol/MESI_Two_Level-L1cache.sm
+++ b/src/mem/protocol/MESI_Two_Level-L1cache.sm
@@ -76,10 +76,12 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
S, AccessPermission:Read_Only, desc="a L1 cache entry Shared";
E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive";
M, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b";
+ X, AccessPermission:Read_Only, desc="a L1 cache entry Speculatively observed";
// Transient States
IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet";
IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet";
+ IX, AccessPermission:Busy, desc="L1 idle, issued GETSPEC, have not seen response yet";
SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet";
IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit";
@@ -99,6 +101,8 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
Load, desc="Load request from the home processor";
Ifetch, desc="I-fetch request from the home processor";
Store, desc="Store request from the home processor";
+ SpecLoad, desc="SpecLoad request from the home processor";
+ Expose, desc="Expose request from the home processor";
Inv, desc="Invalidate request from L2 bank";
@@ -110,6 +114,8 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
Fwd_GETX, desc="GETX from other processor";
Fwd_GETS, desc="GETS from other processor";
Fwd_GET_INSTR, desc="GET_INSTR from other processor";
+ Fwd_GETSPEC, desc="GETSPEC from other processor";
+ Fwd_EXPOSE, desc="EXPOSE from other processor";
Data, desc="Data for processor";
Data_Exclusive, desc="Data for processor";
@@ -188,6 +194,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ // [InvisiSpec] The same cache line cannot be present in L1D and L1I at the same time.
assert((L1Dcache.isTagPresent(addr) && L1Icache.isTagPresent(addr)) == false);
if(is_valid(tbe)) {
@@ -265,6 +272,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
return Event:Ifetch;
} else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
return Event:Store;
+ } else if (type == RubyRequestType:SPEC_LD) {
+ return Event:SpecLoad;
+ } else if (type == RubyRequestType:EXPOSE) {
+ return Event:Expose;
} else {
error("Invalid RubyRequestType");
}
@@ -387,6 +398,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
trigger(Event:Data_Exclusive, in_msg.addr, cache_entry, tbe);
} else if(in_msg.Type == CoherenceResponseType:DATA) {
if ((getState(tbe, cache_entry, in_msg.addr) == State:IS ||
+ getState(tbe, cache_entry, in_msg.addr) == State:IX ||
getState(tbe, cache_entry, in_msg.addr) == State:IS_I ||
getState(tbe, cache_entry, in_msg.addr) == State:PF_IS ||
getState(tbe, cache_entry, in_msg.addr) == State:PF_IS_I) &&
@@ -433,6 +445,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
trigger(Event:Fwd_GETS, in_msg.addr, cache_entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:GET_INSTR) {
trigger(Event:Fwd_GET_INSTR, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:GETSPEC) {
+ trigger(Event:Fwd_GETSPEC, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:EXPOSE) {
+ trigger(Event:Fwd_EXPOSE, in_msg.addr, cache_entry, tbe);
} else {
error("Invalid forwarded request type");
}
@@ -534,6 +550,43 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
+ }
+ }
+ }
+
+ action(as_issueGETSPEC, "as", desc="Issue GETSPEC") {
+ peek(mandatoryQueue_in, RubyRequest) {
+ enqueue(requestL1Network_out, RequestMsg, l1_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:GETSPEC;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits, intToID(0)));
+ DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+ address, out_msg.Destination);
+ out_msg.MessageSize := MessageSizeType:SPECLD_Control;
+ out_msg.Prefetch := in_msg.Prefetch;
+ out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
+ }
+ }
+ }
+
+ action(ex_issueEXPOSE, "ex", desc="Issue EXPOSE") {
+ peek(mandatoryQueue_in, RubyRequest) {
+ enqueue(requestL1Network_out, RequestMsg, l1_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:EXPOSE;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits, intToID(0)));
+ DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+ address, out_msg.Destination);
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Control;
+ out_msg.Prefetch := in_msg.Prefetch;
+ out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
}
}
}
@@ -568,6 +621,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
}
}
}
@@ -606,6 +660,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
}
}
}
@@ -643,6 +698,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
}
}
}
@@ -662,6 +718,36 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
}
+ action(dex_sendDataToExposeRequestor, "dex", desc="send data to requestor") {
+ peek(requestL1Network_in, RequestMsg) {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+ }
+
+ action(ds_sendDataToSpecRequestor, "ds", desc="send data to requestor") {
+ peek(requestL1Network_in, RequestMsg) {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+ }
+ }
+ }
+
action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of M downgrade") {
enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
assert(is_valid(cache_entry));
@@ -676,6 +762,20 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
}
+ action(d2ex_sendExposeDataToL2, "d2ex", desc="send data to the L2 cache because of M downgrade") {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits, intToID(0)));
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+
action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") {
peek(requestL1Network_in, RequestMsg) {
enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
@@ -691,6 +791,36 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
}
+ action(dtex_sendDataToExposeRequestor_fromTBE, "dtex", desc="send data to requestor") {
+ peek(requestL1Network_in, RequestMsg) {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+ }
+
+ action(dts_sendDataToSpecRequestor_fromTBE, "dts", desc="send data to requestor") {
+ peek(requestL1Network_in, RequestMsg) {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+ }
+ }
+ }
+
action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") {
enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
assert(is_valid(tbe));
@@ -705,6 +835,20 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
}
+ action(d2tex_sendExposeDataToL2_fromTBE, "d2tex", desc="send data to the L2 cache") {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits, intToID(0)));
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+
action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") {
peek(requestL1Network_in, RequestMsg) {
enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
@@ -761,7 +905,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
if (send_evictions) {
DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address);
- sequencer.evictionCallback(address);
+ sequencer.evictionCallback(address, false);
+ }
+ }
+
+ action(forward_external_eviction_to_cpu, "\ccc", desc="sends external eviction information to the processor") {
+ if (send_evictions) {
+ DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address);
+ sequencer.evictionCallback(address, true);
}
}
@@ -822,6 +973,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
sequencer.readCallback(address, cache_entry.DataBlk);
}
+ action(h_spec_load_hit, "hs",
+ desc="Notify sequencer the spec load completed.")
+ {
+ assert(is_valid(cache_entry));
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ sequencer.readCallback(address, cache_entry.DataBlk);
+ }
+
action(h_ifetch_hit, "hi", desc="Notify sequencer the instruction fetch completed.")
{
assert(is_valid(cache_entry));
@@ -839,6 +998,15 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
sequencer.readCallback(address, cache_entry.DataBlk, true);
}
+ action(hsx_spec_load_hit, "hsx", desc="Notify sequencer the external load completed.")
+ {
+ peek(responseL1Network_in, ResponseMsg) {
+ // [InvisiSpec] Hack for in_msg.DataBlk returning const DataBlk
+ tbe.DataBlk := in_msg.DataBlk;
+ sequencer.readCallback(address, tbe.DataBlk, true);
+ }
+ }
+
action(hh_store_hit, "\h", desc="Notify sequencer that store completed.")
{
assert(is_valid(cache_entry));
@@ -868,6 +1036,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
tbe.DataBlk := cache_entry.DataBlk;
}
+ action(iw_allocateTBEWithoutCacheEntry, "iw", desc="Allocate TBE without a cache entry") {
+ check_allocate(TBEs);
+ assert(!is_valid(cache_entry) || cache_entry.CacheState == State:I);
+ TBEs.allocate(address);
+ set_tbe(TBEs[address]);
+ tbe.isPrefetch := false;
+ }
+
action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
mandatoryQueue_in.dequeue(clockEdge());
}
@@ -989,13 +1165,19 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
cache_entry.isPrefetch := true;
}
+ action(x_expose_done, "xd",
+ desc="Notify sequencer the expose completed.")
+ {
+ sequencer.readCallback(address, cache_entry.DataBlk);
+ }
+
//*****************************************************
// TRANSITIONS
//*****************************************************
// Transitions for Load/Store/Replacement/WriteBack from transient states
- transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) {
+ transition({IS, IM, IX, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Expose, SpecLoad, Ifetch, Store, L1_Replacement}) {
z_stallAndWaitMandatoryQueue;
}
@@ -1003,7 +1185,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
z_stallAndWaitMandatoryQueue;
}
- transition({PF_IM, PF_SM}, {Load, Ifetch, L1_Replacement}) {
+ transition({PF_IM, PF_SM}, {Load, Expose, SpecLoad, Ifetch, L1_Replacement}) {
z_stallAndWaitMandatoryQueue;
}
@@ -1016,7 +1198,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
ff_deallocateL1CacheBlock;
}
- transition({S,E,M,IS,IM,SM,IS_I,PF_IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM},
+ transition({S,E,M,IS,IM,IX,SM,IS_I,PF_IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM},
{PF_Load, PF_Store, PF_Ifetch}) {
pq_popPrefetchQueue;
}
@@ -1030,6 +1212,21 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
k_popMandatoryQueue;
}
+ transition({NP,I}, Expose, IS) {
+ oo_allocateL1DCacheBlock;
+ i_allocateTBE;
+ ex_issueEXPOSE;
+ uu_profileDataMiss;
+ //po_observeMiss;
+ k_popMandatoryQueue;
+ }
+
+ transition({NP,I}, SpecLoad, IX) {
+ iw_allocateTBEWithoutCacheEntry;
+ as_issueGETSPEC;
+ k_popMandatoryQueue;
+ }
+
transition({NP,I}, PF_Load, PF_IS) {
oo_allocateL1DCacheBlock;
i_allocateTBE;
@@ -1037,13 +1234,13 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
pq_popPrefetchQueue;
}
- transition(PF_IS, Load, IS) {
+ transition(PF_IS, {Load, Expose}, IS) {
uu_profileDataMiss;
ppm_observePfMiss;
k_popMandatoryQueue;
}
- transition(PF_IS_I, Load, IS_I) {
+ transition(PF_IS_I, {Load, Expose}, IS_I) {
uu_profileDataMiss;
ppm_observePfMiss;
k_popMandatoryQueue;
@@ -1055,6 +1252,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
k_popMandatoryQueue;
}
+ transition({PF_IS, PF_IS_I}, SpecLoad) {
+ k_popMandatoryQueue;
+ }
+
transition({NP,I}, Ifetch, IS) {
pp_allocateL1ICacheBlock;
i_allocateTBE;
@@ -1107,19 +1308,24 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
k_popMandatoryQueue;
}
- transition({NP, I}, Inv) {
+ transition({NP, I, IX}, Inv) {
fi_sendInvAck;
l_popRequestQueue;
}
// Transitions from Shared
- transition({S,E,M}, Load) {
+ transition({S,E,M}, {Load, Expose}) {
h_load_hit;
uu_profileDataHit;
po_observeHit;
k_popMandatoryQueue;
}
+ transition({S,E,M}, SpecLoad) {
+ h_spec_load_hit;
+ k_popMandatoryQueue;
+ }
+
transition({S,E,M}, Ifetch) {
h_ifetch_hit;
uu_profileInstHit;
@@ -1140,7 +1346,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
transition(S, Inv, I) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
fi_sendInvAck;
l_popRequestQueue;
}
@@ -1164,13 +1370,13 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
transition(E, Inv, I) {
// don't send data
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
fi_sendInvAck;
l_popRequestQueue;
}
transition(E, Fwd_GETX, I) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
d_sendDataToRequestor;
l_popRequestQueue;
}
@@ -1181,6 +1387,17 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
l_popRequestQueue;
}
+ transition({E, M}, Fwd_GETSPEC) {
+ ds_sendDataToSpecRequestor;
+ l_popRequestQueue;
+ }
+
+ transition({E, M}, Fwd_EXPOSE, S) {
+ dex_sendDataToExposeRequestor;
+ d2ex_sendExposeDataToL2;
+ l_popRequestQueue;
+ }
+
// Transitions from Modified
transition(M, {L1_Replacement, PF_L1_Replacement}, M_I) {
@@ -1197,7 +1414,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
transition(M, Inv, I) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
f_sendDataToL2;
l_popRequestQueue;
}
@@ -1208,7 +1425,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
transition(M, Fwd_GETX, I) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
d_sendDataToRequestor;
l_popRequestQueue;
}
@@ -1230,6 +1447,17 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
l_popRequestQueue;
}
+ transition(M_I, Fwd_EXPOSE, SINK_WB_ACK) {
+ dtex_sendDataToExposeRequestor_fromTBE;
+ d2tex_sendExposeDataToL2_fromTBE;
+ l_popRequestQueue;
+ }
+
+ transition(M_I, Fwd_GETSPEC) {
+ dts_sendDataToSpecRequestor_fromTBE;
+ l_popRequestQueue;
+ }
+
// Transitions from IS
transition({IS, IS_I}, Inv, IS_I) {
fi_sendInvAck;
@@ -1341,6 +1569,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
kd_wakeUpDependents;
}
+ // [InvisiSpec] Data and Data_Exclusive are not possible at IX
+ transition(IX, {Data_all_Acks, DataS_fromL1}, I) {
+ hsx_spec_load_hit;
+ s_deallocateTBE;
+ o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
+ }
+
// Transitions from IM
transition(IM, Inv, IM) {
fi_sendInvAck;
@@ -1384,7 +1620,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
// transitions from SM
transition(SM, Inv, IM) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
fi_sendInvAck;
dg_invalidate_sc;
l_popRequestQueue;
diff --git a/src/mem/protocol/MESI_Two_Level-L2cache.sm b/src/mem/protocol/MESI_Two_Level-L2cache.sm
index 5a8cfae6d..ea884133e 100644
--- a/src/mem/protocol/MESI_Two_Level-L2cache.sm
+++ b/src/mem/protocol/MESI_Two_Level-L2cache.sm
@@ -72,6 +72,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
ISS, AccessPermission:Busy, desc="L2 idle, got single L1_GETS, issued memory fetch, have not seen response yet";
IS, AccessPermission:Busy, desc="L2 idle, got L1_GET_INSTR or multiple L1_GETS, issued memory fetch, have not seen response yet";
IM, AccessPermission:Busy, desc="L2 idle, got L1_GETX, issued memory fetch, have not seen response(s) yet";
+ II, AccessPermission:Busy, desc="L2 idle, got single L1_GETSPEC, issued memory fetch, have not seen response yet";
+ IEE, AccessPermission:Busy, desc="L2 idle, got single L1_EXPOSE, issued memory fetch, have not seen response yet";
// Blocking states
SS_MB, AccessPermission:Busy, desc="Blocked for L1_GETX from SS";
@@ -96,6 +98,9 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
L1_PUTX, desc="L1 replacing data";
L1_PUTX_old, desc="L1 replacing data, but no longer sharer";
+ L1_GETSPEC, desc="L1 GETSPEC request for a block mapped to us";
+ L1_EXPOSE, desc="L1 EXPOSE request for a block mapped to us";
+
// events initiated by this L2
L2_Replacement, desc="L2 Replacement", format="!r";
L2_Replacement_clean, desc="L2 Replacement, but data is clean", format="!r";
@@ -135,6 +140,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
bool Dirty, default="false", desc="Data is Dirty";
NetDest L1_GetS_IDs, desc="Set of the internal processors that want the block in shared state";
+ NetDest L1_GetSPEC_IDs, desc="Set of the internal processors that want the block speculatively";
+ NetDest L1_Expose_IDs, desc="Set of the internal processors that want the block to be exposed";
MachineID L1_GetX_ID, desc="ID of the L1 cache to forward the block to once we get a response";
int pendingAcks, desc="number of pending acks for invalidates during writeback";
}
@@ -267,6 +274,10 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
} else {
return Event:L1_PUTX_old;
}
+ } else if (type == CoherenceRequestType:GETSPEC) {
+ return Event:L1_GETSPEC;
+ } else if (type == CoherenceRequestType:EXPOSE) {
+ return Event:L1_EXPOSE;
} else {
DPRINTF(RubySlicc, "address: %#x, Request Type: %s\n", addr, type);
error("Invalid L1 forwarded request type");
@@ -399,10 +410,12 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
peek(L1RequestL2Network_in, RequestMsg) {
enqueue(DirRequestL2Network_out, RequestMsg, l2_request_latency) {
out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:GETS;
+ out_msg.Type := in_msg.Type;
out_msg.Requestor := machineID;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.idx := in_msg.idx;
+ out_msg.origin := in_msg.Requestor;
}
}
}
@@ -420,6 +433,32 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(bs_forwardSpecRequestToExclusive, "bs", desc="Forward request to the exclusive L1") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(L1RequestL2Network_out, RequestMsg, to_l1_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(cache_entry.Exclusive);
+ out_msg.MessageSize := MessageSizeType:SPECLD_Request_Control;
+ }
+ }
+ }
+
+ action(bex_forwardExposeRequestToExclusive, "bex", desc="Forward request to the exclusive L1") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(L1RequestL2Network_out, RequestMsg, to_l1_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(cache_entry.Exclusive);
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Request_Control;
+ }
+ }
+ }
+
action(c_exclusiveReplacement, "c", desc="Send data to memory") {
enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
assert(is_valid(cache_entry));
@@ -494,6 +533,25 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(ddex_sendExclusiveDataToExposeRequestor, "ddex", desc="Send data from cache to reqeustor") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+
+ out_msg.AckCount := 0 - cache_entry.Sharers.count();
+ if (cache_entry.Sharers.isElement(in_msg.Requestor)) {
+ out_msg.AckCount := out_msg.AckCount + 1;
+ }
+ }
+ }
+ }
+
action(ds_sendSharedDataToRequestor, "ds", desc="Send data from cache to reqeustor") {
peek(L1RequestL2Network_in, RequestMsg) {
enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
@@ -509,9 +567,39 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(dss_sendSharedDataToSpecRequestor, "dss", desc="Send data from cache to reqeustor") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+ out_msg.AckCount := 0;
+ }
+ }
+ }
+
+ action(dsex_sendSharedDataToExposeRequestor, "dsex", desc="Send data from cache to reqeustor") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ out_msg.AckCount := 0;
+ }
+ }
+ }
+
action(e_sendDataToGetSRequestors, "e", desc="Send data from cache to all GetS IDs") {
assert(is_valid(tbe));
- assert(tbe.L1_GetS_IDs.count() > 0);
+ assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0);
enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
assert(is_valid(cache_entry));
out_msg.addr := address;
@@ -523,9 +611,40 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(es_sendDataToGetSpecRequestors, "es", desc="Send data from cache to all GetSpec IDs") {
+ assert(is_valid(tbe));
+ assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0);
+ peek(responseL2Network_in, ResponseMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination := tbe.L1_GetSPEC_IDs; // internal nodes
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+ }
+ }
+ }
+
+ action(eex_sendDataToExposeRequestors, "eex", desc="Send data from cache to all GetSpec IDs") {
+ assert(is_valid(tbe));
+ assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0);
+ peek(responseL2Network_in, ResponseMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination := tbe.L1_Expose_IDs; // internal nodes
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+ }
+
action(ex_sendExclusiveDataToGetSRequestors, "ex", desc="Send data from cache to all GetS IDs") {
assert(is_valid(tbe));
assert(tbe.L1_GetS_IDs.count() == 1);
+ assert(tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() == 0);
enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
assert(is_valid(cache_entry));
out_msg.addr := address;
@@ -537,6 +656,21 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(exex_sendExclusiveDataToExposeRequestors, "exex", desc="Send data from cache to all GetS IDs") {
+ assert(is_valid(tbe));
+ assert(tbe.L1_Expose_IDs.count() == 1);
+ assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() == 0);
+ enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
+ out_msg.Sender := machineID;
+ out_msg.Destination := tbe.L1_Expose_IDs; // internal nodes
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+
action(ee_sendDataToGetXRequestor, "ee", desc="Send data from cache to GetX ID") {
enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
assert(is_valid(tbe));
@@ -598,11 +732,23 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
TBEs.allocate(address);
set_tbe(TBEs[address]);
tbe.L1_GetS_IDs.clear();
+ tbe.L1_GetSPEC_IDs.clear();
+ tbe.L1_Expose_IDs.clear();
tbe.DataBlk := cache_entry.DataBlk;
tbe.Dirty := cache_entry.Dirty;
tbe.pendingAcks := cache_entry.Sharers.count();
}
+ action(iw_allocateTBEWithoutCacheEntry, "iw", desc="Allocate TBE for request without a cache entry") {
+ check_allocate(TBEs);
+ assert(!is_valid(cache_entry));
+ TBEs.allocate(address);
+ set_tbe(TBEs[address]);
+ tbe.L1_GetS_IDs.clear();
+ tbe.L1_GetSPEC_IDs.clear();
+ tbe.L1_Expose_IDs.clear();
+ }
+
action(s_deallocateTBE, "s", desc="Deallocate external TBE") {
TBEs.deallocate(address);
unset_tbe();
@@ -668,6 +814,20 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(sss_recordGetSPECL1ID, "\sss", desc="Record L1 GetSpec for load response") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ assert(is_valid(tbe));
+ tbe.L1_GetSPEC_IDs.add(in_msg.Requestor);
+ }
+ }
+
+ action(ssss_recordExposeL1ID, "\ssss", desc="Record L1 Expose for load response") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ assert(is_valid(tbe));
+ tbe.L1_Expose_IDs.add(in_msg.Requestor);
+ }
+ }
+
action(xx_recordGetXL1ID, "\x", desc="Record L1 GetX for store response") {
peek(L1RequestL2Network_in, RequestMsg) {
assert(is_valid(tbe));
@@ -793,21 +953,22 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
// BASE STATE - I
// Transitions from I (Idle)
- transition({NP, IS, ISS, IM, SS, M, M_I, I_I, S_I, MT_IB, MT_SB}, L1_PUTX) {
+ transition({NP, IS, ISS, IEE, IM, II, SS, M, M_I, I_I, S_I, MT_IB, MT_SB}, L1_PUTX) {
t_sendWBAck;
jj_popL1RequestQueue;
}
- transition({NP, SS, M, MT, M_I, I_I, S_I, IS, ISS, IM, MT_IB, MT_SB}, L1_PUTX_old) {
+ transition({NP, SS, M, MT, M_I, I_I, S_I, IS, ISS, IEE, IM, II, MT_IB, MT_SB}, L1_PUTX_old) {
t_sendWBAck;
jj_popL1RequestQueue;
}
- transition({IM, IS, ISS, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L2_Replacement, L2_Replacement_clean}) {
+ transition({IM, IS, ISS, IEE, II, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L2_Replacement, L2_Replacement_clean}) {
zz_stallAndWaitL1RequestQueue;
}
- transition({IM, IS, ISS, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) {
+ // [InvisiSpec] TODO: How to handle Mem_Inv at II? Stall or ignore?
+ transition({IM, IS, ISS, IEE, II, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) {
zn_recycleResponseNetwork;
}
@@ -816,7 +977,7 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
- transition({SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_GET_INSTR, L1_GETX, L1_UPGRADE}) {
+ transition({SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETX, L1_UPGRADE, L1_GETSPEC}) {
zz_stallAndWaitL1RequestQueue;
}
@@ -832,6 +993,17 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ transition(NP, L1_EXPOSE, IEE) {
+ qq_allocateL2CacheBlock;
+ ll_clearSharers;
+ nn_addSharer;
+ i_allocateTBE;
+ ssss_recordExposeL1ID;
+ a_issueFetchToMemory;
+ uu_profileMiss;
+ jj_popL1RequestQueue;
+ }
+
transition(NP, L1_GET_INSTR, IS) {
qq_allocateL2CacheBlock;
ll_clearSharers;
@@ -854,12 +1026,28 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ transition(NP, L1_GETSPEC, II) {
+ iw_allocateTBEWithoutCacheEntry;
+ sss_recordGetSPECL1ID;
+ a_issueFetchToMemory;
+ jj_popL1RequestQueue;
+ }
+
// transitions from IS/IM
transition(ISS, Mem_Data, MT_MB) {
m_writeDataToCache;
ex_sendExclusiveDataToGetSRequestors;
+ es_sendDataToGetSpecRequestors;
+ s_deallocateTBE;
+ o_popIncomingResponseQueue;
+ }
+
+ transition(IEE, Mem_Data, MT_MB) {
+ m_writeDataToCache;
+ exex_sendExclusiveDataToExposeRequestors;
+ es_sendDataToGetSpecRequestors;
s_deallocateTBE;
o_popIncomingResponseQueue;
}
@@ -867,6 +1055,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
transition(IS, Mem_Data, SS) {
m_writeDataToCache;
e_sendDataToGetSRequestors;
+ es_sendDataToGetSpecRequestors;
+ eex_sendDataToExposeRequestors;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
@@ -879,18 +1069,48 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
o_popIncomingResponseQueue;
}
- transition({IS, ISS}, {L1_GETS, L1_GET_INSTR}, IS) {
+ transition(II, Mem_Data, NP) {
+ es_sendDataToGetSpecRequestors;
+ s_deallocateTBE;
+ o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
+ }
+
+ transition({IS, ISS, IEE}, {L1_GETS, L1_GET_INSTR}, IS) {
nn_addSharer;
ss_recordGetSL1ID;
uu_profileMiss;
jj_popL1RequestQueue;
}
- transition({IS, ISS}, L1_GETX) {
+ transition({IS, ISS, IEE}, L1_EXPOSE, IS) {
+ nn_addSharer;
+ ssss_recordExposeL1ID;
+ uu_profileMiss;
+ jj_popL1RequestQueue;
+ }
+
+ transition({IS, ISS, IEE}, L1_GETSPEC, IS) {
+ sss_recordGetSPECL1ID;
+ jj_popL1RequestQueue;
+ }
+
+ transition(II, L1_GETSPEC) {
+ sss_recordGetSPECL1ID;
+ jj_popL1RequestQueue;
+ }
+
+ // [InvisiSpec] L1_GET_INSTR should not be received at II
+ transition(II, {L1_GETS, L1_EXPOSE}) {
zz_stallAndWaitL1RequestQueue;
}
- transition(IM, {L1_GETX, L1_GETS, L1_GET_INSTR}) {
+ // [InvisiSpec] TODO: Maybe we can optimize this?
+ transition({IS, ISS, IEE, II}, L1_GETX) {
+ zz_stallAndWaitL1RequestQueue;
+ }
+
+ transition(IM, {L1_GETX, L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETSPEC}) {
zz_stallAndWaitL1RequestQueue;
}
@@ -903,6 +1123,19 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ transition(SS, L1_EXPOSE) {
+ dsex_sendSharedDataToExposeRequestor;
+ nn_addSharer;
+ set_setMRU;
+ uu_profileHit;
+ jj_popL1RequestQueue;
+ }
+
+ transition({SS, M}, L1_GETSPEC) {
+ dss_sendSharedDataToSpecRequestor;
+ jj_popL1RequestQueue;
+ }
+
transition(SS, L1_GETX, SS_MB) {
d_sendDataToRequestor;
@@ -956,6 +1189,14 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ // [InvisiSpec] TODO
+ transition(M, L1_EXPOSE, MT_MB) {
+ ddex_sendExclusiveDataToExposeRequestor;
+ set_setMRU;
+ uu_profileHit;
+ jj_popL1RequestQueue;
+ }
+
transition(M, {L2_Replacement, MEM_Inv}, M_I) {
i_allocateTBE;
c_exclusiveReplacement;
@@ -986,6 +1227,20 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ // [InvisiSpec] TODO: Ack packets are currently not recorded as EXPOSE traffic.
+ transition(MT, L1_EXPOSE, MT_IIB) {
+ bex_forwardExposeRequestToExclusive;
+ uu_profileMiss;
+ set_setMRU;
+ jj_popL1RequestQueue;
+ }
+
+ // [InvisiSpec] Do we need to block?
+ transition(MT, L1_GETSPEC) {
+ bs_forwardSpecRequestToExclusive;
+ jj_popL1RequestQueue;
+ }
+
transition(MT, {L2_Replacement, MEM_Inv}, MT_I) {
i_allocateTBE;
f_sendInvToSharers;
@@ -1039,7 +1294,7 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
// writeback states
- transition({I_I, S_I, MT_I, MCT_I, M_I}, {L1_GETX, L1_UPGRADE, L1_GETS, L1_GET_INSTR}) {
+ transition({I_I, S_I, MT_I, MCT_I, M_I}, {L1_GETX, L1_UPGRADE, L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETSPEC}) {
zz_stallAndWaitL1RequestQueue;
}
diff --git a/src/mem/protocol/MESI_Two_Level-dir.sm b/src/mem/protocol/MESI_Two_Level-dir.sm
index 991de5a2c..9934f57a8 100644
--- a/src/mem/protocol/MESI_Two_Level-dir.sm
+++ b/src/mem/protocol/MESI_Two_Level-dir.sm
@@ -49,6 +49,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
M, AccessPermission:Maybe_Stale, desc="memory copy may be stale, i.e. other modified copies may exist";
IM, AccessPermission:Busy, desc="Intermediate State I>M";
+ IE, AccessPermission:Busy, desc="Intermediate State I>M";
+ II, AccessPermission:Busy, desc="Intermediate State I>I for SpecFetch";
MI, AccessPermission:Busy, desc="Intermediate State M>I";
M_DRD, AccessPermission:Busy, desc="Intermediate State when there is a dma read";
M_DRDI, AccessPermission:Busy, desc="Intermediate State when there is a dma read";
@@ -59,6 +61,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
// Events
enumeration(Event, desc="Directory events") {
Fetch, desc="A memory fetch arrives";
+ Expose, desc="A memory expose arrives";
+ SpecFetch, desc="A memory fetch for speculative execution arrives";
Data, desc="writeback data arrives";
Memory_Data, desc="Fetched data from memory arrives";
Memory_Ack, desc="Writeback Ack from memory arrives";
@@ -198,6 +202,10 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
assert(in_msg.Destination.isElement(machineID));
if (isGETRequest(in_msg.Type)) {
trigger(Event:Fetch, in_msg.addr, TBEs[in_msg.addr]);
+ } else if (in_msg.Type == CoherenceRequestType:EXPOSE) {
+ trigger(Event:Expose, in_msg.addr, TBEs[in_msg.addr]);
+ } else if (in_msg.Type == CoherenceRequestType:GETSPEC) {
+ trigger(Event:SpecFetch, in_msg.addr, TBEs[in_msg.addr]);
} else if (in_msg.Type == CoherenceRequestType:DMA_READ) {
trigger(Event:DMA_READ, makeLineAddress(in_msg.addr),
TBEs[makeLineAddress(in_msg.addr)]);
@@ -275,6 +283,40 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
}
}
+ action(dex_sendExposeData, "dex", desc="Send data to requestor") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:MEMORY_DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.OriginalRequestorMachId);
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := false;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+
+ Entry e := getDirectoryEntry(in_msg.addr);
+ e.Owner := in_msg.OriginalRequestorMachId;
+ }
+ }
+ }
+
+ action(ds_sendSpecData, "ds", desc="Send data to requestor") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:MEMORY_DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.OriginalRequestorMachId);
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := false;
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+
+ Entry e := getDirectoryEntry(in_msg.addr);
+ e.Owner := in_msg.OriginalRequestorMachId;
+ }
+ }
+ }
+
// Actions
action(aa_sendAck, "aa", desc="Send ack to L2") {
peek(memQueue_in, MemoryMsg) {
@@ -306,7 +348,19 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
peek(requestNetwork_in, RequestMsg) {
- queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency);
+ queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 0);
+ }
+ }
+
+ action(qfs_queueMemorySpecFetchRequest, "qfs", desc="Queue off-chip fetch request") {
+ peek(requestNetwork_in, RequestMsg) {
+ queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 1);
+ }
+ }
+
+ action(qfe_queueMemoryExposeRequest, "qfe", desc="Queue off-chip fetch request") {
+ peek(requestNetwork_in, RequestMsg) {
+ queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 2);
}
}
@@ -320,7 +374,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
//added by SS for dma
action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") {
peek(requestNetwork_in, RequestMsg) {
- queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency);
+ assert(false);
+ queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.Requestor, -1, -1);
}
}
@@ -425,7 +480,18 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
j_popIncomingRequestQueue;
}
- transition(M, Fetch) {
+ transition(I, Expose, IE) {
+ qfe_queueMemoryExposeRequest;
+ j_popIncomingRequestQueue;
+ }
+
+ transition(I, SpecFetch, II) {
+ qfs_queueMemorySpecFetchRequest;
+ j_popIncomingRequestQueue;
+ }
+
+ // [InvisiSpec] Is it secure?
+ transition(M, {Fetch, Expose, SpecFetch}) {
inv_sendCacheInvalidate;
z_stallAndWaitRequest;
}
@@ -435,6 +501,19 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
l_popMemQueue;
kd_wakeUpDependents;
}
+
+ transition(IE, Memory_Data, M) {
+ dex_sendExposeData;
+ l_popMemQueue;
+ kd_wakeUpDependents;
+ }
+
+ transition(II, Memory_Data, I) {
+ ds_sendSpecData;
+ l_popMemQueue;
+ kd_wakeUpDependents;
+ }
+
//added by SS
transition(M, CleanReplacement, I) {
a_sendAck;
@@ -481,11 +560,11 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
kd_wakeUpDependents;
}
- transition({ID, ID_W, M_DRDI, M_DWRI, IM, MI}, {Fetch, Data} ) {
+ transition({ID, ID_W, M_DRDI, M_DWRI, IM, IE, MI, II}, {Fetch, Expose, SpecFetch, Data} ) {
z_stallAndWaitRequest;
}
- transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, MI}, {DMA_WRITE, DMA_READ} ) {
+ transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, IE, MI, II}, {DMA_WRITE, DMA_READ} ) {
zz_recycleDMAQueue;
}
diff --git a/src/mem/protocol/MESI_Two_Level-msg.sm b/src/mem/protocol/MESI_Two_Level-msg.sm
index 738019e7b..d4269193d 100644
--- a/src/mem/protocol/MESI_Two_Level-msg.sm
+++ b/src/mem/protocol/MESI_Two_Level-msg.sm
@@ -36,6 +36,8 @@ enumeration(CoherenceRequestType, desc="...") {
GET_INSTR, desc="Get Instruction";
INV, desc="INValidate";
PUTX, desc="Replacement message";
+ GETSPEC, desc="Get Speculatively";
+ EXPOSE, desc="Expose";
WB_ACK, desc="Writeback ack";
@@ -68,7 +70,9 @@ structure(RequestMsg, desc="...", interface="Message") {
int Len;
bool Dirty, default="false", desc="Dirty bit";
PrefetchBit Prefetch, desc="Is this a prefetch request";
-
+ MachineID origin;
+ int idx, default="-1", desc="LQ index";
+
bool functionalRead(Packet *pkt) {
// Only PUTX messages contains the data block
if (Type == CoherenceRequestType:PUTX) {
diff --git a/src/mem/protocol/RubySlicc_Defines.sm b/src/mem/protocol/RubySlicc_Defines.sm
index eb235f8f3..7df82847e 100644
--- a/src/mem/protocol/RubySlicc_Defines.sm
+++ b/src/mem/protocol/RubySlicc_Defines.sm
@@ -35,7 +35,7 @@ Cycles recycle_latency;
// Functions implemented in the AbstractController class for
// making timing access to the memory maintained by the
// memory controllers.
-void queueMemoryRead(MachineID id, Addr addr, Cycles latency);
+void queueMemoryRead(MachineID id, Addr addr, Cycles latency, MachineID origin, int idx, int type);
void queueMemoryWrite(MachineID id, Addr addr, Cycles latency,
DataBlock block);
void queueMemoryWritePartial(MachineID id, Addr addr, Cycles latency,
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 1beb3f2e0..a330e731e 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -175,6 +175,11 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
Release, desc="Release operation";
Acquire, desc="Acquire opertion";
AcquireRelease, desc="Acquire and Release opertion";
+ // [InvisiSpec] New request types
+ SPEC_LD, desc="Speculative load";
+ EXPOSE, desc="Expose";
+ VALIDATE, desc="Validate";
+ SPEC_FLUSH, desc="Flush SpecBuffer";
}
enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
@@ -255,6 +260,12 @@ enumeration(MessageSizeType, desc="...") {
Unblock_Control, desc="Unblock control";
Persistent_Control, desc="Persistent request activation messages";
Completion_Control, desc="Completion messages";
+ SPECLD_Control, desc="SPECLD control message";
+ SPECLD_Request_Control, desc="SPECLD forward message";
+ SPECLD_Data, desc="SPECLD data response";
+ EXPOSE_Control, desc="EXPOSE control message";
+ EXPOSE_Request_Control, desc="EXPOSE forward request";
+ EXPOSE_Data, desc="EXPOSE data response";
}
// AccessType
@@ -344,6 +355,7 @@ enumeration(RequestStatus, desc="...", default="RequestStatus_NULL") {
Issued, desc="The sequencer successfully issued the request";
BufferFull, desc="Can not issue because the sequencer is full";
Aliased, desc="This request aliased with a currently outstanding request";
+ Merged, desc="This request merged with a currently outstanding request";
NULL, desc="";
}
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index 27a045d29..5c73b4320 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -113,7 +113,7 @@ structure (Sequencer, external = "yes") {
Cycles, Cycles, Cycles);
void checkCoherence(Addr);
- void evictionCallback(Addr);
+ void evictionCallback(Addr, bool);
void recordRequestType(SequencerRequestType);
bool checkResourceAvailable(CacheResourceType, Addr);
void invalidateSC(Addr);
@@ -172,6 +172,7 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
HSAScope scope, desc="HSA scope";
HSASegment segment, desc="HSA segment";
PacketPtr pkt, desc="Packet associated with this request";
+ int idx, desc="LQ index";
}
structure(AbstractEntry, primitive="yes", external = "yes") {
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 5cb08ca39..f912a0a6c 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -149,6 +149,9 @@ class Request
MEM_SWAP = 0x00400000,
MEM_SWAP_COND = 0x00800000,
+ /** [InvisiSpec] it is a spec request */
+ SPEC = 0x00004000,
+
/** The request is a prefetch. */
PREFETCH = 0x01000000,
/** The request should be prefetched into the exclusive state. */
@@ -784,6 +787,7 @@ class Request
bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
bool isInstFetch() const { return _flags.isSet(INST_FETCH); }
bool isPrefetch() const { return _flags.isSet(PREFETCH); }
+ bool isSpec() const { return _flags.isSet(SPEC); }
bool isLLSC() const { return _flags.isSet(LLSC); }
bool isPriv() const { return _flags.isSet(PRIVILEGED); }
bool isLockedRMW() const { return _flags.isSet(LOCKED_RMW); }
diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript
index 64e798fd5..b79306c04 100644
--- a/src/mem/ruby/SConscript
+++ b/src/mem/ruby/SConscript
@@ -56,6 +56,9 @@ DebugFlag('RubySystem')
DebugFlag('RubyTester')
DebugFlag('RubyStats')
DebugFlag('RubyResourceStalls')
+DebugFlag('SpecBuffer')
+DebugFlag('SpecBufferValidate')
+DebugFlag('MemSpecBuffer')
CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester',
'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache',
diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc
index 57834f2e2..7d4d71eb3 100644
--- a/src/mem/ruby/network/Network.cc
+++ b/src/mem/ruby/network/Network.cc
@@ -144,12 +144,18 @@ Network::MessageSizeType_to_int(MessageSizeType size_type)
case MessageSizeType_Unblock_Control:
case MessageSizeType_Persistent_Control:
case MessageSizeType_Completion_Control:
+ case MessageSizeType_SPECLD_Control:
+ case MessageSizeType_SPECLD_Request_Control:
+ case MessageSizeType_EXPOSE_Control:
+ case MessageSizeType_EXPOSE_Request_Control:
return m_control_msg_size;
case MessageSizeType_Data:
case MessageSizeType_Response_Data:
case MessageSizeType_ResponseLocal_Data:
case MessageSizeType_ResponseL2hit_Data:
case MessageSizeType_Writeback_Data:
+ case MessageSizeType_SPECLD_Data:
+ case MessageSizeType_EXPOSE_Data:
return m_data_msg_size;
default:
panic("Invalid range for type MessageSizeType");
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index b920ff7b0..742c07705 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -41,6 +41,7 @@
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "debug/RubyQueue.hh"
+#include "debug/MemSpecBuffer.hh"
#include "mem/protocol/MemoryMsg.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
@@ -96,6 +97,14 @@ AbstractController::regStats()
.name(name() + ".fully_busy_cycles")
.desc("cycles for which number of transistions == max transitions")
.flags(Stats::nozero);
+ m_expose_hits
+ .name(name() + ".expose_hits")
+ .desc("number of expose hits at LLC spec buffer")
+ .flags(Stats::nozero);
+ m_expose_misses
+ .name(name() + ".expose_misses")
+ .desc("number of expose misses at LLC spec buffer")
+ .flags(Stats::nozero);
}
void
@@ -238,8 +247,67 @@ AbstractController::getMasterPort(const std::string &if_name,
void
AbstractController::queueMemoryRead(const MachineID &id, Addr addr,
- Cycles latency)
+ Cycles latency, MachineID origin, int idx, int type)
{
+ int coreId = origin.num;
+ int sbeId = idx;
+ // type 0: non-spec 1: spec 2: expose
+ // DPRINTFR(MemSpecBuffer, "%10s MemRead (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+ // if idx == -1, it is a write request which cannot be spec or expose.
+ assert(!(type != 0 && sbeId == -1));
+ assert(sbeId >= -1 && sbeId <= 65);
+ assert(coreId < 8);
+ assert(type >=0 && type <= 2);
+ if (type == 0) {
+ for (int c = 0; c < 8; ++c) {
+ for (int i = 0; i < 66; ++i) {
+ if (m_specBuf[c][i].address == addr) {
+ DPRINTFR(MemSpecBuffer, "%10s Cleared by Read (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+ m_specBuf[c][i].address = 0;
+ m_specBuf[c][i].data.clear();
+ }
+ }
+ }
+ } else if (type == 1) {
+
+ } else if (type == 2) {
+ if (m_specBuf[coreId][sbeId].address == addr) {
+ DPRINTFR(MemSpecBuffer, "%10s Expose Hit (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+ ++m_expose_hits;
+ assert(getMemoryQueue());
+ std::shared_ptr<MemoryMsg> msg = std::make_shared<MemoryMsg>(clockEdge());
+ (*msg).m_addr = addr;
+ (*msg).m_Sender = m_machineID;
+ (*msg).m_OriginalRequestorMachId = id;
+ (*msg).m_Type = MemoryRequestType_MEMORY_READ;
+ (*msg).m_MessageSize = MessageSizeType_Response_Data;
+ (*msg).m_DataBlk = m_specBuf[coreId][sbeId].data;
+ getMemoryQueue()->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)));
+ for (int c = 0; c < 8; ++c) {
+ for (int i = 0; i < 66; ++i) {
+ if (m_specBuf[c][i].address == addr) {
+ DPRINTFR(MemSpecBuffer, "%10s Cleared by Expose Hit (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+ m_specBuf[c][i].address = 0;
+ m_specBuf[c][i].data.clear();
+ }
+ }
+ }
+ return;
+ } else {
+ DPRINTFR(MemSpecBuffer, "%10s Expose Miss (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+ ++m_expose_misses;
+ for (int c = 0; c < 8; ++c) {
+ for (int i = 0; i < 66; ++i) {
+ if (m_specBuf[c][i].address == addr) {
+ DPRINTFR(MemSpecBuffer, "%10s Cleared by Expose Miss (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+ m_specBuf[c][i].address = 0;
+ m_specBuf[c][i].data.clear();
+ }
+ }
+ }
+ }
+ }
+
RequestPtr req = new Request(addr, RubySystem::getBlockSizeBytes(), 0,
m_masterId);
@@ -248,6 +316,9 @@ AbstractController::queueMemoryRead(const MachineID &id, Addr addr,
pkt->dataDynamic(newData);
SenderState *s = new SenderState(id);
+ s->type = type;
+ s->coreId = coreId;
+ s->sbeId = sbeId;
pkt->pushSenderState(s);
// Use functional rather than timing accesses during warmup
@@ -339,6 +410,9 @@ AbstractController::recvTimingResp(PacketPtr pkt)
SenderState *s = dynamic_cast<SenderState *>(pkt->senderState);
(*msg).m_OriginalRequestorMachId = s->id;
+ int type = s->type;
+ int coreId = s->coreId;
+ int sbeId = s->sbeId;
delete s;
if (pkt->isRead()) {
@@ -348,6 +422,12 @@ AbstractController::recvTimingResp(PacketPtr pkt)
// Copy data from the packet
(*msg).m_DataBlk.setData(pkt->getPtr<uint8_t>(), 0,
RubySystem::getBlockSizeBytes());
+ if (type == 1) {
+ DPRINTFR(MemSpecBuffer, "%10s Updated by ReadSpec (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(pkt->getAddr()));
+ m_specBuf[coreId][sbeId].address = pkt->getAddr();
+ m_specBuf[coreId][sbeId].data.setData(pkt->getPtr<uint8_t>(), 0,
+ RubySystem::getBlockSizeBytes());
+ }
} else if (pkt->isWrite()) {
(*msg).m_Type = MemoryRequestType_MEMORY_WB;
(*msg).m_MessageSize = MessageSizeType_Writeback_Control;
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 35cd3d2a5..b65a511d0 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -129,7 +129,7 @@ class AbstractController : public MemObject, public Consumer
BaseMasterPort& getMasterPort(const std::string& if_name,
PortID idx = InvalidPortID);
- void queueMemoryRead(const MachineID &id, Addr addr, Cycles latency);
+ void queueMemoryRead(const MachineID &id, Addr addr, Cycles latency, MachineID origin, int idx, int type);
void queueMemoryWrite(const MachineID &id, Addr addr, Cycles latency,
const DataBlock &block);
void queueMemoryWritePartial(const MachineID &id, Addr addr, Cycles latency,
@@ -199,6 +199,8 @@ class AbstractController : public MemObject, public Consumer
//! Counter for the number of cycles when the transitions carried out
//! were equal to the maximum allowed
Stats::Scalar m_fully_busy_cycles;
+ Stats::Scalar m_expose_hits;
+ Stats::Scalar m_expose_misses;
//! Histogram for profiling delay for the messages this controller
//! cares for
@@ -250,6 +252,9 @@ class AbstractController : public MemObject, public Consumer
{
// Id of the machine from which the request originated.
MachineID id;
+ int type;
+ int coreId;
+ int sbeId;
SenderState(MachineID _id) : id(_id)
{}
@@ -258,6 +263,14 @@ class AbstractController : public MemObject, public Consumer
private:
/** The address range to which the controller responds on the CPU side. */
const AddrRangeList addrRanges;
+
+ struct SBE
+ {
+ Addr address;
+ DataBlock data;
+ };
+
+ SBE m_specBuf[8][66];
};
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index 6c84f3823..2fc4c9f98 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -60,6 +60,7 @@ class RubyRequest : public Message
int m_wfid;
HSAScope m_scope;
HSASegment m_segment;
+ int m_idx;
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -82,6 +83,11 @@ class RubyRequest : public Message
m_segment(_segment)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ if (_pkt->reqIdx == -1) {
+ m_idx = _pkt->reqIdx;
+ } else {
+ m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+ }
}
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -109,6 +115,11 @@ class RubyRequest : public Message
m_segment(_segment)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ if (_pkt->reqIdx == -1) {
+ m_idx = _pkt->reqIdx;
+ } else {
+ m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+ }
}
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -137,6 +148,11 @@ class RubyRequest : public Message
m_segment(_segment)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ if (_pkt->reqIdx == -1) {
+ m_idx = _pkt->reqIdx;
+ } else {
+ m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+ }
}
diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc
index 8d99c90aa..dc5898bea 100644
--- a/src/mem/ruby/structures/CacheMemory.cc
+++ b/src/mem/ruby/structures/CacheMemory.cc
@@ -176,7 +176,9 @@ CacheMemory::tryCacheAccess(Addr address, RubyRequestType type,
return true;
}
if ((entry->m_Permission == AccessPermission_Read_Only) &&
- (type == RubyRequestType_LD || type == RubyRequestType_IFETCH)) {
+ (type == RubyRequestType_LD ||
+ type == RubyRequestType_IFETCH ||
+ type == RubyRequestType_SPEC_LD)) {
return true;
}
// The line must not be accessible
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 5977ce9ef..bf030034b 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -246,6 +246,7 @@ RubyPort::PioSlavePort::recvAtomic(PacketPtr pkt)
panic("Could not find address in Ruby PIO address ranges!\n");
}
+// [InvisiSpec] Request on the way from CPU to Ruby
bool
RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
{
@@ -428,6 +429,7 @@ RubyPort::MemSlavePort::recvFunctional(PacketPtr pkt)
}
}
+// [InvisiSpec] On the way from Ruby to CPU
void
RubyPort::ruby_hit_callback(PacketPtr pkt)
{
@@ -511,6 +513,7 @@ RubyPort::drain()
}
}
+// [InvisiSpec] Still on the way from Ruby to CPU
void
RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
{
@@ -544,7 +547,7 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
}
// Flush, acquire, release requests don't access physical memory
- if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
+ if (pkt->isFlush() || pkt->isExpose() || pkt->cmd == MemCmd::MemFenceReq) {
accessPhysMem = false;
}
@@ -571,6 +574,7 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
// Ruby protocol.
schedTimingResp(pkt, curTick());
} else {
+ // [InvisiSpec] Delete the packet if a reponse is not required
delete pkt;
}
@@ -601,7 +605,7 @@ RubyPort::MemSlavePort::isPhysMemAddress(Addr addr) const
}
void
-RubyPort::ruby_eviction_callback(Addr address)
+RubyPort::ruby_eviction_callback(Addr address, bool external)
{
DPRINTF(RubyPort, "Sending invalidations.\n");
// Allocate the invalidate request and packet on the stack, as it is
@@ -612,6 +616,9 @@ RubyPort::ruby_eviction_callback(Addr address)
// Use a single packet to signal all snooping ports of the invalidation.
// This assumes that snooping ports do NOT modify the packet/request
Packet pkt(&request, MemCmd::InvalidateReq);
+ if (external) {
+ pkt.setExternalEviction();
+ }
for (CpuPortIter p = slave_ports.begin(); p != slave_ports.end(); ++p) {
// check if the connected master port is snooping
if ((*p)->isSnooping()) {
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 146443282..9c0200829 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -172,7 +172,7 @@ class RubyPort : public MemObject
void trySendRetries();
void ruby_hit_callback(PacketPtr pkt);
void testDrainComplete();
- void ruby_eviction_callback(Addr address);
+ void ruby_eviction_callback(Addr address, bool external);
/**
* Called by the PIO port when receiving a timing response.
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 4037fb8f1..ed663f9c6 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -36,6 +36,8 @@
#include "debug/ProtocolTrace.hh"
#include "debug/RubySequencer.hh"
#include "debug/RubyStats.hh"
+#include "debug/SpecBuffer.hh"
+#include "debug/SpecBufferValidate.hh"
#include "mem/packet.hh"
#include "mem/protocol/PrefetchBit.hh"
#include "mem/protocol/RubyAccessMode.hh"
@@ -54,7 +56,9 @@ RubySequencerParams::create()
Sequencer::Sequencer(const Params *p)
: RubyPort(p), m_IncompleteTimes(MachineType_NUM),
- deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check")
+ deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check"),
+ m_specBuf(33),
+ specBufferHitEvent([this]{ specBufferHitCallback(); }, "Sequencer spec buffer hit")
{
m_outstanding_count = 0;
@@ -160,6 +164,7 @@ void Sequencer::resetStats()
}
}
+// [InvisiSpec] Request on the way from CPU to Ruby
// Insert the request on the correct request table. Return true if
// the entry was already present.
RequestStatus
@@ -190,6 +195,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
RequestTable::value_type default_entry(line_addr,
(SequencerRequest*) NULL);
+ // [InvisiSpec] If store
if ((request_type == RubyRequestType_ST) ||
(request_type == RubyRequestType_RMW_Read) ||
(request_type == RubyRequestType_RMW_Write) ||
@@ -217,6 +223,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
m_store_waiting_on_store++;
return RequestStatus_Aliased;
}
+ // [InvisiSpec] If load
} else {
// Check if there is any outstanding write request for the same
// cache line.
@@ -232,6 +239,16 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
RequestTable::iterator i = r.first;
i->second = new SequencerRequest(pkt, request_type, curCycle());
m_outstanding_count++;
+ } else if (request_type == RubyRequestType_SPEC_LD) {
+ auto i = m_readRequestTable.find(line_addr);
+ if (i->second->m_type == RubyRequestType_SPEC_LD) {
+ DPRINTFR(SpecBuffer, "%10s Merging (idx=%d-%d, addr=%#x) with %d\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()), i->second->pkt->reqIdx);
+ i->second->dependentSpecRequests.push_back(pkt);
+ return RequestStatus_Merged;
+ } else {
+ m_load_waiting_on_load++;
+ return RequestStatus_Aliased;
+ }
} else {
// There is an outstanding read request for the cache line
m_load_waiting_on_load++;
@@ -412,6 +429,19 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
initialRequestTime, forwardRequestTime, firstResponseTime);
}
+bool Sequencer::updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress) {
+ uint8_t idx = pkt->reqIdx;
+ SBE& sbe = m_specBuf[idx];
+ int blkIdx = pkt->isFirst() ? 0 : 1;
+ SBB& sbb = sbe.blocks[blkIdx];
+ if (makeLineAddress(sbb.reqAddress) == dataAddress) {
+ sbb.data = data;
+ return true;
+ }
+ return false;
+}
+
+// [InvisiSpec] Called by Ruby to send a response to CPU.
void
Sequencer::readCallback(Addr address, DataBlock& data,
bool externalHit, const MachineType mach,
@@ -430,13 +460,79 @@ Sequencer::readCallback(Addr address, DataBlock& data,
markRemoved();
assert((request->m_type == RubyRequestType_LD) ||
+ (request->m_type == RubyRequestType_SPEC_LD) ||
+ (request->m_type == RubyRequestType_EXPOSE) ||
(request->m_type == RubyRequestType_IFETCH));
+
+ PacketPtr pkt = request->pkt;
+ if (pkt->isSpec()) {
+ assert(!pkt->onlyAccessSpecBuff());
+ DPRINTFR(SpecBuffer, "%10s SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ updateSBB(pkt, data, address);
+ if (!externalHit) {
+ pkt->setL1Hit();
+ }
+ } else if (pkt->isExpose()) {
+ DPRINTFR(SpecBuffer, "%10s EXPOSE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ } else if (pkt->isValidate()) {
+ DPRINTFR(SpecBuffer, "%10s VALIDATE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ uint8_t idx = pkt->reqIdx;
+ SBE& sbe = m_specBuf[idx];
+ int blkIdx = pkt->isFirst() ? 0 : 1;
+ SBB& sbb = sbe.blocks[blkIdx];
+ assert(makeLineAddress(sbb.reqAddress) == address);
+ if (!memcmp(sbb.data.getData(getOffset(pkt->getAddr()), pkt->getSize()), data.getData(getOffset(pkt->getAddr()), pkt->getSize()), pkt->getSize())) {
+ *(pkt->getPtr<uint8_t>()) = 1;
+ } else {
+ // std::ostringstream os;
+ // sbb.data.print(os);
+ // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
+ // os.str("");
+ // data.print(os);
+ // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
+ *(pkt->getPtr<uint8_t>()) = 0;
+ }
+ }
+
+ for (auto& dependentPkt : request->dependentSpecRequests) {
+ assert(!dependentPkt->onlyAccessSpecBuff());
+ DPRINTFR(SpecBuffer, "%10s Merged SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), dependentPkt->reqIdx, dependentPkt->isFirst()? 0 : 1, printAddress(dependentPkt->getAddr()));
+ assert(dependentPkt->isSpec());
+ updateSBB(dependentPkt, data, address);
+ if (!externalHit) {
+ dependentPkt->setL1Hit();
+ }
+ memcpy(dependentPkt->getPtr<uint8_t>(),
+ data.getData(getOffset(dependentPkt->getAddr()), dependentPkt->getSize()),
+ dependentPkt->getSize());
+ ruby_hit_callback(dependentPkt);
+ }
hitCallback(request, data, true, mach, externalHit,
initialRequestTime, forwardRequestTime, firstResponseTime);
}
void
+Sequencer::specBufferHitCallback()
+{
+ assert(m_specRequestQueue.size());
+ while (m_specRequestQueue.size()) {
+ auto specReq = m_specRequestQueue.front();
+ if (specReq.second <= curTick()) {
+ PacketPtr pkt = specReq.first;
+ assert(pkt->onlyAccessSpecBuff());
+ DPRINTFR(SpecBuffer, "%10s SB Hit Callback (idx=%d, addr=%#x)\n", curTick(), pkt->reqIdx, printAddress(pkt->getAddr()));
+ ruby_hit_callback(pkt);
+ m_specRequestQueue.pop();
+ } else {
+ schedule(specBufferHitEvent, specReq.second);
+ break;
+ }
+ }
+}
+
+// [InvisiSpec] Response on the way from Ruby to CPU
+void
Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
bool llscSuccess,
const MachineType mach, const bool externalHit,
@@ -470,8 +566,9 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
if (RubySystem::getWarmupEnabled()) {
data.setData(pkt->getConstPtr<uint8_t>(),
getOffset(request_address), pkt->getSize());
- } else if (!pkt->isFlush()) {
+ } else if (!pkt->isFlush() && !pkt->isExpose() && !pkt->isValidate()) {
if ((type == RubyRequestType_LD) ||
+ (type == RubyRequestType_SPEC_LD) ||
(type == RubyRequestType_IFETCH) ||
(type == RubyRequestType_RMW_Read) ||
(type == RubyRequestType_Locked_RMW_Read) ||
@@ -534,6 +631,7 @@ Sequencer::empty() const
return m_writeRequestTable.empty() && m_readRequestTable.empty();
}
+// [InvisiSpec] Request on the way from CPU to Ruby
RequestStatus
Sequencer::makeRequest(PacketPtr pkt)
{
@@ -544,7 +642,56 @@ Sequencer::makeRequest(PacketPtr pkt)
RubyRequestType primary_type = RubyRequestType_NULL;
RubyRequestType secondary_type = RubyRequestType_NULL;
- if (pkt->isLLSC()) {
+ // [InvisiSpec] Handle new requests
+ if (pkt->isSpec()) {
+ assert(pkt->cmd == MemCmd::ReadSpecReq);
+ assert(pkt->isSplit || pkt->isFirst());
+ uint8_t idx = pkt->reqIdx;
+ SBE& sbe = m_specBuf[idx];
+ sbe.isSplit = pkt->isSplit;
+ int blkIdx = pkt->isFirst() ? 0 : 1;
+ SBB& sbb = sbe.blocks[blkIdx];
+ sbb.reqAddress = pkt->getAddr();
+ sbb.reqSize = pkt->getSize();
+ if (pkt->onlyAccessSpecBuff()) {
+ int srcIdx = pkt->srcIdx;
+ SBE& srcEntry = m_specBuf[srcIdx];
+ if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[0].reqAddress)) {
+ sbb.data = srcEntry.blocks[0].data;
+ } else if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[1].reqAddress)) {
+ sbb.data = srcEntry.blocks[1].data;
+ } else {
+ fatal("Requested address %#x is not present in the spec buffer\n", printAddress(sbb.reqAddress));
+ }
+ memcpy(pkt->getPtr<uint8_t>(),
+ sbb.data.getData(getOffset(sbb.reqAddress), sbb.reqSize),
+ sbb.reqSize);
+ m_specRequestQueue.push({pkt, curTick()});
+ DPRINTFR(SpecBuffer, "%10s SB Hit (idx=%d, addr=%#x) on (srcIdx=%d)\n", curTick(), idx, printAddress(sbb.reqAddress), srcIdx);
+ if (!specBufferHitEvent.scheduled()) {
+ schedule(specBufferHitEvent, clockEdge(Cycles(1)));
+ }
+ return RequestStatus_Issued;
+ } else {
+ // assert it is not in the buffer
+ primary_type = secondary_type = RubyRequestType_SPEC_LD;
+ }
+ } else if (pkt->isExpose() || pkt->isValidate()) {
+ assert(pkt->cmd == MemCmd::ExposeReq || pkt->cmd == MemCmd::ValidateReq);
+ assert(pkt->isSplit || pkt->isFirst());
+ uint8_t idx = pkt->reqIdx;
+ SBE& sbe = m_specBuf[idx];
+ sbe.isSplit = pkt->isSplit;
+ int blkIdx = pkt->isFirst() ? 0 : 1;
+ SBB& sbb = sbe.blocks[blkIdx];
+ if (sbb.reqAddress != pkt->getAddr()) {
+ fatal("sbb.reqAddress != pkt->getAddr: %#x != %#x\n", printAddress(sbb.reqAddress), printAddress(pkt->getAddr()));
+ }
+ if (sbb.reqSize != pkt->getSize()) {
+ fatal("sbb.reqSize != pkt->getSize(): %d != %d\n", sbb.reqSize, pkt->getSize());
+ }
+ primary_type = secondary_type = RubyRequestType_EXPOSE;
+ } else if (pkt->isLLSC()) {
//
// Alpha LL/SC instructions need to be handled carefully by the cache
// coherence protocol to ensure they follow the proper semantics. In
@@ -615,8 +762,22 @@ Sequencer::makeRequest(PacketPtr pkt)
}
RequestStatus status = insertRequest(pkt, primary_type);
- if (status != RequestStatus_Ready)
+ if (status == RequestStatus_Merged) {
+ return RequestStatus_Issued;
+ } else if (status != RequestStatus_Ready) {
return status;
+ }
+
+ if (pkt->isSpec()) {
+ DPRINTFR(SpecBuffer, "%10s Issuing SPEC_LD (idx=%d-%d, addr=%#x)\n",
+ curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ } else if (pkt->isExpose()) {
+ DPRINTFR(SpecBuffer, "%10s Issuing EXPOSE (idx=%d-%d, addr=%#x)\n",
+ curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ } else if (pkt->isValidate()) {
+ DPRINTFR(SpecBuffer, "%10s Issuing VALIDATE (idx=%d-%d, addr=%#x)\n",
+ curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ }
issueRequest(pkt, secondary_type);
@@ -643,7 +804,7 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
// requests do not
std::shared_ptr<RubyRequest> msg =
std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
- pkt->isFlush() ?
+ pkt->isFlush() || pkt->isExpose() ?
nullptr : pkt->getPtr<uint8_t>(),
pkt->getSize(), pc, secondary_type,
RubyAccessMode_Supervisor, pkt,
@@ -717,9 +878,9 @@ Sequencer::recordRequestType(SequencerRequestType requestType) {
void
-Sequencer::evictionCallback(Addr address)
+Sequencer::evictionCallback(Addr address, bool external)
{
- ruby_eviction_callback(address);
+ ruby_eviction_callback(address, external);
}
void
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index fcfa8ad86..66ff92777 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -31,6 +31,7 @@
#include <iostream>
#include <unordered_map>
+#include <queue>
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/RubyRequestType.hh"
@@ -45,6 +46,7 @@ struct SequencerRequest
PacketPtr pkt;
RubyRequestType m_type;
Cycles issue_time;
+ std::vector<PacketPtr> dependentSpecRequests;
SequencerRequest(PacketPtr _pkt, RubyRequestType _m_type,
Cycles _issue_time)
@@ -54,6 +56,19 @@ struct SequencerRequest
std::ostream& operator<<(std::ostream& out, const SequencerRequest& obj);
+struct SBB // SpecBufferBlock
+{
+ Addr reqAddress;
+ unsigned reqSize;
+ DataBlock data;
+};
+
+struct SBE // SpecBufferEntry
+{
+ bool isSplit;
+ SBB blocks[2];
+};
+
class Sequencer : public RubyPort
{
public:
@@ -83,6 +98,9 @@ class Sequencer : public RubyPort
const Cycles forwardRequestTime = Cycles(0),
const Cycles firstResponseTime = Cycles(0));
+ void specBufferHitCallback();
+ bool updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress);
+
RequestStatus makeRequest(PacketPtr pkt);
bool empty() const;
int outstandingCount() const { return m_outstanding_count; }
@@ -97,7 +115,7 @@ class Sequencer : public RubyPort
void checkCoherence(Addr address);
void markRemoved();
- void evictionCallback(Addr address);
+ void evictionCallback(Addr address, bool external);
void invalidateSC(Addr address);
int coreId() const { return m_coreId; }
@@ -238,6 +256,10 @@ class Sequencer : public RubyPort
std::vector<Stats::Counter> m_IncompleteTimes;
EventFunctionWrapper deadlockCheckEvent;
+
+ std::vector<SBE> m_specBuf;
+ std::queue<std::pair<PacketPtr, Tick>> m_specRequestQueue;
+ EventFunctionWrapper specBufferHitEvent;
};
inline std::ostream&