From 2c99e0e616c61bb31c6902fb7d11e10042b5b210 Mon Sep 17 00:00:00 2001 From: Iru Cai Date: Thu, 28 Feb 2019 17:07:16 +0800 Subject: invisispec-1.0 source --- src/mem/packet.cc | 19 +- src/mem/packet.hh | 113 ++++++++- src/mem/port.cc | 1 + src/mem/protocol/MESI_Two_Level-L1cache.sm | 264 +++++++++++++++++-- src/mem/protocol/MESI_Two_Level-L2cache.sm | 279 ++++++++++++++++++++- src/mem/protocol/MESI_Two_Level-dir.sm | 89 ++++++- src/mem/protocol/MESI_Two_Level-msg.sm | 6 +- src/mem/protocol/RubySlicc_Defines.sm | 2 +- src/mem/protocol/RubySlicc_Exports.sm | 12 + src/mem/protocol/RubySlicc_Types.sm | 3 +- src/mem/request.hh | 4 + src/mem/ruby/SConscript | 3 + src/mem/ruby/network/Network.cc | 6 + src/mem/ruby/slicc_interface/AbstractController.cc | 82 +++++- src/mem/ruby/slicc_interface/AbstractController.hh | 15 +- src/mem/ruby/slicc_interface/RubyRequest.hh | 16 ++ src/mem/ruby/structures/CacheMemory.cc | 4 +- src/mem/ruby/system/RubyPort.cc | 11 +- src/mem/ruby/system/RubyPort.hh | 2 +- src/mem/ruby/system/Sequencer.cc | 175 ++++++++++++- src/mem/ruby/system/Sequencer.hh | 24 +- 21 files changed, 1078 insertions(+), 52 deletions(-) (limited to 'src/mem') diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 866bc9051..d6089f9f7 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -221,7 +221,24 @@ MemCmd::commandInfo[] = InvalidateResp, "InvalidateReq" }, /* Invalidation Response */ { SET2(IsInvalidate, IsResponse), - InvalidCmd, "InvalidateResp" } + InvalidCmd, "InvalidateResp" }, + /* [InvisiSpec] New command info */ + { SET4(IsRead, IsRequest, NeedsResponse, IsSpec), + ReadSpecResp, "ReadSpecReq" }, + { SET4(IsRead, IsResponse, HasData, IsSpec), + InvalidCmd, "ReadSpecResp" }, + { SET4(IsRead, IsRequest, NeedsResponse, IsValidate), + ValidateResp, "ValidateReq" }, + { SET4(IsRead, IsResponse, HasData, IsValidate), + InvalidCmd, "ValidateResp" }, + { SET4(IsRead, IsRequest, NeedsResponse, IsExpose), + ExposeResp, "ExposeReq" }, + { SET3(IsRead, 
IsResponse, IsExpose), + InvalidCmd, "ExposeResp" }, + { SET3(IsRequest, NeedsResponse, IsSpecFlush), + SpecFlushResp, "SpecFlushReq" }, + { SET2(IsResponse, IsSpecFlush), + InvalidCmd, "SpecFlushResp" } }; bool diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 36a46438a..80aa756b0 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -133,6 +133,15 @@ class MemCmd FlushReq, //request for a cache flush InvalidateReq, // request for address to be invalidated InvalidateResp, + /* [InvisiSpec] New commands */ + ReadSpecReq, + ReadSpecResp, + ValidateReq, + ValidateResp, + ExposeReq, + ExposeResp, + SpecFlushReq, + SpecFlushResp, NUM_MEM_CMDS }; @@ -160,6 +169,11 @@ class MemCmd IsPrint, //!< Print state matching address (for debugging) IsFlush, //!< Flush the address from caches FromCache, //!< Request originated from a caching agent + /* [InvisiSpec] New attributes */ + IsSpec, //!< Speculatively issued + IsValidate, + IsExpose, + IsSpecFlush, NUM_COMMAND_ATTRIBUTES }; @@ -226,6 +240,12 @@ class MemCmd bool isPrint() const { return testCmdAttrib(IsPrint); } bool isFlush() const { return testCmdAttrib(IsFlush); } + /// [InvisiSpec] InvisiSpec attributes + bool isSpec() const { return testCmdAttrib(IsSpec); } + bool isValidate() const { return testCmdAttrib(IsValidate); } + bool isExpose() const { return testCmdAttrib(IsExpose); } + bool isSpecFlush() const { return testCmdAttrib(IsSpecFlush); } + Command responseCommand() const { @@ -306,7 +326,17 @@ class Packet : public Printable // Signal block present to squash prefetch and cache evict packets // through express snoop flag - BLOCK_CACHED = 0x00010000 + BLOCK_CACHED = 0x00010000, + + // [InvisiSpec] ReadSpecReq was L1 hit. 
+ L1_HIT = 0x00020000, + + // [InvisiSpec] this packet is the first one of split packets + // maximum split is 2 + FIRST_IN_SPLIT = 0x00040000, + ONLY_ACCESS_SPEC_BUFF = 0x00080000, + + EXTERNAL_EVICTION = 0x00100000, }; Flags flags; @@ -376,6 +406,12 @@ class Packet : public Printable */ uint32_t payloadDelay; + //[InvisiSpec] indicate the source buffer entry + //if the load should get data from specbuffer + int srcIdx; + int reqIdx; + bool isSplit; + /** * A virtual base opaque structure used to hold state associated * with the packet (e.g., an MSHR), specific to a MemObject that @@ -549,6 +585,45 @@ class Packet : public Printable bool isPrint() const { return cmd.isPrint(); } bool isFlush() const { return cmd.isFlush(); } + /// [InvisiSpec] InvisiSpec flags + bool isSpec() const { return cmd.isSpec(); } + bool isValidate() const { return cmd.isValidate(); } + bool isExpose() const { return cmd.isExpose(); } + bool isSpecFlush() const { return cmd.isSpecFlush(); } + bool isL1Hit() const { return flags.isSet(L1_HIT); } + bool isExternalEviction() const { return flags.isSet(EXTERNAL_EVICTION); } + // [InvisiSpec] Check whether it is the first in split packets + bool isFirst() const { return flags.isSet(FIRST_IN_SPLIT); } + bool onlyAccessSpecBuff() const + { return flags.isSet(ONLY_ACCESS_SPEC_BUFF); } + + void setL1Hit() + { + assert(isSpec()); + assert(!flags.isSet(L1_HIT)); + flags.set(L1_HIT); + } + + void setExternalEviction() + { + assert(!flags.isSet(EXTERNAL_EVICTION)); + flags.set(EXTERNAL_EVICTION); + } + + void setOnlyAccessSpecBuff() + { + assert(isSpec()); + assert(!flags.isSet(ONLY_ACCESS_SPEC_BUFF)); + flags.set(ONLY_ACCESS_SPEC_BUFF); + } + + void setFirst() + { + //assert(isSpec()); + assert(!flags.isSet(FIRST_IN_SPLIT)); + flags.set(FIRST_IN_SPLIT); + } + //@{ /// Snoop flags /** @@ -748,7 +823,8 @@ class Packet : public Printable Packet(const RequestPtr &_req, MemCmd _cmd) : cmd(_cmd), id((PacketId)_req.get()), req(_req), data(nullptr), addr(0), 
_isSecure(false), size(0), headerDelay(0), snoopDelay(0), - payloadDelay(0), senderState(NULL) + payloadDelay(0), srcIdx(-1), reqIdx(-1), isSplit(false), + senderState(NULL) { if (req->hasPaddr()) { addr = req->getPaddr(); @@ -769,7 +845,8 @@ class Packet : public Printable Packet(const RequestPtr &_req, MemCmd _cmd, int _blkSize, PacketId _id = 0) : cmd(_cmd), id(_id ? _id : (PacketId)_req.get()), req(_req), data(nullptr), addr(0), _isSecure(false), headerDelay(0), - snoopDelay(0), payloadDelay(0), senderState(NULL) + snoopDelay(0), payloadDelay(0), srcIdx(-1), reqIdx(-1), isSplit(false), + senderState(NULL) { if (req->hasPaddr()) { addr = req->getPaddr() & ~(_blkSize - 1); @@ -795,6 +872,9 @@ class Packet : public Printable headerDelay(pkt->headerDelay), snoopDelay(0), payloadDelay(pkt->payloadDelay), + srcIdx(pkt->srcIdx), + reqIdx(pkt->reqIdx), + isSplit(pkt->isSplit), senderState(pkt->senderState) { if (!clear_flags) @@ -868,6 +948,33 @@ class Packet : public Printable return new Packet(req, makeWriteCmd(req)); } + /** + * [InvisiSpec] Methods that return Packets for InvisiSpec. 
+ */ + static PacketPtr + createReadSpec(const RequestPtr req) + { + return new Packet(req, MemCmd::ReadSpecReq); + } + + static PacketPtr + createValidate(const RequestPtr req) + { + return new Packet(req, MemCmd::ValidateReq); + } + + static PacketPtr + createExpose(const RequestPtr req) + { + return new Packet(req, MemCmd::ExposeReq); + } + + static PacketPtr + createSpecFlush(const RequestPtr req) + { + return new Packet(req, MemCmd::SpecFlushReq); + } + /** * clean up packet variables */ diff --git a/src/mem/port.cc b/src/mem/port.cc index 47f56e633..318a65308 100644 --- a/src/mem/port.cc +++ b/src/mem/port.cc @@ -176,6 +176,7 @@ MasterPort::sendFunctional(PacketPtr pkt) return _slavePort->recvFunctional(pkt); } +// [InvisiSpec] Request from CPU to Ruby bool MasterPort::sendTimingReq(PacketPtr pkt) { diff --git a/src/mem/protocol/MESI_Two_Level-L1cache.sm b/src/mem/protocol/MESI_Two_Level-L1cache.sm index 87684ce10..846af7da5 100644 --- a/src/mem/protocol/MESI_Two_Level-L1cache.sm +++ b/src/mem/protocol/MESI_Two_Level-L1cache.sm @@ -76,10 +76,12 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") S, AccessPermission:Read_Only, desc="a L1 cache entry Shared"; E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive"; M, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b"; + X, AccessPermission:Read_Only, desc="a L1 cache entry Speculatively observed"; // Transient States IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet"; IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet"; + IX, AccessPermission:Busy, desc="L1 idle, issued GETSPEC, have not seen response yet"; SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet"; IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit"; @@ -99,6 +101,8 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") Load, 
desc="Load request from the home processor"; Ifetch, desc="I-fetch request from the home processor"; Store, desc="Store request from the home processor"; + SpecLoad, desc="SpecLoad request from the home processor"; + Expose, desc="Expose request from the home processor"; Inv, desc="Invalidate request from L2 bank"; @@ -110,6 +114,8 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") Fwd_GETX, desc="GETX from other processor"; Fwd_GETS, desc="GETS from other processor"; Fwd_GET_INSTR, desc="GET_INSTR from other processor"; + Fwd_GETSPEC, desc="GETSPEC from other processor"; + Fwd_EXPOSE, desc="EXPOSE from other processor"; Data, desc="Data for processor"; Data_Exclusive, desc="Data for processor"; @@ -188,6 +194,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } State getState(TBE tbe, Entry cache_entry, Addr addr) { + // [InvisiSpec] The same cache line cannot be present in L1D and L1I at the same time. assert((L1Dcache.isTagPresent(addr) && L1Icache.isTagPresent(addr)) == false); if(is_valid(tbe)) { @@ -265,6 +272,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") return Event:Ifetch; } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) { return Event:Store; + } else if (type == RubyRequestType:SPEC_LD) { + return Event:SpecLoad; + } else if (type == RubyRequestType:EXPOSE) { + return Event:Expose; } else { error("Invalid RubyRequestType"); } @@ -387,6 +398,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") trigger(Event:Data_Exclusive, in_msg.addr, cache_entry, tbe); } else if(in_msg.Type == CoherenceResponseType:DATA) { if ((getState(tbe, cache_entry, in_msg.addr) == State:IS || + getState(tbe, cache_entry, in_msg.addr) == State:IX || getState(tbe, cache_entry, in_msg.addr) == State:IS_I || getState(tbe, cache_entry, in_msg.addr) == State:PF_IS || getState(tbe, cache_entry, in_msg.addr) == State:PF_IS_I) && @@ -433,6 +445,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache 
CMP") trigger(Event:Fwd_GETS, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:GET_INSTR) { trigger(Event:Fwd_GET_INSTR, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:GETSPEC) { + trigger(Event:Fwd_GETSPEC, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:EXPOSE) { + trigger(Event:Fwd_EXPOSE, in_msg.addr, cache_entry, tbe); } else { error("Invalid forwarded request type"); } @@ -534,6 +550,43 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") out_msg.MessageSize := MessageSizeType:Control; out_msg.Prefetch := in_msg.Prefetch; out_msg.AccessMode := in_msg.AccessMode; + out_msg.idx := in_msg.idx; + } + } + } + + action(as_issueGETSPEC, "as", desc="Issue GETSPEC") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestL1Network_out, RequestMsg, l1_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:GETSPEC; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, intToID(0))); + DPRINTF(RubySlicc, "address: %#x, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:SPECLD_Control; + out_msg.Prefetch := in_msg.Prefetch; + out_msg.AccessMode := in_msg.AccessMode; + out_msg.idx := in_msg.idx; + } + } + } + + action(ex_issueEXPOSE, "ex", desc="Issue EXPOSE") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestL1Network_out, RequestMsg, l1_request_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceRequestType:EXPOSE; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, intToID(0))); + DPRINTF(RubySlicc, "address: %#x, destination: %s\n", + address, out_msg.Destination); + out_msg.MessageSize := MessageSizeType:EXPOSE_Control; + out_msg.Prefetch := in_msg.Prefetch; + 
out_msg.AccessMode := in_msg.AccessMode; + out_msg.idx := in_msg.idx; } } } @@ -568,6 +621,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") out_msg.MessageSize := MessageSizeType:Control; out_msg.Prefetch := in_msg.Prefetch; out_msg.AccessMode := in_msg.AccessMode; + out_msg.idx := in_msg.idx; } } } @@ -606,6 +660,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") out_msg.MessageSize := MessageSizeType:Control; out_msg.Prefetch := in_msg.Prefetch; out_msg.AccessMode := in_msg.AccessMode; + out_msg.idx := in_msg.idx; } } } @@ -643,6 +698,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") out_msg.MessageSize := MessageSizeType:Control; out_msg.Prefetch := in_msg.Prefetch; out_msg.AccessMode := in_msg.AccessMode; + out_msg.idx := in_msg.idx; } } } @@ -662,6 +718,36 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } } + action(dex_sendDataToExposeRequestor, "dex", desc="send data to requestor") { + peek(requestL1Network_in, RequestMsg) { + enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + } + } + } + + action(ds_sendDataToSpecRequestor, "ds", desc="send data to requestor") { + peek(requestL1Network_in, RequestMsg) { + enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:SPECLD_Data; + } + } + } + action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of M 
downgrade") { enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { assert(is_valid(cache_entry)); @@ -676,6 +762,20 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } } + action(d2ex_sendExposeDataToL2, "d2ex", desc="send data to the L2 cache because of M downgrade") { + enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.Dirty := cache_entry.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, intToID(0))); + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + } + } + action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") { peek(requestL1Network_in, RequestMsg) { enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { @@ -691,6 +791,36 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } } + action(dtex_sendDataToExposeRequestor_fromTBE, "dtex", desc="send data to requestor") { + peek(requestL1Network_in, RequestMsg) { + enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + } + } + } + + action(dts_sendDataToSpecRequestor_fromTBE, "dts", desc="send data to requestor") { + peek(requestL1Network_in, RequestMsg) { + enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + 
out_msg.MessageSize := MessageSizeType:SPECLD_Data; + } + } + } + action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") { enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { assert(is_valid(tbe)); @@ -705,6 +835,20 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } } + action(d2tex_sendExposeDataToL2_fromTBE, "d2tex", desc="send data to the L2 cache") { + enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { + assert(is_valid(tbe)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + out_msg.Sender := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits, intToID(0))); + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + } + } + action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") { peek(requestL1Network_in, RequestMsg) { enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) { @@ -761,7 +905,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") { if (send_evictions) { DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address); - sequencer.evictionCallback(address); + sequencer.evictionCallback(address, false); + } + } + + action(forward_external_eviction_to_cpu, "\ccc", desc="sends external eviction information to the processor") { + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address); + sequencer.evictionCallback(address, true); } } @@ -822,6 +973,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") sequencer.readCallback(address, cache_entry.DataBlk); } + action(h_spec_load_hit, "hs", + desc="Notify sequencer the spec load completed.") + { + assert(is_valid(cache_entry)); + DPRINTF(RubySlicc, "%s\n", 
cache_entry.DataBlk); + sequencer.readCallback(address, cache_entry.DataBlk); + } + action(h_ifetch_hit, "hi", desc="Notify sequencer the instruction fetch completed.") { assert(is_valid(cache_entry)); @@ -839,6 +998,15 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") sequencer.readCallback(address, cache_entry.DataBlk, true); } + action(hsx_spec_load_hit, "hsx", desc="Notify sequencer the external load completed.") + { + peek(responseL1Network_in, ResponseMsg) { + // [InvisiSpec] Hack for in_msg.DataBlk returning const DataBlk + tbe.DataBlk := in_msg.DataBlk; + sequencer.readCallback(address, tbe.DataBlk, true); + } + } + action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") { assert(is_valid(cache_entry)); @@ -868,6 +1036,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") tbe.DataBlk := cache_entry.DataBlk; } + action(iw_allocateTBEWithoutCacheEntry, "iw", desc="Allocate TBE without a cache entry") { + check_allocate(TBEs); + assert(!is_valid(cache_entry) || cache_entry.CacheState == State:I); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.isPrefetch := false; + } + action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") { mandatoryQueue_in.dequeue(clockEdge()); } @@ -989,13 +1165,19 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") cache_entry.isPrefetch := true; } + action(x_expose_done, "xd", + desc="Notify sequencer the expose completed.") + { + sequencer.readCallback(address, cache_entry.DataBlk); + } + //***************************************************** // TRANSITIONS //***************************************************** // Transitions for Load/Store/Replacement/WriteBack from transient states - transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) { + transition({IS, IM, IX, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Expose, SpecLoad, Ifetch, Store, L1_Replacement}) { z_stallAndWaitMandatoryQueue; } @@ -1003,7 +1185,7 @@ 
machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") z_stallAndWaitMandatoryQueue; } - transition({PF_IM, PF_SM}, {Load, Ifetch, L1_Replacement}) { + transition({PF_IM, PF_SM}, {Load, Expose, SpecLoad, Ifetch, L1_Replacement}) { z_stallAndWaitMandatoryQueue; } @@ -1016,7 +1198,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") ff_deallocateL1CacheBlock; } - transition({S,E,M,IS,IM,SM,IS_I,PF_IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM}, + transition({S,E,M,IS,IM,IX,SM,IS_I,PF_IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM}, {PF_Load, PF_Store, PF_Ifetch}) { pq_popPrefetchQueue; } @@ -1030,6 +1212,21 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") k_popMandatoryQueue; } + transition({NP,I}, Expose, IS) { + oo_allocateL1DCacheBlock; + i_allocateTBE; + ex_issueEXPOSE; + uu_profileDataMiss; + //po_observeMiss; + k_popMandatoryQueue; + } + + transition({NP,I}, SpecLoad, IX) { + iw_allocateTBEWithoutCacheEntry; + as_issueGETSPEC; + k_popMandatoryQueue; + } + transition({NP,I}, PF_Load, PF_IS) { oo_allocateL1DCacheBlock; i_allocateTBE; @@ -1037,13 +1234,13 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") pq_popPrefetchQueue; } - transition(PF_IS, Load, IS) { + transition(PF_IS, {Load, Expose}, IS) { uu_profileDataMiss; ppm_observePfMiss; k_popMandatoryQueue; } - transition(PF_IS_I, Load, IS_I) { + transition(PF_IS_I, {Load, Expose}, IS_I) { uu_profileDataMiss; ppm_observePfMiss; k_popMandatoryQueue; @@ -1055,6 +1252,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") k_popMandatoryQueue; } + transition({PF_IS, PF_IS_I}, SpecLoad) { + k_popMandatoryQueue; + } + transition({NP,I}, Ifetch, IS) { pp_allocateL1ICacheBlock; i_allocateTBE; @@ -1107,19 +1308,24 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") k_popMandatoryQueue; } - transition({NP, I}, Inv) { + transition({NP, I, IX}, Inv) { fi_sendInvAck; l_popRequestQueue; } // Transitions from Shared - transition({S,E,M}, Load) { + transition({S,E,M}, {Load, Expose}) { 
h_load_hit; uu_profileDataHit; po_observeHit; k_popMandatoryQueue; } + transition({S,E,M}, SpecLoad) { + h_spec_load_hit; + k_popMandatoryQueue; + } + transition({S,E,M}, Ifetch) { h_ifetch_hit; uu_profileInstHit; @@ -1140,7 +1346,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } transition(S, Inv, I) { - forward_eviction_to_cpu; + forward_external_eviction_to_cpu; fi_sendInvAck; l_popRequestQueue; } @@ -1164,13 +1370,13 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") transition(E, Inv, I) { // don't send data - forward_eviction_to_cpu; + forward_external_eviction_to_cpu; fi_sendInvAck; l_popRequestQueue; } transition(E, Fwd_GETX, I) { - forward_eviction_to_cpu; + forward_external_eviction_to_cpu; d_sendDataToRequestor; l_popRequestQueue; } @@ -1181,6 +1387,17 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") l_popRequestQueue; } + transition({E, M}, Fwd_GETSPEC) { + ds_sendDataToSpecRequestor; + l_popRequestQueue; + } + + transition({E, M}, Fwd_EXPOSE, S) { + dex_sendDataToExposeRequestor; + d2ex_sendExposeDataToL2; + l_popRequestQueue; + } + // Transitions from Modified transition(M, {L1_Replacement, PF_L1_Replacement}, M_I) { @@ -1197,7 +1414,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } transition(M, Inv, I) { - forward_eviction_to_cpu; + forward_external_eviction_to_cpu; f_sendDataToL2; l_popRequestQueue; } @@ -1208,7 +1425,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") } transition(M, Fwd_GETX, I) { - forward_eviction_to_cpu; + forward_external_eviction_to_cpu; d_sendDataToRequestor; l_popRequestQueue; } @@ -1230,6 +1447,17 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") l_popRequestQueue; } + transition(M_I, Fwd_EXPOSE, SINK_WB_ACK) { + dtex_sendDataToExposeRequestor_fromTBE; + d2tex_sendExposeDataToL2_fromTBE; + l_popRequestQueue; + } + + transition(M_I, Fwd_GETSPEC) { + dts_sendDataToSpecRequestor_fromTBE; + l_popRequestQueue; + } + // Transitions from IS 
transition({IS, IS_I}, Inv, IS_I) { fi_sendInvAck; @@ -1341,6 +1569,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") kd_wakeUpDependents; } + // [InvisiSpec] Data and Data_Exclusive are not possible at IX + transition(IX, {Data_all_Acks, DataS_fromL1}, I) { + hsx_spec_load_hit; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + // Transitions from IM transition(IM, Inv, IM) { fi_sendInvAck; @@ -1384,7 +1620,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") // transitions from SM transition(SM, Inv, IM) { - forward_eviction_to_cpu; + forward_external_eviction_to_cpu; fi_sendInvAck; dg_invalidate_sc; l_popRequestQueue; diff --git a/src/mem/protocol/MESI_Two_Level-L2cache.sm b/src/mem/protocol/MESI_Two_Level-L2cache.sm index 5a8cfae6d..ea884133e 100644 --- a/src/mem/protocol/MESI_Two_Level-L2cache.sm +++ b/src/mem/protocol/MESI_Two_Level-L2cache.sm @@ -72,6 +72,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") ISS, AccessPermission:Busy, desc="L2 idle, got single L1_GETS, issued memory fetch, have not seen response yet"; IS, AccessPermission:Busy, desc="L2 idle, got L1_GET_INSTR or multiple L1_GETS, issued memory fetch, have not seen response yet"; IM, AccessPermission:Busy, desc="L2 idle, got L1_GETX, issued memory fetch, have not seen response(s) yet"; + II, AccessPermission:Busy, desc="L2 idle, got single L1_GETSPEC, issued memory fetch, have not seen response yet"; + IEE, AccessPermission:Busy, desc="L2 idle, got single L1_EXPOSE, issued memory fetch, have not seen response yet"; // Blocking states SS_MB, AccessPermission:Busy, desc="Blocked for L1_GETX from SS"; @@ -96,6 +98,9 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") L1_PUTX, desc="L1 replacing data"; L1_PUTX_old, desc="L1 replacing data, but no longer sharer"; + L1_GETSPEC, desc="L1 GETSPEC request for a block mapped to us"; + L1_EXPOSE, desc="L1 EXPOSE request for a block mapped to us"; + // events initiated by this 
L2 L2_Replacement, desc="L2 Replacement", format="!r"; L2_Replacement_clean, desc="L2 Replacement, but data is clean", format="!r"; @@ -135,6 +140,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") bool Dirty, default="false", desc="Data is Dirty"; NetDest L1_GetS_IDs, desc="Set of the internal processors that want the block in shared state"; + NetDest L1_GetSPEC_IDs, desc="Set of the internal processors that want the block speculatively"; + NetDest L1_Expose_IDs, desc="Set of the internal processors that want the block to be exposed"; MachineID L1_GetX_ID, desc="ID of the L1 cache to forward the block to once we get a response"; int pendingAcks, desc="number of pending acks for invalidates during writeback"; } @@ -267,6 +274,10 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } else { return Event:L1_PUTX_old; } + } else if (type == CoherenceRequestType:GETSPEC) { + return Event:L1_GETSPEC; + } else if (type == CoherenceRequestType:EXPOSE) { + return Event:L1_EXPOSE; } else { DPRINTF(RubySlicc, "address: %#x, Request Type: %s\n", addr, type); error("Invalid L1 forwarded request type"); @@ -399,10 +410,12 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") peek(L1RequestL2Network_in, RequestMsg) { enqueue(DirRequestL2Network_out, RequestMsg, l2_request_latency) { out_msg.addr := address; - out_msg.Type := CoherenceRequestType:GETS; + out_msg.Type := in_msg.Type; out_msg.Requestor := machineID; out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.MessageSize := MessageSizeType:Control; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.idx := in_msg.idx; + out_msg.origin := in_msg.Requestor; } } } @@ -420,6 +433,32 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } } + action(bs_forwardSpecRequestToExclusive, "bs", desc="Forward request to the exclusive L1") { + peek(L1RequestL2Network_in, RequestMsg) { + enqueue(L1RequestL2Network_out, RequestMsg, to_l1_latency) { + 
assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(cache_entry.Exclusive); + out_msg.MessageSize := MessageSizeType:SPECLD_Request_Control; + } + } + } + + action(bex_forwardExposeRequestToExclusive, "bex", desc="Forward request to the exclusive L1") { + peek(L1RequestL2Network_in, RequestMsg) { + enqueue(L1RequestL2Network_out, RequestMsg, to_l1_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := in_msg.Type; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(cache_entry.Exclusive); + out_msg.MessageSize := MessageSizeType:EXPOSE_Request_Control; + } + } + } + action(c_exclusiveReplacement, "c", desc="Send data to memory") { enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) { assert(is_valid(cache_entry)); @@ -494,6 +533,25 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } } + action(ddex_sendExclusiveDataToExposeRequestor, "ddex", desc="Send data from cache to reqeustor") { + peek(L1RequestL2Network_in, RequestMsg) { + enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + + out_msg.AckCount := 0 - cache_entry.Sharers.count(); + if (cache_entry.Sharers.isElement(in_msg.Requestor)) { + out_msg.AckCount := out_msg.AckCount + 1; + } + } + } + } + action(ds_sendSharedDataToRequestor, "ds", desc="Send data from cache to reqeustor") { peek(L1RequestL2Network_in, RequestMsg) { enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) { @@ -509,9 +567,39 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } } + action(dss_sendSharedDataToSpecRequestor, "dss", desc="Send 
data from cache to reqeustor") { + peek(L1RequestL2Network_in, RequestMsg) { + enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:SPECLD_Data; + out_msg.AckCount := 0; + } + } + } + + action(dsex_sendSharedDataToExposeRequestor, "dsex", desc="Send data from cache to reqeustor") { + peek(L1RequestL2Network_in, RequestMsg) { + enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + out_msg.AckCount := 0; + } + } + } + action(e_sendDataToGetSRequestors, "e", desc="Send data from cache to all GetS IDs") { assert(is_valid(tbe)); - assert(tbe.L1_GetS_IDs.count() > 0); + assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0); enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) { assert(is_valid(cache_entry)); out_msg.addr := address; @@ -523,9 +611,40 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } } + action(es_sendDataToGetSpecRequestors, "es", desc="Send data from cache to all GetSpec IDs") { + assert(is_valid(tbe)); + assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0); + peek(responseL2Network_in, ResponseMsg) { + enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.Sender := machineID; + out_msg.Destination := tbe.L1_GetSPEC_IDs; // internal nodes + out_msg.DataBlk := in_msg.DataBlk; + out_msg.MessageSize := 
MessageSizeType:SPECLD_Data; + } + } + } + + action(eex_sendDataToExposeRequestors, "eex", desc="Send data from cache to all GetSpec IDs") { + assert(is_valid(tbe)); + assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0); + peek(responseL2Network_in, ResponseMsg) { + enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA; + out_msg.Sender := machineID; + out_msg.Destination := tbe.L1_Expose_IDs; // internal nodes + out_msg.DataBlk := in_msg.DataBlk; + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + } + } + } + action(ex_sendExclusiveDataToGetSRequestors, "ex", desc="Send data from cache to all GetS IDs") { assert(is_valid(tbe)); assert(tbe.L1_GetS_IDs.count() == 1); + assert(tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() == 0); enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) { assert(is_valid(cache_entry)); out_msg.addr := address; @@ -537,6 +656,21 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } } + action(exex_sendExclusiveDataToExposeRequestors, "exex", desc="Send data from cache to all GetS IDs") { + assert(is_valid(tbe)); + assert(tbe.L1_Expose_IDs.count() == 1); + assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() == 0); + enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) { + assert(is_valid(cache_entry)); + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; + out_msg.Sender := machineID; + out_msg.Destination := tbe.L1_Expose_IDs; // internal nodes + out_msg.DataBlk := cache_entry.DataBlk; + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + } + } + action(ee_sendDataToGetXRequestor, "ee", desc="Send data from cache to GetX ID") { enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) { assert(is_valid(tbe)); @@ -598,11 +732,23 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") TBEs.allocate(address); set_tbe(TBEs[address]); 
tbe.L1_GetS_IDs.clear(); + tbe.L1_GetSPEC_IDs.clear(); + tbe.L1_Expose_IDs.clear(); tbe.DataBlk := cache_entry.DataBlk; tbe.Dirty := cache_entry.Dirty; tbe.pendingAcks := cache_entry.Sharers.count(); } + action(iw_allocateTBEWithoutCacheEntry, "iw", desc="Allocate TBE for request without a cache entry") { + check_allocate(TBEs); + assert(!is_valid(cache_entry)); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.L1_GetS_IDs.clear(); + tbe.L1_GetSPEC_IDs.clear(); + tbe.L1_Expose_IDs.clear(); + } + action(s_deallocateTBE, "s", desc="Deallocate external TBE") { TBEs.deallocate(address); unset_tbe(); @@ -668,6 +814,20 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } } + action(sss_recordGetSPECL1ID, "\sss", desc="Record L1 GetSpec for load response") { + peek(L1RequestL2Network_in, RequestMsg) { + assert(is_valid(tbe)); + tbe.L1_GetSPEC_IDs.add(in_msg.Requestor); + } + } + + action(ssss_recordExposeL1ID, "\ssss", desc="Record L1 Expose for load response") { + peek(L1RequestL2Network_in, RequestMsg) { + assert(is_valid(tbe)); + tbe.L1_Expose_IDs.add(in_msg.Requestor); + } + } + action(xx_recordGetXL1ID, "\x", desc="Record L1 GetX for store response") { peek(L1RequestL2Network_in, RequestMsg) { assert(is_valid(tbe)); @@ -793,21 +953,22 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") // BASE STATE - I // Transitions from I (Idle) - transition({NP, IS, ISS, IM, SS, M, M_I, I_I, S_I, MT_IB, MT_SB}, L1_PUTX) { + transition({NP, IS, ISS, IEE, IM, II, SS, M, M_I, I_I, S_I, MT_IB, MT_SB}, L1_PUTX) { t_sendWBAck; jj_popL1RequestQueue; } - transition({NP, SS, M, MT, M_I, I_I, S_I, IS, ISS, IM, MT_IB, MT_SB}, L1_PUTX_old) { + transition({NP, SS, M, MT, M_I, I_I, S_I, IS, ISS, IEE, IM, II, MT_IB, MT_SB}, L1_PUTX_old) { t_sendWBAck; jj_popL1RequestQueue; } - transition({IM, IS, ISS, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L2_Replacement, L2_Replacement_clean}) { + transition({IM, IS, ISS, IEE, II, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, 
{L2_Replacement, L2_Replacement_clean}) { zz_stallAndWaitL1RequestQueue; } - transition({IM, IS, ISS, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) { + // [InvisiSpec] TODO: How to handle Mem_Inv at II? Stall or ignore? + transition({IM, IS, ISS, IEE, II, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) { zn_recycleResponseNetwork; } @@ -816,7 +977,7 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } - transition({SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_GET_INSTR, L1_GETX, L1_UPGRADE}) { + transition({SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETX, L1_UPGRADE, L1_GETSPEC}) { zz_stallAndWaitL1RequestQueue; } @@ -832,6 +993,17 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") jj_popL1RequestQueue; } + transition(NP, L1_EXPOSE, IEE) { + qq_allocateL2CacheBlock; + ll_clearSharers; + nn_addSharer; + i_allocateTBE; + ssss_recordExposeL1ID; + a_issueFetchToMemory; + uu_profileMiss; + jj_popL1RequestQueue; + } + transition(NP, L1_GET_INSTR, IS) { qq_allocateL2CacheBlock; ll_clearSharers; @@ -854,12 +1026,28 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") jj_popL1RequestQueue; } + transition(NP, L1_GETSPEC, II) { + iw_allocateTBEWithoutCacheEntry; + sss_recordGetSPECL1ID; + a_issueFetchToMemory; + jj_popL1RequestQueue; + } + // transitions from IS/IM transition(ISS, Mem_Data, MT_MB) { m_writeDataToCache; ex_sendExclusiveDataToGetSRequestors; + es_sendDataToGetSpecRequestors; + s_deallocateTBE; + o_popIncomingResponseQueue; + } + + transition(IEE, Mem_Data, MT_MB) { + m_writeDataToCache; + exex_sendExclusiveDataToExposeRequestors; + es_sendDataToGetSpecRequestors; s_deallocateTBE; o_popIncomingResponseQueue; } @@ -867,6 +1055,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") transition(IS, Mem_Data, SS) { m_writeDataToCache; e_sendDataToGetSRequestors; + es_sendDataToGetSpecRequestors; + eex_sendDataToExposeRequestors; s_deallocateTBE; o_popIncomingResponseQueue; 
kd_wakeUpDependents; @@ -879,18 +1069,48 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") o_popIncomingResponseQueue; } - transition({IS, ISS}, {L1_GETS, L1_GET_INSTR}, IS) { + transition(II, Mem_Data, NP) { + es_sendDataToGetSpecRequestors; + s_deallocateTBE; + o_popIncomingResponseQueue; + kd_wakeUpDependents; + } + + transition({IS, ISS, IEE}, {L1_GETS, L1_GET_INSTR}, IS) { nn_addSharer; ss_recordGetSL1ID; uu_profileMiss; jj_popL1RequestQueue; } - transition({IS, ISS}, L1_GETX) { + transition({IS, ISS, IEE}, L1_EXPOSE, IS) { + nn_addSharer; + ssss_recordExposeL1ID; + uu_profileMiss; + jj_popL1RequestQueue; + } + + transition({IS, ISS, IEE}, L1_GETSPEC, IS) { + sss_recordGetSPECL1ID; + jj_popL1RequestQueue; + } + + transition(II, L1_GETSPEC) { + sss_recordGetSPECL1ID; + jj_popL1RequestQueue; + } + + // [InvisiSpec] L1_GET_INSTR should not be received at II + transition(II, {L1_GETS, L1_EXPOSE}) { zz_stallAndWaitL1RequestQueue; } - transition(IM, {L1_GETX, L1_GETS, L1_GET_INSTR}) { + // [InvisiSpec] TODO: Maybe we can optimize this? 
+ transition({IS, ISS, IEE, II}, L1_GETX) { + zz_stallAndWaitL1RequestQueue; + } + + transition(IM, {L1_GETX, L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETSPEC}) { zz_stallAndWaitL1RequestQueue; } @@ -903,6 +1123,19 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") jj_popL1RequestQueue; } + transition(SS, L1_EXPOSE) { + dsex_sendSharedDataToExposeRequestor; + nn_addSharer; + set_setMRU; + uu_profileHit; + jj_popL1RequestQueue; + } + + transition({SS, M}, L1_GETSPEC) { + dss_sendSharedDataToSpecRequestor; + jj_popL1RequestQueue; + } + transition(SS, L1_GETX, SS_MB) { d_sendDataToRequestor; @@ -956,6 +1189,14 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") jj_popL1RequestQueue; } + // [InvisiSpec] TODO + transition(M, L1_EXPOSE, MT_MB) { + ddex_sendExclusiveDataToExposeRequestor; + set_setMRU; + uu_profileHit; + jj_popL1RequestQueue; + } + transition(M, {L2_Replacement, MEM_Inv}, M_I) { i_allocateTBE; c_exclusiveReplacement; @@ -986,6 +1227,20 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") jj_popL1RequestQueue; } + // [InvisiSpec] TODO: Ack packets are currently not recorded as EXPOSE traffic. + transition(MT, L1_EXPOSE, MT_IIB) { + bex_forwardExposeRequestToExclusive; + uu_profileMiss; + set_setMRU; + jj_popL1RequestQueue; + } + + // [InvisiSpec] Do we need to block? 
+ transition(MT, L1_GETSPEC) { + bs_forwardSpecRequestToExclusive; + jj_popL1RequestQueue; + } + transition(MT, {L2_Replacement, MEM_Inv}, MT_I) { i_allocateTBE; f_sendInvToSharers; @@ -1039,7 +1294,7 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP") } // writeback states - transition({I_I, S_I, MT_I, MCT_I, M_I}, {L1_GETX, L1_UPGRADE, L1_GETS, L1_GET_INSTR}) { + transition({I_I, S_I, MT_I, MCT_I, M_I}, {L1_GETX, L1_UPGRADE, L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETSPEC}) { zz_stallAndWaitL1RequestQueue; } diff --git a/src/mem/protocol/MESI_Two_Level-dir.sm b/src/mem/protocol/MESI_Two_Level-dir.sm index 991de5a2c..9934f57a8 100644 --- a/src/mem/protocol/MESI_Two_Level-dir.sm +++ b/src/mem/protocol/MESI_Two_Level-dir.sm @@ -49,6 +49,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") M, AccessPermission:Maybe_Stale, desc="memory copy may be stale, i.e. other modified copies may exist"; IM, AccessPermission:Busy, desc="Intermediate State I>M"; + IE, AccessPermission:Busy, desc="Intermediate State I>M"; + II, AccessPermission:Busy, desc="Intermediate State I>I for SpecFetch"; MI, AccessPermission:Busy, desc="Intermediate State M>I"; M_DRD, AccessPermission:Busy, desc="Intermediate State when there is a dma read"; M_DRDI, AccessPermission:Busy, desc="Intermediate State when there is a dma read"; @@ -59,6 +61,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") // Events enumeration(Event, desc="Directory events") { Fetch, desc="A memory fetch arrives"; + Expose, desc="A memory expose arrives"; + SpecFetch, desc="A memory fetch for speculative execution arrives"; Data, desc="writeback data arrives"; Memory_Data, desc="Fetched data from memory arrives"; Memory_Ack, desc="Writeback Ack from memory arrives"; @@ -198,6 +202,10 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") assert(in_msg.Destination.isElement(machineID)); if (isGETRequest(in_msg.Type)) { trigger(Event:Fetch, in_msg.addr, 
TBEs[in_msg.addr]); + } else if (in_msg.Type == CoherenceRequestType:EXPOSE) { + trigger(Event:Expose, in_msg.addr, TBEs[in_msg.addr]); + } else if (in_msg.Type == CoherenceRequestType:GETSPEC) { + trigger(Event:SpecFetch, in_msg.addr, TBEs[in_msg.addr]); } else if (in_msg.Type == CoherenceRequestType:DMA_READ) { trigger(Event:DMA_READ, makeLineAddress(in_msg.addr), TBEs[makeLineAddress(in_msg.addr)]); @@ -275,6 +283,40 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") } } + action(dex_sendExposeData, "dex", desc="Send data to requestor") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:MEMORY_DATA; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.OriginalRequestorMachId); + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Dirty := false; + out_msg.MessageSize := MessageSizeType:EXPOSE_Data; + + Entry e := getDirectoryEntry(in_msg.addr); + e.Owner := in_msg.OriginalRequestorMachId; + } + } + } + + action(ds_sendSpecData, "ds", desc="Send data to requestor") { + peek(memQueue_in, MemoryMsg) { + enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:MEMORY_DATA; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.OriginalRequestorMachId); + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Dirty := false; + out_msg.MessageSize := MessageSizeType:SPECLD_Data; + + Entry e := getDirectoryEntry(in_msg.addr); + e.Owner := in_msg.OriginalRequestorMachId; + } + } + } + // Actions action(aa_sendAck, "aa", desc="Send ack to L2") { peek(memQueue_in, MemoryMsg) { @@ -306,7 +348,19 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { peek(requestNetwork_in, RequestMsg) { - queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency); + 
queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 0); + } + } + + action(qfs_queueMemorySpecFetchRequest, "qfs", desc="Queue off-chip fetch request") { + peek(requestNetwork_in, RequestMsg) { + queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 1); + } + } + + action(qfe_queueMemoryExposeRequest, "qfe", desc="Queue off-chip fetch request") { + peek(requestNetwork_in, RequestMsg) { + queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 2); } } @@ -320,7 +374,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") //added by SS for dma action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") { peek(requestNetwork_in, RequestMsg) { - queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency); + assert(false); + queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.Requestor, -1, -1); } } @@ -425,7 +480,18 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") j_popIncomingRequestQueue; } - transition(M, Fetch) { + transition(I, Expose, IE) { + qfe_queueMemoryExposeRequest; + j_popIncomingRequestQueue; + } + + transition(I, SpecFetch, II) { + qfs_queueMemorySpecFetchRequest; + j_popIncomingRequestQueue; + } + + // [InvisiSpec] Is it secure? 
+ transition(M, {Fetch, Expose, SpecFetch}) { inv_sendCacheInvalidate; z_stallAndWaitRequest; } @@ -435,6 +501,19 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") l_popMemQueue; kd_wakeUpDependents; } + + transition(IE, Memory_Data, M) { + dex_sendExposeData; + l_popMemQueue; + kd_wakeUpDependents; + } + + transition(II, Memory_Data, I) { + ds_sendSpecData; + l_popMemQueue; + kd_wakeUpDependents; + } + //added by SS transition(M, CleanReplacement, I) { a_sendAck; @@ -481,11 +560,11 @@ machine(MachineType:Directory, "MESI Two Level directory protocol") kd_wakeUpDependents; } - transition({ID, ID_W, M_DRDI, M_DWRI, IM, MI}, {Fetch, Data} ) { + transition({ID, ID_W, M_DRDI, M_DWRI, IM, IE, MI, II}, {Fetch, Expose, SpecFetch, Data} ) { z_stallAndWaitRequest; } - transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, MI}, {DMA_WRITE, DMA_READ} ) { + transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, IE, MI, II}, {DMA_WRITE, DMA_READ} ) { zz_recycleDMAQueue; } diff --git a/src/mem/protocol/MESI_Two_Level-msg.sm b/src/mem/protocol/MESI_Two_Level-msg.sm index 738019e7b..d4269193d 100644 --- a/src/mem/protocol/MESI_Two_Level-msg.sm +++ b/src/mem/protocol/MESI_Two_Level-msg.sm @@ -36,6 +36,8 @@ enumeration(CoherenceRequestType, desc="...") { GET_INSTR, desc="Get Instruction"; INV, desc="INValidate"; PUTX, desc="Replacement message"; + GETSPEC, desc="Get Speculatively"; + EXPOSE, desc="Expose"; WB_ACK, desc="Writeback ack"; @@ -68,7 +70,9 @@ structure(RequestMsg, desc="...", interface="Message") { int Len; bool Dirty, default="false", desc="Dirty bit"; PrefetchBit Prefetch, desc="Is this a prefetch request"; - + MachineID origin; + int idx, default="-1", desc="LQ index"; + bool functionalRead(Packet *pkt) { // Only PUTX messages contains the data block if (Type == CoherenceRequestType:PUTX) { diff --git a/src/mem/protocol/RubySlicc_Defines.sm b/src/mem/protocol/RubySlicc_Defines.sm index eb235f8f3..7df82847e 100644 --- 
a/src/mem/protocol/RubySlicc_Defines.sm +++ b/src/mem/protocol/RubySlicc_Defines.sm @@ -35,7 +35,7 @@ Cycles recycle_latency; // Functions implemented in the AbstractController class for // making timing access to the memory maintained by the // memory controllers. -void queueMemoryRead(MachineID id, Addr addr, Cycles latency); +void queueMemoryRead(MachineID id, Addr addr, Cycles latency, MachineID origin, int idx, int type); void queueMemoryWrite(MachineID id, Addr addr, Cycles latency, DataBlock block); void queueMemoryWritePartial(MachineID id, Addr addr, Cycles latency, diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm index 8e17f9849..be64706b9 100644 --- a/src/mem/protocol/RubySlicc_Exports.sm +++ b/src/mem/protocol/RubySlicc_Exports.sm @@ -175,6 +175,11 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") { Release, desc="Release operation"; Acquire, desc="Acquire opertion"; AcquireRelease, desc="Acquire and Release opertion"; + // [InvisiSpec] New request types + SPEC_LD, desc="Speculative load"; + EXPOSE, desc="Expose"; + VALIDATE, desc="Validate"; + SPEC_FLUSH, desc="Flush SpecBuffer"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { @@ -256,6 +261,12 @@ enumeration(MessageSizeType, desc="...") { Unblock_Control, desc="Unblock control"; Persistent_Control, desc="Persistent request activation messages"; Completion_Control, desc="Completion messages"; + SPECLD_Control, desc="SPECLD control message"; + SPECLD_Request_Control, desc="SPECLD forward message"; + SPECLD_Data, desc="SPECLD data response"; + EXPOSE_Control, desc="EXPOSE control message"; + EXPOSE_Request_Control, desc="EXPOSE forward request"; + EXPOSE_Data, desc="EXPOSE data response"; } // AccessType @@ -345,6 +356,7 @@ enumeration(RequestStatus, desc="...", default="RequestStatus_NULL") { Issued, desc="The sequencer successfully issued the request"; BufferFull, desc="Can not issue because the 
sequencer is full"; Aliased, desc="This request aliased with a currently outstanding request"; + Merged, desc="This request merged with a currently outstanding request"; NULL, desc=""; } diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm index 27a045d29..5c73b4320 100644 --- a/src/mem/protocol/RubySlicc_Types.sm +++ b/src/mem/protocol/RubySlicc_Types.sm @@ -113,7 +113,7 @@ structure (Sequencer, external = "yes") { Cycles, Cycles, Cycles); void checkCoherence(Addr); - void evictionCallback(Addr); + void evictionCallback(Addr, bool); void recordRequestType(SequencerRequestType); bool checkResourceAvailable(CacheResourceType, Addr); void invalidateSC(Addr); @@ -172,6 +172,7 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") { HSAScope scope, desc="HSA scope"; HSASegment segment, desc="HSA segment"; PacketPtr pkt, desc="Packet associated with this request"; + int idx, desc="LQ index"; } structure(AbstractEntry, primitive="yes", external = "yes") { diff --git a/src/mem/request.hh b/src/mem/request.hh index 189d160ab..e46eac09b 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -149,6 +149,9 @@ class Request MEM_SWAP = 0x00400000, MEM_SWAP_COND = 0x00800000, + /** [InvisiSpec] it is a spec request */ + SPEC = 0x00004000, + /** The request is a prefetch. */ PREFETCH = 0x01000000, /** The request should be prefetched into the exclusive state. 
*/ @@ -803,6 +806,7 @@ class Request bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); } bool isInstFetch() const { return _flags.isSet(INST_FETCH); } bool isPrefetch() const { return _flags.isSet(PREFETCH); } + bool isSpec() const { return _flags.isSet(SPEC); } bool isLLSC() const { return _flags.isSet(LLSC); } bool isPriv() const { return _flags.isSet(PRIVILEGED); } bool isLockedRMW() const { return _flags.isSet(LOCKED_RMW); } diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript index be52c02d0..e6d45a419 100644 --- a/src/mem/ruby/SConscript +++ b/src/mem/ruby/SConscript @@ -59,6 +59,9 @@ DebugFlag('RubySystem') DebugFlag('RubyTester') DebugFlag('RubyStats') DebugFlag('RubyResourceStalls') +DebugFlag('SpecBuffer') +DebugFlag('SpecBufferValidate') +DebugFlag('MemSpecBuffer') CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester', 'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache', diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc index 57834f2e2..7d4d71eb3 100644 --- a/src/mem/ruby/network/Network.cc +++ b/src/mem/ruby/network/Network.cc @@ -144,12 +144,18 @@ Network::MessageSizeType_to_int(MessageSizeType size_type) case MessageSizeType_Unblock_Control: case MessageSizeType_Persistent_Control: case MessageSizeType_Completion_Control: + case MessageSizeType_SPECLD_Control: + case MessageSizeType_SPECLD_Request_Control: + case MessageSizeType_EXPOSE_Control: + case MessageSizeType_EXPOSE_Request_Control: return m_control_msg_size; case MessageSizeType_Data: case MessageSizeType_Response_Data: case MessageSizeType_ResponseLocal_Data: case MessageSizeType_ResponseL2hit_Data: case MessageSizeType_Writeback_Data: + case MessageSizeType_SPECLD_Data: + case MessageSizeType_EXPOSE_Data: return m_data_msg_size; default: panic("Invalid range for type MessageSizeType"); diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 
de7e03dd7..a4ba1fe07 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -41,6 +41,7 @@ #include "mem/ruby/slicc_interface/AbstractController.hh" #include "debug/RubyQueue.hh" +#include "debug/MemSpecBuffer.hh" #include "mem/protocol/MemoryMsg.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/system/GPUCoalescer.hh" @@ -96,6 +97,14 @@ AbstractController::regStats() .name(name() + ".fully_busy_cycles") .desc("cycles for which number of transistions == max transitions") .flags(Stats::nozero); + m_expose_hits + .name(name() + ".expose_hits") + .desc("number of expose hits at LLC spec buffer") + .flags(Stats::nozero); + m_expose_misses + .name(name() + ".expose_misses") + .desc("number of expose misses at LLC spec buffer") + .flags(Stats::nozero); } void @@ -238,8 +247,67 @@ AbstractController::getMasterPort(const std::string &if_name, void AbstractController::queueMemoryRead(const MachineID &id, Addr addr, - Cycles latency) + Cycles latency, MachineID origin, int idx, int type) { + int coreId = origin.num; + int sbeId = idx; + // type 0: non-spec 1: spec 2: expose + // DPRINTFR(MemSpecBuffer, "%10s MemRead (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr)); + // if idx == -1, it is a write request which cannot be spec or expose. 
+ assert(!(type != 0 && sbeId == -1)); + assert(sbeId >= -1 && sbeId <= 65); + assert(coreId < 8); + assert(type >=0 && type <= 2); + if (type == 0) { + for (int c = 0; c < 8; ++c) { + for (int i = 0; i < 66; ++i) { + if (m_specBuf[c][i].address == addr) { + DPRINTFR(MemSpecBuffer, "%10s Cleared by Read (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr)); + m_specBuf[c][i].address = 0; + m_specBuf[c][i].data.clear(); + } + } + } + } else if (type == 1) { + + } else if (type == 2) { + if (m_specBuf[coreId][sbeId].address == addr) { + DPRINTFR(MemSpecBuffer, "%10s Expose Hit (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr)); + ++m_expose_hits; + assert(getMemoryQueue()); + std::shared_ptr msg = std::make_shared(clockEdge()); + (*msg).m_addr = addr; + (*msg).m_Sender = m_machineID; + (*msg).m_OriginalRequestorMachId = id; + (*msg).m_Type = MemoryRequestType_MEMORY_READ; + (*msg).m_MessageSize = MessageSizeType_Response_Data; + (*msg).m_DataBlk = m_specBuf[coreId][sbeId].data; + getMemoryQueue()->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + for (int c = 0; c < 8; ++c) { + for (int i = 0; i < 66; ++i) { + if (m_specBuf[c][i].address == addr) { + DPRINTFR(MemSpecBuffer, "%10s Cleared by Expose Hit (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr)); + m_specBuf[c][i].address = 0; + m_specBuf[c][i].data.clear(); + } + } + } + return; + } else { + DPRINTFR(MemSpecBuffer, "%10s Expose Miss (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr)); + ++m_expose_misses; + for (int c = 0; c < 8; ++c) { + for (int i = 0; i < 66; ++i) { + if (m_specBuf[c][i].address == addr) { + DPRINTFR(MemSpecBuffer, "%10s Cleared by Expose Miss (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr)); + m_specBuf[c][i].address = 0; + m_specBuf[c][i].data.clear(); + } + } + } + } + } + RequestPtr req = 
std::make_shared( addr, RubySystem::getBlockSizeBytes(), 0, m_masterId); @@ -248,6 +316,9 @@ AbstractController::queueMemoryRead(const MachineID &id, Addr addr, pkt->dataDynamic(newData); SenderState *s = new SenderState(id); + s->type = type; + s->coreId = coreId; + s->sbeId = sbeId; pkt->pushSenderState(s); // Use functional rather than timing accesses during warmup @@ -339,6 +410,9 @@ AbstractController::recvTimingResp(PacketPtr pkt) SenderState *s = dynamic_cast(pkt->senderState); (*msg).m_OriginalRequestorMachId = s->id; + int type = s->type; + int coreId = s->coreId; + int sbeId = s->sbeId; delete s; if (pkt->isRead()) { @@ -348,6 +422,12 @@ AbstractController::recvTimingResp(PacketPtr pkt) // Copy data from the packet (*msg).m_DataBlk.setData(pkt->getPtr(), 0, RubySystem::getBlockSizeBytes()); + if (type == 1) { + DPRINTFR(MemSpecBuffer, "%10s Updated by ReadSpec (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(pkt->getAddr())); + m_specBuf[coreId][sbeId].address = pkt->getAddr(); + m_specBuf[coreId][sbeId].data.setData(pkt->getPtr(), 0, + RubySystem::getBlockSizeBytes()); + } } else if (pkt->isWrite()) { (*msg).m_Type = MemoryRequestType_MEMORY_WB; (*msg).m_MessageSize = MessageSizeType_Writeback_Control; diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 35cd3d2a5..b65a511d0 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -129,7 +129,7 @@ class AbstractController : public MemObject, public Consumer BaseMasterPort& getMasterPort(const std::string& if_name, PortID idx = InvalidPortID); - void queueMemoryRead(const MachineID &id, Addr addr, Cycles latency); + void queueMemoryRead(const MachineID &id, Addr addr, Cycles latency, MachineID origin, int idx, int type); void queueMemoryWrite(const MachineID &id, Addr addr, Cycles latency, const DataBlock &block); void 
queueMemoryWritePartial(const MachineID &id, Addr addr, Cycles latency, @@ -199,6 +199,8 @@ class AbstractController : public MemObject, public Consumer //! Counter for the number of cycles when the transitions carried out //! were equal to the maximum allowed Stats::Scalar m_fully_busy_cycles; + Stats::Scalar m_expose_hits; + Stats::Scalar m_expose_misses; //! Histogram for profiling delay for the messages this controller //! cares for @@ -250,6 +252,9 @@ class AbstractController : public MemObject, public Consumer { // Id of the machine from which the request originated. MachineID id; + int type; + int coreId; + int sbeId; SenderState(MachineID _id) : id(_id) {} @@ -258,6 +263,14 @@ class AbstractController : public MemObject, public Consumer private: /** The address range to which the controller responds on the CPU side. */ const AddrRangeList addrRanges; + + struct SBE + { + Addr address; + DataBlock data; + }; + + SBE m_specBuf[8][66]; }; #endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__ diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh index 6c84f3823..2fc4c9f98 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -60,6 +60,7 @@ class RubyRequest : public Message int m_wfid; HSAScope m_scope; HSASegment m_segment; + int m_idx; RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len, @@ -82,6 +83,11 @@ class RubyRequest : public Message m_segment(_segment) { m_LineAddress = makeLineAddress(m_PhysicalAddress); + if (_pkt->reqIdx == -1) { + m_idx = _pkt->reqIdx; + } else { + m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 
0 : 1); + } } RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len, @@ -109,6 +115,11 @@ class RubyRequest : public Message m_segment(_segment) { m_LineAddress = makeLineAddress(m_PhysicalAddress); + if (_pkt->reqIdx == -1) { + m_idx = _pkt->reqIdx; + } else { + m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1); + } } RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len, @@ -137,6 +148,11 @@ class RubyRequest : public Message m_segment(_segment) { m_LineAddress = makeLineAddress(m_PhysicalAddress); + if (_pkt->reqIdx == -1) { + m_idx = _pkt->reqIdx; + } else { + m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1); + } } diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index 8d99c90aa..dc5898bea 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -176,7 +176,9 @@ CacheMemory::tryCacheAccess(Addr address, RubyRequestType type, return true; } if ((entry->m_Permission == AccessPermission_Read_Only) && - (type == RubyRequestType_LD || type == RubyRequestType_IFETCH)) { + (type == RubyRequestType_LD || + type == RubyRequestType_IFETCH || + type == RubyRequestType_SPEC_LD)) { return true; } // The line must not be accessible diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 84a70c0f1..15013e056 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -246,6 +246,7 @@ RubyPort::PioSlavePort::recvAtomic(PacketPtr pkt) panic("Could not find address in Ruby PIO address ranges!\n"); } +// [InvisiSpec] Request on the way from CPU to Ruby bool RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt) { @@ -429,6 +430,7 @@ RubyPort::MemSlavePort::recvFunctional(PacketPtr pkt) } } +// [InvisiSpec] On the way from Ruby to CPU void RubyPort::ruby_hit_callback(PacketPtr pkt) { @@ -512,6 +514,7 @@ RubyPort::drain() } } +// [InvisiSpec] Still on the way from Ruby to CPU void 
RubyPort::MemSlavePort::hitCallback(PacketPtr pkt) { @@ -545,7 +548,7 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt) } // Flush, acquire, release requests don't access physical memory - if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) { + if (pkt->isFlush() || pkt->isExpose() || pkt->cmd == MemCmd::MemFenceReq) { accessPhysMem = false; } @@ -572,6 +575,7 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt) // Ruby protocol. schedTimingResp(pkt, curTick()); } else { + // [InvisiSpec] Delete the packet if a reponse is not required delete pkt; } @@ -602,7 +606,7 @@ RubyPort::MemSlavePort::isPhysMemAddress(Addr addr) const } void -RubyPort::ruby_eviction_callback(Addr address) +RubyPort::ruby_eviction_callback(Addr address, bool external) { DPRINTF(RubyPort, "Sending invalidations.\n"); // Allocate the invalidate request and packet on the stack, as it is @@ -615,6 +619,9 @@ RubyPort::ruby_eviction_callback(Addr address) // Use a single packet to signal all snooping ports of the invalidation. // This assumes that snooping ports do NOT modify the packet/request Packet pkt(request, MemCmd::InvalidateReq); + if (external) { + pkt.setExternalEviction(); + } for (CpuPortIter p = slave_ports.begin(); p != slave_ports.end(); ++p) { // check if the connected master port is snooping if ((*p)->isSnooping()) { diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 146443282..9c0200829 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -172,7 +172,7 @@ class RubyPort : public MemObject void trySendRetries(); void ruby_hit_callback(PacketPtr pkt); void testDrainComplete(); - void ruby_eviction_callback(Addr address); + void ruby_eviction_callback(Addr address, bool external); /** * Called by the PIO port when receiving a timing response. 
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index f30369710..5a11d3165 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -36,6 +36,8 @@ #include "debug/ProtocolTrace.hh" #include "debug/RubySequencer.hh" #include "debug/RubyStats.hh" +#include "debug/SpecBuffer.hh" +#include "debug/SpecBufferValidate.hh" #include "mem/packet.hh" #include "mem/protocol/PrefetchBit.hh" #include "mem/protocol/RubyAccessMode.hh" @@ -54,7 +56,9 @@ RubySequencerParams::create() Sequencer::Sequencer(const Params *p) : RubyPort(p), m_IncompleteTimes(MachineType_NUM), - deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check") + deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check"), + m_specBuf(33), + specBufferHitEvent([this]{ specBufferHitCallback(); }, "Sequencer spec buffer hit") { m_outstanding_count = 0; @@ -160,6 +164,7 @@ void Sequencer::resetStats() } } +// [InvisiSpec] Request on the way from CPU to Ruby // Insert the request on the correct request table. Return true if // the entry was already present. RequestStatus @@ -190,6 +195,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type) RequestTable::value_type default_entry(line_addr, (SequencerRequest*) NULL); + // [InvisiSpec] If store if ((request_type == RubyRequestType_ST) || (request_type == RubyRequestType_RMW_Read) || (request_type == RubyRequestType_RMW_Write) || @@ -217,6 +223,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type) m_store_waiting_on_store++; return RequestStatus_Aliased; } + // [InvisiSpec] If load } else { // Check if there is any outstanding write request for the same // cache line. 
@@ -232,6 +239,16 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type) RequestTable::iterator i = r.first; i->second = new SequencerRequest(pkt, request_type, curCycle()); m_outstanding_count++; + } else if (request_type == RubyRequestType_SPEC_LD) { + auto i = m_readRequestTable.find(line_addr); + if (i->second->m_type == RubyRequestType_SPEC_LD) { + DPRINTFR(SpecBuffer, "%10s Merging (idx=%d-%d, addr=%#x) with %d\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()), i->second->pkt->reqIdx); + i->second->dependentSpecRequests.push_back(pkt); + return RequestStatus_Merged; + } else { + m_load_waiting_on_load++; + return RequestStatus_Aliased; + } } else { // There is an outstanding read request for the cache line m_load_waiting_on_load++; @@ -412,6 +429,19 @@ Sequencer::writeCallback(Addr address, DataBlock& data, initialRequestTime, forwardRequestTime, firstResponseTime); } +bool Sequencer::updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress) { + uint8_t idx = pkt->reqIdx; + SBE& sbe = m_specBuf[idx]; + int blkIdx = pkt->isFirst() ? 0 : 1; + SBB& sbb = sbe.blocks[blkIdx]; + if (makeLineAddress(sbb.reqAddress) == dataAddress) { + sbb.data = data; + return true; + } + return false; +} + +// [InvisiSpec] Called by Ruby to send a response to CPU. void Sequencer::readCallback(Addr address, DataBlock& data, bool externalHit, const MachineType mach, @@ -430,12 +460,78 @@ Sequencer::readCallback(Addr address, DataBlock& data, markRemoved(); assert((request->m_type == RubyRequestType_LD) || + (request->m_type == RubyRequestType_SPEC_LD) || + (request->m_type == RubyRequestType_EXPOSE) || (request->m_type == RubyRequestType_IFETCH)); + + PacketPtr pkt = request->pkt; + if (pkt->isSpec()) { + assert(!pkt->onlyAccessSpecBuff()); + DPRINTFR(SpecBuffer, "%10s SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 
0 : 1, printAddress(pkt->getAddr())); + updateSBB(pkt, data, address); + if (!externalHit) { + pkt->setL1Hit(); + } + } else if (pkt->isExpose()) { + DPRINTFR(SpecBuffer, "%10s EXPOSE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr())); + } else if (pkt->isValidate()) { + DPRINTFR(SpecBuffer, "%10s VALIDATE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr())); + uint8_t idx = pkt->reqIdx; + SBE& sbe = m_specBuf[idx]; + int blkIdx = pkt->isFirst() ? 0 : 1; + SBB& sbb = sbe.blocks[blkIdx]; + assert(makeLineAddress(sbb.reqAddress) == address); + if (!memcmp(sbb.data.getData(getOffset(pkt->getAddr()), pkt->getSize()), data.getData(getOffset(pkt->getAddr()), pkt->getSize()), pkt->getSize())) { + *(pkt->getPtr()) = 1; + } else { + // std::ostringstream os; + // sbb.data.print(os); + // DPRINTFR(SpecBufferValidate, "%s\n", os.str()); + // os.str(""); + // data.print(os); + // DPRINTFR(SpecBufferValidate, "%s\n", os.str()); + *(pkt->getPtr()) = 0; + } + } + + for (auto& dependentPkt : request->dependentSpecRequests) { + assert(!dependentPkt->onlyAccessSpecBuff()); + DPRINTFR(SpecBuffer, "%10s Merged SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), dependentPkt->reqIdx, dependentPkt->isFirst()? 
0 : 1, printAddress(dependentPkt->getAddr())); + assert(dependentPkt->isSpec()); + updateSBB(dependentPkt, data, address); + if (!externalHit) { + dependentPkt->setL1Hit(); + } + memcpy(dependentPkt->getPtr(), + data.getData(getOffset(dependentPkt->getAddr()), dependentPkt->getSize()), + dependentPkt->getSize()); + ruby_hit_callback(dependentPkt); + } hitCallback(request, data, true, mach, externalHit, initialRequestTime, forwardRequestTime, firstResponseTime); } +void +Sequencer::specBufferHitCallback() +{ + assert(m_specRequestQueue.size()); + while (m_specRequestQueue.size()) { + auto specReq = m_specRequestQueue.front(); + if (specReq.second <= curTick()) { + PacketPtr pkt = specReq.first; + assert(pkt->onlyAccessSpecBuff()); + DPRINTFR(SpecBuffer, "%10s SB Hit Callback (idx=%d, addr=%#x)\n", curTick(), pkt->reqIdx, printAddress(pkt->getAddr())); + ruby_hit_callback(pkt); + m_specRequestQueue.pop(); + } else { + schedule(specBufferHitEvent, specReq.second); + break; + } + } +} + +// [InvisiSpec] Response on the way from Ruby to CPU void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, bool llscSuccess, @@ -470,8 +566,9 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, if (RubySystem::getWarmupEnabled()) { data.setData(pkt->getConstPtr(), getOffset(request_address), pkt->getSize()); - } else if (!pkt->isFlush()) { + } else if (!pkt->isFlush() && !pkt->isExpose() && !pkt->isValidate()) { if ((type == RubyRequestType_LD) || + (type == RubyRequestType_SPEC_LD) || (type == RubyRequestType_IFETCH) || (type == RubyRequestType_RMW_Read) || (type == RubyRequestType_Locked_RMW_Read) || @@ -533,6 +630,7 @@ Sequencer::empty() const return m_writeRequestTable.empty() && m_readRequestTable.empty(); } +// [InvisiSpec] Request on the way from CPU to Ruby RequestStatus Sequencer::makeRequest(PacketPtr pkt) { @@ -543,7 +641,56 @@ Sequencer::makeRequest(PacketPtr pkt) RubyRequestType primary_type = RubyRequestType_NULL; RubyRequestType 
secondary_type = RubyRequestType_NULL; - if (pkt->isLLSC()) { + // [InvisiSpec] Handle new requests + if (pkt->isSpec()) { + assert(pkt->cmd == MemCmd::ReadSpecReq); + assert(pkt->isSplit || pkt->isFirst()); + uint8_t idx = pkt->reqIdx; + SBE& sbe = m_specBuf[idx]; + sbe.isSplit = pkt->isSplit; + int blkIdx = pkt->isFirst() ? 0 : 1; + SBB& sbb = sbe.blocks[blkIdx]; + sbb.reqAddress = pkt->getAddr(); + sbb.reqSize = pkt->getSize(); + if (pkt->onlyAccessSpecBuff()) { + int srcIdx = pkt->srcIdx; + SBE& srcEntry = m_specBuf[srcIdx]; + if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[0].reqAddress)) { + sbb.data = srcEntry.blocks[0].data; + } else if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[1].reqAddress)) { + sbb.data = srcEntry.blocks[1].data; + } else { + fatal("Requested address %#x is not present in the spec buffer\n", printAddress(sbb.reqAddress)); + } + memcpy(pkt->getPtr(), + sbb.data.getData(getOffset(sbb.reqAddress), sbb.reqSize), + sbb.reqSize); + m_specRequestQueue.push({pkt, curTick()}); + DPRINTFR(SpecBuffer, "%10s SB Hit (idx=%d, addr=%#x) on (srcIdx=%d)\n", curTick(), idx, printAddress(sbb.reqAddress), srcIdx); + if (!specBufferHitEvent.scheduled()) { + schedule(specBufferHitEvent, clockEdge(Cycles(1))); + } + return RequestStatus_Issued; + } else { + // assert it is not in the buffer + primary_type = secondary_type = RubyRequestType_SPEC_LD; + } + } else if (pkt->isExpose() || pkt->isValidate()) { + assert(pkt->cmd == MemCmd::ExposeReq || pkt->cmd == MemCmd::ValidateReq); + assert(pkt->isSplit || pkt->isFirst()); + uint8_t idx = pkt->reqIdx; + SBE& sbe = m_specBuf[idx]; + sbe.isSplit = pkt->isSplit; + int blkIdx = pkt->isFirst() ? 
0 : 1; + SBB& sbb = sbe.blocks[blkIdx]; + if (sbb.reqAddress != pkt->getAddr()) { + fatal("sbb.reqAddress != pkt->getAddr: %#x != %#x\n", printAddress(sbb.reqAddress), printAddress(pkt->getAddr())); + } + if (sbb.reqSize != pkt->getSize()) { + fatal("sbb.reqSize != pkt->getSize(): %d != %d\n", sbb.reqSize, pkt->getSize()); + } + primary_type = secondary_type = RubyRequestType_EXPOSE; + } else if (pkt->isLLSC()) { // // Alpha LL/SC instructions need to be handled carefully by the cache // coherence protocol to ensure they follow the proper semantics. In @@ -614,8 +761,22 @@ Sequencer::makeRequest(PacketPtr pkt) } RequestStatus status = insertRequest(pkt, primary_type); - if (status != RequestStatus_Ready) + if (status == RequestStatus_Merged) { + return RequestStatus_Issued; + } else if (status != RequestStatus_Ready) { return status; + } + + if (pkt->isSpec()) { + DPRINTFR(SpecBuffer, "%10s Issuing SPEC_LD (idx=%d-%d, addr=%#x)\n", + curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr())); + } else if (pkt->isExpose()) { + DPRINTFR(SpecBuffer, "%10s Issuing EXPOSE (idx=%d-%d, addr=%#x)\n", + curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr())); + } else if (pkt->isValidate()) { + DPRINTFR(SpecBuffer, "%10s Issuing VALIDATE (idx=%d-%d, addr=%#x)\n", + curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr())); + } issueRequest(pkt, secondary_type); @@ -642,7 +803,7 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) // requests do not std::shared_ptr msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->isFlush() ? + pkt->isFlush() || pkt->isExpose() ? 
nullptr : pkt->getPtr(), pkt->getSize(), pc, secondary_type, RubyAccessMode_Supervisor, pkt, @@ -716,9 +877,9 @@ Sequencer::recordRequestType(SequencerRequestType requestType) { void -Sequencer::evictionCallback(Addr address) +Sequencer::evictionCallback(Addr address, bool external) { - ruby_eviction_callback(address); + ruby_eviction_callback(address, external); } void diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index fcfa8ad86..66ff92777 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -31,6 +31,7 @@ #include #include +#include #include "mem/protocol/MachineType.hh" #include "mem/protocol/RubyRequestType.hh" @@ -45,6 +46,7 @@ struct SequencerRequest PacketPtr pkt; RubyRequestType m_type; Cycles issue_time; + std::vector dependentSpecRequests; SequencerRequest(PacketPtr _pkt, RubyRequestType _m_type, Cycles _issue_time) @@ -54,6 +56,19 @@ struct SequencerRequest std::ostream& operator<<(std::ostream& out, const SequencerRequest& obj); +struct SBB // SpecBufferBlock +{ + Addr reqAddress; + unsigned reqSize; + DataBlock data; +}; + +struct SBE // SpecBufferEntry +{ + bool isSplit; + SBB blocks[2]; +}; + class Sequencer : public RubyPort { public: @@ -83,6 +98,9 @@ class Sequencer : public RubyPort const Cycles forwardRequestTime = Cycles(0), const Cycles firstResponseTime = Cycles(0)); + void specBufferHitCallback(); + bool updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress); + RequestStatus makeRequest(PacketPtr pkt); bool empty() const; int outstandingCount() const { return m_outstanding_count; } @@ -97,7 +115,7 @@ class Sequencer : public RubyPort void checkCoherence(Addr address); void markRemoved(); - void evictionCallback(Addr address); + void evictionCallback(Addr address, bool external); void invalidateSC(Addr address); int coreId() const { return m_coreId; } @@ -238,6 +256,10 @@ class Sequencer : public RubyPort std::vector m_IncompleteTimes; EventFunctionWrapper 
deadlockCheckEvent; + + std::vector<SBE> m_specBuf; + std::queue<std::pair<PacketPtr, Tick>> m_specRequestQueue; + EventFunctionWrapper specBufferHitEvent; }; inline std::ostream& -- cgit v1.2.3