commit     c8bbfed93752c2c79d36bb9dedbc2208b856dae6
tree       c33a164e435603a4424f81f7f09ec50b5f01e455
parent     ef987a4064f1e81fd1b61f3de03834a51658645f
author     Somayeh Sardashti <somayeh@cs.wisc.edu>  2011-03-28 10:49:45 -0500
committer  Somayeh Sardashti <somayeh@cs.wisc.edu>  2011-03-28 10:49:45 -0500
This patch supports cache flushing in MOESI_hammer
 configs/example/ruby_random_test.py      |   9
 src/cpu/testers/rubytest/Check.cc        |  35
 src/cpu/testers/rubytest/Check.hh        |   1
 src/cpu/testers/rubytest/RubyTester.cc   |   3
 src/cpu/testers/rubytest/RubyTester.hh   |   2
 src/cpu/testers/rubytest/RubyTester.py   |   1
 src/mem/packet.cc                        |   4
 src/mem/packet.hh                        |   4
 src/mem/protocol/MOESI_hammer-cache.sm   | 312
 src/mem/protocol/MOESI_hammer-dir.sm     | 135
 src/mem/protocol/MOESI_hammer-msg.sm     |   3
 src/mem/protocol/RubySlicc_Exports.sm    |   1
 src/mem/ruby/system/RubyPort.cc          |  14
 src/mem/ruby/system/Sequencer.cc         |  12
 14 files changed, 508 insertions(+), 28 deletions(-)
diff --git a/configs/example/ruby_random_test.py b/configs/example/ruby_random_test.py
index ddd6a53af..8c415641b 100644
--- a/configs/example/ruby_random_test.py
+++ b/configs/example/ruby_random_test.py
@@ -82,7 +82,14 @@ if args:
 #
 # Create the ruby random tester
 #
-tester = RubyTester(checks_to_complete = options.checks,
+
+# Check the protocol
+check_flush = False
+if buildEnv['PROTOCOL'] == 'MOESI_hammer':
+    check_flush = True
+
+tester = RubyTester(check_flush = check_flush,
+                    checks_to_complete = options.checks,
                     wakeup_frequency = options.wakeup_freq)
 
 #
diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc
index 9eed7270b..b536e2287 100644
--- a/src/cpu/testers/rubytest/Check.cc
+++ b/src/cpu/testers/rubytest/Check.cc
@@ -59,6 +59,10 @@ Check::initiate()
         initiatePrefetch(); // Prefetch from random processor
     }
 
+    if (m_tester_ptr->getCheckFlush() && (random() & 0xff) == 0) {
+        initiateFlush(); // issue a Flush request from random processor
+    }
+
     if (m_status == TesterStatus_Idle) {
         initiateAction();
     } else if (m_status == TesterStatus_Ready) {
@@ -124,6 +128,37 @@ Check::initiatePrefetch()
 }
 
 void
+Check::initiateFlush()
+{
+
+    DPRINTF(RubyTest, "initiating Flush\n");
+
+    int index = random() % m_num_cpu_sequencers;
+    RubyTester::CpuPort* port =
+        safe_cast<RubyTester::CpuPort*>(m_tester_ptr->getCpuPort(index));
+
+    Request::Flags flags;
+
+    Request *req = new Request(m_address.getAddress(), CHECK_SIZE, flags,
+                               curTick(), m_pc.getAddress());
+
+    Packet::Command cmd;
+
+    cmd = MemCmd::FlushReq;
+
+    PacketPtr pkt = new Packet(req, cmd, port->idx);
+
+    // push the subblock onto the sender state.  The sequencer will
+    // update the subblock on the return
+    pkt->senderState =
+        new SenderState(m_address, req->getSize(), pkt->senderState);
+
+    if (port->sendTiming(pkt)) {
+        DPRINTF(RubyTest, "initiating Flush - successful\n");
+    }
+}
+
+void
 Check::initiateAction()
 {
     DPRINTF(RubyTest, "initiating Action\n");
diff --git a/src/cpu/testers/rubytest/Check.hh b/src/cpu/testers/rubytest/Check.hh
index d16c10f57..6861a74d3 100644
--- a/src/cpu/testers/rubytest/Check.hh
+++ b/src/cpu/testers/rubytest/Check.hh
@@ -58,6 +58,7 @@ class Check
     void print(std::ostream& out) const;
 
   private:
+    void initiateFlush();
     void initiatePrefetch();
     void initiateAction();
     void initiateCheck();
diff --git a/src/cpu/testers/rubytest/RubyTester.cc b/src/cpu/testers/rubytest/RubyTester.cc
index 1d477dad2..024cb741e 100644
--- a/src/cpu/testers/rubytest/RubyTester.cc
+++ b/src/cpu/testers/rubytest/RubyTester.cc
@@ -40,7 +40,8 @@ RubyTester::RubyTester(const Params *p)
   : MemObject(p), checkStartEvent(this),
     m_checks_to_complete(p->checks_to_complete),
     m_deadlock_threshold(p->deadlock_threshold),
-    m_wakeup_frequency(p->wakeup_frequency)
+    m_wakeup_frequency(p->wakeup_frequency),
+    m_check_flush(p->check_flush)
 {
     m_checks_completed = 0;
diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh
index ac023b43f..4ea5bda73 100644
--- a/src/cpu/testers/rubytest/RubyTester.hh
+++ b/src/cpu/testers/rubytest/RubyTester.hh
@@ -99,6 +99,7 @@ class RubyTester : public MemObject
     void printConfig(std::ostream& out) const {}
 
     void print(std::ostream& out) const;
+    bool getCheckFlush() { return m_check_flush; }
 
   protected:
     class CheckStartEvent : public Event
@@ -134,6 +135,7 @@ class RubyTester : public MemObject
     int m_deadlock_threshold;
     int m_num_cpu_sequencers;
     int m_wakeup_frequency;
+    bool m_check_flush;
 };
 
 inline std::ostream&
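
Taken together, the tester changes above gate Check::initiateFlush() behind the new check_flush parameter and a 1-in-256 random draw, then fire a FlushReq packet at a random CPU port. The following stand-alone sketch condenses that path; Packet, Port, and the constants here are simplified stand-ins for the gem5 classes, not the real API.

    #include <cstdio>
    #include <cstdlib>

    // Stand-ins for gem5's Packet/Port; only the flush-issue logic is real.
    enum class MemCmd { ReadReq, WriteReq, FlushReq };

    struct Packet {
        MemCmd cmd;
        unsigned long addr;
    };

    struct Port {
        bool sendTiming(const Packet &pkt) {
            std::printf("sent cmd %d for 0x%lx\n",
                        static_cast<int>(pkt.cmd), pkt.addr);
            return true;  // the real port can also refuse and retry later
        }
    };

    int main() {
        Port ports[4];                    // one port per CPU sequencer
        unsigned long checkAddr = 0x1000;

        for (int wakeup = 0; wakeup < (1 << 12); ++wakeup) {
            // (random() & 0xff) == 0 keeps the low 8 bits, so a flush fires
            // on roughly 1 in 256 wakeups -- the same gating as Check::initiate().
            if ((random() & 0xff) == 0) {
                Packet pkt{MemCmd::FlushReq, checkAddr};
                if (ports[random() % 4].sendTiming(pkt))
                    std::printf("initiating Flush - successful\n");
            }
        }
        return 0;
    }
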
diff --git a/src/cpu/testers/rubytest/RubyTester.py b/src/cpu/testers/rubytest/RubyTester.py
index af37d2ff1..fd6e9aefd 100644
--- a/src/cpu/testers/rubytest/RubyTester.py
+++ b/src/cpu/testers/rubytest/RubyTester.py
@@ -36,3 +36,4 @@ class RubyTester(MemObject):
     checks_to_complete = Param.Int(100, "checks to complete")
     deadlock_threshold = Param.Int(50000, "how often to check for deadlock")
     wakeup_frequency = Param.Int(10, "number of cycles between wakeups")
+    check_flush = Param.Bool(False, "check cache flushing")
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index d0b1fed83..547695697 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -148,7 +148,9 @@ MemCmd::commandInfo[] =
     /* BadAddressError -- memory address invalid */
     { SET2(IsResponse, IsError), InvalidCmd, "BadAddressError" },
     /* PrintReq */
-    { SET2(IsRequest, IsPrint), InvalidCmd, "PrintReq" }
+    { SET2(IsRequest, IsPrint), InvalidCmd, "PrintReq" },
+    /* Flush Request */
+    { SET3(IsRequest, IsFlush, NeedsExclusive), InvalidCmd, "FlushReq" }
 };
 
 bool
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index bcf9d8d68..7e75dc297 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -105,6 +105,7 @@ class MemCmd
         BadAddressError,        // memory address invalid
         // Fake simulator-only commands
         PrintReq,       // Print state matching address
+        FlushReq,       // request for a cache flush
         NUM_MEM_CMDS
     };
 
@@ -129,6 +130,7 @@ class MemCmd
         HasData,        //!< There is an associated payload
         IsError,        //!< Error response
         IsPrint,        //!< Print state matching address (for debugging)
+        IsFlush,        //!< Flush the address from caches
         NUM_COMMAND_ATTRIBUTES
     };
 
@@ -175,6 +177,7 @@ class MemCmd
     bool isLLSC() const         { return testCmdAttrib(IsLlsc); }
     bool isError() const        { return testCmdAttrib(IsError); }
     bool isPrint() const        { return testCmdAttrib(IsPrint); }
+    bool isFlush() const        { return testCmdAttrib(IsFlush); }
 
     const Command
     responseCommand() const
@@ -411,6 +414,7 @@ class Packet : public FastAlloc, public Printable
     bool isLLSC() const         { return cmd.isLLSC(); }
     bool isError() const        { return cmd.isError(); }
     bool isPrint() const        { return cmd.isPrint(); }
+    bool isFlush() const        { return cmd.isFlush(); }
 
     // Snoop flags
     void assertMemInhibit()     { flags.set(MEM_INHIBIT); }
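
The packet changes are the usual MemCmd pattern: FlushReq gets a row in commandInfo[] whose attribute bitset marks it IsRequest, IsFlush, and NeedsExclusive, and isFlush() just tests that bit. Below is a simplified re-creation of that table pattern; the names mirror packet.hh/packet.cc, but this is a sketch, not the gem5 code itself.

    #include <bitset>
    #include <cassert>

    // Simplified re-creation of the MemCmd attribute-table pattern from
    // src/mem/packet.{hh,cc}; the real table has many more commands.
    enum Command { ReadReq, WriteReq, PrintReq, FlushReq, NUM_MEM_CMDS };
    enum Attribute { IsRead, IsWrite, IsRequest, NeedsExclusive, IsPrint,
                     IsFlush, NUM_COMMAND_ATTRIBUTES };

    struct CommandInfo {
        std::bitset<NUM_COMMAND_ATTRIBUTES> attributes;
    };

    // One row per command, like MemCmd::commandInfo[].  FlushReq is a request
    // that needs exclusive ownership: the line must leave every other cache.
    static const CommandInfo commandInfo[NUM_MEM_CMDS] = {
        {(1u << IsRead)  | (1u << IsRequest)},                          // ReadReq
        {(1u << IsWrite) | (1u << IsRequest) | (1u << NeedsExclusive)}, // WriteReq
        {(1u << IsRequest) | (1u << IsPrint)},                          // PrintReq
        {(1u << IsRequest) | (1u << IsFlush) | (1u << NeedsExclusive)}, // FlushReq
    };

    // Equivalent of MemCmd::testCmdAttrib(), which isFlush() wraps.
    static bool testCmdAttrib(Command cmd, Attribute attrib) {
        return commandInfo[cmd].attributes[attrib];
    }

    int main() {
        assert(testCmdAttrib(FlushReq, IsFlush));        // pkt->isFlush()
        assert(testCmdAttrib(FlushReq, NeedsExclusive));
        assert(!testCmdAttrib(ReadReq, IsFlush));
        return 0;
    }
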
diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm
index 865acf275..546160b73 100644
--- a/src/mem/protocol/MOESI_hammer-cache.sm
+++ b/src/mem/protocol/MOESI_hammer-cache.sm
@@ -78,7 +78,16 @@ machine(L1Cache, "AMD Hammer-like protocol")
     ST, AccessPermission:Busy, "ST", desc="S block transferring to L1";
     OT, AccessPermission:Busy, "OT", desc="O block transferring to L1";
     MT, AccessPermission:Busy, "MT", desc="M block transferring to L1";
-    MMT, AccessPermission:Busy, "MMT", desc="MM block transferring to L1";
+    MMT, AccessPermission:Busy, "MMT", desc="MM block transferring to L0";
+
+    // Transition States Related to Flushing
+    MI_F, AccessPermission:Busy, "MI_F", desc="Issued PutX due to a Flush, waiting for ack";
+    MM_F, AccessPermission:Busy, "MM_F", desc="Issued GETF due to a Flush, waiting for ack";
+    IM_F, AccessPermission:Busy, "IM_F", desc="Issued GetX due to a Flush";
+    ISM_F, AccessPermission:Read_Only, "ISM_F", desc="Issued GetX, received data, waiting for all acks";
+    SM_F, AccessPermission:Read_Only, "SM_F", desc="Issued GetX, we still have an old copy of the line";
+    OM_F, AccessPermission:Read_Only, "OM_F", desc="Issued GetX, received data";
+    MM_WF, AccessPermission:Busy, "MM_WF", desc="Issued GetX, received exclusive data";
   }
 
   // EVENTS
@@ -113,6 +122,10 @@ machine(L1Cache, "AMD Hammer-like protocol")
     // Triggers
     All_acks,                desc="Received all required data and message acks";
     All_acks_no_sharers,     desc="Received all acks and no other processor has a shared copy";
+
+    // For Flush
+    Flush_line,              desc="flush the cache line from all caches";
+    Block_Ack,               desc="the directory is blocked and ready for the flush";
   }
 
   // TYPES
@@ -221,6 +234,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
       return Event:Ifetch;
     } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
       return Event:Store;
+    } else if ((type == RubyRequestType:FLUSH)) {
+      return Event:Flush_line;
     } else {
       error("Invalid RubyRequestType");
     }
@@ -318,7 +333,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
       Entry cache_entry := getCacheEntry(in_msg.Address);
       TBE tbe := TBEs[in_msg.Address];
 
-      if (in_msg.Type == CoherenceRequestType:GETX) {
+      if ((in_msg.Type == CoherenceRequestType:GETX) || (in_msg.Type == CoherenceRequestType:GETF)) {
         trigger(Event:Other_GETX, in_msg.Address, cache_entry, tbe);
       } else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) {
         trigger(Event:Merged_GETS, in_msg.Address, cache_entry, tbe);
@@ -342,6 +357,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
         trigger(Event:Writeback_Ack, in_msg.Address, cache_entry, tbe);
       } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
         trigger(Event:Writeback_Nack, in_msg.Address, cache_entry, tbe);
+      } else if (in_msg.Type == CoherenceRequestType:BLOCK_ACK) {
+        trigger(Event:Block_Ack, in_msg.Address, cache_entry, tbe);
       } else {
         error("Unexpected message");
       }
@@ -504,6 +521,19 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
+  action(bf_issueGETF, "bf", desc="Issue GETF") {
+    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+      assert(is_valid(tbe));
+      out_msg.Address := address;
+      out_msg.Type := CoherenceRequestType:GETF;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Request_Control;
+      out_msg.InitialRequestTime := get_time();
+      tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1)
+    }
+  }
+
   action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") {
     peek(forwardToCache_in, RequestMsg) {
       enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
@@ -527,6 +557,29 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
+  action(ct_sendExclusiveDataFromTBE, "ct", desc="Send exclusive data from tbe to requestor") {
+    peek(forwardToCache_in, RequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+        assert(is_valid(tbe));
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Dirty := tbe.Dirty;
+        if (in_msg.DirectedProbe) {
+          out_msg.Acks := machineCount(MachineType:L1Cache);
+        } else {
+          out_msg.Acks := 2;
+        }
+        out_msg.SilentAcks := in_msg.SilentAcks;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+      }
+    }
+  }
+
   action(d_issuePUT, "d", desc="Issue PUT") {
     enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
       out_msg.Address := address;
@@ -537,6 +590,16 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
+  action(df_issuePUTF, "df", desc="Issue PUTF") {
+    enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+      out_msg.Address := address;
+      out_msg.Type := CoherenceRequestType:PUTF;
+      out_msg.Requestor := machineID;
+      out_msg.Destination.add(map_Address_to_Directory(address));
+      out_msg.MessageSize := MessageSizeType:Writeback_Control;
+    }
+  }
+
   action(e_sendData, "e", desc="Send data from cache to requestor") {
     peek(forwardToCache_in, RequestMsg) {
       enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
@@ -583,7 +646,31 @@ machine(L1Cache, "AMD Hammer-like protocol")
       }
     }
   }
-
+
+  action(et_sendDataSharedFromTBE, "\et", desc="Send data from TBE to requestor, keep a shared copy") {
+    peek(forwardToCache_in, RequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+        assert(is_valid(tbe));
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA_SHARED;
+        out_msg.Sender := machineID;
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Dirty := tbe.Dirty;
+        DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk);
+        if (in_msg.DirectedProbe) {
+          out_msg.Acks := machineCount(MachineType:L1Cache);
+        } else {
+          out_msg.Acks := 2;
+        }
+        out_msg.SilentAcks := in_msg.SilentAcks;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+      }
+    }
+  }
+
   action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors, still the owner") {
     peek(forwardToCache_in, RequestMsg) {
       enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
@@ -604,6 +691,26 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
+  action(emt_sendDataSharedMultipleFromTBE, "emt", desc="Send data from tbe to all requestors") {
+    peek(forwardToCache_in, RequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+        assert(is_valid(tbe));
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA_SHARED;
+        out_msg.Sender := machineID;
+        out_msg.Destination := in_msg.MergedRequestors;
+        out_msg.DataBlk := tbe.DataBlk;
+        out_msg.Dirty := tbe.Dirty;
+        DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk);
+        out_msg.Acks := machineCount(MachineType:L1Cache);
+        out_msg.SilentAcks := in_msg.SilentAcks;
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+      }
+    }
+  }
+
   action(f_sendAck, "f", desc="Send ack from cache to requestor") {
     peek(forwardToCache_in, RequestMsg) {
       enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
@@ -706,6 +813,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
+  action(hh_flush_hit, "\hf", desc="Notify sequencer that flush completed.") {
+    assert(is_valid(tbe));
+    DPRINTF(RubySlicc, "%s\n", tbe.DataBlk);
+    sequencer.writeCallback(address, GenericMachineType:L1Cache,tbe.DataBlk);
+  }
+
   action(sx_external_store_hit, "sx", desc="store required external msgs.") {
     assert(is_valid(cache_entry));
     assert(is_valid(tbe));
@@ -747,6 +860,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
     tbe.Sharers := false;
   }
 
+  action(it_allocateTBE, "it", desc="Allocate TBE") {
+    check_allocate(TBEs);
+    TBEs.allocate(address);
+    set_tbe(TBEs[address]);
+    tbe.Dirty := false;
+    tbe.Sharers := false;
+  }
+
   action(j_popTriggerQueue, "j", desc="Pop trigger queue.") {
     triggerQueue_in.dequeue();
   }
@@ -980,6 +1101,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
     }
   }
 
+  action(uf_writeDataToCacheTBE, "uf", desc="Write data to TBE") {
+    peek(responseToCache_in, ResponseMsg) {
+      assert(is_valid(tbe));
+      tbe.DataBlk := in_msg.DataBlk;
+      tbe.Dirty := in_msg.Dirty;
+    }
+  }
+
   action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") {
     peek(responseToCache_in, ResponseMsg) {
       assert(is_valid(cache_entry));
@@ -990,6 +1119,17 @@ machine(L1Cache, "AMD Hammer-like protocol")
       cache_entry.Dirty := in_msg.Dirty || cache_entry.Dirty;
     }
   }
+
+  action(vt_writeDataToTBEVerify, "vt", desc="Write data to TBE, assert it was same as before") {
+    peek(responseToCache_in, ResponseMsg) {
+      assert(is_valid(tbe));
+      DPRINTF(RubySlicc, "Cached Data Block: %s, Msg Data Block: %s\n",
+              tbe.DataBlk, in_msg.DataBlk);
+      assert(tbe.DataBlk == in_msg.DataBlk);
+      tbe.DataBlk := in_msg.DataBlk;
+      tbe.Dirty := in_msg.Dirty || tbe.Dirty;
+    }
+  }
 
   action(gg_deallocateL1CacheBlock, "\g", desc="Deallocate cache block.  Sets the cache to invalid, allowing a replacement in parallel with a fetch.") {
     if (L1DcacheMemory.isTagPresent(address)) {
@@ -1051,23 +1191,35 @@ machine(L1Cache, "AMD Hammer-like protocol")
   //*****************************************************
 
   // Transitions for Load/Store/L2_Replacement from transient states
-  transition({IM, SM, ISM, OM, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
+  transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
+    zz_stallAndWaitMandatoryQueue;
+  }
+
+  transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II}, {Flush_line}) {
     zz_stallAndWaitMandatoryQueue;
   }
 
-  transition({M_W, MM_W}, {L2_Replacement}) {
+  transition({M_W, MM_W}, {L2_Replacement, Flush_line}) {
     zz_stallAndWaitMandatoryQueue;
   }
 
-  transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT}, {Load, Ifetch}) {
+  transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
    zz_stallAndWaitMandatoryQueue;
   }
 
-  transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT}, L1_to_L2) {
+  transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F}, L1_to_L2) {
     zz_stallAndWaitMandatoryQueue;
   }
 
-  transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate}) {
+  transition({MI_F, MM_F}, {Store}) {
+    zz_stallAndWaitMandatoryQueue;
+  }
+
+  transition({MM_F, MI_F}, {Flush_line}) {
+    zz_stallAndWaitMandatoryQueue;
+  }
+
+  transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
     // stall
   }
 
@@ -1241,6 +1393,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
     k_popMandatoryQueue;
   }
 
+  transition(I, Flush_line, IM_F) {
+    it_allocateTBE;
+    bf_issueGETF;
+    uu_profileMiss;
+    k_popMandatoryQueue;
+  }
+
   transition(I, L2_Replacement) {
     rr_deallocateL2CacheBlock;
     ka_wakeUpAllDependents;
@@ -1264,6 +1423,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
     k_popMandatoryQueue;
   }
 
+  transition(S, Flush_line, SM_F) {
+    i_allocateTBE;
+    bf_issueGETF;
+    uu_profileMiss;
+    gg_deallocateL1CacheBlock;
+    k_popMandatoryQueue;
+  }
+
   transition(S, L2_Replacement, I) {
     rr_deallocateL2CacheBlock;
     ka_wakeUpAllDependents;
@@ -1292,6 +1459,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
     uu_profileMiss;
     k_popMandatoryQueue;
   }
+
+  transition(O, Flush_line, OM_F) {
+    i_allocateTBE;
+    bf_issueGETF;
+    p_decrementNumberOfMessagesByOne;
+    uu_profileMiss;
+    gg_deallocateL1CacheBlock;
+    k_popMandatoryQueue;
+  }
 
   transition(O, L2_Replacement, OI) {
     i_allocateTBE;
@@ -1326,6 +1501,20 @@ machine(L1Cache, "AMD Hammer-like protocol")
     k_popMandatoryQueue;
   }
 
+  transition({MM, M}, Flush_line, MM_F) {
+    i_allocateTBE;
+    bf_issueGETF;
+    p_decrementNumberOfMessagesByOne;
+    gg_deallocateL1CacheBlock;
+    k_popMandatoryQueue;
+  }
+
+  transition(MM_F, Block_Ack, MI_F) {
+    df_issuePUTF;
+    l_popForwardQueue;
+    kd_wakeUpDependents;
+  }
+
   transition(MM, L2_Replacement, MI) {
     i_allocateTBE;
     d_issuePUT;
@@ -1398,12 +1587,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
 
   // Transitions from IM
 
-  transition(IM, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
+  transition({IM, IM_F}, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
     f_sendAck;
     l_popForwardQueue;
   }
 
-  transition(IM, Ack) {
+  transition({IM, IM_F, MM_F}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1416,6 +1605,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
     n_popResponseQueue;
   }
 
+  transition(IM_F, Data, ISM_F) {
+    uf_writeDataToCacheTBE;
+    m_decrementNumberOfMessages;
+    o_checkForCompletion;
+    n_popResponseQueue;
+  }
+
   transition(IM, Exclusive_Data, MM_W) {
     u_writeDataToCache;
     m_decrementNumberOfMessages;
@@ -1425,8 +1621,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition(IM_F, Exclusive_Data, MM_WF) {
+    uf_writeDataToCacheTBE;
+    m_decrementNumberOfMessages;
+    o_checkForCompletion;
+    n_popResponseQueue;
+  }
+
   // Transitions from SM
-  transition(SM, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
+  transition({SM, SM_F}, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
     ff_sendAckShared;
     l_popForwardQueue;
   }
@@ -1436,7 +1639,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(SM, Ack) {
+  transition(SM_F, {Other_GETX, Invalidate}, IM_F) {
+    f_sendAck;
+    l_popForwardQueue;
+  }
+
+  transition({SM, SM_F}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1449,8 +1657,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
     n_popResponseQueue;
   }
 
+  transition(SM_F, {Data, Exclusive_Data}, ISM_F) {
+    vt_writeDataToTBEVerify;
+    m_decrementNumberOfMessages;
+    o_checkForCompletion;
+    n_popResponseQueue;
+  }
+
   // Transitions from ISM
-  transition(ISM, Ack) {
+  transition({ISM, ISM_F}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1464,6 +1679,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition(ISM_F, All_acks_no_sharers, MI_F) {
+    df_issuePUTF;
+    j_popTriggerQueue;
+    kd_wakeUpDependents;
+  }
+
   // Transitions from OM
 
   transition(OM, {Other_GETX, Invalidate}, IM) {
@@ -1472,6 +1693,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
+  transition(OM_F, {Other_GETX, Invalidate}, IM_F) {
+    q_sendDataFromTBEToCache;
+    pp_incrementNumberOfMessagesByOne;
+    l_popForwardQueue;
+  }
+
   transition(OM, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
     ee_sendDataShared;
     l_popForwardQueue;
@@ -1482,7 +1709,17 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
   }
 
-  transition(OM, Ack) {
+  transition(OM_F, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
+    et_sendDataSharedFromTBE;
+    l_popForwardQueue;
+  }
+
+  transition(OM_F, Merged_GETS) {
+    emt_sendDataSharedMultipleFromTBE;
+    l_popForwardQueue;
+  }
+
+  transition({OM, OM_F}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
   }
 
@@ -1496,6 +1733,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition({MM_F, OM_F}, {All_acks, All_acks_no_sharers}, MI_F) {
+    df_issuePUTF;
+    j_popTriggerQueue;
+    kd_wakeUpDependents;
+  }
 
   // Transitions from IS
 
   transition(IS, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
@@ -1583,7 +1825,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
     k_popMandatoryQueue;
   }
 
-  transition(MM_W, Ack) {
+  transition({MM_W, MM_WF}, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     n_popResponseQueue;
@@ -1596,6 +1838,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition(MM_WF, All_acks_no_sharers, MI_F) {
+    df_issuePUTF;
+    j_popTriggerQueue;
+    kd_wakeUpDependents;
+  }
 
   // Transitions from M_W
 
   transition(M_W, Store, MM_W) {
@@ -1640,6 +1887,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
     kd_wakeUpDependents;
   }
 
+  transition(MI_F, Writeback_Ack, I) {
+    hh_flush_hit;
+    t_sendExclusiveDataFromTBEToMemory;
+    s_deallocateTBE;
+    l_popForwardQueue;
+    kd_wakeUpDependents;
+  }
+
   transition(OI, Writeback_Ack, I) {
     qq_sendDataFromTBEToMemory;
     s_deallocateTBE;
@@ -1665,4 +1920,31 @@ machine(L1Cache, "AMD Hammer-like protocol")
     l_popForwardQueue;
     kd_wakeUpDependents;
   }
+
+  transition(MM_F, {Other_GETX, Invalidate}, IM_F) {
+    ct_sendExclusiveDataFromTBE;
+    pp_incrementNumberOfMessagesByOne;
+    l_popForwardQueue;
+  }
+
+  transition(MM_F, Other_GETS, IM_F) {
+    ct_sendExclusiveDataFromTBE;
+    pp_incrementNumberOfMessagesByOne;
+    l_popForwardQueue;
+  }
+
+  transition(MM_F, NC_DMA_GETS) {
+    ct_sendExclusiveDataFromTBE;
+    l_popForwardQueue;
+  }
+
+  transition(MM_F, Other_GETS_No_Mig, OM_F) {
+    et_sendDataSharedFromTBE;
+    l_popForwardQueue;
+  }
+
+  transition(MM_F, Merged_GETS, OM_F) {
+    emt_sendDataSharedMultipleFromTBE;
+    l_popForwardQueue;
+  }
 }
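
The cache-side transitions above amount to a four-step handshake for a dirty line: Flush_line issues a GETF, the collected acks (or the directory's Block_Ack) move the line to MI_F via a PUTF, and the Writeback_Ack finally writes the data back and invalidates. A schematic trace of that sequence follows, with the state names copied from the diff but the sequencing reduced to plain C++; this is illustrative only, not SLICC semantics.

    #include <cstdio>

    // States copied from the diff; everything else is illustrative.
    enum State { M, MM_F, MI_F, I };

    static const char *name(State s) {
        switch (s) {
          case M:    return "M";
          case MM_F: return "MM_F (GETF issued, collecting acks)";
          case MI_F: return "MI_F (PUTF issued, awaiting Writeback_Ack)";
          case I:    return "I";
        }
        return "?";
    }

    int main() {
        State s = M;
        std::printf("start:            %s\n", name(s));

        s = MM_F;  // Flush_line: allocate a TBE, issue GETF to the directory
        std::printf("Flush_line     -> %s\n", name(s));

        s = MI_F;  // All_acks / Block_Ack: every other copy is gone, issue PUTF
        std::printf("acks/Block_Ack -> %s\n", name(s));

        s = I;     // Writeback_Ack: flush-hit callback, data to memory, TBE freed
        std::printf("Writeback_Ack  -> %s\n", name(s));
        return 0;
    }
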
diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm
index 369f8784b..f364b5846 100644
--- a/src/mem/protocol/MOESI_hammer-dir.sm
+++ b/src/mem/protocol/MOESI_hammer-dir.sm
@@ -88,6 +88,9 @@ machine(Directory, "AMD Hammer-like protocol")
     WB, AccessPermission:Invalid, desc="Blocked on a writeback";
     WB_O_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to O";
     WB_E_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to E";
+
+    NO_F, AccessPermission:Invalid, desc="Blocked on a flush";
+    NO_F_W, AccessPermission:Invalid, desc="Not Owner, Blocked, waiting for Dram";
   }
 
   // Events
@@ -126,6 +129,8 @@ machine(Directory, "AMD Hammer-like protocol")
     All_acks_and_owner_data, desc="Received shared data and message acks";
     All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy";
     All_Unblocks, desc="Received all unblocks for a merged gets request";
+    GETF, desc="A GETF arrives";
+    PUTF, desc="A PUTF arrives";
   }
 
   // TYPES
@@ -233,6 +238,8 @@ machine(Directory, "AMD Hammer-like protocol")
       return Event:GETS;
     } else if (type == CoherenceRequestType:GETX) {
       return Event:GETX;
+    } else if (type == CoherenceRequestType:GETF) {
+      return Event:GETF;
     } else {
       error("Invalid CoherenceRequestType");
     }
@@ -355,6 +362,8 @@ machine(Directory, "AMD Hammer-like protocol")
         TBE tbe := TBEs[in_msg.Address];
         if (in_msg.Type == CoherenceRequestType:PUT) {
           trigger(Event:PUT, in_msg.Address, pf_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:PUTF) {
+          trigger(Event:PUTF, in_msg.Address, pf_entry, tbe);
         } else {
           if (probe_filter_enabled || full_bit_dir_enabled) {
             if (is_valid(pf_entry)) {
@@ -453,6 +462,20 @@ machine(Directory, "AMD Hammer-like protocol")
     }
   }
 
+  action(oc_sendBlockAck, "oc", desc="Send block ack to the owner") {
+    peek(requestQueue_in, RequestMsg) {
+      if ((probe_filter_enabled || full_bit_dir_enabled) && (in_msg.Requestor == cache_entry.Owner)) {
+        enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+          out_msg.Address := address;
+          out_msg.Type := CoherenceRequestType:BLOCK_ACK;
+          out_msg.Requestor := in_msg.Requestor;
+          out_msg.Destination.add(in_msg.Requestor);
+          out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        }
+      }
+    }
+  }
+
   action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") {
     peek(requestQueue_in, RequestMsg) {
       enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
@@ -966,6 +989,42 @@ machine(Directory, "AMD Hammer-like protocol")
     }
   }
 
+  action(nofc_forwardRequestConditionalOwner, "nofc", desc="Forward request to one or more nodes if the requestor is not the owner") {
+    assert(machineCount(MachineType:L1Cache) > 1);
+
+    if (probe_filter_enabled || full_bit_dir_enabled) {
+      peek(requestQueue_in, RequestMsg) {
+        if (in_msg.Requestor != cache_entry.Owner) {
+          enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+            assert(is_valid(cache_entry));
+            out_msg.Address := address;
+            out_msg.Type := in_msg.Type;
+            out_msg.Requestor := in_msg.Requestor;
+            out_msg.Destination.add(cache_entry.Owner);
+            out_msg.MessageSize := MessageSizeType:Request_Control;
+            out_msg.DirectedProbe := true;
+            out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+            out_msg.ForwardRequestTime := get_time();
+          }
+        }
+      }
+    } else {
+      peek(requestQueue_in, RequestMsg) {
+        enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+          out_msg.Address := address;
+          out_msg.Type := in_msg.Type;
+          out_msg.Requestor := in_msg.Requestor;
+          out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
+          out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
+          out_msg.MessageSize := MessageSizeType:Broadcast_Control;
+          out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+          out_msg.ForwardRequestTime := get_time();
+        }
+      }
+    }
+
+  }
+
   action(f_forwardWriteFromDma, "fw", desc="Forward requests") {
     assert(is_valid(tbe));
     if (tbe.NumPendingMsgs > 0) {
@@ -1185,6 +1244,16 @@ machine(Directory, "AMD Hammer-like protocol")
     i_popIncomingRequestQueue;
   }
 
+  transition(E, GETF, NO_F_W) {
+    pfa_probeFilterAllocate;
+    v_allocateTBE;
+    rx_recordExclusiveInTBE;
+    saa_setAcksToAllIfPF;
+    qf_queueMemoryFetchRequest;
+    fn_forwardRequestIfNecessary;
+    i_popIncomingRequestQueue;
+  }
+
   transition(E, GETS, NO_B_W) {
     pfa_probeFilterAllocate;
     v_allocateTBE;
@@ -1223,6 +1292,17 @@ machine(Directory, "AMD Hammer-like protocol")
     i_popIncomingRequestQueue;
   }
 
+  transition(O, GETF, NO_F_W) {
+    r_setMRU;
+    v_allocateTBE;
+    r_recordDataInTBE;
+    sa_setAcksToOne;
+    qf_queueMemoryFetchRequest;
+    fb_forwardRequestBcast;
+    cs_clearSharers;
+    i_popIncomingRequestQueue;
+  }
+
   // This transition is dumb, if a shared copy exists on-chip, then that should
   // provide data, not slow off-chip dram.  The problem is that the current
The problem is that the current // caches don't provide data in S state @@ -1286,6 +1366,13 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } + transition(NX, GETF, NO_F) { + r_setMRU; + fb_forwardRequestBcast; + cs_clearSharers; + i_popIncomingRequestQueue; + } + // Transitions out of NO state transition(NO, GETX, NO_B) { r_setMRU; @@ -1295,6 +1382,15 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } + transition(NO, GETF, NO_F) { + r_setMRU; + //ano_assertNotOwner; + nofc_forwardRequestConditionalOwner; //forward request if the requester is not the owner + cs_clearSharers; + oc_sendBlockAck; // send ack if the owner + i_popIncomingRequestQueue; + } + transition(S, GETX, NO_B) { r_setMRU; fb_forwardRequestBcast; @@ -1302,6 +1398,13 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } + transition(S, GETF, NO_F) { + r_setMRU; + fb_forwardRequestBcast; + cs_clearSharers; + i_popIncomingRequestQueue; + } + transition(S, GETS, NO_B) { r_setMRU; ano_assertNotOwner; @@ -1348,12 +1451,16 @@ machine(Directory, "AMD Hammer-like protocol") // Blocked transient states transition({NO_B_X, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W, - NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R}, - {GETS, GETX, PUT, Pf_Replacement}) { + NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R, NO_F_W}, + {GETS, GETX, GETF, PUT, Pf_Replacement}) { z_stallAndWaitRequest; } - transition(NO_B, GETX, NO_B_X) { + transition(NO_F, {GETS, GETX, GETF, PUT, Pf_Replacement}){ + z_stallAndWaitRequest; + } + + transition(NO_B, {GETX, GETF}, NO_B_X) { z_stallAndWaitRequest; } @@ -1361,13 +1468,13 @@ machine(Directory, "AMD Hammer-like protocol") z_stallAndWaitRequest; } - transition(NO_B_S, {GETX, PUT, Pf_Replacement}) { + transition(NO_B_S, {GETX, GETF, PUT, Pf_Replacement}) { z_stallAndWaitRequest; } transition({NO_B_X, NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W, - NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R}, + NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R, NO_F_W}, {DMA_READ, DMA_WRITE}) { zd_stallAndWaitDMARequest; } @@ -1444,6 +1551,12 @@ machine(Directory, "AMD Hammer-like protocol") l_popMemQueue; } + transition(NO_F_W, Memory_Data, NO_F) { + d_sendData; + w_deallocateTBE; + l_popMemQueue; + } + transition(NO_DR_B_W, Memory_Data, NO_DR_B) { r_recordMemoryData; o_checkForCompletion; @@ -1738,4 +1851,16 @@ machine(Directory, "AMD Hammer-like protocol") k_wakeUpDependents; j_popIncomingUnblockQueue; } + + transition(NO_F, PUTF, WB) { + a_sendWriteBackAck; + i_popIncomingRequestQueue; + } + + //possible race between GETF and UnblockM -- not sure needed any more? + transition(NO_F, UnblockM) { + us_updateSharerIfFBD; + uo_updateOwnerIfPf; + j_popIncomingUnblockQueue; + } } diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm index 063cc91ee..6ead7200a 100644 --- a/src/mem/protocol/MOESI_hammer-msg.sm +++ b/src/mem/protocol/MOESI_hammer-msg.sm @@ -37,6 +37,9 @@ enumeration(CoherenceRequestType, desc="...") { PUT, desc="Put Ownership"; WB_ACK, desc="Writeback ack"; WB_NACK, desc="Writeback neg. 
ack"; + PUTF, desc="PUT on a Flush"; + GETF, desc="Issue exclusive for Flushing"; + BLOCK_ACK, desc="Dir Block ack"; INV, desc="Invalidate"; } diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm index 0ef3df29b..5fb5f9912 100644 --- a/src/mem/protocol/RubySlicc_Exports.sm +++ b/src/mem/protocol/RubySlicc_Exports.sm @@ -119,6 +119,7 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") { Locked_RMW_Write, desc=""; COMMIT, desc="Commit version"; NULL, desc="Invalid request type"; + FLUSH, desc="Flush request type"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 92627740f..6d5cb71bf 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -244,6 +244,8 @@ RubyPort::M5Port::recvTiming(PacketPtr pkt) // Note: M5 packets do not differentiate ST from RMW_Write // type = RubyRequestType_ST; + } else if (pkt->isFlush()) { + type = RubyRequestType_FLUSH; } else { panic("Unsupported ruby packet type\n"); } @@ -335,7 +337,7 @@ RubyPort::M5Port::hitCallback(PacketPtr pkt) // // Unless specified at configuraiton, all responses except failed SC - // operations access M5 physical memory. + // and Flush operations access M5 physical memory. // bool accessPhysMem = access_phys_mem; @@ -361,11 +363,19 @@ RubyPort::M5Port::hitCallback(PacketPtr pkt) pkt->convertLlToRead(); } } + + // + // Flush requests don't access physical memory + // + if (pkt->isFlush()) { + accessPhysMem = false; + } + DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse); if (accessPhysMem) { ruby_port->physMemPort->sendAtomic(pkt); - } else { + } else if (needsResponse) { pkt->makeResponse(); } diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 7eb46e006..94ba6c2d3 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -234,7 +234,8 @@ Sequencer::insertRequest(SequencerRequest* request) (request->ruby_request.m_Type == RubyRequestType_Load_Linked) || (request->ruby_request.m_Type == RubyRequestType_Store_Conditional) || (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Read) || - (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write)) { + (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write) || + (request->ruby_request.m_Type == RubyRequestType_FLUSH)) { pair<RequestTable::iterator, bool> r = m_writeRequestTable.insert(RequestTable::value_type(line_addr, 0)); bool success = r.second; @@ -338,7 +339,7 @@ Sequencer::handleLlsc(const Address& address, SequencerRequest* request) // previously locked cache lines? 
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 7eb46e006..94ba6c2d3 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -234,7 +234,8 @@ Sequencer::insertRequest(SequencerRequest* request)
         (request->ruby_request.m_Type == RubyRequestType_Load_Linked) ||
         (request->ruby_request.m_Type == RubyRequestType_Store_Conditional) ||
         (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Read) ||
-        (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write)) {
+        (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write) ||
+        (request->ruby_request.m_Type == RubyRequestType_FLUSH)) {
 
         pair<RequestTable::iterator, bool> r =
             m_writeRequestTable.insert(RequestTable::value_type(line_addr, 0));
         bool success = r.second;
@@ -338,7 +339,7 @@ Sequencer::handleLlsc(const Address& address, SequencerRequest* request)
         // previously locked cache lines?
         //
         m_dataCache_ptr->setLocked(address, m_version);
-    } else if (m_dataCache_ptr->isLocked(address, m_version)) {
+    } else if ((m_dataCache_ptr->isTagPresent(address)) && (m_dataCache_ptr->isLocked(address, m_version))) {
         //
         // Normal writes should clear the locked address
         //
@@ -385,7 +386,9 @@ Sequencer::writeCallback(const Address& address,
            (request->ruby_request.m_Type == RubyRequestType_Load_Linked) ||
            (request->ruby_request.m_Type == RubyRequestType_Store_Conditional) ||
            (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Read) ||
-           (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write));
+           (request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write) ||
+           (request->ruby_request.m_Type == RubyRequestType_FLUSH));
+
 
     //
     // For Alpha, properly handle LL, SC, and write requests with respect to
@@ -619,6 +622,9 @@ Sequencer::issueRequest(const RubyRequest& request)
       case RubyRequestType_LD:
        ctype = RubyRequestType_LD;
         break;
+      case RubyRequestType_FLUSH:
+        ctype = RubyRequestType_FLUSH;
+        break;
       case RubyRequestType_ST:
       case RubyRequestType_RMW_Read:
       case RubyRequestType_RMW_Write:
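
Finally, the Sequencer treats FLUSH like a store (it lands in m_writeRequestTable), and handleLlsc() gains an isTagPresent() guard, presumably because a flush can remove a line outright, so a later write must not probe the lock bit of an absent tag. A sketch of why the short-circuit matters; CacheStandIn approximates the two CacheMemory calls, and the stated motivation is an assumption, not from the commit message.

    #include <cassert>
    #include <map>

    // Approximates CacheMemory::isTagPresent()/isLocked() from Ruby.
    struct CacheStandIn {
        std::map<unsigned long, bool> lines;  // addr -> locked?

        bool isTagPresent(unsigned long a) const { return lines.count(a) != 0; }
        bool isLocked(unsigned long a) const {
            assert(isTagPresent(a));  // probing an absent tag would be a bug
            return lines.at(a);
        }
    };

    int main() {
        CacheStandIn cache;
        cache.lines[0x1000] = true;  // an LL locked the line
        cache.lines.erase(0x1000);   // a flush threw the whole line out

        // Guarded form from the patch: the short-circuit keeps isLocked()
        // from ever seeing an address whose tag is gone.
        unsigned long addr = 0x1000;
        if (cache.isTagPresent(addr) && cache.isLocked(addr)) {
            // would clear the lock here
        }
        return 0;
    }
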