/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Lisa Hsu
 */

machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
 : Sequencer* sequencer;
   CacheMemory * L1cache;
   int TCC_select_num_bits;
   Cycles issue_latency := 80;  // time to send data down to TCC
   Cycles l2_hit_latency := 18;

   MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request";
   MessageBuffer * responseFromSQC, network="To", virtual_network="3", vnet_type="response";
   MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";

   MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request";
   MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response";

   MessageBuffer * mandatoryQueue;
{
  state_declaration(State, desc="SQC Cache States", default="SQC_State_I") {
    I, AccessPermission:Invalid, desc="Invalid";
    S, AccessPermission:Read_Only, desc="Shared";

    I_S, AccessPermission:Busy, desc="Invalid, issued RdBlkS, have not seen response yet";
    S_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
    I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCCdir for canceled WB";
  }

  enumeration(Event, desc="SQC Events") {
    // Core initiated
    Fetch,          desc="Fetch";

    // TCC initiated
    TCC_AckS,       desc="TCC Ack to Core Request";
    TCC_AckWB,      desc="TCC Ack for WB";
    TCC_NackWB,     desc="TCC Nack for WB";

    // Mem sys initiated
    Repl,           desc="Replacing block from cache";

    // Probe Events
    PrbInvData,     desc="probe, return M data";
    PrbInv,         desc="probe, no need for data";
    PrbShrData,     desc="probe downgrade, return data";
  }

  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
    DataArrayRead,    desc="Read the data array";
    DataArrayWrite,   desc="Write the data array";
    TagArrayRead,     desc="Read the tag array";
    TagArrayWrite,    desc="Write the tag array";
  }

  structure(Entry, desc="...", interface="AbstractCacheEntry") {
    State CacheState,             desc="cache state";
    bool Dirty,                   desc="Is the data dirty (different than memory)?";
    DataBlock DataBlk,            desc="data for the block";
    bool FromL2, default="false", desc="block just moved from L2";
  }

  structure(TBE, desc="...") {
    State TBEState,         desc="Transient state";
    DataBlock DataBlk,      desc="data for the block, required for concurrent writebacks";
    bool Dirty,             desc="Is the data dirty (different than memory)?";
    int NumPendingMsgs,     desc="Number of acks/data messages that this processor is waiting for";
    bool Shared,            desc="Victim hit by shared probe";
  }

  structure(TBETable, external="yes") {
    TBE lookup(Addr);
    void allocate(Addr);
    void deallocate(Addr);
    bool isPresent(Addr);
  }

  TBETable TBEs, template="<SQC_TBE>", constructor="m_number_of_TBEs";
  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";

  Tick clockEdge();
  Tick cyclesToTicks(Cycles c);

  void set_cache_entry(AbstractCacheEntry b);
  void unset_cache_entry();
  void set_tbe(TBE b);
  void unset_tbe();
  void wakeUpAllBuffers();
  void wakeUpBuffers(Addr a);
  Cycles curCycle();

  // Internal functions
  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
    Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
    return cache_entry;
  }

  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
    TBE tbe := TBEs.lookup(addr);
    if (is_valid(tbe)) {
      return tbe.DataBlk;
    } else {
      return getCacheEntry(addr).DataBlk;
    }
  }

  State getState(TBE tbe, Entry cache_entry, Addr addr) {
    if (is_valid(tbe)) {
      return tbe.TBEState;
    } else if (is_valid(cache_entry)) {
      return cache_entry.CacheState;
    }
    return State:I;
  }

  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
    if (is_valid(tbe)) {
      tbe.TBEState := state;
    }
    if (is_valid(cache_entry)) {
      cache_entry.CacheState := state;
    }
  }

  AccessPermission getAccessPermission(Addr addr) {
    TBE tbe := TBEs.lookup(addr);
    if (is_valid(tbe)) {
      return SQC_State_to_permission(tbe.TBEState);
    }

    Entry cache_entry := getCacheEntry(addr);
    if (is_valid(cache_entry)) {
      return SQC_State_to_permission(cache_entry.CacheState);
    }

    return AccessPermission:NotPresent;
  }

  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
    if (is_valid(cache_entry)) {
      cache_entry.changePermission(SQC_State_to_permission(state));
    }
  }

  void functionalRead(Addr addr, Packet *pkt) {
    TBE tbe := TBEs.lookup(addr);
    if (is_valid(tbe)) {
      testAndRead(addr, tbe.DataBlk, pkt);
    } else {
      functionalMemoryRead(pkt);
    }
  }

  int functionalWrite(Addr addr, Packet *pkt) {
    int num_functional_writes := 0;

    TBE tbe := TBEs.lookup(addr);
    if (is_valid(tbe)) {
      num_functional_writes := num_functional_writes +
            testAndWrite(addr, tbe.DataBlk, pkt);
    }

    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
    return num_functional_writes;
  }

  void recordRequestType(RequestType request_type, Addr addr) {
    if (request_type == RequestType:DataArrayRead) {
      L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
    } else if (request_type == RequestType:DataArrayWrite) {
      L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
    } else if (request_type == RequestType:TagArrayRead) {
      L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
    } else if (request_type == RequestType:TagArrayWrite) {
      L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
    }
  }

  bool checkResourceAvailable(RequestType request_type, Addr addr) {
    if (request_type == RequestType:DataArrayRead) {
      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
    } else if (request_type == RequestType:DataArrayWrite) {
      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
    } else if (request_type == RequestType:TagArrayRead) {
      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
    } else if (request_type == RequestType:TagArrayWrite) {
      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
    } else {
      error("Invalid RequestType type in checkResourceAvailable");
      return true;
    }
  }
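
  // Note: the SQC is an instruction cache, so the core side only ever
  // issues IFETCH requests (see mandatoryQueue_in below). Lines are never
  // written locally: S is the only stable valid state, victims are sent
  // as VicClean, and the DataBlk/Dirty fields kept in the TBE exist only
  // to service probes that race with an in-flight writeback.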
  // Out Ports

  out_port(requestNetwork_out, CPURequestMsg, requestFromSQC);
  out_port(responseNetwork_out, ResponseMsg, responseFromSQC);
  out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);

  // In Ports

  in_port(probeNetwork_in, TDProbeRequestMsg, probeToSQC) {
    if (probeNetwork_in.isReady(clockEdge())) {
      peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
        Entry cache_entry := getCacheEntry(in_msg.addr);
        TBE tbe := TBEs.lookup(in_msg.addr);

        if (in_msg.Type == ProbeRequestType:PrbInv) {
          if (in_msg.ReturnData) {
            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
          } else {
            trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
          }
        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
          assert(in_msg.ReturnData);
          trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
        }
      }
    }
  }

  in_port(responseToSQC_in, ResponseMsg, responseToSQC) {
    if (responseToSQC_in.isReady(clockEdge())) {
      peek(responseToSQC_in, ResponseMsg, block_on="addr") {
        Entry cache_entry := getCacheEntry(in_msg.addr);
        TBE tbe := TBEs.lookup(in_msg.addr);

        if (in_msg.Type == CoherenceResponseType:TDSysResp) {
          if (in_msg.State == CoherenceState:Shared) {
            trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
          } else {
            error("SQC should not receive TDSysResp other than CoherenceState:Shared");
          }
        } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
          trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
        } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
          trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
        } else {
          error("Unexpected Response Message to Core");
        }
      }
    }
  }

  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
    if (mandatoryQueue_in.isReady(clockEdge())) {
      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
        TBE tbe := TBEs.lookup(in_msg.LineAddress);

        assert(in_msg.Type == RubyRequestType:IFETCH);
        if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
          trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
        } else {
          // No room for the fetched line: evict a victim first.
          Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
          trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
        }
      }
    }
  }

  // Actions

  action(ic_invCache, "ic", desc="invalidate cache") {
    if (is_valid(cache_entry)) {
      L1cache.deallocate(address);
    }
    unset_cache_entry();
  }

  action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceRequestType:RdBlkS;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.InitialRequestTime := curCycle();
    }
  }

  action(vc_victim, "vc", desc="Victimize E/S Data") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.Type := CoherenceRequestType:VicClean;
      out_msg.InitialRequestTime := curCycle();
      if (cache_entry.CacheState == State:S) {
        out_msg.Shared := true;
      } else {
        out_msg.Shared := false;
      }
    }
  }

  action(a_allocate, "a", desc="allocate block") {
    if (is_invalid(cache_entry)) {
      set_cache_entry(L1cache.allocate(address, new Entry));
    }
  }

  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
    check_allocate(TBEs);
    assert(is_valid(cache_entry));
    TBEs.allocate(address);
    set_tbe(TBEs.lookup(address));
    tbe.DataBlk := cache_entry.DataBlk;  // Data only used for WBs
    tbe.Dirty := cache_entry.Dirty;
    tbe.Shared := false;
  }

  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
    TBEs.deallocate(address);
    unset_tbe();
  }

  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
    mandatoryQueue_in.dequeue(clockEdge());
  }

  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
    responseToSQC_in.dequeue(clockEdge());
  }

  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
    probeNetwork_in.dequeue(clockEdge());
  }

  action(l_loadDone, "l", desc="local load done") {
    assert(is_valid(cache_entry));
    sequencer.readCallback(address, cache_entry.DataBlk,
                           false, MachineType:L1Cache);
    APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
  }

  action(xl_loadDone, "xl", desc="remote load done") {
    peek(responseToSQC_in, ResponseMsg) {
      assert(is_valid(cache_entry));
      sequencer.readCallback(address,
                             cache_entry.DataBlk,
                             false,
                             machineIDToMachineType(in_msg.Sender),
                             in_msg.InitialRequestTime,
                             in_msg.ForwardRequestTime,
                             in_msg.ProbeRequestStartTime);
      APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
    }
  }

  action(w_writeCache, "w", desc="write data to cache") {
    peek(responseToSQC_in, ResponseMsg) {
      assert(is_valid(cache_entry));
      cache_entry.DataBlk := in_msg.DataBlk;
      cache_entry.Dirty := in_msg.Dirty;
    }
  }

  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
    peek(responseToSQC_in, ResponseMsg) {
      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:StaleNotif;
        out_msg.Sender := machineID;
        out_msg.Destination.add(mapAddressToRange(address, MachineType:TCC,
                                TCC_select_low_bit, TCC_select_num_bits));
        out_msg.MessageSize := MessageSizeType:Response_Control;
        DPRINTF(RubySlicc, "%s\n", out_msg);
      }
    }
  }

  action(wb_data, "wb", desc="write back data") {
    peek(responseToSQC_in, ResponseMsg) {
      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:CPUData;
        out_msg.Sender := machineID;
        out_msg.Destination.add(mapAddressToRange(address, MachineType:TCC,
                                TCC_select_low_bit, TCC_select_num_bits));
        out_msg.DataBlk := tbe.DataBlk;
        out_msg.Dirty := tbe.Dirty;
        if (tbe.Shared) {
          out_msg.NbReqShared := true;
        } else {
          out_msg.NbReqShared := false;
        }
        out_msg.State := CoherenceState:Shared; // faux info
        out_msg.MessageSize := MessageSizeType:Writeback_Data;
        DPRINTF(RubySlicc, "%s\n", out_msg);
      }
    }
  }
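
  // The probe-response actions below differ only in the Hit/Ntsl/Dirty
  // flags and in whether data is attached; all of them are routed to the
  // TCCdir responsible for this address range. The data-bearing variants
  // read through getDataBlock(), so a block that has already migrated
  // into a TBE for writeback is still returned correctly.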
  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
      out_msg.Sender := machineID;
      // will this always be ok? probably not for multisocket
      out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;
      out_msg.Hit := false;
      out_msg.Ntsl := true;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
    }
  }

  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
      out_msg.Sender := machineID;
      // will this always be ok? probably not for multisocket
      out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;
      out_msg.Ntsl := true;
      out_msg.Hit := false;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
    }
  }

  action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and CPUs respond in same way to probes
      out_msg.Sender := machineID;
      // will this always be ok? probably not for multisocket
      out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;  // only true if sending back data i think
      out_msg.Hit := false;
      out_msg.Ntsl := false;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
    }
  }

  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      assert(is_valid(cache_entry) || is_valid(tbe));
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;
      out_msg.Sender := machineID;
      // will this always be ok? probably not for multisocket
      out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.DataBlk := getDataBlock(address);
      if (is_valid(tbe)) {
        out_msg.Dirty := tbe.Dirty;
      } else {
        out_msg.Dirty := cache_entry.Dirty;
      }
      out_msg.Hit := true;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Data;
    }
  }

  action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      assert(is_valid(cache_entry));
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;
      out_msg.Sender := machineID;
      // will this always be ok? probably not for multisocket
      out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.DataBlk := getDataBlock(address);
      if (is_valid(tbe)) {
        out_msg.Dirty := tbe.Dirty;
      } else {
        out_msg.Dirty := cache_entry.Dirty;
      }
      out_msg.Hit := true;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Data;
    }
  }

  action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
    assert(is_valid(tbe));
    tbe.Shared := true;
  }

  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
    enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Unblock_Control;
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }

  action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
  }

  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
  }
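
  // Transition summary: I and S are the stable states. I_S tracks an
  // outstanding RdBlkS, S_I an outstanding clean writeback, and I_C a
  // writeback that was overtaken by an invalidating probe and now only
  // waits for the TCCdir ack so the TBE can be freed.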
  // Transitions

  // transitions from base
  transition(I, Fetch, I_S) {TagArrayRead, TagArrayWrite} {
    a_allocate;
    nS_issueRdBlkS;
    p_popMandatoryQueue;
  }

  // simple hit transitions
  transition(S, Fetch) {TagArrayRead, DataArrayRead} {
    l_loadDone;
    p_popMandatoryQueue;
  }

  // recycles from transients
  transition({I_S, S_I, I_C}, {Fetch, Repl}) {} {
    zz_recycleMandatoryQueue;
  }

  transition(S, Repl, S_I) {TagArrayRead} {
    t_allocateTBE;
    vc_victim;
    ic_invCache;
  }

  // TCC event
  transition(I_S, TCC_AckS, S) {DataArrayRead, DataArrayWrite} {
    w_writeCache;
    xl_loadDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  transition(S_I, TCC_NackWB, I) {TagArrayWrite} {
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  transition(S_I, TCC_AckWB, I) {TagArrayWrite} {
    wb_data;
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  transition(I_C, TCC_AckWB, I) {TagArrayWrite} {
    ss_sendStaleNotification;
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  // Probe transitions
  transition({S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(I_C, PrbInvData, I_C) {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition({S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition({S}, PrbShrData, S) {DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }

  transition({I, I_C}, PrbShrData) {TagArrayRead} {
    prm_sendProbeResponseMiss;
    pp_popProbeQueue;
  }

  transition(I_C, PrbInv, I_C) {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(I_S, {PrbInv, PrbInvData}) {} {
    pi_sendProbeResponseInv;
    ic_invCache;
    a_allocate;  // but make sure there is room for incoming data when it arrives
    pp_popProbeQueue;
  }

  transition(I_S, PrbShrData) {} {
    prm_sendProbeResponseMiss;
    pp_popProbeQueue;
  }

  transition(S_I, PrbInvData, I_C) {TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(S_I, PrbInv, I_C) {TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(S_I, PrbShrData) {DataArrayRead} {
    pd_sendProbeResponseData;
    sf_setSharedFlip;
    pp_popProbeQueue;
  }
}