diff options
Diffstat (limited to 'src/mem/ruby/system/GPUCoalescer.hh')
-rw-r--r-- | src/mem/ruby/system/GPUCoalescer.hh | 368 |
1 files changed, 368 insertions, 0 deletions
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh new file mode 100644 index 000000000..dbd47059c --- /dev/null +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Sooraj Puthoor + */ + +#ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ +#define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ + +#include <iostream> +#include <unordered_map> + +#include "base/statistics.hh" +#include "mem/protocol/HSAScope.hh" +#include "mem/protocol/HSASegment.hh" +#include "mem/protocol/PrefetchBit.hh" +#include "mem/protocol/RubyAccessMode.hh" +#include "mem/protocol/RubyRequestType.hh" +#include "mem/protocol/SequencerRequestType.hh" +#include "mem/request.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Consumer.hh" +#include "mem/ruby/system/RubyPort.hh" + +class DataBlock; +class CacheMsg; +class MachineID; +class CacheMemory; + +class RubyGPUCoalescerParams; + +HSAScope reqScopeToHSAScope(Request* req); +HSASegment reqSegmentToHSASegment(Request* req); + +struct GPUCoalescerRequest +{ + PacketPtr pkt; + RubyRequestType m_type; + Cycles issue_time; + + GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type, + Cycles _issue_time) + : pkt(_pkt), m_type(_m_type), issue_time(_issue_time) + {} +}; + +std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj); + +class GPUCoalescer : public RubyPort +{ + public: + typedef RubyGPUCoalescerParams Params; + GPUCoalescer(const Params *); + ~GPUCoalescer(); + + // Public Methods + void wakeup(); // Used only for deadlock detection + + void printProgress(std::ostream& out) const; + void resetStats(); + void collateStats(); + void regStats(); + + void writeCallback(Addr address, DataBlock& data); + + void writeCallback(Addr address, + MachineType mach, + DataBlock& data); + + void writeCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion); + + void writeCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime); + + void readCallback(Addr address, DataBlock& data); + + void readCallback(Addr address, + MachineType mach, + DataBlock& data); + + void readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime); + + void readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion); + /* atomics need their own callback because the data + might be const coming from SLICC */ + void atomicCallback(Addr address, + MachineType mach, + const DataBlock& data); + + void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID); + void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID); + + // Alternate implementations in VIPER Coalescer + virtual RequestStatus makeRequest(PacketPtr pkt); + + int outstandingCount() const { return m_outstanding_count; } + + bool + isDeadlockEventScheduled() const + { + return deadlockCheckEvent.scheduled(); + } + + void + descheduleDeadlockEvent() + { + deschedule(deadlockCheckEvent); + } + + bool empty() const; + + void print(std::ostream& out) const; + void checkCoherence(Addr address); + + void markRemoved(); + void removeRequest(GPUCoalescerRequest* request); + void evictionCallback(Addr address); + void completeIssue(); + + void insertKernel(int wavefront_id, PacketPtr pkt); + + void recordRequestType(SequencerRequestType requestType); + Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; } + + Stats::Histogram& getLatencyHist() { return m_latencyHist; } + Stats::Histogram& getTypeLatencyHist(uint32_t t) + { return *m_typeLatencyHist[t]; } + + Stats::Histogram& getMissLatencyHist() + { return m_missLatencyHist; } + Stats::Histogram& getMissTypeLatencyHist(uint32_t t) + { return *m_missTypeLatencyHist[t]; } + + Stats::Histogram& getMissMachLatencyHist(uint32_t t) const + { return *m_missMachLatencyHist[t]; } + + Stats::Histogram& + getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const + { return *m_missTypeMachLatencyHist[r][t]; } + + Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const + { return *m_IssueToInitialDelayHist[t]; } + + Stats::Histogram& + getInitialToForwardDelayHist(const MachineType t) const + { return *m_InitialToForwardDelayHist[t]; } + + Stats::Histogram& + getForwardRequestToFirstResponseHist(const MachineType t) const + { return *m_ForwardToFirstResponseDelayHist[t]; } + + Stats::Histogram& + getFirstResponseToCompletionDelayHist(const MachineType t) const + { return *m_FirstResponseToCompletionDelayHist[t]; } + + // Changed to protected to enable inheritance by VIPER Coalescer + protected: + bool tryCacheAccess(Addr addr, RubyRequestType type, + Addr pc, RubyAccessMode access_mode, + int size, DataBlock*& data_ptr); + // Alternate implementations in VIPER Coalescer + virtual void issueRequest(PacketPtr pkt, RubyRequestType type); + + void kernelCallback(int wavfront_id); + + void hitCallback(GPUCoalescerRequest* request, + MachineType mach, + DataBlock& data, + bool success, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion); + void recordMissLatency(GPUCoalescerRequest* request, + MachineType mach, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool success, bool isRegion); + void completeHitCallback(std::vector<PacketPtr> & mylist, int len); + PacketPtr mapAddrToPkt(Addr address); + + + RequestStatus getRequestStatus(PacketPtr pkt, + RubyRequestType request_type); + bool insertRequest(PacketPtr pkt, RubyRequestType request_type); + + bool handleLlsc(Addr address, GPUCoalescerRequest* request); + + // Private copy constructor and assignment operator + GPUCoalescer(const GPUCoalescer& obj); + GPUCoalescer& operator=(const GPUCoalescer& obj); + + class IssueEvent : public Event + { + private: + GPUCoalescer *seq; + public: + IssueEvent(GPUCoalescer *_seq); + void process(); + const char *description() const; + }; + + IssueEvent issueEvent; + + + // Changed to protected to enable inheritance by VIPER Coalescer + protected: + int m_max_outstanding_requests; + int m_deadlock_threshold; + + CacheMemory* m_dataCache_ptr; + CacheMemory* m_instCache_ptr; + + // The cache access latency for this GPU data cache. This is assessed at the + // beginning of each access. This should be very similar to the + // implementation in Sequencer() as this is very much like a Sequencer + Cycles m_data_cache_hit_latency; + + // We need to track both the primary and secondary request types. + // The secondary request type comprises a subset of RubyRequestTypes that + // are understood by the L1 Controller. A primary request type can be any + // RubyRequestType. + enum {PrimaryType, SecondaryType}; + typedef std::pair<PacketPtr, std::vector<RubyRequestType> > RequestDesc; + typedef std::unordered_map<Addr, std::vector<RequestDesc> > CoalescingTable; + CoalescingTable reqCoalescer; + std::vector<Addr> newRequests; + + typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable; + RequestTable m_writeRequestTable; + RequestTable m_readRequestTable; + // Global outstanding request count, across all request tables + int m_outstanding_count; + bool m_deadlock_check_scheduled; + std::unordered_map<int, PacketPtr> kernelEndList; + std::vector<int> newKernelEnds; + + int m_store_waiting_on_load_cycles; + int m_store_waiting_on_store_cycles; + int m_load_waiting_on_store_cycles; + int m_load_waiting_on_load_cycles; + + bool m_usingNetworkTester; + + class GPUCoalescerWakeupEvent : public Event + { + private: + GPUCoalescer *m_GPUCoalescer_ptr; + + public: + GPUCoalescerWakeupEvent(GPUCoalescer *_seq) : + m_GPUCoalescer_ptr(_seq) {} + void process() { m_GPUCoalescer_ptr->wakeup(); } + const char *description() const + { + return "GPUCoalescer deadlock check"; + } + }; + + GPUCoalescerWakeupEvent deadlockCheckEvent; + bool assumingRfOCoherence; + + // m5 style stats for TCP hit/miss counts + Stats::Scalar GPU_TCPLdHits; + Stats::Scalar GPU_TCPLdTransfers; + Stats::Scalar GPU_TCCLdHits; + Stats::Scalar GPU_LdMiss; + + Stats::Scalar GPU_TCPStHits; + Stats::Scalar GPU_TCPStTransfers; + Stats::Scalar GPU_TCCStHits; + Stats::Scalar GPU_StMiss; + + Stats::Scalar CP_TCPLdHits; + Stats::Scalar CP_TCPLdTransfers; + Stats::Scalar CP_TCCLdHits; + Stats::Scalar CP_LdMiss; + + Stats::Scalar CP_TCPStHits; + Stats::Scalar CP_TCPStTransfers; + Stats::Scalar CP_TCCStHits; + Stats::Scalar CP_StMiss; + + //! Histogram for number of outstanding requests per cycle. + Stats::Histogram m_outstandReqHist; + + //! Histogram for holding latency profile of all requests. + Stats::Histogram m_latencyHist; + std::vector<Stats::Histogram *> m_typeLatencyHist; + + //! Histogram for holding latency profile of all requests that + //! miss in the controller connected to this sequencer. + Stats::Histogram m_missLatencyHist; + std::vector<Stats::Histogram *> m_missTypeLatencyHist; + + //! Histograms for profiling the latencies for requests that + //! required external messages. + std::vector<Stats::Histogram *> m_missMachLatencyHist; + std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist; + + //! Histograms for recording the breakdown of miss latency + std::vector<Stats::Histogram *> m_IssueToInitialDelayHist; + std::vector<Stats::Histogram *> m_InitialToForwardDelayHist; + std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist; + std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist; +}; + +inline std::ostream& +operator<<(std::ostream& out, const GPUCoalescer& obj) +{ + obj.print(out); + out << std::flush; + return out; +} + +#endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ + |