summaryrefslogtreecommitdiff
path: root/src/mem/ruby/system/GPUCoalescer.hh
diff options
context:
space:
mode:
Diffstat (limited to 'src/mem/ruby/system/GPUCoalescer.hh')
-rw-r--r--src/mem/ruby/system/GPUCoalescer.hh368
1 files changed, 368 insertions, 0 deletions
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh
new file mode 100644
index 000000000..dbd47059c
--- /dev/null
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+#define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+
+#include <iostream>
+#include <unordered_map>
+
+#include "base/statistics.hh"
+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
+#include "mem/protocol/PrefetchBit.hh"
+#include "mem/protocol/RubyAccessMode.hh"
+#include "mem/protocol/RubyRequestType.hh"
+#include "mem/protocol/SequencerRequestType.hh"
+#include "mem/request.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Consumer.hh"
+#include "mem/ruby/system/RubyPort.hh"
+
+class DataBlock;
+class CacheMsg;
+class MachineID;
+class CacheMemory;
+
+class RubyGPUCoalescerParams;
+
+HSAScope reqScopeToHSAScope(Request* req);
+HSASegment reqSegmentToHSASegment(Request* req);
+
+struct GPUCoalescerRequest
+{
+ PacketPtr pkt;
+ RubyRequestType m_type;
+ Cycles issue_time;
+
+ GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
+ Cycles _issue_time)
+ : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
+ {}
+};
+
+std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
+
+class GPUCoalescer : public RubyPort
+{
+ public:
+ typedef RubyGPUCoalescerParams Params;
+ GPUCoalescer(const Params *);
+ ~GPUCoalescer();
+
+ // Public Methods
+ void wakeup(); // Used only for deadlock detection
+
+ void printProgress(std::ostream& out) const;
+ void resetStats();
+ void collateStats();
+ void regStats();
+
+ void writeCallback(Addr address, DataBlock& data);
+
+ void writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data);
+
+ void writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion);
+
+ void writeCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime);
+
+ void readCallback(Addr address, DataBlock& data);
+
+ void readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data);
+
+ void readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime);
+
+ void readCallback(Addr address,
+ MachineType mach,
+ DataBlock& data,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion);
+ /* atomics need their own callback because the data
+ might be const coming from SLICC */
+ void atomicCallback(Addr address,
+ MachineType mach,
+ const DataBlock& data);
+
+ void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
+ void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
+
+ // Alternate implementations in VIPER Coalescer
+ virtual RequestStatus makeRequest(PacketPtr pkt);
+
+ int outstandingCount() const { return m_outstanding_count; }
+
+ bool
+ isDeadlockEventScheduled() const
+ {
+ return deadlockCheckEvent.scheduled();
+ }
+
+ void
+ descheduleDeadlockEvent()
+ {
+ deschedule(deadlockCheckEvent);
+ }
+
+ bool empty() const;
+
+ void print(std::ostream& out) const;
+ void checkCoherence(Addr address);
+
+ void markRemoved();
+ void removeRequest(GPUCoalescerRequest* request);
+ void evictionCallback(Addr address);
+ void completeIssue();
+
+ void insertKernel(int wavefront_id, PacketPtr pkt);
+
+ void recordRequestType(SequencerRequestType requestType);
+ Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
+
+ Stats::Histogram& getLatencyHist() { return m_latencyHist; }
+ Stats::Histogram& getTypeLatencyHist(uint32_t t)
+ { return *m_typeLatencyHist[t]; }
+
+ Stats::Histogram& getMissLatencyHist()
+ { return m_missLatencyHist; }
+ Stats::Histogram& getMissTypeLatencyHist(uint32_t t)
+ { return *m_missTypeLatencyHist[t]; }
+
+ Stats::Histogram& getMissMachLatencyHist(uint32_t t) const
+ { return *m_missMachLatencyHist[t]; }
+
+ Stats::Histogram&
+ getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
+ { return *m_missTypeMachLatencyHist[r][t]; }
+
+ Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const
+ { return *m_IssueToInitialDelayHist[t]; }
+
+ Stats::Histogram&
+ getInitialToForwardDelayHist(const MachineType t) const
+ { return *m_InitialToForwardDelayHist[t]; }
+
+ Stats::Histogram&
+ getForwardRequestToFirstResponseHist(const MachineType t) const
+ { return *m_ForwardToFirstResponseDelayHist[t]; }
+
+ Stats::Histogram&
+ getFirstResponseToCompletionDelayHist(const MachineType t) const
+ { return *m_FirstResponseToCompletionDelayHist[t]; }
+
+ // Changed to protected to enable inheritance by VIPER Coalescer
+ protected:
+ bool tryCacheAccess(Addr addr, RubyRequestType type,
+ Addr pc, RubyAccessMode access_mode,
+ int size, DataBlock*& data_ptr);
+ // Alternate implementations in VIPER Coalescer
+ virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
+
+ void kernelCallback(int wavfront_id);
+
+ void hitCallback(GPUCoalescerRequest* request,
+ MachineType mach,
+ DataBlock& data,
+ bool success,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool isRegion);
+ void recordMissLatency(GPUCoalescerRequest* request,
+ MachineType mach,
+ Cycles initialRequestTime,
+ Cycles forwardRequestTime,
+ Cycles firstResponseTime,
+ bool success, bool isRegion);
+ void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
+ PacketPtr mapAddrToPkt(Addr address);
+
+
+ RequestStatus getRequestStatus(PacketPtr pkt,
+ RubyRequestType request_type);
+ bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
+
+ bool handleLlsc(Addr address, GPUCoalescerRequest* request);
+
+ // Private copy constructor and assignment operator
+ GPUCoalescer(const GPUCoalescer& obj);
+ GPUCoalescer& operator=(const GPUCoalescer& obj);
+
+ class IssueEvent : public Event
+ {
+ private:
+ GPUCoalescer *seq;
+ public:
+ IssueEvent(GPUCoalescer *_seq);
+ void process();
+ const char *description() const;
+ };
+
+ IssueEvent issueEvent;
+
+
+ // Changed to protected to enable inheritance by VIPER Coalescer
+ protected:
+ int m_max_outstanding_requests;
+ int m_deadlock_threshold;
+
+ CacheMemory* m_dataCache_ptr;
+ CacheMemory* m_instCache_ptr;
+
+ // The cache access latency for this GPU data cache. This is assessed at the
+ // beginning of each access. This should be very similar to the
+ // implementation in Sequencer() as this is very much like a Sequencer
+ Cycles m_data_cache_hit_latency;
+
+ // We need to track both the primary and secondary request types.
+ // The secondary request type comprises a subset of RubyRequestTypes that
+ // are understood by the L1 Controller. A primary request type can be any
+ // RubyRequestType.
+ enum {PrimaryType, SecondaryType};
+ typedef std::pair<PacketPtr, std::vector<RubyRequestType> > RequestDesc;
+ typedef std::unordered_map<Addr, std::vector<RequestDesc> > CoalescingTable;
+ CoalescingTable reqCoalescer;
+ std::vector<Addr> newRequests;
+
+ typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
+ RequestTable m_writeRequestTable;
+ RequestTable m_readRequestTable;
+ // Global outstanding request count, across all request tables
+ int m_outstanding_count;
+ bool m_deadlock_check_scheduled;
+ std::unordered_map<int, PacketPtr> kernelEndList;
+ std::vector<int> newKernelEnds;
+
+ int m_store_waiting_on_load_cycles;
+ int m_store_waiting_on_store_cycles;
+ int m_load_waiting_on_store_cycles;
+ int m_load_waiting_on_load_cycles;
+
+ bool m_usingNetworkTester;
+
+ class GPUCoalescerWakeupEvent : public Event
+ {
+ private:
+ GPUCoalescer *m_GPUCoalescer_ptr;
+
+ public:
+ GPUCoalescerWakeupEvent(GPUCoalescer *_seq) :
+ m_GPUCoalescer_ptr(_seq) {}
+ void process() { m_GPUCoalescer_ptr->wakeup(); }
+ const char *description() const
+ {
+ return "GPUCoalescer deadlock check";
+ }
+ };
+
+ GPUCoalescerWakeupEvent deadlockCheckEvent;
+ bool assumingRfOCoherence;
+
+ // m5 style stats for TCP hit/miss counts
+ Stats::Scalar GPU_TCPLdHits;
+ Stats::Scalar GPU_TCPLdTransfers;
+ Stats::Scalar GPU_TCCLdHits;
+ Stats::Scalar GPU_LdMiss;
+
+ Stats::Scalar GPU_TCPStHits;
+ Stats::Scalar GPU_TCPStTransfers;
+ Stats::Scalar GPU_TCCStHits;
+ Stats::Scalar GPU_StMiss;
+
+ Stats::Scalar CP_TCPLdHits;
+ Stats::Scalar CP_TCPLdTransfers;
+ Stats::Scalar CP_TCCLdHits;
+ Stats::Scalar CP_LdMiss;
+
+ Stats::Scalar CP_TCPStHits;
+ Stats::Scalar CP_TCPStTransfers;
+ Stats::Scalar CP_TCCStHits;
+ Stats::Scalar CP_StMiss;
+
+ //! Histogram for number of outstanding requests per cycle.
+ Stats::Histogram m_outstandReqHist;
+
+ //! Histogram for holding latency profile of all requests.
+ Stats::Histogram m_latencyHist;
+ std::vector<Stats::Histogram *> m_typeLatencyHist;
+
+ //! Histogram for holding latency profile of all requests that
+ //! miss in the controller connected to this sequencer.
+ Stats::Histogram m_missLatencyHist;
+ std::vector<Stats::Histogram *> m_missTypeLatencyHist;
+
+ //! Histograms for profiling the latencies for requests that
+ //! required external messages.
+ std::vector<Stats::Histogram *> m_missMachLatencyHist;
+ std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist;
+
+ //! Histograms for recording the breakdown of miss latency
+ std::vector<Stats::Histogram *> m_IssueToInitialDelayHist;
+ std::vector<Stats::Histogram *> m_InitialToForwardDelayHist;
+ std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
+ std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
+};
+
+inline std::ostream&
+operator<<(std::ostream& out, const GPUCoalescer& obj)
+{
+ obj.print(out);
+ out << std::flush;
+ return out;
+}
+
+#endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+