1 files changed, 368 insertions, 0 deletions
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh
new file mode 100644
index 000000000..dbd47059c
--- /dev/null
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Sooraj Puthoor
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+#define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+
+#include <iostream>
+#include <unordered_map>
+
+#include "base/statistics.hh"
+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
+#include "mem/protocol/PrefetchBit.hh"
+#include "mem/protocol/RubyAccessMode.hh"
+#include "mem/protocol/RubyRequestType.hh"
+#include "mem/protocol/SequencerRequestType.hh"
+#include "mem/request.hh"
+#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Consumer.hh"
+#include "mem/ruby/system/RubyPort.hh"
+
+class DataBlock;
+class CacheMsg; // NOTE(review): not referenced elsewhere in this header
+class MachineID;
+class CacheMemory;
+
+class RubyGPUCoalescerParams;
+
+HSAScope reqScopeToHSAScope(Request* req); // declared only; defined elsewhere
+HSASegment reqSegmentToHSASegment(Request* req); // declared only as well
+
+struct GPUCoalescerRequest
+{
+    PacketPtr pkt;           // packet this record was built for
+    RubyRequestType m_type;  // request type supplied at construction
+    Cycles issue_time;       // issue time supplied at construction
+
+    GPUCoalescerRequest(PacketPtr _pkt,
+                        RubyRequestType _m_type,
+                        Cycles _issue_time)
+        : pkt(_pkt), m_type(_m_type), issue_time(_issue_time) {}
+};
+// Stream output for GPUCoalescerRequest; declared only in this header.
+std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
+
+class GPUCoalescer : public RubyPort
+{
+  public:
+    typedef RubyGPUCoalescerParams Params;
+    GPUCoalescer(const Params *); // NOTE(review): not marked explicit
+    ~GPUCoalescer();
+
+    // Public Methods
+    void wakeup(); // Used only for deadlock detection
+
+    void printProgress(std::ostream& out) const;
+    void resetStats();
+    void collateStats();
+    void regStats();
+    // writeCallback overloads: optional MachineType, Cycles times, isRegion.
+    void writeCallback(Addr address, DataBlock& data);
+
+    void writeCallback(Addr address,
+                       MachineType mach,
+                       DataBlock& data);
+
+    void writeCallback(Addr address,
+                       MachineType mach,
+                       DataBlock& data,
+                       Cycles initialRequestTime,
+                       Cycles forwardRequestTime,
+                       Cycles firstResponseTime,
+                       bool isRegion);
+
+    void writeCallback(Addr address,
+                       MachineType mach,
+                       DataBlock& data,
+                       Cycles initialRequestTime,
+                       Cycles forwardRequestTime,
+                       Cycles firstResponseTime);
+    // readCallback overloads take the same parameter sets as writeCallback.
+    void readCallback(Addr address, DataBlock& data);
+
+    void readCallback(Addr address,
+                      MachineType mach,
+                      DataBlock& data);
+
+    void readCallback(Addr address,
+                      MachineType mach,
+                      DataBlock& data,
+                      Cycles initialRequestTime,
+                      Cycles forwardRequestTime,
+                      Cycles firstResponseTime);
+
+    void readCallback(Addr address,
+                      MachineType mach,
+                      DataBlock& data,
+                      Cycles initialRequestTime,
+                      Cycles forwardRequestTime,
+                      Cycles firstResponseTime,
+                      bool isRegion);
+    /* atomics need their own callback because the data
+       might be const coming from SLICC */
+    void atomicCallback(Addr address,
+                        MachineType mach,
+                        const DataBlock& data);
+
+    void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
+    void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
+
+    // Alternate implementations in VIPER Coalescer
+    virtual RequestStatus makeRequest(PacketPtr pkt);
+
+    int outstandingCount() const { return m_outstanding_count; }
+    // Reports whether deadlockCheckEvent is currently scheduled.
+    bool
+    isDeadlockEventScheduled() const
+    {
+        return deadlockCheckEvent.scheduled();
+    }
+    // Takes deadlockCheckEvent off the schedule via deschedule().
+    void
+    descheduleDeadlockEvent()
+    {
+        deschedule(deadlockCheckEvent);
+    }
+
+    bool empty() const;
+
+    void print(std::ostream& out) const;
+    void checkCoherence(Addr address);
+
+    void markRemoved();
+    void removeRequest(GPUCoalescerRequest* request);
+    void evictionCallback(Addr address);
+    void completeIssue();
+
+    void insertKernel(int wavefront_id, PacketPtr pkt);
+
+    void recordRequestType(SequencerRequestType requestType);
+    Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
+    // Histogram accessors; indexed ones use unchecked vector operator[].
+    Stats::Histogram& getLatencyHist() { return m_latencyHist; }
+    Stats::Histogram& getTypeLatencyHist(uint32_t t)
+    { return *m_typeLatencyHist[t]; }
+
+    Stats::Histogram& getMissLatencyHist()
+    { return m_missLatencyHist; }
+    Stats::Histogram& getMissTypeLatencyHist(uint32_t t)
+    { return *m_missTypeLatencyHist[t]; }
+    // These const accessors still hand out non-const Histogram references.
+    Stats::Histogram& getMissMachLatencyHist(uint32_t t) const
+    { return *m_missMachLatencyHist[t]; }
+
+    Stats::Histogram&
+    getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
+    { return *m_missTypeMachLatencyHist[r][t]; }
+
+    Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const
+    { return *m_IssueToInitialDelayHist[t]; }
+
+    Stats::Histogram&
+    getInitialToForwardDelayHist(const MachineType t) const
+    { return *m_InitialToForwardDelayHist[t]; }
+
+    Stats::Histogram&
+    getForwardRequestToFirstResponseHist(const MachineType t) const
+    { return *m_ForwardToFirstResponseDelayHist[t]; }
+
+    Stats::Histogram&
+    getFirstResponseToCompletionDelayHist(const MachineType t) const
+    { return *m_FirstResponseToCompletionDelayHist[t]; }
+
+  // Changed to protected to enable inheritance by VIPER Coalescer
+  protected:
+    bool tryCacheAccess(Addr addr, RubyRequestType type,
+                        Addr pc, RubyAccessMode access_mode,
+                        int size, DataBlock*& data_ptr);
+    // Alternate implementations in VIPER Coalescer
+    virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
+
+    void kernelCallback(int wavfront_id); // NOTE(review): sic, "wavfront"
+
+    void hitCallback(GPUCoalescerRequest* request,
+                     MachineType mach,
+                     DataBlock& data,
+                     bool success,
+                     Cycles initialRequestTime,
+                     Cycles forwardRequestTime,
+                     Cycles firstResponseTime,
+                     bool isRegion);
+    void recordMissLatency(GPUCoalescerRequest* request,
+                           MachineType mach,
+                           Cycles initialRequestTime,
+                           Cycles forwardRequestTime,
+                           Cycles firstResponseTime,
+                           bool success, bool isRegion);
+    void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
+    PacketPtr mapAddrToPkt(Addr address);
+
+
+    RequestStatus getRequestStatus(PacketPtr pkt,
+                                   RubyRequestType request_type);
+    bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
+
+    bool handleLlsc(Addr address, GPUCoalescerRequest* request);
+
+    // Copy constructor and assignment operator (protected here, not private)
+    GPUCoalescer(const GPUCoalescer& obj);
+    GPUCoalescer& operator=(const GPUCoalescer& obj);
+    // Event with a GPUCoalescer back-pointer; members are declared only.
+    class IssueEvent : public Event
+    {
+      private:
+        GPUCoalescer *seq;
+      public:
+        IssueEvent(GPUCoalescer *_seq);
+        void process();
+        const char *description() const;
+    };
+
+    IssueEvent issueEvent;
+
+
+  // Changed to protected to enable inheritance by VIPER Coalescer
+  protected: // redundant: the preceding section is already protected
+    int m_max_outstanding_requests;
+    int m_deadlock_threshold;
+
+    CacheMemory* m_dataCache_ptr;
+    CacheMemory* m_instCache_ptr;
+
+    // The cache access latency for this GPU data cache. This is assessed at the
+    // beginning of each access. This should be very similar to the
+    // implementation in Sequencer() as this is very much like a Sequencer
+    Cycles m_data_cache_hit_latency;
+
+    // We need to track both the primary and secondary request types.
+    // The secondary request type comprises a subset of RubyRequestTypes that
+    // are understood by the L1 Controller. A primary request type can be any
+    // RubyRequestType.
+    enum {PrimaryType, SecondaryType};
+    typedef std::pair<PacketPtr, std::vector<RubyRequestType> > RequestDesc;
+    typedef std::unordered_map<Addr, std::vector<RequestDesc> > CoalescingTable;
+    CoalescingTable reqCoalescer;
+    std::vector<Addr> newRequests;
+    // NOTE(review): <vector> and <utility> are not included directly here.
+    typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
+    RequestTable m_writeRequestTable;
+    RequestTable m_readRequestTable;
+    // Global outstanding request count, across all request tables
+    int m_outstanding_count;
+    bool m_deadlock_check_scheduled;
+    std::unordered_map<int, PacketPtr> kernelEndList;
+    std::vector<int> newKernelEnds;
+
+    int m_store_waiting_on_load_cycles;
+    int m_store_waiting_on_store_cycles;
+    int m_load_waiting_on_store_cycles;
+    int m_load_waiting_on_load_cycles;
+
+    bool m_usingNetworkTester;
+    // Event whose process() calls wakeup() on the coalescer given to it.
+    class GPUCoalescerWakeupEvent : public Event
+    {
+      private:
+        GPUCoalescer *m_GPUCoalescer_ptr;
+
+      public:
+        GPUCoalescerWakeupEvent(GPUCoalescer *_seq) :
+            m_GPUCoalescer_ptr(_seq) {}
+        void process() { m_GPUCoalescer_ptr->wakeup(); }
+        const char *description() const
+        {
+            return "GPUCoalescer deadlock check";
+        }
+    };
+
+    GPUCoalescerWakeupEvent deadlockCheckEvent;
+    bool assumingRfOCoherence;
+
+    // m5 style stats for TCP hit/miss counts
+    Stats::Scalar GPU_TCPLdHits;
+    Stats::Scalar GPU_TCPLdTransfers;
+    Stats::Scalar GPU_TCCLdHits;
+    Stats::Scalar GPU_LdMiss;
+
+    Stats::Scalar GPU_TCPStHits;
+    Stats::Scalar GPU_TCPStTransfers;
+    Stats::Scalar GPU_TCCStHits;
+    Stats::Scalar GPU_StMiss;
+
+    Stats::Scalar CP_TCPLdHits;
+    Stats::Scalar CP_TCPLdTransfers;
+    Stats::Scalar CP_TCCLdHits;
+    Stats::Scalar CP_LdMiss;
+
+    Stats::Scalar CP_TCPStHits;
+    Stats::Scalar CP_TCPStTransfers;
+    Stats::Scalar CP_TCCStHits;
+    Stats::Scalar CP_StMiss;
+
+    //! Histogram for number of outstanding requests per cycle.
+    Stats::Histogram m_outstandReqHist;
+
+    //! Histogram for holding latency profile of all requests.
+    Stats::Histogram m_latencyHist;
+    std::vector<Stats::Histogram *> m_typeLatencyHist;
+
+    //! Histogram for holding latency profile of all requests that
+    //! miss in the controller connected to this coalescer.
+    Stats::Histogram m_missLatencyHist;
+    std::vector<Stats::Histogram *> m_missTypeLatencyHist;
+
+    //! Histograms for profiling the latencies for requests that
+    //! required external messages.
+    std::vector<Stats::Histogram *> m_missMachLatencyHist;
+    std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist;
+
+    //! Histograms for recording the breakdown of miss latency
+    std::vector<Stats::Histogram *> m_IssueToInitialDelayHist;
+    std::vector<Stats::Histogram *> m_InitialToForwardDelayHist;
+    std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
+    std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
+};
+
+// Print a GPUCoalescer through its print() method, then flush the stream.
+inline std::ostream&
+operator<<(std::ostream& out, const GPUCoalescer& obj)
+{
+    obj.print(out);
+    return out << std::flush;
+}
+
+#endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
+