summaryrefslogtreecommitdiff
path: root/src/gpu-compute/tlb_coalescer.hh
diff options
context:
space:
mode:
Diffstat (limited to 'src/gpu-compute/tlb_coalescer.hh')
-rw-r--r--src/gpu-compute/tlb_coalescer.hh252
1 files changed, 252 insertions, 0 deletions
diff --git a/src/gpu-compute/tlb_coalescer.hh b/src/gpu-compute/tlb_coalescer.hh
new file mode 100644
index 000000000..09210148b
--- /dev/null
+++ b/src/gpu-compute/tlb_coalescer.hh
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+#ifndef __TLB_COALESCER_HH__
+#define __TLB_COALESCER_HH__
+
+#include <list>
+#include <queue>
+#include <string>
+#include <vector>
+
+#include "arch/generic/tlb.hh"
+#include "arch/isa.hh"
+#include "arch/isa_traits.hh"
+#include "arch/x86/pagetable.hh"
+#include "arch/x86/regs/segment.hh"
+#include "base/misc.hh"
+#include "base/statistics.hh"
+#include "gpu-compute/gpu_tlb.hh"
+#include "mem/mem_object.hh"
+#include "mem/port.hh"
+#include "mem/request.hh"
+#include "params/TLBCoalescer.hh"
+
+class BaseTLB;
+class Packet;
+class ThreadContext;
+
+/**
+ * The TLBCoalescer is a MemObject sitting on the front side (CPUSide) of
+ * each TLB. It receives packets and issues coalesced requests to the
+ * TLB below it. It controls how requests are coalesced (the rules)
+ * and the permitted number of TLB probes per cycle (i.e., how many
+ * coalesced requests it feeds the TLB per cycle).
+ */
+class TLBCoalescer : public MemObject
+{
+ protected:
+ // TLB clock: will inherit clock from shader's clock period in terms
+ // of nuber of ticks of curTime (aka global simulation clock)
+ // The assignment of TLB clock from shader clock is done in the
+ // python config files.
+ int clock;
+
+ public:
+ typedef TLBCoalescerParams Params;
+ TLBCoalescer(const Params *p);
+ ~TLBCoalescer() { }
+
+ // Number of TLB probes per cycle. Parameterizable - default 2.
+ int TLBProbesPerCycle;
+
+ // Consider coalescing across that many ticks.
+ // Paraemterizable - default 1.
+ int coalescingWindow;
+
+ // Each coalesced request consists of multiple packets
+ // that all fall within the same virtual page
+ typedef std::vector<PacketPtr> coalescedReq;
+
+ // disables coalescing when true
+ bool disableCoalescing;
+
+ /*
+ * This is a hash map with <tick_index> as a key.
+ * It contains a vector of coalescedReqs per <tick_index>.
+ * Requests are buffered here until they can be issued to
+ * the TLB, at which point they are copied to the
+ * issuedTranslationsTable hash map.
+ *
+ * In terms of coalescing, we coalesce requests in a given
+ * window of x cycles by using tick_index = issueTime/x as a
+ * key, where x = coalescingWindow. issueTime is the issueTime
+ * of the pkt from the ComputeUnit's perspective, but another
+ * option is to change it to curTick(), so we coalesce based
+ * on the receive time.
+ */
+ typedef std::unordered_map<int64_t, std::vector<coalescedReq>> CoalescingFIFO;
+
+ CoalescingFIFO coalescerFIFO;
+
+ /*
+ * issuedTranslationsTabler: a hash_map indexed by virtual page
+ * address. Each hash_map entry has a vector of PacketPtr associated
+ * with it denoting the different packets that share an outstanding
+ * coalesced translation request for the same virtual page.
+ *
+ * The rules that determine which requests we can coalesce are
+ * specified in the canCoalesce() method.
+ */
+ typedef std::unordered_map<Addr, coalescedReq> CoalescingTable;
+
+ CoalescingTable issuedTranslationsTable;
+
+ // number of packets the coalescer receives
+ Stats::Scalar uncoalescedAccesses;
+ // number packets the coalescer send to the TLB
+ Stats::Scalar coalescedAccesses;
+
+ // Number of cycles the coalesced requests spend waiting in
+ // coalescerFIFO. For each packet the coalescer receives we take into
+ // account the number of all uncoalesced requests this pkt "represents"
+ Stats::Scalar queuingCycles;
+
+ // On average how much time a request from the
+ // uncoalescedAccesses that reaches the TLB
+ // spends waiting?
+ Stats::Scalar localqueuingCycles;
+ // localqueuingCycles/uncoalescedAccesses
+ Stats::Formula localLatency;
+
+ bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2);
+ void updatePhysAddresses(PacketPtr pkt);
+ void regStats();
+
+ // Clock related functions. Maps to-and-from
+ // Simulation ticks and object clocks.
+ Tick frequency() const { return SimClock::Frequency / clock; }
+ Tick ticks(int numCycles) const { return (Tick)clock * numCycles; }
+ Tick curCycle() const { return curTick() / clock; }
+ Tick tickToCycles(Tick val) const { return val / clock;}
+
+ class CpuSidePort : public SlavePort
+ {
+ public:
+ CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
+ PortID _index)
+ : SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer),
+ index(_index) { }
+
+ protected:
+ TLBCoalescer *coalescer;
+ int index;
+
+ virtual bool recvTimingReq(PacketPtr pkt);
+ virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
+ virtual void recvFunctional(PacketPtr pkt);
+ virtual void recvRangeChange() { }
+ virtual void recvReqRetry();
+
+ virtual void
+ recvRespRetry()
+ {
+ fatal("recvRespRetry() is not implemented in the TLB coalescer.\n");
+ }
+
+ virtual AddrRangeList getAddrRanges() const;
+ };
+
+ class MemSidePort : public MasterPort
+ {
+ public:
+ MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
+ PortID _index)
+ : MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer),
+ index(_index) { }
+
+ std::deque<PacketPtr> retries;
+
+ protected:
+ TLBCoalescer *coalescer;
+ int index;
+
+ virtual bool recvTimingResp(PacketPtr pkt);
+ virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
+ virtual void recvFunctional(PacketPtr pkt);
+ virtual void recvRangeChange() { }
+ virtual void recvReqRetry();
+
+ virtual void
+ recvRespRetry()
+ {
+ fatal("recvRespRetry() not implemented in TLB coalescer");
+ }
+ };
+
+ // Coalescer slave ports on the cpu Side
+ std::vector<CpuSidePort*> cpuSidePort;
+ // Coalescer master ports on the memory side
+ std::vector<MemSidePort*> memSidePort;
+
+ BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx);
+ BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx);
+
+ class IssueProbeEvent : public Event
+ {
+ private:
+ TLBCoalescer *coalescer;
+
+ public:
+ IssueProbeEvent(TLBCoalescer *_coalescer);
+ void process();
+ const char *description() const;
+ };
+
+ // this event issues the TLB probes
+ IssueProbeEvent probeTLBEvent;
+
+ // the cleanupEvent is scheduled after a TLBEvent triggers
+ // in order to free memory and do the required clean-up
+ class CleanupEvent : public Event
+ {
+ private:
+ TLBCoalescer *coalescer;
+
+ public:
+ CleanupEvent(TLBCoalescer *_coalescer);
+ void process();
+ const char* description() const;
+ };
+
+ // schedule cleanup
+ CleanupEvent cleanupEvent;
+
+ // this FIFO queue keeps track of the virt. page
+ // addresses that are pending cleanup
+ std::queue<Addr> cleanupQueue;
+};
+
+#endif // __TLB_COALESCER_HH__