diff options
Diffstat (limited to 'src/gpu-compute/tlb_coalescer.hh')
-rw-r--r-- | src/gpu-compute/tlb_coalescer.hh | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/src/gpu-compute/tlb_coalescer.hh b/src/gpu-compute/tlb_coalescer.hh new file mode 100644 index 000000000..09210148b --- /dev/null +++ b/src/gpu-compute/tlb_coalescer.hh @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Lisa Hsu + */ + +#ifndef __TLB_COALESCER_HH__ +#define __TLB_COALESCER_HH__ + +#include <list> +#include <queue> +#include <string> +#include <vector> + +#include "arch/generic/tlb.hh" +#include "arch/isa.hh" +#include "arch/isa_traits.hh" +#include "arch/x86/pagetable.hh" +#include "arch/x86/regs/segment.hh" +#include "base/misc.hh" +#include "base/statistics.hh" +#include "gpu-compute/gpu_tlb.hh" +#include "mem/mem_object.hh" +#include "mem/port.hh" +#include "mem/request.hh" +#include "params/TLBCoalescer.hh" + +class BaseTLB; +class Packet; +class ThreadContext; + +/** + * The TLBCoalescer is a MemObject sitting on the front side (CPUSide) of + * each TLB. It receives packets and issues coalesced requests to the + * TLB below it. It controls how requests are coalesced (the rules) + * and the permitted number of TLB probes per cycle (i.e., how many + * coalesced requests it feeds the TLB per cycle). + */ +class TLBCoalescer : public MemObject +{ + protected: + // TLB clock: will inherit clock from shader's clock period in terms + // of nuber of ticks of curTime (aka global simulation clock) + // The assignment of TLB clock from shader clock is done in the + // python config files. + int clock; + + public: + typedef TLBCoalescerParams Params; + TLBCoalescer(const Params *p); + ~TLBCoalescer() { } + + // Number of TLB probes per cycle. Parameterizable - default 2. + int TLBProbesPerCycle; + + // Consider coalescing across that many ticks. + // Paraemterizable - default 1. + int coalescingWindow; + + // Each coalesced request consists of multiple packets + // that all fall within the same virtual page + typedef std::vector<PacketPtr> coalescedReq; + + // disables coalescing when true + bool disableCoalescing; + + /* + * This is a hash map with <tick_index> as a key. + * It contains a vector of coalescedReqs per <tick_index>. + * Requests are buffered here until they can be issued to + * the TLB, at which point they are copied to the + * issuedTranslationsTable hash map. + * + * In terms of coalescing, we coalesce requests in a given + * window of x cycles by using tick_index = issueTime/x as a + * key, where x = coalescingWindow. issueTime is the issueTime + * of the pkt from the ComputeUnit's perspective, but another + * option is to change it to curTick(), so we coalesce based + * on the receive time. + */ + typedef std::unordered_map<int64_t, std::vector<coalescedReq>> CoalescingFIFO; + + CoalescingFIFO coalescerFIFO; + + /* + * issuedTranslationsTabler: a hash_map indexed by virtual page + * address. Each hash_map entry has a vector of PacketPtr associated + * with it denoting the different packets that share an outstanding + * coalesced translation request for the same virtual page. + * + * The rules that determine which requests we can coalesce are + * specified in the canCoalesce() method. + */ + typedef std::unordered_map<Addr, coalescedReq> CoalescingTable; + + CoalescingTable issuedTranslationsTable; + + // number of packets the coalescer receives + Stats::Scalar uncoalescedAccesses; + // number packets the coalescer send to the TLB + Stats::Scalar coalescedAccesses; + + // Number of cycles the coalesced requests spend waiting in + // coalescerFIFO. For each packet the coalescer receives we take into + // account the number of all uncoalesced requests this pkt "represents" + Stats::Scalar queuingCycles; + + // On average how much time a request from the + // uncoalescedAccesses that reaches the TLB + // spends waiting? + Stats::Scalar localqueuingCycles; + // localqueuingCycles/uncoalescedAccesses + Stats::Formula localLatency; + + bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2); + void updatePhysAddresses(PacketPtr pkt); + void regStats(); + + // Clock related functions. Maps to-and-from + // Simulation ticks and object clocks. + Tick frequency() const { return SimClock::Frequency / clock; } + Tick ticks(int numCycles) const { return (Tick)clock * numCycles; } + Tick curCycle() const { return curTick() / clock; } + Tick tickToCycles(Tick val) const { return val / clock;} + + class CpuSidePort : public SlavePort + { + public: + CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer, + PortID _index) + : SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer), + index(_index) { } + + protected: + TLBCoalescer *coalescer; + int index; + + virtual bool recvTimingReq(PacketPtr pkt); + virtual Tick recvAtomic(PacketPtr pkt) { return 0; } + virtual void recvFunctional(PacketPtr pkt); + virtual void recvRangeChange() { } + virtual void recvReqRetry(); + + virtual void + recvRespRetry() + { + fatal("recvRespRetry() is not implemented in the TLB coalescer.\n"); + } + + virtual AddrRangeList getAddrRanges() const; + }; + + class MemSidePort : public MasterPort + { + public: + MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer, + PortID _index) + : MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer), + index(_index) { } + + std::deque<PacketPtr> retries; + + protected: + TLBCoalescer *coalescer; + int index; + + virtual bool recvTimingResp(PacketPtr pkt); + virtual Tick recvAtomic(PacketPtr pkt) { return 0; } + virtual void recvFunctional(PacketPtr pkt); + virtual void recvRangeChange() { } + virtual void recvReqRetry(); + + virtual void + recvRespRetry() + { + fatal("recvRespRetry() not implemented in TLB coalescer"); + } + }; + + // Coalescer slave ports on the cpu Side + std::vector<CpuSidePort*> cpuSidePort; + // Coalescer master ports on the memory side + std::vector<MemSidePort*> memSidePort; + + BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx); + BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx); + + class IssueProbeEvent : public Event + { + private: + TLBCoalescer *coalescer; + + public: + IssueProbeEvent(TLBCoalescer *_coalescer); + void process(); + const char *description() const; + }; + + // this event issues the TLB probes + IssueProbeEvent probeTLBEvent; + + // the cleanupEvent is scheduled after a TLBEvent triggers + // in order to free memory and do the required clean-up + class CleanupEvent : public Event + { + private: + TLBCoalescer *coalescer; + + public: + CleanupEvent(TLBCoalescer *_coalescer); + void process(); + const char* description() const; + }; + + // schedule cleanup + CleanupEvent cleanupEvent; + + // this FIFO queue keeps track of the virt. page + // addresses that are pending cleanup + std::queue<Addr> cleanupQueue; +}; + +#endif // __TLB_COALESCER_HH__ |