/* * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Authors: Lisa Hsu */ #ifndef __TLB_COALESCER_HH__ #define __TLB_COALESCER_HH__ #include #include #include #include #include "arch/generic/tlb.hh" #include "arch/isa.hh" #include "arch/isa_traits.hh" #include "arch/x86/pagetable.hh" #include "arch/x86/regs/segment.hh" #include "base/logging.hh" #include "base/statistics.hh" #include "gpu-compute/gpu_tlb.hh" #include "mem/mem_object.hh" #include "mem/port.hh" #include "mem/request.hh" #include "params/TLBCoalescer.hh" class BaseTLB; class Packet; class ThreadContext; /** * The TLBCoalescer is a MemObject sitting on the front side (CPUSide) of * each TLB. It receives packets and issues coalesced requests to the * TLB below it. It controls how requests are coalesced (the rules) * and the permitted number of TLB probes per cycle (i.e., how many * coalesced requests it feeds the TLB per cycle). */ class TLBCoalescer : public MemObject { protected: // TLB clock: will inherit clock from shader's clock period in terms // of nuber of ticks of curTime (aka global simulation clock) // The assignment of TLB clock from shader clock is done in the // python config files. int clock; public: typedef TLBCoalescerParams Params; TLBCoalescer(const Params *p); ~TLBCoalescer() { } // Number of TLB probes per cycle. Parameterizable - default 2. int TLBProbesPerCycle; // Consider coalescing across that many ticks. // Paraemterizable - default 1. int coalescingWindow; // Each coalesced request consists of multiple packets // that all fall within the same virtual page typedef std::vector coalescedReq; // disables coalescing when true bool disableCoalescing; /* * This is a hash map with as a key. * It contains a vector of coalescedReqs per . * Requests are buffered here until they can be issued to * the TLB, at which point they are copied to the * issuedTranslationsTable hash map. * * In terms of coalescing, we coalesce requests in a given * window of x cycles by using tick_index = issueTime/x as a * key, where x = coalescingWindow. issueTime is the issueTime * of the pkt from the ComputeUnit's perspective, but another * option is to change it to curTick(), so we coalesce based * on the receive time. */ typedef std::unordered_map> CoalescingFIFO; CoalescingFIFO coalescerFIFO; /* * issuedTranslationsTabler: a hash_map indexed by virtual page * address. Each hash_map entry has a vector of PacketPtr associated * with it denoting the different packets that share an outstanding * coalesced translation request for the same virtual page. * * The rules that determine which requests we can coalesce are * specified in the canCoalesce() method. */ typedef std::unordered_map CoalescingTable; CoalescingTable issuedTranslationsTable; // number of packets the coalescer receives Stats::Scalar uncoalescedAccesses; // number packets the coalescer send to the TLB Stats::Scalar coalescedAccesses; // Number of cycles the coalesced requests spend waiting in // coalescerFIFO. For each packet the coalescer receives we take into // account the number of all uncoalesced requests this pkt "represents" Stats::Scalar queuingCycles; // On average how much time a request from the // uncoalescedAccesses that reaches the TLB // spends waiting? Stats::Scalar localqueuingCycles; // localqueuingCycles/uncoalescedAccesses Stats::Formula localLatency; bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2); void updatePhysAddresses(PacketPtr pkt); void regStats(); // Clock related functions. Maps to-and-from // Simulation ticks and object clocks. Tick frequency() const { return SimClock::Frequency / clock; } Tick ticks(int numCycles) const { return (Tick)clock * numCycles; } Tick curCycle() const { return curTick() / clock; } Tick tickToCycles(Tick val) const { return val / clock;} class CpuSidePort : public SlavePort { public: CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer, PortID _index) : SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer), index(_index) { } protected: TLBCoalescer *coalescer; int index; virtual bool recvTimingReq(PacketPtr pkt); virtual Tick recvAtomic(PacketPtr pkt) { return 0; } virtual void recvFunctional(PacketPtr pkt); virtual void recvRangeChange() { } virtual void recvReqRetry(); virtual void recvRespRetry() { fatal("recvRespRetry() is not implemented in the TLB coalescer.\n"); } virtual AddrRangeList getAddrRanges() const; }; class MemSidePort : public MasterPort { public: MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer, PortID _index) : MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer), index(_index) { } std::deque retries; protected: TLBCoalescer *coalescer; int index; virtual bool recvTimingResp(PacketPtr pkt); virtual Tick recvAtomic(PacketPtr pkt) { return 0; } virtual void recvFunctional(PacketPtr pkt); virtual void recvRangeChange() { } virtual void recvReqRetry(); virtual void recvRespRetry() { fatal("recvRespRetry() not implemented in TLB coalescer"); } }; // Coalescer slave ports on the cpu Side std::vector cpuSidePort; // Coalescer master ports on the memory side std::vector memSidePort; BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx); BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx); void processProbeTLBEvent(); /// This event issues the TLB probes EventFunctionWrapper probeTLBEvent; void processCleanupEvent(); /// The cleanupEvent is scheduled after a TLBEvent triggers /// in order to free memory and do the required clean-up EventFunctionWrapper cleanupEvent; // this FIFO queue keeps track of the virt. page // addresses that are pending cleanup std::queue cleanupQueue; }; #endif // __TLB_COALESCER_HH__