From 1a7d3f9fcb76a68540dd948f91413533a383bfde Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Tue, 19 Jan 2016 14:28:22 -0500 Subject: gpu-compute: AMD's baseline GPU model --- src/gpu-compute/lds_state.hh | 512 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 512 insertions(+) create mode 100644 src/gpu-compute/lds_state.hh (limited to 'src/gpu-compute/lds_state.hh') diff --git a/src/gpu-compute/lds_state.hh b/src/gpu-compute/lds_state.hh new file mode 100644 index 000000000..89f08a1d3 --- /dev/null +++ b/src/gpu-compute/lds_state.hh @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: John Kalamatianos, Joe Gross + */ + +#ifndef __LDS_STATE_HH__ +#define __LDS_STATE_HH__ + +#include +#include +#include +#include +#include +#include + +#include "enums/MemOpType.hh" +#include "enums/MemType.hh" +#include "gpu-compute/misc.hh" +#include "mem/mem_object.hh" +#include "mem/port.hh" +#include "params/LdsState.hh" + +class ComputeUnit; + +/** + * this represents a slice of the overall LDS, intended to be associated with an + * individual workgroup + */ +class LdsChunk +{ + public: + LdsChunk(const uint32_t x_size): + chunk(x_size) + { + } + + LdsChunk() {} + + /** + * a read operation + */ + template + T + read(const uint32_t index) + { + fatal_if(!chunk.size(), "cannot read from an LDS chunk of size 0"); + fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk"); + T *p0 = (T *) (&(chunk.at(index))); + return *p0; + } + + /** + * a write operation + */ + template + void + write(const uint32_t index, const T value) + { + fatal_if(!chunk.size(), "cannot write to an LDS chunk of size 0"); + fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk"); + T *p0 = (T *) (&(chunk.at(index))); + *p0 = value; + } + + /** + * get the size of this chunk + */ + std::vector::size_type + size() const + { + return chunk.size(); + } + + protected: + // the actual data store for this slice of the LDS + std::vector chunk; +}; + +// Local Data Share (LDS) State per Wavefront (contents of the LDS region +// allocated to the WorkGroup of this Wavefront) +class LdsState: public MemObject +{ + protected: + + /** + * an event to allow event-driven execution + */ + class TickEvent: public Event + { + protected: + + LdsState *ldsState = nullptr; + + Tick nextTick = 0; + + public: + + TickEvent(LdsState *_ldsState) : + ldsState(_ldsState) + { + } + + virtual void + process(); + + void + schedule(Tick when) + { + mainEventQueue[0]->schedule(this, when); + } + + void + deschedule() + { + mainEventQueue[0]->deschedule(this); + } + }; + + /** + * CuSidePort is the LDS Port closer to the CU side + */ + class CuSidePort: public SlavePort + { + public: + CuSidePort(const std::string &_name, LdsState *_ownerLds) : + SlavePort(_name, _ownerLds), ownerLds(_ownerLds) + { + } + + protected: + LdsState *ownerLds; + + virtual bool + recvTimingReq(PacketPtr pkt); + + virtual Tick + recvAtomic(PacketPtr pkt) + { + return 0; + } + + virtual void + recvFunctional(PacketPtr pkt); + + virtual void + recvRangeChange() + { + } + + virtual void + recvRetry(); + + virtual void + recvRespRetry(); + + virtual AddrRangeList + getAddrRanges() const + { + AddrRangeList ranges; + ranges.push_back(ownerLds->getAddrRange()); + return ranges; + } + + template + void + loadData(PacketPtr packet); + + template + void + storeData(PacketPtr packet); + + template + void + atomicOperation(PacketPtr packet); + }; + + protected: + + // the lds reference counter + // The key is the workgroup ID and dispatch ID + // The value is the number of wavefronts that reference this LDS, as + // wavefronts are launched, the counter goes up for that workgroup and when + // they return it decreases, once it reaches 0 then this chunk of the LDS is + // returned to the available pool. However,it is deallocated on the 1->0 + // transition, not whenever the counter is 0 as it always starts with 0 when + // the workgroup asks for space + std::unordered_map> refCounter; + + // the map that allows workgroups to access their own chunk of the LDS + std::unordered_map> chunkMap; + + // an event to allow the LDS to wake up at a specified time + TickEvent tickEvent; + + // the queue of packets that are going back to the CU after a + // read/write/atomic op + // TODO need to make this have a maximum size to create flow control + std::queue> returnQueue; + + // whether or not there are pending responses + bool retryResp = false; + + bool + process(); + + GPUDynInstPtr + getDynInstr(PacketPtr packet); + + bool + processPacket(PacketPtr packet); + + unsigned + countBankConflicts(PacketPtr packet, unsigned *bankAccesses); + + unsigned + countBankConflicts(GPUDynInstPtr gpuDynInst, + unsigned *numBankAccesses); + + public: + typedef LdsStateParams Params; + + LdsState(const Params *params); + + // prevent copy construction + LdsState(const LdsState&) = delete; + + ~LdsState() + { + parent = nullptr; + } + + const Params * + params() const + { + return dynamic_cast(_params); + } + + bool + isRetryResp() const + { + return retryResp; + } + + void + setRetryResp(const bool value) + { + retryResp = value; + } + + // prevent assignment + LdsState & + operator=(const LdsState &) = delete; + + /** + * use the dynamic wave id to create or just increase the reference count + */ + int + increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId) + { + int refCount = getRefCounter(dispatchId, wgId); + fatal_if(refCount < 0, + "reference count should not be below zero"); + return ++refCounter[dispatchId][wgId]; + } + + /** + * decrease the reference count after making sure it is in the list + * give back this chunk if the ref counter has reached 0 + */ + int + decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId) + { + int refCount = getRefCounter(dispatchId, wgId); + + fatal_if(refCount <= 0, + "reference count should not be below zero or at zero to" + "decrement"); + + refCounter[dispatchId][wgId]--; + + if (refCounter[dispatchId][wgId] == 0) { + releaseSpace(dispatchId, wgId); + return 0; + } else { + return refCounter[dispatchId][wgId]; + } + } + + /** + * return the current reference count for this workgroup id + */ + int + getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const + { + auto dispatchIter = chunkMap.find(dispatchId); + fatal_if(dispatchIter == chunkMap.end(), + "could not locate this dispatch id [%d]", dispatchId); + + auto workgroup = dispatchIter->second.find(wgId); + fatal_if(workgroup == dispatchIter->second.end(), + "could not find this workgroup id within this dispatch id" + " did[%d] wgid[%d]", dispatchId, wgId); + + auto refCountIter = refCounter.find(dispatchId); + if (refCountIter == refCounter.end()) { + fatal("could not locate this dispatch id [%d]", dispatchId); + } else { + auto workgroup = refCountIter->second.find(wgId); + if (workgroup == refCountIter->second.end()) { + fatal("could not find this workgroup id within this dispatch id" + " did[%d] wgid[%d]", dispatchId, wgId); + } else { + return refCounter.at(dispatchId).at(wgId); + } + } + + fatal("should not reach this point"); + return 0; + } + + /** + * assign a parent and request this amount of space be set aside + * for this wgid + */ + LdsChunk * + reserveSpace(const uint32_t dispatchId, const uint32_t wgId, + const uint32_t size) + { + if (chunkMap.find(dispatchId) != chunkMap.end()) { + fatal_if( + chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(), + "duplicate workgroup ID asking for space in the LDS " + "did[%d] wgid[%d]", dispatchId, wgId); + } + + fatal_if(bytesAllocated + size > maximumSize, + "request would ask for more space than is available"); + + bytesAllocated += size; + + chunkMap[dispatchId].emplace(wgId, LdsChunk(size)); + // make an entry for this workgroup + refCounter[dispatchId][wgId] = 0; + + return &chunkMap[dispatchId][wgId]; + } + + bool + returnQueuePush(std::pair thePair); + + Tick + earliestReturnTime() const + { + // TODO set to max(lastCommand+1, curTick()) + return returnQueue.empty() ? curTick() : returnQueue.back().first; + } + + void + setParent(ComputeUnit *x_parent); + + void + regStats(); + + // accessors + ComputeUnit * + getParent() const + { + return parent; + } + + std::string + getName() + { + return _name; + } + + int + getBanks() const + { + return banks; + } + + ComputeUnit * + getComputeUnit() const + { + return parent; + } + + int + getBankConflictPenalty() const + { + return bankConflictPenalty; + } + + /** + * get the allocated size for this workgroup + */ + std::size_t + ldsSize(const uint32_t x_wgId) + { + return chunkMap[x_wgId].size(); + } + + AddrRange + getAddrRange() const + { + return range; + } + + virtual BaseSlavePort & + getSlavePort(const std::string& if_name, PortID idx) + { + if (if_name == "cuPort") { + // TODO need to set name dynamically at this point? + return cuPort; + } else { + fatal("cannot resolve the port name " + if_name); + } + } + + /** + * can this much space be reserved for a workgroup? + */ + bool + canReserve(uint32_t x_size) const + { + return bytesAllocated + x_size <= maximumSize; + } + + private: + /** + * give back the space + */ + bool + releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId) + { + auto dispatchIter = chunkMap.find(x_dispatchId); + + if (dispatchIter == chunkMap.end()) { + fatal("dispatch id not found [%d]", x_dispatchId); + } else { + auto workgroupIter = dispatchIter->second.find(x_wgId); + if (workgroupIter == dispatchIter->second.end()) { + fatal("workgroup id [%d] not found in dispatch id [%d]", + x_wgId, x_dispatchId); + } + } + + fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(), + "releasing more space than was allocated"); + + bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size(); + chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId)); + return true; + } + + // the port that connects this LDS to its owner CU + CuSidePort cuPort; + + ComputeUnit* parent = nullptr; + + std::string _name; + + // the number of bytes currently reserved by all workgroups + int bytesAllocated = 0; + + // the size of the LDS, the most bytes available + int maximumSize; + + // Address range of this memory + AddrRange range; + + // the penalty, in cycles, for each LDS bank conflict + int bankConflictPenalty = 0; + + // the number of banks in the LDS underlying data store + int banks = 0; +}; + +#endif // __LDS_STATE_HH__ -- cgit v1.2.3