/* * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Authors: John Kalamatianos, * Sooraj Puthoor */ #ifndef __GLOBAL_MEMORY_PIPELINE_HH__ #define __GLOBAL_MEMORY_PIPELINE_HH__ #include #include #include "gpu-compute/misc.hh" #include "params/ComputeUnit.hh" #include "sim/stats.hh" /* * @file global_memory_pipeline.hh * * The global memory pipeline issues newly created global memory packets * from the pipeline to DTLB. The exec() method of the memory packet issues * the packet to the DTLB if there is space available in the return fifo. * This stage also retires previously issued loads and stores that have * returned from the memory sub-system. */ class ComputeUnit; class GlobalMemPipeline { public: GlobalMemPipeline(const ComputeUnitParams *params); void init(ComputeUnit *cu); void exec(); std::queue &getGMStRespFIFO() { return gmReturnedStores; } std::queue &getGMLdRespFIFO() { return gmReturnedLoads; } /** * find the next ready response to service. for OoO mode we * simply pop the oldest (based on when the response was * received) response in the response FIFOs. for in-order mode * we pop the oldest (in program order) response, and only if * it is marked as done. */ GPUDynInstPtr getNextReadyResp(); /** * once a memory request is finished we remove it from the * buffer. this method determines which response buffer * we're using based on the mode (in-order vs. OoO). */ void completeRequest(GPUDynInstPtr gpuDynInst); /** * issues a request to the pipeline - i.e., enqueue it * in the request buffer. */ void issueRequest(GPUDynInstPtr gpuDynInst); /** * this method handles responses sent to this GM pipeline by the * CU. in the case of in-order delivery it simply marks the reqeust * as done in the ordered buffer to indicate that the requst is * finished. for out-of-order data delivery, the requests are enqueued * (in the order in which they are received) in the response FIFOs. */ void handleResponse(GPUDynInstPtr gpuDynInst); bool isGMLdRespFIFOWrRdy() const { return gmReturnedLoads.size() < gmQueueSize; } bool isGMStRespFIFOWrRdy() const { return gmReturnedStores.size() < gmQueueSize; } bool isGMReqFIFOWrRdy(uint32_t pendReqs=0) const { return (gmIssuedRequests.size() + pendReqs) < gmQueueSize; } const std::string &name() const { return _name; } void regStats(); void incLoadVRFBankConflictCycles(int num_cycles) { loadVrfBankConflictCycles += num_cycles; } private: ComputeUnit *computeUnit; std::string _name; int gmQueueSize; bool outOfOrderDataDelivery; // number of cycles of delaying the update of a VGPR that is the // target of a load instruction (or the load component of an atomic) // The delay is due to VRF bank conflicts Stats::Scalar loadVrfBankConflictCycles; // Counters to track the inflight loads and stores // so that we can provide the proper backpressure // on the number of inflight memory operations. int inflightStores; int inflightLoads; // The size of global memory. int globalMemSize; /* * this buffer holds the memory responses when in-order data * deilvery is used - the responses are ordered by their unique * sequence number, which is monotonically increasing. when a * memory request returns its "done" flag is set to true. during * each tick the the GM pipeline will check if the oldest request * is finished, and if so it will be removed from the queue. * * key: memory instruction's sequence ID * * value: pair holding the instruction pointer and a bool that * is used to indicate whether or not the request has * completed */ std::map> gmOrderedRespBuffer; // Global Memory Request FIFO: all global memory requests // are issued to this FIFO from the memory pipelines std::queue gmIssuedRequests; // Globa Store Response FIFO: all responses of global memory // stores are sent to this FIFO from TCP std::queue gmReturnedStores; // Global Load Response FIFO: all responses of global memory // loads are sent to this FIFO from TCP std::queue gmReturnedLoads; }; #endif // __GLOBAL_MEMORY_PIPELINE_HH__