diff options
Diffstat (limited to 'src/gpu-compute/global_memory_pipeline.cc')
-rw-r--r-- | src/gpu-compute/global_memory_pipeline.cc | 124 |
1 file changed, 104 insertions, 20 deletions
diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index f48af5a6f..7583ebb9b 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -45,7 +45,8 @@ GlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams* p) : computeUnit(nullptr), gmQueueSize(p->global_mem_queue_size), - inflightStores(0), inflightLoads(0) + outOfOrderDataDelivery(p->out_of_order_data_delivery), inflightStores(0), + inflightLoads(0) { } @@ -61,8 +62,7 @@ void GlobalMemPipeline::exec() { // apply any returned global memory operations - GPUDynInstPtr m = !gmReturnedLoads.empty() ? gmReturnedLoads.front() : - !gmReturnedStores.empty() ? gmReturnedStores.front() : nullptr; + GPUDynInstPtr m = getNextReadyResp(); bool accessVrf = true; Wavefront *w = nullptr; @@ -74,30 +74,19 @@ GlobalMemPipeline::exec() accessVrf = w->computeUnit->vrf[w->simdId]-> - vrfOperandAccessReady(m->seqNum(), w, m, - VrfAccessType::WRITE); + vrfOperandAccessReady(m->seqNum(), w, m, VrfAccessType::WRITE); } - if ((!gmReturnedStores.empty() || !gmReturnedLoads.empty()) && - m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() && + if (m && m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() && accessVrf && m->statusBitVector == VectorMask(0) && (computeUnit->shader->coissue_return || - computeUnit->wfWait.at(m->pipeId).rdy())) { + computeUnit->wfWait.at(m->pipeId).rdy())) { w = m->wavefront(); m->completeAcc(m); - if (m->isLoad() || m->isAtomic()) { - gmReturnedLoads.pop(); - assert(inflightLoads > 0); - --inflightLoads; - } else { - assert(m->isStore()); - gmReturnedStores.pop(); - assert(inflightStores > 0); - --inflightStores; - } + completeRequest(m); // Decrement outstanding register count computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); @@ -129,15 +118,30 @@ GlobalMemPipeline::exec() } else { ++inflightLoads; } - } else { + } else if (mp->isStore()) { if (inflightStores >= gmQueueSize) { return; - } 
else if (mp->isStore()) { + } else { ++inflightStores; } } mp->initiateAcc(mp); + + if (!outOfOrderDataDelivery && !mp->isMemFence()) { + /** + * if we are not in out-of-order data delivery mode + * then we keep the responses sorted in program order. + * in order to do so we must reserve an entry in the + * resp buffer before we issue the request to the mem + * system. mem fence requests will not be stored here + * because once they are issued from the GM pipeline, + * they do not send any response back to it. + */ + gmOrderedRespBuffer.insert(std::make_pair(mp->seqNum(), + std::make_pair(mp, false))); + } + gmIssuedRequests.pop(); DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n", @@ -145,6 +149,86 @@ GlobalMemPipeline::exec() } } +GPUDynInstPtr +GlobalMemPipeline::getNextReadyResp() +{ + if (outOfOrderDataDelivery) { + if (!gmReturnedLoads.empty()) { + return gmReturnedLoads.front(); + } else if (!gmReturnedStores.empty()) { + return gmReturnedStores.front(); + } + } else { + if (!gmOrderedRespBuffer.empty()) { + auto mem_req = gmOrderedRespBuffer.begin(); + + if (mem_req->second.second) { + return mem_req->second.first; + } + } + } + + return nullptr; +} + +void +GlobalMemPipeline::completeRequest(GPUDynInstPtr gpuDynInst) +{ + if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) { + assert(inflightLoads > 0); + --inflightLoads; + } else if (gpuDynInst->isStore()) { + assert(inflightStores > 0); + --inflightStores; + } + + if (outOfOrderDataDelivery) { + if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) { + assert(!gmReturnedLoads.empty()); + gmReturnedLoads.pop(); + } else if (gpuDynInst->isStore()) { + assert(!gmReturnedStores.empty()); + gmReturnedStores.pop(); + } + } else { + // we should only pop the oldest requst, and it + // should be marked as done if we are here + assert(gmOrderedRespBuffer.begin()->first == gpuDynInst->seqNum()); + assert(gmOrderedRespBuffer.begin()->second.first == gpuDynInst); + 
assert(gmOrderedRespBuffer.begin()->second.second); + // remove this instruction from the buffer by its + // unique seq ID + gmOrderedRespBuffer.erase(gpuDynInst->seqNum()); + } +} + +void +GlobalMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst) +{ + gmIssuedRequests.push(gpuDynInst); +} + +void +GlobalMemPipeline::handleResponse(GPUDynInstPtr gpuDynInst) +{ + if (outOfOrderDataDelivery) { + if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) { + assert(isGMLdRespFIFOWrRdy()); + gmReturnedLoads.push(gpuDynInst); + } else { + assert(isGMStRespFIFOWrRdy()); + gmReturnedStores.push(gpuDynInst); + } + } else { + auto mem_req = gmOrderedRespBuffer.find(gpuDynInst->seqNum()); + // if we are getting a response for this mem request, + // then it ought to already be in the ordered response + // buffer + assert(mem_req != gmOrderedRespBuffer.end()); + mem_req->second.second = true; + } +} + void GlobalMemPipeline::regStats() { |