Diffstat (limited to 'src/gpu-compute/global_memory_pipeline.cc')
-rw-r--r--  src/gpu-compute/global_memory_pipeline.cc | 124
1 file changed, 104 insertions(+), 20 deletions(-)
diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc
index f48af5a6f..7583ebb9b 100644
--- a/src/gpu-compute/global_memory_pipeline.cc
+++ b/src/gpu-compute/global_memory_pipeline.cc
@@ -45,7 +45,8 @@
GlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams* p) :
computeUnit(nullptr), gmQueueSize(p->global_mem_queue_size),
- inflightStores(0), inflightLoads(0)
+ outOfOrderDataDelivery(p->out_of_order_data_delivery), inflightStores(0),
+ inflightLoads(0)
{
}
@@ -61,8 +62,7 @@ void
GlobalMemPipeline::exec()
{
// apply any returned global memory operations
- GPUDynInstPtr m = !gmReturnedLoads.empty() ? gmReturnedLoads.front() :
- !gmReturnedStores.empty() ? gmReturnedStores.front() : nullptr;
+ GPUDynInstPtr m = getNextReadyResp();
bool accessVrf = true;
Wavefront *w = nullptr;
@@ -74,30 +74,19 @@ GlobalMemPipeline::exec()
accessVrf =
w->computeUnit->vrf[w->simdId]->
- vrfOperandAccessReady(m->seqNum(), w, m,
- VrfAccessType::WRITE);
+ vrfOperandAccessReady(m->seqNum(), w, m, VrfAccessType::WRITE);
}
- if ((!gmReturnedStores.empty() || !gmReturnedLoads.empty()) &&
- m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() &&
+ if (m && m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() &&
accessVrf && m->statusBitVector == VectorMask(0) &&
(computeUnit->shader->coissue_return ||
- computeUnit->wfWait.at(m->pipeId).rdy())) {
+ computeUnit->wfWait.at(m->pipeId).rdy())) {
w = m->wavefront();
m->completeAcc(m);
- if (m->isLoad() || m->isAtomic()) {
- gmReturnedLoads.pop();
- assert(inflightLoads > 0);
- --inflightLoads;
- } else {
- assert(m->isStore());
- gmReturnedStores.pop();
- assert(inflightStores > 0);
- --inflightStores;
- }
+ completeRequest(m);
// Decrement outstanding register count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
@@ -129,15 +118,30 @@ GlobalMemPipeline::exec()
} else {
++inflightLoads;
}
- } else {
+ } else if (mp->isStore()) {
if (inflightStores >= gmQueueSize) {
return;
- } else if (mp->isStore()) {
+ } else {
++inflightStores;
}
}
mp->initiateAcc(mp);
+
+ if (!outOfOrderDataDelivery && !mp->isMemFence()) {
+ /**
+ * If we are not in out-of-order data delivery mode,
+ * we keep the responses sorted in program order. To
+ * do so, we reserve an entry in the response buffer
+ * for each request issued to the memory system. Mem
+ * fence requests are not stored here because, once
+ * issued from the GM pipeline, they do not send any
+ * response back to it.
+ */
+ gmOrderedRespBuffer.insert(std::make_pair(mp->seqNum(),
+ std::make_pair(mp, false)));
+ }
+
gmIssuedRequests.pop();
DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
@@ -145,6 +149,86 @@ GlobalMemPipeline::exec()
}
}
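
The in-order bookkeeping added above is easiest to see in isolation. What follows is a minimal standalone sketch of the same reserve/mark/drain pattern, assuming gmOrderedRespBuffer is a std::map keyed by seqNum whose mapped value pairs the instruction with a response-received flag (the member's declaration lives in the header, which this diff does not touch, so the exact type is an assumption). Inst, reserveSlot, markDone, and nextReady are hypothetical names standing in for GPUDynInstPtr and the methods introduced below.

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <utility>

    // Hypothetical stand-in for GPUDynInstPtr; only the sequence
    // number matters for the ordering logic.
    struct Inst { uint64_t seqNum; };

    // Assumed shape of gmOrderedRespBuffer: seqNum -> (instruction,
    // response-received flag). std::map keeps keys sorted ascending.
    static std::map<uint64_t, std::pair<Inst*, bool>> respBuffer;

    // Reserve a slot when the request is issued to memory.
    void reserveSlot(Inst *mp) {
        respBuffer.insert(std::make_pair(mp->seqNum,
            std::make_pair(mp, false)));
    }

    // Responses may arrive in any order; just flag the slot as done.
    void markDone(Inst *mp) {
        auto it = respBuffer.find(mp->seqNum);
        assert(it != respBuffer.end());
        it->second.second = true;
    }

    // Only the oldest slot may complete, and only once its response
    // has arrived; otherwise nothing is eligible and the pipe stalls.
    Inst* nextReady() {
        if (!respBuffer.empty()) {
            auto front = respBuffer.begin();
            if (front->second.second)
                return front->second.first;
        }
        return nullptr;
    }

    int main() {
        Inst a{1}, b{2};
        reserveSlot(&a);
        reserveSlot(&b);
        markDone(&b);                   // younger response returns first
        assert(nextReady() == nullptr); // but it may not complete yet
        markDone(&a);
        assert(nextReady() == &a);      // oldest always drains first
        respBuffer.erase(a.seqNum);
        assert(nextReady() == &b);      // now the younger one may go
        return 0;
    }
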
+GPUDynInstPtr
+GlobalMemPipeline::getNextReadyResp()
+{
+ if (outOfOrderDataDelivery) {
+ if (!gmReturnedLoads.empty()) {
+ return gmReturnedLoads.front();
+ } else if (!gmReturnedStores.empty()) {
+ return gmReturnedStores.front();
+ }
+ } else {
+ if (!gmOrderedRespBuffer.empty()) {
+ auto mem_req = gmOrderedRespBuffer.begin();
+
+ if (mem_req->second.second) {
+ return mem_req->second.first;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
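
A note on why getNextReadyResp() works: std::map iterates its keys in ascending order, and seqNum values increase with issue order, so gmOrderedRespBuffer.begin() is always the oldest outstanding memory operation. Returning nullptr when that entry has not yet been marked ready is what enforces in-order data delivery; a younger response that has already arrived simply waits in the buffer until everything older has drained.
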
+void
+GlobalMemPipeline::completeRequest(GPUDynInstPtr gpuDynInst)
+{
+ if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
+ assert(inflightLoads > 0);
+ --inflightLoads;
+ } else if (gpuDynInst->isStore()) {
+ assert(inflightStores > 0);
+ --inflightStores;
+ }
+
+ if (outOfOrderDataDelivery) {
+ if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
+ assert(!gmReturnedLoads.empty());
+ gmReturnedLoads.pop();
+ } else if (gpuDynInst->isStore()) {
+ assert(!gmReturnedStores.empty());
+ gmReturnedStores.pop();
+ }
+ } else {
+ // we should only pop the oldest request, and it
+ // should be marked as done if we are here
+ assert(gmOrderedRespBuffer.begin()->first == gpuDynInst->seqNum());
+ assert(gmOrderedRespBuffer.begin()->second.first == gpuDynInst);
+ assert(gmOrderedRespBuffer.begin()->second.second);
+ // remove this instruction from the buffer by its
+ // unique seq ID
+ gmOrderedRespBuffer.erase(gpuDynInst->seqNum());
+ }
+}
+
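
The three asserts in completeRequest() pin down the in-order invariant: the instruction being retired must be the front of the buffer (the oldest seqNum), must be the same instruction that reserved the slot, and must have been marked done by handleResponse(). Erasing it by seqNum then exposes the next-oldest entry to getNextReadyResp().
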
+void
+GlobalMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
+{
+ gmIssuedRequests.push(gpuDynInst);
+}
+
+void
+GlobalMemPipeline::handleResponse(GPUDynInstPtr gpuDynInst)
+{
+ if (outOfOrderDataDelivery) {
+ if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
+ assert(isGMLdRespFIFOWrRdy());
+ gmReturnedLoads.push(gpuDynInst);
+ } else {
+ assert(isGMStRespFIFOWrRdy());
+ gmReturnedStores.push(gpuDynInst);
+ }
+ } else {
+ auto mem_req = gmOrderedRespBuffer.find(gpuDynInst->seqNum());
+ // if we are getting a response for this mem request,
+ // then it ought to already be in the ordered response
+ // buffer
+ assert(mem_req != gmOrderedRespBuffer.end());
+ mem_req->second.second = true;
+ }
+}
+
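
Taken together, the new methods give callers one narrow interface to a request's lifetime: issueRequest() enqueues the operation; exec() calls initiateAcc() on it, reserves its slot in gmOrderedRespBuffer (in-order mode, non-fence ops only), and pops it from gmIssuedRequests; handleResponse() marks the slot ready when the memory system answers, or pushes onto the returned-load/store FIFOs in out-of-order mode; getNextReadyResp() nominates the completion candidate; and completeRequest() retires it and frees its slot.
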
void
GlobalMemPipeline::regStats()
{