Diffstat (limited to 'src/gpu-compute/local_memory_pipeline.cc')
 -rw-r--r--  src/gpu-compute/local_memory_pipeline.cc | 126
 1 file changed, 27 insertions(+), 99 deletions(-)
diff --git a/src/gpu-compute/local_memory_pipeline.cc b/src/gpu-compute/local_memory_pipeline.cc
index 80dad6fcd..9e7dc6fb3 100644
--- a/src/gpu-compute/local_memory_pipeline.cc
+++ b/src/gpu-compute/local_memory_pipeline.cc
@@ -62,11 +62,13 @@ LocalMemPipeline::exec()
lmReturnedRequests.front() : nullptr;
bool accessVrf = true;
+ Wavefront *w = nullptr;
+
if ((m) && (m->isLoad() || m->isAtomicRet())) {
- Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
+ w = m->wavefront();
accessVrf =
- w->computeUnit->vrf[m->simdId]->
+ w->computeUnit->vrf[w->simdId]->
vrfOperandAccessReady(m->seqNum(), w, m,
VrfAccessType::WRITE);
}
@@ -74,44 +76,29 @@ LocalMemPipeline::exec()
if (!lmReturnedRequests.empty() && m->latency.rdy() && accessVrf &&
computeUnit->locMemToVrfBus.rdy() && (computeUnit->shader->coissue_return
|| computeUnit->wfWait.at(m->pipeId).rdy())) {
- if (m->v_type == VT_32 && m->m_type == Enums::M_U8)
- doSmReturn<uint32_t, uint8_t>(m);
- else if (m->v_type == VT_32 && m->m_type == Enums::M_U16)
- doSmReturn<uint32_t, uint16_t>(m);
- else if (m->v_type == VT_32 && m->m_type == Enums::M_U32)
- doSmReturn<uint32_t, uint32_t>(m);
- else if (m->v_type == VT_32 && m->m_type == Enums::M_S8)
- doSmReturn<int32_t, int8_t>(m);
- else if (m->v_type == VT_32 && m->m_type == Enums::M_S16)
- doSmReturn<int32_t, int16_t>(m);
- else if (m->v_type == VT_32 && m->m_type == Enums::M_S32)
- doSmReturn<int32_t, int32_t>(m);
- else if (m->v_type == VT_32 && m->m_type == Enums::M_F16)
- doSmReturn<float, Float16>(m);
- else if (m->v_type == VT_32 && m->m_type == Enums::M_F32)
- doSmReturn<float, float>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_U8)
- doSmReturn<uint64_t, uint8_t>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_U16)
- doSmReturn<uint64_t, uint16_t>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_U32)
- doSmReturn<uint64_t, uint32_t>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_U64)
- doSmReturn<uint64_t, uint64_t>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_S8)
- doSmReturn<int64_t, int8_t>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_S16)
- doSmReturn<int64_t, int16_t>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_S32)
- doSmReturn<int64_t, int32_t>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_S64)
- doSmReturn<int64_t, int64_t>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_F16)
- doSmReturn<double, Float16>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_F32)
- doSmReturn<double, float>(m);
- else if (m->v_type == VT_64 && m->m_type == Enums::M_F64)
- doSmReturn<double, double>(m);
+
+ lmReturnedRequests.pop();
+ w = m->wavefront();
+
+ m->completeAcc(m);
+
+ // Decrement outstanding request count
+ computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
+
+ if (m->isStore() || m->isAtomic()) {
+ computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
+ m->time, -1);
+ }
+
+ if (m->isLoad() || m->isAtomic()) {
+ computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
+ m->time, -1);
+ }
+
+ // Mark write bus busy for appropriate amount of time
+ computeUnit->locMemToVrfBus.set(m->time);
+ if (computeUnit->shader->coissue_return == 0)
+ w->computeUnit->wfWait.at(m->pipeId).set(m->time);
}
// If pipeline has executed a local memory instruction
@@ -129,65 +116,6 @@ LocalMemPipeline::exec()
}
}
-template<typename c0, typename c1>
-void
-LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
-{
- lmReturnedRequests.pop();
- Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
-
- // Return data to registers
- if (m->isLoad() || m->isAtomicRet()) {
- std::vector<uint32_t> regVec;
- for (int k = 0; k < m->n_reg; ++k) {
- int dst = m->dst_reg+k;
-
- if (m->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
- dst = m->dst_reg_vec[k];
- // virtual->physical VGPR mapping
- int physVgpr = w->remap(dst,sizeof(c0),1);
- // save the physical VGPR index
- regVec.push_back(physVgpr);
- c1 *p1 = &((c1 *)m->d_data)[k * w->computeUnit->wfSize()];
-
- for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
- if (m->exec_mask[i]) {
- // write the value into the physical VGPR. This is a purely
- // functional operation. No timing is modeled.
- w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr,
- *p1, i);
- }
- ++p1;
- }
- }
-
- // Schedule the write operation of the load data on the VRF. This simply
- // models the timing aspect of the VRF write operation. It does not
- // modify the physical VGPR.
- loadVrfBankConflictCycles +=
- w->computeUnit->vrf[w->simdId]->exec(m->seqNum(), w,
- regVec, sizeof(c0), m->time);
- }
-
- // Decrement outstanding request count
- computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
-
- if (m->isStore() || m->isAtomic()) {
- computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
- m->time, -1);
- }
-
- if (m->isLoad() || m->isAtomic()) {
- computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
- m->time, -1);
- }
-
- // Mark write bus busy for appropriate amount of time
- computeUnit->locMemToVrfBus.set(m->time);
- if (computeUnit->shader->coissue_return == 0)
- w->computeUnit->wfWait.at(m->pipeId).set(m->time);
-}
-
void
LocalMemPipeline::regStats()
{