diff options
Diffstat (limited to 'src/gpu-compute/global_memory_pipeline.cc')
-rw-r--r-- | src/gpu-compute/global_memory_pipeline.cc | 153 |
1 files changed, 36 insertions, 117 deletions
diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index ab3e8c47e..f48af5a6f 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -65,13 +65,15 @@ GlobalMemPipeline::exec() !gmReturnedStores.empty() ? gmReturnedStores.front() : nullptr; bool accessVrf = true; + Wavefront *w = nullptr; + // check the VRF to see if the operands of a load (or load component // of an atomic) are accessible if ((m) && (m->isLoad() || m->isAtomicRet())) { - Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; + w = m->wavefront(); accessVrf = - w->computeUnit->vrf[m->simdId]-> + w->computeUnit->vrf[w->simdId]-> vrfOperandAccessReady(m->seqNum(), w, m, VrfAccessType::WRITE); } @@ -82,44 +84,38 @@ GlobalMemPipeline::exec() (computeUnit->shader->coissue_return || computeUnit->wfWait.at(m->pipeId).rdy())) { - if (m->v_type == VT_32 && m->m_type == Enums::M_U8) - doGmReturn<uint32_t, uint8_t>(m); - else if (m->v_type == VT_32 && m->m_type == Enums::M_U16) - doGmReturn<uint32_t, uint16_t>(m); - else if (m->v_type == VT_32 && m->m_type == Enums::M_U32) - doGmReturn<uint32_t, uint32_t>(m); - else if (m->v_type == VT_32 && m->m_type == Enums::M_S8) - doGmReturn<int32_t, int8_t>(m); - else if (m->v_type == VT_32 && m->m_type == Enums::M_S16) - doGmReturn<int32_t, int16_t>(m); - else if (m->v_type == VT_32 && m->m_type == Enums::M_S32) - doGmReturn<int32_t, int32_t>(m); - else if (m->v_type == VT_32 && m->m_type == Enums::M_F16) - doGmReturn<float, Float16>(m); - else if (m->v_type == VT_32 && m->m_type == Enums::M_F32) - doGmReturn<float, float>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_U8) - doGmReturn<uint64_t, uint8_t>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_U16) - doGmReturn<uint64_t, uint16_t>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_U32) - doGmReturn<uint64_t, uint32_t>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_U64) - doGmReturn<uint64_t, uint64_t>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_S8) - doGmReturn<int64_t, int8_t>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_S16) - doGmReturn<int64_t, int16_t>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_S32) - doGmReturn<int64_t, int32_t>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_S64) - doGmReturn<int64_t, int64_t>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_F16) - doGmReturn<double, Float16>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_F32) - doGmReturn<double, float>(m); - else if (m->v_type == VT_64 && m->m_type == Enums::M_F64) - doGmReturn<double, double>(m); + w = m->wavefront(); + + m->completeAcc(m); + + if (m->isLoad() || m->isAtomic()) { + gmReturnedLoads.pop(); + assert(inflightLoads > 0); + --inflightLoads; + } else { + assert(m->isStore()); + gmReturnedStores.pop(); + assert(inflightStores > 0); + --inflightStores; + } + + // Decrement outstanding register count + computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); + + if (m->isStore() || m->isAtomic()) { + computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, + m->time, -1); + } + + if (m->isLoad() || m->isAtomic()) { + computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, + m->time, -1); + } + + // Mark write bus busy for appropriate amount of time + computeUnit->glbMemToVrfBus.set(m->time); + if (!computeUnit->shader->coissue_return) + w->computeUnit->wfWait.at(m->pipeId).set(m->time); } // If pipeline has executed a global memory instruction @@ -149,83 +145,6 @@ GlobalMemPipeline::exec() } } -template<typename c0, typename c1> -void -GlobalMemPipeline::doGmReturn(GPUDynInstPtr m) -{ - Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; - - // Return data to registers - if (m->isLoad() || m->isAtomic()) { - gmReturnedLoads.pop(); - assert(inflightLoads > 0); - --inflightLoads; - - if (m->isLoad() || m->isAtomicRet()) { - std::vector<uint32_t> regVec; - // iterate over number of destination register operands since - // this is a load or atomic operation - for (int k = 0; k < m->n_reg; ++k) { - assert((sizeof(c1) * m->n_reg) <= MAX_WIDTH_FOR_MEM_INST); - int dst = m->dst_reg + k; - - if (m->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST) - dst = m->dst_reg_vec[k]; - // virtual->physical VGPR mapping - int physVgpr = w->remap(dst, sizeof(c0), 1); - // save the physical VGPR index - regVec.push_back(physVgpr); - c1 *p1 = &((c1 *)m->d_data)[k * w->computeUnit->wfSize()]; - - for (int i = 0; i < w->computeUnit->wfSize(); ++i) { - if (m->exec_mask[i]) { - DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: " - "$%s%d <- %d global ld done (src = wavefront " - "ld inst)\n", w->computeUnit->cu_id, w->simdId, - w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d", - dst, *p1); - // write the value into the physical VGPR. This is a - // purely functional operation. No timing is modeled. - w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr, - *p1, i); - } - ++p1; - } - } - - // Schedule the write operation of the load data on the VRF. - // This simply models the timing aspect of the VRF write operation. - // It does not modify the physical VGPR. - loadVrfBankConflictCycles += - w->computeUnit->vrf[w->simdId]->exec(m->seqNum(), - w, regVec, sizeof(c0), - m->time); - } - } else { - gmReturnedStores.pop(); - assert(inflightStores > 0); - --inflightStores; - } - - // Decrement outstanding register count - computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); - - if (m->isStore() || m->isAtomic()) { - computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time, - -1); - } - - if (m->isLoad() || m->isAtomic()) { - computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time, - -1); - } - - // Mark write bus busy for appropriate amount of time - computeUnit->glbMemToVrfBus.set(m->time); - if (!computeUnit->shader->coissue_return) - w->computeUnit->wfWait.at(m->pipeId).set(m->time); -} - void GlobalMemPipeline::regStats() { |