diff options
Diffstat (limited to 'src/gpu-compute/compute_unit.cc')
-rw-r--r-- | src/gpu-compute/compute_unit.cc | 26 |
1 files changed, 12 insertions, 14 deletions
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 97e018713..abf8ff2c5 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p), req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()), resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()), _masterId(p->system->getMasterId(name() + ".ComputeUnit")), - lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize) + lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize), + kernelLaunchInst(new KernelLaunchStaticInst()) { /** * This check is necessary because std::bitset only provides conversion @@ -316,13 +317,11 @@ ComputeUnit::StartWorkgroup(NDRange *ndr) // Send L1 cache acquire // isKernel + isAcquire = Kernel Begin if (shader->impl_kern_boundary_sync) { - GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(this, - nullptr, - nullptr, 0); + GPUDynInstPtr gpuDynInst = + std::make_shared<GPUDynInst>(this, nullptr, kernelLaunchInst, + getAndIncSeqNum()); gpuDynInst->useContinuation = false; - gpuDynInst->memoryOrder = Enums::MEMORY_ORDER_SC_ACQUIRE; - gpuDynInst->scope = Enums::MEMORY_SCOPE_SYSTEM; injectGlobalMemFence(gpuDynInst, true); } @@ -647,7 +646,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt) gpuDynInst->wfSlotId, w->barrierCnt); if (gpuDynInst->useContinuation) { - assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); + assert(!gpuDynInst->isNoScope()); gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst); } @@ -658,7 +657,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt) return true; } else if (pkt->req->isKernel() && pkt->req->isAcquire()) { if (gpuDynInst->useContinuation) { - assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); + assert(!gpuDynInst->isNoScope()); gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst); } @@ -942,6 +941,8 @@ void ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch, Request* req) { + assert(gpuDynInst->isGlobalSeg()); + if (!req) { req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId); } @@ -950,8 +951,6 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch, req->setFlags(Request::KERNEL); } - gpuDynInst->s_type = SEG_GLOBAL; - // for non-kernel MemFence operations, memorder flags are set depending // on which type of request is currently being sent, so this // should be set by the caller (e.g. if an inst has acq-rel @@ -1033,8 +1032,7 @@ ComputeUnit::DataPort::MemRespEvent::process() if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST) gpuDynInst->statusVector.clear(); - if (gpuDynInst->m_op == Enums::MO_LD || MO_A(gpuDynInst->m_op) - || MO_ANR(gpuDynInst->m_op)) { + if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) { assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy()); compute_unit->globalMemoryPipe.getGMLdRespFIFO() @@ -1055,7 +1053,7 @@ ComputeUnit::DataPort::MemRespEvent::process() // the continuation may generate more work for // this memory request if (gpuDynInst->useContinuation) { - assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); + assert(!gpuDynInst->isNoScope()); gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst); } @@ -1065,7 +1063,7 @@ ComputeUnit::DataPort::MemRespEvent::process() gpuDynInst->statusBitVector = VectorMask(0); if (gpuDynInst->useContinuation) { - assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); + assert(!gpuDynInst->isNoScope()); gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst); } |