summaryrefslogtreecommitdiff
path: root/src/gpu-compute/compute_unit.cc
diff options
context:
space:
mode:
authorTony Gutierrez <anthony.gutierrez@amd.com>2016-10-26 22:47:11 -0400
committerTony Gutierrez <anthony.gutierrez@amd.com>2016-10-26 22:47:11 -0400
commit7ac38849abaf6aeccf39137bc8acb9e44d192e82 (patch)
tree7658e9d741604b310f871756cf051558b30e115e /src/gpu-compute/compute_unit.cc
parente1ad8035a379cea98ecef92e78d2894f60b2eedd (diff)
downloadgem5-7ac38849abaf6aeccf39137bc8acb9e44d192e82.tar.xz
gpu-compute: remove inst enums and use bit flag for attributes
this patch removes the GPUStaticInst enums that were defined in GPU.py. instead, a simple set of attribute flags that can be set in the base instruction class is used. this will help unify the attributes of HSAIL and machine ISA instructions within the model itself. because the static instruction now carries the attributes, a GPUDynInst must carry a pointer to a valid GPUStaticInst, so a new static kernel launch instruction is added, which carries the attributes needed to perform the kernel launch.
Diffstat (limited to 'src/gpu-compute/compute_unit.cc')
-rw-r--r--src/gpu-compute/compute_unit.cc26
1 files changed, 12 insertions, 14 deletions
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 97e018713..abf8ff2c5 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p),
req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(name() + ".ComputeUnit")),
- lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize)
+ lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize),
+ kernelLaunchInst(new KernelLaunchStaticInst())
{
/**
* This check is necessary because std::bitset only provides conversion
@@ -316,13 +317,11 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
// Send L1 cache acquire
// isKernel + isAcquire = Kernel Begin
if (shader->impl_kern_boundary_sync) {
- GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(this,
- nullptr,
- nullptr, 0);
+ GPUDynInstPtr gpuDynInst =
+ std::make_shared<GPUDynInst>(this, nullptr, kernelLaunchInst,
+ getAndIncSeqNum());
gpuDynInst->useContinuation = false;
- gpuDynInst->memoryOrder = Enums::MEMORY_ORDER_SC_ACQUIRE;
- gpuDynInst->scope = Enums::MEMORY_SCOPE_SYSTEM;
injectGlobalMemFence(gpuDynInst, true);
}
@@ -647,7 +646,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
gpuDynInst->wfSlotId, w->barrierCnt);
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -658,7 +657,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
return true;
} else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -942,6 +941,8 @@ void
ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
Request* req)
{
+ assert(gpuDynInst->isGlobalSeg());
+
if (!req) {
req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
}
@@ -950,8 +951,6 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
req->setFlags(Request::KERNEL);
}
- gpuDynInst->s_type = SEG_GLOBAL;
-
// for non-kernel MemFence operations, memorder flags are set depending
// on which type of request is currently being sent, so this
// should be set by the caller (e.g. if an inst has acq-rel
@@ -1033,8 +1032,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
gpuDynInst->statusVector.clear();
- if (gpuDynInst->m_op == Enums::MO_LD || MO_A(gpuDynInst->m_op)
- || MO_ANR(gpuDynInst->m_op)) {
+ if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy());
compute_unit->globalMemoryPipe.getGMLdRespFIFO()
@@ -1055,7 +1053,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
// the continuation may generate more work for
// this memory request
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -1065,7 +1063,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
gpuDynInst->statusBitVector = VectorMask(0);
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}