Diffstat (limited to 'src/gpu-compute')
-rw-r--r-- | src/gpu-compute/GPU.py | 108
-rw-r--r-- | src/gpu-compute/GPUStaticInstFlags.py | 111
-rw-r--r-- | src/gpu-compute/SConscript | 1
-rw-r--r-- | src/gpu-compute/code_enums.hh | 116
-rw-r--r-- | src/gpu-compute/compute_unit.cc | 26
-rw-r--r-- | src/gpu-compute/compute_unit.hh | 1
-rw-r--r-- | src/gpu-compute/global_memory_pipeline.cc | 23
-rw-r--r-- | src/gpu-compute/gpu_dyn_inst.cc | 382
-rw-r--r-- | src/gpu-compute/gpu_dyn_inst.hh | 219
-rw-r--r-- | src/gpu-compute/gpu_static_inst.cc | 6
-rw-r--r-- | src/gpu-compute/gpu_static_inst.hh | 167
-rw-r--r-- | src/gpu-compute/kernel_cfg.cc | 10
-rw-r--r-- | src/gpu-compute/lds_state.cc | 7
-rw-r--r-- | src/gpu-compute/lds_state.hh | 1
-rw-r--r-- | src/gpu-compute/local_memory_pipeline.cc | 9
-rw-r--r-- | src/gpu-compute/shader.hh | 1
-rw-r--r-- | src/gpu-compute/vector_register_file.cc | 5
-rw-r--r-- | src/gpu-compute/wavefront.cc | 207
18 files changed, 802 insertions, 598 deletions
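
For orientation, a standalone C++ sketch of the pattern this change introduces: per-instruction flag bits set once in each instruction's constructor and queried through is*() accessors, replacing the old OT_*/MO_* enum ranges and the IS_OT_* macros from code_enums.hh. The flag and accessor names mirror the diff below; the GPUStaticInstSketch/GlobalLoadSketch classes and main() are invented here for illustration and are not part of the commit or of gem5.

// Standalone model of the flag-based classification (not gem5 code).
#include <bitset>
#include <cassert>
#include <string>

enum Flags { MemoryRef, Load, Store, AtomicReturn, AtomicNoReturn,
             GlobalSegment, GroupSegment, NoOrder, Num_Flags };

class GPUStaticInstSketch
{
  public:
    explicit GPUStaticInstSketch(const std::string &opcode)
        : opcode(opcode)
    {
        setFlag(NoOrder); // default memory order, as in GPUStaticInst's ctor
    }

    void setFlag(Flags flag) { _flags[flag] = true; }

    // These accessors replace range checks such as IS_OT_READ(a) or
    // IS_OT_ATOMIC_GM(a) at the call sites in the pipelines.
    bool isLoad() const { return _flags[Load]; }
    bool isStore() const { return _flags[Store]; }
    bool isAtomic() const { return _flags[AtomicReturn] || _flags[AtomicNoReturn]; }
    bool isGlobalMem() const { return _flags[MemoryRef] && _flags[GlobalSegment]; }
    bool isLocalMem() const { return _flags[MemoryRef] && _flags[GroupSegment]; }

    const std::string opcode;

  private:
    std::bitset<Num_Flags> _flags;
};

// Hypothetical global-memory load, declared the same way KernelLaunchStaticInst
// is in the diff: the constructor states what the instruction is, and the
// memory pipelines only ever ask through the accessors.
struct GlobalLoadSketch : GPUStaticInstSketch
{
    GlobalLoadSketch() : GPUStaticInstSketch("global_load")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    }
};

int main()
{
    GlobalLoadSketch ld;
    assert(ld.isLoad() && ld.isGlobalMem() && !ld.isAtomic() && !ld.isLocalMem());
    return 0;
}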
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index f580a09f7..b672f616c 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -171,56 +171,6 @@ class GpuDispatcher(DmaDevice): cl_driver = Param.ClDriver('pointer to driver') -class OpType(Enum): vals = [ - 'OT_NULL', - 'OT_ALU', - 'OT_SPECIAL', - 'OT_GLOBAL_READ', - 'OT_GLOBAL_WRITE', - 'OT_GLOBAL_ATOMIC', - 'OT_GLOBAL_HIST', - 'OT_GLOBAL_LDAS', - 'OT_SHARED_READ', - 'OT_SHARED_WRITE', - 'OT_SHARED_ATOMIC', - 'OT_SHARED_HIST', - 'OT_SHARED_LDAS', - 'OT_PRIVATE_READ', - 'OT_PRIVATE_WRITE', - 'OT_PRIVATE_ATOMIC', - 'OT_PRIVATE_HIST', - 'OT_PRIVATE_LDAS', - 'OT_SPILL_READ', - 'OT_SPILL_WRITE', - 'OT_SPILL_ATOMIC', - 'OT_SPILL_HIST', - 'OT_SPILL_LDAS', - 'OT_READONLY_READ', - 'OT_READONLY_WRITE', - 'OT_READONLY_ATOMIC', - 'OT_READONLY_HIST', - 'OT_READONLY_LDAS', - 'OT_FLAT_READ', - 'OT_FLAT_WRITE', - 'OT_FLAT_ATOMIC', - 'OT_FLAT_HIST', - 'OT_FLAT_LDAS', - 'OT_KERN_READ', - 'OT_BRANCH', - - # note: Only the OT_BOTH_MEMFENCE seems to be supported in the 1.0F version - # of the compiler. - 'OT_SHARED_MEMFENCE', - 'OT_GLOBAL_MEMFENCE', - 'OT_BOTH_MEMFENCE', - - 'OT_BARRIER', - 'OT_PRINT', - 'OT_RET', - 'OT_NOP', - 'OT_ARG' - ] - class MemType(Enum): vals = [ 'M_U8', 'M_U16', @@ -235,47 +185,6 @@ class MemType(Enum): vals = [ 'M_F64', ] -class MemOpType(Enum): vals = [ - 'MO_LD', - 'MO_ST', - 'MO_LDAS', - 'MO_LDA', - 'MO_AAND', - 'MO_AOR', - 'MO_AXOR', - 'MO_ACAS', - 'MO_AEXCH', - 'MO_AADD', - 'MO_ASUB', - 'MO_AINC', - 'MO_ADEC', - 'MO_AMAX', - 'MO_AMIN', - 'MO_ANRAND', - 'MO_ANROR', - 'MO_ANRXOR', - 'MO_ANRCAS', - 'MO_ANREXCH', - 'MO_ANRADD', - 'MO_ANRSUB', - 'MO_ANRINC', - 'MO_ANRDEC', - 'MO_ANRMAX', - 'MO_ANRMIN', - 'MO_HAND', - 'MO_HOR', - 'MO_HXOR', - 'MO_HCAS', - 'MO_HEXCH', - 'MO_HADD', - 'MO_HSUB', - 'MO_HINC', - 'MO_HDEC', - 'MO_HMAX', - 'MO_HMIN', - 'MO_UNDEF' - ] - class StorageClassType(Enum): vals = [ 'SC_SPILL', 'SC_GLOBAL', @@ -293,20 +202,3 @@ class RegisterType(Enum): vals = [ 'RT_HARDWARE', 'RT_NONE', ] - -class GenericMemoryOrder(Enum): vals = [ - 'MEMORY_ORDER_NONE', - 'MEMORY_ORDER_RELAXED', - 'MEMORY_ORDER_SC_ACQUIRE', - 'MEMORY_ORDER_SC_RELEASE', - 'MEMORY_ORDER_SC_ACQUIRE_RELEASE', - ] - -class GenericMemoryScope(Enum): vals = [ - 'MEMORY_SCOPE_NONE', - 'MEMORY_SCOPE_WORKITEM', - 'MEMORY_SCOPE_WAVEFRONT', - 'MEMORY_SCOPE_WORKGROUP', - 'MEMORY_SCOPE_DEVICE', - 'MEMORY_SCOPE_SYSTEM', - ] diff --git a/src/gpu-compute/GPUStaticInstFlags.py b/src/gpu-compute/GPUStaticInstFlags.py new file mode 100644 index 000000000..453fdced2 --- /dev/null +++ b/src/gpu-compute/GPUStaticInstFlags.py @@ -0,0 +1,111 @@ +# Copyright (c) 2016 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Anthony Gutierrez + +from m5.params import * + +class GPUStaticInstFlags(Enum): + wrapper_name = 'GPUStaticInstFlags' + wrapper_is_struct = True + enum_name = 'Flags' + + vals = [ + # Op types + 'ALU', # ALU op + 'Branch', # Branch instruction + 'Nop', # No-op (no effect at all) + 'Return', # Return instruction + 'UnconditionalJump', # + 'SpecialOp', # Special op + 'Waitcnt', # Is a waitcnt instruction + + # Memory ops + 'MemBarrier', # Barrier instruction + 'MemFence', # Memory fence instruction + 'MemoryRef', # References memory (load, store, or atomic) + 'Flat', # Flat memory op + 'Load', # Reads from memory + 'Store', # Writes to memory + + # Atomic ops + 'AtomicReturn', # Atomic instruction that returns data + 'AtomicNoReturn', # Atomic instruction that doesn't return data + + # Instruction attributes + 'Scalar', # A scalar (not vector) operation + 'ReadsSCC', # The instruction reads SCC + 'WritesSCC', # The instruction writes SCC + 'ReadsVCC', # The instruction reads VCC + 'WritesVCC', # The instruction writes VCC + + # Atomic OP types + 'AtomicAnd', + 'AtomicOr', + 'AtomicXor', + 'AtomicCAS', + 'AtomicExch', + 'AtomicAdd', + 'AtomicSub', + 'AtomicInc', + 'AtomicDec', + 'AtomicMax', + 'AtomicMin', + + # Memory order flags + 'RelaxedOrder', + 'Acquire', # Has acquire semantics + 'Release', # Has release semantics + 'AcquireRelease', # Has acquire and release semantics + 'NoOrder', # Has no ordering restrictions + + # Segment access flags + 'ArgSegment', # Accesses the arg segment + 'GlobalSegment', # Accesses global memory + 'GroupSegment', # Accesses local memory (LDS), aka shared memory + 'KernArgSegment', # Accesses the kernel argument segment + 'PrivateSegment', # Accesses the private segment + 'ReadOnlySegment', # Accesses read only memory + 'SpillSegment', # Accesses the spill segment + 'NoSegment', # Does not have an associated segment + + # Scope flags + 'WorkitemScope', + 'WavefrontScope', + 'WorkgroupScope', + 'DeviceScope', + 'SystemScope', + 'NoScope', # Does not have an associated scope + + # Coherence flags + 'GloballyCoherent', # Coherent with other workitems on same device + 'SystemCoherent' # Coherent with a different device, or the host + ] diff --git a/src/gpu-compute/SConscript b/src/gpu-compute/SConscript index 88c1cf036..8cf1ed8cf 100644 --- a/src/gpu-compute/SConscript +++ b/src/gpu-compute/SConscript @@ -41,6 +41,7 @@ if not env['BUILD_GPU']: Return() SimObject('GPU.py') +SimObject('GPUStaticInstFlags.py') SimObject('LdsState.py') SimObject('X86GPUTLB.py') diff --git a/src/gpu-compute/code_enums.hh b/src/gpu-compute/code_enums.hh deleted file mode 100644 index 6cd9bfe26..000000000 --- a/src/gpu-compute/code_enums.hh +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 
2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#ifndef __CODE_ENUMS_HH__ -#define __CODE_ENUMS_HH__ - -#define IS_OT_GLOBAL(a) ((a)>=Enums::OT_GLOBAL_READ \ - && (a)<=Enums::OT_GLOBAL_LDAS) -#define IS_OT_SHARED(a) ((a)>=Enums::OT_SHARED_READ \ - && (a)<=Enums::OT_SHARED_LDAS) -#define IS_OT_PRIVATE(a) ((a)>=Enums::OT_PRIVATE_READ \ - && (a)<=Enums::OT_PRIVATE_LDAS) -#define IS_OT_SPILL(a) ((a)>=Enums::OT_SPILL_READ \ - && (a)<=Enums::OT_SPILL_LDAS) -#define IS_OT_READONLY(a) ((a)>=Enums::OT_READONLY_READ \ - && (a)<=Enums::OT_READONLY_LDAS) -#define IS_OT_FLAT(a) ((a)>=Enums::OT_FLAT_READ && (a)<=Enums::OT_FLAT_LDAS) - -#define IS_OT_LDAS(a) ((a)==Enums::OT_GLOBAL_LDAS||(a)==Enums::OT_SHARED_LDAS \ - ||(a)==Enums::OT_PRIVATE_LDAS||(a)==Enums::OT_SPILL_LDAS \ - ||(a)==Enums::OT_READONLY_LDAS||(a)==Enums::OT_FLAT_LDAS) - -#define IS_OT_READ(a) ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SHARED_READ \ - ||(a)==Enums::OT_PRIVATE_READ||(a)==Enums::OT_SPILL_READ \ - ||(a)==Enums::OT_READONLY_READ||(a)==Enums::OT_FLAT_READ) - -#define IS_OT_READ_GM(a) \ - ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SPILL_READ \ - ||(a)==Enums::OT_READONLY_READ) - -#define IS_OT_READ_LM(a) ((a)==Enums::OT_SHARED_READ) - -#define IS_OT_READ_RM(a) ((a)==Enums::OT_READONLY_READ) - -#define IS_OT_READ_PM(a) ((a)==Enums::OT_PRIVATE_READ) - -#define IS_OT_WRITE(a) \ - ((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SHARED_WRITE \ - ||(a)==Enums::OT_PRIVATE_WRITE||(a)==Enums::OT_SPILL_WRITE \ - ||(a)==Enums::OT_READONLY_WRITE||(a)==Enums::OT_FLAT_WRITE) - -#define IS_OT_WRITE_GM(a) \ - ((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SPILL_WRITE \ - ||(a)==Enums::OT_READONLY_WRITE) - -#define IS_OT_WRITE_LM(a) ((a)==Enums::OT_SHARED_WRITE) - -#define IS_OT_WRITE_PM(a) ((a)==Enums::OT_PRIVATE_WRITE) - -#define IS_OT_ATOMIC(a) ((a)==Enums::OT_GLOBAL_ATOMIC \ - ||(a)==Enums::OT_SHARED_ATOMIC \ - 
||(a)==Enums::OT_PRIVATE_ATOMIC \ - ||(a)==Enums::OT_SPILL_ATOMIC \ - ||(a)==Enums::OT_READONLY_ATOMIC \ - ||(a)==Enums::OT_BOTH_MEMFENCE \ - ||(a)==Enums::OT_FLAT_ATOMIC) - -#define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \ - ||(a)==Enums::OT_SPILL_ATOMIC \ - ||(a)==Enums::OT_READONLY_ATOMIC \ - ||(a)==Enums::OT_GLOBAL_MEMFENCE \ - ||(a)==Enums::OT_BOTH_MEMFENCE) - -#define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \ - ||(a)==Enums::OT_SHARED_MEMFENCE) - -#define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC) - -#define IS_OT_HIST(a) ((a)==Enums::OT_GLOBAL_HIST \ - ||(a)==Enums::OT_SHARED_HIST \ - ||(a)==Enums::OT_PRIVATE_HIST \ - ||(a)==Enums::OT_SPILL_HIST \ - ||(a)==Enums::OT_READONLY_HIST \ - ||(a)==Enums::OT_FLAT_HIST) - -#define IS_OT_HIST_GM(a) ((a)==Enums::OT_GLOBAL_HIST \ - ||(a)==Enums::OT_SPILL_HIST \ - ||(a)==Enums::OT_READONLY_HIST) - -#define IS_OT_HIST_LM(a) ((a)==Enums::OT_SHARED_HIST) - -#define IS_OT_HIST_PM(a) ((a)==Enums::OT_PRIVATE_HIST) - -#endif // __CODE_ENUMS_HH__ diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 97e018713..abf8ff2c5 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p), req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()), resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()), _masterId(p->system->getMasterId(name() + ".ComputeUnit")), - lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize) + lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize), + kernelLaunchInst(new KernelLaunchStaticInst()) { /** * This check is necessary because std::bitset only provides conversion @@ -316,13 +317,11 @@ ComputeUnit::StartWorkgroup(NDRange *ndr) // Send L1 cache acquire // isKernel + isAcquire = Kernel Begin if (shader->impl_kern_boundary_sync) { - GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(this, - nullptr, - nullptr, 0); + GPUDynInstPtr gpuDynInst = + std::make_shared<GPUDynInst>(this, nullptr, kernelLaunchInst, + getAndIncSeqNum()); gpuDynInst->useContinuation = false; - gpuDynInst->memoryOrder = Enums::MEMORY_ORDER_SC_ACQUIRE; - gpuDynInst->scope = Enums::MEMORY_SCOPE_SYSTEM; injectGlobalMemFence(gpuDynInst, true); } @@ -647,7 +646,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt) gpuDynInst->wfSlotId, w->barrierCnt); if (gpuDynInst->useContinuation) { - assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); + assert(!gpuDynInst->isNoScope()); gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst); } @@ -658,7 +657,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt) return true; } else if (pkt->req->isKernel() && pkt->req->isAcquire()) { if (gpuDynInst->useContinuation) { - assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); + assert(!gpuDynInst->isNoScope()); gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst); } @@ -942,6 +941,8 @@ void ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch, Request* req) { + assert(gpuDynInst->isGlobalSeg()); + if (!req) { req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId); } @@ -950,8 +951,6 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch, req->setFlags(Request::KERNEL); } - gpuDynInst->s_type = SEG_GLOBAL; - // for non-kernel MemFence operations, memorder flags are set depending // on which type of request is currently being sent, so this // should be 
set by the caller (e.g. if an inst has acq-rel @@ -1033,8 +1032,7 @@ ComputeUnit::DataPort::MemRespEvent::process() if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST) gpuDynInst->statusVector.clear(); - if (gpuDynInst->m_op == Enums::MO_LD || MO_A(gpuDynInst->m_op) - || MO_ANR(gpuDynInst->m_op)) { + if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) { assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy()); compute_unit->globalMemoryPipe.getGMLdRespFIFO() @@ -1055,7 +1053,7 @@ ComputeUnit::DataPort::MemRespEvent::process() // the continuation may generate more work for // this memory request if (gpuDynInst->useContinuation) { - assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); + assert(!gpuDynInst->isNoScope()); gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst); } @@ -1065,7 +1063,7 @@ ComputeUnit::DataPort::MemRespEvent::process() gpuDynInst->statusBitVector = VectorMask(0); if (gpuDynInst->useContinuation) { - assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE); + assert(!gpuDynInst->isNoScope()); gpuDynInst->execContinuation(gpuDynInst->staticInstruction(), gpuDynInst); } diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh index a3547402a..938658fd1 100644 --- a/src/gpu-compute/compute_unit.hh +++ b/src/gpu-compute/compute_unit.hh @@ -744,6 +744,7 @@ class ComputeUnit : public MemObject private: uint64_t globalSeqNum; int wavefrontSize; + GPUStaticInst *kernelLaunchInst; }; #endif // __COMPUTE_UNIT_HH__ diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index 102905ec8..ab3e8c47e 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -67,7 +67,7 @@ GlobalMemPipeline::exec() bool accessVrf = true; // check the VRF to see if the operands of a load (or load component // of an atomic) are accessible - if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) { + if ((m) && (m->isLoad() || m->isAtomicRet())) { Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; accessVrf = @@ -127,10 +127,7 @@ GlobalMemPipeline::exec() // memory packets to DTLB if (!gmIssuedRequests.empty()) { GPUDynInstPtr mp = gmIssuedRequests.front(); - if (mp->m_op == Enums::MO_LD || - (mp->m_op >= Enums::MO_AAND && mp->m_op <= Enums::MO_AMIN) || - (mp->m_op >= Enums::MO_ANRAND && mp->m_op <= Enums::MO_ANRMIN)) { - + if (mp->isLoad() || mp->isAtomic()) { if (inflightLoads >= gmQueueSize) { return; } else { @@ -139,7 +136,7 @@ GlobalMemPipeline::exec() } else { if (inflightStores >= gmQueueSize) { return; - } else if (mp->m_op == Enums::MO_ST) { + } else if (mp->isStore()) { ++inflightStores; } } @@ -147,9 +144,8 @@ GlobalMemPipeline::exec() mp->initiateAcc(mp); gmIssuedRequests.pop(); - DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = %s\n", - computeUnit->cu_id, mp->simdId, mp->wfSlotId, - Enums::MemOpTypeStrings[mp->m_op]); + DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n", + computeUnit->cu_id, mp->simdId, mp->wfSlotId); } } @@ -160,12 +156,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m) Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; // Return data to registers - if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { + if (m->isLoad() || m->isAtomic()) { gmReturnedLoads.pop(); assert(inflightLoads > 0); --inflightLoads; - if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) { + if (m->isLoad() || m->isAtomicRet()) { std::vector<uint32_t> regVec; // iterate over number of destination register operands since // this 
is a load or atomic operation @@ -214,13 +210,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m) // Decrement outstanding register count computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); - if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) || - MO_H(m->m_op)) { + if (m->isStore() || m->isAtomic()) { computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time, -1); } - if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { + if (m->isLoad() || m->isAtomic()) { computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time, -1); } diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index 1806e79e4..ec6340360 100644 --- a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -41,11 +41,10 @@ #include "gpu-compute/wavefront.hh" GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, - GPUStaticInst *_staticInst, uint64_t instSeqNum) + GPUStaticInst *static_inst, uint64_t instSeqNum) : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0), - m_op(Enums::MO_UNDEF), - memoryOrder(Enums::MEMORY_ORDER_NONE), n_reg(0), useContinuation(false), - statusBitVector(0), staticInst(_staticInst), _seqNum(instSeqNum) + n_reg(0), useContinuation(false), + statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum) { tlbHitLevel.assign(computeUnit()->wfSize(), -1); d_data = new uint8_t[computeUnit()->wfSize() * 16]; @@ -68,77 +67,69 @@ GPUDynInst::~GPUDynInst() } void -GPUDynInst::execute() +GPUDynInst::execute(GPUDynInstPtr gpuDynInst) { - GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(cu, wf, staticInst, - _seqNum); - staticInst->execute(gpuDynInst); + _staticInst->execute(gpuDynInst); } int GPUDynInst::numSrcRegOperands() { - return staticInst->numSrcRegOperands(); + return _staticInst->numSrcRegOperands(); } int GPUDynInst::numDstRegOperands() { - return staticInst->numDstRegOperands(); + return _staticInst->numDstRegOperands(); } int GPUDynInst::getNumOperands() { - return staticInst->getNumOperands(); + return _staticInst->getNumOperands(); } bool GPUDynInst::isVectorRegister(int operandIdx) { - return staticInst->isVectorRegister(operandIdx); + return _staticInst->isVectorRegister(operandIdx); } bool GPUDynInst::isScalarRegister(int operandIdx) { - return staticInst->isScalarRegister(operandIdx); + return _staticInst->isScalarRegister(operandIdx); } int GPUDynInst::getRegisterIndex(int operandIdx) { - return staticInst->getRegisterIndex(operandIdx); + return _staticInst->getRegisterIndex(operandIdx); } int GPUDynInst::getOperandSize(int operandIdx) { - return staticInst->getOperandSize(operandIdx); + return _staticInst->getOperandSize(operandIdx); } bool GPUDynInst::isDstOperand(int operandIdx) { - return staticInst->isDstOperand(operandIdx); + return _staticInst->isDstOperand(operandIdx); } bool GPUDynInst::isSrcOperand(int operandIdx) { - return staticInst->isSrcOperand(operandIdx); -} - -bool -GPUDynInst::isArgLoad() -{ - return staticInst->isArgLoad(); + return _staticInst->isSrcOperand(operandIdx); } const std::string& GPUDynInst::disassemble() const { - return staticInst->disassemble(); + return _staticInst->disassemble(); } uint64_t @@ -147,16 +138,10 @@ GPUDynInst::seqNum() const return _seqNum; } -Enums::OpType -GPUDynInst::opType() -{ - return staticInst->o_type; -} - Enums::StorageClassType GPUDynInst::executedAs() { - return staticInst->executed_as; + return _staticInst->executed_as; } // Process a memory instruction and (if necessary) submit timing request @@ 
-166,20 +151,347 @@ GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst) DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n", cu->cu_id, simdId, wfSlotId, exec_mask); - staticInst->initiateAcc(gpuDynInst); + _staticInst->initiateAcc(gpuDynInst); time = 0; } +/** + * accessor methods for the attributes of + * the underlying GPU static instruction + */ +bool +GPUDynInst::isALU() const +{ + return _staticInst->isALU(); +} + +bool +GPUDynInst::isBranch() const +{ + return _staticInst->isBranch(); +} + +bool +GPUDynInst::isNop() const +{ + return _staticInst->isNop(); +} + +bool +GPUDynInst::isReturn() const +{ + return _staticInst->isReturn(); +} + +bool +GPUDynInst::isUnconditionalJump() const +{ + return _staticInst->isUnconditionalJump(); +} + +bool +GPUDynInst::isSpecialOp() const +{ + return _staticInst->isSpecialOp(); +} + +bool +GPUDynInst::isWaitcnt() const +{ + return _staticInst->isWaitcnt(); +} + +bool +GPUDynInst::isBarrier() const +{ + return _staticInst->isBarrier(); +} + +bool +GPUDynInst::isMemFence() const +{ + return _staticInst->isMemFence(); +} + +bool +GPUDynInst::isMemRef() const +{ + return _staticInst->isMemRef(); +} + +bool +GPUDynInst::isFlat() const +{ + return _staticInst->isFlat(); +} + +bool +GPUDynInst::isLoad() const +{ + return _staticInst->isLoad(); +} + +bool +GPUDynInst::isStore() const +{ + return _staticInst->isStore(); +} + +bool +GPUDynInst::isAtomic() const +{ + return _staticInst->isAtomic(); +} + +bool +GPUDynInst::isAtomicNoRet() const +{ + return _staticInst->isAtomicNoRet(); +} + +bool +GPUDynInst::isAtomicRet() const +{ + return _staticInst->isAtomicRet(); +} + +bool +GPUDynInst::isScalar() const +{ + return _staticInst->isScalar(); +} + +bool +GPUDynInst::readsSCC() const +{ + return _staticInst->readsSCC(); +} + +bool +GPUDynInst::writesSCC() const +{ + return _staticInst->writesSCC(); +} + +bool +GPUDynInst::readsVCC() const +{ + return _staticInst->readsVCC(); +} + +bool +GPUDynInst::writesVCC() const +{ + return _staticInst->writesVCC(); +} + +bool +GPUDynInst::isAtomicAnd() const +{ + return _staticInst->isAtomicAnd(); +} + +bool +GPUDynInst::isAtomicOr() const +{ + return _staticInst->isAtomicOr(); +} + +bool +GPUDynInst::isAtomicXor() const +{ + return _staticInst->isAtomicXor(); +} + +bool +GPUDynInst::isAtomicCAS() const +{ + return _staticInst->isAtomicCAS(); +} + +bool GPUDynInst::isAtomicExch() const +{ + return _staticInst->isAtomicExch(); +} + +bool +GPUDynInst::isAtomicAdd() const +{ + return _staticInst->isAtomicAdd(); +} + +bool +GPUDynInst::isAtomicSub() const +{ + return _staticInst->isAtomicSub(); +} + +bool +GPUDynInst::isAtomicInc() const +{ + return _staticInst->isAtomicInc(); +} + +bool +GPUDynInst::isAtomicDec() const +{ + return _staticInst->isAtomicDec(); +} + +bool +GPUDynInst::isAtomicMax() const +{ + return _staticInst->isAtomicMax(); +} + +bool +GPUDynInst::isAtomicMin() const +{ + return _staticInst->isAtomicMin(); +} + +bool +GPUDynInst::isArgLoad() const +{ + return _staticInst->isArgLoad(); +} + +bool +GPUDynInst::isGlobalMem() const +{ + return _staticInst->isGlobalMem(); +} + +bool +GPUDynInst::isLocalMem() const +{ + return _staticInst->isLocalMem(); +} + +bool +GPUDynInst::isArgSeg() const +{ + return _staticInst->isArgSeg(); +} + +bool +GPUDynInst::isGlobalSeg() const +{ + return _staticInst->isGlobalSeg(); +} + +bool +GPUDynInst::isGroupSeg() const +{ + return _staticInst->isGroupSeg(); +} + +bool +GPUDynInst::isKernArgSeg() const +{ + return _staticInst->isKernArgSeg(); +} + +bool 
+GPUDynInst::isPrivateSeg() const +{ + return _staticInst->isPrivateSeg(); +} + +bool +GPUDynInst::isReadOnlySeg() const +{ + return _staticInst->isReadOnlySeg(); +} + +bool +GPUDynInst::isSpillSeg() const +{ + return _staticInst->isSpillSeg(); +} + +bool +GPUDynInst::isWorkitemScope() const +{ + return _staticInst->isWorkitemScope(); +} + +bool +GPUDynInst::isWavefrontScope() const +{ + return _staticInst->isWavefrontScope(); +} + +bool +GPUDynInst::isWorkgroupScope() const +{ + return _staticInst->isWorkgroupScope(); +} + +bool +GPUDynInst::isDeviceScope() const +{ + return _staticInst->isDeviceScope(); +} + +bool +GPUDynInst::isSystemScope() const +{ + return _staticInst->isSystemScope(); +} + +bool +GPUDynInst::isNoScope() const +{ + return _staticInst->isNoScope(); +} + +bool +GPUDynInst::isRelaxedOrder() const +{ + return _staticInst->isRelaxedOrder(); +} + +bool +GPUDynInst::isAcquire() const +{ + return _staticInst->isAcquire(); +} + +bool +GPUDynInst::isRelease() const +{ + return _staticInst->isRelease(); +} + +bool +GPUDynInst::isAcquireRelease() const +{ + return _staticInst->isAcquireRelease(); +} + +bool +GPUDynInst::isNoOrder() const +{ + return _staticInst->isNoOrder(); +} + +bool +GPUDynInst::isGloballyCoherent() const +{ + return _staticInst->isGloballyCoherent(); +} + bool -GPUDynInst::scalarOp() const +GPUDynInst::isSystemCoherent() const { - return staticInst->scalarOp(); + return _staticInst->isSystemCoherent(); } void GPUDynInst::updateStats() { - if (staticInst->isLocalMem()) { + if (_staticInst->isLocalMem()) { // access to LDS (shared) memory cu->dynamicLMemInstrCnt++; } else { diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh index 46774d867..c07d85d78 100644 --- a/src/gpu-compute/gpu_dyn_inst.hh +++ b/src/gpu-compute/gpu_dyn_inst.hh @@ -39,11 +39,7 @@ #include <cstdint> #include <string> -#include "enums/GenericMemoryOrder.hh" -#include "enums/GenericMemoryScope.hh" -#include "enums/MemOpType.hh" #include "enums/MemType.hh" -#include "enums/OpType.hh" #include "enums/StorageClassType.hh" #include "gpu-compute/compute_unit.hh" #include "gpu-compute/gpu_exec_context.hh" @@ -180,33 +176,19 @@ class AtomicOpMin : public TypedAtomicOpFunctor<T> } }; -#define MO_A(a) ((a)>=Enums::MO_AAND && (a)<=Enums::MO_AMIN) -#define MO_ANR(a) ((a)>=Enums::MO_ANRAND && (a)<=Enums::MO_ANRMIN) -#define MO_H(a) ((a)>=Enums::MO_HAND && (a)<=Enums::MO_HMIN) - typedef enum { VT_32, VT_64, } vgpr_type; -typedef enum -{ - SEG_PRIVATE, - SEG_SPILL, - SEG_GLOBAL, - SEG_SHARED, - SEG_READONLY, - SEG_FLAT -} seg_type; - class GPUDynInst : public GPUExecContext { public: - GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *_staticInst, + GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum); ~GPUDynInst(); - void execute(); + void execute(GPUDynInstPtr gpuDynInst); int numSrcRegOperands(); int numDstRegOperands(); int getNumOperands(); @@ -216,13 +198,11 @@ class GPUDynInst : public GPUExecContext int getOperandSize(int operandIdx); bool isDstOperand(int operandIdx); bool isSrcOperand(int operandIdx); - bool isArgLoad(); const std::string &disassemble() const; uint64_t seqNum() const; - Enums::OpType opType(); Enums::StorageClassType executedAs(); // The address of the memory operation @@ -240,14 +220,7 @@ class GPUDynInst : public GPUExecContext // The memory type (M_U32, M_S32, ...) Enums::MemType m_type; - // The memory operation (MO_LD, MO_ST, ...) 
- Enums::MemOpType m_op; - Enums::GenericMemoryOrder memoryOrder; - - // Scope of the request - Enums::GenericMemoryScope scope; - // The memory segment (SEG_SHARED, SEG_GLOBAL, ...) - seg_type s_type; + // The equivalency class int equiv; // The return VGPR type (VT_32 or VT_64) @@ -288,10 +261,72 @@ class GPUDynInst : public GPUExecContext void updateStats(); - GPUStaticInst* staticInstruction() { return staticInst; } - - // Is the instruction a scalar or vector op? - bool scalarOp() const; + GPUStaticInst* staticInstruction() { return _staticInst; } + + bool isALU() const; + bool isBranch() const; + bool isNop() const; + bool isReturn() const; + bool isUnconditionalJump() const; + bool isSpecialOp() const; + bool isWaitcnt() const; + + bool isBarrier() const; + bool isMemFence() const; + bool isMemRef() const; + bool isFlat() const; + bool isLoad() const; + bool isStore() const; + + bool isAtomic() const; + bool isAtomicNoRet() const; + bool isAtomicRet() const; + + bool isScalar() const; + bool readsSCC() const; + bool writesSCC() const; + bool readsVCC() const; + bool writesVCC() const; + + bool isAtomicAnd() const; + bool isAtomicOr() const; + bool isAtomicXor() const; + bool isAtomicCAS() const; + bool isAtomicExch() const; + bool isAtomicAdd() const; + bool isAtomicSub() const; + bool isAtomicInc() const; + bool isAtomicDec() const; + bool isAtomicMax() const; + bool isAtomicMin() const; + + bool isArgLoad() const; + bool isGlobalMem() const; + bool isLocalMem() const; + + bool isArgSeg() const; + bool isGlobalSeg() const; + bool isGroupSeg() const; + bool isKernArgSeg() const; + bool isPrivateSeg() const; + bool isReadOnlySeg() const; + bool isSpillSeg() const; + + bool isWorkitemScope() const; + bool isWavefrontScope() const; + bool isWorkgroupScope() const; + bool isDeviceScope() const; + bool isSystemScope() const; + bool isNoScope() const; + + bool isRelaxedOrder() const; + bool isAcquire() const; + bool isRelease() const; + bool isAcquireRelease() const; + bool isNoOrder() const; + + bool isGloballyCoherent() const; + bool isSystemCoherent() const; /* * Loads/stores/atomics may have acquire/release semantics associated @@ -312,46 +347,32 @@ class GPUDynInst : public GPUExecContext bool useContinuation; template<typename c0> AtomicOpFunctor* - makeAtomicOpFunctor(c0 *reg0, c0 *reg1, Enums::MemOpType op) + makeAtomicOpFunctor(c0 *reg0, c0 *reg1) { - using namespace Enums; - - switch(op) { - case MO_AAND: - case MO_ANRAND: + if (isAtomicAnd()) { return new AtomicOpAnd<c0>(*reg0); - case MO_AOR: - case MO_ANROR: + } else if (isAtomicOr()) { return new AtomicOpOr<c0>(*reg0); - case MO_AXOR: - case MO_ANRXOR: + } else if (isAtomicXor()) { return new AtomicOpXor<c0>(*reg0); - case MO_ACAS: - case MO_ANRCAS: + } else if (isAtomicCAS()) { return new AtomicOpCAS<c0>(*reg0, *reg1, cu); - case MO_AEXCH: - case MO_ANREXCH: + } else if (isAtomicExch()) { return new AtomicOpExch<c0>(*reg0); - case MO_AADD: - case MO_ANRADD: + } else if (isAtomicAdd()) { return new AtomicOpAdd<c0>(*reg0); - case MO_ASUB: - case MO_ANRSUB: + } else if (isAtomicSub()) { return new AtomicOpSub<c0>(*reg0); - case MO_AINC: - case MO_ANRINC: + } else if (isAtomicInc()) { return new AtomicOpInc<c0>(); - case MO_ADEC: - case MO_ANRDEC: + } else if (isAtomicDec()) { return new AtomicOpDec<c0>(); - case MO_AMAX: - case MO_ANRMAX: + } else if (isAtomicMax()) { return new AtomicOpMax<c0>(*reg0); - case MO_AMIN: - case MO_ANRMIN: + } else if (isAtomicMin()) { return new AtomicOpMin<c0>(*reg0); - default: - 
panic("Unrecognized atomic operation"); + } else { + fatal("Unrecognized atomic operation"); } } @@ -359,88 +380,58 @@ class GPUDynInst : public GPUExecContext setRequestFlags(Request *req, bool setMemOrder=true) { // currently these are the easy scopes to deduce - switch (s_type) { - case SEG_PRIVATE: + if (isPrivateSeg()) { req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT); - break; - case SEG_SPILL: + } else if (isSpillSeg()) { req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT); - break; - case SEG_GLOBAL: + } else if (isGlobalSeg()) { req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT); - break; - case SEG_READONLY: + } else if (isReadOnlySeg()) { req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT); - break; - case SEG_SHARED: + } else if (isGroupSeg()) { req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT); - break; - case SEG_FLAT: + } else if (isFlat()) { // TODO: translate to correct scope assert(false); - default: - panic("Bad segment type"); - break; + } else { + fatal("%s has bad segment type\n", disassemble()); } - switch (scope) { - case Enums::MEMORY_SCOPE_NONE: - case Enums::MEMORY_SCOPE_WORKITEM: - break; - case Enums::MEMORY_SCOPE_WAVEFRONT: + if (isWavefrontScope()) { req->setMemSpaceConfigFlags(Request::SCOPE_VALID | Request::WAVEFRONT_SCOPE); - break; - case Enums::MEMORY_SCOPE_WORKGROUP: + } else if (isWorkgroupScope()) { req->setMemSpaceConfigFlags(Request::SCOPE_VALID | Request::WORKGROUP_SCOPE); - break; - case Enums::MEMORY_SCOPE_DEVICE: + } else if (isDeviceScope()) { req->setMemSpaceConfigFlags(Request::SCOPE_VALID | Request::DEVICE_SCOPE); - break; - case Enums::MEMORY_SCOPE_SYSTEM: + } else if (isSystemScope()) { req->setMemSpaceConfigFlags(Request::SCOPE_VALID | Request::SYSTEM_SCOPE); - break; - default: - panic("Bad scope type"); - break; + } else if (!isNoScope() && !isWorkitemScope()) { + fatal("%s has bad scope type\n", disassemble()); } if (setMemOrder) { // set acquire and release flags - switch (memoryOrder){ - case Enums::MEMORY_ORDER_SC_ACQUIRE: + if (isAcquire()) { req->setFlags(Request::ACQUIRE); - break; - case Enums::MEMORY_ORDER_SC_RELEASE: + } else if (isRelease()) { req->setFlags(Request::RELEASE); - break; - case Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE: + } else if (isAcquireRelease()) { req->setFlags(Request::ACQUIRE | Request::RELEASE); - break; - default: - break; + } else if (!isNoOrder()) { + fatal("%s has bad memory order\n", disassemble()); } } // set atomic type // currently, the instruction genenerator only produces atomic return // but a magic instruction can produce atomic no return - if (m_op == Enums::MO_AADD || m_op == Enums::MO_ASUB || - m_op == Enums::MO_AAND || m_op == Enums::MO_AOR || - m_op == Enums::MO_AXOR || m_op == Enums::MO_AMAX || - m_op == Enums::MO_AMIN || m_op == Enums::MO_AINC || - m_op == Enums::MO_ADEC || m_op == Enums::MO_AEXCH || - m_op == Enums::MO_ACAS) { + if (isAtomicRet()) { req->setFlags(Request::ATOMIC_RETURN_OP); - } else if (m_op == Enums::MO_ANRADD || m_op == Enums::MO_ANRSUB || - m_op == Enums::MO_ANRAND || m_op == Enums::MO_ANROR || - m_op == Enums::MO_ANRXOR || m_op == Enums::MO_ANRMAX || - m_op == Enums::MO_ANRMIN || m_op == Enums::MO_ANRINC || - m_op == Enums::MO_ANRDEC || m_op == Enums::MO_ANREXCH || - m_op == Enums::MO_ANRCAS) { + } else if (isAtomicNoRet()) { req->setFlags(Request::ATOMIC_NO_RETURN_OP); } } @@ -457,7 +448,7 @@ class GPUDynInst : public GPUExecContext std::vector<int> tlbHitLevel; private: - GPUStaticInst *staticInst; + GPUStaticInst *_staticInst; uint64_t _seqNum; }; 
diff --git a/src/gpu-compute/gpu_static_inst.cc b/src/gpu-compute/gpu_static_inst.cc index 83b429e62..0f74bd532 100644 --- a/src/gpu-compute/gpu_static_inst.cc +++ b/src/gpu-compute/gpu_static_inst.cc @@ -36,10 +36,12 @@ #include "gpu-compute/gpu_static_inst.hh" GPUStaticInst::GPUStaticInst(const std::string &opcode) - : o_type(Enums::OT_ALU), executed_as(Enums::SC_NONE), opcode(opcode), - _instNum(0), _scalarOp(false) + : executed_as(Enums::SC_NONE), opcode(opcode), + _instNum(0) { + setFlag(NoOrder); } + const std::string& GPUStaticInst::disassemble() { diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh index 911e4f308..a73ec12e3 100644 --- a/src/gpu-compute/gpu_static_inst.hh +++ b/src/gpu-compute/gpu_static_inst.hh @@ -48,7 +48,7 @@ #include <cstdint> #include <string> -#include "enums/OpType.hh" +#include "enums/GPUStaticInstFlags.hh" #include "enums/StorageClassType.hh" #include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/misc.hh" @@ -57,7 +57,7 @@ class BaseOperand; class BaseRegOperand; class Wavefront; -class GPUStaticInst +class GPUStaticInst : public GPUStaticInstFlags { public: GPUStaticInst(const std::string &opcode); @@ -86,22 +86,110 @@ class GPUStaticInst virtual bool isValid() const = 0; - /* - * Most instructions (including all HSAIL instructions) - * are vector ops, so _scalarOp will be false by default. - * Derived instruction objects that are scalar ops must - * set _scalarOp to true in their constructors. - */ - bool scalarOp() const { return _scalarOp; } + bool isALU() const { return _flags[ALU]; } + bool isBranch() const { return _flags[Branch]; } + bool isNop() const { return _flags[Nop]; } + bool isReturn() const { return _flags[Return]; } + + bool + isUnconditionalJump() const + { + return _flags[UnconditionalJump]; + } + + bool isSpecialOp() const { return _flags[SpecialOp]; } + bool isWaitcnt() const { return _flags[Waitcnt]; } + + bool isBarrier() const { return _flags[MemBarrier]; } + bool isMemFence() const { return _flags[MemFence]; } + bool isMemRef() const { return _flags[MemoryRef]; } + bool isFlat() const { return _flags[Flat]; } + bool isLoad() const { return _flags[Load]; } + bool isStore() const { return _flags[Store]; } + + bool + isAtomic() const + { + return _flags[AtomicReturn] || _flags[AtomicNoReturn]; + } + + bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; } + bool isAtomicRet() const { return _flags[AtomicReturn]; } + + bool isScalar() const { return _flags[Scalar]; } + bool readsSCC() const { return _flags[ReadsSCC]; } + bool writesSCC() const { return _flags[WritesSCC]; } + bool readsVCC() const { return _flags[ReadsVCC]; } + bool writesVCC() const { return _flags[WritesVCC]; } - virtual bool isLocalMem() const + bool isAtomicAnd() const { return _flags[AtomicAnd]; } + bool isAtomicOr() const { return _flags[AtomicOr]; } + bool isAtomicXor() const { return _flags[AtomicXor]; } + bool isAtomicCAS() const { return _flags[AtomicCAS]; } + bool isAtomicExch() const { return _flags[AtomicExch]; } + bool isAtomicAdd() const { return _flags[AtomicAdd]; } + bool isAtomicSub() const { return _flags[AtomicSub]; } + bool isAtomicInc() const { return _flags[AtomicInc]; } + bool isAtomicDec() const { return _flags[AtomicDec]; } + bool isAtomicMax() const { return _flags[AtomicMax]; } + bool isAtomicMin() const { return _flags[AtomicMin]; } + + bool + isArgLoad() const + { + return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load]; + } + + bool + isGlobalMem() const { - fatal("calling 
isLocalMem() on non-memory instruction.\n"); + return _flags[MemoryRef] && (_flags[GlobalSegment] || + _flags[PrivateSegment] || _flags[ReadOnlySegment] || + _flags[SpillSegment]); + } - return false; + bool + isLocalMem() const + { + return _flags[MemoryRef] && _flags[GroupSegment]; } - bool isArgLoad() { return false; } + bool isArgSeg() const { return _flags[ArgSegment]; } + bool isGlobalSeg() const { return _flags[GlobalSegment]; } + bool isGroupSeg() const { return _flags[GroupSegment]; } + bool isKernArgSeg() const { return _flags[KernArgSegment]; } + bool isPrivateSeg() const { return _flags[PrivateSegment]; } + bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; } + bool isSpillSeg() const { return _flags[SpillSegment]; } + + bool isWorkitemScope() const { return _flags[WorkitemScope]; } + bool isWavefrontScope() const { return _flags[WavefrontScope]; } + bool isWorkgroupScope() const { return _flags[WorkgroupScope]; } + bool isDeviceScope() const { return _flags[DeviceScope]; } + bool isSystemScope() const { return _flags[SystemScope]; } + bool isNoScope() const { return _flags[NoScope]; } + + bool isRelaxedOrder() const { return _flags[RelaxedOrder]; } + bool isAcquire() const { return _flags[Acquire]; } + bool isRelease() const { return _flags[Release]; } + bool isAcquireRelease() const { return _flags[AcquireRelease]; } + bool isNoOrder() const { return _flags[NoOrder]; } + + /** + * Coherence domain of a memory instruction. Only valid for + * machine ISA. The coherence domain specifies where it is + * possible to perform memory synchronization, e.g., acquire + * or release, from the shader kernel. + * + * isGloballyCoherent(): returns true if kernel is sharing memory + * with other work-items on the same device (GPU) + * + * isSystemCoherent(): returns true if kernel is sharing memory + * with other work-items on a different device (GPU) or the host (CPU) + */ + bool isGloballyCoherent() const { return _flags[GloballyCoherent]; } + bool isSystemCoherent() const { return _flags[SystemCoherent]; } + virtual uint32_t instSize() = 0; // only used for memory instructions @@ -120,22 +208,13 @@ class GPUStaticInst virtual uint32_t getTargetPc() { return 0; } - /** - * Query whether the instruction is an unconditional jump i.e., the jump - * is always executed because there is no condition to be evaluated. - * - * If the instruction is not of branch type, the result is always false. - * - * @return True if the instruction is an unconditional jump. 
- */ - virtual bool unconditionalJumpInstruction() { return false; } - static uint64_t dynamic_id_count; - Enums::OpType o_type; // For flat memory accesses Enums::StorageClassType executed_as; + void setFlag(Flags flag) { _flags[flag] = true; } + protected: virtual void execLdAcq(GPUDynInstPtr gpuDynInst) @@ -169,7 +248,45 @@ class GPUStaticInst */ int _ipdInstNum; - bool _scalarOp; + std::bitset<Num_Flags> _flags; +}; + +class KernelLaunchStaticInst : public GPUStaticInst +{ + public: + KernelLaunchStaticInst() : GPUStaticInst("kernel_launch") + { + setFlag(Nop); + setFlag(Scalar); + setFlag(Acquire); + setFlag(SystemScope); + setFlag(GlobalSegment); + } + + void + execute(GPUDynInstPtr gpuDynInst) + { + fatal("kernel launch instruction should not be executed\n"); + } + + void + generateDisassembly() + { + disassembly = opcode; + } + + int getNumOperands() { return 0; } + bool isCondRegister(int operandIndex) { return false; } + bool isScalarRegister(int operandIndex) { return false; } + bool isVectorRegister(int operandIndex) { return false; } + bool isSrcOperand(int operandIndex) { return false; } + bool isDstOperand(int operandIndex) { return false; } + int getOperandSize(int operandIndex) { return 0; } + int getRegisterIndex(int operandIndex) { return 0; } + int numDstRegOperands() { return 0; } + int numSrcRegOperands() { return 0; } + bool isValid() const { return true; } + uint32_t instSize() { return 0; } }; #endif // __GPU_STATIC_INST_HH__ diff --git a/src/gpu-compute/kernel_cfg.cc b/src/gpu-compute/kernel_cfg.cc index 10ded11b7..ac6a81b16 100644 --- a/src/gpu-compute/kernel_cfg.cc +++ b/src/gpu-compute/kernel_cfg.cc @@ -104,7 +104,7 @@ ControlFlowInfo::createBasicBlocks() leaders.insert(0); for (int i = 1; i < instructions.size(); i++) { GPUStaticInst* instruction = instructions[i]; - if (instruction->o_type == Enums::OT_BRANCH) { + if (instruction->isBranch()) { const int target_pc = instruction->getTargetPc(); leaders.insert(target_pc); leaders.insert(i + 1); @@ -137,18 +137,18 @@ ControlFlowInfo::connectBasicBlocks() break; } GPUStaticInst* last = lastInstruction(bb.get()); - if (last->o_type == Enums::OT_RET) { + if (last->isReturn()) { bb->successorIds.insert(exit_bb->id); continue; } - if (last->o_type == Enums::OT_BRANCH) { + if (last->isBranch()) { const uint32_t target_pc = last->getTargetPc(); BasicBlock* target_bb = basicBlock(target_pc); bb->successorIds.insert(target_bb->id); } // Unconditional jump instructions have a unique successor - if (!last->unconditionalJumpInstruction()) { + if (!last->isUnconditionalJump()) { BasicBlock* next_bb = basicBlock(last->instNum() + 1); bb->successorIds.insert(next_bb->id); } @@ -274,7 +274,7 @@ ControlFlowInfo::printBasicBlocks() const int inst_num = inst->instNum(); std::cout << inst_num << " [" << basicBlock(inst_num)->id << "]: " << inst->disassemble(); - if (inst->o_type == Enums::OT_BRANCH) { + if (inst->isBranch()) { std::cout << ", PC = " << inst->getTargetPc(); } std::cout << std::endl; diff --git a/src/gpu-compute/lds_state.cc b/src/gpu-compute/lds_state.cc index d4a27318a..fad98c886 100644 --- a/src/gpu-compute/lds_state.cc +++ b/src/gpu-compute/lds_state.cc @@ -141,8 +141,7 @@ LdsState::countBankConflicts(GPUDynInstPtr gpuDynInst, } } - if (gpuDynInst->m_op == Enums::MO_LD || - gpuDynInst->m_op == Enums::MO_ST) { + if (gpuDynInst->isLoad() || gpuDynInst->isStore()) { // mask identical addresses for (int j = 0; j < numBanks; ++j) { for (int j0 = 0; j0 < j; j0++) { @@ -208,8 +207,8 @@ LdsState::processPacket(PacketPtr 
packet) GPUDynInstPtr dynInst = getDynInstr(packet); // account for the LDS bank conflict overhead - int busLength = (dynInst->m_op == Enums::MO_LD) ? parent->loadBusLength() : - (dynInst->m_op == Enums::MO_ST) ? parent->storeBusLength() : + int busLength = (dynInst->isLoad()) ? parent->loadBusLength() : + (dynInst->isStore()) ? parent->storeBusLength() : parent->loadBusLength(); // delay for accessing the LDS Tick processingTime = diff --git a/src/gpu-compute/lds_state.hh b/src/gpu-compute/lds_state.hh index 58d109493..5fcbe82c0 100644 --- a/src/gpu-compute/lds_state.hh +++ b/src/gpu-compute/lds_state.hh @@ -43,7 +43,6 @@ #include <utility> #include <vector> -#include "enums/MemOpType.hh" #include "enums/MemType.hh" #include "gpu-compute/misc.hh" #include "mem/mem_object.hh" diff --git a/src/gpu-compute/local_memory_pipeline.cc b/src/gpu-compute/local_memory_pipeline.cc index e2238bf45..80dad6fcd 100644 --- a/src/gpu-compute/local_memory_pipeline.cc +++ b/src/gpu-compute/local_memory_pipeline.cc @@ -62,7 +62,7 @@ LocalMemPipeline::exec() lmReturnedRequests.front() : nullptr; bool accessVrf = true; - if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) { + if ((m) && (m->isLoad() || m->isAtomicRet())) { Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; accessVrf = @@ -137,7 +137,7 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m) Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId]; // Return data to registers - if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) { + if (m->isLoad() || m->isAtomicRet()) { std::vector<uint32_t> regVec; for (int k = 0; k < m->n_reg; ++k) { int dst = m->dst_reg+k; @@ -172,13 +172,12 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m) // Decrement outstanding request count computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); - if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) - || MO_H(m->m_op)) { + if (m->isStore() || m->isAtomic()) { computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm, m->time, -1); } - if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) { + if (m->isLoad() || m->isAtomic()) { computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm, m->time, -1); } diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh index c1f741d6a..13afab977 100644 --- a/src/gpu-compute/shader.hh +++ b/src/gpu-compute/shader.hh @@ -47,7 +47,6 @@ #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" #include "cpu/thread_state.hh" -#include "enums/MemOpType.hh" #include "enums/MemType.hh" #include "gpu-compute/compute_unit.hh" #include "gpu-compute/gpu_tlb.hh" diff --git a/src/gpu-compute/vector_register_file.cc b/src/gpu-compute/vector_register_file.cc index c43d765af..c50c06cc6 100644 --- a/src/gpu-compute/vector_register_file.cc +++ b/src/gpu-compute/vector_register_file.cc @@ -38,7 +38,6 @@ #include <string> #include "base/misc.hh" -#include "gpu-compute/code_enums.hh" #include "gpu-compute/compute_unit.hh" #include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/shader.hh" @@ -153,8 +152,8 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const void VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w) { - bool loadInstr = IS_OT_READ(ii->opType()); - bool atomicInstr = IS_OT_ATOMIC(ii->opType()); + bool loadInstr = ii->isLoad(); + bool atomicInstr = ii->isAtomic() || ii->isMemFence(); bool loadNoArgInstr = loadInstr && !ii->isArgLoad(); diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index c677cbe41..caeed85a7 100644 --- 
a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -37,7 +37,6 @@ #include "debug/GPUExec.hh" #include "debug/WavefrontStack.hh" -#include "gpu-compute/code_enums.hh" #include "gpu-compute/compute_unit.hh" #include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/shader.hh" @@ -165,19 +164,8 @@ Wavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr) bool Wavefront::isGmInstruction(GPUDynInstPtr ii) { - if (IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) || - IS_OT_ATOMIC_PM(ii->opType())) { + if (ii->isGlobalMem() || ii->isFlat()) return true; - } - - if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) || - IS_OT_ATOMIC_GM(ii->opType())) { - return true; - } - - if (IS_OT_FLAT(ii->opType())) { - return true; - } return false; } @@ -185,8 +173,7 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii) bool Wavefront::isLmInstruction(GPUDynInstPtr ii) { - if (IS_OT_READ_LM(ii->opType()) || IS_OT_WRITE_LM(ii->opType()) || - IS_OT_ATOMIC_LM(ii->opType())) { + if (ii->isLocalMem()) { return true; } @@ -199,10 +186,9 @@ Wavefront::isOldestInstALU() assert(!instructionBuffer.empty()); GPUDynInstPtr ii = instructionBuffer.front(); - if (status != S_STOPPED && (ii->opType() == Enums::OT_NOP || - ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH || - ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || - ii->opType() == Enums::OT_KERN_READ)) { + if (status != S_STOPPED && (ii->isNop() || + ii->isReturn() || ii->isBranch() || + ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) { return true; } @@ -215,7 +201,7 @@ Wavefront::isOldestInstBarrier() assert(!instructionBuffer.empty()); GPUDynInstPtr ii = instructionBuffer.front(); - if (status != S_STOPPED && ii->opType() == Enums::OT_BARRIER) { + if (status != S_STOPPED && ii->isBarrier()) { return true; } @@ -228,9 +214,7 @@ Wavefront::isOldestInstGMem() assert(!instructionBuffer.empty()); GPUDynInstPtr ii = instructionBuffer.front(); - if (status != S_STOPPED && (IS_OT_READ_GM(ii->opType()) || - IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) { - + if (status != S_STOPPED && ii->isGlobalMem()) { return true; } @@ -243,9 +227,7 @@ Wavefront::isOldestInstLMem() assert(!instructionBuffer.empty()); GPUDynInstPtr ii = instructionBuffer.front(); - if (status != S_STOPPED && (IS_OT_READ_LM(ii->opType()) || - IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) { - + if (status != S_STOPPED && ii->isLocalMem()) { return true; } @@ -258,9 +240,7 @@ Wavefront::isOldestInstPrivMem() assert(!instructionBuffer.empty()); GPUDynInstPtr ii = instructionBuffer.front(); - if (status != S_STOPPED && (IS_OT_READ_PM(ii->opType()) || - IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) { - + if (status != S_STOPPED && ii->isPrivateSeg()) { return true; } @@ -273,8 +253,7 @@ Wavefront::isOldestInstFlatMem() assert(!instructionBuffer.empty()); GPUDynInstPtr ii = instructionBuffer.front(); - if (status != S_STOPPED && IS_OT_FLAT(ii->opType())) { - + if (status != S_STOPPED && ii->isFlat()) { return true; } @@ -289,7 +268,7 @@ Wavefront::instructionBufferHasBranch() for (auto it : instructionBuffer) { GPUDynInstPtr ii = it; - if (ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH) { + if (ii->isReturn() || ii->isBranch()) { return true; } } @@ -371,23 +350,16 @@ Wavefront::ready(itype_e type) // checking readiness will be fixed eventually. 
In the meantime, let's // make sure that we do not silently let an instruction type slip // through this logic and always return not ready. - if (!(ii->opType() == Enums::OT_BARRIER || ii->opType() == Enums::OT_NOP || - ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH || - ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || - ii->opType() == Enums::OT_KERN_READ || - ii->opType() == Enums::OT_ARG || - IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) || - IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_READ_LM(ii->opType()) || - IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) || - IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) || - IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_FLAT(ii->opType()))) { + if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() || + ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() || + ii->isMemFence() || ii->isFlat())) { panic("next instruction: %s is of unknown type\n", ii->disassemble()); } DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n", computeUnit->cu_id, simdId, wfSlotId, ii->disassemble()); - if (type == I_ALU && ii->opType() == Enums::OT_BARRIER) { + if (type == I_ALU && ii->isBarrier()) { // Here for ALU instruction (barrier) if (!computeUnit->wfWait[simdId].prerdy()) { // Is wave slot free? @@ -400,7 +372,7 @@ Wavefront::ready(itype_e type) } ready_inst = true; - } else if (type == I_ALU && ii->opType() == Enums::OT_NOP) { + } else if (type == I_ALU && ii->isNop()) { // Here for ALU instruction (nop) if (!computeUnit->wfWait[simdId].prerdy()) { // Is wave slot free? @@ -408,7 +380,7 @@ Wavefront::ready(itype_e type) } ready_inst = true; - } else if (type == I_ALU && ii->opType() == Enums::OT_RET) { + } else if (type == I_ALU && ii->isReturn()) { // Here for ALU instruction (return) if (!computeUnit->wfWait[simdId].prerdy()) { // Is wave slot free? @@ -421,10 +393,10 @@ Wavefront::ready(itype_e type) } ready_inst = true; - } else if (type == I_ALU && (ii->opType() == Enums::OT_BRANCH || - ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || - ii->opType() == Enums::OT_KERN_READ || - ii->opType() == Enums::OT_ARG)) { + } else if (type == I_ALU && (ii->isBranch() || + ii->isALU() || + (ii->isKernArgSeg() && ii->isLoad()) || + ii->isArgSeg())) { // Here for ALU instruction (all others) if (!computeUnit->wfWait[simdId].prerdy()) { // Is alu slot free? @@ -439,18 +411,16 @@ Wavefront::ready(itype_e type) return 0; } ready_inst = true; - } else if (type == I_GLOBAL && (IS_OT_READ_GM(ii->opType()) || - IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) { + } else if (type == I_GLOBAL && ii->isGlobalMem()) { // Here Global memory instruction - if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) { + if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) { // Are there in pipe or outstanding global memory write requests? if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { return 0; } } - if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) || - IS_OT_HIST_GM(ii->opType())) { + if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) { // Are there in pipe or outstanding global memory read requests? 
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) return 0; @@ -480,17 +450,15 @@ Wavefront::ready(itype_e type) return 0; } ready_inst = true; - } else if (type == I_SHARED && (IS_OT_READ_LM(ii->opType()) || - IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) { + } else if (type == I_SHARED && ii->isLocalMem()) { // Here for Shared memory instruction - if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) { + if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) { if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) { return 0; } } - if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) || - IS_OT_HIST_LM(ii->opType())) { + if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) { if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) { return 0; } @@ -519,47 +487,7 @@ Wavefront::ready(itype_e type) return 0; } ready_inst = true; - } else if (type == I_PRIVATE && (IS_OT_READ_PM(ii->opType()) || - IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) { - // Here for Private memory instruction ------------------------ // - if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) { - if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { - return 0; - } - } - - if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) || - IS_OT_HIST_PM(ii->opType())) { - if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) { - return 0; - } - } - - if (!glbMemBusRdy) { - // Is there an available VRF->Global memory read bus? - return 0; - } - - if (!glbMemIssueRdy) { - // Is wave slot free? - return 0; - } - - if (!computeUnit->globalMemoryPipe. - isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) { - // Can we insert a new request to the Global Mem Request FIFO? - return 0; - } - // can we schedule source & destination operands on the VRF? - if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, - VrfAccessType::RD_WR)) { - return 0; - } - if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { - return 0; - } - ready_inst = true; - } else if (type == I_FLAT && IS_OT_FLAT(ii->opType())) { + } else if (type == I_FLAT && ii->isFlat()) { if (!glbMemBusRdy) { // Is there an available VRF->Global memory read bus? return 0; @@ -618,23 +546,22 @@ Wavefront::updateResources() assert(ii); computeUnit->vrf[simdId]->updateResources(this, ii); // Single precision ALU or Branch or Return or Special instruction - if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL || - ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) || + if (ii->isALU() || ii->isSpecialOp() || + ii->isBranch() || // FIXME: Kernel argument loads are currently treated as ALU operations // since we don't send memory packets at execution. 
         // we should map them to one of the memory pipelines
-        ii->opType()==Enums::OT_KERN_READ ||
-        ii->opType()==Enums::OT_ARG ||
-        ii->opType()==Enums::OT_RET) {
+        (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
+        ii->isReturn()) {
         computeUnit->aluPipe[simdId].preset(computeUnit->shader->
             ticks(computeUnit->spBypassLength()));
         // this is to enforce a fixed number of cycles per issue slot per SIMD
         computeUnit->wfWait[simdId].preset(computeUnit->shader->
             ticks(computeUnit->issuePeriod));
-    } else if (ii->opType() == Enums::OT_BARRIER) {
+    } else if (ii->isBarrier()) {
         computeUnit->wfWait[simdId].preset(computeUnit->shader->
             ticks(computeUnit->issuePeriod));
-    } else if (ii->opType() == Enums::OT_FLAT_READ) {
+    } else if (ii->isLoad() && ii->isFlat()) {
         assert(Enums::SC_NONE != ii->executedAs());
         memReqsInPipe++;
         rdGmReqsInPipe++;
@@ -649,7 +576,7 @@ Wavefront::updateResources()
             computeUnit->wfWait[computeUnit->GlbMemUnitId()].
                 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
         }
-    } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
+    } else if (ii->isStore() && ii->isFlat()) {
         assert(Enums::SC_NONE != ii->executedAs());
         memReqsInPipe++;
         wrGmReqsInPipe++;
@@ -664,21 +591,21 @@ Wavefront::updateResources()
             computeUnit->wfWait[computeUnit->GlbMemUnitId()].
                 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
         }
-    } else if (IS_OT_READ_GM(ii->opType())) {
+    } else if (ii->isLoad() && ii->isGlobalMem()) {
         memReqsInPipe++;
         rdGmReqsInPipe++;
         computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
             preset(computeUnit->shader->ticks(4));
         computeUnit->wfWait[computeUnit->GlbMemUnitId()].
             preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_WRITE_GM(ii->opType())) {
+    } else if (ii->isStore() && ii->isGlobalMem()) {
         memReqsInPipe++;
         wrGmReqsInPipe++;
         computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
             preset(computeUnit->shader->ticks(8));
         computeUnit->wfWait[computeUnit->GlbMemUnitId()].
             preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_ATOMIC_GM(ii->opType())) {
+    } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
         memReqsInPipe++;
         wrGmReqsInPipe++;
         rdGmReqsInPipe++;
@@ -686,21 +613,21 @@ Wavefront::updateResources()
             preset(computeUnit->shader->ticks(8));
         computeUnit->wfWait[computeUnit->GlbMemUnitId()].
             preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_READ_LM(ii->opType())) {
+    } else if (ii->isLoad() && ii->isLocalMem()) {
         memReqsInPipe++;
         rdLmReqsInPipe++;
         computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
             preset(computeUnit->shader->ticks(4));
         computeUnit->wfWait[computeUnit->ShrMemUnitId()].
             preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_WRITE_LM(ii->opType())) {
+    } else if (ii->isStore() && ii->isLocalMem()) {
         memReqsInPipe++;
         wrLmReqsInPipe++;
         computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
             preset(computeUnit->shader->ticks(8));
         computeUnit->wfWait[computeUnit->ShrMemUnitId()].
             preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_ATOMIC_LM(ii->opType())) {
+    } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
         memReqsInPipe++;
         wrLmReqsInPipe++;
         rdLmReqsInPipe++;
@@ -708,28 +635,6 @@ Wavefront::updateResources()
             preset(computeUnit->shader->ticks(8));
         computeUnit->wfWait[computeUnit->ShrMemUnitId()].
             preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_READ_PM(ii->opType())) {
-        memReqsInPipe++;
-        rdGmReqsInPipe++;
-        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
-            preset(computeUnit->shader->ticks(4));
-        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
-            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_WRITE_PM(ii->opType())) {
-        memReqsInPipe++;
-        wrGmReqsInPipe++;
-        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
-            preset(computeUnit->shader->ticks(8));
-        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
-            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_ATOMIC_PM(ii->opType())) {
-        memReqsInPipe++;
-        wrGmReqsInPipe++;
-        rdGmReqsInPipe++;
-        computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
-            preset(computeUnit->shader->ticks(8));
-        computeUnit->wfWait[computeUnit->GlbMemUnitId()].
-            preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
     }
 }
@@ -751,7 +656,7 @@ Wavefront::exec()
     DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
             "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
             ii->disassemble(), old_pc);
-    ii->execute();
+    ii->execute(ii);
     // access the VRF
     computeUnit->vrf[simdId]->exec(ii, this);
     srcRegOpDist.sample(ii->numSrcRegOperands());
@@ -785,24 +690,24 @@ Wavefront::exec()
     // ---- Update Vector ALU pipeline and other resources ------------------ //
     // Single precision ALU or Branch or Return or Special instruction
-    if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
-        ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
+    if (ii->isALU() || ii->isSpecialOp() ||
+        ii->isBranch() ||
         // FIXME: Kernel argument loads are currently treated as ALU operations
         // since we don't send memory packets at execution. If we fix that then
         // we should map them to one of the memory pipelines
-        ii->opType() == Enums::OT_KERN_READ ||
-        ii->opType() == Enums::OT_ARG ||
-        ii->opType() == Enums::OT_RET) {
+        (ii->isKernArgSeg() && ii->isLoad()) ||
+        ii->isArgSeg() ||
+        ii->isReturn()) {
         computeUnit->aluPipe[simdId].set(computeUnit->shader->
             ticks(computeUnit->spBypassLength()));
         // this is to enforce a fixed number of cycles per issue slot per SIMD
         computeUnit->wfWait[simdId].set(computeUnit->shader->
             ticks(computeUnit->issuePeriod));
-    } else if (ii->opType() == Enums::OT_BARRIER) {
+    } else if (ii->isBarrier()) {
         computeUnit->wfWait[simdId].set(computeUnit->shader->
             ticks(computeUnit->issuePeriod));
-    } else if (ii->opType() == Enums::OT_FLAT_READ) {
+    } else if (ii->isLoad() && ii->isFlat()) {
         assert(Enums::SC_NONE != ii->executedAs());
         if (Enums::SC_SHARED == ii->executedAs()) {
@@ -816,7 +721,7 @@ Wavefront::exec()
             computeUnit->wfWait[computeUnit->GlbMemUnitId()].
                 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
         }
-    } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
+    } else if (ii->isStore() && ii->isFlat()) {
         assert(Enums::SC_NONE != ii->executedAs());
         if (Enums::SC_SHARED == ii->executedAs()) {
             computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
@@ -829,32 +734,32 @@ Wavefront::exec()
             computeUnit->wfWait[computeUnit->GlbMemUnitId()].
                 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
         }
-    } else if (IS_OT_READ_GM(ii->opType())) {
+    } else if (ii->isLoad() && ii->isGlobalMem()) {
         computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
             set(computeUnit->shader->ticks(4));
         computeUnit->wfWait[computeUnit->GlbMemUnitId()].
             set(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_WRITE_GM(ii->opType())) {
+    } else if (ii->isStore() && ii->isGlobalMem()) {
         computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
             set(computeUnit->shader->ticks(8));
         computeUnit->wfWait[computeUnit->GlbMemUnitId()].
             set(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_ATOMIC_GM(ii->opType())) {
+    } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
         computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
             set(computeUnit->shader->ticks(8));
         computeUnit->wfWait[computeUnit->GlbMemUnitId()].
             set(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_READ_LM(ii->opType())) {
+    } else if (ii->isLoad() && ii->isLocalMem()) {
         computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
             set(computeUnit->shader->ticks(4));
         computeUnit->wfWait[computeUnit->ShrMemUnitId()].
             set(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_WRITE_LM(ii->opType())) {
+    } else if (ii->isStore() && ii->isLocalMem()) {
        computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
            set(computeUnit->shader->ticks(8));
        computeUnit->wfWait[computeUnit->ShrMemUnitId()].
            set(computeUnit->shader->ticks(computeUnit->issuePeriod));
-    } else if (IS_OT_ATOMIC_LM(ii->opType())) {
+    } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
        computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
            set(computeUnit->shader->ticks(8));
        computeUnit->wfWait[computeUnit->ShrMemUnitId()].
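
The patch replaces the retired OpType/MemOpType categories with per-instruction flag predicates, so every check in wavefront.cc becomes a conjunction of an operation kind (isLoad(), isStore(), isAtomic(), isMemFence()) and a segment (isGlobalMem(), isLocalMem(), isFlat(), isKernArgSeg(), isArgSeg()). The sketch below is not from the gem5 tree: the FakeInst stub and the isGlobalRead() helper are invented for illustration, and only the predicate names themselves come from the diff above.

// Illustrative only: a stand-in instruction type exposing the flag-style
// predicates used in the diff. The real GPUStaticInst derives these from
// GPUStaticInstFlags; this stub hard-codes a few so the mapping can be
// shown in isolation.
#include <cassert>

struct FakeInst
{
    bool load = false, store = false, atomic = false, memfence = false;
    bool globalSeg = false, groupSeg = false, flat = false;

    bool isLoad() const { return load; }
    bool isStore() const { return store; }
    bool isAtomic() const { return atomic; }
    bool isMemFence() const { return memfence; }
    bool isGlobalMem() const { return globalSeg; }
    bool isLocalMem() const { return groupSeg; }
    bool isFlat() const { return flat; }
};

// Roughly how the retired categories map onto flag combinations:
//   IS_OT_READ_GM(op)   ->  isLoad()   && isGlobalMem()
//   IS_OT_WRITE_GM(op)  ->  isStore()  && isGlobalMem()
//   IS_OT_ATOMIC_GM(op) ->  isAtomic() && isGlobalMem()
//   IS_OT_READ_LM(op)   ->  isLoad()   && isLocalMem()
//   OT_FLAT_READ        ->  isLoad()   && isFlat()
bool
isGlobalRead(const FakeInst &ii)
{
    return ii.isLoad() && ii.isGlobalMem();
}

int
main()
{
    FakeInst ld;
    ld.load = true;
    ld.globalSeg = true;
    assert(isGlobalRead(ld));    // the legacy OT_GLOBAL_READ case
    return 0;
}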
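Before and after the rewrite, the global- and local-memory arms of Wavefront::ready() enforce the same ordering rule: a load (or atomic/fence) does not issue while writes to that segment are still outstanding or in the pipe, and a store (or atomic/fence) does not issue while reads are. A minimal stand-alone sketch of that gate follows; SegCounters and mayIssue() are placeholder names, and the per-segment counters are reduced to plain integers.

#include <iostream>

// Placeholder counters standing in for outstandingReqsRdGm/WrGm (or the
// Lm variants) and the matching *ReqsInPipe counters kept per wavefront.
struct SegCounters
{
    int outstandingRd = 0, rdInPipe = 0;
    int outstandingWr = 0, wrInPipe = 0;
};

// Returns true when a memory instruction with the given flags may issue to
// this segment, mirroring the read-after-write / write-after-read gating
// in the ready() hunks above.
bool
mayIssue(const SegCounters &c, bool isLoad, bool isStore, bool isAtomicOrFence)
{
    if ((isLoad || isAtomicOrFence) && (c.outstandingWr + c.wrInPipe) > 0) {
        return false;   // reads wait for pending writes to drain
    }
    if ((isStore || isAtomicOrFence) && (c.outstandingRd + c.rdInPipe) > 0) {
        return false;   // writes wait for pending reads to drain
    }
    return true;
}

int
main()
{
    SegCounters gm;
    gm.wrInPipe = 1;
    std::cout << mayIssue(gm, true, false, false) << '\n';   // 0: load blocked
    std::cout << mayIssue(gm, false, true, false) << '\n';   // 1: store may go
    return 0;
}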
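updateResources() and exec() reserve the same per-SIMD resources for a memory instruction, the first with preset() when the wave is scheduled and the second with set() when it actually issues. The latencies visible in the diff are uniform: 4 shader cycles on the VRF-to-memory bus for a load, 8 for a store or atomic/fence, and one issuePeriod on the memory unit's wave slot. The sketch below condenses only that latency choice; Timer, ticks(), and reserveMemPipes() are simplified stand-ins, not gem5's WaitClass interface.

#include <cstdint>
#include <iostream>

// Stand-in for a pipeline-stage timer (WaitClass in gem5), reduced to a
// single "reserved for this many ticks" value.
struct Timer
{
    std::uint64_t reservedTicks = 0;
    void reserve(std::uint64_t t) { reservedTicks = t; }
};

// Stand-in for Shader::ticks(): convert shader cycles to ticks.
inline std::uint64_t ticks(std::uint64_t cycles) { return cycles * 1000; }

struct MemInstFlags { bool load; bool store; bool atomicOrFence; };

// Mirror of the latency choices in the hunks above: loads hold the
// VRF->memory bus for 4 cycles, stores and atomics/fences for 8; the
// memory unit's issue slot is always held for one issuePeriod.
void
reserveMemPipes(const MemInstFlags &f, Timer &vrfToMemBus, Timer &memUnitSlot,
                std::uint64_t issuePeriod)
{
    const std::uint64_t busCycles = (f.store || f.atomicOrFence) ? 8 : 4;
    vrfToMemBus.reserve(ticks(busCycles));
    memUnitSlot.reserve(ticks(issuePeriod));
}

int
main()
{
    Timer bus, slot;
    reserveMemPipes({true, false, false}, bus, slot, 1);
    std::cout << bus.reservedTicks << ' ' << slot.reservedTicks << '\n'; // 4000 1000
    return 0;
}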
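Flat loads and stores are the one case where the flags alone do not pick a pipeline: the code asserts that executedAs() has been resolved (no longer Enums::SC_NONE) and then charges either the LDS path (SC_SHARED) or the global-memory path. The dispatch is summarized below; the enum is trimmed to the values referenced in the diff, and routeFlatAccess() is a placeholder for the real per-pipeline bookkeeping (rd/wrLmReqsInPipe versus rd/wrGmReqsInPipe and the matching wait slots).

#include <cassert>
#include <iostream>
#include <string>

// Reduced model of the storage classes referenced in the diff; only the
// members needed for the example are listed.
enum StorageClass { SC_NONE, SC_SHARED, SC_GLOBAL };

// Decide which pipeline a resolved flat access is charged to.
std::string
routeFlatAccess(StorageClass executedAs)
{
    // Flat accesses must already be resolved by the time resources are
    // reserved, hence the asserts in updateResources()/exec() above.
    assert(executedAs != SC_NONE);
    return executedAs == SC_SHARED ? "local (LDS) pipeline"
                                   : "global memory pipeline";
}

int
main()
{
    std::cout << routeFlatAccess(SC_SHARED) << '\n';
    std::cout << routeFlatAccess(SC_GLOBAL) << '\n';
    return 0;
}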