summaryrefslogtreecommitdiff
path: root/src/gpu-compute
diff options
context:
space:
mode:
Diffstat (limited to 'src/gpu-compute')
-rw-r--r--src/gpu-compute/GPU.py108
-rw-r--r--src/gpu-compute/GPUStaticInstFlags.py111
-rw-r--r--src/gpu-compute/SConscript1
-rw-r--r--src/gpu-compute/code_enums.hh116
-rw-r--r--src/gpu-compute/compute_unit.cc26
-rw-r--r--src/gpu-compute/compute_unit.hh1
-rw-r--r--src/gpu-compute/global_memory_pipeline.cc23
-rw-r--r--src/gpu-compute/gpu_dyn_inst.cc382
-rw-r--r--src/gpu-compute/gpu_dyn_inst.hh219
-rw-r--r--src/gpu-compute/gpu_static_inst.cc6
-rw-r--r--src/gpu-compute/gpu_static_inst.hh167
-rw-r--r--src/gpu-compute/kernel_cfg.cc10
-rw-r--r--src/gpu-compute/lds_state.cc7
-rw-r--r--src/gpu-compute/lds_state.hh1
-rw-r--r--src/gpu-compute/local_memory_pipeline.cc9
-rw-r--r--src/gpu-compute/shader.hh1
-rw-r--r--src/gpu-compute/vector_register_file.cc5
-rw-r--r--src/gpu-compute/wavefront.cc207
18 files changed, 802 insertions, 598 deletions
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index f580a09f7..b672f616c 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -171,56 +171,6 @@ class GpuDispatcher(DmaDevice):
cl_driver = Param.ClDriver('pointer to driver')
-class OpType(Enum): vals = [
- 'OT_NULL',
- 'OT_ALU',
- 'OT_SPECIAL',
- 'OT_GLOBAL_READ',
- 'OT_GLOBAL_WRITE',
- 'OT_GLOBAL_ATOMIC',
- 'OT_GLOBAL_HIST',
- 'OT_GLOBAL_LDAS',
- 'OT_SHARED_READ',
- 'OT_SHARED_WRITE',
- 'OT_SHARED_ATOMIC',
- 'OT_SHARED_HIST',
- 'OT_SHARED_LDAS',
- 'OT_PRIVATE_READ',
- 'OT_PRIVATE_WRITE',
- 'OT_PRIVATE_ATOMIC',
- 'OT_PRIVATE_HIST',
- 'OT_PRIVATE_LDAS',
- 'OT_SPILL_READ',
- 'OT_SPILL_WRITE',
- 'OT_SPILL_ATOMIC',
- 'OT_SPILL_HIST',
- 'OT_SPILL_LDAS',
- 'OT_READONLY_READ',
- 'OT_READONLY_WRITE',
- 'OT_READONLY_ATOMIC',
- 'OT_READONLY_HIST',
- 'OT_READONLY_LDAS',
- 'OT_FLAT_READ',
- 'OT_FLAT_WRITE',
- 'OT_FLAT_ATOMIC',
- 'OT_FLAT_HIST',
- 'OT_FLAT_LDAS',
- 'OT_KERN_READ',
- 'OT_BRANCH',
-
- # note: Only the OT_BOTH_MEMFENCE seems to be supported in the 1.0F version
- # of the compiler.
- 'OT_SHARED_MEMFENCE',
- 'OT_GLOBAL_MEMFENCE',
- 'OT_BOTH_MEMFENCE',
-
- 'OT_BARRIER',
- 'OT_PRINT',
- 'OT_RET',
- 'OT_NOP',
- 'OT_ARG'
- ]
-
class MemType(Enum): vals = [
'M_U8',
'M_U16',
@@ -235,47 +185,6 @@ class MemType(Enum): vals = [
'M_F64',
]
-class MemOpType(Enum): vals = [
- 'MO_LD',
- 'MO_ST',
- 'MO_LDAS',
- 'MO_LDA',
- 'MO_AAND',
- 'MO_AOR',
- 'MO_AXOR',
- 'MO_ACAS',
- 'MO_AEXCH',
- 'MO_AADD',
- 'MO_ASUB',
- 'MO_AINC',
- 'MO_ADEC',
- 'MO_AMAX',
- 'MO_AMIN',
- 'MO_ANRAND',
- 'MO_ANROR',
- 'MO_ANRXOR',
- 'MO_ANRCAS',
- 'MO_ANREXCH',
- 'MO_ANRADD',
- 'MO_ANRSUB',
- 'MO_ANRINC',
- 'MO_ANRDEC',
- 'MO_ANRMAX',
- 'MO_ANRMIN',
- 'MO_HAND',
- 'MO_HOR',
- 'MO_HXOR',
- 'MO_HCAS',
- 'MO_HEXCH',
- 'MO_HADD',
- 'MO_HSUB',
- 'MO_HINC',
- 'MO_HDEC',
- 'MO_HMAX',
- 'MO_HMIN',
- 'MO_UNDEF'
- ]
-
class StorageClassType(Enum): vals = [
'SC_SPILL',
'SC_GLOBAL',
@@ -293,20 +202,3 @@ class RegisterType(Enum): vals = [
'RT_HARDWARE',
'RT_NONE',
]
-
-class GenericMemoryOrder(Enum): vals = [
- 'MEMORY_ORDER_NONE',
- 'MEMORY_ORDER_RELAXED',
- 'MEMORY_ORDER_SC_ACQUIRE',
- 'MEMORY_ORDER_SC_RELEASE',
- 'MEMORY_ORDER_SC_ACQUIRE_RELEASE',
- ]
-
-class GenericMemoryScope(Enum): vals = [
- 'MEMORY_SCOPE_NONE',
- 'MEMORY_SCOPE_WORKITEM',
- 'MEMORY_SCOPE_WAVEFRONT',
- 'MEMORY_SCOPE_WORKGROUP',
- 'MEMORY_SCOPE_DEVICE',
- 'MEMORY_SCOPE_SYSTEM',
- ]
diff --git a/src/gpu-compute/GPUStaticInstFlags.py b/src/gpu-compute/GPUStaticInstFlags.py
new file mode 100644
index 000000000..453fdced2
--- /dev/null
+++ b/src/gpu-compute/GPUStaticInstFlags.py
@@ -0,0 +1,111 @@
+# Copyright (c) 2016 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Anthony Gutierrez
+
+from m5.params import *
+
+class GPUStaticInstFlags(Enum):
+ wrapper_name = 'GPUStaticInstFlags'
+ wrapper_is_struct = True
+ enum_name = 'Flags'
+
+ vals = [
+ # Op types
+ 'ALU', # ALU op
+ 'Branch', # Branch instruction
+ 'Nop', # No-op (no effect at all)
+ 'Return', # Return instruction
+ 'UnconditionalJump', #
+ 'SpecialOp', # Special op
+ 'Waitcnt', # Is a waitcnt instruction
+
+ # Memory ops
+ 'MemBarrier', # Barrier instruction
+ 'MemFence', # Memory fence instruction
+ 'MemoryRef', # References memory (load, store, or atomic)
+ 'Flat', # Flat memory op
+ 'Load', # Reads from memory
+ 'Store', # Writes to memory
+
+ # Atomic ops
+ 'AtomicReturn', # Atomic instruction that returns data
+ 'AtomicNoReturn', # Atomic instruction that doesn't return data
+
+ # Instruction attributes
+ 'Scalar', # A scalar (not vector) operation
+ 'ReadsSCC', # The instruction reads SCC
+ 'WritesSCC', # The instruction writes SCC
+ 'ReadsVCC', # The instruction reads VCC
+ 'WritesVCC', # The instruction writes VCC
+
+ # Atomic OP types
+ 'AtomicAnd',
+ 'AtomicOr',
+ 'AtomicXor',
+ 'AtomicCAS',
+ 'AtomicExch',
+ 'AtomicAdd',
+ 'AtomicSub',
+ 'AtomicInc',
+ 'AtomicDec',
+ 'AtomicMax',
+ 'AtomicMin',
+
+ # Memory order flags
+ 'RelaxedOrder',
+ 'Acquire', # Has acquire semantics
+ 'Release', # Has release semantics
+ 'AcquireRelease', # Has acquire and release semantics
+ 'NoOrder', # Has no ordering restrictions
+
+ # Segment access flags
+ 'ArgSegment', # Accesses the arg segment
+ 'GlobalSegment', # Accesses global memory
+ 'GroupSegment', # Accesses local memory (LDS), aka shared memory
+ 'KernArgSegment', # Accesses the kernel argument segment
+ 'PrivateSegment', # Accesses the private segment
+ 'ReadOnlySegment', # Accesses read only memory
+ 'SpillSegment', # Accesses the spill segment
+ 'NoSegment', # Does not have an associated segment
+
+ # Scope flags
+ 'WorkitemScope',
+ 'WavefrontScope',
+ 'WorkgroupScope',
+ 'DeviceScope',
+ 'SystemScope',
+ 'NoScope', # Does not have an associated scope
+
+ # Coherence flags
+ 'GloballyCoherent', # Coherent with other workitems on same device
+ 'SystemCoherent' # Coherent with a different device, or the host
+ ]
diff --git a/src/gpu-compute/SConscript b/src/gpu-compute/SConscript
index 88c1cf036..8cf1ed8cf 100644
--- a/src/gpu-compute/SConscript
+++ b/src/gpu-compute/SConscript
@@ -41,6 +41,7 @@ if not env['BUILD_GPU']:
Return()
SimObject('GPU.py')
+SimObject('GPUStaticInstFlags.py')
SimObject('LdsState.py')
SimObject('X86GPUTLB.py')
diff --git a/src/gpu-compute/code_enums.hh b/src/gpu-compute/code_enums.hh
deleted file mode 100644
index 6cd9bfe26..000000000
--- a/src/gpu-compute/code_enums.hh
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __CODE_ENUMS_HH__
-#define __CODE_ENUMS_HH__
-
-#define IS_OT_GLOBAL(a) ((a)>=Enums::OT_GLOBAL_READ \
- && (a)<=Enums::OT_GLOBAL_LDAS)
-#define IS_OT_SHARED(a) ((a)>=Enums::OT_SHARED_READ \
- && (a)<=Enums::OT_SHARED_LDAS)
-#define IS_OT_PRIVATE(a) ((a)>=Enums::OT_PRIVATE_READ \
- && (a)<=Enums::OT_PRIVATE_LDAS)
-#define IS_OT_SPILL(a) ((a)>=Enums::OT_SPILL_READ \
- && (a)<=Enums::OT_SPILL_LDAS)
-#define IS_OT_READONLY(a) ((a)>=Enums::OT_READONLY_READ \
- && (a)<=Enums::OT_READONLY_LDAS)
-#define IS_OT_FLAT(a) ((a)>=Enums::OT_FLAT_READ && (a)<=Enums::OT_FLAT_LDAS)
-
-#define IS_OT_LDAS(a) ((a)==Enums::OT_GLOBAL_LDAS||(a)==Enums::OT_SHARED_LDAS \
- ||(a)==Enums::OT_PRIVATE_LDAS||(a)==Enums::OT_SPILL_LDAS \
- ||(a)==Enums::OT_READONLY_LDAS||(a)==Enums::OT_FLAT_LDAS)
-
-#define IS_OT_READ(a) ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SHARED_READ \
- ||(a)==Enums::OT_PRIVATE_READ||(a)==Enums::OT_SPILL_READ \
- ||(a)==Enums::OT_READONLY_READ||(a)==Enums::OT_FLAT_READ)
-
-#define IS_OT_READ_GM(a) \
- ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SPILL_READ \
- ||(a)==Enums::OT_READONLY_READ)
-
-#define IS_OT_READ_LM(a) ((a)==Enums::OT_SHARED_READ)
-
-#define IS_OT_READ_RM(a) ((a)==Enums::OT_READONLY_READ)
-
-#define IS_OT_READ_PM(a) ((a)==Enums::OT_PRIVATE_READ)
-
-#define IS_OT_WRITE(a) \
- ((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SHARED_WRITE \
- ||(a)==Enums::OT_PRIVATE_WRITE||(a)==Enums::OT_SPILL_WRITE \
- ||(a)==Enums::OT_READONLY_WRITE||(a)==Enums::OT_FLAT_WRITE)
-
-#define IS_OT_WRITE_GM(a) \
- ((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SPILL_WRITE \
- ||(a)==Enums::OT_READONLY_WRITE)
-
-#define IS_OT_WRITE_LM(a) ((a)==Enums::OT_SHARED_WRITE)
-
-#define IS_OT_WRITE_PM(a) ((a)==Enums::OT_PRIVATE_WRITE)
-
-#define IS_OT_ATOMIC(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
- ||(a)==Enums::OT_SHARED_ATOMIC \
- ||(a)==Enums::OT_PRIVATE_ATOMIC \
- ||(a)==Enums::OT_SPILL_ATOMIC \
- ||(a)==Enums::OT_READONLY_ATOMIC \
- ||(a)==Enums::OT_BOTH_MEMFENCE \
- ||(a)==Enums::OT_FLAT_ATOMIC)
-
-#define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
- ||(a)==Enums::OT_SPILL_ATOMIC \
- ||(a)==Enums::OT_READONLY_ATOMIC \
- ||(a)==Enums::OT_GLOBAL_MEMFENCE \
- ||(a)==Enums::OT_BOTH_MEMFENCE)
-
-#define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \
- ||(a)==Enums::OT_SHARED_MEMFENCE)
-
-#define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC)
-
-#define IS_OT_HIST(a) ((a)==Enums::OT_GLOBAL_HIST \
- ||(a)==Enums::OT_SHARED_HIST \
- ||(a)==Enums::OT_PRIVATE_HIST \
- ||(a)==Enums::OT_SPILL_HIST \
- ||(a)==Enums::OT_READONLY_HIST \
- ||(a)==Enums::OT_FLAT_HIST)
-
-#define IS_OT_HIST_GM(a) ((a)==Enums::OT_GLOBAL_HIST \
- ||(a)==Enums::OT_SPILL_HIST \
- ||(a)==Enums::OT_READONLY_HIST)
-
-#define IS_OT_HIST_LM(a) ((a)==Enums::OT_SHARED_HIST)
-
-#define IS_OT_HIST_PM(a) ((a)==Enums::OT_PRIVATE_HIST)
-
-#endif // __CODE_ENUMS_HH__
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 97e018713..abf8ff2c5 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p),
req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(name() + ".ComputeUnit")),
- lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize)
+ lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize),
+ kernelLaunchInst(new KernelLaunchStaticInst())
{
/**
* This check is necessary because std::bitset only provides conversion
@@ -316,13 +317,11 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
// Send L1 cache acquire
// isKernel + isAcquire = Kernel Begin
if (shader->impl_kern_boundary_sync) {
- GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(this,
- nullptr,
- nullptr, 0);
+ GPUDynInstPtr gpuDynInst =
+ std::make_shared<GPUDynInst>(this, nullptr, kernelLaunchInst,
+ getAndIncSeqNum());
gpuDynInst->useContinuation = false;
- gpuDynInst->memoryOrder = Enums::MEMORY_ORDER_SC_ACQUIRE;
- gpuDynInst->scope = Enums::MEMORY_SCOPE_SYSTEM;
injectGlobalMemFence(gpuDynInst, true);
}
@@ -647,7 +646,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
gpuDynInst->wfSlotId, w->barrierCnt);
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -658,7 +657,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
return true;
} else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -942,6 +941,8 @@ void
ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
Request* req)
{
+ assert(gpuDynInst->isGlobalSeg());
+
if (!req) {
req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
}
@@ -950,8 +951,6 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
req->setFlags(Request::KERNEL);
}
- gpuDynInst->s_type = SEG_GLOBAL;
-
// for non-kernel MemFence operations, memorder flags are set depending
// on which type of request is currently being sent, so this
// should be set by the caller (e.g. if an inst has acq-rel
@@ -1033,8 +1032,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
gpuDynInst->statusVector.clear();
- if (gpuDynInst->m_op == Enums::MO_LD || MO_A(gpuDynInst->m_op)
- || MO_ANR(gpuDynInst->m_op)) {
+ if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy());
compute_unit->globalMemoryPipe.getGMLdRespFIFO()
@@ -1055,7 +1053,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
// the continuation may generate more work for
// this memory request
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -1065,7 +1063,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
gpuDynInst->statusBitVector = VectorMask(0);
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index a3547402a..938658fd1 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -744,6 +744,7 @@ class ComputeUnit : public MemObject
private:
uint64_t globalSeqNum;
int wavefrontSize;
+ GPUStaticInst *kernelLaunchInst;
};
#endif // __COMPUTE_UNIT_HH__
diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc
index 102905ec8..ab3e8c47e 100644
--- a/src/gpu-compute/global_memory_pipeline.cc
+++ b/src/gpu-compute/global_memory_pipeline.cc
@@ -67,7 +67,7 @@ GlobalMemPipeline::exec()
bool accessVrf = true;
// check the VRF to see if the operands of a load (or load component
// of an atomic) are accessible
- if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) {
+ if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf =
@@ -127,10 +127,7 @@ GlobalMemPipeline::exec()
// memory packets to DTLB
if (!gmIssuedRequests.empty()) {
GPUDynInstPtr mp = gmIssuedRequests.front();
- if (mp->m_op == Enums::MO_LD ||
- (mp->m_op >= Enums::MO_AAND && mp->m_op <= Enums::MO_AMIN) ||
- (mp->m_op >= Enums::MO_ANRAND && mp->m_op <= Enums::MO_ANRMIN)) {
-
+ if (mp->isLoad() || mp->isAtomic()) {
if (inflightLoads >= gmQueueSize) {
return;
} else {
@@ -139,7 +136,7 @@ GlobalMemPipeline::exec()
} else {
if (inflightStores >= gmQueueSize) {
return;
- } else if (mp->m_op == Enums::MO_ST) {
+ } else if (mp->isStore()) {
++inflightStores;
}
}
@@ -147,9 +144,8 @@ GlobalMemPipeline::exec()
mp->initiateAcc(mp);
gmIssuedRequests.pop();
- DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = %s\n",
- computeUnit->cu_id, mp->simdId, mp->wfSlotId,
- Enums::MemOpTypeStrings[mp->m_op]);
+ DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
+ computeUnit->cu_id, mp->simdId, mp->wfSlotId);
}
}
@@ -160,12 +156,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
+ if (m->isLoad() || m->isAtomic()) {
gmReturnedLoads.pop();
assert(inflightLoads > 0);
--inflightLoads;
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) {
+ if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec;
// iterate over number of destination register operands since
// this is a load or atomic operation
@@ -214,13 +210,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
// Decrement outstanding register count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
- if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) ||
- MO_H(m->m_op)) {
+ if (m->isStore() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time,
-1);
}
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
+ if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time,
-1);
}
diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc
index 1806e79e4..ec6340360 100644
--- a/src/gpu-compute/gpu_dyn_inst.cc
+++ b/src/gpu-compute/gpu_dyn_inst.cc
@@ -41,11 +41,10 @@
#include "gpu-compute/wavefront.hh"
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
- GPUStaticInst *_staticInst, uint64_t instSeqNum)
+ GPUStaticInst *static_inst, uint64_t instSeqNum)
: GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
- m_op(Enums::MO_UNDEF),
- memoryOrder(Enums::MEMORY_ORDER_NONE), n_reg(0), useContinuation(false),
- statusBitVector(0), staticInst(_staticInst), _seqNum(instSeqNum)
+ n_reg(0), useContinuation(false),
+ statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
{
tlbHitLevel.assign(computeUnit()->wfSize(), -1);
d_data = new uint8_t[computeUnit()->wfSize() * 16];
@@ -68,77 +67,69 @@ GPUDynInst::~GPUDynInst()
}
void
-GPUDynInst::execute()
+GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
- GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(cu, wf, staticInst,
- _seqNum);
- staticInst->execute(gpuDynInst);
+ _staticInst->execute(gpuDynInst);
}
int
GPUDynInst::numSrcRegOperands()
{
- return staticInst->numSrcRegOperands();
+ return _staticInst->numSrcRegOperands();
}
int
GPUDynInst::numDstRegOperands()
{
- return staticInst->numDstRegOperands();
+ return _staticInst->numDstRegOperands();
}
int
GPUDynInst::getNumOperands()
{
- return staticInst->getNumOperands();
+ return _staticInst->getNumOperands();
}
bool
GPUDynInst::isVectorRegister(int operandIdx)
{
- return staticInst->isVectorRegister(operandIdx);
+ return _staticInst->isVectorRegister(operandIdx);
}
bool
GPUDynInst::isScalarRegister(int operandIdx)
{
- return staticInst->isScalarRegister(operandIdx);
+ return _staticInst->isScalarRegister(operandIdx);
}
int
GPUDynInst::getRegisterIndex(int operandIdx)
{
- return staticInst->getRegisterIndex(operandIdx);
+ return _staticInst->getRegisterIndex(operandIdx);
}
int
GPUDynInst::getOperandSize(int operandIdx)
{
- return staticInst->getOperandSize(operandIdx);
+ return _staticInst->getOperandSize(operandIdx);
}
bool
GPUDynInst::isDstOperand(int operandIdx)
{
- return staticInst->isDstOperand(operandIdx);
+ return _staticInst->isDstOperand(operandIdx);
}
bool
GPUDynInst::isSrcOperand(int operandIdx)
{
- return staticInst->isSrcOperand(operandIdx);
-}
-
-bool
-GPUDynInst::isArgLoad()
-{
- return staticInst->isArgLoad();
+ return _staticInst->isSrcOperand(operandIdx);
}
const std::string&
GPUDynInst::disassemble() const
{
- return staticInst->disassemble();
+ return _staticInst->disassemble();
}
uint64_t
@@ -147,16 +138,10 @@ GPUDynInst::seqNum() const
return _seqNum;
}
-Enums::OpType
-GPUDynInst::opType()
-{
- return staticInst->o_type;
-}
-
Enums::StorageClassType
GPUDynInst::executedAs()
{
- return staticInst->executed_as;
+ return _staticInst->executed_as;
}
// Process a memory instruction and (if necessary) submit timing request
@@ -166,20 +151,347 @@ GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
cu->cu_id, simdId, wfSlotId, exec_mask);
- staticInst->initiateAcc(gpuDynInst);
+ _staticInst->initiateAcc(gpuDynInst);
time = 0;
}
+/**
+ * accessor methods for the attributes of
+ * the underlying GPU static instruction
+ */
+bool
+GPUDynInst::isALU() const
+{
+ return _staticInst->isALU();
+}
+
+bool
+GPUDynInst::isBranch() const
+{
+ return _staticInst->isBranch();
+}
+
+bool
+GPUDynInst::isNop() const
+{
+ return _staticInst->isNop();
+}
+
+bool
+GPUDynInst::isReturn() const
+{
+ return _staticInst->isReturn();
+}
+
+bool
+GPUDynInst::isUnconditionalJump() const
+{
+ return _staticInst->isUnconditionalJump();
+}
+
+bool
+GPUDynInst::isSpecialOp() const
+{
+ return _staticInst->isSpecialOp();
+}
+
+bool
+GPUDynInst::isWaitcnt() const
+{
+ return _staticInst->isWaitcnt();
+}
+
+bool
+GPUDynInst::isBarrier() const
+{
+ return _staticInst->isBarrier();
+}
+
+bool
+GPUDynInst::isMemFence() const
+{
+ return _staticInst->isMemFence();
+}
+
+bool
+GPUDynInst::isMemRef() const
+{
+ return _staticInst->isMemRef();
+}
+
+bool
+GPUDynInst::isFlat() const
+{
+ return _staticInst->isFlat();
+}
+
+bool
+GPUDynInst::isLoad() const
+{
+ return _staticInst->isLoad();
+}
+
+bool
+GPUDynInst::isStore() const
+{
+ return _staticInst->isStore();
+}
+
+bool
+GPUDynInst::isAtomic() const
+{
+ return _staticInst->isAtomic();
+}
+
+bool
+GPUDynInst::isAtomicNoRet() const
+{
+ return _staticInst->isAtomicNoRet();
+}
+
+bool
+GPUDynInst::isAtomicRet() const
+{
+ return _staticInst->isAtomicRet();
+}
+
+bool
+GPUDynInst::isScalar() const
+{
+ return _staticInst->isScalar();
+}
+
+bool
+GPUDynInst::readsSCC() const
+{
+ return _staticInst->readsSCC();
+}
+
+bool
+GPUDynInst::writesSCC() const
+{
+ return _staticInst->writesSCC();
+}
+
+bool
+GPUDynInst::readsVCC() const
+{
+ return _staticInst->readsVCC();
+}
+
+bool
+GPUDynInst::writesVCC() const
+{
+ return _staticInst->writesVCC();
+}
+
+bool
+GPUDynInst::isAtomicAnd() const
+{
+ return _staticInst->isAtomicAnd();
+}
+
+bool
+GPUDynInst::isAtomicOr() const
+{
+ return _staticInst->isAtomicOr();
+}
+
+bool
+GPUDynInst::isAtomicXor() const
+{
+ return _staticInst->isAtomicXor();
+}
+
+bool
+GPUDynInst::isAtomicCAS() const
+{
+ return _staticInst->isAtomicCAS();
+}
+
+bool GPUDynInst::isAtomicExch() const
+{
+ return _staticInst->isAtomicExch();
+}
+
+bool
+GPUDynInst::isAtomicAdd() const
+{
+ return _staticInst->isAtomicAdd();
+}
+
+bool
+GPUDynInst::isAtomicSub() const
+{
+ return _staticInst->isAtomicSub();
+}
+
+bool
+GPUDynInst::isAtomicInc() const
+{
+ return _staticInst->isAtomicInc();
+}
+
+bool
+GPUDynInst::isAtomicDec() const
+{
+ return _staticInst->isAtomicDec();
+}
+
+bool
+GPUDynInst::isAtomicMax() const
+{
+ return _staticInst->isAtomicMax();
+}
+
+bool
+GPUDynInst::isAtomicMin() const
+{
+ return _staticInst->isAtomicMin();
+}
+
+bool
+GPUDynInst::isArgLoad() const
+{
+ return _staticInst->isArgLoad();
+}
+
+bool
+GPUDynInst::isGlobalMem() const
+{
+ return _staticInst->isGlobalMem();
+}
+
+bool
+GPUDynInst::isLocalMem() const
+{
+ return _staticInst->isLocalMem();
+}
+
+bool
+GPUDynInst::isArgSeg() const
+{
+ return _staticInst->isArgSeg();
+}
+
+bool
+GPUDynInst::isGlobalSeg() const
+{
+ return _staticInst->isGlobalSeg();
+}
+
+bool
+GPUDynInst::isGroupSeg() const
+{
+ return _staticInst->isGroupSeg();
+}
+
+bool
+GPUDynInst::isKernArgSeg() const
+{
+ return _staticInst->isKernArgSeg();
+}
+
+bool
+GPUDynInst::isPrivateSeg() const
+{
+ return _staticInst->isPrivateSeg();
+}
+
+bool
+GPUDynInst::isReadOnlySeg() const
+{
+ return _staticInst->isReadOnlySeg();
+}
+
+bool
+GPUDynInst::isSpillSeg() const
+{
+ return _staticInst->isSpillSeg();
+}
+
+bool
+GPUDynInst::isWorkitemScope() const
+{
+ return _staticInst->isWorkitemScope();
+}
+
+bool
+GPUDynInst::isWavefrontScope() const
+{
+ return _staticInst->isWavefrontScope();
+}
+
+bool
+GPUDynInst::isWorkgroupScope() const
+{
+ return _staticInst->isWorkgroupScope();
+}
+
+bool
+GPUDynInst::isDeviceScope() const
+{
+ return _staticInst->isDeviceScope();
+}
+
+bool
+GPUDynInst::isSystemScope() const
+{
+ return _staticInst->isSystemScope();
+}
+
+bool
+GPUDynInst::isNoScope() const
+{
+ return _staticInst->isNoScope();
+}
+
+bool
+GPUDynInst::isRelaxedOrder() const
+{
+ return _staticInst->isRelaxedOrder();
+}
+
+bool
+GPUDynInst::isAcquire() const
+{
+ return _staticInst->isAcquire();
+}
+
+bool
+GPUDynInst::isRelease() const
+{
+ return _staticInst->isRelease();
+}
+
+bool
+GPUDynInst::isAcquireRelease() const
+{
+ return _staticInst->isAcquireRelease();
+}
+
+bool
+GPUDynInst::isNoOrder() const
+{
+ return _staticInst->isNoOrder();
+}
+
+bool
+GPUDynInst::isGloballyCoherent() const
+{
+ return _staticInst->isGloballyCoherent();
+}
+
bool
-GPUDynInst::scalarOp() const
+GPUDynInst::isSystemCoherent() const
{
- return staticInst->scalarOp();
+ return _staticInst->isSystemCoherent();
}
void
GPUDynInst::updateStats()
{
- if (staticInst->isLocalMem()) {
+ if (_staticInst->isLocalMem()) {
// access to LDS (shared) memory
cu->dynamicLMemInstrCnt++;
} else {
diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh
index 46774d867..c07d85d78 100644
--- a/src/gpu-compute/gpu_dyn_inst.hh
+++ b/src/gpu-compute/gpu_dyn_inst.hh
@@ -39,11 +39,7 @@
#include <cstdint>
#include <string>
-#include "enums/GenericMemoryOrder.hh"
-#include "enums/GenericMemoryScope.hh"
-#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
-#include "enums/OpType.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"
@@ -180,33 +176,19 @@ class AtomicOpMin : public TypedAtomicOpFunctor<T>
}
};
-#define MO_A(a) ((a)>=Enums::MO_AAND && (a)<=Enums::MO_AMIN)
-#define MO_ANR(a) ((a)>=Enums::MO_ANRAND && (a)<=Enums::MO_ANRMIN)
-#define MO_H(a) ((a)>=Enums::MO_HAND && (a)<=Enums::MO_HMIN)
-
typedef enum
{
VT_32,
VT_64,
} vgpr_type;
-typedef enum
-{
- SEG_PRIVATE,
- SEG_SPILL,
- SEG_GLOBAL,
- SEG_SHARED,
- SEG_READONLY,
- SEG_FLAT
-} seg_type;
-
class GPUDynInst : public GPUExecContext
{
public:
- GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *_staticInst,
+ GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
uint64_t instSeqNum);
~GPUDynInst();
- void execute();
+ void execute(GPUDynInstPtr gpuDynInst);
int numSrcRegOperands();
int numDstRegOperands();
int getNumOperands();
@@ -216,13 +198,11 @@ class GPUDynInst : public GPUExecContext
int getOperandSize(int operandIdx);
bool isDstOperand(int operandIdx);
bool isSrcOperand(int operandIdx);
- bool isArgLoad();
const std::string &disassemble() const;
uint64_t seqNum() const;
- Enums::OpType opType();
Enums::StorageClassType executedAs();
// The address of the memory operation
@@ -240,14 +220,7 @@ class GPUDynInst : public GPUExecContext
// The memory type (M_U32, M_S32, ...)
Enums::MemType m_type;
- // The memory operation (MO_LD, MO_ST, ...)
- Enums::MemOpType m_op;
- Enums::GenericMemoryOrder memoryOrder;
-
- // Scope of the request
- Enums::GenericMemoryScope scope;
- // The memory segment (SEG_SHARED, SEG_GLOBAL, ...)
- seg_type s_type;
+
// The equivalency class
int equiv;
// The return VGPR type (VT_32 or VT_64)
@@ -288,10 +261,72 @@ class GPUDynInst : public GPUExecContext
void updateStats();
- GPUStaticInst* staticInstruction() { return staticInst; }
-
- // Is the instruction a scalar or vector op?
- bool scalarOp() const;
+ GPUStaticInst* staticInstruction() { return _staticInst; }
+
+ bool isALU() const;
+ bool isBranch() const;
+ bool isNop() const;
+ bool isReturn() const;
+ bool isUnconditionalJump() const;
+ bool isSpecialOp() const;
+ bool isWaitcnt() const;
+
+ bool isBarrier() const;
+ bool isMemFence() const;
+ bool isMemRef() const;
+ bool isFlat() const;
+ bool isLoad() const;
+ bool isStore() const;
+
+ bool isAtomic() const;
+ bool isAtomicNoRet() const;
+ bool isAtomicRet() const;
+
+ bool isScalar() const;
+ bool readsSCC() const;
+ bool writesSCC() const;
+ bool readsVCC() const;
+ bool writesVCC() const;
+
+ bool isAtomicAnd() const;
+ bool isAtomicOr() const;
+ bool isAtomicXor() const;
+ bool isAtomicCAS() const;
+ bool isAtomicExch() const;
+ bool isAtomicAdd() const;
+ bool isAtomicSub() const;
+ bool isAtomicInc() const;
+ bool isAtomicDec() const;
+ bool isAtomicMax() const;
+ bool isAtomicMin() const;
+
+ bool isArgLoad() const;
+ bool isGlobalMem() const;
+ bool isLocalMem() const;
+
+ bool isArgSeg() const;
+ bool isGlobalSeg() const;
+ bool isGroupSeg() const;
+ bool isKernArgSeg() const;
+ bool isPrivateSeg() const;
+ bool isReadOnlySeg() const;
+ bool isSpillSeg() const;
+
+ bool isWorkitemScope() const;
+ bool isWavefrontScope() const;
+ bool isWorkgroupScope() const;
+ bool isDeviceScope() const;
+ bool isSystemScope() const;
+ bool isNoScope() const;
+
+ bool isRelaxedOrder() const;
+ bool isAcquire() const;
+ bool isRelease() const;
+ bool isAcquireRelease() const;
+ bool isNoOrder() const;
+
+ bool isGloballyCoherent() const;
+ bool isSystemCoherent() const;
/*
* Loads/stores/atomics may have acquire/release semantics associated
@@ -312,46 +347,32 @@ class GPUDynInst : public GPUExecContext
bool useContinuation;
template<typename c0> AtomicOpFunctor*
- makeAtomicOpFunctor(c0 *reg0, c0 *reg1, Enums::MemOpType op)
+ makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
{
- using namespace Enums;
-
- switch(op) {
- case MO_AAND:
- case MO_ANRAND:
+ if (isAtomicAnd()) {
return new AtomicOpAnd<c0>(*reg0);
- case MO_AOR:
- case MO_ANROR:
+ } else if (isAtomicOr()) {
return new AtomicOpOr<c0>(*reg0);
- case MO_AXOR:
- case MO_ANRXOR:
+ } else if (isAtomicXor()) {
return new AtomicOpXor<c0>(*reg0);
- case MO_ACAS:
- case MO_ANRCAS:
+ } else if (isAtomicCAS()) {
return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
- case MO_AEXCH:
- case MO_ANREXCH:
+ } else if (isAtomicExch()) {
return new AtomicOpExch<c0>(*reg0);
- case MO_AADD:
- case MO_ANRADD:
+ } else if (isAtomicAdd()) {
return new AtomicOpAdd<c0>(*reg0);
- case MO_ASUB:
- case MO_ANRSUB:
+ } else if (isAtomicSub()) {
return new AtomicOpSub<c0>(*reg0);
- case MO_AINC:
- case MO_ANRINC:
+ } else if (isAtomicInc()) {
return new AtomicOpInc<c0>();
- case MO_ADEC:
- case MO_ANRDEC:
+ } else if (isAtomicDec()) {
return new AtomicOpDec<c0>();
- case MO_AMAX:
- case MO_ANRMAX:
+ } else if (isAtomicMax()) {
return new AtomicOpMax<c0>(*reg0);
- case MO_AMIN:
- case MO_ANRMIN:
+ } else if (isAtomicMin()) {
return new AtomicOpMin<c0>(*reg0);
- default:
- panic("Unrecognized atomic operation");
+ } else {
+ fatal("Unrecognized atomic operation");
}
}
@@ -359,88 +380,58 @@ class GPUDynInst : public GPUExecContext
setRequestFlags(Request *req, bool setMemOrder=true)
{
// currently these are the easy scopes to deduce
- switch (s_type) {
- case SEG_PRIVATE:
+ if (isPrivateSeg()) {
req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
- break;
- case SEG_SPILL:
+ } else if (isSpillSeg()) {
req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
- break;
- case SEG_GLOBAL:
+ } else if (isGlobalSeg()) {
req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
- break;
- case SEG_READONLY:
+ } else if (isReadOnlySeg()) {
req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
- break;
- case SEG_SHARED:
+ } else if (isGroupSeg()) {
req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
- break;
- case SEG_FLAT:
+ } else if (isFlat()) {
// TODO: translate to correct scope
assert(false);
- default:
- panic("Bad segment type");
- break;
+ } else {
+ fatal("%s has bad segment type\n", disassemble());
}
- switch (scope) {
- case Enums::MEMORY_SCOPE_NONE:
- case Enums::MEMORY_SCOPE_WORKITEM:
- break;
- case Enums::MEMORY_SCOPE_WAVEFRONT:
+ if (isWavefrontScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WAVEFRONT_SCOPE);
- break;
- case Enums::MEMORY_SCOPE_WORKGROUP:
+ } else if (isWorkgroupScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WORKGROUP_SCOPE);
- break;
- case Enums::MEMORY_SCOPE_DEVICE:
+ } else if (isDeviceScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::DEVICE_SCOPE);
- break;
- case Enums::MEMORY_SCOPE_SYSTEM:
+ } else if (isSystemScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::SYSTEM_SCOPE);
- break;
- default:
- panic("Bad scope type");
- break;
+ } else if (!isNoScope() && !isWorkitemScope()) {
+ fatal("%s has bad scope type\n", disassemble());
}
if (setMemOrder) {
// set acquire and release flags
- switch (memoryOrder){
- case Enums::MEMORY_ORDER_SC_ACQUIRE:
+ if (isAcquire()) {
req->setFlags(Request::ACQUIRE);
- break;
- case Enums::MEMORY_ORDER_SC_RELEASE:
+ } else if (isRelease()) {
req->setFlags(Request::RELEASE);
- break;
- case Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+ } else if (isAcquireRelease()) {
req->setFlags(Request::ACQUIRE | Request::RELEASE);
- break;
- default:
- break;
+ } else if (!isNoOrder()) {
+ fatal("%s has bad memory order\n", disassemble());
}
}
// set atomic type
// currently, the instruction genenerator only produces atomic return
// but a magic instruction can produce atomic no return
- if (m_op == Enums::MO_AADD || m_op == Enums::MO_ASUB ||
- m_op == Enums::MO_AAND || m_op == Enums::MO_AOR ||
- m_op == Enums::MO_AXOR || m_op == Enums::MO_AMAX ||
- m_op == Enums::MO_AMIN || m_op == Enums::MO_AINC ||
- m_op == Enums::MO_ADEC || m_op == Enums::MO_AEXCH ||
- m_op == Enums::MO_ACAS) {
+ if (isAtomicRet()) {
req->setFlags(Request::ATOMIC_RETURN_OP);
- } else if (m_op == Enums::MO_ANRADD || m_op == Enums::MO_ANRSUB ||
- m_op == Enums::MO_ANRAND || m_op == Enums::MO_ANROR ||
- m_op == Enums::MO_ANRXOR || m_op == Enums::MO_ANRMAX ||
- m_op == Enums::MO_ANRMIN || m_op == Enums::MO_ANRINC ||
- m_op == Enums::MO_ANRDEC || m_op == Enums::MO_ANREXCH ||
- m_op == Enums::MO_ANRCAS) {
+ } else if (isAtomicNoRet()) {
req->setFlags(Request::ATOMIC_NO_RETURN_OP);
}
}
@@ -457,7 +448,7 @@ class GPUDynInst : public GPUExecContext
std::vector<int> tlbHitLevel;
private:
- GPUStaticInst *staticInst;
+ GPUStaticInst *_staticInst;
uint64_t _seqNum;
};
diff --git a/src/gpu-compute/gpu_static_inst.cc b/src/gpu-compute/gpu_static_inst.cc
index 83b429e62..0f74bd532 100644
--- a/src/gpu-compute/gpu_static_inst.cc
+++ b/src/gpu-compute/gpu_static_inst.cc
@@ -36,10 +36,12 @@
#include "gpu-compute/gpu_static_inst.hh"
GPUStaticInst::GPUStaticInst(const std::string &opcode)
- : o_type(Enums::OT_ALU), executed_as(Enums::SC_NONE), opcode(opcode),
- _instNum(0), _scalarOp(false)
+ : executed_as(Enums::SC_NONE), opcode(opcode),
+ _instNum(0)
{
+ setFlag(NoOrder);
}
+
const std::string&
GPUStaticInst::disassemble()
{
diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh
index 911e4f308..a73ec12e3 100644
--- a/src/gpu-compute/gpu_static_inst.hh
+++ b/src/gpu-compute/gpu_static_inst.hh
@@ -48,7 +48,7 @@
#include <cstdint>
#include <string>
-#include "enums/OpType.hh"
+#include "enums/GPUStaticInstFlags.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh"
@@ -57,7 +57,7 @@ class BaseOperand;
class BaseRegOperand;
class Wavefront;
-class GPUStaticInst
+class GPUStaticInst : public GPUStaticInstFlags
{
public:
GPUStaticInst(const std::string &opcode);
@@ -86,22 +86,110 @@ class GPUStaticInst
virtual bool isValid() const = 0;
- /*
- * Most instructions (including all HSAIL instructions)
- * are vector ops, so _scalarOp will be false by default.
- * Derived instruction objects that are scalar ops must
- * set _scalarOp to true in their constructors.
- */
- bool scalarOp() const { return _scalarOp; }
+ bool isALU() const { return _flags[ALU]; }
+ bool isBranch() const { return _flags[Branch]; }
+ bool isNop() const { return _flags[Nop]; }
+ bool isReturn() const { return _flags[Return]; }
+
+ bool
+ isUnconditionalJump() const
+ {
+ return _flags[UnconditionalJump];
+ }
+
+ bool isSpecialOp() const { return _flags[SpecialOp]; }
+ bool isWaitcnt() const { return _flags[Waitcnt]; }
+
+ bool isBarrier() const { return _flags[MemBarrier]; }
+ bool isMemFence() const { return _flags[MemFence]; }
+ bool isMemRef() const { return _flags[MemoryRef]; }
+ bool isFlat() const { return _flags[Flat]; }
+ bool isLoad() const { return _flags[Load]; }
+ bool isStore() const { return _flags[Store]; }
+
+ bool
+ isAtomic() const
+ {
+ return _flags[AtomicReturn] || _flags[AtomicNoReturn];
+ }
+
+ bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
+ bool isAtomicRet() const { return _flags[AtomicReturn]; }
+
+ bool isScalar() const { return _flags[Scalar]; }
+ bool readsSCC() const { return _flags[ReadsSCC]; }
+ bool writesSCC() const { return _flags[WritesSCC]; }
+ bool readsVCC() const { return _flags[ReadsVCC]; }
+ bool writesVCC() const { return _flags[WritesVCC]; }
- virtual bool isLocalMem() const
+ bool isAtomicAnd() const { return _flags[AtomicAnd]; }
+ bool isAtomicOr() const { return _flags[AtomicOr]; }
+ bool isAtomicXor() const { return _flags[AtomicXor]; }
+ bool isAtomicCAS() const { return _flags[AtomicCAS]; }
+ bool isAtomicExch() const { return _flags[AtomicExch]; }
+ bool isAtomicAdd() const { return _flags[AtomicAdd]; }
+ bool isAtomicSub() const { return _flags[AtomicSub]; }
+ bool isAtomicInc() const { return _flags[AtomicInc]; }
+ bool isAtomicDec() const { return _flags[AtomicDec]; }
+ bool isAtomicMax() const { return _flags[AtomicMax]; }
+ bool isAtomicMin() const { return _flags[AtomicMin]; }
+
+ bool
+ isArgLoad() const
+ {
+ return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
+ }
+
+ bool
+ isGlobalMem() const
{
- fatal("calling isLocalMem() on non-memory instruction.\n");
+ return _flags[MemoryRef] && (_flags[GlobalSegment] ||
+ _flags[PrivateSegment] || _flags[ReadOnlySegment] ||
+ _flags[SpillSegment]);
+ }
- return false;
+ bool
+ isLocalMem() const
+ {
+ return _flags[MemoryRef] && _flags[GroupSegment];
}
- bool isArgLoad() { return false; }
+ bool isArgSeg() const { return _flags[ArgSegment]; }
+ bool isGlobalSeg() const { return _flags[GlobalSegment]; }
+ bool isGroupSeg() const { return _flags[GroupSegment]; }
+ bool isKernArgSeg() const { return _flags[KernArgSegment]; }
+ bool isPrivateSeg() const { return _flags[PrivateSegment]; }
+ bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
+ bool isSpillSeg() const { return _flags[SpillSegment]; }
+
+ bool isWorkitemScope() const { return _flags[WorkitemScope]; }
+ bool isWavefrontScope() const { return _flags[WavefrontScope]; }
+ bool isWorkgroupScope() const { return _flags[WorkgroupScope]; }
+ bool isDeviceScope() const { return _flags[DeviceScope]; }
+ bool isSystemScope() const { return _flags[SystemScope]; }
+ bool isNoScope() const { return _flags[NoScope]; }
+
+ bool isRelaxedOrder() const { return _flags[RelaxedOrder]; }
+ bool isAcquire() const { return _flags[Acquire]; }
+ bool isRelease() const { return _flags[Release]; }
+ bool isAcquireRelease() const { return _flags[AcquireRelease]; }
+ bool isNoOrder() const { return _flags[NoOrder]; }
+
+ /**
+ * Coherence domain of a memory instruction. Only valid for
+ * machine ISA. The coherence domain specifies where it is
+ * possible to perform memory synchronization, e.g., acquire
+ * or release, from the shader kernel.
+ *
+ * isGloballyCoherent(): returns true if kernel is sharing memory
+ * with other work-items on the same device (GPU)
+ *
+ * isSystemCoherent(): returns true if kernel is sharing memory
+ * with other work-items on a different device (GPU) or the host (CPU)
+ */
+ bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
+ bool isSystemCoherent() const { return _flags[SystemCoherent]; }
+
virtual uint32_t instSize() = 0;
// only used for memory instructions
@@ -120,22 +208,13 @@ class GPUStaticInst
virtual uint32_t getTargetPc() { return 0; }
- /**
- * Query whether the instruction is an unconditional jump i.e., the jump
- * is always executed because there is no condition to be evaluated.
- *
- * If the instruction is not of branch type, the result is always false.
- *
- * @return True if the instruction is an unconditional jump.
- */
- virtual bool unconditionalJumpInstruction() { return false; }
-
static uint64_t dynamic_id_count;
- Enums::OpType o_type;
// For flat memory accesses
Enums::StorageClassType executed_as;
+ void setFlag(Flags flag) { _flags[flag] = true; }
+
protected:
virtual void
execLdAcq(GPUDynInstPtr gpuDynInst)
@@ -169,7 +248,45 @@ class GPUStaticInst
*/
int _ipdInstNum;
- bool _scalarOp;
+ std::bitset<Num_Flags> _flags;
+};
+
+class KernelLaunchStaticInst : public GPUStaticInst
+{
+ public:
+ KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
+ {
+ setFlag(Nop);
+ setFlag(Scalar);
+ setFlag(Acquire);
+ setFlag(SystemScope);
+ setFlag(GlobalSegment);
+ }
+
+ void
+ execute(GPUDynInstPtr gpuDynInst)
+ {
+ fatal("kernel launch instruction should not be executed\n");
+ }
+
+ void
+ generateDisassembly()
+ {
+ disassembly = opcode;
+ }
+
+ int getNumOperands() { return 0; }
+ bool isCondRegister(int operandIndex) { return false; }
+ bool isScalarRegister(int operandIndex) { return false; }
+ bool isVectorRegister(int operandIndex) { return false; }
+ bool isSrcOperand(int operandIndex) { return false; }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex) { return 0; }
+ int getRegisterIndex(int operandIndex) { return 0; }
+ int numDstRegOperands() { return 0; }
+ int numSrcRegOperands() { return 0; }
+ bool isValid() const { return true; }
+ uint32_t instSize() { return 0; }
};
#endif // __GPU_STATIC_INST_HH__
diff --git a/src/gpu-compute/kernel_cfg.cc b/src/gpu-compute/kernel_cfg.cc
index 10ded11b7..ac6a81b16 100644
--- a/src/gpu-compute/kernel_cfg.cc
+++ b/src/gpu-compute/kernel_cfg.cc
@@ -104,7 +104,7 @@ ControlFlowInfo::createBasicBlocks()
leaders.insert(0);
for (int i = 1; i < instructions.size(); i++) {
GPUStaticInst* instruction = instructions[i];
- if (instruction->o_type == Enums::OT_BRANCH) {
+ if (instruction->isBranch()) {
const int target_pc = instruction->getTargetPc();
leaders.insert(target_pc);
leaders.insert(i + 1);
@@ -137,18 +137,18 @@ ControlFlowInfo::connectBasicBlocks()
break;
}
GPUStaticInst* last = lastInstruction(bb.get());
- if (last->o_type == Enums::OT_RET) {
+ if (last->isReturn()) {
bb->successorIds.insert(exit_bb->id);
continue;
}
- if (last->o_type == Enums::OT_BRANCH) {
+ if (last->isBranch()) {
const uint32_t target_pc = last->getTargetPc();
BasicBlock* target_bb = basicBlock(target_pc);
bb->successorIds.insert(target_bb->id);
}
// Unconditional jump instructions have a unique successor
- if (!last->unconditionalJumpInstruction()) {
+ if (!last->isUnconditionalJump()) {
BasicBlock* next_bb = basicBlock(last->instNum() + 1);
bb->successorIds.insert(next_bb->id);
}
@@ -274,7 +274,7 @@ ControlFlowInfo::printBasicBlocks() const
int inst_num = inst->instNum();
std::cout << inst_num << " [" << basicBlock(inst_num)->id
<< "]: " << inst->disassemble();
- if (inst->o_type == Enums::OT_BRANCH) {
+ if (inst->isBranch()) {
std::cout << ", PC = " << inst->getTargetPc();
}
std::cout << std::endl;
diff --git a/src/gpu-compute/lds_state.cc b/src/gpu-compute/lds_state.cc
index d4a27318a..fad98c886 100644
--- a/src/gpu-compute/lds_state.cc
+++ b/src/gpu-compute/lds_state.cc
@@ -141,8 +141,7 @@ LdsState::countBankConflicts(GPUDynInstPtr gpuDynInst,
}
}
- if (gpuDynInst->m_op == Enums::MO_LD ||
- gpuDynInst->m_op == Enums::MO_ST) {
+ if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
// mask identical addresses
for (int j = 0; j < numBanks; ++j) {
for (int j0 = 0; j0 < j; j0++) {
@@ -208,8 +207,8 @@ LdsState::processPacket(PacketPtr packet)
GPUDynInstPtr dynInst = getDynInstr(packet);
// account for the LDS bank conflict overhead
- int busLength = (dynInst->m_op == Enums::MO_LD) ? parent->loadBusLength() :
- (dynInst->m_op == Enums::MO_ST) ? parent->storeBusLength() :
+ int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
+ (dynInst->isStore()) ? parent->storeBusLength() :
parent->loadBusLength();
// delay for accessing the LDS
Tick processingTime =
diff --git a/src/gpu-compute/lds_state.hh b/src/gpu-compute/lds_state.hh
index 58d109493..5fcbe82c0 100644
--- a/src/gpu-compute/lds_state.hh
+++ b/src/gpu-compute/lds_state.hh
@@ -43,7 +43,6 @@
#include <utility>
#include <vector>
-#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/misc.hh"
#include "mem/mem_object.hh"
diff --git a/src/gpu-compute/local_memory_pipeline.cc b/src/gpu-compute/local_memory_pipeline.cc
index e2238bf45..80dad6fcd 100644
--- a/src/gpu-compute/local_memory_pipeline.cc
+++ b/src/gpu-compute/local_memory_pipeline.cc
@@ -62,7 +62,7 @@ LocalMemPipeline::exec()
lmReturnedRequests.front() : nullptr;
bool accessVrf = true;
- if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) {
+ if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf =
@@ -137,7 +137,7 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) {
+ if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec;
for (int k = 0; k < m->n_reg; ++k) {
int dst = m->dst_reg+k;
@@ -172,13 +172,12 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
// Decrement outstanding request count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
- if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op)
- || MO_H(m->m_op)) {
+ if (m->isStore() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
m->time, -1);
}
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
+ if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
m->time, -1);
}
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index c1f741d6a..13afab977 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -47,7 +47,6 @@
#include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
-#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_tlb.hh"
diff --git a/src/gpu-compute/vector_register_file.cc b/src/gpu-compute/vector_register_file.cc
index c43d765af..c50c06cc6 100644
--- a/src/gpu-compute/vector_register_file.cc
+++ b/src/gpu-compute/vector_register_file.cc
@@ -38,7 +38,6 @@
#include <string>
#include "base/misc.hh"
-#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
@@ -153,8 +152,8 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
void
VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w)
{
- bool loadInstr = IS_OT_READ(ii->opType());
- bool atomicInstr = IS_OT_ATOMIC(ii->opType());
+ bool loadInstr = ii->isLoad();
+ bool atomicInstr = ii->isAtomic() || ii->isMemFence();
bool loadNoArgInstr = loadInstr && !ii->isArgLoad();
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index c677cbe41..caeed85a7 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -37,7 +37,6 @@
#include "debug/GPUExec.hh"
#include "debug/WavefrontStack.hh"
-#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
@@ -165,19 +164,8 @@ Wavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr)
bool
Wavefront::isGmInstruction(GPUDynInstPtr ii)
{
- if (IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
- IS_OT_ATOMIC_PM(ii->opType())) {
+ if (ii->isGlobalMem() || ii->isFlat())
return true;
- }
-
- if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
- IS_OT_ATOMIC_GM(ii->opType())) {
- return true;
- }
-
- if (IS_OT_FLAT(ii->opType())) {
- return true;
- }
return false;
}
@@ -185,8 +173,7 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii)
bool
Wavefront::isLmInstruction(GPUDynInstPtr ii)
{
- if (IS_OT_READ_LM(ii->opType()) || IS_OT_WRITE_LM(ii->opType()) ||
- IS_OT_ATOMIC_LM(ii->opType())) {
+ if (ii->isLocalMem()) {
return true;
}
@@ -199,10 +186,9 @@ Wavefront::isOldestInstALU()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && (ii->opType() == Enums::OT_NOP ||
- ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH ||
- ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
- ii->opType() == Enums::OT_KERN_READ)) {
+ if (status != S_STOPPED && (ii->isNop() ||
+ ii->isReturn() || ii->isBranch() ||
+ ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) {
return true;
}
@@ -215,7 +201,7 @@ Wavefront::isOldestInstBarrier()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && ii->opType() == Enums::OT_BARRIER) {
+ if (status != S_STOPPED && ii->isBarrier()) {
return true;
}
@@ -228,9 +214,7 @@ Wavefront::isOldestInstGMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && (IS_OT_READ_GM(ii->opType()) ||
- IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
-
+ if (status != S_STOPPED && ii->isGlobalMem()) {
return true;
}
@@ -243,9 +227,7 @@ Wavefront::isOldestInstLMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && (IS_OT_READ_LM(ii->opType()) ||
- IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
-
+ if (status != S_STOPPED && ii->isLocalMem()) {
return true;
}
@@ -258,9 +240,7 @@ Wavefront::isOldestInstPrivMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && (IS_OT_READ_PM(ii->opType()) ||
- IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
-
+ if (status != S_STOPPED && ii->isPrivateSeg()) {
return true;
}
@@ -273,8 +253,7 @@ Wavefront::isOldestInstFlatMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && IS_OT_FLAT(ii->opType())) {
-
+ if (status != S_STOPPED && ii->isFlat()) {
return true;
}
@@ -289,7 +268,7 @@ Wavefront::instructionBufferHasBranch()
for (auto it : instructionBuffer) {
GPUDynInstPtr ii = it;
- if (ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH) {
+ if (ii->isReturn() || ii->isBranch()) {
return true;
}
}
@@ -371,23 +350,16 @@ Wavefront::ready(itype_e type)
// checking readiness will be fixed eventually. In the meantime, let's
// make sure that we do not silently let an instruction type slip
// through this logic and always return not ready.
- if (!(ii->opType() == Enums::OT_BARRIER || ii->opType() == Enums::OT_NOP ||
- ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH ||
- ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
- ii->opType() == Enums::OT_KERN_READ ||
- ii->opType() == Enums::OT_ARG ||
- IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
- IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_READ_LM(ii->opType()) ||
- IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
- IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
- IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_FLAT(ii->opType()))) {
+ if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
+ ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
+ ii->isMemFence() || ii->isFlat())) {
panic("next instruction: %s is of unknown type\n", ii->disassemble());
}
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n",
computeUnit->cu_id, simdId, wfSlotId, ii->disassemble());
- if (type == I_ALU && ii->opType() == Enums::OT_BARRIER) {
+ if (type == I_ALU && ii->isBarrier()) {
// Here for ALU instruction (barrier)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@@ -400,7 +372,7 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
- } else if (type == I_ALU && ii->opType() == Enums::OT_NOP) {
+ } else if (type == I_ALU && ii->isNop()) {
// Here for ALU instruction (nop)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@@ -408,7 +380,7 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
- } else if (type == I_ALU && ii->opType() == Enums::OT_RET) {
+ } else if (type == I_ALU && ii->isReturn()) {
// Here for ALU instruction (return)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@@ -421,10 +393,10 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
- } else if (type == I_ALU && (ii->opType() == Enums::OT_BRANCH ||
- ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
- ii->opType() == Enums::OT_KERN_READ ||
- ii->opType() == Enums::OT_ARG)) {
+ } else if (type == I_ALU && (ii->isBranch() ||
+ ii->isALU() ||
+ (ii->isKernArgSeg() && ii->isLoad()) ||
+ ii->isArgSeg())) {
// Here for ALU instruction (all others)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is alu slot free?
@@ -439,18 +411,16 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
- } else if (type == I_GLOBAL && (IS_OT_READ_GM(ii->opType()) ||
- IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
+ } else if (type == I_GLOBAL && ii->isGlobalMem()) {
// Here Global memory instruction
- if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) {
+ if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
// Are there in pipe or outstanding global memory write requests?
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
return 0;
}
}
- if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) ||
- IS_OT_HIST_GM(ii->opType())) {
+ if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
// Are there in pipe or outstanding global memory read requests?
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
return 0;
@@ -480,17 +450,15 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
- } else if (type == I_SHARED && (IS_OT_READ_LM(ii->opType()) ||
- IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
+ } else if (type == I_SHARED && ii->isLocalMem()) {
// Here for Shared memory instruction
- if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) {
+ if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
return 0;
}
}
- if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
- IS_OT_HIST_LM(ii->opType())) {
+ if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
return 0;
}
@@ -519,47 +487,7 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
- } else if (type == I_PRIVATE && (IS_OT_READ_PM(ii->opType()) ||
- IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
- // Here for Private memory instruction ------------------------ //
- if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
- if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
- return 0;
- }
- }
-
- if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
- IS_OT_HIST_PM(ii->opType())) {
- if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) {
- return 0;
- }
- }
-
- if (!glbMemBusRdy) {
- // Is there an available VRF->Global memory read bus?
- return 0;
- }
-
- if (!glbMemIssueRdy) {
- // Is wave slot free?
- return 0;
- }
-
- if (!computeUnit->globalMemoryPipe.
- isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
- // Can we insert a new request to the Global Mem Request FIFO?
- return 0;
- }
- // can we schedule source & destination operands on the VRF?
- if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
- VrfAccessType::RD_WR)) {
- return 0;
- }
- if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
- return 0;
- }
- ready_inst = true;
- } else if (type == I_FLAT && IS_OT_FLAT(ii->opType())) {
+ } else if (type == I_FLAT && ii->isFlat()) {
if (!glbMemBusRdy) {
// Is there an available VRF->Global memory read bus?
return 0;
@@ -618,23 +546,22 @@ Wavefront::updateResources()
assert(ii);
computeUnit->vrf[simdId]->updateResources(this, ii);
// Single precision ALU or Branch or Return or Special instruction
- if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
- ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
+ if (ii->isALU() || ii->isSpecialOp() ||
+ ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines
- ii->opType()==Enums::OT_KERN_READ ||
- ii->opType()==Enums::OT_ARG ||
- ii->opType()==Enums::OT_RET) {
+ (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
+ ii->isReturn()) {
computeUnit->aluPipe[simdId].preset(computeUnit->shader->
ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod));
- } else if (ii->opType() == Enums::OT_BARRIER) {
+ } else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod));
- } else if (ii->opType() == Enums::OT_FLAT_READ) {
+ } else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++;
rdGmReqsInPipe++;
@@ -649,7 +576,7 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
- } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
+ } else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++;
wrGmReqsInPipe++;
@@ -664,21 +591,21 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
- } else if (IS_OT_READ_GM(ii->opType())) {
+ } else if (ii->isLoad() && ii->isGlobalMem()) {
memReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_GM(ii->opType())) {
+ } else if (ii->isStore() && ii->isGlobalMem()) {
memReqsInPipe++;
wrGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_GM(ii->opType())) {
+ } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
memReqsInPipe++;
wrGmReqsInPipe++;
rdGmReqsInPipe++;
@@ -686,21 +613,21 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_READ_LM(ii->opType())) {
+ } else if (ii->isLoad() && ii->isLocalMem()) {
memReqsInPipe++;
rdLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_LM(ii->opType())) {
+ } else if (ii->isStore() && ii->isLocalMem()) {
memReqsInPipe++;
wrLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_LM(ii->opType())) {
+ } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
memReqsInPipe++;
wrLmReqsInPipe++;
rdLmReqsInPipe++;
@@ -708,28 +635,6 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_READ_PM(ii->opType())) {
- memReqsInPipe++;
- rdGmReqsInPipe++;
- computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
- preset(computeUnit->shader->ticks(4));
- computeUnit->wfWait[computeUnit->GlbMemUnitId()].
- preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_PM(ii->opType())) {
- memReqsInPipe++;
- wrGmReqsInPipe++;
- computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
- preset(computeUnit->shader->ticks(8));
- computeUnit->wfWait[computeUnit->GlbMemUnitId()].
- preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_PM(ii->opType())) {
- memReqsInPipe++;
- wrGmReqsInPipe++;
- rdGmReqsInPipe++;
- computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
- preset(computeUnit->shader->ticks(8));
- computeUnit->wfWait[computeUnit->GlbMemUnitId()].
- preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
}
@@ -751,7 +656,7 @@ Wavefront::exec()
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
"(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
ii->disassemble(), old_pc);
- ii->execute();
+ ii->execute(ii);
// access the VRF
computeUnit->vrf[simdId]->exec(ii, this);
srcRegOpDist.sample(ii->numSrcRegOperands());
@@ -785,24 +690,24 @@ Wavefront::exec()
// ---- Update Vector ALU pipeline and other resources ------------------ //
// Single precision ALU or Branch or Return or Special instruction
- if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
- ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
+ if (ii->isALU() || ii->isSpecialOp() ||
+ ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines
- ii->opType() == Enums::OT_KERN_READ ||
- ii->opType() == Enums::OT_ARG ||
- ii->opType() == Enums::OT_RET) {
+ (ii->isKernArgSeg() && ii->isLoad()) ||
+ ii->isArgSeg() ||
+ ii->isReturn()) {
computeUnit->aluPipe[simdId].set(computeUnit->shader->
ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod));
- } else if (ii->opType() == Enums::OT_BARRIER) {
+ } else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod));
- } else if (ii->opType() == Enums::OT_FLAT_READ) {
+ } else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) {
@@ -816,7 +721,7 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
- } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
+ } else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
@@ -829,32 +734,32 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
- } else if (IS_OT_READ_GM(ii->opType())) {
+ } else if (ii->isLoad() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_GM(ii->opType())) {
+ } else if (ii->isStore() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_GM(ii->opType())) {
+ } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_READ_LM(ii->opType())) {
+ } else if (ii->isLoad() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_LM(ii->opType())) {
+ } else if (ii->isStore() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_LM(ii->opType())) {
+ } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].