-rw-r--r--   src/arch/hsail/SConscript                   |    1
-rw-r--r--   src/arch/hsail/generic_types.cc             |   47
-rw-r--r--   src/arch/hsail/generic_types.hh             |   16
-rw-r--r--   src/arch/hsail/insts/branch.hh              |   14
-rw-r--r--   src/arch/hsail/insts/decl.hh                |  125
-rw-r--r--   src/arch/hsail/insts/main.cc                |    5
-rw-r--r--   src/arch/hsail/insts/mem.cc                 |   63
-rw-r--r--   src/arch/hsail/insts/mem.hh                 |  542
-rw-r--r--   src/arch/hsail/insts/mem_impl.hh            |   25
-rw-r--r--   src/arch/hsail/insts/pseudo_inst.cc         |   31
-rw-r--r--   src/gpu-compute/GPU.py                      |  108
-rw-r--r--   src/gpu-compute/GPUStaticInstFlags.py       |  111
-rw-r--r--   src/gpu-compute/SConscript                  |    1
-rw-r--r--   src/gpu-compute/code_enums.hh               |  116
-rw-r--r--   src/gpu-compute/compute_unit.cc             |   26
-rw-r--r--   src/gpu-compute/compute_unit.hh             |    1
-rw-r--r--   src/gpu-compute/global_memory_pipeline.cc   |   23
-rw-r--r--   src/gpu-compute/gpu_dyn_inst.cc             |  382
-rw-r--r--   src/gpu-compute/gpu_dyn_inst.hh             |  219
-rw-r--r--   src/gpu-compute/gpu_static_inst.cc          |    6
-rw-r--r--   src/gpu-compute/gpu_static_inst.hh          |  167
-rw-r--r--   src/gpu-compute/kernel_cfg.cc               |   10
-rw-r--r--   src/gpu-compute/lds_state.cc                |    7
-rw-r--r--   src/gpu-compute/lds_state.hh                |    1
-rw-r--r--   src/gpu-compute/local_memory_pipeline.cc    |    9
-rw-r--r--   src/gpu-compute/shader.hh                   |    1
-rw-r--r--   src/gpu-compute/vector_register_file.cc     |    5
-rw-r--r--   src/gpu-compute/wavefront.cc                |  207
28 files changed, 1205 insertions(+), 1064 deletions(-)
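This patch replaces the old catch-all operation-type enums (Enums::OpType with its OT_* values, Enums::MemOpType with its MO_* values, and the IS_OT_* macros from code_enums.hh) with one set of orthogonal instruction flags, GPUStaticInstFlags. Each HSAIL instruction constructor now records its op class, memory order, memory scope, and address segment with setFlag(), and consumers (the compute unit, memory pipelines, dynamic instructions) test is*() predicates instead of comparing enum values. The sketch below illustrates the before/after call-site style only; the class and member names are simplified stand-ins, not the exact gem5 interfaces.

    #include <bitset>
    #include <cassert>

    // Illustrative subset of the flags added in GPUStaticInstFlags.py.
    enum Flags { Load, Store, AtomicReturn, AtomicNoReturn,
                 GlobalSegment, GroupSegment, Acquire, Release, NumFlags };

    // Minimal stand-in for how a static instruction can carry these flags.
    class InstSketch
    {
      public:
        void setFlag(Flags f) { flags.set(f); }

        // Predicates of the kind this patch uses in place of o_type/m_op checks.
        bool isLoad() const { return flags[Load]; }
        bool isStore() const { return flags[Store]; }
        bool isAtomic() const { return flags[AtomicReturn] || flags[AtomicNoReturn]; }
        bool isGlobalSeg() const { return flags[GlobalSegment]; }
        bool isLocalMem() const { return flags[GroupSegment]; }
        bool isAcquire() const { return flags[Acquire]; }

      private:
        std::bitset<NumFlags> flags;
    };

    int main()
    {
        InstSketch ld;

        // Before: o_type = Enums::OT_GLOBAL_READ, plus separate order/scope fields.
        // After: the constructor describes the instruction with orthogonal flags.
        ld.setFlag(Load);
        ld.setFlag(GlobalSegment);
        ld.setFlag(Acquire);

        // Before: if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) { ... }
        // After: the consumer queries predicates.
        assert(ld.isLoad() && !ld.isStore() && !ld.isAtomic());
        assert(ld.isGlobalSeg() && !ld.isLocalMem());
        assert(ld.isAcquire());

        return 0;
    }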
diff --git a/src/arch/hsail/SConscript b/src/arch/hsail/SConscript
index 3455823a6..251c103fd 100644
--- a/src/arch/hsail/SConscript
+++ b/src/arch/hsail/SConscript
@@ -43,7 +43,6 @@ if env['TARGET_GPU_ISA'] == 'hsail':
env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
'gen.py', '$SOURCE $TARGETS')
- Source('generic_types.cc')
Source('gpu_decoder.cc')
Source('insts/branch.cc')
Source('insts/gen_exec.cc')
diff --git a/src/arch/hsail/generic_types.cc b/src/arch/hsail/generic_types.cc
deleted file mode 100644
index 0cd55d1d5..000000000
--- a/src/arch/hsail/generic_types.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-#include "arch/hsail/generic_types.hh"
-#include "base/misc.hh"
-
-using namespace Brig;
-
-namespace HsailISA
-{
- Enums::GenericMemoryOrder
- getGenericMemoryOrder(BrigMemoryOrder brig_memory_order)
- {
- switch(brig_memory_order) {
- case BRIG_MEMORY_ORDER_NONE:
- return Enums::MEMORY_ORDER_NONE;
- case BRIG_MEMORY_ORDER_RELAXED:
- return Enums::MEMORY_ORDER_RELAXED;
- case BRIG_MEMORY_ORDER_SC_ACQUIRE:
- return Enums::MEMORY_ORDER_SC_ACQUIRE;
- case BRIG_MEMORY_ORDER_SC_RELEASE:
- return Enums::MEMORY_ORDER_SC_RELEASE;
- case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
- return Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE;
- default:
- fatal("HsailISA::MemInst::getGenericMemoryOrder -> ",
- "bad BrigMemoryOrder\n");
- }
- }
-
- Enums::GenericMemoryScope
- getGenericMemoryScope(BrigMemoryScope brig_memory_scope)
- {
- switch(brig_memory_scope) {
- case BRIG_MEMORY_SCOPE_NONE:
- return Enums::MEMORY_SCOPE_NONE;
- case BRIG_MEMORY_SCOPE_WORKITEM:
- return Enums::MEMORY_SCOPE_WORKITEM;
- case BRIG_MEMORY_SCOPE_WORKGROUP:
- return Enums::MEMORY_SCOPE_WORKGROUP;
- case BRIG_MEMORY_SCOPE_AGENT:
- return Enums::MEMORY_SCOPE_DEVICE;
- case BRIG_MEMORY_SCOPE_SYSTEM:
- return Enums::MEMORY_SCOPE_SYSTEM;
- default:
- fatal("HsailISA::MemInst::getGenericMemoryScope -> ",
- "bad BrigMemoryScope\n");
- }
- }
-} // namespace HsailISA
diff --git a/src/arch/hsail/generic_types.hh b/src/arch/hsail/generic_types.hh
deleted file mode 100644
index 50e430bef..000000000
--- a/src/arch/hsail/generic_types.hh
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __ARCH_HSAIL_GENERIC_TYPES_HH__
-#define __ARCH_HSAIL_GENERIC_TYPES_HH__
-
-#include "arch/hsail/Brig.h"
-#include "enums/GenericMemoryOrder.hh"
-#include "enums/GenericMemoryScope.hh"
-
-namespace HsailISA
-{
- Enums::GenericMemoryOrder
- getGenericMemoryOrder(Brig::BrigMemoryOrder brig_memory_order);
- Enums::GenericMemoryScope
- getGenericMemoryScope(Brig::BrigMemoryScope brig_memory_scope);
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_GENERIC_TYPES_HH__
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh
index 45cd876ad..89bcc1277 100644
--- a/src/arch/hsail/insts/branch.hh
+++ b/src/arch/hsail/insts/branch.hh
@@ -59,16 +59,15 @@ namespace HsailISA
BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "brn")
{
- o_type = Enums::OT_BRANCH;
+ setFlag(Branch);
+ setFlag(UnconditionalJump);
width = ((Brig::BrigInstBr*)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj);
- o_type = Enums::OT_BRANCH;
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
- bool unconditionalJumpInstruction() override { return true; }
bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isVectorRegister();
@@ -175,13 +174,12 @@ namespace HsailISA
CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "cbr")
{
- o_type = Enums::OT_BRANCH;
+ setFlag(Branch);
width = ((Brig::BrigInstBr *)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
cond.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
target.init(op_offs, obj);
- o_type = Enums::OT_BRANCH;
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
@@ -343,17 +341,15 @@ namespace HsailISA
BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "br")
{
- o_type = Enums::OT_BRANCH;
+ setFlag(Branch);
+ setFlag(UnconditionalJump);
width.init(((Brig::BrigInstBr *)ib)->width, obj);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj);
- o_type = Enums::OT_BRANCH;
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
- bool unconditionalJumpInstruction() override { return true; }
-
void execute(GPUDynInstPtr gpuDynInst) override;
bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
diff --git a/src/arch/hsail/insts/decl.hh b/src/arch/hsail/insts/decl.hh
index 48e022ff7..94f23ac1f 100644
--- a/src/arch/hsail/insts/decl.hh
+++ b/src/arch/hsail/insts/decl.hh
@@ -38,11 +38,9 @@
#include <cmath>
-#include "arch/hsail/generic_types.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"
#include "debug/HSAIL.hh"
-#include "enums/OpType.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
@@ -127,6 +125,8 @@ namespace HsailISA
const char *opcode)
: HsailGPUStaticInst(obj, opcode)
{
+ setFlag(ALU);
+
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
@@ -240,6 +240,8 @@ namespace HsailISA
const char *opcode)
: HsailGPUStaticInst(obj, opcode)
{
+ setFlag(ALU);
+
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
@@ -414,6 +416,8 @@ namespace HsailISA
const BrigObject *obj, const char *opcode)
: HsailGPUStaticInst(obj, opcode)
{
+ setFlag(ALU);
+
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
@@ -818,6 +822,8 @@ namespace HsailISA
const BrigObject *obj, const char *_opcode)
: HsailGPUStaticInst(obj, _opcode)
{
+ setFlag(ALU);
+
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
@@ -874,7 +880,7 @@ namespace HsailISA
Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "ret")
{
- o_type = Enums::OT_RET;
+ setFlag(GPUStaticInst::Return);
}
void execute(GPUDynInstPtr gpuDynInst);
@@ -889,7 +895,7 @@ namespace HsailISA
Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "barrier")
{
- o_type = Enums::OT_BARRIER;
+ setFlag(GPUStaticInst::MemBarrier);
assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
}
@@ -924,14 +930,105 @@ namespace HsailISA
memFenceMemOrder = (Brig::BrigMemoryOrder)
((Brig::BrigInstMemFence*)ib)->memoryOrder;
- // set o_type based on scopes
+ setFlag(MemoryRef);
+ setFlag(GPUStaticInst::MemFence);
+
+ switch (memFenceMemOrder) {
+ case Brig::BRIG_MEMORY_ORDER_NONE:
+ setFlag(NoOrder);
+ break;
+ case Brig::BRIG_MEMORY_ORDER_RELAXED:
+ setFlag(RelaxedOrder);
+ break;
+ case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
+ setFlag(Acquire);
+ break;
+ case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
+ setFlag(Release);
+ break;
+ case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+ setFlag(AcquireRelease);
+ break;
+ default:
+ fatal("MemInst has bad BrigMemoryOrder\n");
+ }
+
+ // set inst flags based on scopes
if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
- o_type = Enums::OT_BOTH_MEMFENCE;
+ setFlag(GPUStaticInst::GlobalSegment);
+
+ /**
+ * A memory fence that has scope for
+ * both segments will use the global
+ * segment, and be executed in the
+ * global memory pipeline, therefore,
+ * we set the segment to match the
+ * global scope only
+ */
+ switch (memFenceScopeSegGlobal) {
+ case Brig::BRIG_MEMORY_SCOPE_NONE:
+ setFlag(NoScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
+ setFlag(WorkitemScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
+ setFlag(WorkgroupScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_AGENT:
+ setFlag(DeviceScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
+ setFlag(SystemScope);
+ break;
+ default:
+ fatal("MemFence has bad global scope type\n");
+ }
} else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
- o_type = Enums::OT_GLOBAL_MEMFENCE;
+ setFlag(GPUStaticInst::GlobalSegment);
+
+ switch (memFenceScopeSegGlobal) {
+ case Brig::BRIG_MEMORY_SCOPE_NONE:
+ setFlag(NoScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
+ setFlag(WorkitemScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
+ setFlag(WorkgroupScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_AGENT:
+ setFlag(DeviceScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
+ setFlag(SystemScope);
+ break;
+ default:
+ fatal("MemFence has bad global scope type\n");
+ }
} else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
- o_type = Enums::OT_SHARED_MEMFENCE;
+ setFlag(GPUStaticInst::GroupSegment);
+
+ switch (memFenceScopeSegGroup) {
+ case Brig::BRIG_MEMORY_SCOPE_NONE:
+ setFlag(NoScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
+ setFlag(WorkitemScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
+ setFlag(WorkgroupScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_AGENT:
+ setFlag(DeviceScope);
+ break;
+ case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
+ setFlag(SystemScope);
+ break;
+ default:
+ fatal("MemFence has bad group scope type\n");
+ }
} else {
fatal("MemFence constructor: bad scope specifiers\n");
}
@@ -955,18 +1052,13 @@ namespace HsailISA
// etc.). We send a packet, tagged with the memory order and
// scope, and let the GPU coalescer handle it.
- if (o_type == Enums::OT_GLOBAL_MEMFENCE ||
- o_type == Enums::OT_BOTH_MEMFENCE) {
+ if (isGlobalSeg()) {
gpuDynInst->simdId = w->simdId;
gpuDynInst->wfSlotId = w->wfSlotId;
gpuDynInst->wfDynId = w->wfDynId;
gpuDynInst->kern_id = w->kernId;
gpuDynInst->cu_id = w->computeUnit->cu_id;
- gpuDynInst->memoryOrder =
- getGenericMemoryOrder(memFenceMemOrder);
- gpuDynInst->scope =
- getGenericMemoryScope(memFenceScopeSegGlobal);
gpuDynInst->useContinuation = false;
GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
gmp->getGMReqFIFO().push(gpuDynInst);
@@ -975,10 +1067,10 @@ namespace HsailISA
w->rdGmReqsInPipe--;
w->memReqsInPipe--;
w->outstandingReqs++;
- } else if (o_type == Enums::OT_SHARED_MEMFENCE) {
+ } else if (isGroupSeg()) {
// no-op
} else {
- fatal("MemFence execute: bad o_type\n");
+ fatal("MemFence execute: bad op type\n");
}
}
};
@@ -1054,6 +1146,7 @@ namespace HsailISA
Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "call")
{
+ setFlag(ALU);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc
index f1662430a..783689dd5 100644
--- a/src/arch/hsail/insts/main.cc
+++ b/src/arch/hsail/insts/main.cc
@@ -179,12 +179,13 @@ namespace HsailISA
w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
if (!refCount) {
+ setFlag(SystemScope);
+ setFlag(Release);
+ setFlag(GlobalSegment);
// Notify Memory System of Kernel Completion
// Kernel End = isKernel + isRelease
w->status = Wavefront::S_RETURNING;
GPUDynInstPtr local_mempacket = gpuDynInst;
- local_mempacket->memoryOrder = Enums::MEMORY_ORDER_SC_RELEASE;
- local_mempacket->scope = Enums::MEMORY_SCOPE_SYSTEM;
local_mempacket->useContinuation = false;
local_mempacket->simdId = w->simdId;
local_mempacket->wfSlotId = w->wfSlotId;
diff --git a/src/arch/hsail/insts/mem.cc b/src/arch/hsail/insts/mem.cc
index 97d4c902b..6a6928838 100644
--- a/src/arch/hsail/insts/mem.cc
+++ b/src/arch/hsail/insts/mem.cc
@@ -36,7 +36,6 @@
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/Brig.h"
-#include "enums/OpType.hh"
using namespace Brig;
@@ -44,68 +43,6 @@ namespace HsailISA
{
const char* atomicOpToString(BrigAtomicOperation brigOp);
- Enums::MemOpType
- brigAtomicToMemOpType(BrigOpcode brigOpCode, BrigAtomicOperation brigOp)
- {
- if (brigOpCode == Brig::BRIG_OPCODE_ATOMIC) {
- switch (brigOp) {
- case BRIG_ATOMIC_AND:
- return Enums::MO_AAND;
- case BRIG_ATOMIC_OR:
- return Enums::MO_AOR;
- case BRIG_ATOMIC_XOR:
- return Enums::MO_AXOR;
- case BRIG_ATOMIC_CAS:
- return Enums::MO_ACAS;
- case BRIG_ATOMIC_EXCH:
- return Enums::MO_AEXCH;
- case BRIG_ATOMIC_ADD:
- return Enums::MO_AADD;
- case BRIG_ATOMIC_WRAPINC:
- return Enums::MO_AINC;
- case BRIG_ATOMIC_WRAPDEC:
- return Enums::MO_ADEC;
- case BRIG_ATOMIC_MIN:
- return Enums::MO_AMIN;
- case BRIG_ATOMIC_MAX:
- return Enums::MO_AMAX;
- case BRIG_ATOMIC_SUB:
- return Enums::MO_ASUB;
- default:
- fatal("Bad BrigAtomicOperation code %d\n", brigOp);
- }
- } else if (brigOpCode == Brig::BRIG_OPCODE_ATOMICNORET) {
- switch (brigOp) {
- case BRIG_ATOMIC_AND:
- return Enums::MO_ANRAND;
- case BRIG_ATOMIC_OR:
- return Enums::MO_ANROR;
- case BRIG_ATOMIC_XOR:
- return Enums::MO_ANRXOR;
- case BRIG_ATOMIC_CAS:
- return Enums::MO_ANRCAS;
- case BRIG_ATOMIC_EXCH:
- return Enums::MO_ANREXCH;
- case BRIG_ATOMIC_ADD:
- return Enums::MO_ANRADD;
- case BRIG_ATOMIC_WRAPINC:
- return Enums::MO_ANRINC;
- case BRIG_ATOMIC_WRAPDEC:
- return Enums::MO_ANRDEC;
- case BRIG_ATOMIC_MIN:
- return Enums::MO_ANRMIN;
- case BRIG_ATOMIC_MAX:
- return Enums::MO_ANRMAX;
- case BRIG_ATOMIC_SUB:
- return Enums::MO_ANRSUB;
- default:
- fatal("Bad BrigAtomicOperation code %d\n", brigOp);
- }
- } else {
- fatal("Bad BrigAtomicOpcode %d\n", brigOpCode);
- }
- }
-
const char*
atomicOpToString(BrigAtomicOperation brigOp)
{
diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh
index acc8434be..e223c7cf5 100644
--- a/src/arch/hsail/insts/mem.hh
+++ b/src/arch/hsail/insts/mem.hh
@@ -96,6 +96,8 @@ namespace HsailISA
{
using namespace Brig;
+ setFlag(ALU);
+
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
dest.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
@@ -211,143 +213,119 @@ namespace HsailISA
Brig::BrigMemoryOrder memoryOrder;
Brig::BrigMemoryScope memoryScope;
unsigned int equivClass;
- bool isArgLoad()
- {
- return segment == Brig::BRIG_SEGMENT_KERNARG ||
- segment == Brig::BRIG_SEGMENT_ARG;
- }
- void
- initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
+
+ LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
{
using namespace Brig;
- const BrigInstMem *ldst = (const BrigInstMem*)ib;
+ setFlag(MemoryRef);
+ setFlag(Load);
- segment = (BrigSegment)ldst->segment;
- memoryOrder = BRIG_MEMORY_ORDER_NONE;
- memoryScope = BRIG_MEMORY_SCOPE_NONE;
- equivClass = ldst->equivClass;
+ if (ib->opcode == BRIG_OPCODE_LD) {
+ const BrigInstMem *ldst = (const BrigInstMem*)ib;
- switch (segment) {
- case BRIG_SEGMENT_GLOBAL:
- o_type = Enums::OT_GLOBAL_READ;
- break;
+ segment = (BrigSegment)ldst->segment;
+ memoryOrder = BRIG_MEMORY_ORDER_NONE;
+ memoryScope = BRIG_MEMORY_SCOPE_NONE;
+ equivClass = ldst->equivClass;
- case BRIG_SEGMENT_GROUP:
- o_type = Enums::OT_SHARED_READ;
- break;
+ width = ldst->width;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+ if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
+ dest.init(op_offs, obj);
- case BRIG_SEGMENT_PRIVATE:
- o_type = Enums::OT_PRIVATE_READ;
- break;
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ } else {
+ const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
- case BRIG_SEGMENT_READONLY:
- o_type = Enums::OT_READONLY_READ;
- break;
+ segment = (BrigSegment)at->segment;
+ memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+ memoryScope = (BrigMemoryScope)at->memoryScope;
+ equivClass = 0;
- case BRIG_SEGMENT_SPILL:
- o_type = Enums::OT_SPILL_READ;
- break;
+ width = BRIG_WIDTH_1;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
- case BRIG_SEGMENT_FLAT:
- o_type = Enums::OT_FLAT_READ;
- break;
+ if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
+ dest.init(op_offs, obj);
- case BRIG_SEGMENT_KERNARG:
- o_type = Enums::OT_KERN_READ;
- break;
+ op_offs = obj->getOperandPtr(ib->operands,1);
+ addr.init(op_offs, obj);
+ }
- case BRIG_SEGMENT_ARG:
- o_type = Enums::OT_ARG;
+ switch (memoryOrder) {
+ case BRIG_MEMORY_ORDER_NONE:
+ setFlag(NoOrder);
+ break;
+ case BRIG_MEMORY_ORDER_RELAXED:
+ setFlag(RelaxedOrder);
+ break;
+ case BRIG_MEMORY_ORDER_SC_ACQUIRE:
+ setFlag(Acquire);
+ break;
+ case BRIG_MEMORY_ORDER_SC_RELEASE:
+ setFlag(Release);
+ break;
+ case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+ setFlag(AcquireRelease);
break;
-
default:
- panic("Ld: segment %d not supported\n", segment);
+ fatal("LdInst has bad memory order type\n");
}
- width = ldst->width;
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
- if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
- dest.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- addr.init(op_offs, obj);
- }
-
- void
- initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- {
- using namespace Brig;
-
- const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
- segment = (BrigSegment)at->segment;
- memoryOrder = (BrigMemoryOrder)at->memoryOrder;
- memoryScope = (BrigMemoryScope)at->memoryScope;
- equivClass = 0;
+ switch (memoryScope) {
+ case BRIG_MEMORY_SCOPE_NONE:
+ setFlag(NoScope);
+ break;
+ case BRIG_MEMORY_SCOPE_WORKITEM:
+ setFlag(WorkitemScope);
+ break;
+ case BRIG_MEMORY_SCOPE_WORKGROUP:
+ setFlag(WorkgroupScope);
+ break;
+ case BRIG_MEMORY_SCOPE_AGENT:
+ setFlag(DeviceScope);
+ break;
+ case BRIG_MEMORY_SCOPE_SYSTEM:
+ setFlag(SystemScope);
+ break;
+ default:
+ fatal("LdInst has bad memory scope type\n");
+ }
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
- o_type = Enums::OT_GLOBAL_READ;
+ setFlag(GlobalSegment);
break;
-
case BRIG_SEGMENT_GROUP:
- o_type = Enums::OT_SHARED_READ;
+ setFlag(GroupSegment);
break;
-
case BRIG_SEGMENT_PRIVATE:
- o_type = Enums::OT_PRIVATE_READ;
+ setFlag(PrivateSegment);
break;
-
case BRIG_SEGMENT_READONLY:
- o_type = Enums::OT_READONLY_READ;
+ setFlag(ReadOnlySegment);
break;
-
case BRIG_SEGMENT_SPILL:
- o_type = Enums::OT_SPILL_READ;
+ setFlag(SpillSegment);
break;
-
case BRIG_SEGMENT_FLAT:
- o_type = Enums::OT_FLAT_READ;
+ setFlag(Flat);
break;
-
case BRIG_SEGMENT_KERNARG:
- o_type = Enums::OT_KERN_READ;
+ setFlag(KernArgSegment);
break;
-
case BRIG_SEGMENT_ARG:
- o_type = Enums::OT_ARG;
+ setFlag(ArgSegment);
break;
-
default:
panic("Ld: segment %d not supported\n", segment);
}
-
- width = BRIG_WIDTH_1;
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
-
- if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
- dest.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands,1);
- addr.init(op_offs, obj);
- }
-
- LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- using namespace Brig;
-
- if (ib->opcode == BRIG_OPCODE_LD) {
- initLd(ib, obj, _opcode);
- } else {
- initAtomicLd(ib, obj, _opcode);
- }
}
int numSrcRegOperands() override
@@ -473,7 +451,7 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
- if (isLocalMem()) {
+ if (this->isLocalMem()) {
// load from shared memory
*d = gpuDynInst->wavefront()->ldsChunk->
read<c0>(vaddr);
@@ -488,8 +466,7 @@ namespace HsailISA
if (gpuDynInst->computeUnit()->shader->
separate_acquire_release &&
- gpuDynInst->memoryOrder ==
- Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ gpuDynInst->isAcquire()) {
// if this load has acquire semantics,
// set the response continuation function
// to perform an Acquire request
@@ -520,10 +497,9 @@ namespace HsailISA
{
// after the load has complete and if the load has acquire
// semantics, issue an acquire request.
- if (!isLocalMem()) {
+ if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release
- && gpuDynInst->memoryOrder ==
- Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ && gpuDynInst->isAcquire()) {
gpuDynInst->statusBitVector = VectorMask(1);
gpuDynInst->useContinuation = false;
// create request
@@ -537,12 +513,6 @@ namespace HsailISA
}
public:
- bool
- isLocalMem() const override
- {
- return this->segment == Brig::BRIG_SEGMENT_GROUP;
- }
-
bool isVectorRegister(int operandIndex) override
{
assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
@@ -731,128 +701,113 @@ namespace HsailISA
Brig::BrigMemoryOrder memoryOrder;
unsigned int equivClass;
- void
- initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
+ StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
{
using namespace Brig;
- const BrigInstMem *ldst = (const BrigInstMem*)ib;
+ setFlag(MemoryRef);
+ setFlag(Store);
- segment = (BrigSegment)ldst->segment;
- memoryOrder = BRIG_MEMORY_ORDER_NONE;
- memoryScope = BRIG_MEMORY_SCOPE_NONE;
- equivClass = ldst->equivClass;
+ if (ib->opcode == BRIG_OPCODE_ST) {
+ const BrigInstMem *ldst = (const BrigInstMem*)ib;
- switch (segment) {
- case BRIG_SEGMENT_GLOBAL:
- o_type = Enums::OT_GLOBAL_WRITE;
- break;
+ segment = (BrigSegment)ldst->segment;
+ memoryOrder = BRIG_MEMORY_ORDER_NONE;
+ memoryScope = BRIG_MEMORY_SCOPE_NONE;
+ equivClass = ldst->equivClass;
- case BRIG_SEGMENT_GROUP:
- o_type = Enums::OT_SHARED_WRITE;
- break;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const BrigOperand *baseOp = obj->getOperand(op_offs);
- case BRIG_SEGMENT_PRIVATE:
- o_type = Enums::OT_PRIVATE_WRITE;
- break;
+ if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
+ (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
+ src.init(op_offs, obj);
+ }
- case BRIG_SEGMENT_READONLY:
- o_type = Enums::OT_READONLY_WRITE;
- break;
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ } else {
+ const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
- case BRIG_SEGMENT_SPILL:
- o_type = Enums::OT_SPILL_WRITE;
- break;
+ segment = (BrigSegment)at->segment;
+ memoryScope = (BrigMemoryScope)at->memoryScope;
+ memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+ equivClass = 0;
- case BRIG_SEGMENT_FLAT:
- o_type = Enums::OT_FLAT_WRITE;
- break;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ addr.init(op_offs, obj);
- case BRIG_SEGMENT_ARG:
- o_type = Enums::OT_ARG;
- break;
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ src.init(op_offs, obj);
+ }
+ switch (memoryOrder) {
+ case BRIG_MEMORY_ORDER_NONE:
+ setFlag(NoOrder);
+ break;
+ case BRIG_MEMORY_ORDER_RELAXED:
+ setFlag(RelaxedOrder);
+ break;
+ case BRIG_MEMORY_ORDER_SC_ACQUIRE:
+ setFlag(Acquire);
+ break;
+ case BRIG_MEMORY_ORDER_SC_RELEASE:
+ setFlag(Release);
+ break;
+ case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+ setFlag(AcquireRelease);
+ break;
default:
- panic("St: segment %d not supported\n", segment);
+ fatal("StInst has bad memory order type\n");
}
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- const BrigOperand *baseOp = obj->getOperand(op_offs);
-
- if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
- (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
- src.init(op_offs, obj);
+ switch (memoryScope) {
+ case BRIG_MEMORY_SCOPE_NONE:
+ setFlag(NoScope);
+ break;
+ case BRIG_MEMORY_SCOPE_WORKITEM:
+ setFlag(WorkitemScope);
+ break;
+ case BRIG_MEMORY_SCOPE_WORKGROUP:
+ setFlag(WorkgroupScope);
+ break;
+ case BRIG_MEMORY_SCOPE_AGENT:
+ setFlag(DeviceScope);
+ break;
+ case BRIG_MEMORY_SCOPE_SYSTEM:
+ setFlag(SystemScope);
+ break;
+ default:
+ fatal("StInst has bad memory scope type\n");
}
- op_offs = obj->getOperandPtr(ib->operands, 1);
- addr.init(op_offs, obj);
- }
-
- void
- initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- {
- using namespace Brig;
-
- const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
- segment = (BrigSegment)at->segment;
- memoryScope = (BrigMemoryScope)at->memoryScope;
- memoryOrder = (BrigMemoryOrder)at->memoryOrder;
- equivClass = 0;
-
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
- o_type = Enums::OT_GLOBAL_WRITE;
+ setFlag(GlobalSegment);
break;
-
case BRIG_SEGMENT_GROUP:
- o_type = Enums::OT_SHARED_WRITE;
+ setFlag(GroupSegment);
break;
-
case BRIG_SEGMENT_PRIVATE:
- o_type = Enums::OT_PRIVATE_WRITE;
+ setFlag(PrivateSegment);
break;
-
case BRIG_SEGMENT_READONLY:
- o_type = Enums::OT_READONLY_WRITE;
+ setFlag(ReadOnlySegment);
break;
-
case BRIG_SEGMENT_SPILL:
- o_type = Enums::OT_SPILL_WRITE;
+ setFlag(SpillSegment);
break;
-
case BRIG_SEGMENT_FLAT:
- o_type = Enums::OT_FLAT_WRITE;
+ setFlag(Flat);
break;
-
case BRIG_SEGMENT_ARG:
- o_type = Enums::OT_ARG;
+ setFlag(ArgSegment);
break;
-
default:
panic("St: segment %d not supported\n", segment);
}
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- addr.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- src.init(op_offs, obj);
- }
-
- StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- using namespace Brig;
-
- if (ib->opcode == BRIG_OPCODE_ST) {
- initSt(ib, obj, _opcode);
- } else {
- initAtomicSt(ib, obj, _opcode);
- }
}
int numDstRegOperands() override { return 0; }
@@ -964,10 +919,9 @@ namespace HsailISA
{
// before performing a store, check if this store has
// release semantics, and if so issue a release first
- if (!isLocalMem()) {
+ if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release
- && gpuDynInst->memoryOrder ==
- Enums::MEMORY_ORDER_SC_RELEASE) {
+ && gpuDynInst->isRelease()) {
gpuDynInst->statusBitVector = VectorMask(1);
gpuDynInst->execContinuation = &GPUStaticInst::execSt;
@@ -987,12 +941,6 @@ namespace HsailISA
execSt(gpuDynInst);
}
- bool
- isLocalMem() const override
- {
- return this->segment == Brig::BRIG_SEGMENT_GROUP;
- }
-
private:
// execSt may be called through a continuation
// if the store had release semantics. see comment for
@@ -1020,7 +968,7 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
- if (isLocalMem()) {
+ if (this->isLocalMem()) {
//store to shared memory
gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
*d);
@@ -1166,9 +1114,6 @@ namespace HsailISA
}
}
- Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
- Brig::BrigAtomicOperation brigOp);
-
template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
bool HasDst>
class AtomicInstBase : public HsailGPUStaticInst
@@ -1183,7 +1128,6 @@ namespace HsailISA
Brig::BrigAtomicOperation atomicOperation;
Brig::BrigMemoryScope memoryScope;
Brig::BrigOpcode opcode;
- Enums::MemOpType opType;
AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
const char *_opcode)
@@ -1198,21 +1142,106 @@ namespace HsailISA
memoryOrder = (BrigMemoryOrder)at->memoryOrder;
atomicOperation = (BrigAtomicOperation)at->atomicOperation;
opcode = (BrigOpcode)ib->opcode;
- opType = brigAtomicToMemOpType(opcode, atomicOperation);
+
+ assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
+ opcode == Brig::BRIG_OPCODE_ATOMIC);
+
+ setFlag(MemoryRef);
+
+ if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+
+ switch (memoryOrder) {
+ case BRIG_MEMORY_ORDER_NONE:
+ setFlag(NoOrder);
+ break;
+ case BRIG_MEMORY_ORDER_RELAXED:
+ setFlag(RelaxedOrder);
+ break;
+ case BRIG_MEMORY_ORDER_SC_ACQUIRE:
+ setFlag(Acquire);
+ break;
+ case BRIG_MEMORY_ORDER_SC_RELEASE:
+ setFlag(Release);
+ break;
+ case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+ setFlag(AcquireRelease);
+ break;
+ default:
+ fatal("AtomicInst has bad memory order type\n");
+ }
+
+ switch (memoryScope) {
+ case BRIG_MEMORY_SCOPE_NONE:
+ setFlag(NoScope);
+ break;
+ case BRIG_MEMORY_SCOPE_WORKITEM:
+ setFlag(WorkitemScope);
+ break;
+ case BRIG_MEMORY_SCOPE_WORKGROUP:
+ setFlag(WorkgroupScope);
+ break;
+ case BRIG_MEMORY_SCOPE_AGENT:
+ setFlag(DeviceScope);
+ break;
+ case BRIG_MEMORY_SCOPE_SYSTEM:
+ setFlag(SystemScope);
+ break;
+ default:
+ fatal("AtomicInst has bad memory scope type\n");
+ }
+
+ switch (atomicOperation) {
+ case Brig::BRIG_ATOMIC_AND:
+ setFlag(AtomicAnd);
+ break;
+ case Brig::BRIG_ATOMIC_OR:
+ setFlag(AtomicOr);
+ break;
+ case Brig::BRIG_ATOMIC_XOR:
+ setFlag(AtomicXor);
+ break;
+ case Brig::BRIG_ATOMIC_CAS:
+ setFlag(AtomicCAS);
+ break;
+ case Brig::BRIG_ATOMIC_EXCH:
+ setFlag(AtomicExch);
+ break;
+ case Brig::BRIG_ATOMIC_ADD:
+ setFlag(AtomicAdd);
+ break;
+ case Brig::BRIG_ATOMIC_WRAPINC:
+ setFlag(AtomicInc);
+ break;
+ case Brig::BRIG_ATOMIC_WRAPDEC:
+ setFlag(AtomicDec);
+ break;
+ case Brig::BRIG_ATOMIC_MIN:
+ setFlag(AtomicMin);
+ break;
+ case Brig::BRIG_ATOMIC_MAX:
+ setFlag(AtomicMax);
+ break;
+ case Brig::BRIG_ATOMIC_SUB:
+ setFlag(AtomicSub);
+ break;
+ default:
+ fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
+ }
switch (segment) {
case BRIG_SEGMENT_GLOBAL:
- o_type = Enums::OT_GLOBAL_ATOMIC;
+ setFlag(GlobalSegment);
break;
-
case BRIG_SEGMENT_GROUP:
- o_type = Enums::OT_SHARED_ATOMIC;
+ setFlag(GroupSegment);
break;
-
case BRIG_SEGMENT_FLAT:
- o_type = Enums::OT_FLAT_ATOMIC;
+ setFlag(Flat);
break;
-
default:
panic("Atomic: segment %d not supported\n", segment);
}
@@ -1354,11 +1383,10 @@ namespace HsailISA
{
// before doing the RMW, check if this atomic has
// release semantics, and if so issue a release first
- if (!isLocalMem()) {
+ if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release
- && (gpuDynInst->memoryOrder ==
- Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
- Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
+ && (gpuDynInst->isRelease()
+ || gpuDynInst->isAcquireRelease())) {
gpuDynInst->statusBitVector = VectorMask(1);
@@ -1383,12 +1411,6 @@ namespace HsailISA
void execute(GPUDynInstPtr gpuDynInst) override;
- bool
- isLocalMem() const override
- {
- return this->segment == Brig::BRIG_SEGMENT_GROUP;
- }
-
private:
// execAtomic may be called through a continuation
// if the RMW had release semantics. see comment for
@@ -1408,72 +1430,48 @@ namespace HsailISA
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i];
- if (isLocalMem()) {
+ if (this->isLocalMem()) {
Wavefront *wavefront = gpuDynInst->wavefront();
*d = wavefront->ldsChunk->read<c0>(vaddr);
- switch (this->opType) {
- case Enums::MO_AADD:
- case Enums::MO_ANRADD:
+ if (this->isAtomicAdd()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) + (*e));
- break;
- case Enums::MO_ASUB:
- case Enums::MO_ANRSUB:
+ } else if (this->isAtomicSub()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) - (*e));
- break;
- case Enums::MO_AMAX:
- case Enums::MO_ANRMAX:
+ } else if (this->isAtomicMax()) {
wavefront->ldsChunk->write<c0>(vaddr,
std::max(wavefront->ldsChunk->read<c0>(vaddr),
(*e)));
- break;
- case Enums::MO_AMIN:
- case Enums::MO_ANRMIN:
+ } else if (this->isAtomicMin()) {
wavefront->ldsChunk->write<c0>(vaddr,
std::min(wavefront->ldsChunk->read<c0>(vaddr),
(*e)));
- break;
- case Enums::MO_AAND:
- case Enums::MO_ANRAND:
+ } else if (this->isAtomicAnd()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) & (*e));
- break;
- case Enums::MO_AOR:
- case Enums::MO_ANROR:
+ } else if (this->isAtomicOr()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) | (*e));
- break;
- case Enums::MO_AXOR:
- case Enums::MO_ANRXOR:
+ } else if (this->isAtomicXor()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
- break;
- case Enums::MO_AINC:
- case Enums::MO_ANRINC:
+ } else if (this->isAtomicInc()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) + 1);
- break;
- case Enums::MO_ADEC:
- case Enums::MO_ANRDEC:
+ } else if (this->isAtomicDec()) {
wavefront->ldsChunk->write<c0>(vaddr,
wavefront->ldsChunk->read<c0>(vaddr) - 1);
- break;
- case Enums::MO_AEXCH:
- case Enums::MO_ANREXCH:
+ } else if (this->isAtomicExch()) {
wavefront->ldsChunk->write<c0>(vaddr, (*e));
- break;
- case Enums::MO_ACAS:
- case Enums::MO_ANRCAS:
+ } else if (this->isAtomicCAS()) {
wavefront->ldsChunk->write<c0>(vaddr,
(wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
(*f) : wavefront->ldsChunk->read<c0>(vaddr));
- break;
- default:
+ } else {
fatal("Unrecognized or invalid HSAIL atomic op "
"type.\n");
- break;
}
} else {
Request *req =
@@ -1481,7 +1479,7 @@ namespace HsailISA
gpuDynInst->computeUnit()->masterId(),
0, gpuDynInst->wfDynId,
gpuDynInst->makeAtomicOpFunctor<c0>(e,
- f, this->opType));
+ f));
gpuDynInst->setRequestFlags(req);
PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
@@ -1489,8 +1487,7 @@ namespace HsailISA
if (gpuDynInst->computeUnit()->shader->
separate_acquire_release &&
- (gpuDynInst->memoryOrder ==
- Enums::MEMORY_ORDER_SC_ACQUIRE)) {
+ (gpuDynInst->isAcquire())) {
// if this atomic has acquire semantics,
// schedule the continuation to perform an
// acquire after the RMW completes
@@ -1523,10 +1520,9 @@ namespace HsailISA
{
// after performing the RMW, check to see if this instruction
// has acquire semantics, and if so, issue an acquire
- if (!isLocalMem()) {
+ if (!this->isLocalMem()) {
if (gpuDynInst->computeUnit()->shader->separate_acquire_release
- && gpuDynInst->memoryOrder ==
- Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ && gpuDynInst->isAcquire()) {
gpuDynInst->statusBitVector = VectorMask(1);
// the request will be finished when
diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh
index e3529f914..c175f2782 100644
--- a/src/arch/hsail/insts/mem_impl.hh
+++ b/src/arch/hsail/insts/mem_impl.hh
@@ -33,7 +33,6 @@
* Author: Steve Reinhardt
*/
-#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"
// defined in code.cc, but not worth sucking in all of code.h for this
@@ -215,16 +214,12 @@ namespace HsailISA
this->addr.calcVector(w, m->addr);
- m->m_op = Enums::MO_LD;
m->m_type = MemDataType::memType;
m->v_type = DestDataType::vgprType;
m->exec_mask = w->execMask();
m->statusBitVector = 0;
m->equiv = this->equivClass;
- m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
-
- m->scope = getGenericMemoryScope(this->memoryScope);
if (num_dest_operands == 1) {
m->dst_reg = this->dest.regIndex();
@@ -245,7 +240,6 @@ namespace HsailISA
switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL:
- m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
@@ -276,7 +270,6 @@ namespace HsailISA
case Brig::BRIG_SEGMENT_SPILL:
assert(num_dest_operands == 1);
- m->s_type = SEG_SPILL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
@@ -301,7 +294,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_GROUP:
- m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
@@ -310,7 +302,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_READONLY:
- m->s_type = SEG_READONLY;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
@@ -327,7 +318,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_PRIVATE:
- m->s_type = SEG_PRIVATE;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
@@ -408,7 +398,6 @@ namespace HsailISA
}
}
- m->m_op = Enums::MO_ST;
m->m_type = OperationType::memType;
m->v_type = OperationType::vgprType;
@@ -421,10 +410,6 @@ namespace HsailISA
m->n_reg = num_src_operands;
}
- m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
-
- m->scope = getGenericMemoryScope(this->memoryScope);
-
m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId;
@@ -434,7 +419,6 @@ namespace HsailISA
switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL:
- m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
@@ -463,7 +447,6 @@ namespace HsailISA
case Brig::BRIG_SEGMENT_SPILL:
assert(num_src_operands == 1);
- m->s_type = SEG_SPILL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
@@ -483,7 +466,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_GROUP:
- m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
@@ -492,7 +474,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_PRIVATE:
- m->s_type = SEG_PRIVATE;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
@@ -586,7 +567,6 @@ namespace HsailISA
assert(NumSrcOperands <= 2);
- m->m_op = this->opType;
m->m_type = DataType::memType;
m->v_type = DataType::vgprType;
@@ -594,9 +574,6 @@ namespace HsailISA
m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1;
- m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
-
- m->scope = getGenericMemoryScope(this->memoryScope);
if (HasDst) {
m->dst_reg = this->dest.regIndex();
@@ -611,7 +588,6 @@ namespace HsailISA
switch (this->segment) {
case Brig::BRIG_SEGMENT_GLOBAL:
- m->s_type = SEG_GLOBAL;
m->latency.set(w->computeUnit->shader->ticks(64));
m->pipeId = GLBMEM_PIPE;
@@ -623,7 +599,6 @@ namespace HsailISA
break;
case Brig::BRIG_SEGMENT_GROUP:
- m->s_type = SEG_SHARED;
m->pipeId = LDSMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(24));
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc
index 2bfc5aaad..bfffb7d8f 100644
--- a/src/arch/hsail/insts/pseudo_inst.cc
+++ b/src/arch/hsail/insts/pseudo_inst.cc
@@ -627,8 +627,12 @@ namespace HsailISA
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
}
- m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
- Brig::BRIG_ATOMIC_ADD);
+ setFlag(AtomicNoReturn);
+ setFlag(AtomicAdd);
+ setFlag(NoScope);
+ setFlag(NoOrder);
+ setFlag(GlobalSegment);
+
m->m_type = U32::memType;
m->v_type = U32::vgprType;
@@ -636,15 +640,12 @@ namespace HsailISA
m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1;
- m->memoryOrder = Enums::MEMORY_ORDER_NONE;
- m->scope = Enums::MEMORY_SCOPE_NONE;
m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt);
- m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(64));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
@@ -666,8 +667,12 @@ namespace HsailISA
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
}
- m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
- Brig::BRIG_ATOMIC_ADD);
+ setFlag(AtomicNoReturn);
+ setFlag(AtomicAdd);
+ setFlag(NoScope);
+ setFlag(NoOrder);
+ setFlag(GlobalSegment);
+
m->m_type = U32::memType;
m->v_type = U32::vgprType;
@@ -675,15 +680,12 @@ namespace HsailISA
m->statusBitVector = 0;
m->equiv = 0; // atomics don't have an equivalence class operand
m->n_reg = 1;
- m->memoryOrder = Enums::MEMORY_ORDER_NONE;
- m->scope = Enums::MEMORY_SCOPE_NONE;
m->simdId = w->simdId;
m->wfSlotId = w->wfSlotId;
m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt);
- m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(64));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
@@ -702,7 +704,11 @@ namespace HsailISA
// calculate the address
calcAddr(w, m);
- m->m_op = Enums::MO_LD;
+ setFlag(Load);
+ setFlag(NoScope);
+ setFlag(NoOrder);
+ setFlag(GlobalSegment);
+
m->m_type = U32::memType; //MemDataType::memType;
m->v_type = U32::vgprType; //DestDataType::vgprType;
@@ -710,8 +716,6 @@ namespace HsailISA
m->statusBitVector = 0;
m->equiv = 0;
m->n_reg = 1;
- m->memoryOrder = Enums::MEMORY_ORDER_NONE;
- m->scope = Enums::MEMORY_SCOPE_NONE;
// FIXME
//m->dst_reg = this->dest.regIndex();
@@ -721,7 +725,6 @@ namespace HsailISA
m->wfDynId = w->wfDynId;
m->latency.init(&w->computeUnit->shader->tick_cnt);
- m->s_type = SEG_GLOBAL;
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index f580a09f7..b672f616c 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -171,56 +171,6 @@ class GpuDispatcher(DmaDevice):
cl_driver = Param.ClDriver('pointer to driver')
-class OpType(Enum): vals = [
- 'OT_NULL',
- 'OT_ALU',
- 'OT_SPECIAL',
- 'OT_GLOBAL_READ',
- 'OT_GLOBAL_WRITE',
- 'OT_GLOBAL_ATOMIC',
- 'OT_GLOBAL_HIST',
- 'OT_GLOBAL_LDAS',
- 'OT_SHARED_READ',
- 'OT_SHARED_WRITE',
- 'OT_SHARED_ATOMIC',
- 'OT_SHARED_HIST',
- 'OT_SHARED_LDAS',
- 'OT_PRIVATE_READ',
- 'OT_PRIVATE_WRITE',
- 'OT_PRIVATE_ATOMIC',
- 'OT_PRIVATE_HIST',
- 'OT_PRIVATE_LDAS',
- 'OT_SPILL_READ',
- 'OT_SPILL_WRITE',
- 'OT_SPILL_ATOMIC',
- 'OT_SPILL_HIST',
- 'OT_SPILL_LDAS',
- 'OT_READONLY_READ',
- 'OT_READONLY_WRITE',
- 'OT_READONLY_ATOMIC',
- 'OT_READONLY_HIST',
- 'OT_READONLY_LDAS',
- 'OT_FLAT_READ',
- 'OT_FLAT_WRITE',
- 'OT_FLAT_ATOMIC',
- 'OT_FLAT_HIST',
- 'OT_FLAT_LDAS',
- 'OT_KERN_READ',
- 'OT_BRANCH',
-
- # note: Only the OT_BOTH_MEMFENCE seems to be supported in the 1.0F version
- # of the compiler.
- 'OT_SHARED_MEMFENCE',
- 'OT_GLOBAL_MEMFENCE',
- 'OT_BOTH_MEMFENCE',
-
- 'OT_BARRIER',
- 'OT_PRINT',
- 'OT_RET',
- 'OT_NOP',
- 'OT_ARG'
- ]
-
class MemType(Enum): vals = [
'M_U8',
'M_U16',
@@ -235,47 +185,6 @@ class MemType(Enum): vals = [
'M_F64',
]
-class MemOpType(Enum): vals = [
- 'MO_LD',
- 'MO_ST',
- 'MO_LDAS',
- 'MO_LDA',
- 'MO_AAND',
- 'MO_AOR',
- 'MO_AXOR',
- 'MO_ACAS',
- 'MO_AEXCH',
- 'MO_AADD',
- 'MO_ASUB',
- 'MO_AINC',
- 'MO_ADEC',
- 'MO_AMAX',
- 'MO_AMIN',
- 'MO_ANRAND',
- 'MO_ANROR',
- 'MO_ANRXOR',
- 'MO_ANRCAS',
- 'MO_ANREXCH',
- 'MO_ANRADD',
- 'MO_ANRSUB',
- 'MO_ANRINC',
- 'MO_ANRDEC',
- 'MO_ANRMAX',
- 'MO_ANRMIN',
- 'MO_HAND',
- 'MO_HOR',
- 'MO_HXOR',
- 'MO_HCAS',
- 'MO_HEXCH',
- 'MO_HADD',
- 'MO_HSUB',
- 'MO_HINC',
- 'MO_HDEC',
- 'MO_HMAX',
- 'MO_HMIN',
- 'MO_UNDEF'
- ]
-
class StorageClassType(Enum): vals = [
'SC_SPILL',
'SC_GLOBAL',
@@ -293,20 +202,3 @@ class RegisterType(Enum): vals = [
'RT_HARDWARE',
'RT_NONE',
]
-
-class GenericMemoryOrder(Enum): vals = [
- 'MEMORY_ORDER_NONE',
- 'MEMORY_ORDER_RELAXED',
- 'MEMORY_ORDER_SC_ACQUIRE',
- 'MEMORY_ORDER_SC_RELEASE',
- 'MEMORY_ORDER_SC_ACQUIRE_RELEASE',
- ]
-
-class GenericMemoryScope(Enum): vals = [
- 'MEMORY_SCOPE_NONE',
- 'MEMORY_SCOPE_WORKITEM',
- 'MEMORY_SCOPE_WAVEFRONT',
- 'MEMORY_SCOPE_WORKGROUP',
- 'MEMORY_SCOPE_DEVICE',
- 'MEMORY_SCOPE_SYSTEM',
- ]
diff --git a/src/gpu-compute/GPUStaticInstFlags.py b/src/gpu-compute/GPUStaticInstFlags.py
new file mode 100644
index 000000000..453fdced2
--- /dev/null
+++ b/src/gpu-compute/GPUStaticInstFlags.py
@@ -0,0 +1,111 @@
+# Copyright (c) 2016 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Anthony Gutierrez
+
+from m5.params import *
+
+class GPUStaticInstFlags(Enum):
+ wrapper_name = 'GPUStaticInstFlags'
+ wrapper_is_struct = True
+ enum_name = 'Flags'
+
+ vals = [
+ # Op types
+ 'ALU', # ALU op
+ 'Branch', # Branch instruction
+ 'Nop', # No-op (no effect at all)
+ 'Return', # Return instruction
+ 'UnconditionalJump', #
+ 'SpecialOp', # Special op
+ 'Waitcnt', # Is a waitcnt instruction
+
+ # Memory ops
+ 'MemBarrier', # Barrier instruction
+ 'MemFence', # Memory fence instruction
+ 'MemoryRef', # References memory (load, store, or atomic)
+ 'Flat', # Flat memory op
+ 'Load', # Reads from memory
+ 'Store', # Writes to memory
+
+ # Atomic ops
+ 'AtomicReturn', # Atomic instruction that returns data
+ 'AtomicNoReturn', # Atomic instruction that doesn't return data
+
+ # Instruction attributes
+ 'Scalar', # A scalar (not vector) operation
+ 'ReadsSCC', # The instruction reads SCC
+ 'WritesSCC', # The instruction writes SCC
+ 'ReadsVCC', # The instruction reads VCC
+ 'WritesVCC', # The instruction writes VCC
+
+ # Atomic OP types
+ 'AtomicAnd',
+ 'AtomicOr',
+ 'AtomicXor',
+ 'AtomicCAS',
+ 'AtomicExch',
+ 'AtomicAdd',
+ 'AtomicSub',
+ 'AtomicInc',
+ 'AtomicDec',
+ 'AtomicMax',
+ 'AtomicMin',
+
+ # Memory order flags
+ 'RelaxedOrder',
+ 'Acquire', # Has acquire semantics
+ 'Release', # Has release semantics
+ 'AcquireRelease', # Has acquire and release semantics
+ 'NoOrder', # Has no ordering restrictions
+
+ # Segment access flags
+ 'ArgSegment', # Accesses the arg segment
+ 'GlobalSegment', # Accesses global memory
+ 'GroupSegment', # Accesses local memory (LDS), aka shared memory
+ 'KernArgSegment', # Accesses the kernel argument segment
+ 'PrivateSegment', # Accesses the private segment
+ 'ReadOnlySegment', # Accesses read only memory
+ 'SpillSegment', # Accesses the spill segment
+ 'NoSegment', # Does not have an associated segment
+
+ # Scope flags
+ 'WorkitemScope',
+ 'WavefrontScope',
+ 'WorkgroupScope',
+ 'DeviceScope',
+ 'SystemScope',
+ 'NoScope', # Does not have an associated scope
+
+ # Coherence flags
+ 'GloballyCoherent', # Coherent with other workitems on same device
+ 'SystemCoherent' # Coherent with a different device, or the host
+ ]
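A note on how this new SimObject Enum is consumed, assuming gem5's usual Enum param code generation: with wrapper_is_struct set and enum_name 'Flags', the build should emit a C++ header (enums/GPUStaticInstFlags.hh) that the gpu-compute sources include and whose values back the setFlag()/is*() calls seen throughout this patch. Its rough shape would be the following; the exact generated names and guards may differ.

    // Hypothetical shape of the generated header -- illustrative only.
    struct GPUStaticInstFlags
    {
        enum Flags
        {
            ALU,
            Branch,
            Nop,
            // ... the remaining values, in the order listed above ...
            SystemCoherent,
            Num_Flags
        };
    };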
diff --git a/src/gpu-compute/SConscript b/src/gpu-compute/SConscript
index 88c1cf036..8cf1ed8cf 100644
--- a/src/gpu-compute/SConscript
+++ b/src/gpu-compute/SConscript
@@ -41,6 +41,7 @@ if not env['BUILD_GPU']:
Return()
SimObject('GPU.py')
+SimObject('GPUStaticInstFlags.py')
SimObject('LdsState.py')
SimObject('X86GPUTLB.py')
diff --git a/src/gpu-compute/code_enums.hh b/src/gpu-compute/code_enums.hh
deleted file mode 100644
index 6cd9bfe26..000000000
--- a/src/gpu-compute/code_enums.hh
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __CODE_ENUMS_HH__
-#define __CODE_ENUMS_HH__
-
-#define IS_OT_GLOBAL(a) ((a)>=Enums::OT_GLOBAL_READ \
- && (a)<=Enums::OT_GLOBAL_LDAS)
-#define IS_OT_SHARED(a) ((a)>=Enums::OT_SHARED_READ \
- && (a)<=Enums::OT_SHARED_LDAS)
-#define IS_OT_PRIVATE(a) ((a)>=Enums::OT_PRIVATE_READ \
- && (a)<=Enums::OT_PRIVATE_LDAS)
-#define IS_OT_SPILL(a) ((a)>=Enums::OT_SPILL_READ \
- && (a)<=Enums::OT_SPILL_LDAS)
-#define IS_OT_READONLY(a) ((a)>=Enums::OT_READONLY_READ \
- && (a)<=Enums::OT_READONLY_LDAS)
-#define IS_OT_FLAT(a) ((a)>=Enums::OT_FLAT_READ && (a)<=Enums::OT_FLAT_LDAS)
-
-#define IS_OT_LDAS(a) ((a)==Enums::OT_GLOBAL_LDAS||(a)==Enums::OT_SHARED_LDAS \
- ||(a)==Enums::OT_PRIVATE_LDAS||(a)==Enums::OT_SPILL_LDAS \
- ||(a)==Enums::OT_READONLY_LDAS||(a)==Enums::OT_FLAT_LDAS)
-
-#define IS_OT_READ(a) ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SHARED_READ \
- ||(a)==Enums::OT_PRIVATE_READ||(a)==Enums::OT_SPILL_READ \
- ||(a)==Enums::OT_READONLY_READ||(a)==Enums::OT_FLAT_READ)
-
-#define IS_OT_READ_GM(a) \
- ((a)==Enums::OT_GLOBAL_READ||(a)==Enums::OT_SPILL_READ \
- ||(a)==Enums::OT_READONLY_READ)
-
-#define IS_OT_READ_LM(a) ((a)==Enums::OT_SHARED_READ)
-
-#define IS_OT_READ_RM(a) ((a)==Enums::OT_READONLY_READ)
-
-#define IS_OT_READ_PM(a) ((a)==Enums::OT_PRIVATE_READ)
-
-#define IS_OT_WRITE(a) \
- ((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SHARED_WRITE \
- ||(a)==Enums::OT_PRIVATE_WRITE||(a)==Enums::OT_SPILL_WRITE \
- ||(a)==Enums::OT_READONLY_WRITE||(a)==Enums::OT_FLAT_WRITE)
-
-#define IS_OT_WRITE_GM(a) \
- ((a)==Enums::OT_GLOBAL_WRITE||(a)==Enums::OT_SPILL_WRITE \
- ||(a)==Enums::OT_READONLY_WRITE)
-
-#define IS_OT_WRITE_LM(a) ((a)==Enums::OT_SHARED_WRITE)
-
-#define IS_OT_WRITE_PM(a) ((a)==Enums::OT_PRIVATE_WRITE)
-
-#define IS_OT_ATOMIC(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
- ||(a)==Enums::OT_SHARED_ATOMIC \
- ||(a)==Enums::OT_PRIVATE_ATOMIC \
- ||(a)==Enums::OT_SPILL_ATOMIC \
- ||(a)==Enums::OT_READONLY_ATOMIC \
- ||(a)==Enums::OT_BOTH_MEMFENCE \
- ||(a)==Enums::OT_FLAT_ATOMIC)
-
-#define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
- ||(a)==Enums::OT_SPILL_ATOMIC \
- ||(a)==Enums::OT_READONLY_ATOMIC \
- ||(a)==Enums::OT_GLOBAL_MEMFENCE \
- ||(a)==Enums::OT_BOTH_MEMFENCE)
-
-#define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \
- ||(a)==Enums::OT_SHARED_MEMFENCE)
-
-#define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC)
-
-#define IS_OT_HIST(a) ((a)==Enums::OT_GLOBAL_HIST \
- ||(a)==Enums::OT_SHARED_HIST \
- ||(a)==Enums::OT_PRIVATE_HIST \
- ||(a)==Enums::OT_SPILL_HIST \
- ||(a)==Enums::OT_READONLY_HIST \
- ||(a)==Enums::OT_FLAT_HIST)
-
-#define IS_OT_HIST_GM(a) ((a)==Enums::OT_GLOBAL_HIST \
- ||(a)==Enums::OT_SPILL_HIST \
- ||(a)==Enums::OT_READONLY_HIST)
-
-#define IS_OT_HIST_LM(a) ((a)==Enums::OT_SHARED_HIST)
-
-#define IS_OT_HIST_PM(a) ((a)==Enums::OT_PRIVATE_HIST)
-
-#endif // __CODE_ENUMS_HH__
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 97e018713..abf8ff2c5 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p),
req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(name() + ".ComputeUnit")),
- lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize)
+ lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize),
+ kernelLaunchInst(new KernelLaunchStaticInst())
{
/**
* This check is necessary because std::bitset only provides conversion
@@ -316,13 +317,11 @@ ComputeUnit::StartWorkgroup(NDRange *ndr)
// Send L1 cache acquire
// isKernel + isAcquire = Kernel Begin
if (shader->impl_kern_boundary_sync) {
- GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(this,
- nullptr,
- nullptr, 0);
+ GPUDynInstPtr gpuDynInst =
+ std::make_shared<GPUDynInst>(this, nullptr, kernelLaunchInst,
+ getAndIncSeqNum());
gpuDynInst->useContinuation = false;
- gpuDynInst->memoryOrder = Enums::MEMORY_ORDER_SC_ACQUIRE;
- gpuDynInst->scope = Enums::MEMORY_SCOPE_SYSTEM;
injectGlobalMemFence(gpuDynInst, true);
}
@@ -647,7 +646,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
gpuDynInst->wfSlotId, w->barrierCnt);
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -658,7 +657,7 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
return true;
} else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -942,6 +941,8 @@ void
ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
Request* req)
{
+ assert(gpuDynInst->isGlobalSeg());
+
if (!req) {
req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
}
@@ -950,8 +951,6 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
req->setFlags(Request::KERNEL);
}
- gpuDynInst->s_type = SEG_GLOBAL;
-
// for non-kernel MemFence operations, memorder flags are set depending
// on which type of request is currently being sent, so this
// should be set by the caller (e.g. if an inst has acq-rel
@@ -1033,8 +1032,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
gpuDynInst->statusVector.clear();
- if (gpuDynInst->m_op == Enums::MO_LD || MO_A(gpuDynInst->m_op)
- || MO_ANR(gpuDynInst->m_op)) {
+ if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
assert(compute_unit->globalMemoryPipe.isGMLdRespFIFOWrRdy());
compute_unit->globalMemoryPipe.getGMLdRespFIFO()
@@ -1055,7 +1053,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
// the continuation may generate more work for
// this memory request
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
@@ -1065,7 +1063,7 @@ ComputeUnit::DataPort::MemRespEvent::process()
gpuDynInst->statusBitVector = VectorMask(0);
if (gpuDynInst->useContinuation) {
- assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
+ assert(!gpuDynInst->isNoScope());
gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
gpuDynInst);
}
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index a3547402a..938658fd1 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -744,6 +744,7 @@ class ComputeUnit : public MemObject
private:
uint64_t globalSeqNum;
int wavefrontSize;
+ GPUStaticInst *kernelLaunchInst;
};
#endif // __COMPUTE_UNIT_HH__
diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc
index 102905ec8..ab3e8c47e 100644
--- a/src/gpu-compute/global_memory_pipeline.cc
+++ b/src/gpu-compute/global_memory_pipeline.cc
@@ -67,7 +67,7 @@ GlobalMemPipeline::exec()
bool accessVrf = true;
// check the VRF to see if the operands of a load (or load component
// of an atomic) are accessible
- if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) {
+ if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf =
@@ -127,10 +127,7 @@ GlobalMemPipeline::exec()
// memory packets to DTLB
if (!gmIssuedRequests.empty()) {
GPUDynInstPtr mp = gmIssuedRequests.front();
- if (mp->m_op == Enums::MO_LD ||
- (mp->m_op >= Enums::MO_AAND && mp->m_op <= Enums::MO_AMIN) ||
- (mp->m_op >= Enums::MO_ANRAND && mp->m_op <= Enums::MO_ANRMIN)) {
-
+ if (mp->isLoad() || mp->isAtomic()) {
if (inflightLoads >= gmQueueSize) {
return;
} else {
@@ -139,7 +136,7 @@ GlobalMemPipeline::exec()
} else {
if (inflightStores >= gmQueueSize) {
return;
- } else if (mp->m_op == Enums::MO_ST) {
+ } else if (mp->isStore()) {
++inflightStores;
}
}
@@ -147,9 +144,8 @@ GlobalMemPipeline::exec()
mp->initiateAcc(mp);
gmIssuedRequests.pop();
- DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = %s\n",
- computeUnit->cu_id, mp->simdId, mp->wfSlotId,
- Enums::MemOpTypeStrings[mp->m_op]);
+ DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op\n",
+ computeUnit->cu_id, mp->simdId, mp->wfSlotId);
}
}
@@ -160,12 +156,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
+ if (m->isLoad() || m->isAtomic()) {
gmReturnedLoads.pop();
assert(inflightLoads > 0);
--inflightLoads;
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) {
+ if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec;
// iterate over number of destination register operands since
// this is a load or atomic operation
@@ -214,13 +210,12 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
// Decrement outstanding register count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
- if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) ||
- MO_H(m->m_op)) {
+ if (m->isStore() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time,
-1);
}
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
+ if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time,
-1);
}
diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc
index 1806e79e4..ec6340360 100644
--- a/src/gpu-compute/gpu_dyn_inst.cc
+++ b/src/gpu-compute/gpu_dyn_inst.cc
@@ -41,11 +41,10 @@
#include "gpu-compute/wavefront.hh"
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
- GPUStaticInst *_staticInst, uint64_t instSeqNum)
+ GPUStaticInst *static_inst, uint64_t instSeqNum)
: GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
- m_op(Enums::MO_UNDEF),
- memoryOrder(Enums::MEMORY_ORDER_NONE), n_reg(0), useContinuation(false),
- statusBitVector(0), staticInst(_staticInst), _seqNum(instSeqNum)
+ n_reg(0), useContinuation(false),
+ statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
{
tlbHitLevel.assign(computeUnit()->wfSize(), -1);
d_data = new uint8_t[computeUnit()->wfSize() * 16];
@@ -68,77 +67,69 @@ GPUDynInst::~GPUDynInst()
}
void
-GPUDynInst::execute()
+GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
- GPUDynInstPtr gpuDynInst = std::make_shared<GPUDynInst>(cu, wf, staticInst,
- _seqNum);
- staticInst->execute(gpuDynInst);
+ _staticInst->execute(gpuDynInst);
}
int
GPUDynInst::numSrcRegOperands()
{
- return staticInst->numSrcRegOperands();
+ return _staticInst->numSrcRegOperands();
}
int
GPUDynInst::numDstRegOperands()
{
- return staticInst->numDstRegOperands();
+ return _staticInst->numDstRegOperands();
}
int
GPUDynInst::getNumOperands()
{
- return staticInst->getNumOperands();
+ return _staticInst->getNumOperands();
}
bool
GPUDynInst::isVectorRegister(int operandIdx)
{
- return staticInst->isVectorRegister(operandIdx);
+ return _staticInst->isVectorRegister(operandIdx);
}
bool
GPUDynInst::isScalarRegister(int operandIdx)
{
- return staticInst->isScalarRegister(operandIdx);
+ return _staticInst->isScalarRegister(operandIdx);
}
int
GPUDynInst::getRegisterIndex(int operandIdx)
{
- return staticInst->getRegisterIndex(operandIdx);
+ return _staticInst->getRegisterIndex(operandIdx);
}
int
GPUDynInst::getOperandSize(int operandIdx)
{
- return staticInst->getOperandSize(operandIdx);
+ return _staticInst->getOperandSize(operandIdx);
}
bool
GPUDynInst::isDstOperand(int operandIdx)
{
- return staticInst->isDstOperand(operandIdx);
+ return _staticInst->isDstOperand(operandIdx);
}
bool
GPUDynInst::isSrcOperand(int operandIdx)
{
- return staticInst->isSrcOperand(operandIdx);
-}
-
-bool
-GPUDynInst::isArgLoad()
-{
- return staticInst->isArgLoad();
+ return _staticInst->isSrcOperand(operandIdx);
}
const std::string&
GPUDynInst::disassemble() const
{
- return staticInst->disassemble();
+ return _staticInst->disassemble();
}
uint64_t
@@ -147,16 +138,10 @@ GPUDynInst::seqNum() const
return _seqNum;
}
-Enums::OpType
-GPUDynInst::opType()
-{
- return staticInst->o_type;
-}
-
Enums::StorageClassType
GPUDynInst::executedAs()
{
- return staticInst->executed_as;
+ return _staticInst->executed_as;
}
// Process a memory instruction and (if necessary) submit timing request
@@ -166,20 +151,347 @@ GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
cu->cu_id, simdId, wfSlotId, exec_mask);
- staticInst->initiateAcc(gpuDynInst);
+ _staticInst->initiateAcc(gpuDynInst);
time = 0;
}
+/**
+ * Accessor methods for the attributes of the
+ * underlying GPU static instruction.
+ */
+bool
+GPUDynInst::isALU() const
+{
+ return _staticInst->isALU();
+}
+
+bool
+GPUDynInst::isBranch() const
+{
+ return _staticInst->isBranch();
+}
+
+bool
+GPUDynInst::isNop() const
+{
+ return _staticInst->isNop();
+}
+
+bool
+GPUDynInst::isReturn() const
+{
+ return _staticInst->isReturn();
+}
+
+bool
+GPUDynInst::isUnconditionalJump() const
+{
+ return _staticInst->isUnconditionalJump();
+}
+
+bool
+GPUDynInst::isSpecialOp() const
+{
+ return _staticInst->isSpecialOp();
+}
+
+bool
+GPUDynInst::isWaitcnt() const
+{
+ return _staticInst->isWaitcnt();
+}
+
+bool
+GPUDynInst::isBarrier() const
+{
+ return _staticInst->isBarrier();
+}
+
+bool
+GPUDynInst::isMemFence() const
+{
+ return _staticInst->isMemFence();
+}
+
+bool
+GPUDynInst::isMemRef() const
+{
+ return _staticInst->isMemRef();
+}
+
+bool
+GPUDynInst::isFlat() const
+{
+ return _staticInst->isFlat();
+}
+
+bool
+GPUDynInst::isLoad() const
+{
+ return _staticInst->isLoad();
+}
+
+bool
+GPUDynInst::isStore() const
+{
+ return _staticInst->isStore();
+}
+
+bool
+GPUDynInst::isAtomic() const
+{
+ return _staticInst->isAtomic();
+}
+
+bool
+GPUDynInst::isAtomicNoRet() const
+{
+ return _staticInst->isAtomicNoRet();
+}
+
+bool
+GPUDynInst::isAtomicRet() const
+{
+ return _staticInst->isAtomicRet();
+}
+
+bool
+GPUDynInst::isScalar() const
+{
+ return _staticInst->isScalar();
+}
+
+bool
+GPUDynInst::readsSCC() const
+{
+ return _staticInst->readsSCC();
+}
+
+bool
+GPUDynInst::writesSCC() const
+{
+ return _staticInst->writesSCC();
+}
+
+bool
+GPUDynInst::readsVCC() const
+{
+ return _staticInst->readsVCC();
+}
+
+bool
+GPUDynInst::writesVCC() const
+{
+ return _staticInst->writesVCC();
+}
+
+bool
+GPUDynInst::isAtomicAnd() const
+{
+ return _staticInst->isAtomicAnd();
+}
+
+bool
+GPUDynInst::isAtomicOr() const
+{
+ return _staticInst->isAtomicOr();
+}
+
+bool
+GPUDynInst::isAtomicXor() const
+{
+ return _staticInst->isAtomicXor();
+}
+
+bool
+GPUDynInst::isAtomicCAS() const
+{
+ return _staticInst->isAtomicCAS();
+}
+
+bool
+GPUDynInst::isAtomicExch() const
+{
+ return _staticInst->isAtomicExch();
+}
+
+bool
+GPUDynInst::isAtomicAdd() const
+{
+ return _staticInst->isAtomicAdd();
+}
+
+bool
+GPUDynInst::isAtomicSub() const
+{
+ return _staticInst->isAtomicSub();
+}
+
+bool
+GPUDynInst::isAtomicInc() const
+{
+ return _staticInst->isAtomicInc();
+}
+
+bool
+GPUDynInst::isAtomicDec() const
+{
+ return _staticInst->isAtomicDec();
+}
+
+bool
+GPUDynInst::isAtomicMax() const
+{
+ return _staticInst->isAtomicMax();
+}
+
+bool
+GPUDynInst::isAtomicMin() const
+{
+ return _staticInst->isAtomicMin();
+}
+
+bool
+GPUDynInst::isArgLoad() const
+{
+ return _staticInst->isArgLoad();
+}
+
+bool
+GPUDynInst::isGlobalMem() const
+{
+ return _staticInst->isGlobalMem();
+}
+
+bool
+GPUDynInst::isLocalMem() const
+{
+ return _staticInst->isLocalMem();
+}
+
+bool
+GPUDynInst::isArgSeg() const
+{
+ return _staticInst->isArgSeg();
+}
+
+bool
+GPUDynInst::isGlobalSeg() const
+{
+ return _staticInst->isGlobalSeg();
+}
+
+bool
+GPUDynInst::isGroupSeg() const
+{
+ return _staticInst->isGroupSeg();
+}
+
+bool
+GPUDynInst::isKernArgSeg() const
+{
+ return _staticInst->isKernArgSeg();
+}
+
+bool
+GPUDynInst::isPrivateSeg() const
+{
+ return _staticInst->isPrivateSeg();
+}
+
+bool
+GPUDynInst::isReadOnlySeg() const
+{
+ return _staticInst->isReadOnlySeg();
+}
+
+bool
+GPUDynInst::isSpillSeg() const
+{
+ return _staticInst->isSpillSeg();
+}
+
+bool
+GPUDynInst::isWorkitemScope() const
+{
+ return _staticInst->isWorkitemScope();
+}
+
+bool
+GPUDynInst::isWavefrontScope() const
+{
+ return _staticInst->isWavefrontScope();
+}
+
+bool
+GPUDynInst::isWorkgroupScope() const
+{
+ return _staticInst->isWorkgroupScope();
+}
+
+bool
+GPUDynInst::isDeviceScope() const
+{
+ return _staticInst->isDeviceScope();
+}
+
+bool
+GPUDynInst::isSystemScope() const
+{
+ return _staticInst->isSystemScope();
+}
+
+bool
+GPUDynInst::isNoScope() const
+{
+ return _staticInst->isNoScope();
+}
+
+bool
+GPUDynInst::isRelaxedOrder() const
+{
+ return _staticInst->isRelaxedOrder();
+}
+
+bool
+GPUDynInst::isAcquire() const
+{
+ return _staticInst->isAcquire();
+}
+
+bool
+GPUDynInst::isRelease() const
+{
+ return _staticInst->isRelease();
+}
+
+bool
+GPUDynInst::isAcquireRelease() const
+{
+ return _staticInst->isAcquireRelease();
+}
+
+bool
+GPUDynInst::isNoOrder() const
+{
+ return _staticInst->isNoOrder();
+}
+
+bool
+GPUDynInst::isGloballyCoherent() const
+{
+ return _staticInst->isGloballyCoherent();
+}
+
bool
-GPUDynInst::scalarOp() const
+GPUDynInst::isSystemCoherent() const
{
- return staticInst->scalarOp();
+ return _staticInst->isSystemCoherent();
}
void
GPUDynInst::updateStats()
{
- if (staticInst->isLocalMem()) {
+ if (_staticInst->isLocalMem()) {
// access to LDS (shared) memory
cu->dynamicLMemInstrCnt++;
} else {
diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh
index 46774d867..c07d85d78 100644
--- a/src/gpu-compute/gpu_dyn_inst.hh
+++ b/src/gpu-compute/gpu_dyn_inst.hh
@@ -39,11 +39,7 @@
#include <cstdint>
#include <string>
-#include "enums/GenericMemoryOrder.hh"
-#include "enums/GenericMemoryScope.hh"
-#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
-#include "enums/OpType.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"
@@ -180,33 +176,19 @@ class AtomicOpMin : public TypedAtomicOpFunctor<T>
}
};
-#define MO_A(a) ((a)>=Enums::MO_AAND && (a)<=Enums::MO_AMIN)
-#define MO_ANR(a) ((a)>=Enums::MO_ANRAND && (a)<=Enums::MO_ANRMIN)
-#define MO_H(a) ((a)>=Enums::MO_HAND && (a)<=Enums::MO_HMIN)
-
typedef enum
{
VT_32,
VT_64,
} vgpr_type;
-typedef enum
-{
- SEG_PRIVATE,
- SEG_SPILL,
- SEG_GLOBAL,
- SEG_SHARED,
- SEG_READONLY,
- SEG_FLAT
-} seg_type;
-
class GPUDynInst : public GPUExecContext
{
public:
- GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *_staticInst,
+ GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
uint64_t instSeqNum);
~GPUDynInst();
- void execute();
+ void execute(GPUDynInstPtr gpuDynInst);
int numSrcRegOperands();
int numDstRegOperands();
int getNumOperands();
@@ -216,13 +198,11 @@ class GPUDynInst : public GPUExecContext
int getOperandSize(int operandIdx);
bool isDstOperand(int operandIdx);
bool isSrcOperand(int operandIdx);
- bool isArgLoad();
const std::string &disassemble() const;
uint64_t seqNum() const;
- Enums::OpType opType();
Enums::StorageClassType executedAs();
// The address of the memory operation
@@ -240,14 +220,7 @@ class GPUDynInst : public GPUExecContext
// The memory type (M_U32, M_S32, ...)
Enums::MemType m_type;
- // The memory operation (MO_LD, MO_ST, ...)
- Enums::MemOpType m_op;
- Enums::GenericMemoryOrder memoryOrder;
-
- // Scope of the request
- Enums::GenericMemoryScope scope;
- // The memory segment (SEG_SHARED, SEG_GLOBAL, ...)
- seg_type s_type;
+
// The equivalency class
int equiv;
// The return VGPR type (VT_32 or VT_64)
@@ -288,10 +261,72 @@ class GPUDynInst : public GPUExecContext
void updateStats();
- GPUStaticInst* staticInstruction() { return staticInst; }
-
- // Is the instruction a scalar or vector op?
- bool scalarOp() const;
+ GPUStaticInst* staticInstruction() { return _staticInst; }
+
+ bool isALU() const;
+ bool isBranch() const;
+ bool isNop() const;
+ bool isReturn() const;
+ bool isUnconditionalJump() const;
+ bool isSpecialOp() const;
+ bool isWaitcnt() const;
+
+ bool isBarrier() const;
+ bool isMemFence() const;
+ bool isMemRef() const;
+ bool isFlat() const;
+ bool isLoad() const;
+ bool isStore() const;
+
+ bool isAtomic() const;
+ bool isAtomicNoRet() const;
+ bool isAtomicRet() const;
+
+ bool isScalar() const;
+ bool readsSCC() const;
+ bool writesSCC() const;
+ bool readsVCC() const;
+ bool writesVCC() const;
+
+ bool isAtomicAnd() const;
+ bool isAtomicOr() const;
+ bool isAtomicXor() const;
+ bool isAtomicCAS() const;
+ bool isAtomicExch() const;
+ bool isAtomicAdd() const;
+ bool isAtomicSub() const;
+ bool isAtomicInc() const;
+ bool isAtomicDec() const;
+ bool isAtomicMax() const;
+ bool isAtomicMin() const;
+
+ bool isArgLoad() const;
+ bool isGlobalMem() const;
+ bool isLocalMem() const;
+
+ bool isArgSeg() const;
+ bool isGlobalSeg() const;
+ bool isGroupSeg() const;
+ bool isKernArgSeg() const;
+ bool isPrivateSeg() const;
+ bool isReadOnlySeg() const;
+ bool isSpillSeg() const;
+
+ bool isWorkitemScope() const;
+ bool isWavefrontScope() const;
+ bool isWorkgroupScope() const;
+ bool isDeviceScope() const;
+ bool isSystemScope() const;
+ bool isNoScope() const;
+
+ bool isRelaxedOrder() const;
+ bool isAcquire() const;
+ bool isRelease() const;
+ bool isAcquireRelease() const;
+ bool isNoOrder() const;
+
+ bool isGloballyCoherent() const;
+ bool isSystemCoherent() const;
/*
* Loads/stores/atomics may have acquire/release semantics associated
@@ -312,46 +347,32 @@ class GPUDynInst : public GPUExecContext
bool useContinuation;
template<typename c0> AtomicOpFunctor*
- makeAtomicOpFunctor(c0 *reg0, c0 *reg1, Enums::MemOpType op)
+ makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
{
- using namespace Enums;
-
- switch(op) {
- case MO_AAND:
- case MO_ANRAND:
+ if (isAtomicAnd()) {
return new AtomicOpAnd<c0>(*reg0);
- case MO_AOR:
- case MO_ANROR:
+ } else if (isAtomicOr()) {
return new AtomicOpOr<c0>(*reg0);
- case MO_AXOR:
- case MO_ANRXOR:
+ } else if (isAtomicXor()) {
return new AtomicOpXor<c0>(*reg0);
- case MO_ACAS:
- case MO_ANRCAS:
+ } else if (isAtomicCAS()) {
return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
- case MO_AEXCH:
- case MO_ANREXCH:
+ } else if (isAtomicExch()) {
return new AtomicOpExch<c0>(*reg0);
- case MO_AADD:
- case MO_ANRADD:
+ } else if (isAtomicAdd()) {
return new AtomicOpAdd<c0>(*reg0);
- case MO_ASUB:
- case MO_ANRSUB:
+ } else if (isAtomicSub()) {
return new AtomicOpSub<c0>(*reg0);
- case MO_AINC:
- case MO_ANRINC:
+ } else if (isAtomicInc()) {
return new AtomicOpInc<c0>();
- case MO_ADEC:
- case MO_ANRDEC:
+ } else if (isAtomicDec()) {
return new AtomicOpDec<c0>();
- case MO_AMAX:
- case MO_ANRMAX:
+ } else if (isAtomicMax()) {
return new AtomicOpMax<c0>(*reg0);
- case MO_AMIN:
- case MO_ANRMIN:
+ } else if (isAtomicMin()) {
return new AtomicOpMin<c0>(*reg0);
- default:
- panic("Unrecognized atomic operation");
+ } else {
+ fatal("Unrecognized atomic operation");
}
}
@@ -359,88 +380,58 @@ class GPUDynInst : public GPUExecContext
setRequestFlags(Request *req, bool setMemOrder=true)
{
// currently these are the easy scopes to deduce
- switch (s_type) {
- case SEG_PRIVATE:
+ if (isPrivateSeg()) {
req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
- break;
- case SEG_SPILL:
+ } else if (isSpillSeg()) {
req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
- break;
- case SEG_GLOBAL:
+ } else if (isGlobalSeg()) {
req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
- break;
- case SEG_READONLY:
+ } else if (isReadOnlySeg()) {
req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
- break;
- case SEG_SHARED:
+ } else if (isGroupSeg()) {
req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
- break;
- case SEG_FLAT:
+ } else if (isFlat()) {
// TODO: translate to correct scope
assert(false);
- default:
- panic("Bad segment type");
- break;
+ } else {
+ fatal("%s has bad segment type\n", disassemble());
}
- switch (scope) {
- case Enums::MEMORY_SCOPE_NONE:
- case Enums::MEMORY_SCOPE_WORKITEM:
- break;
- case Enums::MEMORY_SCOPE_WAVEFRONT:
+ if (isWavefrontScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WAVEFRONT_SCOPE);
- break;
- case Enums::MEMORY_SCOPE_WORKGROUP:
+ } else if (isWorkgroupScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::WORKGROUP_SCOPE);
- break;
- case Enums::MEMORY_SCOPE_DEVICE:
+ } else if (isDeviceScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::DEVICE_SCOPE);
- break;
- case Enums::MEMORY_SCOPE_SYSTEM:
+ } else if (isSystemScope()) {
req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
Request::SYSTEM_SCOPE);
- break;
- default:
- panic("Bad scope type");
- break;
+ } else if (!isNoScope() && !isWorkitemScope()) {
+ fatal("%s has bad scope type\n", disassemble());
}
if (setMemOrder) {
// set acquire and release flags
- switch (memoryOrder){
- case Enums::MEMORY_ORDER_SC_ACQUIRE:
+ if (isAcquire()) {
req->setFlags(Request::ACQUIRE);
- break;
- case Enums::MEMORY_ORDER_SC_RELEASE:
+ } else if (isRelease()) {
req->setFlags(Request::RELEASE);
- break;
- case Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+ } else if (isAcquireRelease()) {
req->setFlags(Request::ACQUIRE | Request::RELEASE);
- break;
- default:
- break;
+ } else if (!isNoOrder()) {
+ fatal("%s has bad memory order\n", disassemble());
}
}
// set atomic type
// currently, the instruction generator only produces atomic return
// but a magic instruction can produce atomic no return
- if (m_op == Enums::MO_AADD || m_op == Enums::MO_ASUB ||
- m_op == Enums::MO_AAND || m_op == Enums::MO_AOR ||
- m_op == Enums::MO_AXOR || m_op == Enums::MO_AMAX ||
- m_op == Enums::MO_AMIN || m_op == Enums::MO_AINC ||
- m_op == Enums::MO_ADEC || m_op == Enums::MO_AEXCH ||
- m_op == Enums::MO_ACAS) {
+ if (isAtomicRet()) {
req->setFlags(Request::ATOMIC_RETURN_OP);
- } else if (m_op == Enums::MO_ANRADD || m_op == Enums::MO_ANRSUB ||
- m_op == Enums::MO_ANRAND || m_op == Enums::MO_ANROR ||
- m_op == Enums::MO_ANRXOR || m_op == Enums::MO_ANRMAX ||
- m_op == Enums::MO_ANRMIN || m_op == Enums::MO_ANRINC ||
- m_op == Enums::MO_ANRDEC || m_op == Enums::MO_ANREXCH ||
- m_op == Enums::MO_ANRCAS) {
+ } else if (isAtomicNoRet()) {
req->setFlags(Request::ATOMIC_NO_RETURN_OP);
}
}
@@ -457,7 +448,7 @@ class GPUDynInst : public GPUExecContext
std::vector<int> tlbHitLevel;
private:
- GPUStaticInst *staticInst;
+ GPUStaticInst *_staticInst;
uint64_t _seqNum;
};
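The makeAtomicOpFunctor and setRequestFlags rewrites above replace switches over Enums::MemOpType with one flag predicate per case. A minimal standalone sketch of that dispatch style follows, using plain boolean flags and lambdas in place of the AtomicOpFunctor hierarchy; none of these names are the gem5 API.

// Sketch only: flag-predicate dispatch instead of a MemOpType switch.
#include <functional>
#include <stdexcept>

struct InstFlagsSketch {
    bool atomicAnd = false, atomicAdd = false, atomicMax = false;
};

template <typename T>
std::function<T(T)> makeAtomicOp(const InstFlagsSketch &f, T operand)
{
    // Each predicate maps to exactly one functor; a new atomic operation
    // adds one flag and one branch, with no parallel *_NR enum values.
    if (f.atomicAnd) return [=](T mem) { return mem & operand; };
    if (f.atomicAdd) return [=](T mem) { return mem + operand; };
    if (f.atomicMax) return [=](T mem) { return mem > operand ? mem : operand; };
    throw std::runtime_error("unrecognized atomic operation");
}

int main() {
    InstFlagsSketch f;
    f.atomicAdd = true;
    auto op = makeAtomicOp<int>(f, 5);
    return op(37) == 42 ? 0 : 1;  // applies the selected atomic op
}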
diff --git a/src/gpu-compute/gpu_static_inst.cc b/src/gpu-compute/gpu_static_inst.cc
index 83b429e62..0f74bd532 100644
--- a/src/gpu-compute/gpu_static_inst.cc
+++ b/src/gpu-compute/gpu_static_inst.cc
@@ -36,10 +36,12 @@
#include "gpu-compute/gpu_static_inst.hh"
GPUStaticInst::GPUStaticInst(const std::string &opcode)
- : o_type(Enums::OT_ALU), executed_as(Enums::SC_NONE), opcode(opcode),
- _instNum(0), _scalarOp(false)
+ : executed_as(Enums::SC_NONE), opcode(opcode),
+ _instNum(0)
{
+ setFlag(NoOrder);
}
+
const std::string&
GPUStaticInst::disassemble()
{
diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh
index 911e4f308..a73ec12e3 100644
--- a/src/gpu-compute/gpu_static_inst.hh
+++ b/src/gpu-compute/gpu_static_inst.hh
@@ -48,7 +48,7 @@
#include <cstdint>
#include <string>
-#include "enums/OpType.hh"
+#include "enums/GPUStaticInstFlags.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh"
@@ -57,7 +57,7 @@ class BaseOperand;
class BaseRegOperand;
class Wavefront;
-class GPUStaticInst
+class GPUStaticInst : public GPUStaticInstFlags
{
public:
GPUStaticInst(const std::string &opcode);
@@ -86,22 +86,110 @@ class GPUStaticInst
virtual bool isValid() const = 0;
- /*
- * Most instructions (including all HSAIL instructions)
- * are vector ops, so _scalarOp will be false by default.
- * Derived instruction objects that are scalar ops must
- * set _scalarOp to true in their constructors.
- */
- bool scalarOp() const { return _scalarOp; }
+ bool isALU() const { return _flags[ALU]; }
+ bool isBranch() const { return _flags[Branch]; }
+ bool isNop() const { return _flags[Nop]; }
+ bool isReturn() const { return _flags[Return]; }
+
+ bool
+ isUnconditionalJump() const
+ {
+ return _flags[UnconditionalJump];
+ }
+
+ bool isSpecialOp() const { return _flags[SpecialOp]; }
+ bool isWaitcnt() const { return _flags[Waitcnt]; }
+
+ bool isBarrier() const { return _flags[MemBarrier]; }
+ bool isMemFence() const { return _flags[MemFence]; }
+ bool isMemRef() const { return _flags[MemoryRef]; }
+ bool isFlat() const { return _flags[Flat]; }
+ bool isLoad() const { return _flags[Load]; }
+ bool isStore() const { return _flags[Store]; }
+
+ bool
+ isAtomic() const
+ {
+ return _flags[AtomicReturn] || _flags[AtomicNoReturn];
+ }
+
+ bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
+ bool isAtomicRet() const { return _flags[AtomicReturn]; }
+
+ bool isScalar() const { return _flags[Scalar]; }
+ bool readsSCC() const { return _flags[ReadsSCC]; }
+ bool writesSCC() const { return _flags[WritesSCC]; }
+ bool readsVCC() const { return _flags[ReadsVCC]; }
+ bool writesVCC() const { return _flags[WritesVCC]; }
- virtual bool isLocalMem() const
+ bool isAtomicAnd() const { return _flags[AtomicAnd]; }
+ bool isAtomicOr() const { return _flags[AtomicOr]; }
+ bool isAtomicXor() const { return _flags[AtomicXor]; }
+ bool isAtomicCAS() const { return _flags[AtomicCAS]; }
+ bool isAtomicExch() const { return _flags[AtomicExch]; }
+ bool isAtomicAdd() const { return _flags[AtomicAdd]; }
+ bool isAtomicSub() const { return _flags[AtomicSub]; }
+ bool isAtomicInc() const { return _flags[AtomicInc]; }
+ bool isAtomicDec() const { return _flags[AtomicDec]; }
+ bool isAtomicMax() const { return _flags[AtomicMax]; }
+ bool isAtomicMin() const { return _flags[AtomicMin]; }
+
+ bool
+ isArgLoad() const
+ {
+ return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
+ }
+
+ bool
+ isGlobalMem() const
{
- fatal("calling isLocalMem() on non-memory instruction.\n");
+ return _flags[MemoryRef] && (_flags[GlobalSegment] ||
+ _flags[PrivateSegment] || _flags[ReadOnlySegment] ||
+ _flags[SpillSegment]);
+ }
- return false;
+ bool
+ isLocalMem() const
+ {
+ return _flags[MemoryRef] && _flags[GroupSegment];
}
- bool isArgLoad() { return false; }
+ bool isArgSeg() const { return _flags[ArgSegment]; }
+ bool isGlobalSeg() const { return _flags[GlobalSegment]; }
+ bool isGroupSeg() const { return _flags[GroupSegment]; }
+ bool isKernArgSeg() const { return _flags[KernArgSegment]; }
+ bool isPrivateSeg() const { return _flags[PrivateSegment]; }
+ bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
+ bool isSpillSeg() const { return _flags[SpillSegment]; }
+
+ bool isWorkitemScope() const { return _flags[WorkitemScope]; }
+ bool isWavefrontScope() const { return _flags[WavefrontScope]; }
+ bool isWorkgroupScope() const { return _flags[WorkgroupScope]; }
+ bool isDeviceScope() const { return _flags[DeviceScope]; }
+ bool isSystemScope() const { return _flags[SystemScope]; }
+ bool isNoScope() const { return _flags[NoScope]; }
+
+ bool isRelaxedOrder() const { return _flags[RelaxedOrder]; }
+ bool isAcquire() const { return _flags[Acquire]; }
+ bool isRelease() const { return _flags[Release]; }
+ bool isAcquireRelease() const { return _flags[AcquireRelease]; }
+ bool isNoOrder() const { return _flags[NoOrder]; }
+
+ /**
+ * Coherence domain of a memory instruction. Only valid for
+ * the machine ISA. The coherence domain specifies where it is
+ * possible to perform memory synchronization, e.g., acquire
+ * or release, from the shader kernel.
+ *
+ * isGloballyCoherent(): returns true if the kernel is sharing memory
+ * with other work-items on the same device (GPU).
+ *
+ * isSystemCoherent(): returns true if the kernel is sharing memory
+ * with other work-items on a different device (GPU) or the host (CPU).
+ */
+ bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
+ bool isSystemCoherent() const { return _flags[SystemCoherent]; }
+
virtual uint32_t instSize() = 0;
// only used for memory instructions
@@ -120,22 +208,13 @@ class GPUStaticInst
virtual uint32_t getTargetPc() { return 0; }
- /**
- * Query whether the instruction is an unconditional jump i.e., the jump
- * is always executed because there is no condition to be evaluated.
- *
- * If the instruction is not of branch type, the result is always false.
- *
- * @return True if the instruction is an unconditional jump.
- */
- virtual bool unconditionalJumpInstruction() { return false; }
-
static uint64_t dynamic_id_count;
- Enums::OpType o_type;
// For flat memory accesses
Enums::StorageClassType executed_as;
+ void setFlag(Flags flag) { _flags[flag] = true; }
+
protected:
virtual void
execLdAcq(GPUDynInstPtr gpuDynInst)
@@ -169,7 +248,45 @@ class GPUStaticInst
*/
int _ipdInstNum;
- bool _scalarOp;
+ std::bitset<Num_Flags> _flags;
+};
+
+class KernelLaunchStaticInst : public GPUStaticInst
+{
+ public:
+ KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
+ {
+ setFlag(Nop);
+ setFlag(Scalar);
+ setFlag(Acquire);
+ setFlag(SystemScope);
+ setFlag(GlobalSegment);
+ }
+
+ void
+ execute(GPUDynInstPtr gpuDynInst)
+ {
+ fatal("kernel launch instruction should not be executed\n");
+ }
+
+ void
+ generateDisassembly()
+ {
+ disassembly = opcode;
+ }
+
+ int getNumOperands() { return 0; }
+ bool isCondRegister(int operandIndex) { return false; }
+ bool isScalarRegister(int operandIndex) { return false; }
+ bool isVectorRegister(int operandIndex) { return false; }
+ bool isSrcOperand(int operandIndex) { return false; }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex) { return 0; }
+ int getRegisterIndex(int operandIndex) { return 0; }
+ int numDstRegOperands() { return 0; }
+ int numSrcRegOperands() { return 0; }
+ bool isValid() const { return true; }
+ uint32_t instSize() { return 0; }
};
#endif // __GPU_STATIC_INST_HH__
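The new GPUStaticInst above concentrates all of this classification in a single std::bitset that each instruction's constructor fills in once, with composite predicates (isAtomic, isLocalMem, ...) built on top. A minimal standalone sketch of that scheme follows, under assumed placeholder flag names rather than the generated GPUStaticInstFlags enum.

// Sketch only: declarative per-instruction flags plus composite predicates.
#include <bitset>
#include <iostream>

struct FlagsSketch {
    enum Flag { Load, Store, AtomicReturn, AtomicNoReturn,
                GroupSegment, GlobalSegment, MemoryRef, NumFlags };
};

class StaticInstSketch : public FlagsSketch {
  public:
    void setFlag(Flag f) { _flags[f] = true; }
    bool isLoad() const { return _flags[Load]; }
    bool isAtomic() const {
        return _flags[AtomicReturn] || _flags[AtomicNoReturn];
    }
    bool isLocalMem() const {
        return _flags[MemoryRef] && _flags[GroupSegment];
    }
  private:
    std::bitset<NumFlags> _flags;
};

class LdsAtomicAddSketch : public StaticInstSketch {
  public:
    LdsAtomicAddSketch() {
        // Behaviour is declared once here; pipeline stages that used to
        // switch on an op-type enum can simply call the predicates.
        setFlag(AtomicReturn);
        setFlag(MemoryRef);
        setFlag(GroupSegment);
    }
};

int main() {
    LdsAtomicAddSketch inst;
    std::cout << "atomic=" << inst.isAtomic()
              << " localMem=" << inst.isLocalMem() << "\n";
    return 0;
}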
diff --git a/src/gpu-compute/kernel_cfg.cc b/src/gpu-compute/kernel_cfg.cc
index 10ded11b7..ac6a81b16 100644
--- a/src/gpu-compute/kernel_cfg.cc
+++ b/src/gpu-compute/kernel_cfg.cc
@@ -104,7 +104,7 @@ ControlFlowInfo::createBasicBlocks()
leaders.insert(0);
for (int i = 1; i < instructions.size(); i++) {
GPUStaticInst* instruction = instructions[i];
- if (instruction->o_type == Enums::OT_BRANCH) {
+ if (instruction->isBranch()) {
const int target_pc = instruction->getTargetPc();
leaders.insert(target_pc);
leaders.insert(i + 1);
@@ -137,18 +137,18 @@ ControlFlowInfo::connectBasicBlocks()
break;
}
GPUStaticInst* last = lastInstruction(bb.get());
- if (last->o_type == Enums::OT_RET) {
+ if (last->isReturn()) {
bb->successorIds.insert(exit_bb->id);
continue;
}
- if (last->o_type == Enums::OT_BRANCH) {
+ if (last->isBranch()) {
const uint32_t target_pc = last->getTargetPc();
BasicBlock* target_bb = basicBlock(target_pc);
bb->successorIds.insert(target_bb->id);
}
// Unconditional jump instructions have a unique successor
- if (!last->unconditionalJumpInstruction()) {
+ if (!last->isUnconditionalJump()) {
BasicBlock* next_bb = basicBlock(last->instNum() + 1);
bb->successorIds.insert(next_bb->id);
}
@@ -274,7 +274,7 @@ ControlFlowInfo::printBasicBlocks() const
int inst_num = inst->instNum();
std::cout << inst_num << " [" << basicBlock(inst_num)->id
<< "]: " << inst->disassemble();
- if (inst->o_type == Enums::OT_BRANCH) {
+ if (inst->isBranch()) {
std::cout << ", PC = " << inst->getTargetPc();
}
std::cout << std::endl;
diff --git a/src/gpu-compute/lds_state.cc b/src/gpu-compute/lds_state.cc
index d4a27318a..fad98c886 100644
--- a/src/gpu-compute/lds_state.cc
+++ b/src/gpu-compute/lds_state.cc
@@ -141,8 +141,7 @@ LdsState::countBankConflicts(GPUDynInstPtr gpuDynInst,
}
}
- if (gpuDynInst->m_op == Enums::MO_LD ||
- gpuDynInst->m_op == Enums::MO_ST) {
+ if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
// mask identical addresses
for (int j = 0; j < numBanks; ++j) {
for (int j0 = 0; j0 < j; j0++) {
@@ -208,8 +207,8 @@ LdsState::processPacket(PacketPtr packet)
GPUDynInstPtr dynInst = getDynInstr(packet);
// account for the LDS bank conflict overhead
- int busLength = (dynInst->m_op == Enums::MO_LD) ? parent->loadBusLength() :
- (dynInst->m_op == Enums::MO_ST) ? parent->storeBusLength() :
+ int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
+ (dynInst->isStore()) ? parent->storeBusLength() :
parent->loadBusLength();
// delay for accessing the LDS
Tick processingTime =
diff --git a/src/gpu-compute/lds_state.hh b/src/gpu-compute/lds_state.hh
index 58d109493..5fcbe82c0 100644
--- a/src/gpu-compute/lds_state.hh
+++ b/src/gpu-compute/lds_state.hh
@@ -43,7 +43,6 @@
#include <utility>
#include <vector>
-#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/misc.hh"
#include "mem/mem_object.hh"
diff --git a/src/gpu-compute/local_memory_pipeline.cc b/src/gpu-compute/local_memory_pipeline.cc
index e2238bf45..80dad6fcd 100644
--- a/src/gpu-compute/local_memory_pipeline.cc
+++ b/src/gpu-compute/local_memory_pipeline.cc
@@ -62,7 +62,7 @@ LocalMemPipeline::exec()
lmReturnedRequests.front() : nullptr;
bool accessVrf = true;
- if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) {
+ if ((m) && (m->isLoad() || m->isAtomicRet())) {
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
accessVrf =
@@ -137,7 +137,7 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
// Return data to registers
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) {
+ if (m->isLoad() || m->isAtomicRet()) {
std::vector<uint32_t> regVec;
for (int k = 0; k < m->n_reg; ++k) {
int dst = m->dst_reg+k;
@@ -172,13 +172,12 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
// Decrement outstanding request count
computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
- if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op)
- || MO_H(m->m_op)) {
+ if (m->isStore() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
m->time, -1);
}
- if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
+ if (m->isLoad() || m->isAtomic()) {
computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
m->time, -1);
}
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index c1f741d6a..13afab977 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -47,7 +47,6 @@
#include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
-#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_tlb.hh"
diff --git a/src/gpu-compute/vector_register_file.cc b/src/gpu-compute/vector_register_file.cc
index c43d765af..c50c06cc6 100644
--- a/src/gpu-compute/vector_register_file.cc
+++ b/src/gpu-compute/vector_register_file.cc
@@ -38,7 +38,6 @@
#include <string>
#include "base/misc.hh"
-#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
@@ -153,8 +152,8 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
void
VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w)
{
- bool loadInstr = IS_OT_READ(ii->opType());
- bool atomicInstr = IS_OT_ATOMIC(ii->opType());
+ bool loadInstr = ii->isLoad();
+ bool atomicInstr = ii->isAtomic() || ii->isMemFence();
bool loadNoArgInstr = loadInstr && !ii->isArgLoad();
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index c677cbe41..caeed85a7 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -37,7 +37,6 @@
#include "debug/GPUExec.hh"
#include "debug/WavefrontStack.hh"
-#include "gpu-compute/code_enums.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
@@ -165,19 +164,8 @@ Wavefront::start(uint64_t _wf_dyn_id,uint64_t _base_ptr)
bool
Wavefront::isGmInstruction(GPUDynInstPtr ii)
{
- if (IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
- IS_OT_ATOMIC_PM(ii->opType())) {
+ if (ii->isGlobalMem() || ii->isFlat())
return true;
- }
-
- if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
- IS_OT_ATOMIC_GM(ii->opType())) {
- return true;
- }
-
- if (IS_OT_FLAT(ii->opType())) {
- return true;
- }
return false;
}
@@ -185,8 +173,7 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii)
bool
Wavefront::isLmInstruction(GPUDynInstPtr ii)
{
- if (IS_OT_READ_LM(ii->opType()) || IS_OT_WRITE_LM(ii->opType()) ||
- IS_OT_ATOMIC_LM(ii->opType())) {
+ if (ii->isLocalMem()) {
return true;
}
@@ -199,10 +186,9 @@ Wavefront::isOldestInstALU()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && (ii->opType() == Enums::OT_NOP ||
- ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH ||
- ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
- ii->opType() == Enums::OT_KERN_READ)) {
+ if (status != S_STOPPED && (ii->isNop() ||
+ ii->isReturn() || ii->isBranch() ||
+ ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) {
return true;
}
@@ -215,7 +201,7 @@ Wavefront::isOldestInstBarrier()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && ii->opType() == Enums::OT_BARRIER) {
+ if (status != S_STOPPED && ii->isBarrier()) {
return true;
}
@@ -228,9 +214,7 @@ Wavefront::isOldestInstGMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && (IS_OT_READ_GM(ii->opType()) ||
- IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
-
+ if (status != S_STOPPED && ii->isGlobalMem()) {
return true;
}
@@ -243,9 +227,7 @@ Wavefront::isOldestInstLMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && (IS_OT_READ_LM(ii->opType()) ||
- IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
-
+ if (status != S_STOPPED && ii->isLocalMem()) {
return true;
}
@@ -258,9 +240,7 @@ Wavefront::isOldestInstPrivMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && (IS_OT_READ_PM(ii->opType()) ||
- IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
-
+ if (status != S_STOPPED && ii->isPrivateSeg()) {
return true;
}
@@ -273,8 +253,7 @@ Wavefront::isOldestInstFlatMem()
assert(!instructionBuffer.empty());
GPUDynInstPtr ii = instructionBuffer.front();
- if (status != S_STOPPED && IS_OT_FLAT(ii->opType())) {
-
+ if (status != S_STOPPED && ii->isFlat()) {
return true;
}
@@ -289,7 +268,7 @@ Wavefront::instructionBufferHasBranch()
for (auto it : instructionBuffer) {
GPUDynInstPtr ii = it;
- if (ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH) {
+ if (ii->isReturn() || ii->isBranch()) {
return true;
}
}
@@ -371,23 +350,16 @@ Wavefront::ready(itype_e type)
// checking readiness will be fixed eventually. In the meantime, let's
// make sure that we do not silently let an instruction type slip
// through this logic and always return not ready.
- if (!(ii->opType() == Enums::OT_BARRIER || ii->opType() == Enums::OT_NOP ||
- ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH ||
- ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
- ii->opType() == Enums::OT_KERN_READ ||
- ii->opType() == Enums::OT_ARG ||
- IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
- IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_READ_LM(ii->opType()) ||
- IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
- IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
- IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_FLAT(ii->opType()))) {
+ if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
+ ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
+ ii->isMemFence() || ii->isFlat())) {
panic("next instruction: %s is of unknown type\n", ii->disassemble());
}
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n",
computeUnit->cu_id, simdId, wfSlotId, ii->disassemble());
- if (type == I_ALU && ii->opType() == Enums::OT_BARRIER) {
+ if (type == I_ALU && ii->isBarrier()) {
// Here for ALU instruction (barrier)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@@ -400,7 +372,7 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
- } else if (type == I_ALU && ii->opType() == Enums::OT_NOP) {
+ } else if (type == I_ALU && ii->isNop()) {
// Here for ALU instruction (nop)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@@ -408,7 +380,7 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
- } else if (type == I_ALU && ii->opType() == Enums::OT_RET) {
+ } else if (type == I_ALU && ii->isReturn()) {
// Here for ALU instruction (return)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is wave slot free?
@@ -421,10 +393,10 @@ Wavefront::ready(itype_e type)
}
ready_inst = true;
- } else if (type == I_ALU && (ii->opType() == Enums::OT_BRANCH ||
- ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
- ii->opType() == Enums::OT_KERN_READ ||
- ii->opType() == Enums::OT_ARG)) {
+ } else if (type == I_ALU && (ii->isBranch() ||
+ ii->isALU() ||
+ (ii->isKernArgSeg() && ii->isLoad()) ||
+ ii->isArgSeg())) {
// Here for ALU instruction (all others)
if (!computeUnit->wfWait[simdId].prerdy()) {
// Is alu slot free?
@@ -439,18 +411,16 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
- } else if (type == I_GLOBAL && (IS_OT_READ_GM(ii->opType()) ||
- IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
+ } else if (type == I_GLOBAL && ii->isGlobalMem()) {
// Here Global memory instruction
- if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) {
+ if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
// Are there in pipe or outstanding global memory write requests?
if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
return 0;
}
}
- if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) ||
- IS_OT_HIST_GM(ii->opType())) {
+ if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
// Are there in pipe or outstanding global memory read requests?
if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
return 0;
@@ -480,17 +450,15 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
- } else if (type == I_SHARED && (IS_OT_READ_LM(ii->opType()) ||
- IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
+ } else if (type == I_SHARED && ii->isLocalMem()) {
// Here for Shared memory instruction
- if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) {
+ if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
return 0;
}
}
- if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
- IS_OT_HIST_LM(ii->opType())) {
+ if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
return 0;
}
@@ -519,47 +487,7 @@ Wavefront::ready(itype_e type)
return 0;
}
ready_inst = true;
- } else if (type == I_PRIVATE && (IS_OT_READ_PM(ii->opType()) ||
- IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
- // Here for Private memory instruction ------------------------ //
- if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
- if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
- return 0;
- }
- }
-
- if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
- IS_OT_HIST_PM(ii->opType())) {
- if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) {
- return 0;
- }
- }
-
- if (!glbMemBusRdy) {
- // Is there an available VRF->Global memory read bus?
- return 0;
- }
-
- if (!glbMemIssueRdy) {
- // Is wave slot free?
- return 0;
- }
-
- if (!computeUnit->globalMemoryPipe.
- isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
- // Can we insert a new request to the Global Mem Request FIFO?
- return 0;
- }
- // can we schedule source & destination operands on the VRF?
- if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
- VrfAccessType::RD_WR)) {
- return 0;
- }
- if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
- return 0;
- }
- ready_inst = true;
- } else if (type == I_FLAT && IS_OT_FLAT(ii->opType())) {
+ } else if (type == I_FLAT && ii->isFlat()) {
if (!glbMemBusRdy) {
// Is there an available VRF->Global memory read bus?
return 0;
@@ -618,23 +546,22 @@ Wavefront::updateResources()
assert(ii);
computeUnit->vrf[simdId]->updateResources(this, ii);
// Single precision ALU or Branch or Return or Special instruction
- if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
- ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
+ if (ii->isALU() || ii->isSpecialOp() ||
+ ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines
- ii->opType()==Enums::OT_KERN_READ ||
- ii->opType()==Enums::OT_ARG ||
- ii->opType()==Enums::OT_RET) {
+ (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
+ ii->isReturn()) {
computeUnit->aluPipe[simdId].preset(computeUnit->shader->
ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod));
- } else if (ii->opType() == Enums::OT_BARRIER) {
+ } else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].preset(computeUnit->shader->
ticks(computeUnit->issuePeriod));
- } else if (ii->opType() == Enums::OT_FLAT_READ) {
+ } else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++;
rdGmReqsInPipe++;
@@ -649,7 +576,7 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
- } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
+ } else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
memReqsInPipe++;
wrGmReqsInPipe++;
@@ -664,21 +591,21 @@ Wavefront::updateResources()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
- } else if (IS_OT_READ_GM(ii->opType())) {
+ } else if (ii->isLoad() && ii->isGlobalMem()) {
memReqsInPipe++;
rdGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_GM(ii->opType())) {
+ } else if (ii->isStore() && ii->isGlobalMem()) {
memReqsInPipe++;
wrGmReqsInPipe++;
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_GM(ii->opType())) {
+ } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
memReqsInPipe++;
wrGmReqsInPipe++;
rdGmReqsInPipe++;
@@ -686,21 +613,21 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_READ_LM(ii->opType())) {
+ } else if (ii->isLoad() && ii->isLocalMem()) {
memReqsInPipe++;
rdLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_LM(ii->opType())) {
+ } else if (ii->isStore() && ii->isLocalMem()) {
memReqsInPipe++;
wrLmReqsInPipe++;
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_LM(ii->opType())) {
+ } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
memReqsInPipe++;
wrLmReqsInPipe++;
rdLmReqsInPipe++;
@@ -708,28 +635,6 @@ Wavefront::updateResources()
preset(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_READ_PM(ii->opType())) {
- memReqsInPipe++;
- rdGmReqsInPipe++;
- computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
- preset(computeUnit->shader->ticks(4));
- computeUnit->wfWait[computeUnit->GlbMemUnitId()].
- preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_PM(ii->opType())) {
- memReqsInPipe++;
- wrGmReqsInPipe++;
- computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
- preset(computeUnit->shader->ticks(8));
- computeUnit->wfWait[computeUnit->GlbMemUnitId()].
- preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_PM(ii->opType())) {
- memReqsInPipe++;
- wrGmReqsInPipe++;
- rdGmReqsInPipe++;
- computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
- preset(computeUnit->shader->ticks(8));
- computeUnit->wfWait[computeUnit->GlbMemUnitId()].
- preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
}
@@ -751,7 +656,7 @@ Wavefront::exec()
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
"(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
ii->disassemble(), old_pc);
- ii->execute();
+ ii->execute(ii);
// access the VRF
computeUnit->vrf[simdId]->exec(ii, this);
srcRegOpDist.sample(ii->numSrcRegOperands());
@@ -785,24 +690,24 @@ Wavefront::exec()
// ---- Update Vector ALU pipeline and other resources ------------------ //
// Single precision ALU or Branch or Return or Special instruction
- if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
- ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
+ if (ii->isALU() || ii->isSpecialOp() ||
+ ii->isBranch() ||
// FIXME: Kernel argument loads are currently treated as ALU operations
// since we don't send memory packets at execution. If we fix that then
// we should map them to one of the memory pipelines
- ii->opType() == Enums::OT_KERN_READ ||
- ii->opType() == Enums::OT_ARG ||
- ii->opType() == Enums::OT_RET) {
+ (ii->isKernArgSeg() && ii->isLoad()) ||
+ ii->isArgSeg() ||
+ ii->isReturn()) {
computeUnit->aluPipe[simdId].set(computeUnit->shader->
ticks(computeUnit->spBypassLength()));
// this is to enforce a fixed number of cycles per issue slot per SIMD
computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod));
- } else if (ii->opType() == Enums::OT_BARRIER) {
+ } else if (ii->isBarrier()) {
computeUnit->wfWait[simdId].set(computeUnit->shader->
ticks(computeUnit->issuePeriod));
- } else if (ii->opType() == Enums::OT_FLAT_READ) {
+ } else if (ii->isLoad() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) {
@@ -816,7 +721,7 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
- } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
+ } else if (ii->isStore() && ii->isFlat()) {
assert(Enums::SC_NONE != ii->executedAs());
if (Enums::SC_SHARED == ii->executedAs()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
@@ -829,32 +734,32 @@ Wavefront::exec()
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
}
- } else if (IS_OT_READ_GM(ii->opType())) {
+ } else if (ii->isLoad() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_GM(ii->opType())) {
+ } else if (ii->isStore() && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_GM(ii->opType())) {
+ } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->GlbMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_READ_LM(ii->opType())) {
+ } else if (ii->isLoad() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(4));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_WRITE_LM(ii->opType())) {
+ } else if (ii->isStore() && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].
set(computeUnit->shader->ticks(computeUnit->issuePeriod));
- } else if (IS_OT_ATOMIC_LM(ii->opType())) {
+ } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
set(computeUnit->shader->ticks(8));
computeUnit->wfWait[computeUnit->ShrMemUnitId()].