path: root/src/arch/hsail
author     Tony Gutierrez <anthony.gutierrez@amd.com>  2016-01-19 14:28:22 -0500
committer  Tony Gutierrez <anthony.gutierrez@amd.com>  2016-01-19 14:28:22 -0500
commit     1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree       867510a147cd095f19499d26b7c02d27de4cae9d /src/arch/hsail
parent     28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
download   gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/arch/hsail')
-rw-r--r--  src/arch/hsail/Brig.h                    |   67
-rw-r--r--  src/arch/hsail/Brig_new.hpp              | 1587
-rw-r--r--  src/arch/hsail/SConscript                |   54
-rw-r--r--  src/arch/hsail/SConsopts                 |   40
-rwxr-xr-x  src/arch/hsail/gen.py                    |  806
-rw-r--r--  src/arch/hsail/generic_types.cc          |   47
-rw-r--r--  src/arch/hsail/generic_types.hh          |   16
-rw-r--r--  src/arch/hsail/gpu_decoder.hh            |   77
-rw-r--r--  src/arch/hsail/gpu_types.hh              |   69
-rw-r--r--  src/arch/hsail/insts/branch.cc           |   86
-rw-r--r--  src/arch/hsail/insts/branch.hh           |  442
-rw-r--r--  src/arch/hsail/insts/decl.hh             | 1106
-rw-r--r--  src/arch/hsail/insts/gpu_static_inst.cc  |   64
-rw-r--r--  src/arch/hsail/insts/gpu_static_inst.hh  |   65
-rw-r--r--  src/arch/hsail/insts/main.cc             |  208
-rw-r--r--  src/arch/hsail/insts/mem.cc              |  139
-rw-r--r--  src/arch/hsail/insts/mem.hh              | 1629
-rw-r--r--  src/arch/hsail/insts/mem_impl.hh         |  660
-rw-r--r--  src/arch/hsail/insts/pseudo_inst.cc      |  787
-rw-r--r--  src/arch/hsail/operand.cc                |  449
-rw-r--r--  src/arch/hsail/operand.hh                |  768
21 files changed, 9166 insertions, 0 deletions
diff --git a/src/arch/hsail/Brig.h b/src/arch/hsail/Brig.h
new file mode 100644
index 000000000..b260157ab
--- /dev/null
+++ b/src/arch/hsail/Brig.h
@@ -0,0 +1,67 @@
+// University of Illinois/NCSA
+// Open Source License
+//
+// Copyright (c) 2013, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Developed by:
+//
+// HSA Team
+//
+// Advanced Micro Devices, Inc
+//
+// www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal with
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimers.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimers in the
+// documentation and/or other materials provided with the distribution.
+//
+// * Neither the names of the LLVM Team, University of Illinois at
+// Urbana-Champaign, nor the names of its contributors may be used to
+// endorse or promote products derived from this Software without specific
+// prior written permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+// SOFTWARE.
+#ifndef INTERNAL_BRIG_H
+#define INTERNAL_BRIG_H
+
+#include <stdint.h>
+
+namespace Brig {
+#include "Brig_new.hpp"
+
+// These typedefs provide some backward compatibility with earlier versions
+// of Brig.h, reducing the number of code changes. The distinct names also
+// increase legibility by showing the code's intent.
+typedef BrigBase BrigDirective;
+typedef BrigBase BrigOperand;
+
+enum BrigMemoryFenceSegments { // for internal use only
+ //.mnemo={ s/^BRIG_MEMORY_FENCE_SEGMENT_//;lc }
+ //.mnemo_token=_EMMemoryFenceSegments
+ //.mnemo_context=EInstModifierInstFenceContext
+ BRIG_MEMORY_FENCE_SEGMENT_GLOBAL = 0,
+ BRIG_MEMORY_FENCE_SEGMENT_GROUP = 1,
+ BRIG_MEMORY_FENCE_SEGMENT_IMAGE = 2,
+ BRIG_MEMORY_FENCE_SEGMENT_LAST = 3 //.skip
+};
+
+}
+
+#endif // defined(INTERNAL_BRIG_H)
diff --git a/src/arch/hsail/Brig_new.hpp b/src/arch/hsail/Brig_new.hpp
new file mode 100644
index 000000000..60e6f4dea
--- /dev/null
+++ b/src/arch/hsail/Brig_new.hpp
@@ -0,0 +1,1587 @@
+// University of Illinois/NCSA
+// Open Source License
+//
+// Copyright (c) 2013-2015, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Developed by:
+//
+// HSA Team
+//
+// Advanced Micro Devices, Inc
+//
+// www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal with
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimers.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimers in the
+// documentation and/or other materials provided with the distribution.
+//
+// * Neither the names of the LLVM Team, University of Illinois at
+// Urbana-Champaign, nor the names of its contributors may be used to
+// endorse or promote products derived from this Software without specific
+// prior written permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+// SOFTWARE.
+
+//.ignore{
+
+#ifndef INCLUDED_BRIG_H
+#define INCLUDED_BRIG_H
+
+#include <stdint.h>
+
+enum BrigAuxDefs {
+ MAX_OPERANDS_NUM = 6
+};
+
+//}
+
+typedef uint32_t BrigVersion32_t;
+
+enum BrigVersion {
+
+ //.nowrap
+ //.nodump
+ //.nollvm
+
+ BRIG_VERSION_HSAIL_MAJOR = 1,
+ BRIG_VERSION_HSAIL_MINOR = 0,
+ BRIG_VERSION_BRIG_MAJOR = 1,
+ BRIG_VERSION_BRIG_MINOR = 0
+};
+
+typedef uint8_t BrigAlignment8_t; //.defValue=BRIG_ALIGNMENT_NONE
+
+typedef uint8_t BrigAllocation8_t; //.defValue=BRIG_ALLOCATION_NONE
+
+typedef uint8_t BrigAluModifier8_t;
+
+typedef uint8_t BrigAtomicOperation8_t;
+
+typedef uint32_t BrigCodeOffset32_t; //.defValue=0 //.wtype=ItemRef<Code>
+
+typedef uint8_t BrigCompareOperation8_t;
+
+typedef uint16_t BrigControlDirective16_t;
+
+typedef uint32_t BrigDataOffset32_t;
+
+typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t; //.wtype=ListRef<Code> //.defValue=0
+
+typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t; //.wtype=ListRef<Operand> //.defValue=0
+
+typedef BrigDataOffset32_t BrigDataOffsetString32_t; //.wtype=StrRef //.defValue=0
+
+typedef uint8_t BrigExecutableModifier8_t;
+
+typedef uint8_t BrigImageChannelOrder8_t; //.defValue=BRIG_CHANNEL_ORDER_UNKNOWN
+
+typedef uint8_t BrigImageChannelType8_t; //.defValue=BRIG_CHANNEL_TYPE_UNKNOWN
+
+typedef uint8_t BrigImageGeometry8_t; //.defValue=BRIG_GEOMETRY_UNKNOWN
+
+typedef uint8_t BrigImageQuery8_t;
+
+typedef uint16_t BrigKind16_t;
+
+typedef uint8_t BrigLinkage8_t; //.defValue=BRIG_LINKAGE_NONE
+
+typedef uint8_t BrigMachineModel8_t; //.defValue=BRIG_MACHINE_LARGE
+
+typedef uint8_t BrigMemoryModifier8_t;
+
+typedef uint8_t BrigMemoryOrder8_t; //.defValue=BRIG_MEMORY_ORDER_RELAXED
+
+typedef uint8_t BrigMemoryScope8_t; //.defValue=BRIG_MEMORY_SCOPE_SYSTEM
+
+typedef uint16_t BrigOpcode16_t;
+
+typedef uint32_t BrigOperandOffset32_t; //.defValue=0 //.wtype=ItemRef<Operand>
+
+typedef uint8_t BrigPack8_t; //.defValue=BRIG_PACK_NONE
+
+typedef uint8_t BrigProfile8_t; //.defValue=BRIG_PROFILE_FULL
+
+typedef uint16_t BrigRegisterKind16_t;
+
+typedef uint8_t BrigRound8_t; //.defValue=BRIG_ROUND_NONE
+
+typedef uint8_t BrigSamplerAddressing8_t; //.defValue=BRIG_ADDRESSING_CLAMP_TO_EDGE
+
+typedef uint8_t BrigSamplerCoordNormalization8_t;
+
+typedef uint8_t BrigSamplerFilter8_t;
+
+typedef uint8_t BrigSamplerQuery8_t;
+
+typedef uint32_t BrigSectionIndex32_t;
+
+typedef uint8_t BrigSegCvtModifier8_t;
+
+typedef uint8_t BrigSegment8_t; //.defValue=BRIG_SEGMENT_NONE
+
+typedef uint32_t BrigStringOffset32_t; //.defValue=0 //.wtype=StrRef
+
+typedef uint16_t BrigType16_t;
+
+typedef uint8_t BrigVariableModifier8_t;
+
+typedef uint8_t BrigWidth8_t;
+
+typedef uint32_t BrigExceptions32_t;
+
+enum BrigKind {
+
+ //.nollvm
+ //
+ //.wname={ s/^BRIG_KIND//; MACRO2Name($_) }
+ //.mnemo=$wname{ $wname }
+ //
+ //.sizeof=$wname{ "sizeof(".$structs->{"Brig".$wname}->{rawbrig}.")" }
+ //.sizeof_switch //.sizeof_proto="int size_of_brig_record(unsigned arg)" //.sizeof_default="return -1"
+ //
+ //.isBodyOnly={ "false" }
+ //.isBodyOnly_switch //.isBodyOnly_proto="bool isBodyOnly(Directive d)" //.isBodyOnly_arg="d.kind()"
+ //.isBodyOnly_default="assert(false); return false"
+ //
+ //.isToplevelOnly={ "false" }
+ //.isToplevelOnly_switch //.isToplevelOnly_proto="bool isToplevelOnly(Directive d)" //.isToplevelOnly_arg="d.kind()"
+ //.isToplevelOnly_default="assert(false); return false"
+
+ BRIG_KIND_NONE = 0x0000, //.skip
+
+ BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, //.skip
+ BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000, //.isBodyOnly=true
+ BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001, //.isBodyOnly=true
+ BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
+ BRIG_KIND_DIRECTIVE_CONTROL = 0x1003, //.isBodyOnly=true
+ BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004, //.isToplevelOnly=true
+ BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
+ BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006, //.isToplevelOnly=true
+ BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, //.isToplevelOnly=true
+ BRIG_KIND_DIRECTIVE_KERNEL = 0x1008, //.isToplevelOnly=true
+ BRIG_KIND_DIRECTIVE_LABEL = 0x1009, //.isBodyOnly=true
+ BRIG_KIND_DIRECTIVE_LOC = 0x100a,
+ BRIG_KIND_DIRECTIVE_MODULE = 0x100b, //.isToplevelOnly=true
+ BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
+ BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d, //.isToplevelOnly=true
+ BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
+ BRIG_KIND_DIRECTIVE_END = 0x100f, //.skip
+
+ BRIG_KIND_INST_BEGIN = 0x2000, //.skip
+ BRIG_KIND_INST_ADDR = 0x2000,
+ BRIG_KIND_INST_ATOMIC = 0x2001,
+ BRIG_KIND_INST_BASIC = 0x2002,
+ BRIG_KIND_INST_BR = 0x2003,
+ BRIG_KIND_INST_CMP = 0x2004,
+ BRIG_KIND_INST_CVT = 0x2005,
+ BRIG_KIND_INST_IMAGE = 0x2006,
+ BRIG_KIND_INST_LANE = 0x2007,
+ BRIG_KIND_INST_MEM = 0x2008,
+ BRIG_KIND_INST_MEM_FENCE = 0x2009,
+ BRIG_KIND_INST_MOD = 0x200a,
+ BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
+ BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
+ BRIG_KIND_INST_QUEUE = 0x200d,
+ BRIG_KIND_INST_SEG = 0x200e,
+ BRIG_KIND_INST_SEG_CVT = 0x200f,
+ BRIG_KIND_INST_SIGNAL = 0x2010,
+ BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
+ BRIG_KIND_INST_END = 0x2012, //.skip
+
+ BRIG_KIND_OPERAND_BEGIN = 0x3000, //.skip
+ BRIG_KIND_OPERAND_ADDRESS = 0x3000,
+ BRIG_KIND_OPERAND_ALIGN = 0x3001,
+ BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
+ BRIG_KIND_OPERAND_CODE_REF = 0x3003,
+ BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
+ BRIG_KIND_OPERAND_RESERVED = 0x3005, //.skip
+ BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
+ BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
+ BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
+ BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
+ BRIG_KIND_OPERAND_REGISTER = 0x300a,
+ BRIG_KIND_OPERAND_STRING = 0x300b,
+ BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
+ BRIG_KIND_OPERAND_END = 0x300d //.skip
+};
+
+enum BrigAlignment {
+
+ //.mnemo={ s/^BRIG_ALIGNMENT_//; lc }
+ //.mnemo_proto="const char* align2str(unsigned arg)"
+ //
+ //.bytes={ /(\d+)/ ? $1 : undef }
+ //.bytes_switch //.bytes_proto="unsigned align2num(unsigned arg)" //.bytes_default="assert(false); return -1"
+ //
+ //.rbytes=$bytes{ $bytes }
+ //.rbytes_switch //.rbytes_reverse //.rbytes_proto="BrigAlignment num2align(uint64_t arg)"
+ //.rbytes_default="return BRIG_ALIGNMENT_LAST"
+ //
+ //.print=$bytes{ $bytes>1 ? "_align($bytes)" : "" }
+
+ BRIG_ALIGNMENT_NONE = 0, //.no_mnemo
+ BRIG_ALIGNMENT_1 = 1, //.mnemo=""
+ BRIG_ALIGNMENT_2 = 2,
+ BRIG_ALIGNMENT_4 = 3,
+ BRIG_ALIGNMENT_8 = 4,
+ BRIG_ALIGNMENT_16 = 5,
+ BRIG_ALIGNMENT_32 = 6,
+ BRIG_ALIGNMENT_64 = 7,
+ BRIG_ALIGNMENT_128 = 8,
+ BRIG_ALIGNMENT_256 = 9,
+
+ BRIG_ALIGNMENT_LAST, //.skip
+ BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1 //.skip
+};
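
Note: the alignment codes are log2-encoded rather than raw byte counts (BRIG_ALIGNMENT_1 = 1 means 1 byte, BRIG_ALIGNMENT_256 = 9 means 256 bytes), and BrigWidth later in this file uses the same scheme. A minimal decode sketch, equivalent to the generated align2num() switch described by the //.bytes annotations (the helper name is illustrative):

    // Decode a BrigAlignment8_t into a byte count: each enumerator past
    // BRIG_ALIGNMENT_1 doubles the alignment, so a shift suffices.
    inline unsigned alignToBytes(BrigAlignment8_t a)
    {
        // valid only for BRIG_ALIGNMENT_1 .. BRIG_ALIGNMENT_256
        return 1u << (a - BRIG_ALIGNMENT_1);
    }
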
+
+enum BrigAllocation {
+
+ //.mnemo={ s/^BRIG_ALLOCATION_//;lc }
+ //.mnemo_token=EAllocKind
+
+ BRIG_ALLOCATION_NONE = 0, //.mnemo=""
+ BRIG_ALLOCATION_PROGRAM = 1,
+ BRIG_ALLOCATION_AGENT = 2,
+ BRIG_ALLOCATION_AUTOMATIC = 3
+};
+
+enum BrigAluModifierMask {
+ BRIG_ALU_FTZ = 1
+};
+
+enum BrigAtomicOperation {
+
+ //.tdcaption="Atomic Operations"
+ //
+ //.mnemo={ s/^BRIG_ATOMIC_//;lc }
+ //.mnemo_token=_EMAtomicOp
+ //.mnemo_context=EInstModifierInstAtomicContext
+ //
+ //.print=$mnemo{ "_$mnemo" }
+
+ BRIG_ATOMIC_ADD = 0,
+ BRIG_ATOMIC_AND = 1,
+ BRIG_ATOMIC_CAS = 2,
+ BRIG_ATOMIC_EXCH = 3,
+ BRIG_ATOMIC_LD = 4,
+ BRIG_ATOMIC_MAX = 5,
+ BRIG_ATOMIC_MIN = 6,
+ BRIG_ATOMIC_OR = 7,
+ BRIG_ATOMIC_ST = 8,
+ BRIG_ATOMIC_SUB = 9,
+ BRIG_ATOMIC_WRAPDEC = 10,
+ BRIG_ATOMIC_WRAPINC = 11,
+ BRIG_ATOMIC_XOR = 12,
+ BRIG_ATOMIC_WAIT_EQ = 13,
+ BRIG_ATOMIC_WAIT_NE = 14,
+ BRIG_ATOMIC_WAIT_LT = 15,
+ BRIG_ATOMIC_WAIT_GTE = 16,
+ BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
+ BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
+ BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
+ BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
+};
+
+enum BrigCompareOperation {
+
+ //.tdcaption="Comparison Operators"
+ //
+ //.mnemo={ s/^BRIG_COMPARE_//;lc }
+ //.mnemo_token=_EMCompare
+ //
+ //.print=$mnemo{ "_$mnemo" }
+
+ BRIG_COMPARE_EQ = 0,
+ BRIG_COMPARE_NE = 1,
+ BRIG_COMPARE_LT = 2,
+ BRIG_COMPARE_LE = 3,
+ BRIG_COMPARE_GT = 4,
+ BRIG_COMPARE_GE = 5,
+ BRIG_COMPARE_EQU = 6,
+ BRIG_COMPARE_NEU = 7,
+ BRIG_COMPARE_LTU = 8,
+ BRIG_COMPARE_LEU = 9,
+ BRIG_COMPARE_GTU = 10,
+ BRIG_COMPARE_GEU = 11,
+ BRIG_COMPARE_NUM = 12,
+ BRIG_COMPARE_NAN = 13,
+ BRIG_COMPARE_SEQ = 14,
+ BRIG_COMPARE_SNE = 15,
+ BRIG_COMPARE_SLT = 16,
+ BRIG_COMPARE_SLE = 17,
+ BRIG_COMPARE_SGT = 18,
+ BRIG_COMPARE_SGE = 19,
+ BRIG_COMPARE_SGEU = 20,
+ BRIG_COMPARE_SEQU = 21,
+ BRIG_COMPARE_SNEU = 22,
+ BRIG_COMPARE_SLTU = 23,
+ BRIG_COMPARE_SLEU = 24,
+ BRIG_COMPARE_SNUM = 25,
+ BRIG_COMPARE_SNAN = 26,
+ BRIG_COMPARE_SGTU = 27
+};
+
+enum BrigControlDirective {
+
+ //.mnemo={ s/^BRIG_CONTROL_//;lc }
+ //.mnemo_token=EControl
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_CONTROL_NONE = 0, //.skip
+ BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
+ BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
+ BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
+ BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
+ BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
+ BRIG_CONTROL_REQUIREDDIM = 6,
+ BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
+ BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
+ BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
+};
+
+enum BrigExecutableModifierMask {
+ //.nodump
+ BRIG_EXECUTABLE_DEFINITION = 1
+};
+
+enum BrigImageChannelOrder {
+
+ //.mnemo={ s/^BRIG_CHANNEL_ORDER_?//;lc }
+ //.mnemo_token=EImageOrder
+ //.mnemo_context=EImageOrderContext
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_CHANNEL_ORDER_A = 0,
+ BRIG_CHANNEL_ORDER_R = 1,
+ BRIG_CHANNEL_ORDER_RX = 2,
+ BRIG_CHANNEL_ORDER_RG = 3,
+ BRIG_CHANNEL_ORDER_RGX = 4,
+ BRIG_CHANNEL_ORDER_RA = 5,
+ BRIG_CHANNEL_ORDER_RGB = 6,
+ BRIG_CHANNEL_ORDER_RGBX = 7,
+ BRIG_CHANNEL_ORDER_RGBA = 8,
+ BRIG_CHANNEL_ORDER_BGRA = 9,
+ BRIG_CHANNEL_ORDER_ARGB = 10,
+ BRIG_CHANNEL_ORDER_ABGR = 11,
+ BRIG_CHANNEL_ORDER_SRGB = 12,
+ BRIG_CHANNEL_ORDER_SRGBX = 13,
+ BRIG_CHANNEL_ORDER_SRGBA = 14,
+ BRIG_CHANNEL_ORDER_SBGRA = 15,
+ BRIG_CHANNEL_ORDER_INTENSITY = 16,
+ BRIG_CHANNEL_ORDER_LUMINANCE = 17,
+ BRIG_CHANNEL_ORDER_DEPTH = 18,
+ BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
+
+ // used internally
+ BRIG_CHANNEL_ORDER_UNKNOWN, //.mnemo="" // used when no order is specified
+
+ BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 //.skip
+
+};
+
+enum BrigImageChannelType {
+
+ //.mnemo={ s/^BRIG_CHANNEL_TYPE_//;lc }
+ //.mnemo_token=EImageFormat
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
+ BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
+ BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
+ BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
+ BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
+ BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
+ BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
+ BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
+ BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
+ BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
+ BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
+ BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
+ BRIG_CHANNEL_TYPE_FLOAT = 15,
+
+ // used internally
+ BRIG_CHANNEL_TYPE_UNKNOWN, //.mnemo=""
+
+ BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 //.skip
+};
+
+enum BrigImageGeometry {
+
+ //.tdcaption="Geometry"
+ //
+ //.mnemo={ s/^BRIG_GEOMETRY_//;lc }
+ //.mnemo_token=EImageGeometry
+ //
+ //.dim={/_([0-9]+D)(A)?/ ? $1+(defined $2?1:0) : undef}
+ //.dim_switch //.dim_proto="unsigned getBrigGeometryDim(unsigned geo)" //.dim_arg="geo"
+ //.dim_default="assert(0); return 0"
+ //
+ //.depth={/DEPTH$/?"true":"false"}
+ //.depth_switch //.depth_proto="bool isBrigGeometryDepth(unsigned geo)" //.depth_arg="geo"
+ //.depth_default="return false"
+
+ BRIG_GEOMETRY_1D = 0,
+ BRIG_GEOMETRY_2D = 1,
+ BRIG_GEOMETRY_3D = 2,
+ BRIG_GEOMETRY_1DA = 3,
+ BRIG_GEOMETRY_2DA = 4,
+ BRIG_GEOMETRY_1DB = 5,
+ BRIG_GEOMETRY_2DDEPTH = 6,
+ BRIG_GEOMETRY_2DADEPTH = 7,
+
+ // used internally
+ BRIG_GEOMETRY_UNKNOWN, //.mnemo=""
+
+ BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 //.skip
+};
+
+enum BrigImageQuery {
+
+ //.mnemo={ s/^BRIG_IMAGE_QUERY_//;lc }
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_IMAGE_QUERY_WIDTH = 0,
+ BRIG_IMAGE_QUERY_HEIGHT = 1,
+ BRIG_IMAGE_QUERY_DEPTH = 2,
+ BRIG_IMAGE_QUERY_ARRAY = 3,
+ BRIG_IMAGE_QUERY_CHANNELORDER = 4,
+ BRIG_IMAGE_QUERY_CHANNELTYPE = 5,
+ BRIG_IMAGE_QUERY_NUMMIPLEVELS = 6
+};
+
+enum BrigLinkage {
+
+ //.mnemo={ s/^BRIG_LINKAGE_//;s/NONE//;lc }
+
+ BRIG_LINKAGE_NONE = 0,
+ BRIG_LINKAGE_PROGRAM = 1,
+ BRIG_LINKAGE_MODULE = 2,
+ BRIG_LINKAGE_FUNCTION = 3,
+ BRIG_LINKAGE_ARG = 4
+};
+
+enum BrigMachineModel {
+
+ //.mnemo={ s/^BRIG_MACHINE_//; '$'.lc }
+ //.mnemo_token=ETargetMachine
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_MACHINE_SMALL = 0,
+ BRIG_MACHINE_LARGE = 1,
+
+ BRIG_MACHINE_UNDEF = 2 //.skip
+};
+
+enum BrigMemoryModifierMask { //.tddef=0
+ BRIG_MEMORY_CONST = 1
+};
+
+enum BrigMemoryOrder {
+
+ //.mnemo={ s/^BRIG_MEMORY_ORDER_//; lc }
+ //.mnemo_token=_EMMemoryOrder
+ //
+ //.print=$mnemo{ "_$mnemo" }
+
+ BRIG_MEMORY_ORDER_NONE = 0, //.mnemo=""
+ BRIG_MEMORY_ORDER_RELAXED = 1, //.mnemo=rlx
+ BRIG_MEMORY_ORDER_SC_ACQUIRE = 2, //.mnemo=scacq
+ BRIG_MEMORY_ORDER_SC_RELEASE = 3, //.mnemo=screl
+ BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4, //.mnemo=scar
+
+ BRIG_MEMORY_ORDER_LAST = 5 //.skip
+};
+
+enum BrigMemoryScope {
+
+ //.mnemo={ s/^BRIG_MEMORY_SCOPE_//; lc }
+ //.mnemo_token=_EMMemoryScope
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_MEMORY_SCOPE_NONE = 0, //.mnemo=""
+ BRIG_MEMORY_SCOPE_WORKITEM = 1, //.mnemo=""
+ BRIG_MEMORY_SCOPE_WAVEFRONT = 2, //.mnemo=wave
+ BRIG_MEMORY_SCOPE_WORKGROUP = 3, //.mnemo=wg
+ BRIG_MEMORY_SCOPE_AGENT = 4, //.mnemo=agent
+ BRIG_MEMORY_SCOPE_SYSTEM = 5, //.mnemo=system
+
+ BRIG_MEMORY_SCOPE_LAST = 6 //.skip
+};
+
+enum BrigOpcode {
+
+ //.tdcaption="Instruction Opcodes"
+ //
+ //.k={ "BASIC" }
+ //.pscode=$k{ MACRO2Name("_".$k) }
+ //.opcodeparser=$pscode{ return $pscode && "parseMnemo$pscode" }
+ //.opcodeparser_incfile=ParserUtilities
+ //.opcodeparser_switch //.opcodeparser_proto="OpcodeParser getOpcodeParser(BrigOpcode16_t arg)" //.opcodeparser_default="return parseMnemoBasic"
+ //
+ //.psopnd={undef}
+ //.opndparser=$psopnd{ return $psopnd && "&Parser::parse$psopnd" }
+ //.opndparser_incfile=ParserUtilities
+ //.opndparser_switch //.opndparser_proto="Parser::OperandParser Parser::getOperandParser(BrigOpcode16_t arg)" //.opndparser_default="return &Parser::parseOperands"
+ //
+ //.mnemo={ s/^BRIG_OPCODE_//; s/GCN([^_])/GCN_$1/; lc }
+ //.mnemo_scanner=Instructions //.mnemo_token=EInstruction
+ //.mnemo_context=EDefaultContext
+ //
+ //.has_memory_order={undef}
+ //.semsupport=$has_memory_order{ return $has_memory_order && "true" }
+ //
+ //.hasType=$k{ return ($k and $k eq "BASIC_NO_TYPE") ? "false" : undef; }
+ //.hasType_switch //.hasType_proto="bool instHasType(BrigOpcode16_t arg)" //.hasType_default="return true"
+ //
+ //.opcodevis=$pscode{ s/^BRIG_OPCODE_//; sprintf("%-47s(","vis.visitOpcode_".$_) . ($pscode =~m/^(BasicOrMod|Nop)$/? "inst" : "HSAIL_ASM::Inst". ($pscode=~m/BasicNoType/? "Basic":$pscode) ."(inst)").")" }
+ //.opcodevis_switch //.opcodevis_proto="template <typename RetType, typename Visitor> RetType visitOpcode_gen(HSAIL_ASM::Inst inst, Visitor& vis)"
+ //.opcodevis_arg="inst.opcode()" //.opcodevis_default="return RetType()"
+ //.opcodevis_incfile=ItemUtils
+ //
+ //.ftz=$k{ return ($k eq "BASIC_OR_MOD" or $k eq "CMP" or $k eq "CVT") ? "true" : undef }
+ //.ftz_incfile=ItemUtils //.ftz_switch //.ftz_proto="inline bool instSupportsFtz(BrigOpcode16_t arg)" //.ftz_default="return false"
+ //
+ //.vecOpndIndex={undef}
+ //.vecOpndIndex_switch //.vecOpndIndex_proto="int vecOpndIndex(BrigOpcode16_t arg)" //.vecOpndIndex_default="return -1"
+ //.vecOpndIndex_incfile=ParserUtilities
+ //
+ //.numdst={undef}
+ //.numdst_switch //.numdst_proto="int instNumDstOperands(BrigOpcode16_t arg)" //.numdst_default="return 1"
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_OPCODE_NOP = 0, //.k=NOP //.hasType=false
+ BRIG_OPCODE_ABS = 1, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_ADD = 2, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_BORROW = 3,
+ BRIG_OPCODE_CARRY = 4,
+ BRIG_OPCODE_CEIL = 5, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_COPYSIGN = 6, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_DIV = 7, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_FLOOR = 8, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_FMA = 9, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_FRACT = 10, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_MAD = 11, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_MAX = 12, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_MIN = 13, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_MUL = 14, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_MULHI = 15, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_NEG = 16, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_REM = 17,
+ BRIG_OPCODE_RINT = 18, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_SQRT = 19, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_SUB = 20, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_TRUNC = 21, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_MAD24 = 22,
+ BRIG_OPCODE_MAD24HI = 23,
+ BRIG_OPCODE_MUL24 = 24,
+ BRIG_OPCODE_MUL24HI = 25,
+ BRIG_OPCODE_SHL = 26,
+ BRIG_OPCODE_SHR = 27,
+ BRIG_OPCODE_AND = 28,
+ BRIG_OPCODE_NOT = 29,
+ BRIG_OPCODE_OR = 30,
+ BRIG_OPCODE_POPCOUNT = 31, //.k=SOURCE_TYPE
+ BRIG_OPCODE_XOR = 32,
+ BRIG_OPCODE_BITEXTRACT = 33,
+ BRIG_OPCODE_BITINSERT = 34,
+ BRIG_OPCODE_BITMASK = 35,
+ BRIG_OPCODE_BITREV = 36,
+ BRIG_OPCODE_BITSELECT = 37,
+ BRIG_OPCODE_FIRSTBIT = 38, //.k=SOURCE_TYPE
+ BRIG_OPCODE_LASTBIT = 39, //.k=SOURCE_TYPE
+ BRIG_OPCODE_COMBINE = 40, //.k=SOURCE_TYPE //.vecOpndIndex=1
+ BRIG_OPCODE_EXPAND = 41, //.k=SOURCE_TYPE //.vecOpndIndex=0
+ BRIG_OPCODE_LDA = 42, //.k=ADDR
+ BRIG_OPCODE_MOV = 43,
+ BRIG_OPCODE_SHUFFLE = 44,
+ BRIG_OPCODE_UNPACKHI = 45,
+ BRIG_OPCODE_UNPACKLO = 46,
+ BRIG_OPCODE_PACK = 47, //.k=SOURCE_TYPE
+ BRIG_OPCODE_UNPACK = 48, //.k=SOURCE_TYPE
+ BRIG_OPCODE_CMOV = 49,
+ BRIG_OPCODE_CLASS = 50, //.k=SOURCE_TYPE
+ BRIG_OPCODE_NCOS = 51,
+ BRIG_OPCODE_NEXP2 = 52,
+ BRIG_OPCODE_NFMA = 53,
+ BRIG_OPCODE_NLOG2 = 54,
+ BRIG_OPCODE_NRCP = 55,
+ BRIG_OPCODE_NRSQRT = 56,
+ BRIG_OPCODE_NSIN = 57,
+ BRIG_OPCODE_NSQRT = 58,
+ BRIG_OPCODE_BITALIGN = 59,
+ BRIG_OPCODE_BYTEALIGN = 60,
+ BRIG_OPCODE_PACKCVT = 61, //.k=SOURCE_TYPE
+ BRIG_OPCODE_UNPACKCVT = 62, //.k=SOURCE_TYPE
+ BRIG_OPCODE_LERP = 63,
+ BRIG_OPCODE_SAD = 64, //.k=SOURCE_TYPE
+ BRIG_OPCODE_SADHI = 65, //.k=SOURCE_TYPE
+ BRIG_OPCODE_SEGMENTP = 66, //.k=SEG_CVT
+ BRIG_OPCODE_FTOS = 67, //.k=SEG_CVT
+ BRIG_OPCODE_STOF = 68, //.k=SEG_CVT
+ BRIG_OPCODE_CMP = 69, //.k=CMP
+ BRIG_OPCODE_CVT = 70, //.k=CVT
+ BRIG_OPCODE_LD = 71, //.k=MEM //.has_memory_order //.vecOpndIndex=0
+ BRIG_OPCODE_ST = 72, //.k=MEM //.has_memory_order //.vecOpndIndex=0 //.numdst=0
+ BRIG_OPCODE_ATOMIC = 73, //.k=ATOMIC
+ BRIG_OPCODE_ATOMICNORET = 74, //.k=ATOMIC //.numdst=0
+ BRIG_OPCODE_SIGNAL = 75, //.k=SIGNAL
+ BRIG_OPCODE_SIGNALNORET = 76, //.k=SIGNAL //.numdst=0
+ BRIG_OPCODE_MEMFENCE = 77, //.k=MEM_FENCE //.numdst=0
+ BRIG_OPCODE_RDIMAGE = 78, //.k=IMAGE //.vecOpndIndex=0
+ BRIG_OPCODE_LDIMAGE = 79, //.k=IMAGE //.vecOpndIndex=0
+ BRIG_OPCODE_STIMAGE = 80, //.k=IMAGE //.vecOpndIndex=0 //.numdst=0
+ BRIG_OPCODE_IMAGEFENCE = 81, //.k=BASIC_NO_TYPE
+ BRIG_OPCODE_QUERYIMAGE = 82, //.k=QUERY_IMAGE
+ BRIG_OPCODE_QUERYSAMPLER = 83, //.k=QUERY_SAMPLER
+ BRIG_OPCODE_CBR = 84, //.k=BR //.numdst=0
+ BRIG_OPCODE_BR = 85, //.k=BR //.numdst=0 //.hasType=false
+ BRIG_OPCODE_SBR = 86, //.k=BR //.numdst=0 //.psopnd=SbrOperands
+ BRIG_OPCODE_BARRIER = 87, //.k=BR //.numdst=0 //.hasType=false
+ BRIG_OPCODE_WAVEBARRIER = 88, //.k=BR //.numdst=0 //.hasType=false
+ BRIG_OPCODE_ARRIVEFBAR = 89, //.k=BR //.numdst=0 //.hasType=false
+ BRIG_OPCODE_INITFBAR = 90, //.k=BASIC_NO_TYPE //.numdst=0 //.hasType=false
+ BRIG_OPCODE_JOINFBAR = 91, //.k=BR //.numdst=0 //.hasType=false
+ BRIG_OPCODE_LEAVEFBAR = 92, //.k=BR //.numdst=0 //.hasType=false
+ BRIG_OPCODE_RELEASEFBAR = 93, //.k=BASIC_NO_TYPE //.numdst=0
+ BRIG_OPCODE_WAITFBAR = 94, //.k=BR //.numdst=0 //.hasType=false
+ BRIG_OPCODE_LDF = 95,
+ BRIG_OPCODE_ACTIVELANECOUNT = 96, //.k=LANE
+ BRIG_OPCODE_ACTIVELANEID = 97, //.k=LANE
+ BRIG_OPCODE_ACTIVELANEMASK = 98, //.k=LANE //.vecOpndIndex=0
+ BRIG_OPCODE_ACTIVELANEPERMUTE = 99, //.k=LANE
+ BRIG_OPCODE_CALL = 100, //.k=BR //.psopnd=CallOperands //.numdst=0 //.hasType=false
+ BRIG_OPCODE_SCALL = 101, //.k=BR //.psopnd=CallOperands //.numdst=0
+ BRIG_OPCODE_ICALL = 102, //.k=BR //.psopnd=CallOperands //.numdst=0
+ BRIG_OPCODE_RET = 103, //.k=BASIC_NO_TYPE
+ BRIG_OPCODE_ALLOCA = 104, //.k=MEM
+ BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
+ BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
+ BRIG_OPCODE_DIM = 107,
+ BRIG_OPCODE_GRIDGROUPS = 108,
+ BRIG_OPCODE_GRIDSIZE = 109,
+ BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
+ BRIG_OPCODE_PACKETID = 111,
+ BRIG_OPCODE_WORKGROUPID = 112,
+ BRIG_OPCODE_WORKGROUPSIZE = 113,
+ BRIG_OPCODE_WORKITEMABSID = 114,
+ BRIG_OPCODE_WORKITEMFLATABSID = 115,
+ BRIG_OPCODE_WORKITEMFLATID = 116,
+ BRIG_OPCODE_WORKITEMID = 117,
+ BRIG_OPCODE_CLEARDETECTEXCEPT = 118, //.numdst=0
+ BRIG_OPCODE_GETDETECTEXCEPT = 119,
+ BRIG_OPCODE_SETDETECTEXCEPT = 120, //.numdst=0
+ BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121, //.k=QUEUE
+ BRIG_OPCODE_CASQUEUEWRITEINDEX = 122, //.k=QUEUE
+ BRIG_OPCODE_LDQUEUEREADINDEX = 123, //.k=QUEUE
+ BRIG_OPCODE_LDQUEUEWRITEINDEX = 124, //.k=QUEUE
+ BRIG_OPCODE_STQUEUEREADINDEX = 125, //.k=QUEUE //.numdst=0
+ BRIG_OPCODE_STQUEUEWRITEINDEX = 126, //.k=QUEUE //.numdst=0
+ BRIG_OPCODE_CLOCK = 127,
+ BRIG_OPCODE_CUID = 128,
+ BRIG_OPCODE_DEBUGTRAP = 129, //.numdst=0
+ BRIG_OPCODE_GROUPBASEPTR = 130,
+ BRIG_OPCODE_KERNARGBASEPTR = 131,
+ BRIG_OPCODE_LANEID = 132,
+ BRIG_OPCODE_MAXCUID = 133,
+ BRIG_OPCODE_MAXWAVEID = 134,
+ BRIG_OPCODE_NULLPTR = 135, //.k=SEG
+ BRIG_OPCODE_WAVEID = 136,
+ BRIG_OPCODE_FIRST_USER_DEFINED = 32768, //.skip
+
+ BRIG_OPCODE_GCNMADU = (1u << 15) | 0, //.k=BASIC_NO_TYPE
+ BRIG_OPCODE_GCNMADS = (1u << 15) | 1, //.k=BASIC_NO_TYPE
+ BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2,
+ BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3,
+ BRIG_OPCODE_GCNMED3 = (1u << 15) | 4,
+ BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_GCNBFM = (1u << 15) | 9,
+ BRIG_OPCODE_GCNLD = (1u << 15) | 10, //.k=MEM //.has_memory_order //.vecOpndIndex=0
+ BRIG_OPCODE_GCNST = (1u << 15) | 11, //.k=MEM //.has_memory_order //.vecOpndIndex=0
+ BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12, //.k=ATOMIC
+ BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13, //.k=ATOMIC //.mnemo=gcn_atomicNoRet
+ BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14,
+ BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15,
+ BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16, //.k=BASIC_NO_TYPE //.mnemo=gcn_region_alloc
+ BRIG_OPCODE_GCNMSAD = (1u << 15) | 17,
+ BRIG_OPCODE_GCNQSAD = (1u << 15) | 18,
+ BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19,
+ BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20, //.k=BASIC_NO_TYPE
+ BRIG_OPCODE_GCNSADW = (1u << 15) | 21,
+ BRIG_OPCODE_GCNSADD = (1u << 15) | 22,
+ BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23, //.k=ADDR //.mnemo=gcn_atomic_consume
+ BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24, //.k=ADDR //.mnemo=gcn_atomic_append
+ BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25, //.mnemo=gcn_b4xchg
+ BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26, //.mnemo=gcn_b32xchg
+ BRIG_OPCODE_GCNMAX = (1u << 15) | 27,
+ BRIG_OPCODE_GCNMIN = (1u << 15) | 28,
+ BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29, //.k=BASIC_OR_MOD
+ BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30,
+
+ BRIG_OPCODE_AMDRDIMAGELOD = (1u << 15) | 31, //.k=IMAGE //.mnemo=amd_rdimagelod //.vecOpndIndex=0
+ BRIG_OPCODE_AMDRDIMAGEGRAD = (1u << 15) | 32, //.k=IMAGE //.mnemo=amd_rdimagegrad //.vecOpndIndex=0
+ BRIG_OPCODE_AMDLDIMAGEMIP = (1u << 15) | 33, //.k=IMAGE //.mnemo=amd_ldimagemip //.vecOpndIndex=0
+ BRIG_OPCODE_AMDSTIMAGEMIP = (1u << 15) | 34, //.k=IMAGE //.mnemo=amd_stimagemip //.vecOpndIndex=0 //.numdst=0
+ BRIG_OPCODE_AMDQUERYIMAGE = (1u << 15) | 35 //.k=QUERY_IMAGE //.mnemo=amd_queryimage
+};
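
Note: all GCN and AMD extension opcodes are formed as (1u << 15) | n, and 1u << 15 is exactly BRIG_OPCODE_FIRST_USER_DEFINED (32768), so vendor opcodes can be recognized with a single compare (helper name illustrative):

    // True for the GCN*/AMD* extension opcodes above, false for core HSAIL.
    inline bool isVendorOpcode(BrigOpcode16_t op)
    {
        return op >= BRIG_OPCODE_FIRST_USER_DEFINED;   // 32768 == 1u << 15
    }
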
+
+enum BrigPack {
+
+ //.tdcaption="Packing"
+ //
+ //.mnemo={ s/^BRIG_PACK_//;s/SAT$/_sat/;lc }
+ //.mnemo_token=_EMPacking
+ //
+ //.print=$mnemo{ "_$mnemo" }
+
+ BRIG_PACK_NONE = 0, //.mnemo=""
+ BRIG_PACK_PP = 1,
+ BRIG_PACK_PS = 2,
+ BRIG_PACK_SP = 3,
+ BRIG_PACK_SS = 4,
+ BRIG_PACK_S = 5,
+ BRIG_PACK_P = 6,
+ BRIG_PACK_PPSAT = 7,
+ BRIG_PACK_PSSAT = 8,
+ BRIG_PACK_SPSAT = 9,
+ BRIG_PACK_SSSAT = 10,
+ BRIG_PACK_SSAT = 11,
+ BRIG_PACK_PSAT = 12
+};
+
+enum BrigProfile {
+
+ //.mnemo={ s/^BRIG_PROFILE_//;'$'.lc }
+ //.mnemo_token=ETargetProfile
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_PROFILE_BASE = 0,
+ BRIG_PROFILE_FULL = 1,
+
+ BRIG_PROFILE_UNDEF = 2 //.skip
+};
+
+enum BrigRegisterKind {
+
+ //.mnemo={ s/^BRIG_REGISTER_KIND_//;'$'.lc(substr($_,0,1)) }
+ //
+ //.bits={ }
+ //.bits_switch //.bits_proto="unsigned getRegBits(BrigRegisterKind16_t arg)" //.bits_default="return (unsigned)-1"
+ //
+ //.nollvm
+
+ BRIG_REGISTER_KIND_CONTROL = 0, //.bits=1
+ BRIG_REGISTER_KIND_SINGLE = 1, //.bits=32
+ BRIG_REGISTER_KIND_DOUBLE = 2, //.bits=64
+ BRIG_REGISTER_KIND_QUAD = 3 //.bits=128
+};
+
+enum BrigRound {
+
+ //.mnemo={}
+ //.mnemo_fn=round2str //.mnemo_token=_EMRound
+ //
+ //.sat={/_SAT$/? "true" : "false"}
+ //.sat_switch //.sat_proto="bool isSatRounding(unsigned rounding)" //.sat_arg="rounding"
+ //.sat_default="return false"
+ //
+ //.sig={/_SIGNALING_/? "true" : "false"}
+ //.sig_switch //.sig_proto="bool isSignalingRounding(unsigned rounding)" //.sig_arg="rounding"
+ //.sig_default="return false"
+ //
+ //.int={/_INTEGER_/? "true" : "false"}
+ //.int_switch //.int_proto="bool isIntRounding(unsigned rounding)" //.int_arg="rounding"
+ //.int_default="return false"
+ //
+ //.flt={/_FLOAT_/? "true" : "false"}
+ //.flt_switch //.flt_proto="bool isFloatRounding(unsigned rounding)" //.flt_arg="rounding"
+ //.flt_default="return false"
+ //
+ //.print=$mnemo{ "_$mnemo" }
+
+ BRIG_ROUND_NONE = 0, //.no_mnemo
+ BRIG_ROUND_FLOAT_DEFAULT = 1, //.no_mnemo
+ BRIG_ROUND_FLOAT_NEAR_EVEN = 2, //.mnemo=near
+ BRIG_ROUND_FLOAT_ZERO = 3, //.mnemo=zero
+ BRIG_ROUND_FLOAT_PLUS_INFINITY = 4, //.mnemo=up
+ BRIG_ROUND_FLOAT_MINUS_INFINITY = 5, //.mnemo=down
+ BRIG_ROUND_INTEGER_NEAR_EVEN = 6, //.mnemo=neari
+ BRIG_ROUND_INTEGER_ZERO = 7, //.mnemo=zeroi
+ BRIG_ROUND_INTEGER_PLUS_INFINITY = 8, //.mnemo=upi
+ BRIG_ROUND_INTEGER_MINUS_INFINITY = 9, //.mnemo=downi
+ BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10, //.mnemo=neari_sat
+ BRIG_ROUND_INTEGER_ZERO_SAT = 11, //.mnemo=zeroi_sat
+ BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12, //.mnemo=upi_sat
+ BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13, //.mnemo=downi_sat
+ BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14, //.mnemo=sneari
+ BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15, //.mnemo=szeroi
+ BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16, //.mnemo=supi
+ BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17, //.mnemo=sdowni
+ BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18, //.mnemo=sneari_sat
+ BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19, //.mnemo=szeroi_sat
+ BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20, //.mnemo=supi_sat
+ BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21 //.mnemo=sdowni_sat
+};
+
+enum BrigSamplerAddressing {
+
+ //.mnemo={ s/^BRIG_ADDRESSING_//;lc }
+ //.mnemo_token=ESamplerAddressingMode
+
+ BRIG_ADDRESSING_UNDEFINED = 0,
+ BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
+ BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
+ BRIG_ADDRESSING_REPEAT = 3,
+ BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
+
+ BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 //.skip
+};
+
+enum BrigSamplerCoordNormalization {
+
+ //.mnemo={ s/^BRIG_COORD_//;lc }
+ //.mnemo_token=ESamplerCoord
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_COORD_UNNORMALIZED = 0,
+ BRIG_COORD_NORMALIZED = 1
+};
+
+enum BrigSamplerFilter {
+
+ //.mnemo={ s/^BRIG_FILTER_//;lc }
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_FILTER_NEAREST = 0,
+ BRIG_FILTER_LINEAR = 1,
+
+ BRIG_FILTER_FIRST_USER_DEFINED = 128 //.skip
+};
+
+enum BrigSamplerQuery {
+
+ //.mnemo={ s/^BRIG_SAMPLER_QUERY_//;lc }
+ //.mnemo_token=_EMSamplerQuery
+ //
+ //.print=$mnemo{ $mnemo }
+
+ BRIG_SAMPLER_QUERY_ADDRESSING = 0,
+ BRIG_SAMPLER_QUERY_COORD = 1,
+ BRIG_SAMPLER_QUERY_FILTER = 2
+};
+
+enum BrigSectionIndex {
+
+ //.nollvm
+ //
+ //.mnemo={ s/^BRIG_SECTION_INDEX_/HSA_/;lc }
+
+ BRIG_SECTION_INDEX_DATA = 0,
+ BRIG_SECTION_INDEX_CODE = 1,
+ BRIG_SECTION_INDEX_OPERAND = 2,
+ BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3,
+
+ // used internally
+ BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED //.skip
+};
+
+enum BrigSegCvtModifierMask {
+ BRIG_SEG_CVT_NONULL = 1 //.mnemo="nonull" //.print="_nonull"
+};
+
+enum BrigSegment {
+
+ //.mnemo={ s/^BRIG_SEGMENT_//;lc}
+ //.mnemo_token=_EMSegment
+ //.mnemo_context=EInstModifierContext
+ //
+ //.print=$mnemo{ $mnemo ? "_$mnemo" : "" }
+
+ BRIG_SEGMENT_NONE = 0, //.mnemo=""
+ BRIG_SEGMENT_FLAT = 1, //.mnemo=""
+ BRIG_SEGMENT_GLOBAL = 2,
+ BRIG_SEGMENT_READONLY = 3,
+ BRIG_SEGMENT_KERNARG = 4,
+ BRIG_SEGMENT_GROUP = 5,
+ BRIG_SEGMENT_PRIVATE = 6,
+ BRIG_SEGMENT_SPILL = 7,
+ BRIG_SEGMENT_ARG = 8,
+
+ BRIG_SEGMENT_FIRST_USER_DEFINED = 128, //.skip
+
+ BRIG_SEGMENT_AMD_GCN = 9, //.mnemo="region"
+};
+
+enum BrigPackedTypeBits {
+
+ //.nodump
+ //
+ //.nollvm
+
+ BRIG_TYPE_BASE_SIZE = 5,
+ BRIG_TYPE_PACK_SIZE = 2,
+ BRIG_TYPE_ARRAY_SIZE = 1,
+
+ BRIG_TYPE_BASE_SHIFT = 0,
+ BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
+ BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,
+
+ BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT,
+ BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT,
+
+ BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT,
+
+ BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT
+};
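
Note: the BrigType values defined next are built from these fields — bits 0-4 hold the base type, bits 5-6 the packing, and bit 7 the array flag — e.g. BRIG_TYPE_U8X4 is BRIG_TYPE_U8 | BRIG_TYPE_PACK_32. A sketch of the field extraction (helper names are illustrative, not part of this header):

    // Split a BrigType16_t into its components using the masks above.
    inline unsigned baseType(unsigned t) { return t & BRIG_TYPE_BASE_MASK; }
    inline unsigned packKind(unsigned t) { return t & BRIG_TYPE_PACK_MASK; }
    inline bool isArrayTy(unsigned t)    { return (t & BRIG_TYPE_ARRAY_MASK) != 0; }
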
+
+enum BrigType {
+
+ //.numBits={ /ARRAY$/ ? undef : /([0-9]+)X([0-9]+)/ ? $1*$2 : /([0-9]+)/ ? $1 : undef }
+ //.numBits_switch //.numBits_proto="unsigned getBrigTypeNumBits(unsigned arg)" //.numBits_default="assert(0); return 0"
+ //.numBytes=$numBits{ $numBits > 1 ? $numBits/8 : undef }
+ //.numBytes_switch //.numBytes_proto="unsigned getBrigTypeNumBytes(unsigned arg)" //.numBytes_default="assert(0); return 0"
+ //
+ //.mnemo={ s/^BRIG_TYPE_//;lc }
+ //.mnemo_token=_EMType
+ //
+ //.array={/ARRAY$/?"true":"false"}
+ //.array_switch //.array_proto="bool isArrayType(unsigned type)" //.array_arg="type"
+ //.array_default="return false"
+ //
+ //.a2e={/(.*)_ARRAY$/? $1 : "BRIG_TYPE_NONE"}
+ //.a2e_switch //.a2e_proto="unsigned arrayType2elementType(unsigned type)" //.a2e_arg="type"
+ //.a2e_default="return BRIG_TYPE_NONE"
+ //
+ //.e2a={/_ARRAY$/? "BRIG_TYPE_NONE" : /_NONE$/ ? "BRIG_TYPE_NONE" : /_B1$/ ? "BRIG_TYPE_NONE" : $_ . "_ARRAY"}
+ //.e2a_switch //.e2a_proto="unsigned elementType2arrayType(unsigned type)" //.e2a_arg="type"
+ //.e2a_default="return BRIG_TYPE_NONE"
+ //
+ //.t2s={s/^BRIG_TYPE_//;lc s/_ARRAY$/[]/;lc}
+ //.t2s_switch //.t2s_proto="const char* type2name(unsigned type)" //.t2s_arg="type"
+ //.t2s_default="return NULL"
+ //
+ //.dispatch_switch //.dispatch_incfile=TemplateUtilities
+ //.dispatch_proto="template<typename RetType, typename Visitor>\nRetType dispatchByType_gen(unsigned type, Visitor& v)"
+ //.dispatch={ /ARRAY$/ ? "v.visitNone(type)" : /^BRIG_TYPE_([BUSF]|SIG)[0-9]+/ ? "v.template visit< BrigTypeTraits<$_> >()" : "v.visitNone(type)" }
+ //.dispatch_arg="type" //.dispatch_default="return v.visitNone(type)"
+ //
+ //- .tdname=BrigType
+ //
+ //.print=$mnemo{ "_$mnemo" }
+
+ BRIG_TYPE_NONE = 0, //.mnemo="" //.print=""
+ BRIG_TYPE_U8 = 1, //.ctype=uint8_t
+ BRIG_TYPE_U16 = 2, //.ctype=uint16_t
+ BRIG_TYPE_U32 = 3, //.ctype=uint32_t
+ BRIG_TYPE_U64 = 4, //.ctype=uint64_t
+ BRIG_TYPE_S8 = 5, //.ctype=int8_t
+ BRIG_TYPE_S16 = 6, //.ctype=int16_t
+ BRIG_TYPE_S32 = 7, //.ctype=int32_t
+ BRIG_TYPE_S64 = 8, //.ctype=int64_t
+ BRIG_TYPE_F16 = 9, //.ctype=f16_t
+ BRIG_TYPE_F32 = 10, //.ctype=float
+ BRIG_TYPE_F64 = 11, //.ctype=double
+ BRIG_TYPE_B1 = 12, //.ctype=bool //.numBytes=1
+ BRIG_TYPE_B8 = 13, //.ctype=uint8_t
+ BRIG_TYPE_B16 = 14, //.ctype=uint16_t
+ BRIG_TYPE_B32 = 15, //.ctype=uint32_t
+ BRIG_TYPE_B64 = 16, //.ctype=uint64_t
+ BRIG_TYPE_B128 = 17, //.ctype=b128_t
+ BRIG_TYPE_SAMP = 18, //.mnemo=samp //.numBits=64
+ BRIG_TYPE_ROIMG = 19, //.mnemo=roimg //.numBits=64
+ BRIG_TYPE_WOIMG = 20, //.mnemo=woimg //.numBits=64
+ BRIG_TYPE_RWIMG = 21, //.mnemo=rwimg //.numBits=64
+ BRIG_TYPE_SIG32 = 22, //.mnemo=sig32 //.numBits=64
+ BRIG_TYPE_SIG64 = 23, //.mnemo=sig64 //.numBits=64
+
+ BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32, //.ctype=uint8_t
+ BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64, //.ctype=uint8_t
+ BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128, //.ctype=uint8_t
+ BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32, //.ctype=uint16_t
+ BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64, //.ctype=uint16_t
+ BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, //.ctype=uint16_t
+ BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64, //.ctype=uint32_t
+ BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, //.ctype=uint32_t
+ BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, //.ctype=uint64_t
+ BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32, //.ctype=int8_t
+ BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64, //.ctype=int8_t
+ BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128, //.ctype=int8_t
+ BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32, //.ctype=int16_t
+ BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64, //.ctype=int16_t
+ BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, //.ctype=int16_t
+ BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64, //.ctype=int32_t
+ BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, //.ctype=int32_t
+ BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, //.ctype=int64_t
+ BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32, //.ctype=f16_t
+ BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64, //.ctype=f16_t
+ BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, //.ctype=f16_t
+ BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64, //.ctype=float
+ BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, //.ctype=float
+ BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, //.ctype=double
+
+ BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+ BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
+
+ // Used internally
+ BRIG_TYPE_INVALID = (unsigned) -1 //.skip
+};
+
+enum BrigVariableModifierMask {
+
+ //.nodump
+
+ BRIG_VARIABLE_DEFINITION = 1,
+ BRIG_VARIABLE_CONST = 2
+};
+
+enum BrigWidth {
+
+ //.tddef=1
+ //
+ //.print={ s/^BRIG_WIDTH_//; "_width($_)" }
+
+ BRIG_WIDTH_NONE = 0,
+ BRIG_WIDTH_1 = 1,
+ BRIG_WIDTH_2 = 2,
+ BRIG_WIDTH_4 = 3,
+ BRIG_WIDTH_8 = 4,
+ BRIG_WIDTH_16 = 5,
+ BRIG_WIDTH_32 = 6,
+ BRIG_WIDTH_64 = 7,
+ BRIG_WIDTH_128 = 8,
+ BRIG_WIDTH_256 = 9,
+ BRIG_WIDTH_512 = 10,
+ BRIG_WIDTH_1024 = 11,
+ BRIG_WIDTH_2048 = 12,
+ BRIG_WIDTH_4096 = 13,
+ BRIG_WIDTH_8192 = 14,
+ BRIG_WIDTH_16384 = 15,
+ BRIG_WIDTH_32768 = 16,
+ BRIG_WIDTH_65536 = 17,
+ BRIG_WIDTH_131072 = 18,
+ BRIG_WIDTH_262144 = 19,
+ BRIG_WIDTH_524288 = 20,
+ BRIG_WIDTH_1048576 = 21,
+ BRIG_WIDTH_2097152 = 22,
+ BRIG_WIDTH_4194304 = 23,
+ BRIG_WIDTH_8388608 = 24,
+ BRIG_WIDTH_16777216 = 25,
+ BRIG_WIDTH_33554432 = 26,
+ BRIG_WIDTH_67108864 = 27,
+ BRIG_WIDTH_134217728 = 28,
+ BRIG_WIDTH_268435456 = 29,
+ BRIG_WIDTH_536870912 = 30,
+ BRIG_WIDTH_1073741824 = 31,
+ BRIG_WIDTH_2147483648 = 32,
+ BRIG_WIDTH_WAVESIZE = 33,
+ BRIG_WIDTH_ALL = 34,
+
+ BRIG_WIDTH_LAST //.skip
+};
+
+struct BrigUInt64 { //.isroot //.standalone
+ uint32_t lo; //.defValue=0
+ uint32_t hi; //.defValue=0
+
+ //+hcode KLASS& operator=(uint64_t rhs);
+ //+hcode operator uint64_t();
+ //+implcode inline KLASS& KLASS::operator=(uint64_t rhs) { lo() = (uint32_t)rhs; hi() = (uint32_t)(rhs >> 32); return *this; }
+ //+implcode inline KLASS::operator uint64_t() { return ((uint64_t)hi()) << 32 | lo(); }
+};
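
Note: splitting the 64-bit value into two 32-bit halves keeps the struct's alignment requirement at 4 bytes (presumably so BRIG records stay 4-byte packed); the //+implcode comments above give the exact reassembly. A standalone equivalent as free functions (illustrative):

    inline uint64_t toU64(const BrigUInt64 &v)
    {
        return ((uint64_t)v.hi << 32) | v.lo;
    }

    inline BrigUInt64 fromU64(uint64_t x)
    {
        BrigUInt64 v;
        v.lo = (uint32_t)x;           // low 32 bits
        v.hi = (uint32_t)(x >> 32);   // high 32 bits
        return v;
    }
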
+
+struct BrigAluModifier { //.isroot //.standalone
+ BrigAluModifier8_t allBits; //.defValue=0
+ //^^ bool ftz; //.wtype=BitValRef<0>
+};
+
+struct BrigBase { //.nowrap
+ uint16_t byteCount;
+ BrigKind16_t kind;
+};
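
Note: every record in the code and operand sections begins with this header, and byteCount is the full size of the enclosing record, so a section can be walked without knowing every kind. A hedged sketch (argument names are illustrative; in a real module they come from the section header):

    // Step through a section record by record, dispatching on kind.
    inline void walkSection(const uint8_t *section, uint64_t sectionBytes,
                            uint64_t firstRecord)
    {
        for (uint64_t off = firstRecord; off < sectionBytes; ) {
            const BrigBase *rec = (const BrigBase *)(section + off);
            // ... switch on rec->kind (a BrigKind value) here ...
            if (rec->byteCount == 0)
                break;                // guard against malformed input
            off += rec->byteCount;
        }
    }
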
+
+//.alias Code:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_CODE };
+//.alias Directive:Code { //.generic };
+//.alias Operand:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_OPERAND };
+
+struct BrigData {
+ //.nowrap
+ uint32_t byteCount;
+ uint8_t bytes[1];
+};
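
Note: bytes[1] is the usual C flexible-array idiom — the record really occupies 4 + byteCount bytes (data-section entries are, per the BRIG format, 4-byte aligned). Offsets such as BrigDataOffsetString32_t point at one of these records, and strings are stored unterminated. A lookup sketch (needs <string>; the function name and dataSection pointer are illustrative):

    // Resolve a string offset against the start of the data section.
    inline std::string getString(const uint8_t *dataSection,
                                 BrigDataOffsetString32_t off)
    {
        const BrigData *d = (const BrigData *)(dataSection + off);
        return std::string((const char *)d->bytes, d->byteCount);
    }
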
+
+struct BrigExecutableModifier { //.isroot //.standalone
+ BrigExecutableModifier8_t allBits; //.defValue=0
+ //^^ bool isDefinition; //.wtype=BitValRef<0>
+};
+
+struct BrigMemoryModifier { //.isroot //.standalone
+ BrigMemoryModifier8_t allBits; //.defValue=0
+ //^^ bool isConst; //.wtype=BitValRef<0>
+};
+
+struct BrigSegCvtModifier { //.isroot //.standalone
+ BrigSegCvtModifier8_t allBits; //.defValue=0
+ //^^ bool isNoNull; //.wtype=BitValRef<0>
+};
+
+struct BrigVariableModifier { //.isroot //.standalone
+ BrigVariableModifier8_t allBits; //.defValue=0
+
+ //^^ bool isDefinition; //.wtype=BitValRef<0>
+ //^^ bool isConst; //.wtype=BitValRef<1>
+};
+
+struct BrigDirectiveArgBlockEnd {
+ BrigBase base;
+};
+
+struct BrigDirectiveArgBlockStart {
+ BrigBase base;
+};
+
+struct BrigDirectiveComment {
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+};
+
+struct BrigDirectiveControl {
+ BrigBase base;
+ BrigControlDirective16_t control;
+ uint16_t reserved; //.defValue=0
+ BrigDataOffsetOperandList32_t operands;
+};
+
+struct BrigDirectiveExecutable { //.generic
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ uint16_t outArgCount; //.defValue=0
+ uint16_t inArgCount; //.defValue=0
+ BrigCodeOffset32_t firstInArg;
+ BrigCodeOffset32_t firstCodeBlockEntry;
+ BrigCodeOffset32_t nextModuleEntry;
+ BrigExecutableModifier modifier; //.acc=subItem<ExecutableModifier> //.wtype=ExecutableModifier
+ BrigLinkage8_t linkage;
+ uint16_t reserved; //.defValue=0
+};
+
+//.alias DirectiveKernel:DirectiveExecutable { };
+//.alias DirectiveFunction:DirectiveExecutable { };
+//.alias DirectiveSignature:DirectiveExecutable { };
+//.alias DirectiveIndirectFunction:DirectiveExecutable { };
+
+struct BrigDirectiveExtension {
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+};
+
+struct BrigDirectiveFbarrier {
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigVariableModifier modifier; //.acc=subItem<VariableModifier> //.wtype=VariableModifier
+ BrigLinkage8_t linkage;
+ uint16_t reserved; //.defValue=0
+};
+
+struct BrigDirectiveLabel {
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+};
+
+struct BrigDirectiveLoc {
+ BrigBase base;
+ BrigDataOffsetString32_t filename;
+ uint32_t line;
+ uint32_t column; //.defValue=1
+};
+
+struct BrigDirectiveNone { //.enum=BRIG_KIND_NONE
+ BrigBase base;
+};
+
+struct BrigDirectivePragma {
+ BrigBase base;
+ BrigDataOffsetOperandList32_t operands;
+};
+
+struct BrigDirectiveVariable {
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigOperandOffset32_t init;
+ BrigType16_t type;
+
+ //+hcode bool isArray();
+ //+implcode inline bool KLASS::isArray() { return isArrayType(type()); }
+
+ //+hcode unsigned elementType();
+ //+implcode inline unsigned KLASS::elementType() { return isArray()? arrayType2elementType(type()) : type(); }
+
+ BrigSegment8_t segment;
+ BrigAlignment8_t align;
+ BrigUInt64 dim; //.acc=subItem<UInt64> //.wtype=UInt64
+ BrigVariableModifier modifier; //.acc=subItem<VariableModifier> //.wtype=VariableModifier
+ BrigLinkage8_t linkage;
+ BrigAllocation8_t allocation;
+ uint8_t reserved; //.defValue=0
+};
+
+struct BrigDirectiveModule {
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigVersion32_t hsailMajor; //.wtype=ValRef<uint32_t>
+ BrigVersion32_t hsailMinor; //.wtype=ValRef<uint32_t>
+ BrigProfile8_t profile;
+ BrigMachineModel8_t machineModel;
+ BrigRound8_t defaultFloatRound;
+ uint8_t reserved; //.defValue=0
+};
+
+struct BrigInstBase { //.wname=Inst //.generic //.parent=BrigCode
+ BrigBase base;
+ BrigOpcode16_t opcode;
+ BrigType16_t type;
+ BrigDataOffsetOperandList32_t operands;
+
+ //+hcode Operand operand(int index);
+ //+implcode inline Operand KLASS::operand(int index) { return operands()[index]; }
+};
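
Note: operands is a ListRef — it points at a BrigData record in the data section whose payload is an array of 32-bit offsets into the operand section (that layout is the standard BRIG encoding, not spelled out in this header). A resolution sketch (all names illustrative):

    // Fetch operand i of an instruction.
    inline const BrigBase *getOperand(const uint8_t *dataSection,
                                      const uint8_t *operandSection,
                                      const BrigInstBase &inst, int i)
    {
        const BrigData *list = (const BrigData *)(dataSection + inst.operands);
        const uint32_t *offsets = (const uint32_t *)list->bytes;
        // list->byteCount / 4 is the element count; i must be in range.
        return (const BrigBase *)(operandSection + offsets[i]);
    }
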
+
+struct BrigInstAddr {
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigInstAtomic {
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigMemoryScope8_t memoryScope;
+ BrigAtomicOperation8_t atomicOperation;
+ uint8_t equivClass;
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigInstBasic {
+ BrigInstBase base;
+};
+
+struct BrigInstBr {
+ BrigInstBase base;
+ BrigWidth8_t width;
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigInstCmp {
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
+ BrigCompareOperation8_t compare;
+ BrigPack8_t pack;
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigInstCvt {
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
+ BrigRound8_t round;
+};
+
+struct BrigInstImage {
+ BrigInstBase base;
+ BrigType16_t imageType;
+ BrigType16_t coordType;
+ BrigImageGeometry8_t geometry;
+ uint8_t equivClass;
+ uint16_t reserved; //.defValue=0
+};
+
+struct BrigInstLane {
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigWidth8_t width;
+ uint8_t reserved; //.defValue=0
+};
+
+struct BrigInstMem {
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigAlignment8_t align;
+ uint8_t equivClass;
+ BrigWidth8_t width;
+ BrigMemoryModifier modifier; //.acc=subItem<MemoryModifier> //.wtype=MemoryModifier
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigInstMemFence {
+ BrigInstBase base;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigMemoryScope8_t globalSegmentMemoryScope;
+ BrigMemoryScope8_t groupSegmentMemoryScope;
+ BrigMemoryScope8_t imageSegmentMemoryScope;
+};
+
+struct BrigInstMod {
+ BrigInstBase base;
+ BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
+ BrigRound8_t round;
+ BrigPack8_t pack;
+ uint8_t reserved; //.defValue=0
+};
+
+struct BrigInstQueryImage {
+ BrigInstBase base;
+ BrigType16_t imageType;
+ BrigImageGeometry8_t geometry;
+ BrigImageQuery8_t imageQuery;
+};
+
+struct BrigInstQuerySampler {
+ BrigInstBase base;
+ BrigSamplerQuery8_t samplerQuery;
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigInstQueue {
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigMemoryOrder8_t memoryOrder;
+ uint16_t reserved; //.defValue=0
+};
+
+struct BrigInstSeg {
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigInstSegCvt {
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigSegment8_t segment;
+ BrigSegCvtModifier modifier; //.acc=subItem<SegCvtModifier> //.wtype=SegCvtModifier
+};
+
+struct BrigInstSignal {
+ BrigInstBase base;
+ BrigType16_t signalType;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigAtomicOperation8_t signalOperation;
+};
+
+struct BrigInstSourceType {
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ uint16_t reserved; //.defValue=0
+};
+
+struct BrigOperandAddress {
+ BrigBase base;
+ BrigCodeOffset32_t symbol; //.wtype=ItemRef<DirectiveVariable>
+ BrigOperandOffset32_t reg; //.wtype=ItemRef<OperandRegister>
+ BrigUInt64 offset; //.acc=subItem<UInt64> //.wtype=UInt64
+};
+
+struct BrigOperandAlign {
+ BrigBase base;
+ BrigAlignment8_t align;
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigOperandCodeList {
+ BrigBase base;
+ BrigDataOffsetCodeList32_t elements;
+
+ //+hcode unsigned elementCount();
+ //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
+ //+hcode Code elements(int index);
+ //+implcode inline Code KLASS::elements(int index) { return elements()[index]; }
+};
+
+struct BrigOperandCodeRef {
+ BrigBase base;
+ BrigCodeOffset32_t ref;
+};
+
+struct BrigOperandConstantBytes {
+ BrigBase base;
+ BrigType16_t type; //.defValue=0
+ uint16_t reserved; //.defValue=0
+ BrigDataOffsetString32_t bytes;
+};
+
+struct BrigOperandConstantOperandList {
+ BrigBase base;
+ BrigType16_t type;
+ uint16_t reserved; //.defValue=0
+ BrigDataOffsetOperandList32_t elements;
+
+ //+hcode unsigned elementCount();
+ //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
+ //+hcode Operand elements(int index);
+ //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; }
+};
+
+struct BrigOperandConstantImage {
+ BrigBase base;
+ BrigType16_t type;
+ BrigImageGeometry8_t geometry;
+ BrigImageChannelOrder8_t channelOrder;
+ BrigImageChannelType8_t channelType;
+ uint8_t reserved[3]; //.defValue=0
+ BrigUInt64 width; //.acc=subItem<UInt64> //.wtype=UInt64
+ BrigUInt64 height; //.acc=subItem<UInt64> //.wtype=UInt64
+ BrigUInt64 depth; //.acc=subItem<UInt64> //.wtype=UInt64
+ BrigUInt64 array; //.acc=subItem<UInt64> //.wtype=UInt64
+};
+
+struct BrigOperandOperandList {
+ BrigBase base;
+ BrigDataOffsetOperandList32_t elements;
+
+ //+hcode unsigned elementCount();
+ //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
+ //+hcode Operand elements(int index);
+ //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; }
+};
+
+struct BrigOperandRegister {
+ BrigBase base;
+ BrigRegisterKind16_t regKind;
+ uint16_t regNum;
+};
+
+struct BrigOperandConstantSampler {
+ BrigBase base;
+ BrigType16_t type;
+ BrigSamplerCoordNormalization8_t coord;
+ BrigSamplerFilter8_t filter;
+ BrigSamplerAddressing8_t addressing;
+ uint8_t reserved[3]; //.defValue=0
+};
+
+struct BrigOperandString {
+ BrigBase base;
+ BrigDataOffsetString32_t string;
+};
+
+struct BrigOperandWavesize {
+ BrigBase base;
+};
+
+//.ignore{
+
+enum BrigExceptionsMask {
+ BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
+ BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
+ BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
+ BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
+ BRIG_EXCEPTIONS_INEXACT = 1 << 4,
+
+ BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
+};
+
+struct BrigSectionHeader {
+ uint64_t byteCount;
+ uint32_t headerByteCount;
+ uint32_t nameLength;
+ uint8_t name[1];
+};
+
+#define MODULE_IDENTIFICATION_LENGTH (8)
+
+struct BrigModuleHeader {
+ char identification[MODULE_IDENTIFICATION_LENGTH];
+ BrigVersion32_t brigMajor;
+ BrigVersion32_t brigMinor;
+ uint64_t byteCount;
+ uint8_t hash[64];
+ uint32_t reserved;
+ uint32_t sectionCount;
+ uint64_t sectionIndex;
+};
+
+typedef BrigModuleHeader* BrigModule_t;
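
Note: sectionIndex is the byte offset (from the start of the module) of an array of sectionCount 64-bit section offsets, in BrigSectionIndex order — data, code, operand, then any implementation-defined sections. A hedged sanity check; the "HSA BRIG" magic comes from the HSA specification rather than this header, so treat it as an assumption (needs <cstring>):

    inline bool looksLikeBrigModule(const BrigModuleHeader &h)
    {
        static const char kId[MODULE_IDENTIFICATION_LENGTH] =
            { 'H', 'S', 'A', ' ', 'B', 'R', 'I', 'G' };   // assumed magic
        return memcmp(h.identification, kId, sizeof(kId)) == 0
            && h.brigMajor == BRIG_VERSION_BRIG_MAJOR
            && h.sectionCount >= 3;   // data, code, operand are mandatory
    }
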
+
+#endif // defined(INCLUDED_BRIG_H)
+//}
diff --git a/src/arch/hsail/SConscript b/src/arch/hsail/SConscript
new file mode 100644
index 000000000..3455823a6
--- /dev/null
+++ b/src/arch/hsail/SConscript
@@ -0,0 +1,54 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2015 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: Anthony Gutierrez
+#
+
+Import('*')
+
+if not env['BUILD_GPU']:
+ Return()
+
+if env['TARGET_GPU_ISA'] == 'hsail':
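+    # gen.py writes its targets in argument order: the generated class
+    # declarations (gen_decl.hh), the decode switch (gpu_decoder.cc),
+    # and the exec functions (gen_exec.cc).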
+ env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
+ 'gen.py', '$SOURCE $TARGETS')
+
+ Source('generic_types.cc')
+ Source('gpu_decoder.cc')
+ Source('insts/branch.cc')
+ Source('insts/gen_exec.cc')
+ Source('insts/gpu_static_inst.cc')
+ Source('insts/main.cc')
+ Source('insts/pseudo_inst.cc')
+ Source('insts/mem.cc')
+ Source('operand.cc')
diff --git a/src/arch/hsail/SConsopts b/src/arch/hsail/SConsopts
new file mode 100644
index 000000000..641963c82
--- /dev/null
+++ b/src/arch/hsail/SConsopts
@@ -0,0 +1,40 @@
+# -*- mode:python -*-
+
+#
+# Copyright (c) 2015 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: Anthony Gutierrez
+#
+
+Import('*')
+
+all_gpu_isa_list.append('hsail')
diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py
new file mode 100755
index 000000000..f2996019b
--- /dev/null
+++ b/src/arch/hsail/gen.py
@@ -0,0 +1,806 @@
+#! /usr/bin/python
+
+#
+# Copyright (c) 2015 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: Steve Reinhardt
+#
+
+import sys, re
+
+from m5.util import code_formatter
+
+if len(sys.argv) != 4:
+ print "Error: need 3 args (file names)"
+    sys.exit(1)
+
+header_code = code_formatter()
+decoder_code = code_formatter()
+exec_code = code_formatter()
+
+###############
+#
+# Generate file prologs (includes etc.)
+#
+###############
+
+header_code('''
+#include "arch/hsail/insts/decl.hh"
+#include "base/bitfield.hh"
+#include "gpu-compute/hsail_code.hh"
+#include "gpu-compute/wavefront.hh"
+
+namespace HsailISA
+{
+''')
+header_code.indent()
+
+decoder_code('''
+#include "arch/hsail/gpu_decoder.hh"
+#include "arch/hsail/insts/branch.hh"
+#include "arch/hsail/insts/decl.hh"
+#include "arch/hsail/insts/gen_decl.hh"
+#include "arch/hsail/insts/mem.hh"
+#include "arch/hsail/insts/mem_impl.hh"
+#include "gpu-compute/brig_object.hh"
+
+namespace HsailISA
+{
+ std::vector<GPUStaticInst*> Decoder::decodedInsts;
+
+ GPUStaticInst*
+ Decoder::decode(MachInst machInst)
+ {
+ using namespace Brig;
+
+ const BrigInstBase *ib = machInst.brigInstBase;
+ const BrigObject *obj = machInst.brigObj;
+
+ switch(ib->opcode) {
+''')
+decoder_code.indent()
+decoder_code.indent()
+
+exec_code('''
+#include "arch/hsail/insts/gen_decl.hh"
+#include "base/intmath.hh"
+
+namespace HsailISA
+{
+''')
+exec_code.indent()
+
+###############
+#
+# Define code templates for class declarations (for header file)
+#
+###############
+
+# Basic header template for an instruction with no template parameters.
+header_template_nodt = '''
+class $class_name : public $base_class
+{
+ public:
+ typedef $base_class Base;
+
+ $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : Base(ib, obj, "$opcode")
+ {
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+};
+
+'''
+
+# Basic header template for an instruction with a single DataType
+# template parameter.
+header_template_1dt = '''
+template<typename DataType>
+class $class_name : public $base_class<DataType>
+{
+ public:
+ typedef $base_class<DataType> Base;
+ typedef typename DataType::CType CType;
+
+ $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : Base(ib, obj, "$opcode")
+ {
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+};
+
+'''
+
+header_template_1dt_noexec = '''
+template<typename DataType>
+class $class_name : public $base_class<DataType>
+{
+ public:
+ typedef $base_class<DataType> Base;
+ typedef typename DataType::CType CType;
+
+ $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : Base(ib, obj, "$opcode")
+ {
+ }
+};
+
+'''
+
+# Same as header_template_1dt, except the base class has a second
+# template parameter NumSrcOperands to allow a variable number of
+# source operands. Note that since this is implemented with an array,
+# it only works for instructions where all sources are of the same
+# type (like most arithmetic instructions).
+header_template_1dt_varsrcs = '''
+template<typename DataType>
+class $class_name : public $base_class<DataType, $num_srcs>
+{
+ public:
+ typedef $base_class<DataType, $num_srcs> Base;
+ typedef typename DataType::CType CType;
+
+ $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : Base(ib, obj, "$opcode")
+ {
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+};
+
+'''
+
+# Header template for instruction with two DataType template
+# parameters, one for the dest and one for the source. This is used
+# by compare and convert.
+header_template_2dt = '''
+template<typename DestDataType, class SrcDataType>
+class $class_name : public $base_class<DestDataType, SrcDataType>
+{
+ public:
+ typedef $base_class<DestDataType, SrcDataType> Base;
+ typedef typename DestDataType::CType DestCType;
+ typedef typename SrcDataType::CType SrcCType;
+
+ $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : Base(ib, obj, "$opcode")
+ {
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+};
+
+'''
+
+header_templates = {
+ 'ArithInst': header_template_1dt_varsrcs,
+ 'CmovInst': header_template_1dt,
+ 'ClassInst': header_template_1dt,
+ 'ShiftInst': header_template_1dt,
+ 'ExtractInsertInst': header_template_1dt,
+ 'CmpInst': header_template_2dt,
+ 'CvtInst': header_template_2dt,
+ 'LdInst': '',
+ 'StInst': '',
+ 'SpecialInstNoSrc': header_template_nodt,
+ 'SpecialInst1Src': header_template_nodt,
+ 'SpecialInstNoSrcNoDest': '',
+}
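+
+# As an illustration, a call such as gen('Add', arith_types,
+# 'src0 + src1') below expands header_template_1dt_varsrcs roughly to:
+#
+#   template<typename DataType>
+#   class Add : public ArithInst<DataType, 2>
+#   {
+#     public:
+#       typedef ArithInst<DataType, 2> Base;
+#       typedef typename DataType::CType CType;
+#
+#       Add(const Brig::BrigInstBase *ib, const BrigObject *obj)
+#           : Base(ib, obj, "add")
+#       {
+#       }
+#
+#       void execute(GPUDynInstPtr gpuDynInst);
+#   };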
+
+###############
+#
+# Define code templates for exec functions
+#
+###############
+
+# exec function body
+exec_template_nodt_nosrc = '''
+void
+$class_name::execute(GPUDynInstPtr gpuDynInst)
+{
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef Base::DestCType DestCType;
+
+ const VectorMask &mask = w->get_pred();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ DestCType dest_val = $expr;
+ this->dest.set(w, lane, dest_val);
+ }
+ }
+}
+
+'''
+
+exec_template_nodt_1src = '''
+void
+$class_name::execute(GPUDynInstPtr gpuDynInst)
+{
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef Base::DestCType DestCType;
+ typedef Base::SrcCType SrcCType;
+
+ const VectorMask &mask = w->get_pred();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
+ DestCType dest_val = $expr;
+
+ this->dest.set(w, lane, dest_val);
+ }
+ }
+}
+
+'''
+
+exec_template_1dt_varsrcs = '''
+template<typename DataType>
+void
+$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
+{
+ Wavefront *w = gpuDynInst->wavefront();
+
+ const VectorMask &mask = w->get_pred();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ CType dest_val;
+ if ($dest_is_src_flag) {
+ dest_val = this->dest.template get<CType>(w, lane);
+ }
+
+ CType src_val[$num_srcs];
+
+ for (int i = 0; i < $num_srcs; ++i) {
+ src_val[i] = this->src[i].template get<CType>(w, lane);
+ }
+
+ dest_val = (CType)($expr);
+
+ this->dest.set(w, lane, dest_val);
+ }
+ }
+}
+
+'''
+
+exec_template_1dt_3srcs = '''
+template<typename DataType>
+void
+$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
+{
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef typename Base::Src0CType Src0T;
+ typedef typename Base::Src1CType Src1T;
+ typedef typename Base::Src2CType Src2T;
+
+ const VectorMask &mask = w->get_pred();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ CType dest_val;
+
+ if ($dest_is_src_flag) {
+ dest_val = this->dest.template get<CType>(w, lane);
+ }
+
+ Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
+ Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
+ Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
+
+ dest_val = $expr;
+
+ this->dest.set(w, lane, dest_val);
+ }
+ }
+}
+
+'''
+
+exec_template_1dt_2src_1dest = '''
+template<typename DataType>
+void
+$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
+{
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef typename Base::DestCType DestT;
+ typedef CType Src0T;
+ typedef typename Base::Src1CType Src1T;
+
+ const VectorMask &mask = w->get_pred();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ DestT dest_val;
+ if ($dest_is_src_flag) {
+ dest_val = this->dest.template get<DestT>(w, lane);
+ }
+ Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
+ Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
+
+ dest_val = $expr;
+
+ this->dest.set(w, lane, dest_val);
+ }
+ }
+}
+
+'''
+
+exec_template_shift = '''
+template<typename DataType>
+void
+$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
+{
+ Wavefront *w = gpuDynInst->wavefront();
+
+ const VectorMask &mask = w->get_pred();
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ CType dest_val;
+
+ if ($dest_is_src_flag) {
+ dest_val = this->dest.template get<CType>(w, lane);
+ }
+
+ CType src_val0 = this->src0.template get<CType>(w, lane);
+ uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
+
+ dest_val = $expr;
+
+ this->dest.set(w, lane, dest_val);
+ }
+ }
+}
+
+'''
+
+exec_template_2dt = '''
+template<typename DestDataType, class SrcDataType>
+void
+$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
+{
+ Wavefront *w = gpuDynInst->wavefront();
+
+ const VectorMask &mask = w->get_pred();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ DestCType dest_val;
+ SrcCType src_val[$num_srcs];
+
+ for (int i = 0; i < $num_srcs; ++i) {
+ src_val[i] = this->src[i].template get<SrcCType>(w, lane);
+ }
+
+ dest_val = $expr;
+
+ this->dest.set(w, lane, dest_val);
+ }
+ }
+}
+
+'''
+
+exec_templates = {
+ 'ArithInst': exec_template_1dt_varsrcs,
+ 'CmovInst': exec_template_1dt_3srcs,
+ 'ExtractInsertInst': exec_template_1dt_3srcs,
+ 'ClassInst': exec_template_1dt_2src_1dest,
+ 'CmpInst': exec_template_2dt,
+ 'CvtInst': exec_template_2dt,
+ 'LdInst': '',
+ 'StInst': '',
+ 'SpecialInstNoSrc': exec_template_nodt_nosrc,
+ 'SpecialInst1Src': exec_template_nodt_1src,
+ 'SpecialInstNoSrcNoDest': '',
+}
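+
+# Continuing the Add illustration above, exec_template_1dt_varsrcs
+# expands roughly to the following; gen() below first rewrites the
+# expr 'src0 + src1' into 'src_val[0] + src_val[1]':
+#
+#   template<typename DataType>
+#   void
+#   Add<DataType>::execute(GPUDynInstPtr gpuDynInst)
+#   {
+#       ...
+#       dest_val = (CType)(src_val[0] + src_val[1]);
+#       ...
+#   }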
+
+###############
+#
+# Define code templates for the decoder cases
+#
+###############
+
+# decode template for nodt-opcode case
+decode_nodt_template = '''
+ case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
+
+decode_case_prolog_class_inst = '''
+ case BRIG_OPCODE_$brig_opcode_upper:
+ {
+ //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
+ BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
+ //switch (baseOp->kind) {
+ // case BRIG_OPERAND_REG:
+ // type = ((const BrigOperandReg*)baseOp)->type;
+ // break;
+ // case BRIG_OPERAND_IMMED:
+ // type = ((const BrigOperandImmed*)baseOp)->type;
+ // break;
+ // default:
+ // fatal("CLASS unrecognized kind of operand %d\\n",
+ // baseOp->kind);
+ //}
+ switch (type) {'''
+
+# common prolog for 1dt- or 2dt-opcode case: switch on data type
+decode_case_prolog = '''
+ case BRIG_OPCODE_$brig_opcode_upper:
+ {
+ switch (ib->type) {'''
+
+# single-level decode case entry (for 1dt opcodes)
+decode_case_entry = \
+' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
+
+decode_store_prolog = \
+' case BRIG_TYPE_$type_name: {'
+
+decode_store_case_epilog = '''
+ }'''
+
+decode_store_case_entry = \
+' return $constructor(ib, obj);'
+
+# common epilog for type switch
+decode_case_epilog = '''
+ default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
+ ib->type);
+ }
+ }
+ break;'''
+
+# Additional templates for nested decode on a second type field (for
+# compare and convert). These are used in place of the
+# decode_case_entry template to create a second-level switch on the
+# second type field inside each case of the first-level type switch.
+# Because the name and location of the second type can vary, the Brig
+# instruction type must be provided in $brig_type, and the name of the
+# second type field must be provided in $type2_field.
+decode_case2_prolog = '''
+ case BRIG_TYPE_$type_name:
+ switch (((Brig$brig_type*)ib)->$type2_field) {'''
+
+decode_case2_entry = \
+' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
+
+decode_case2_epilog = '''
+ default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
+ ((Brig$brig_type*)ib)->$type2_field);
+ }
+ break;'''
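+
+# As a concrete illustration, the Cvt generation below produces a
+# nested switch whose U32 destination case looks roughly like:
+#
+#   case BRIG_TYPE_U32:
+#       switch (((BrigInstCvt*)ib)->sourceType) {
+#         case BRIG_TYPE_B1: return new Cvt<U32,B1>(ib, obj);
+#         ...
+#         default: fatal("CVT: unrecognized sourceType %d\n", ...);
+#       }
+#       break;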
+
+# Figure out how many source operands an expr needs by looking for the
+# highest-numbered srcN value referenced. Since sources are numbered
+# starting at 0, the return value is N+1.
+def num_src_operands(expr):
+ if expr.find('src2') != -1:
+ return 3
+ elif expr.find('src1') != -1:
+ return 2
+ elif expr.find('src0') != -1:
+ return 1
+ else:
+ return 0
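+
+# For example: num_src_operands('src0 * src1 + src2') returns 3,
+# num_src_operands('-src0') returns 1, and an expr that names no
+# sources, such as 'lane', returns 0.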
+
+###############
+#
+# Define final code generation methods
+#
+# The gen() and gen_special() functions below are the interface for
+# generating actual instructions.
+#
+###############
+
+# Generate class declaration, exec function, and decode switch case
+# for a brig_opcode with a single-level type switch. The 'types'
+# parameter is a list or tuple of types for which the instruction
+# should be instantiated.
+def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
+ type2_info=None, constructor_prefix='new ', is_store=False):
+ brig_opcode_upper = brig_opcode.upper()
+ class_name = brig_opcode
+ opcode = class_name.lower()
+
+ if base_class == 'ArithInst':
+ # note that expr must be provided with ArithInst so we can
+ # derive num_srcs for the template
+ assert expr
+
+ if expr:
+ # Derive several bits of info from expr. If expr is not used,
+ # this info will be irrelevant.
+ num_srcs = num_src_operands(expr)
+ # if the RHS expression includes 'dest', then we're doing an RMW
+ # on the reg and we need to treat it like a source
+ dest_is_src = expr.find('dest') != -1
+ dest_is_src_flag = str(dest_is_src).lower() # for C++
+ if base_class in ['ShiftInst']:
+ expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
+ elif base_class in ['ArithInst', 'CmpInst', 'CvtInst']:
+ expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
+ else:
+ expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
+ expr = re.sub(r'\bdest\b', r'dest_val', expr)
+
+ # Strip template arguments off of base class before looking up
+ # appropriate templates
+ base_class_base = re.sub(r'<.*>$', '', base_class)
+ header_code(header_templates[base_class_base])
+
+ if base_class.startswith('SpecialInst'):
+ exec_code(exec_templates[base_class_base])
+ elif base_class.startswith('ShiftInst'):
+ header_code(exec_template_shift)
+ else:
+ header_code(exec_templates[base_class_base])
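+
+    # Note: the templated exec bodies go into the header so that their
+    # definitions are visible wherever the templates are instantiated;
+    # only the non-templated SpecialInst bodies land in the .cc file.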
+
+ if not types or isinstance(types, str):
+ # Just a single type
+ constructor = constructor_prefix + class_name
+ decoder_code(decode_nodt_template)
+ else:
+ # multiple types, need at least one level of decode
+ if brig_opcode == 'Class':
+ decoder_code(decode_case_prolog_class_inst)
+ else:
+ decoder_code(decode_case_prolog)
+ if not type2_info:
+            if not is_store:
+                # single list of types, so do a basic one-level decode
+ for type_name in types:
+ full_class_name = '%s<%s>' % (class_name, type_name.upper())
+ constructor = constructor_prefix + full_class_name
+ decoder_code(decode_case_entry)
+ else:
+                # stores: still a one-level type decode, but each case
+                # gets its own block so the size-typed class can be built
+ for type_name in types:
+ decoder_code(decode_store_prolog)
+ type_size = int(re.findall(r'[0-9]+', type_name)[0])
+ type_type = type_name[0]
+                    full_class_name = ('%s<%s,%s%d>'
+                                       % (class_name, type_name.upper(),
+                                          type_type.upper(), type_size))
+ constructor = constructor_prefix + full_class_name
+ decoder_code(decode_store_case_entry)
+ decoder_code(decode_store_case_epilog)
+ else:
+ # need secondary type switch (convert, compare)
+ # unpack extra info on second switch
+ (type2_field, types2) = type2_info
+ brig_type = 'Inst%s' % brig_opcode
+ for type_name in types:
+ decoder_code(decode_case2_prolog)
+ fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
+ for type2_name in types2:
+ full_class_name = fmt % type2_name.upper()
+ constructor = constructor_prefix + full_class_name
+ decoder_code(decode_case2_entry)
+
+ decoder_code(decode_case2_epilog)
+
+ decoder_code(decode_case_epilog)
+
+###############
+#
+# Generate instructions
+#
+###############
+
+# handy abbreviations for common sets of types
+
+# arithmetic ops are typically defined only on 32- and 64-bit sizes
+arith_int_types = ('S32', 'U32', 'S64', 'U64')
+arith_float_types = ('F32', 'F64')
+arith_types = arith_int_types + arith_float_types
+
+bit_types = ('B1', 'B32', 'B64')
+
+all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
+
+# I think you might be able to do 'f16' memory ops too, but we'll
+# ignore them for now.
+mem_types = all_int_types + arith_float_types
+mem_atom_types = all_int_types + ('B32', 'B64')
+
+##### Arithmetic & logical operations
+gen('Add', arith_types, 'src0 + src1')
+gen('Sub', arith_types, 'src0 - src1')
+gen('Mul', arith_types, 'src0 * src1')
+gen('Div', arith_types, 'src0 / src1')
+gen('Min', arith_types, 'std::min(src0, src1)')
+gen('Max', arith_types, 'std::max(src0, src1)')
+gen('Gcnmin', arith_types, 'std::min(src0, src1)')
+
+gen('CopySign', arith_float_types,
+ 'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
+gen('Sqrt', arith_float_types, 'sqrt(src0)')
+gen('Floor', arith_float_types, 'floor(src0)')
+
+# "fast" sqrt... same as slow for us
+gen('Nsqrt', arith_float_types, 'sqrt(src0)')
+gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
+gen('Nrcp', arith_float_types, '1.0/src0')
+gen('Fract', arith_float_types,
+ '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')
+
+gen('Ncos', arith_float_types, 'cos(src0)')
+gen('Nsin', arith_float_types, 'sin(src0)')
+
+gen('And', bit_types, 'src0 & src1')
+gen('Or', bit_types, 'src0 | src1')
+gen('Xor', bit_types, 'src0 ^ src1')
+
+gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)')
+gen('Firstbit', bit_types, 'firstbit(src0)')
+gen('Popcount', ('B32', 'B64'), '__builtin_popcount(src0)')
+
+gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
+gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
+
+# gen('Mul_hi', types=('s32','u32', '??'))
+# gen('Mul24', types=('s32','u32', '??'))
+gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
+
+gen('Abs', arith_types, 'std::abs(src0)')
+gen('Neg', arith_types, '-src0')
+
+gen('Mov', bit_types, 'src0')
+gen('Not', bit_types, 'heynot(src0)')
+
+# mad and fma differ only in rounding behavior, which we don't emulate
+# also there's an integer form of mad, but not of fma
+gen('Mad', arith_types, 'src0 * src1 + src2')
+gen('Fma', arith_float_types, 'src0 * src1 + src2')
+
+# native floating point operations
+gen('Nfma', arith_float_types, 'src0 * src1 + src2')
+
+gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
+gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
+gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
+
+# see base/bitfield.hh
+gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
+ 'ExtractInsertInst')
+
+gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
+ 'ExtractInsertInst')
+
+##### Compare
+gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
+ 'CmpInst', ('sourceType', arith_types + bit_types))
+gen('Class', arith_float_types, 'fpclassify(src0, src1)', 'ClassInst')
+
+##### Conversion
+
+# Conversion operations are only defined on B1, not B32 or B64
+cvt_types = ('B1',) + mem_types
+
+gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
+
+
+##### Load & Store
+gen('Lda', mem_types, base_class='LdInst', constructor_prefix='decode')
+gen('Ld', mem_types, base_class='LdInst', constructor_prefix='decode')
+gen('St', mem_types, base_class='StInst', constructor_prefix='decode',
+    is_store=True)
+gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
+gen('AtomicNoRet', mem_atom_types, base_class='StInst',
+ constructor_prefix='decode')
+
+gen('Cbr', base_class='LdInst', constructor_prefix='decode')
+gen('Br', base_class='LdInst', constructor_prefix='decode')
+
+##### Special operations
+def gen_special(brig_opcode, expr, dest_type='U32'):
+ num_srcs = num_src_operands(expr)
+ if num_srcs == 0:
+ base_class = 'SpecialInstNoSrc<%s>' % dest_type
+ elif num_srcs == 1:
+ base_class = 'SpecialInst1Src<%s>' % dest_type
+ else:
+        assert False
+
+ gen(brig_opcode, None, expr, base_class)
+
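+# For example, LaneId's expr 'lane' names no srcN operand and so maps
+# to SpecialInstNoSrc<U32>, while WorkItemId's expr references src0
+# and maps to SpecialInst1Src<U32>.
+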
+gen_special('WorkItemId', 'w->workitemid[src0][lane]')
+gen_special('WorkItemAbsId',
+ 'w->workitemid[src0][lane] + (w->workgroupid[src0] * w->workgroupsz[src0])')
+gen_special('WorkGroupId', 'w->workgroupid[src0]')
+gen_special('WorkGroupSize', 'w->workgroupsz[src0]')
+gen_special('CurrentWorkGroupSize', 'w->workgroupsz[src0]')
+gen_special('GridSize', 'w->gridsz[src0]')
+gen_special('GridGroups',
+ 'divCeil(w->gridsz[src0],w->workgroupsz[src0])')
+gen_special('LaneId', 'lane')
+gen_special('WaveId', 'w->dynwaveid')
+gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
+
+# gen_special('CU'', ')
+
+gen('Ret', base_class='SpecialInstNoSrcNoDest')
+gen('Barrier', base_class='SpecialInstNoSrcNoDest')
+gen('MemFence', base_class='SpecialInstNoSrcNoDest')
+
+# Map magic instructions to the BrigSyscall opcode
+# Magic instructions are defined in magic.hh
+#
+# In the future, real HSA kernel system calls can be implemented and coexist
+# with magic instructions.
+gen('Call', base_class='SpecialInstNoSrcNoDest')
+
+###############
+#
+# Generate file epilogs
+#
+###############
+header_code.dedent()
+header_code('''
+} // namespace HsailISA
+''')
+
+# close off main decode switch
+decoder_code.dedent()
+decoder_code.dedent()
+decoder_code('''
+ default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
+ } // end switch(ib->opcode)
+ } // end decode()
+} // namespace HsailISA
+''')
+
+exec_code.dedent()
+exec_code('''
+} // namespace HsailISA
+''')
+
+###############
+#
+# Output accumulated code to files
+#
+###############
+header_code.write(sys.argv[1])
+decoder_code.write(sys.argv[2])
+exec_code.write(sys.argv[3])
diff --git a/src/arch/hsail/generic_types.cc b/src/arch/hsail/generic_types.cc
new file mode 100644
index 000000000..0cd55d1d5
--- /dev/null
+++ b/src/arch/hsail/generic_types.cc
@@ -0,0 +1,47 @@
+#include "arch/hsail/generic_types.hh"
+#include "base/misc.hh"
+
+using namespace Brig;
+
+namespace HsailISA
+{
+ Enums::GenericMemoryOrder
+ getGenericMemoryOrder(BrigMemoryOrder brig_memory_order)
+ {
+ switch(brig_memory_order) {
+ case BRIG_MEMORY_ORDER_NONE:
+ return Enums::MEMORY_ORDER_NONE;
+ case BRIG_MEMORY_ORDER_RELAXED:
+ return Enums::MEMORY_ORDER_RELAXED;
+ case BRIG_MEMORY_ORDER_SC_ACQUIRE:
+ return Enums::MEMORY_ORDER_SC_ACQUIRE;
+ case BRIG_MEMORY_ORDER_SC_RELEASE:
+ return Enums::MEMORY_ORDER_SC_RELEASE;
+ case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+ return Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE;
+ default:
+ fatal("HsailISA::MemInst::getGenericMemoryOrder -> ",
+ "bad BrigMemoryOrder\n");
+ }
+ }
+
+ Enums::GenericMemoryScope
+ getGenericMemoryScope(BrigMemoryScope brig_memory_scope)
+ {
+ switch(brig_memory_scope) {
+ case BRIG_MEMORY_SCOPE_NONE:
+ return Enums::MEMORY_SCOPE_NONE;
+ case BRIG_MEMORY_SCOPE_WORKITEM:
+ return Enums::MEMORY_SCOPE_WORKITEM;
+ case BRIG_MEMORY_SCOPE_WORKGROUP:
+ return Enums::MEMORY_SCOPE_WORKGROUP;
+ case BRIG_MEMORY_SCOPE_AGENT:
+ return Enums::MEMORY_SCOPE_DEVICE;
+ case BRIG_MEMORY_SCOPE_SYSTEM:
+ return Enums::MEMORY_SCOPE_SYSTEM;
+ default:
+ fatal("HsailISA::MemInst::getGenericMemoryScope -> ",
+ "bad BrigMemoryScope\n");
+ }
+ }
+} // namespace HsailISA
diff --git a/src/arch/hsail/generic_types.hh b/src/arch/hsail/generic_types.hh
new file mode 100644
index 000000000..50e430bef
--- /dev/null
+++ b/src/arch/hsail/generic_types.hh
@@ -0,0 +1,16 @@
+#ifndef __ARCH_HSAIL_GENERIC_TYPES_HH__
+#define __ARCH_HSAIL_GENERIC_TYPES_HH__
+
+#include "arch/hsail/Brig.h"
+#include "enums/GenericMemoryOrder.hh"
+#include "enums/GenericMemoryScope.hh"
+
+namespace HsailISA
+{
+ Enums::GenericMemoryOrder
+ getGenericMemoryOrder(Brig::BrigMemoryOrder brig_memory_order);
+ Enums::GenericMemoryScope
+ getGenericMemoryScope(Brig::BrigMemoryScope brig_memory_scope);
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_GENERIC_TYPES_HH__
diff --git a/src/arch/hsail/gpu_decoder.hh b/src/arch/hsail/gpu_decoder.hh
new file mode 100644
index 000000000..98a689664
--- /dev/null
+++ b/src/arch/hsail/gpu_decoder.hh
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Anthony Gutierrez
+ */
+
+#ifndef __ARCH_HSAIL_GPU_DECODER_HH__
+#define __ARCH_HSAIL_GPU_DECODER_HH__
+
+#include <vector>
+
+#include "arch/hsail/gpu_types.hh"
+
+class BrigObject;
+class GPUStaticInst;
+
+namespace Brig
+{
+ class BrigInstBase;
+}
+
+namespace HsailISA
+{
+ class Decoder
+ {
+ public:
+ GPUStaticInst* decode(MachInst machInst);
+
+ GPUStaticInst*
+ decode(RawMachInst inst)
+ {
+ return inst < decodedInsts.size() ? decodedInsts.at(inst) : nullptr;
+ }
+
+ RawMachInst
+ saveInst(GPUStaticInst *decodedInst)
+ {
+ decodedInsts.push_back(decodedInst);
+
+ return decodedInsts.size() - 1;
+ }
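+
+        // Illustrative round trip ('decoder' and 'mach_inst' are
+        // assumed names):
+        //
+        //   RawMachInst raw = decoder.saveInst(decoder.decode(mach_inst));
+        //   GPUStaticInst *si = decoder.decode(raw); // index lookup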
+
+ private:
+ static std::vector<GPUStaticInst*> decodedInsts;
+ };
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_GPU_DECODER_HH__
diff --git a/src/arch/hsail/gpu_types.hh b/src/arch/hsail/gpu_types.hh
new file mode 100644
index 000000000..4b3a66a9a
--- /dev/null
+++ b/src/arch/hsail/gpu_types.hh
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Anthony Gutierrez
+ */
+
+#ifndef __ARCH_HSAIL_GPU_TYPES_HH__
+#define __ARCH_HSAIL_GPU_TYPES_HH__
+
+#include <cstdint>
+
+namespace Brig
+{
+ class BrigInstBase;
+}
+
+class BrigObject;
+
+namespace HsailISA
+{
+ // A raw machine instruction represents the raw bits that
+ // our model uses to represent an actual instruction. In
+ // the case of HSAIL this is just an index into a list of
+ // instruction objects.
+ typedef uint64_t RawMachInst;
+
+ // The MachInst is a representation of an instruction
+ // that has more information than just the machine code.
+ // For HSAIL the actual machine code is a BrigInstBase
+ // and the BrigObject contains more pertinent
+    // information related to operands, etc.
+
+ struct MachInst
+ {
+ const Brig::BrigInstBase *brigInstBase;
+ const BrigObject *brigObj;
+ };
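+
+    // Illustrative use ('decoder', 'brig_inst' and 'brig_obj' are
+    // assumed names): the loader pairs a raw BRIG instruction with its
+    // enclosing object before decoding, e.g.
+    //
+    //   MachInst mach_inst = { brig_inst, brig_obj };
+    //   GPUStaticInst *si = decoder.decode(mach_inst);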
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_GPU_TYPES_HH__
diff --git a/src/arch/hsail/insts/branch.cc b/src/arch/hsail/insts/branch.cc
new file mode 100644
index 000000000..d65279cc8
--- /dev/null
+++ b/src/arch/hsail/insts/branch.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Anthony Gutierrez
+ */
+
+#include "arch/hsail/insts/branch.hh"
+
+#include "gpu-compute/hsail_code.hh"
+
+namespace HsailISA
+{
+ GPUStaticInst*
+ decodeBrn(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ // Detect direct vs indirect branch by seeing whether we have a
+ // register operand.
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const Brig::BrigOperand *reg = obj->getOperand(op_offs);
+
+ if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ return new BrnIndirectInst(ib, obj);
+ } else {
+ return new BrnDirectInst(ib, obj);
+ }
+ }
+
+ GPUStaticInst*
+ decodeCbr(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ // Detect direct vs indirect branch by seeing whether we have a
+ // second register operand (after the condition).
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
+ const Brig::BrigOperand *reg = obj->getOperand(op_offs);
+
+ if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ return new CbrIndirectInst(ib, obj);
+ } else {
+ return new CbrDirectInst(ib, obj);
+ }
+ }
+
+ GPUStaticInst*
+ decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ // Detect direct vs indirect branch by seeing whether we have a
+ // second register operand (after the condition).
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
+ const Brig::BrigOperand *reg = obj->getOperand(op_offs);
+
+ if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ return new BrIndirectInst(ib, obj);
+ } else {
+ return new BrDirectInst(ib, obj);
+ }
+ }
+} // namespace HsailISA
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh
new file mode 100644
index 000000000..54ad9a042
--- /dev/null
+++ b/src/arch/hsail/insts/branch.hh
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
+#define __ARCH_HSAIL_INSTS_BRANCH_HH__
+
+#include "arch/hsail/insts/gpu_static_inst.hh"
+#include "arch/hsail/operand.hh"
+#include "gpu-compute/gpu_dyn_inst.hh"
+#include "gpu-compute/wavefront.hh"
+
+namespace HsailISA
+{
+
+ // The main difference between a direct branch and an indirect branch
+ // is whether the target is a register or a label, so we can share a
+ // lot of code if we template the base implementation on that type.
+ template<typename TargetType>
+ class BrnInstBase : public HsailGPUStaticInst
+ {
+ public:
+ void generateDisassembly();
+
+ Brig::BrigWidth8_t width;
+ TargetType target;
+
+ BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : HsailGPUStaticInst(obj, "brn")
+ {
+ o_type = Enums::OT_BRANCH;
+ width = ((Brig::BrigInstBr*)ib)->width;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ target.init(op_offs, obj);
+ }
+
+ uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+
+ bool unconditionalJumpInstruction() override { return true; }
+ bool isVectorRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isScalarRegister();
+ }
+
+ bool isSrcOperand(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return true;
+ }
+
+ bool isDstOperand(int operandIndex) {
+ return false;
+ }
+
+ int getOperandSize(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.opSize();
+ }
+
+ int getRegisterIndex(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.regIndex();
+ }
+
+ int getNumOperands() {
+ return 1;
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ template<typename TargetType>
+ void
+ BrnInstBase<TargetType>::generateDisassembly()
+ {
+ std::string widthClause;
+
+ if (width != 1) {
+ widthClause = csprintf("_width(%d)", width);
+ }
+
+ disassembly = csprintf("%s%s %s", opcode, widthClause,
+ target.disassemble());
+ }
+
+ template<typename TargetType>
+ void
+ BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ if (getTargetPc() == w->rpc()) {
+ w->popFromReconvergenceStack();
+ } else {
+ // Rpc and execution mask remain the same
+ w->pc(getTargetPc());
+ }
+ w->discardFetch();
+ }
+
+ class BrnDirectInst : public BrnInstBase<LabelOperand>
+ {
+ public:
+ BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : BrnInstBase<LabelOperand>(ib, obj)
+ {
+ }
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return 0; }
+ };
+
+ class BrnIndirectInst : public BrnInstBase<SRegOperand>
+ {
+ public:
+ BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : BrnInstBase<SRegOperand>(ib, obj)
+ {
+ }
+ int numSrcRegOperands() { return target.isVectorRegister(); }
+ int numDstRegOperands() { return 0; }
+ };
+
+ GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
+ const BrigObject *obj);
+
+ template<typename TargetType>
+ class CbrInstBase : public HsailGPUStaticInst
+ {
+ public:
+ void generateDisassembly();
+
+ Brig::BrigWidth8_t width;
+ CRegOperand cond;
+ TargetType target;
+
+ CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : HsailGPUStaticInst(obj, "cbr")
+ {
+ o_type = Enums::OT_BRANCH;
+ width = ((Brig::BrigInstBr *)ib)->width;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ cond.init(op_offs, obj);
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ target.init(op_offs, obj);
+ }
+
+ uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+        // For the operand queries below, the target is treated as
+        // operand 0 and the condition register as operand 1 (the
+        // reverse of the BRIG operand order used in the constructor).
+ bool isVectorRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ if (!operandIndex)
+ return target.isVectorRegister();
+ else
+ return false;
+ }
+ bool isCondRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ if (!operandIndex)
+ return target.isCondRegister();
+ else
+ return true;
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return target.isScalarRegister();
+ else
+ return false;
+ }
+ bool isSrcOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == 0)
+ return true;
+ return false;
+ }
+ // both Condition Register and Target are source operands
+ bool isDstOperand(int operandIndex) {
+ return false;
+ }
+ int getOperandSize(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ if (!operandIndex)
+ return target.opSize();
+ else
+ return 1;
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ if (!operandIndex)
+ return target.regIndex();
+ else
+ return -1;
+ }
+
+ // Operands = Target, Condition Register
+ int getNumOperands() {
+ return 2;
+ }
+ };
+
+ template<typename TargetType>
+ void
+ CbrInstBase<TargetType>::generateDisassembly()
+ {
+ std::string widthClause;
+
+ if (width != 1) {
+ widthClause = csprintf("_width(%d)", width);
+ }
+
+ disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
+ cond.disassemble(), target.disassemble());
+ }
+
+ template<typename TargetType>
+ void
+ CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ const uint32_t curr_pc = w->pc();
+ const uint32_t curr_rpc = w->rpc();
+ const VectorMask curr_mask = w->execMask();
+
+ /**
+ * TODO: can we move this pop outside the instruction, and
+ * into the wavefront?
+ */
+ w->popFromReconvergenceStack();
+
+ // immediate post-dominator instruction
+ const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
+ if (curr_rpc != rpc) {
+ w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
+ }
+
+ // taken branch
+ const uint32_t true_pc = getTargetPc();
+ VectorMask true_mask;
+ for (unsigned int lane = 0; lane < VSZ; ++lane) {
+ true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
+ }
+
+ // not taken branch
+ const uint32_t false_pc = curr_pc + 1;
+ assert(true_pc != false_pc);
+ if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
+ VectorMask false_mask = curr_mask & ~true_mask;
+ w->pushToReconvergenceStack(false_pc, rpc, false_mask);
+ }
+
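+        // The taken path is pushed last so that it lands on top of the
+        // reconvergence stack and executes before the not-taken path.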
+ if (true_pc != rpc && true_mask.count()) {
+ w->pushToReconvergenceStack(true_pc, rpc, true_mask);
+ }
+ assert(w->pc() != curr_pc);
+ w->discardFetch();
+ }
+
+
+ class CbrDirectInst : public CbrInstBase<LabelOperand>
+ {
+ public:
+ CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : CbrInstBase<LabelOperand>(ib, obj)
+ {
+ }
+ // the source operand of a conditional branch is a Condition
+ // Register which is not stored in the VRF
+ // so we do not count it as a source-register operand
+ // even though, formally, it is one.
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return 0; }
+ };
+
+ class CbrIndirectInst : public CbrInstBase<SRegOperand>
+ {
+ public:
+ CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : CbrInstBase<SRegOperand>(ib, obj)
+ {
+ }
+ // one source operand of the conditional indirect branch is a Condition
+ // register which is not stored in the VRF so we do not count it
+ // as a source-register operand even though, formally, it is one.
+ int numSrcRegOperands() { return target.isVectorRegister(); }
+ int numDstRegOperands() { return 0; }
+ };
+
+ GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
+ const BrigObject *obj);
+
+ template<typename TargetType>
+ class BrInstBase : public HsailGPUStaticInst
+ {
+ public:
+ void generateDisassembly();
+
+ ImmOperand<uint32_t> width;
+ TargetType target;
+
+ BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : HsailGPUStaticInst(obj, "br")
+ {
+ o_type = Enums::OT_BRANCH;
+ width.init(((Brig::BrigInstBr *)ib)->width, obj);
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ target.init(op_offs, obj);
+ }
+
+ uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+
+ bool unconditionalJumpInstruction() override { return true; }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ bool isVectorRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return true;
+ }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.opSize();
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.regIndex();
+ }
+ int getNumOperands() { return 1; }
+ };
+
+ template<typename TargetType>
+ void
+ BrInstBase<TargetType>::generateDisassembly()
+ {
+ std::string widthClause;
+
+ if (width.bits != 1) {
+ widthClause = csprintf("_width(%d)", width.bits);
+ }
+
+ disassembly = csprintf("%s%s %s", opcode, widthClause,
+ target.disassemble());
+ }
+
+ template<typename TargetType>
+ void
+ BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ if (getTargetPc() == w->rpc()) {
+ w->popFromReconvergenceStack();
+ } else {
+ // Rpc and execution mask remain the same
+ w->pc(getTargetPc());
+ }
+ w->discardFetch();
+ }
+
+ class BrDirectInst : public BrInstBase<LabelOperand>
+ {
+ public:
+ BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : BrInstBase<LabelOperand>(ib, obj)
+ {
+ }
+
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return 0; }
+ };
+
+ class BrIndirectInst : public BrInstBase<SRegOperand>
+ {
+ public:
+ BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : BrInstBase<SRegOperand>(ib, obj)
+ {
+ }
+ int numSrcRegOperands() { return target.isVectorRegister(); }
+ int numDstRegOperands() { return 0; }
+ };
+
+ GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
+ const BrigObject *obj);
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
diff --git a/src/arch/hsail/insts/decl.hh b/src/arch/hsail/insts/decl.hh
new file mode 100644
index 000000000..e2da501b9
--- /dev/null
+++ b/src/arch/hsail/insts/decl.hh
@@ -0,0 +1,1106 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
+#define __ARCH_HSAIL_INSTS_DECL_HH__
+
+#include <cmath>
+
+#include "arch/hsail/generic_types.hh"
+#include "arch/hsail/insts/gpu_static_inst.hh"
+#include "arch/hsail/operand.hh"
+#include "debug/HSAIL.hh"
+#include "enums/OpType.hh"
+#include "gpu-compute/gpu_dyn_inst.hh"
+#include "gpu-compute/shader.hh"
+
+namespace HsailISA
+{
+ template<typename _DestOperand, typename _SrcOperand>
+ class HsailOperandType
+ {
+ public:
+ typedef _DestOperand DestOperand;
+ typedef _SrcOperand SrcOperand;
+ };
+
+ typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
+ typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
+ typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
+
+    // The IsBits parameter serves only to disambiguate the B* types from
+ // the U* types, which otherwise would be identical (and
+ // indistinguishable).
+ template<typename _OperandType, typename _CType, Enums::MemType _memType,
+ vgpr_type _vgprType, int IsBits=0>
+ class HsailDataType
+ {
+ public:
+ typedef _OperandType OperandType;
+ typedef _CType CType;
+ static const Enums::MemType memType = _memType;
+ static const vgpr_type vgprType = _vgprType;
+ static const char *label;
+ };
+
+ typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
+ typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
+
+ typedef HsailDataType<SRegOperandType, uint16_t,
+ Enums::M_U16, VT_32, 1> B16;
+
+ typedef HsailDataType<SRegOperandType, uint32_t,
+ Enums::M_U32, VT_32, 1> B32;
+
+ typedef HsailDataType<DRegOperandType, uint64_t,
+ Enums::M_U64, VT_64, 1> B64;
+
+ typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
+ typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
+ typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
+ typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
+
+ typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
+ typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
+ typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
+ typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
+
+ typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
+ typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
+
+ template<typename DestOperandType, typename SrcOperandType,
+ int NumSrcOperands>
+ class CommonInstBase : public HsailGPUStaticInst
+ {
+ protected:
+ typename DestOperandType::DestOperand dest;
+ typename SrcOperandType::SrcOperand src[NumSrcOperands];
+
+ void
+ generateDisassembly()
+ {
+ disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
+ dest.disassemble());
+
+ for (int i = 0; i < NumSrcOperands; ++i) {
+ disassembly += ",";
+ disassembly += src[i].disassemble();
+ }
+ }
+
+ virtual std::string opcode_suffix() = 0;
+
+ public:
+ CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *opcode)
+ : HsailGPUStaticInst(obj, opcode)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+
+ dest.init(op_offs, obj);
+
+ for (int i = 0; i < NumSrcOperands; ++i) {
+ op_offs = obj->getOperandPtr(ib->operands, i + 1);
+ src[i].init(op_offs, obj);
+ }
+ }
+
+ bool isVectorRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isVectorRegister();
+ else
+ return dest.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isCondRegister();
+ else
+ return dest.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isScalarRegister();
+ else
+ return dest.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return true;
+ return false;
+ }
+
+ bool isDstOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex >= NumSrcOperands)
+ return true;
+ return false;
+ }
+ int getOperandSize(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].opSize();
+ else
+ return dest.opSize();
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].regIndex();
+ else
+ return dest.regIndex();
+ }
+ int numSrcRegOperands() {
+ int operands = 0;
+ for (int i = 0; i < NumSrcOperands; i++) {
+ if (src[i].isVectorRegister()) {
+ operands++;
+ }
+ }
+ return operands;
+ }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands() { return NumSrcOperands + 1; }
+ };
+
+ template<typename DataType, int NumSrcOperands>
+ class ArithInst : public CommonInstBase<typename DataType::OperandType,
+ typename DataType::OperandType,
+ NumSrcOperands>
+ {
+ public:
+ std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
+
+ ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *opcode)
+ : CommonInstBase<typename DataType::OperandType,
+ typename DataType::OperandType,
+ NumSrcOperands>(ib, obj, opcode)
+ {
+ }
+ };
+
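+    // base class for three-source instructions whose sources may each
+    // have a different operand type (e.g., cmov's b1 selector)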
+ template<typename DestOperandType, typename Src0OperandType,
+ typename Src1OperandType, typename Src2OperandType>
+ class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
+ {
+ protected:
+ typename DestOperandType::DestOperand dest;
+ typename Src0OperandType::SrcOperand src0;
+ typename Src1OperandType::SrcOperand src1;
+ typename Src2OperandType::SrcOperand src2;
+
+ void
+ generateDisassembly()
+ {
+ disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
+ src0.disassemble(), src1.disassemble(),
+ src2.disassemble());
+ }
+
+ public:
+ ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
+ const BrigObject *obj,
+ const char *opcode)
+ : HsailGPUStaticInst(obj, opcode)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ src0.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 2);
+ src1.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 3);
+ src2.init(op_offs, obj);
+ }
+
+ bool isVectorRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.isVectorRegister();
+ else if (operandIndex == 1)
+ return src1.isVectorRegister();
+ else if (operandIndex == 2)
+ return src2.isVectorRegister();
+ else
+ return dest.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.isCondRegister();
+ else if (operandIndex == 1)
+ return src1.isCondRegister();
+ else if (operandIndex == 2)
+ return src2.isCondRegister();
+ else
+ return dest.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.isScalarRegister();
+ else if (operandIndex == 1)
+ return src1.isScalarRegister();
+ else if (operandIndex == 2)
+ return src2.isScalarRegister();
+ else
+ return dest.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < 3)
+ return true;
+ else
+ return false;
+ }
+ bool isDstOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex >= 3)
+ return true;
+ else
+ return false;
+ }
+ int getOperandSize(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.opSize();
+ else if (operandIndex == 1)
+ return src1.opSize();
+ else if (operandIndex == 2)
+ return src2.opSize();
+ else
+ return dest.opSize();
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.regIndex();
+ else if (operandIndex == 1)
+ return src1.regIndex();
+ else if (operandIndex == 2)
+ return src2.regIndex();
+ else
+ return dest.regIndex();
+ }
+
+ int numSrcRegOperands() {
+ int operands = 0;
+ if (src0.isVectorRegister()) {
+ operands++;
+ }
+ if (src1.isVectorRegister()) {
+ operands++;
+ }
+ if (src2.isVectorRegister()) {
+ operands++;
+ }
+ return operands;
+ }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands() { return 4; }
+ };
+
+ template<typename DestDataType, typename Src0DataType,
+ typename Src1DataType, typename Src2DataType>
+ class ThreeNonUniformSourceInst :
+ public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
+ typename Src0DataType::OperandType,
+ typename Src1DataType::OperandType,
+ typename Src2DataType::OperandType>
+ {
+ public:
+ typedef typename DestDataType::CType DestCType;
+ typedef typename Src0DataType::CType Src0CType;
+ typedef typename Src1DataType::CType Src1CType;
+ typedef typename Src2DataType::CType Src2CType;
+
+ ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
+ const BrigObject *obj, const char *opcode)
+ : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
+ typename Src0DataType::OperandType,
+ typename Src1DataType::OperandType,
+ typename Src2DataType::OperandType>(ib,
+ obj, opcode)
+ {
+ }
+ };
+
+ template<typename DataType>
+ class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
+ DataType, DataType>
+ {
+ public:
+ CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *opcode)
+ : ThreeNonUniformSourceInst<DataType, B1, DataType,
+ DataType>(ib, obj, opcode)
+ {
+ }
+ };
+
+ template<typename DataType>
+ class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
+ DataType, U32,
+ U32>
+ {
+ public:
+ ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *opcode)
+ : ThreeNonUniformSourceInst<DataType, DataType, U32,
+ U32>(ib, obj, opcode)
+ {
+ }
+ };
+
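+    // base class for two-source instructions whose sources may have
+    // different operand types (e.g., the u32 shift amount of ShiftInst)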
+ template<typename DestOperandType, typename Src0OperandType,
+ typename Src1OperandType>
+ class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
+ {
+ protected:
+ typename DestOperandType::DestOperand dest;
+ typename Src0OperandType::SrcOperand src0;
+ typename Src1OperandType::SrcOperand src1;
+
+ void
+ generateDisassembly()
+ {
+ disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
+ src0.disassemble(), src1.disassemble());
+ }
+
+ public:
+ TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
+ const BrigObject *obj, const char *opcode)
+ : HsailGPUStaticInst(obj, opcode)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ src0.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 2);
+ src1.init(op_offs, obj);
+ }
+ bool isVectorRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.isVectorRegister();
+ else if (operandIndex == 1)
+ return src1.isVectorRegister();
+ else
+ return dest.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.isCondRegister();
+ else if (operandIndex == 1)
+ return src1.isCondRegister();
+ else
+ return dest.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.isScalarRegister();
+ else if (operandIndex == 1)
+ return src1.isScalarRegister();
+ else
+ return dest.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < 2)
+ return true;
+ else
+ return false;
+ }
+ bool isDstOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex >= 2)
+ return true;
+ else
+ return false;
+ }
+ int getOperandSize(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.opSize();
+ else if (operandIndex == 1)
+ return src1.opSize();
+ else
+ return dest.opSize();
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return src0.regIndex();
+ else if (operandIndex == 1)
+ return src1.regIndex();
+ else
+ return dest.regIndex();
+ }
+
+ int numSrcRegOperands() {
+ int operands = 0;
+ if (src0.isVectorRegister()) {
+ operands++;
+ }
+ if (src1.isVectorRegister()) {
+ operands++;
+ }
+ return operands;
+ }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands() { return 3; }
+ };
+
+ template<typename DestDataType, typename Src0DataType,
+ typename Src1DataType>
+ class TwoNonUniformSourceInst :
+ public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
+ typename Src0DataType::OperandType,
+ typename Src1DataType::OperandType>
+ {
+ public:
+ typedef typename DestDataType::CType DestCType;
+ typedef typename Src0DataType::CType Src0CType;
+ typedef typename Src1DataType::CType Src1CType;
+
+ TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
+ const BrigObject *obj, const char *opcode)
+ : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
+ typename Src0DataType::OperandType,
+ typename Src1DataType::OperandType>(ib,
+ obj, opcode)
+ {
+ }
+ };
+
+ // helper function for ClassInst
+ template<typename T>
+ bool
+ fpclassify(T src0, uint32_t src1)
+ {
+ int fpclass = std::fpclassify(src0);
+
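+        // src1 is the HSAIL 'class' condition mask: bits 0x1/0x2 select
+        // NaNs, 0x4-0x20 select negative infinity/normal/subnormal/zero,
+        // and 0x40-0x200 select positive zero/subnormal/normal/infinity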
+ if ((src1 & 0x3) && (fpclass == FP_NAN)) {
+ return true;
+ }
+
+        // use std::signbit() so that -0.0 is distinguished from +0.0;
+        // a (src0 <= -0.0) comparison would also be true for +0.0
+        if (std::signbit(src0)) {
+ if ((src1 & 0x4) && fpclass == FP_INFINITE)
+ return true;
+ if ((src1 & 0x8) && fpclass == FP_NORMAL)
+ return true;
+ if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
+ return true;
+ if ((src1 & 0x20) && fpclass == FP_ZERO)
+ return true;
+ } else {
+ if ((src1 & 0x40) && fpclass == FP_ZERO)
+ return true;
+ if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
+ return true;
+ if ((src1 & 0x100) && fpclass == FP_NORMAL)
+ return true;
+ if ((src1 & 0x200) && fpclass == FP_INFINITE)
+ return true;
+ }
+ return false;
+ }
+
+ template<typename DataType>
+ class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
+ {
+ public:
+ ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *opcode)
+ : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
+ {
+ }
+ };
+
+ template<typename DataType>
+ class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
+ {
+ public:
+ ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *opcode)
+ : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
+ {
+ }
+ };
+
+ // helper function for CmpInst
+ template<typename T>
+ bool
+ compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
+ {
+ using namespace Brig;
+
+ switch (cmpOp) {
+ case BRIG_COMPARE_EQ:
+ case BRIG_COMPARE_EQU:
+ case BRIG_COMPARE_SEQ:
+ case BRIG_COMPARE_SEQU:
+ return (src0 == src1);
+
+ case BRIG_COMPARE_NE:
+ case BRIG_COMPARE_NEU:
+ case BRIG_COMPARE_SNE:
+ case BRIG_COMPARE_SNEU:
+ return (src0 != src1);
+
+ case BRIG_COMPARE_LT:
+ case BRIG_COMPARE_LTU:
+ case BRIG_COMPARE_SLT:
+ case BRIG_COMPARE_SLTU:
+ return (src0 < src1);
+
+ case BRIG_COMPARE_LE:
+ case BRIG_COMPARE_LEU:
+ case BRIG_COMPARE_SLE:
+ case BRIG_COMPARE_SLEU:
+ return (src0 <= src1);
+
+ case BRIG_COMPARE_GT:
+ case BRIG_COMPARE_GTU:
+ case BRIG_COMPARE_SGT:
+ case BRIG_COMPARE_SGTU:
+ return (src0 > src1);
+
+ case BRIG_COMPARE_GE:
+ case BRIG_COMPARE_GEU:
+ case BRIG_COMPARE_SGE:
+ case BRIG_COMPARE_SGEU:
+ return (src0 >= src1);
+
+          case BRIG_COMPARE_NUM:
+          case BRIG_COMPARE_SNUM:
+            // ordered: true only if neither operand is NaN
+            return (src0 == src0) && (src1 == src1);
+
+          case BRIG_COMPARE_NAN:
+          case BRIG_COMPARE_SNAN:
+            // unordered: true if at least one operand is NaN
+            return (src0 != src0) || (src1 != src1);
+
+ default:
+ fatal("Bad cmpOp value %d\n", (int)cmpOp);
+ }
+ }
+
+ template<typename T>
+ int32_t
+ firstbit(T src0)
+ {
+ if (!src0)
+ return -1;
+
+        // handle positive and negative numbers
+        T tmp = (src0 < 0) ? (~src0) : (src0);
+
+        // the starting pos is the MSB
+        int pos = 8 * sizeof(T) - 1;
+        int cnt = 0;
+
+        // search for the first bit set to 1; shift a 64-bit constant so
+        // the test is well defined for 64-bit types (1 << 63 on a plain
+        // int is undefined behavior)
+        while (!(tmp & (1ULL << pos))) {
+            ++cnt;
+            --pos;
+        }
+ return cnt;
+ }
+
+ const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
+
+ template<typename DestOperandType, typename SrcOperandType>
+ class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
+ 2>
+ {
+ protected:
+ Brig::BrigCompareOperation cmpOp;
+
+ public:
+ CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
+ _opcode)
+ {
+ assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
+ Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
+ cmpOp = (Brig::BrigCompareOperation)i->compare;
+ }
+ };
+
+ template<typename DestDataType, typename SrcDataType>
+ class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
+ typename SrcDataType::OperandType>
+ {
+ public:
+ std::string
+ opcode_suffix()
+ {
+ return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
+ DestDataType::label, SrcDataType::label);
+ }
+
+ CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : CmpInstBase<typename DestDataType::OperandType,
+ typename SrcDataType::OperandType>(ib, obj, _opcode)
+ {
+ }
+ };
+
+ template<typename DestDataType, typename SrcDataType>
+ class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
+ typename SrcDataType::OperandType, 1>
+ {
+ public:
+ std::string opcode_suffix()
+ {
+ return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
+ }
+
+ CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : CommonInstBase<typename DestDataType::OperandType,
+ typename SrcDataType::OperandType,
+ 1>(ib, obj, _opcode)
+ {
+ }
+ };
+
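+    // base class for instructions with no explicit source or
+    // destination operands (e.g., ret, barrier, memfence)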
+ class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
+ {
+ public:
+ SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
+ const BrigObject *obj, const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ }
+
+ bool isVectorRegister(int operandIndex) { return false; }
+ bool isCondRegister(int operandIndex) { return false; }
+ bool isScalarRegister(int operandIndex) { return false; }
+ bool isSrcOperand(int operandIndex) { return false; }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex) { return 0; }
+ int getRegisterIndex(int operandIndex) { return -1; }
+
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return 0; }
+ int getNumOperands() { return 0; }
+ };
+
+ template<typename DestOperandType>
+ class SpecialInstNoSrcBase : public HsailGPUStaticInst
+ {
+ protected:
+ typename DestOperandType::DestOperand dest;
+
+ void generateDisassembly()
+ {
+ disassembly = csprintf("%s %s", opcode, dest.disassemble());
+ }
+
+ public:
+ SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
+ const BrigObject *obj, const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+ }
+
+ bool isVectorRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex) { return false; }
+ bool isDstOperand(int operandIndex) { return true; }
+ int getOperandSize(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.opSize();
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.regIndex();
+ }
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands() { return 1; }
+ };
+
+ template<typename DestDataType>
+ class SpecialInstNoSrc :
+ public SpecialInstNoSrcBase<typename DestDataType::OperandType>
+ {
+ public:
+ typedef typename DestDataType::CType DestCType;
+
+ SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
+ _opcode)
+ {
+ }
+ };
+
+ template<typename DestOperandType>
+ class SpecialInst1SrcBase : public HsailGPUStaticInst
+ {
+ protected:
+ typedef int SrcCType; // used in execute() template
+
+ typename DestOperandType::DestOperand dest;
+ ImmOperand<SrcCType> src0;
+
+ void
+ generateDisassembly()
+ {
+ disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
+ src0.disassemble());
+ }
+
+ public:
+ SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
+ const BrigObject *obj, const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ src0.init(op_offs, obj);
+ }
+ bool isVectorRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex) { return false; }
+ bool isDstOperand(int operandIndex) { return true; }
+ int getOperandSize(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.opSize();
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return dest.regIndex();
+ }
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands() { return 1; }
+ };
+
+ template<typename DestDataType>
+ class SpecialInst1Src :
+ public SpecialInst1SrcBase<typename DestDataType::OperandType>
+ {
+ public:
+ typedef typename DestDataType::CType DestCType;
+
+ SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
+ _opcode)
+ {
+ }
+ };
+
+ class Ret : public SpecialInstNoSrcNoDest
+ {
+ public:
+ typedef SpecialInstNoSrcNoDest Base;
+
+ Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : Base(ib, obj, "ret")
+ {
+ o_type = Enums::OT_RET;
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ class Barrier : public SpecialInstNoSrcNoDest
+ {
+ public:
+ typedef SpecialInstNoSrcNoDest Base;
+ uint8_t width;
+
+ Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : Base(ib, obj, "barrier")
+ {
+ o_type = Enums::OT_BARRIER;
+ assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
+ width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ class MemFence : public SpecialInstNoSrcNoDest
+ {
+ public:
+ typedef SpecialInstNoSrcNoDest Base;
+
+ Brig::BrigMemoryOrder memFenceMemOrder;
+ Brig::BrigMemoryScope memFenceScopeSegGroup;
+ Brig::BrigMemoryScope memFenceScopeSegGlobal;
+ Brig::BrigMemoryScope memFenceScopeSegImage;
+
+ MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : Base(ib, obj, "memfence")
+ {
+ assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
+
+ memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
+ ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
+
+ memFenceScopeSegGroup = (Brig::BrigMemoryScope)
+ ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
+
+ memFenceScopeSegImage = (Brig::BrigMemoryScope)
+ ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
+
+ memFenceMemOrder = (Brig::BrigMemoryOrder)
+ ((Brig::BrigInstMemFence*)ib)->memoryOrder;
+
+ // set o_type based on scopes
+ if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
+ memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
+ o_type = Enums::OT_BOTH_MEMFENCE;
+ } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
+ o_type = Enums::OT_GLOBAL_MEMFENCE;
+ } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
+ o_type = Enums::OT_SHARED_MEMFENCE;
+ } else {
+ fatal("MemFence constructor: bad scope specifiers\n");
+ }
+ }
+
+ void
+ initiateAcc(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wave = gpuDynInst->wavefront();
+ wave->computeUnit->injectGlobalMemFence(gpuDynInst);
+ }
+
+ void
+ execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+ // 2 cases:
+ // * memfence to a sequentially consistent memory (e.g., LDS).
+ // These can be handled as no-ops.
+ // * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
+ // etc.). We send a packet, tagged with the memory order and
+ // scope, and let the GPU coalescer handle it.
+
+ if (o_type == Enums::OT_GLOBAL_MEMFENCE ||
+ o_type == Enums::OT_BOTH_MEMFENCE) {
+ gpuDynInst->simdId = w->simdId;
+ gpuDynInst->wfSlotId = w->wfSlotId;
+ gpuDynInst->wfDynId = w->wfDynId;
+ gpuDynInst->kern_id = w->kern_id;
+ gpuDynInst->cu_id = w->computeUnit->cu_id;
+
+ gpuDynInst->memoryOrder =
+ getGenericMemoryOrder(memFenceMemOrder);
+ gpuDynInst->scope =
+ getGenericMemoryScope(memFenceScopeSegGlobal);
+ gpuDynInst->useContinuation = false;
+ GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
+ gmp->getGMReqFIFO().push(gpuDynInst);
+
+ w->wr_gm_reqs_in_pipe--;
+ w->rd_gm_reqs_in_pipe--;
+ w->mem_reqs_in_pipe--;
+ w->outstanding_reqs++;
+ } else if (o_type == Enums::OT_SHARED_MEMFENCE) {
+ // no-op
+ } else {
+ fatal("MemFence execute: bad o_type\n");
+ }
+ }
+ };
+
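+    // HSAIL call instruction; the __gem5_hsail_op pseudo instructions
+    // are also overlaid on this opcode (see isPseudoOp())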
+ class Call : public HsailGPUStaticInst
+ {
+ public:
+        // helper functions
+ void calcAddr(Wavefront* w, GPUDynInstPtr m);
+
+ void
+ generateDisassembly()
+ {
+ if (dest.disassemble() == "") {
+ disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
+ src1.disassemble());
+ } else {
+ disassembly = csprintf("%s %s (%s) (%s)", opcode,
+ src0.disassemble(), dest.disassemble(),
+ src1.disassemble());
+ }
+ }
+
+ bool
+ isPseudoOp()
+ {
+ std::string func_name = src0.disassemble();
+ if (func_name.find("__gem5_hsail_op") != std::string::npos) {
+ return true;
+ }
+ return false;
+ }
+
+ // member variables
+ ListOperand dest;
+ FunctionRefOperand src0;
+ ListOperand src1;
+ HsailCode *func_ptr;
+
+ // exec function for pseudo instructions mapped on top of call opcode
+ void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
+
+ // user-defined pseudo instructions
+ void MagicPrintLane(Wavefront *w);
+ void MagicPrintLane64(Wavefront *w);
+ void MagicPrintWF32(Wavefront *w);
+ void MagicPrintWF64(Wavefront *w);
+ void MagicPrintWFFloat(Wavefront *w);
+ void MagicSimBreak(Wavefront *w);
+ void MagicPrefixSum(Wavefront *w);
+ void MagicReduction(Wavefront *w);
+ void MagicMaskLower(Wavefront *w);
+ void MagicMaskUpper(Wavefront *w);
+ void MagicJoinWFBar(Wavefront *w);
+ void MagicWaitWFBar(Wavefront *w);
+ void MagicPanic(Wavefront *w);
+
+ void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
+ GPUDynInstPtr gpuDynInst);
+
+ void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
+ GPUDynInstPtr gpuDynInst);
+
+ void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
+
+ void MagicXactCasLd(Wavefront *w);
+ void MagicMostSigThread(Wavefront *w);
+ void MagicMostSigBroadcast(Wavefront *w);
+
+ void MagicPrintWF32ID(Wavefront *w);
+ void MagicPrintWFID64(Wavefront *w);
+
+ Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : HsailGPUStaticInst(obj, "call")
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ src0.init(op_offs, obj);
+
+ func_ptr = nullptr;
+ std::string func_name = src0.disassemble();
+ if (!isPseudoOp()) {
+ func_ptr = dynamic_cast<HsailCode*>(obj->
+ getFunction(func_name));
+
+ if (!func_ptr)
+ fatal("call::exec cannot find function: %s\n", func_name);
+ }
+
+ op_offs = obj->getOperandPtr(ib->operands, 2);
+ src1.init(op_offs, obj);
+ }
+
+ bool isVectorRegister(int operandIndex) { return false; }
+ bool isCondRegister(int operandIndex) { return false; }
+ bool isScalarRegister(int operandIndex) { return false; }
+ bool isSrcOperand(int operandIndex) { return false; }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex) { return 0; }
+ int getRegisterIndex(int operandIndex) { return -1; }
+
+ void
+ execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ std::string func_name = src0.disassemble();
+ if (isPseudoOp()) {
+ execPseudoInst(w, gpuDynInst);
+ } else {
+ fatal("Native HSAIL functions are not yet implemented: %s\n",
+ func_name);
+ }
+ }
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return 0; }
+ int getNumOperands() { return 2; }
+ };
+
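+    // templated bitwise complement ('not' is a C++ keyword); the bool
+    // specialization below uses logical negation instead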
+ template<typename T> T heynot(T arg) { return ~arg; }
+ template<> inline bool heynot<bool>(bool arg) { return !arg; }
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_INSTS_DECL_HH__
diff --git a/src/arch/hsail/insts/gpu_static_inst.cc b/src/arch/hsail/insts/gpu_static_inst.cc
new file mode 100644
index 000000000..bbaeb13e6
--- /dev/null
+++ b/src/arch/hsail/insts/gpu_static_inst.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Anthony Gutierrez
+ */
+
+#include "arch/hsail/insts/gpu_static_inst.hh"
+
+#include "gpu-compute/brig_object.hh"
+
+namespace HsailISA
+{
+ HsailGPUStaticInst::HsailGPUStaticInst(const BrigObject *obj,
+ const std::string &opcode)
+ : GPUStaticInst(opcode), hsailCode(obj->currentCode)
+ {
+ }
+
+ void
+ HsailGPUStaticInst::generateDisassembly()
+ {
+ disassembly = opcode;
+ }
+
+ const std::string&
+ HsailGPUStaticInst::disassemble()
+ {
+ if (disassembly.empty()) {
+ generateDisassembly();
+ assert(!disassembly.empty());
+ }
+
+ return disassembly;
+ }
+} // namespace HsailISA
diff --git a/src/arch/hsail/insts/gpu_static_inst.hh b/src/arch/hsail/insts/gpu_static_inst.hh
new file mode 100644
index 000000000..29aab1f70
--- /dev/null
+++ b/src/arch/hsail/insts/gpu_static_inst.hh
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Anthony Gutierrez
+ */
+
+#ifndef __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
+#define __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
+
+/*
+ * @file gpu_static_inst.hh
+ *
+ * Defines the base class representing HSAIL GPU static instructions.
+ */
+
+#include "gpu-compute/gpu_static_inst.hh"
+
+class BrigObject;
+class HsailCode;
+
+namespace HsailISA
+{
+ class HsailGPUStaticInst : public GPUStaticInst
+ {
+ public:
+ HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
+ void generateDisassembly();
+ const std::string &disassemble();
+ uint32_t instSize() { return 4; }
+
+ protected:
+ HsailCode *hsailCode;
+ };
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc
new file mode 100644
index 000000000..4e70bf46a
--- /dev/null
+++ b/src/arch/hsail/insts/main.cc
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#include "arch/hsail/insts/decl.hh"
+#include "debug/GPUExec.hh"
+#include "gpu-compute/dispatcher.hh"
+#include "gpu-compute/simple_pool_manager.hh"
+
+namespace HsailISA
+{
+ template<> const char *B1::label = "b1";
+ template<> const char *B8::label = "b8";
+ template<> const char *B16::label = "b16";
+ template<> const char *B32::label = "b32";
+ template<> const char *B64::label = "b64";
+
+ template<> const char *S8::label = "s8";
+ template<> const char *S16::label = "s16";
+ template<> const char *S32::label = "s32";
+ template<> const char *S64::label = "s64";
+
+ template<> const char *U8::label = "u8";
+ template<> const char *U16::label = "u16";
+ template<> const char *U32::label = "u32";
+ template<> const char *U64::label = "u64";
+
+ template<> const char *F32::label = "f32";
+ template<> const char *F64::label = "f64";
+
+ const char*
+ cmpOpToString(Brig::BrigCompareOperation cmpOp)
+ {
+ using namespace Brig;
+
+ switch (cmpOp) {
+ case BRIG_COMPARE_EQ:
+ return "eq";
+ case BRIG_COMPARE_NE:
+ return "ne";
+ case BRIG_COMPARE_LT:
+ return "lt";
+ case BRIG_COMPARE_LE:
+ return "le";
+ case BRIG_COMPARE_GT:
+ return "gt";
+ case BRIG_COMPARE_GE:
+ return "ge";
+ case BRIG_COMPARE_EQU:
+ return "equ";
+ case BRIG_COMPARE_NEU:
+ return "neu";
+ case BRIG_COMPARE_LTU:
+ return "ltu";
+ case BRIG_COMPARE_LEU:
+ return "leu";
+ case BRIG_COMPARE_GTU:
+ return "gtu";
+ case BRIG_COMPARE_GEU:
+ return "geu";
+ case BRIG_COMPARE_NUM:
+ return "num";
+ case BRIG_COMPARE_NAN:
+ return "nan";
+ case BRIG_COMPARE_SEQ:
+ return "seq";
+ case BRIG_COMPARE_SNE:
+ return "sne";
+ case BRIG_COMPARE_SLT:
+ return "slt";
+ case BRIG_COMPARE_SLE:
+ return "sle";
+ case BRIG_COMPARE_SGT:
+ return "sgt";
+ case BRIG_COMPARE_SGE:
+ return "sge";
+ case BRIG_COMPARE_SGEU:
+ return "sgeu";
+ case BRIG_COMPARE_SEQU:
+ return "sequ";
+ case BRIG_COMPARE_SNEU:
+ return "sneu";
+ case BRIG_COMPARE_SLTU:
+ return "sltu";
+ case BRIG_COMPARE_SLEU:
+ return "sleu";
+ case BRIG_COMPARE_SNUM:
+ return "snum";
+ case BRIG_COMPARE_SNAN:
+ return "snan";
+ case BRIG_COMPARE_SGTU:
+ return "sgtu";
+ default:
+ return "unknown";
+ }
+ }
+
+ void
+ Ret::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ const VectorMask &mask = w->get_pred();
+
+ // mask off completed work-items
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ w->init_mask[lane] = 0;
+ }
+        }
+
+ // delete extra instructions fetched for completed work-items
+ w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
+ w->instructionBuffer.end());
+ if (w->pendingFetch) {
+ w->dropFetch = true;
+ }
+
+        // if all work-items have completed, then the wavefront is done
+ if (w->init_mask.none()) {
+ w->status = Wavefront::S_STOPPED;
+
+ int32_t refCount = w->computeUnit->getLds().
+ decreaseRefCounter(w->dispatchid, w->wg_id);
+
+ DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
+ w->computeUnit->cu_id, w->wg_id, refCount);
+
+ // free the vector registers of the completed wavefront
+ w->computeUnit->vectorRegsReserved[w->simdId] -=
+ w->reservedVectorRegs;
+
+ assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0);
+
+ uint32_t endIndex = (w->startVgprIndex +
+ w->reservedVectorRegs - 1) %
+ w->computeUnit->vrf[w->simdId]->numRegs();
+
+ w->computeUnit->vrf[w->simdId]->manager->
+ freeRegion(w->startVgprIndex, endIndex);
+
+ w->reservedVectorRegs = 0;
+ w->startVgprIndex = 0;
+ w->computeUnit->completedWfs++;
+
+ DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
+ w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
+
+ if (!refCount) {
+ // Notify Memory System of Kernel Completion
+ // Kernel End = isKernel + isRelease
+ w->status = Wavefront::S_RETURNING;
+ GPUDynInstPtr local_mempacket = gpuDynInst;
+ local_mempacket->memoryOrder = Enums::MEMORY_ORDER_SC_RELEASE;
+ local_mempacket->scope = Enums::MEMORY_SCOPE_SYSTEM;
+ local_mempacket->useContinuation = false;
+ local_mempacket->simdId = w->simdId;
+ local_mempacket->wfSlotId = w->wfSlotId;
+ local_mempacket->wfDynId = w->wfDynId;
+ w->computeUnit->injectGlobalMemFence(local_mempacket, true);
+ } else {
+ w->computeUnit->shader->dispatcher->scheduleDispatch();
+ }
+ }
+ }
+
+ void
+ Barrier::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
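+        // increment the barrier count and stall this wavefront until the
+        // rest of its work-group reaches the barrier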
+ assert(w->barrier_cnt == w->old_barrier_cnt);
+ w->barrier_cnt = w->old_barrier_cnt + 1;
+ w->stalledAtBarrier = true;
+ }
+} // namespace HsailISA
diff --git a/src/arch/hsail/insts/mem.cc b/src/arch/hsail/insts/mem.cc
new file mode 100644
index 000000000..97d4c902b
--- /dev/null
+++ b/src/arch/hsail/insts/mem.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#include "arch/hsail/insts/mem.hh"
+
+#include "arch/hsail/Brig.h"
+#include "enums/OpType.hh"
+
+using namespace Brig;
+
+namespace HsailISA
+{
+ const char* atomicOpToString(BrigAtomicOperation brigOp);
+
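+    // map a BRIG atomic opcode/operation pair to the simulator's
+    // MemOpType; ATOMICNORET operations map to the MO_ANR* variants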
+ Enums::MemOpType
+ brigAtomicToMemOpType(BrigOpcode brigOpCode, BrigAtomicOperation brigOp)
+ {
+ if (brigOpCode == Brig::BRIG_OPCODE_ATOMIC) {
+ switch (brigOp) {
+ case BRIG_ATOMIC_AND:
+ return Enums::MO_AAND;
+ case BRIG_ATOMIC_OR:
+ return Enums::MO_AOR;
+ case BRIG_ATOMIC_XOR:
+ return Enums::MO_AXOR;
+ case BRIG_ATOMIC_CAS:
+ return Enums::MO_ACAS;
+ case BRIG_ATOMIC_EXCH:
+ return Enums::MO_AEXCH;
+ case BRIG_ATOMIC_ADD:
+ return Enums::MO_AADD;
+ case BRIG_ATOMIC_WRAPINC:
+ return Enums::MO_AINC;
+ case BRIG_ATOMIC_WRAPDEC:
+ return Enums::MO_ADEC;
+ case BRIG_ATOMIC_MIN:
+ return Enums::MO_AMIN;
+ case BRIG_ATOMIC_MAX:
+ return Enums::MO_AMAX;
+ case BRIG_ATOMIC_SUB:
+ return Enums::MO_ASUB;
+ default:
+ fatal("Bad BrigAtomicOperation code %d\n", brigOp);
+ }
+ } else if (brigOpCode == Brig::BRIG_OPCODE_ATOMICNORET) {
+ switch (brigOp) {
+ case BRIG_ATOMIC_AND:
+ return Enums::MO_ANRAND;
+ case BRIG_ATOMIC_OR:
+ return Enums::MO_ANROR;
+ case BRIG_ATOMIC_XOR:
+ return Enums::MO_ANRXOR;
+ case BRIG_ATOMIC_CAS:
+ return Enums::MO_ANRCAS;
+ case BRIG_ATOMIC_EXCH:
+ return Enums::MO_ANREXCH;
+ case BRIG_ATOMIC_ADD:
+ return Enums::MO_ANRADD;
+ case BRIG_ATOMIC_WRAPINC:
+ return Enums::MO_ANRINC;
+ case BRIG_ATOMIC_WRAPDEC:
+ return Enums::MO_ANRDEC;
+ case BRIG_ATOMIC_MIN:
+ return Enums::MO_ANRMIN;
+ case BRIG_ATOMIC_MAX:
+ return Enums::MO_ANRMAX;
+ case BRIG_ATOMIC_SUB:
+ return Enums::MO_ANRSUB;
+ default:
+ fatal("Bad BrigAtomicOperation code %d\n", brigOp);
+ }
+ } else {
+ fatal("Bad BrigAtomicOpcode %d\n", brigOpCode);
+ }
+ }
+
+ const char*
+ atomicOpToString(BrigAtomicOperation brigOp)
+ {
+ switch (brigOp) {
+ case BRIG_ATOMIC_AND:
+ return "and";
+ case BRIG_ATOMIC_OR:
+ return "or";
+ case BRIG_ATOMIC_XOR:
+ return "xor";
+ case BRIG_ATOMIC_CAS:
+ return "cas";
+ case BRIG_ATOMIC_EXCH:
+ return "exch";
+ case BRIG_ATOMIC_ADD:
+ return "add";
+ case BRIG_ATOMIC_WRAPINC:
+ return "inc";
+ case BRIG_ATOMIC_WRAPDEC:
+ return "dec";
+ case BRIG_ATOMIC_MIN:
+ return "min";
+ case BRIG_ATOMIC_MAX:
+ return "max";
+ case BRIG_ATOMIC_SUB:
+ return "sub";
+ default:
+ return "unknown";
+ }
+ }
+} // namespace HsailISA
diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh
new file mode 100644
index 000000000..d3ce76dee
--- /dev/null
+++ b/src/arch/hsail/insts/mem.hh
@@ -0,0 +1,1629 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
+#define __ARCH_HSAIL_INSTS_MEM_HH__
+
+#include "arch/hsail/insts/decl.hh"
+#include "arch/hsail/insts/gpu_static_inst.hh"
+#include "arch/hsail/operand.hh"
+
+namespace HsailISA
+{
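+    // mixin for memory instructions that records the access size
+    // implied by the memory type and a pointer to the address operand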
+ class MemInst
+ {
+ public:
+ MemInst() : size(0), addr_operand(nullptr) { }
+
+ MemInst(Enums::MemType m_type)
+ {
+ if (m_type == Enums::M_U64 ||
+ m_type == Enums::M_S64 ||
+ m_type == Enums::M_F64) {
+ size = 8;
+ } else if (m_type == Enums::M_U32 ||
+ m_type == Enums::M_S32 ||
+ m_type == Enums::M_F32) {
+ size = 4;
+ } else if (m_type == Enums::M_U16 ||
+ m_type == Enums::M_S16 ||
+ m_type == Enums::M_F16) {
+ size = 2;
+ } else {
+ size = 1;
+ }
+
+ addr_operand = nullptr;
+ }
+
+ void
+ init_addr(AddrOperandBase *_addr_operand)
+ {
+ addr_operand = _addr_operand;
+ }
+
+ private:
+ int size;
+ AddrOperandBase *addr_operand;
+
+ public:
+ int getMemOperandSize() { return size; }
+ AddrOperandBase *getAddressOperand() { return addr_operand; }
+ };
+
+ template<typename DestOperandType, typename AddrOperandType>
+ class LdaInstBase : public HsailGPUStaticInst
+ {
+ public:
+ typename DestOperandType::DestOperand dest;
+ AddrOperandType addr;
+
+ LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ using namespace Brig;
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ }
+
+ int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isVectorRegister() :
+ this->addr.isVectorRegister());
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isCondRegister() :
+ this->addr.isCondRegister());
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isScalarRegister() :
+ this->addr.isScalarRegister());
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex > 0)
+ return(this->addr.isVectorRegister());
+ return false;
+ }
+ bool isDstOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return(operandIndex == 0);
+ }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.opSize() :
+ this->addr.opSize());
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.regIndex() :
+ this->addr.regIndex());
+ }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister())
+ return 2;
+ return 1;
+ }
+ };
+
+ template<typename DestDataType, typename AddrOperandType>
+ class LdaInst :
+ public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
+ public MemInst
+ {
+ public:
+ void generateDisassembly();
+
+ LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : LdaInstBase<typename DestDataType::OperandType,
+ AddrOperandType>(ib, obj, _opcode)
+ {
+ init_addr(&this->addr);
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
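+    // decode an lda instruction, choosing the address operand flavor
+    // based on the kind of the address operand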
+ template<typename DataType>
+ GPUStaticInst*
+ decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
+ BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
+
+ if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
+ return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
+ } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ // V2/V4 not allowed
+ switch (regDataType.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
+ default:
+ fatal("Bad ldas register operand type %d\n", regDataType.type);
+ }
+ } else {
+ fatal("Bad ldas register operand kind %d\n", regDataType.kind);
+ }
+ }
+
+ template<typename MemOperandType, typename DestOperandType,
+ typename AddrOperandType>
+ class LdInstBase : public HsailGPUStaticInst
+ {
+ public:
+ Brig::BrigWidth8_t width;
+ typename DestOperandType::DestOperand dest;
+ AddrOperandType addr;
+
+ Brig::BrigSegment segment;
+ Brig::BrigMemoryOrder memoryOrder;
+ Brig::BrigMemoryScope memoryScope;
+ unsigned int equivClass;
+ bool isArgLoad()
+ {
+ return segment == Brig::BRIG_SEGMENT_KERNARG ||
+ segment == Brig::BRIG_SEGMENT_ARG;
+ }
+ void
+ initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstMem *ldst = (const BrigInstMem*)ib;
+
+ segment = (BrigSegment)ldst->segment;
+ memoryOrder = BRIG_MEMORY_ORDER_NONE;
+ memoryScope = BRIG_MEMORY_SCOPE_NONE;
+ equivClass = ldst->equivClass;
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_READ;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_READ;
+ break;
+
+ case BRIG_SEGMENT_PRIVATE:
+ o_type = Enums::OT_PRIVATE_READ;
+ break;
+
+ case BRIG_SEGMENT_READONLY:
+ o_type = Enums::OT_READONLY_READ;
+ break;
+
+ case BRIG_SEGMENT_SPILL:
+ o_type = Enums::OT_SPILL_READ;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_READ;
+ break;
+
+ case BRIG_SEGMENT_KERNARG:
+ o_type = Enums::OT_KERN_READ;
+ break;
+
+ case BRIG_SEGMENT_ARG:
+ o_type = Enums::OT_ARG;
+ break;
+
+ default:
+ panic("Ld: segment %d not supported\n", segment);
+ }
+
+ width = ldst->width;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+ if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
+ dest.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ }
+
+ void
+ initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
+
+ segment = (BrigSegment)at->segment;
+ memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+ memoryScope = (BrigMemoryScope)at->memoryScope;
+ equivClass = 0;
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_READ;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_READ;
+ break;
+
+ case BRIG_SEGMENT_PRIVATE:
+ o_type = Enums::OT_PRIVATE_READ;
+ break;
+
+ case BRIG_SEGMENT_READONLY:
+ o_type = Enums::OT_READONLY_READ;
+ break;
+
+ case BRIG_SEGMENT_SPILL:
+ o_type = Enums::OT_SPILL_READ;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_READ;
+ break;
+
+ case BRIG_SEGMENT_KERNARG:
+ o_type = Enums::OT_KERN_READ;
+ break;
+
+ case BRIG_SEGMENT_ARG:
+ o_type = Enums::OT_ARG;
+ break;
+
+ default:
+ panic("Ld: segment %d not supported\n", segment);
+ }
+
+ width = BRIG_WIDTH_1;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+
+ if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
+ dest.init(op_offs, obj);
+
+            op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ }
+
+ LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ using namespace Brig;
+
+ if (ib->opcode == BRIG_OPCODE_LD) {
+ initLd(ib, obj, _opcode);
+ } else {
+ initAtomicLd(ib, obj, _opcode);
+ }
+ }
+
+ int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister())
+ return 2;
+ else
+ return 1;
+ }
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isVectorRegister() :
+ this->addr.isVectorRegister());
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isCondRegister() :
+ this->addr.isCondRegister());
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isScalarRegister() :
+ this->addr.isScalarRegister());
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex > 0)
+ return(this->addr.isVectorRegister());
+ return false;
+ }
+ bool isDstOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return(operandIndex == 0);
+ }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.opSize() :
+ this->addr.opSize());
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.regIndex() :
+ this->addr.regIndex());
+ }
+ };
+
+ template<typename MemDataType, typename DestDataType,
+ typename AddrOperandType>
+ class LdInst :
+ public LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType, AddrOperandType>,
+ public MemInst
+ {
+ typename DestDataType::OperandType::DestOperand dest_vect[4];
+ uint16_t num_dest_operands;
+ void generateDisassembly();
+
+ public:
+ LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>(ib, obj, _opcode),
+ MemInst(MemDataType::memType)
+ {
+ init_addr(&this->addr);
+
+            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+
+ if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+ const Brig::BrigOperandOperandList *brigRegVecOp =
+ (const Brig::BrigOperandOperandList*)brigOp;
+
+ num_dest_operands =
+ *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
+
+ assert(num_dest_operands <= 4);
+ } else {
+ num_dest_operands = 1;
+ }
+
+ if (num_dest_operands > 1) {
+ assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
+
+ for (int i = 0; i < num_dest_operands; ++i) {
+ dest_vect[i].init_from_vect(op_offs, obj, i);
+ }
+ }
+ }
+
+ void
+ initiateAcc(GPUDynInstPtr gpuDynInst) override
+ {
+ typedef typename MemDataType::CType c0;
+
+ gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
+
+ if (num_dest_operands > 1) {
+ for (int i = 0; i < VSZ; ++i)
+ if (gpuDynInst->exec_mask[i])
+ gpuDynInst->statusVector.push_back(num_dest_operands);
+ else
+ gpuDynInst->statusVector.push_back(0);
+ }
+
+ for (int k = 0; k < num_dest_operands; ++k) {
+
+ c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+
+ for (int i = 0; i < VSZ; ++i) {
+ if (gpuDynInst->exec_mask[i]) {
+ Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
+
+ if (isLocalMem()) {
+ // load from shared memory
+ *d = gpuDynInst->wavefront()->ldsChunk->
+ read<c0>(vaddr);
+ } else {
+ Request *req = new Request(0, vaddr, sizeof(c0), 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, i);
+
+ gpuDynInst->setRequestFlags(req);
+ PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
+ pkt->dataStatic(d);
+
+ if (gpuDynInst->computeUnit()->shader->
+ separate_acquire_release &&
+ gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ // if this load has acquire semantics,
+ // set the response continuation function
+ // to perform an Acquire request
+ gpuDynInst->execContinuation =
+ &GPUStaticInst::execLdAcq;
+
+ gpuDynInst->useContinuation = true;
+ } else {
+ // the request will be finished when
+ // the load completes
+ gpuDynInst->useContinuation = false;
+ }
+ // translation is performed in sendRequest()
+ gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
+ i, pkt);
+ }
+ }
+ ++d;
+ }
+ }
+
+ gpuDynInst->updateStats();
+ }
+
+ private:
+ void
+ execLdAcq(GPUDynInstPtr gpuDynInst) override
+ {
+        // after the load has completed, if the load has acquire
+        // semantics, issue an acquire request.
+ if (!isLocalMem()) {
+ if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+ && gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ gpuDynInst->statusBitVector = VectorMask(1);
+ gpuDynInst->useContinuation = false;
+ // create request
+ Request *req = new Request(0, 0, 0, 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, -1);
+ req->setFlags(Request::ACQUIRE);
+ gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
+ }
+ }
+ }
+
+ public:
+ bool
+ isLocalMem() const override
+ {
+ return this->segment == Brig::BRIG_SEGMENT_GROUP;
+ }
+
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.isVectorRegister());
+ if (num_dest_operands > 1) {
+ return dest_vect[operandIndex].isVectorRegister();
+ }
+ else if (num_dest_operands == 1) {
+ return LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.isVectorRegister();
+ }
+ return false;
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.isCondRegister());
+ if (num_dest_operands > 1)
+ return dest_vect[operandIndex].isCondRegister();
+ else if (num_dest_operands == 1)
+ return LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.isCondRegister();
+ return false;
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.isScalarRegister());
+ if (num_dest_operands > 1)
+ return dest_vect[operandIndex].isScalarRegister();
+ else if (num_dest_operands == 1)
+ return LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.isScalarRegister();
+ return false;
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.isVectorRegister());
+ return false;
+ }
+ bool isDstOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return false;
+ return true;
+ }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.opSize());
+ if (num_dest_operands > 1)
+ return(dest_vect[operandIndex].opSize());
+ else if (num_dest_operands == 1)
+ return(LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.opSize());
+ return 0;
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.regIndex());
+ if (num_dest_operands > 1)
+ return(dest_vect[operandIndex].regIndex());
+ else if (num_dest_operands == 1)
+ return(LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.regIndex());
+ return -1;
+ }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
+ return(num_dest_operands+1);
+ else
+ return(num_dest_operands);
+ }
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ template<typename MemDT, typename DestDT>
+ GPUStaticInst*
+ decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands,1);
+ BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
+
+ if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
+ return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
+ } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
+ tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+ switch (tmp.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ return new LdInst<MemDT, DestDT,
+ SRegAddrOperand>(ib, obj, "ld");
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ return new LdInst<MemDT, DestDT,
+ DRegAddrOperand>(ib, obj, "ld");
+ default:
+ fatal("Bad ld register operand type %d\n", tmp.regKind);
+ }
+ } else {
+ fatal("Bad ld register operand kind %d\n", tmp.kind);
+ }
+ }
+
+ template<typename MemDT>
+ GPUStaticInst*
+ decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands,0);
+ BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
+
+ assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
+ dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
+ switch(dest.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ switch (ib->type) {
+ case Brig::BRIG_TYPE_B8:
+ case Brig::BRIG_TYPE_B16:
+ case Brig::BRIG_TYPE_B32:
+ return decodeLd2<MemDT, B32>(ib, obj);
+ case Brig::BRIG_TYPE_U8:
+ case Brig::BRIG_TYPE_U16:
+ case Brig::BRIG_TYPE_U32:
+ return decodeLd2<MemDT, U32>(ib, obj);
+ case Brig::BRIG_TYPE_S8:
+ case Brig::BRIG_TYPE_S16:
+ case Brig::BRIG_TYPE_S32:
+ return decodeLd2<MemDT, S32>(ib, obj);
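+          // floating-point destinations reuse the unsigned integer
+          // register type of the same width; a load only moves raw bits,
+          // so no conversion is involved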
+ case Brig::BRIG_TYPE_F16:
+ case Brig::BRIG_TYPE_F32:
+ return decodeLd2<MemDT, U32>(ib, obj);
+ default:
+ fatal("Bad ld register operand type %d, %d\n",
+ dest.regKind, ib->type);
+ };
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ switch (ib->type) {
+ case Brig::BRIG_TYPE_B64:
+ return decodeLd2<MemDT, B64>(ib, obj);
+ case Brig::BRIG_TYPE_U64:
+ return decodeLd2<MemDT, U64>(ib, obj);
+ case Brig::BRIG_TYPE_S64:
+ return decodeLd2<MemDT, S64>(ib, obj);
+ case Brig::BRIG_TYPE_F64:
+ return decodeLd2<MemDT, U64>(ib, obj);
+ default:
+ fatal("Bad ld register operand type %d, %d\n",
+ dest.regKind, ib->type);
+ };
+ default:
+ fatal("Bad ld register operand type %d, %d\n", dest.regKind,
+ ib->type);
+ }
+ }
+
+ template<typename MemDataType, typename SrcOperandType,
+ typename AddrOperandType>
+ class StInstBase : public HsailGPUStaticInst
+ {
+ public:
+ typename SrcOperandType::SrcOperand src;
+ AddrOperandType addr;
+
+ Brig::BrigSegment segment;
+ Brig::BrigMemoryScope memoryScope;
+ Brig::BrigMemoryOrder memoryOrder;
+ unsigned int equivClass;
+
+ void
+ initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstMem *ldst = (const BrigInstMem*)ib;
+
+ segment = (BrigSegment)ldst->segment;
+ memoryOrder = BRIG_MEMORY_ORDER_NONE;
+ memoryScope = BRIG_MEMORY_SCOPE_NONE;
+ equivClass = ldst->equivClass;
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_WRITE;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_WRITE;
+ break;
+
+ case BRIG_SEGMENT_PRIVATE:
+ o_type = Enums::OT_PRIVATE_WRITE;
+ break;
+
+ case BRIG_SEGMENT_READONLY:
+ o_type = Enums::OT_READONLY_WRITE;
+ break;
+
+ case BRIG_SEGMENT_SPILL:
+ o_type = Enums::OT_SPILL_WRITE;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_WRITE;
+ break;
+
+ case BRIG_SEGMENT_ARG:
+ o_type = Enums::OT_ARG;
+ break;
+
+ default:
+ panic("St: segment %d not supported\n", segment);
+ }
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const BrigOperand *baseOp = obj->getOperand(op_offs);
+
+ if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
+ (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
+ src.init(op_offs, obj);
+ }
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ }
+
+ void
+ initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
+
+ segment = (BrigSegment)at->segment;
+ memoryScope = (BrigMemoryScope)at->memoryScope;
+ memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+ equivClass = 0;
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_WRITE;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_WRITE;
+ break;
+
+ case BRIG_SEGMENT_PRIVATE:
+ o_type = Enums::OT_PRIVATE_WRITE;
+ break;
+
+ case BRIG_SEGMENT_READONLY:
+ o_type = Enums::OT_READONLY_WRITE;
+ break;
+
+ case BRIG_SEGMENT_SPILL:
+ o_type = Enums::OT_SPILL_WRITE;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_WRITE;
+ break;
+
+ case BRIG_SEGMENT_ARG:
+ o_type = Enums::OT_ARG;
+ break;
+
+ default:
+ panic("St: segment %d not supported\n", segment);
+ }
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ addr.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ src.init(op_offs, obj);
+ }
+
+ StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ using namespace Brig;
+
+ if (ib->opcode == BRIG_OPCODE_ST) {
+ initSt(ib, obj, _opcode);
+ } else {
+ initAtomicSt(ib, obj, _opcode);
+ }
+ }
+
+ int numDstRegOperands() { return 0; }
+ int numSrcRegOperands()
+ {
+ return src.isVectorRegister() + this->addr.isVectorRegister();
+ }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
+ return 2;
+ else
+ return 1;
+ }
+ bool isVectorRegister(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.isVectorRegister() :
+ this->addr.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.isCondRegister() :
+ this->addr.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.isScalarRegister() :
+ this->addr.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return true;
+ }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.opSize() : this->addr.opSize();
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.regIndex() : this->addr.regIndex();
+ }
+ };
+
+
+ template<typename MemDataType, typename SrcDataType,
+ typename AddrOperandType>
+ class StInst :
+ public StInstBase<MemDataType, typename SrcDataType::OperandType,
+ AddrOperandType>,
+ public MemInst
+ {
+ public:
+ typename SrcDataType::OperandType::SrcOperand src_vect[4];
+ uint16_t num_src_operands;
+ void generateDisassembly();
+
+ StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode, int srcIdx)
+ : StInstBase<MemDataType, typename SrcDataType::OperandType,
+ AddrOperandType>(ib, obj, _opcode),
+ MemInst(SrcDataType::memType)
+ {
+ init_addr(&this->addr);
+
+ BrigRegOperandInfo rinfo;
+ unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
+ const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
+
+ if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
+ const Brig::BrigOperandConstantBytes *op =
+ (Brig::BrigOperandConstantBytes*)baseOp;
+
+ rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
+ Brig::BRIG_TYPE_NONE);
+ } else {
+ rinfo = findRegDataType(op_offs, obj);
+ }
+
+ if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+ const Brig::BrigOperandOperandList *brigRegVecOp =
+ (const Brig::BrigOperandOperandList*)baseOp;
+
+ num_src_operands =
+ *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
+
+ assert(num_src_operands <= 4);
+ } else {
+ num_src_operands = 1;
+ }
+
+ if (num_src_operands > 1) {
+ assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
+
+ for (int i = 0; i < num_src_operands; ++i) {
+ src_vect[i].init_from_vect(op_offs, obj, i);
+ }
+ }
+ }
+
+ void
+ initiateAcc(GPUDynInstPtr gpuDynInst) override
+ {
+ // before performing a store, check if this store has
+ // release semantics, and if so issue a release first
+ if (!isLocalMem()) {
+ if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+ && gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_RELEASE) {
+
+ gpuDynInst->statusBitVector = VectorMask(1);
+ gpuDynInst->execContinuation = &GPUStaticInst::execSt;
+ gpuDynInst->useContinuation = true;
+ // create request
+ Request *req = new Request(0, 0, 0, 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, -1);
+ req->setFlags(Request::RELEASE);
+ gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
+
+ return;
+ }
+ }
+
+ // if there is no release semantic, perform stores immediately
+ execSt(gpuDynInst);
+ }
+
+ bool
+ isLocalMem() const override
+ {
+ return this->segment == Brig::BRIG_SEGMENT_GROUP;
+ }
+
+ private:
+ // execSt may be called through a continuation
+ // if the store had release semantics. see comment for
+ // execSt in gpu_static_inst.hh
+ void
+ execSt(GPUDynInstPtr gpuDynInst) override
+ {
+ typedef typename MemDataType::CType c0;
+
+ gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
+
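+            // as with vector loads, statusVector records the number of
+            // responses each active lane still expects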
+ if (num_src_operands > 1) {
+ for (int i = 0; i < VSZ; ++i)
+ if (gpuDynInst->exec_mask[i])
+ gpuDynInst->statusVector.push_back(num_src_operands);
+ else
+ gpuDynInst->statusVector.push_back(0);
+ }
+
+ for (int k = 0; k < num_src_operands; ++k) {
+ c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+
+ for (int i = 0; i < VSZ; ++i) {
+ if (gpuDynInst->exec_mask[i]) {
+ Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
+
+ if (isLocalMem()) {
+                        // store to shared memory
+ gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
+ *d);
+ } else {
+ Request *req =
+ new Request(0, vaddr, sizeof(c0), 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, i);
+
+ gpuDynInst->setRequestFlags(req);
+ PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
+ pkt->dataStatic<c0>(d);
+
+ // translation is performed in sendRequest()
+ // the request will be finished when the store completes
+ gpuDynInst->useContinuation = false;
+ gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
+ i, pkt);
+
+ }
+ }
+ ++d;
+ }
+ }
+
+ gpuDynInst->updateStats();
+ }
+
+ public:
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.isVectorRegister();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].isVectorRegister();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.isVectorRegister();
+ return false;
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.isCondRegister();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].isCondRegister();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.isCondRegister();
+ return false;
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.isScalarRegister();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].isScalarRegister();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.isScalarRegister();
+ return false;
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return true;
+ }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.opSize();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].opSize();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.opSize();
+ return 0;
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.regIndex();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].regIndex();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.regIndex();
+ return -1;
+ }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
+ return num_src_operands + 1;
+ else
+ return num_src_operands;
+ }
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ template<typename DataType, typename SrcDataType>
+ GPUStaticInst*
+ decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ int srcIdx = 0;
+ int destIdx = 1;
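+        // a plain st encodes its source value as operand 0 and the
+        // address as operand 1; atomic stores (reached via
+        // constructAtomic) encode the address first, so swap the
+        // indices for the atomic opcodes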
+ if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
+ ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
+ srcIdx = 1;
+ destIdx = 0;
+ }
+ unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
+
+ BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
+
+ if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
+ return new StInst<DataType, SrcDataType,
+ NoRegAddrOperand>(ib, obj, "st", srcIdx);
+ } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ // V2/V4 not allowed
+ switch (tmp.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ return new StInst<DataType, SrcDataType,
+ SRegAddrOperand>(ib, obj, "st", srcIdx);
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ return new StInst<DataType, SrcDataType,
+ DRegAddrOperand>(ib, obj, "st", srcIdx);
+ default:
+            fatal("Bad st register operand type %d\n", tmp.regKind);
+ }
+ } else {
+ fatal("Bad st register operand kind %d\n", tmp.kind);
+ }
+ }
+
+ Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
+ Brig::BrigAtomicOperation brigOp);
+
+ template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
+ bool HasDst>
+ class AtomicInstBase : public HsailGPUStaticInst
+ {
+ public:
+ typename OperandType::DestOperand dest;
+ typename OperandType::SrcOperand src[NumSrcOperands];
+ AddrOperandType addr;
+
+ Brig::BrigSegment segment;
+ Brig::BrigMemoryOrder memoryOrder;
+ Brig::BrigAtomicOperation atomicOperation;
+ Brig::BrigMemoryScope memoryScope;
+ Brig::BrigOpcode opcode;
+ Enums::MemOpType opType;
+
+ AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
+
+ segment = (BrigSegment)at->segment;
+ memoryScope = (BrigMemoryScope)at->memoryScope;
+ memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+ atomicOperation = (BrigAtomicOperation)at->atomicOperation;
+ opcode = (BrigOpcode)ib->opcode;
+ opType = brigAtomicToMemOpType(opcode, atomicOperation);
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_ATOMIC;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_ATOMIC;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_ATOMIC;
+ break;
+
+ default:
+ panic("Atomic: segment %d not supported\n", segment);
+ }
+
+ if (HasDst) {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+
+ for (int i = 0; i < NumSrcOperands; ++i) {
+ op_offs = obj->getOperandPtr(ib->operands, i + 2);
+ src[i].init(op_offs, obj);
+ }
+ } else {
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ addr.init(op_offs, obj);
+
+ for (int i = 0; i < NumSrcOperands; ++i) {
+ op_offs = obj->getOperandPtr(ib->operands, i + 1);
+ src[i].init(op_offs, obj);
+ }
+ }
+ }
+
+ int numSrcRegOperands()
+ {
+ int operands = 0;
+ for (int i = 0; i < NumSrcOperands; i++) {
+ if (src[i].isVectorRegister() == true) {
+ operands++;
+ }
+ }
+ if (addr.isVectorRegister())
+ operands++;
+ return operands;
+ }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands()
+ {
+ if (addr.isVectorRegister())
+ return(NumSrcOperands + 2);
+ return(NumSrcOperands + 1);
+ }
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isVectorRegister();
+ else if (operandIndex == NumSrcOperands)
+ return(addr.isVectorRegister());
+ else
+ return dest.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isCondRegister();
+ else if (operandIndex == NumSrcOperands)
+ return(addr.isCondRegister());
+ else
+ return dest.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isScalarRegister();
+ else if (operandIndex == NumSrcOperands)
+ return(addr.isScalarRegister());
+ else
+ return dest.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return true;
+ else if (operandIndex == NumSrcOperands)
+ return(addr.isVectorRegister());
+ else
+ return false;
+ }
+ bool isDstOperand(int operandIndex)
+ {
+ if (operandIndex <= NumSrcOperands)
+ return false;
+ else
+ return true;
+ }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return(src[operandIndex].opSize());
+ else if (operandIndex == NumSrcOperands)
+ return(addr.opSize());
+ else
+ return(dest.opSize());
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return(src[operandIndex].regIndex());
+ else if (operandIndex == NumSrcOperands)
+ return(addr.regIndex());
+ else
+ return(dest.regIndex());
+ }
+ };
+
+ template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
+ bool HasDst>
+ class AtomicInst :
+ public AtomicInstBase<typename MemDataType::OperandType,
+ AddrOperandType, NumSrcOperands, HasDst>,
+ public MemInst
+ {
+ public:
+ void generateDisassembly();
+
+ AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
+ NumSrcOperands, HasDst>
+ (ib, obj, _opcode),
+ MemInst(MemDataType::memType)
+ {
+ init_addr(&this->addr);
+ }
+
+ void
+ initiateAcc(GPUDynInstPtr gpuDynInst) override
+ {
+ // before doing the RMW, check if this atomic has
+ // release semantics, and if so issue a release first
+ if (!isLocalMem()) {
+ if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+ && (gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
+
+ gpuDynInst->statusBitVector = VectorMask(1);
+
+ gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
+ gpuDynInst->useContinuation = true;
+
+ // create request
+ Request *req = new Request(0, 0, 0, 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, -1);
+ req->setFlags(Request::RELEASE);
+ gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
+
+ return;
+ }
+ }
+
+ // if there is no release semantic, execute the RMW immediately
+ execAtomic(gpuDynInst);
+
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+
+ bool
+ isLocalMem() const override
+ {
+ return this->segment == Brig::BRIG_SEGMENT_GROUP;
+ }
+
+ private:
+ // execAtomic may be called through a continuation
+ // if the RMW had release semantics. see comment for
+ // execContinuation in gpu_dyn_inst.hh
+ void
+ execAtomic(GPUDynInstPtr gpuDynInst) override
+ {
+ gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
+
+ typedef typename MemDataType::CType c0;
+
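+            // d points at the per-lane destination buffer (it receives
+            // the value read back from memory), e at the per-lane first
+            // source operand, and f at the per-lane second source
+            // operand (for CAS, e is the compare value and f the swap
+            // value)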
+ c0 *d = &((c0*) gpuDynInst->d_data)[0];
+ c0 *e = &((c0*) gpuDynInst->a_data)[0];
+ c0 *f = &((c0*) gpuDynInst->x_data)[0];
+
+ for (int i = 0; i < VSZ; ++i) {
+ if (gpuDynInst->exec_mask[i]) {
+ Addr vaddr = gpuDynInst->addr[i];
+
+ if (isLocalMem()) {
+ Wavefront *wavefront = gpuDynInst->wavefront();
+ *d = wavefront->ldsChunk->read<c0>(vaddr);
+
+ switch (this->opType) {
+ case Enums::MO_AADD:
+ case Enums::MO_ANRADD:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) + (*e));
+ break;
+ case Enums::MO_ASUB:
+ case Enums::MO_ANRSUB:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) - (*e));
+ break;
+ case Enums::MO_AMAX:
+ case Enums::MO_ANRMAX:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ std::max(wavefront->ldsChunk->read<c0>(vaddr),
+ (*e)));
+ break;
+ case Enums::MO_AMIN:
+ case Enums::MO_ANRMIN:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ std::min(wavefront->ldsChunk->read<c0>(vaddr),
+ (*e)));
+ break;
+ case Enums::MO_AAND:
+ case Enums::MO_ANRAND:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) & (*e));
+ break;
+ case Enums::MO_AOR:
+ case Enums::MO_ANROR:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) | (*e));
+ break;
+ case Enums::MO_AXOR:
+ case Enums::MO_ANRXOR:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
+ break;
+ case Enums::MO_AINC:
+ case Enums::MO_ANRINC:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) + 1);
+ break;
+ case Enums::MO_ADEC:
+ case Enums::MO_ANRDEC:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) - 1);
+ break;
+ case Enums::MO_AEXCH:
+ case Enums::MO_ANREXCH:
+ wavefront->ldsChunk->write<c0>(vaddr, (*e));
+ break;
+ case Enums::MO_ACAS:
+ case Enums::MO_ANRCAS:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
+ (*f) : wavefront->ldsChunk->read<c0>(vaddr));
+ break;
+ default:
+ fatal("Unrecognized or invalid HSAIL atomic op "
+ "type.\n");
+ break;
+ }
+ } else {
+ Request *req =
+ new Request(0, vaddr, sizeof(c0), 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, i,
+ gpuDynInst->makeAtomicOpFunctor<c0>(e,
+ f, this->opType));
+
+ gpuDynInst->setRequestFlags(req);
+ PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
+ pkt->dataStatic(d);
+
+ if (gpuDynInst->computeUnit()->shader->
+ separate_acquire_release &&
+ (gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE)) {
+ // if this atomic has acquire semantics,
+ // schedule the continuation to perform an
+ // acquire after the RMW completes
+ gpuDynInst->execContinuation =
+ &GPUStaticInst::execAtomicAcq;
+
+ gpuDynInst->useContinuation = true;
+ } else {
+ // the request will be finished when the RMW completes
+ gpuDynInst->useContinuation = false;
+ }
+ // translation is performed in sendRequest()
+ gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
+ pkt);
+ }
+ }
+
+ ++d;
+ ++e;
+ ++f;
+ }
+
+ gpuDynInst->updateStats();
+ }
+
+        // execAtomicAcq will always be called through a continuation.
+ // see comment for execContinuation in gpu_dyn_inst.hh
+ void
+ execAtomicAcq(GPUDynInstPtr gpuDynInst) override
+ {
+ // after performing the RMW, check to see if this instruction
+ // has acquire semantics, and if so, issue an acquire
+ if (!isLocalMem()) {
+ if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+ && gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ gpuDynInst->statusBitVector = VectorMask(1);
+
+ // the request will be finished when
+ // the acquire completes
+ gpuDynInst->useContinuation = false;
+ // create request
+ Request *req = new Request(0, 0, 0, 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, -1);
+ req->setFlags(Request::ACQUIRE);
+ gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
+ }
+ }
+ }
+ };
+
+ template<typename DataType, typename AddrOperandType, int NumSrcOperands>
+ GPUStaticInst*
+ constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
+
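+        // atomic_ld and atomic_st involve no read-modify-write, so they
+        // are lowered onto the regular load and store decode paths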
+ if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
+ return decodeLd<DataType>(ib, obj);
+ } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
+ switch (ib->type) {
+ case Brig::BRIG_TYPE_B8:
+ return decodeSt<S8,S8>(ib, obj);
+ case Brig::BRIG_TYPE_B16:
+ return decodeSt<S8,S16>(ib, obj);
+ case Brig::BRIG_TYPE_B32:
+ return decodeSt<S8,S32>(ib, obj);
+ case Brig::BRIG_TYPE_B64:
+ return decodeSt<S8,S64>(ib, obj);
+ default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
+ }
+ } else {
+ if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
+ return new AtomicInst<DataType, AddrOperandType,
+ NumSrcOperands, false>(ib, obj, "atomicnoret");
+ else
+ return new AtomicInst<DataType, AddrOperandType,
+ NumSrcOperands, true>(ib, obj, "atomic");
+ }
+ }
+
+ template<typename DataType, int NumSrcOperands>
+ GPUStaticInst*
+ decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
+ Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
+
+ BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
+
+ if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
+ return constructAtomic<DataType, NoRegAddrOperand,
+ NumSrcOperands>(ib, obj);
+ } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ // V2/V4 not allowed
+ switch (tmp.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ return constructAtomic<DataType, SRegAddrOperand,
+ NumSrcOperands>(ib, obj);
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ return constructAtomic<DataType, DRegAddrOperand,
+ NumSrcOperands>(ib, obj);
+ default:
+            fatal("Bad atomic register operand type %d\n", tmp.regKind);
+ }
+ } else {
+ fatal("Bad atomic register operand kind %d\n", tmp.kind);
+ }
+ }
+
+
+ template<typename DataType>
+ GPUStaticInst*
+ decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
+
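+        // CAS is the only atomic operation with two source operands
+        // (the compare value and the swap value); all others take one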
+ if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
+ return decodeAtomicHelper<DataType, 2>(ib, obj);
+ } else {
+ return decodeAtomicHelper<DataType, 1>(ib, obj);
+ }
+ }
+
+ template<typename DataType>
+ GPUStaticInst*
+ decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
+ if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
+ return decodeAtomicHelper<DataType, 2>(ib, obj);
+ } else {
+ return decodeAtomicHelper<DataType, 1>(ib, obj);
+ }
+ }
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_INSTS_MEM_HH__
diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh
new file mode 100644
index 000000000..94f0cd6aa
--- /dev/null
+++ b/src/arch/hsail/insts/mem_impl.hh
@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#include "arch/hsail/generic_types.hh"
+#include "gpu-compute/hsail_code.hh"
+
+// defined in code.cc, but not worth sucking in all of code.h for this
+// at this point
+extern const char *segmentNames[];
+
+namespace HsailISA
+{
+ template<typename DestDataType, typename AddrRegOperandType>
+ void
+ LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
+ {
+ this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
+ DestDataType::label,
+ this->dest.disassemble(),
+ this->addr.disassemble());
+ }
+
+ template<typename DestDataType, typename AddrRegOperandType>
+ void
+ LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef typename DestDataType::CType CType M5_VAR_USED;
+ const VectorMask &mask = w->get_pred();
+ uint64_t addr_vec[VSZ];
+ this->addr.calcVector(w, addr_vec);
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ this->dest.set(w, lane, addr_vec[lane]);
+ }
+ }
+ }
+
+ template<typename MemDataType, typename DestDataType,
+ typename AddrRegOperandType>
+ void
+ LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
+ {
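+        // produces strings along the lines of "ld_global_u32 $s1,[$d0]"
+        // for a scalar load, or "ld_global_u32 ($s1,$s2),[$d0]" for the
+        // v2/v4 forms; the exact operand syntax depends on the operand
+        // classes' disassemble() implementations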
+ switch (num_dest_operands) {
+ case 1:
+ this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
+ segmentNames[this->segment],
+ MemDataType::label,
+ this->dest.disassemble(),
+ this->addr.disassemble());
+ break;
+ case 2:
+ this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
+ segmentNames[this->segment],
+ MemDataType::label,
+ this->dest_vect[0].disassemble(),
+ this->dest_vect[1].disassemble(),
+ this->addr.disassemble());
+ break;
+ case 4:
+ this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
+ this->opcode,
+ segmentNames[this->segment],
+ MemDataType::label,
+ this->dest_vect[0].disassemble(),
+ this->dest_vect[1].disassemble(),
+ this->dest_vect[2].disassemble(),
+ this->dest_vect[3].disassemble(),
+ this->addr.disassemble());
+ break;
+ default:
+            fatal("Bad ld register dest operand, num vector operands: %d\n",
+ num_dest_operands);
+ break;
+ }
+ }
+
+ static Addr
+ calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
+ {
+        // What is the size of the object we are accessing?
+        // NOTE: the compiler doesn't generate enough information
+        // to do this yet, so we have to line up all the private
+        // work-item spaces back to back for now
+ /*
+ StorageElement* se =
+ i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
+ assert(se);
+
+ return w->wfSlotId * w->privSizePerItem * VSZ +
+ se->offset * VSZ +
+ lane * se->size;
+ */
+
+ // addressing strategy: interleave the private spaces of
+ // work-items in a wave-front on 8 byte granularity.
+ // this won't be perfect coalescing like the spill space
+ // strategy, but it's better than nothing. The spill space
+ // strategy won't work with private because the same address
+ // may be accessed by different sized loads/stores.
+
+ // Note: I'm assuming that the largest load/store to private
+ // is 8 bytes. If it is larger, the stride will have to increase
+
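+        // worked example (with VSZ = 64): lane 5 accessing private
+        // offset 0x12 gives addr_div8 = 2 and addr_mod8 = 2, so the
+        // flat address is 2 * 8 * 64 + 5 * 8 + 2 + privBase
+        //              = privBase + 1066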
+ Addr addr_div8 = addr / 8;
+ Addr addr_mod8 = addr % 8;
+
+ Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase;
+
+ assert(ret < w->privBase + (w->privSizePerItem * VSZ));
+
+ return ret;
+ }
+
+ template<typename MemDataType, typename DestDataType,
+ typename AddrRegOperandType>
+ void
+ LdInst<MemDataType, DestDataType,
+ AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef typename MemDataType::CType MemCType;
+ const VectorMask &mask = w->get_pred();
+
+ // Kernarg references are handled uniquely for now (no Memory Request
+ // is used), so special-case them up front. Someday we should
+        // make this more realistic, at which point we should get rid of this
+ // block and fold this case into the switch below.
+ if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
+ MemCType val;
+
+ // I assume no vector ld for kernargs
+ assert(num_dest_operands == 1);
+
+ // assuming for the moment that we'll never do register
+ // offsets into kernarg space... just to make life simpler
+ uint64_t address = this->addr.calcUniform();
+
+ val = *(MemCType*)&w->kernelArgs[address];
+
+ DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ this->dest.set(w, lane, val);
+ }
+ }
+
+ return;
+ } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
+ uint64_t address = this->addr.calcUniform();
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ MemCType val = w->readCallArgMem<MemCType>(lane, address);
+
+ DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
+ (unsigned long long)val);
+
+ this->dest.set(w, lane, val);
+ }
+ }
+
+ return;
+ }
+
+ GPUDynInstPtr m = gpuDynInst;
+
+ this->addr.calcVector(w, m->addr);
+
+ m->m_op = Enums::MO_LD;
+ m->m_type = MemDataType::memType;
+ m->v_type = DestDataType::vgprType;
+
+ m->exec_mask = w->execMask();
+ m->statusBitVector = 0;
+ m->equiv = this->equivClass;
+ m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
+
+ m->scope = getGenericMemoryScope(this->memoryScope);
+
+ if (num_dest_operands == 1) {
+ m->dst_reg = this->dest.regIndex();
+ m->n_reg = 1;
+ } else {
+ m->n_reg = num_dest_operands;
+ for (int i = 0; i < num_dest_operands; ++i) {
+ m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
+ }
+ }
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->kern_id = w->kern_id;
+ m->cu_id = w->computeUnit->cu_id;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
+ switch (this->segment) {
+ case Brig::BRIG_SEGMENT_GLOBAL:
+ m->s_type = SEG_GLOBAL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+
+            // this is a complete hack to get around a compiler bug:
+            // the compiler currently generates global accesses for private
+            // addresses (starting from 0), so we need to add the private offset
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (m->addr[lane] < w->privSizePerItem) {
+ if (mask[lane]) {
+ // what is the size of the object we are accessing?
+                    // find the base for this wavefront
+
+ // calcPrivAddr will fail if accesses are unaligned
+ assert(!((sizeof(MemCType) - 1) & m->addr[lane]));
+
+ Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
+ this);
+
+ m->addr[lane] = privAddr;
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_SPILL:
+ assert(num_dest_operands == 1);
+ m->s_type = SEG_SPILL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ // note: this calculation will NOT WORK if the compiler
+ // ever generates loads/stores to the same address with
+ // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
+ if (mask[lane]) {
+ assert(m->addr[lane] < w->spillSizePerItem);
+
+ m->addr[lane] = m->addr[lane] * w->spillWidth +
+ lane * sizeof(MemCType) + w->spillBase;
+
+ w->last_addr[lane] = m->addr[lane];
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_GROUP:
+ m->s_type = SEG_SHARED;
+ m->pipeId = LDSMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(24));
+ w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
+ w->outstanding_reqs_rd_lm++;
+ w->rd_lm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_READONLY:
+ m->s_type = SEG_READONLY;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
+ m->addr[lane] += w->roBase;
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_PRIVATE:
+ m->s_type = SEG_PRIVATE;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ assert(m->addr[lane] < w->privSizePerItem);
+
+ m->addr[lane] = m->addr[lane] +
+ lane * sizeof(MemCType) + w->privBase;
+ }
+ }
+ }
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ default:
+            fatal("Load to unsupported segment %d %llx\n", this->segment,
+ m->addr[0]);
+ }
+
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
+
+ template<typename OperationType, typename SrcDataType,
+ typename AddrRegOperandType>
+ void
+ StInst<OperationType, SrcDataType,
+ AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef typename OperationType::CType CType;
+
+ const VectorMask &mask = w->get_pred();
+
+ // arg references are handled uniquely for now (no Memory Request
+ // is used), so special-case them up front. Someday we should
+        // make this more realistic, at which point we should get rid of this
+ // block and fold this case into the switch below.
+ if (this->segment == Brig::BRIG_SEGMENT_ARG) {
+ uint64_t address = this->addr.calcUniform();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ CType data = this->src.template get<CType>(w, lane);
+ DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
+ w->writeCallArgMem<CType>(lane, address, data);
+ }
+ }
+
+ return;
+ }
+
+ GPUDynInstPtr m = gpuDynInst;
+
+ m->exec_mask = w->execMask();
+
+ this->addr.calcVector(w, m->addr);
+
+ if (num_src_operands == 1) {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ ((CType*)m->d_data)[lane] =
+ this->src.template get<CType>(w, lane);
+ }
+ }
+ } else {
+            for (int k = 0; k < num_src_operands; ++k) {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ ((CType*)m->d_data)[k * VSZ + lane] =
+ this->src_vect[k].template get<CType>(w, lane);
+ }
+ }
+ }
+ }
+
+ m->m_op = Enums::MO_ST;
+ m->m_type = OperationType::memType;
+ m->v_type = OperationType::vgprType;
+
+ m->statusBitVector = 0;
+ m->equiv = this->equivClass;
+
+ if (num_src_operands == 1) {
+ m->n_reg = 1;
+ } else {
+ m->n_reg = num_src_operands;
+ }
+
+ m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
+
+ m->scope = getGenericMemoryScope(this->memoryScope);
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->kern_id = w->kern_id;
+ m->cu_id = w->computeUnit->cu_id;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
+ switch (this->segment) {
+ case Brig::BRIG_SEGMENT_GLOBAL:
+ m->s_type = SEG_GLOBAL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+
+            // this is a complete hack to get around a compiler bug:
+            // the compiler currently generates global accesses for private
+            // addresses (starting from 0), so we need to add the private offset
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ if (m->addr[lane] < w->privSizePerItem) {
+
+ // calcPrivAddr will fail if accesses are unaligned
+ assert(!((sizeof(CType)-1) & m->addr[lane]));
+
+ Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
+ this);
+
+ m->addr[lane] = privAddr;
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_SPILL:
+ assert(num_src_operands == 1);
+ m->s_type = SEG_SPILL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ assert(m->addr[lane] < w->spillSizePerItem);
+
+ m->addr[lane] = m->addr[lane] * w->spillWidth +
+ lane * sizeof(CType) + w->spillBase;
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_GROUP:
+ m->s_type = SEG_SHARED;
+ m->pipeId = LDSMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(24));
+ w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
+ w->outstanding_reqs_wr_lm++;
+ w->wr_lm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_PRIVATE:
+ m->s_type = SEG_PRIVATE;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ assert(m->addr[lane] < w->privSizePerItem);
+ m->addr[lane] = m->addr[lane] + lane *
+                            sizeof(CType) + w->privBase;
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ break;
+
+ default:
+ fatal("Store to unsupported segment %d\n", this->segment);
+ }
+
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
+
+ template<typename OperationType, typename SrcDataType,
+ typename AddrRegOperandType>
+ void
+ StInst<OperationType, SrcDataType,
+ AddrRegOperandType>::generateDisassembly()
+ {
+ switch (num_src_operands) {
+ case 1:
+ this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
+ segmentNames[this->segment],
+ OperationType::label,
+ this->src.disassemble(),
+ this->addr.disassemble());
+ break;
+ case 2:
+ this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
+ segmentNames[this->segment],
+ OperationType::label,
+ this->src_vect[0].disassemble(),
+ this->src_vect[1].disassemble(),
+ this->addr.disassemble());
+ break;
+ case 4:
+ this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
+ this->opcode,
+ segmentNames[this->segment],
+ OperationType::label,
+ this->src_vect[0].disassemble(),
+ this->src_vect[1].disassemble(),
+ this->src_vect[2].disassemble(),
+ this->src_vect[3].disassemble(),
+ this->addr.disassemble());
+ break;
+        default: fatal("Bad st register src operand, num vector operands: "
+                       "%d\n", num_src_operands);
+ break;
+ }
+ }
+
+ template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
+ bool HasDst>
+ void
+ AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
+ HasDst>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ typedef typename DataType::CType CType;
+
+ Wavefront *w = gpuDynInst->wavefront();
+
+ GPUDynInstPtr m = gpuDynInst;
+
+ this->addr.calcVector(w, m->addr);
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ ((CType *)m->a_data)[lane] =
+ this->src[0].template get<CType>(w, lane);
+ }
+
+ // load second source operand for CAS
+ if (NumSrcOperands > 1) {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ ((CType*)m->x_data)[lane] =
+ this->src[1].template get<CType>(w, lane);
+ }
+ }
+
+ assert(NumSrcOperands <= 2);
+
+ m->m_op = this->opType;
+ m->m_type = DataType::memType;
+ m->v_type = DataType::vgprType;
+
+ m->exec_mask = w->execMask();
+ m->statusBitVector = 0;
+ m->equiv = 0; // atomics don't have an equivalence class operand
+ m->n_reg = 1;
+ m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
+
+ m->scope = getGenericMemoryScope(this->memoryScope);
+
+ if (HasDst) {
+ m->dst_reg = this->dest.regIndex();
+ }
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->kern_id = w->kern_id;
+ m->cu_id = w->computeUnit->cu_id;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
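+        // route the atomic to the appropriate memory pipeline; an atomic
+        // counts as both a read and a write, so both request counters
+        // are updated, and the latencies (64 ticks global, 24 ticks LDS)
+        // are nominal values used by the timing model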
+ switch (this->segment) {
+ case Brig::BRIG_SEGMENT_GLOBAL:
+ m->s_type = SEG_GLOBAL;
+ m->latency.set(w->computeUnit->shader->ticks(64));
+ m->pipeId = GLBMEM_PIPE;
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_GROUP:
+ m->s_type = SEG_SHARED;
+ m->pipeId = LDSMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(24));
+ w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
+ w->outstanding_reqs_wr_lm++;
+ w->wr_lm_reqs_in_pipe--;
+ w->outstanding_reqs_rd_lm++;
+ w->rd_lm_reqs_in_pipe--;
+ break;
+
+ default:
+ fatal("Atomic op to unsupported segment %d\n",
+ this->segment);
+ }
+
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
+
+ const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
+
+ template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
+ bool HasDst>
+ void
+ AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
+ HasDst>::generateDisassembly()
+ {
+ if (HasDst) {
+ this->disassembly =
+ csprintf("%s_%s_%s_%s %s,%s", this->opcode,
+ atomicOpToString(this->atomicOperation),
+ segmentNames[this->segment],
+ DataType::label, this->dest.disassemble(),
+ this->addr.disassemble());
+ } else {
+ this->disassembly =
+ csprintf("%s_%s_%s_%s %s", this->opcode,
+ atomicOpToString(this->atomicOperation),
+ segmentNames[this->segment],
+ DataType::label, this->addr.disassemble());
+ }
+
+ for (int i = 0; i < NumSrcOperands; ++i) {
+ this->disassembly += ",";
+ this->disassembly += this->src[i].disassemble();
+ }
+ }
+} // namespace HsailISA
diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc
new file mode 100644
index 000000000..9506a80ab
--- /dev/null
+++ b/src/arch/hsail/insts/pseudo_inst.cc
@@ -0,0 +1,787 @@
+/*
+ * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Marc Orr
+ */
+
+#include <csignal>
+
+#include "arch/hsail/insts/decl.hh"
+#include "arch/hsail/insts/mem.hh"
+
+namespace HsailISA
+{
+ // Pseudo (or magic) instructions are overloaded on the hsail call
+ // instruction, because of its flexible parameter signature.
+
+ // To add a new magic instruction:
+ // 1. Add an entry to the enum.
+ // 2. Implement it in the switch statement below (Call::exec).
+ // 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h,
+    // so it's easy to call from an OpenCL kernel.
+
+ // This enum should be identical to the enum in
+ // hsa/hsail-gpu-compute/util/magicinst.h
+ enum
+ {
+ MAGIC_PRINT_WF_32 = 0,
+ MAGIC_PRINT_WF_64,
+ MAGIC_PRINT_LANE,
+ MAGIC_PRINT_LANE_64,
+ MAGIC_PRINT_WF_FLOAT,
+ MAGIC_SIM_BREAK,
+ MAGIC_PREF_SUM,
+ MAGIC_REDUCTION,
+ MAGIC_MASKLANE_LOWER,
+ MAGIC_MASKLANE_UPPER,
+ MAGIC_JOIN_WF_BAR,
+ MAGIC_WAIT_WF_BAR,
+ MAGIC_PANIC,
+ MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG,
+ MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG,
+ MAGIC_LOAD_GLOBAL_U32_REG,
+ MAGIC_XACT_CAS_LD,
+ MAGIC_MOST_SIG_THD,
+ MAGIC_MOST_SIG_BROADCAST,
+ MAGIC_PRINT_WFID_32,
+ MAGIC_PRINT_WFID_64
+ };
+
+ void
+ Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
+ {
+ const VectorMask &mask = w->get_pred();
+
+ int op = 0;
+ bool got_op = false;
+
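+        // scan the active lanes for the magic opcode, which is passed
+        // as element 0 of src1; all active lanes must agree on it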
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ int src_val0 = src1.get<int>(w, lane, 0);
+ if (got_op) {
+ if (src_val0 != op) {
+ fatal("Multiple magic instructions per PC not "
+ "supported\n");
+ }
+ } else {
+ op = src_val0;
+ got_op = true;
+ }
+ }
+ }
+
+ switch(op) {
+ case MAGIC_PRINT_WF_32:
+ MagicPrintWF32(w);
+ break;
+ case MAGIC_PRINT_WF_64:
+ MagicPrintWF64(w);
+ break;
+ case MAGIC_PRINT_LANE:
+ MagicPrintLane(w);
+ break;
+ case MAGIC_PRINT_LANE_64:
+ MagicPrintLane64(w);
+ break;
+ case MAGIC_PRINT_WF_FLOAT:
+ MagicPrintWFFloat(w);
+ break;
+ case MAGIC_SIM_BREAK:
+ MagicSimBreak(w);
+ break;
+ case MAGIC_PREF_SUM:
+ MagicPrefixSum(w);
+ break;
+ case MAGIC_REDUCTION:
+ MagicReduction(w);
+ break;
+ case MAGIC_MASKLANE_LOWER:
+ MagicMaskLower(w);
+ break;
+ case MAGIC_MASKLANE_UPPER:
+ MagicMaskUpper(w);
+ break;
+ case MAGIC_JOIN_WF_BAR:
+ MagicJoinWFBar(w);
+ break;
+ case MAGIC_WAIT_WF_BAR:
+ MagicWaitWFBar(w);
+ break;
+ case MAGIC_PANIC:
+ MagicPanic(w);
+ break;
+
+ // atomic instructions
+ case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG:
+ MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst);
+ break;
+
+ case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG:
+ MagicAtomicNRAddGroupU32Reg(w, gpuDynInst);
+ break;
+
+ case MAGIC_LOAD_GLOBAL_U32_REG:
+ MagicLoadGlobalU32Reg(w, gpuDynInst);
+ break;
+
+ case MAGIC_XACT_CAS_LD:
+ MagicXactCasLd(w);
+ break;
+
+ case MAGIC_MOST_SIG_THD:
+ MagicMostSigThread(w);
+ break;
+
+ case MAGIC_MOST_SIG_BROADCAST:
+ MagicMostSigBroadcast(w);
+ break;
+
+ case MAGIC_PRINT_WFID_32:
+ MagicPrintWF32ID(w);
+ break;
+
+ case MAGIC_PRINT_WFID_64:
+ MagicPrintWFID64(w);
+ break;
+
+ default: fatal("unrecognized magic instruction: %d\n", op);
+ }
+ }
+
+ void
+ Call::MagicPrintLane(Wavefront *w)
+ {
+ #if TRACING_ON
+ const VectorMask &mask = w->get_pred();
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+ int src_val2 = src1.get<int>(w, lane, 2);
+ if (src_val2) {
+ DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
+ disassemble(), w->computeUnit->cu_id, w->simdId,
+ w->wfSlotId, lane, src_val1);
+ } else {
+ DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
+ disassemble(), w->computeUnit->cu_id, w->simdId,
+ w->wfSlotId, lane, src_val1);
+ }
+ }
+ }
+ #endif
+ }
+
+ void
+ Call::MagicPrintLane64(Wavefront *w)
+ {
+ #if TRACING_ON
+ const VectorMask &mask = w->get_pred();
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
+ int src_val2 = src1.get<int>(w, lane, 2);
+ if (src_val2) {
+ DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
+ disassemble(), w->computeUnit->cu_id, w->simdId,
+ w->wfSlotId, lane, src_val1);
+ } else {
+ DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
+ disassemble(), w->computeUnit->cu_id, w->simdId,
+ w->wfSlotId, lane, src_val1);
+ }
+ }
+ }
+ #endif
+ }
+
+ void
+ Call::MagicPrintWF32(Wavefront *w)
+ {
+ #if TRACING_ON
+ const VectorMask &mask = w->get_pred();
+ std::string res_str;
+ res_str = csprintf("krl_prt (%s)\n", disassemble());
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (!(lane & 7)) {
+ res_str += csprintf("DB%03d: ", (int)w->wfDynId);
+ }
+
+ if (mask[lane]) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+ int src_val2 = src1.get<int>(w, lane, 2);
+
+ if (src_val2) {
+ res_str += csprintf("%08x", src_val1);
+ } else {
+ res_str += csprintf("%08d", src_val1);
+ }
+ } else {
+ res_str += csprintf("xxxxxxxx");
+ }
+
+ if ((lane & 7) == 7) {
+ res_str += csprintf("\n");
+ } else {
+ res_str += csprintf(" ");
+ }
+ }
+
+ res_str += "\n\n";
+ DPRINTFN(res_str.c_str());
+ #endif
+ }
+
+ void
+ Call::MagicPrintWF32ID(Wavefront *w)
+ {
+ #if TRACING_ON
+ const VectorMask &mask = w->get_pred();
+ std::string res_str;
+ int src_val3 = -1;
+ res_str = csprintf("krl_prt (%s)\n", disassemble());
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (!(lane & 7)) {
+ res_str += csprintf("DB%03d: ", (int)w->wfDynId);
+ }
+
+ if (mask[lane]) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+ int src_val2 = src1.get<int>(w, lane, 2);
+ src_val3 = src1.get<int>(w, lane, 3);
+
+ if (src_val2) {
+ res_str += csprintf("%08x", src_val1);
+ } else {
+ res_str += csprintf("%08d", src_val1);
+ }
+ } else {
+ res_str += csprintf("xxxxxxxx");
+ }
+
+ if ((lane & 7) == 7) {
+ res_str += csprintf("\n");
+ } else {
+ res_str += csprintf(" ");
+ }
+ }
+
+ res_str += "\n\n";
+ if (w->wfDynId == src_val3) {
+ DPRINTFN(res_str.c_str());
+ }
+ #endif
+ }
+
+ void
+ Call::MagicPrintWF64(Wavefront *w)
+ {
+ #if TRACING_ON
+ const VectorMask &mask = w->get_pred();
+ std::string res_str;
+ res_str = csprintf("krl_prt (%s)\n", disassemble());
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (!(lane & 3)) {
+ res_str += csprintf("DB%03d: ", (int)w->wfDynId);
+ }
+
+ if (mask[lane]) {
+ int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
+ int src_val2 = src1.get<int>(w, lane, 2);
+
+ if (src_val2) {
+ res_str += csprintf("%016x", src_val1);
+ } else {
+ res_str += csprintf("%016d", src_val1);
+ }
+ } else {
+ res_str += csprintf("xxxxxxxxxxxxxxxx");
+ }
+
+ if ((lane & 3) == 3) {
+ res_str += csprintf("\n");
+ } else {
+ res_str += csprintf(" ");
+ }
+ }
+
+ res_str += "\n\n";
+ DPRINTFN(res_str.c_str());
+ #endif
+ }
+
+ void
+ Call::MagicPrintWFID64(Wavefront *w)
+ {
+ #if TRACING_ON
+ const VectorMask &mask = w->get_pred();
+ std::string res_str;
+ int src_val3 = -1;
+ res_str = csprintf("krl_prt (%s)\n", disassemble());
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (!(lane & 3)) {
+ res_str += csprintf("DB%03d: ", (int)w->wfDynId);
+ }
+
+ if (mask[lane]) {
+ int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
+ int src_val2 = src1.get<int>(w, lane, 2);
+ src_val3 = src1.get<int>(w, lane, 3);
+
+ if (src_val2) {
+ res_str += csprintf("%016x", src_val1);
+ } else {
+ res_str += csprintf("%016d", src_val1);
+ }
+ } else {
+ res_str += csprintf("xxxxxxxxxxxxxxxx");
+ }
+
+ if ((lane & 3) == 3) {
+ res_str += csprintf("\n");
+ } else {
+ res_str += csprintf(" ");
+ }
+ }
+
+ res_str += "\n\n";
+ if (w->wfDynId == src_val3) {
+ DPRINTFN(res_str.c_str());
+ }
+ #endif
+ }
+
+ void
+ Call::MagicPrintWFFloat(Wavefront *w)
+ {
+ #if TRACING_ON
+ const VectorMask &mask = w->get_pred();
+ std::string res_str;
+ res_str = csprintf("krl_prt (%s)\n", disassemble());
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (!(lane & 7)) {
+ res_str += csprintf("DB%03d: ", (int)w->wfDynId);
+ }
+
+ if (mask[lane]) {
+ float src_val1 = src1.get<float>(w, lane, 1);
+ res_str += csprintf("%08f", src_val1);
+ } else {
+ res_str += csprintf("xxxxxxxx");
+ }
+
+ if ((lane & 7) == 7) {
+ res_str += csprintf("\n");
+ } else {
+ res_str += csprintf(" ");
+ }
+ }
+
+ res_str += "\n\n";
+ DPRINTFN(res_str.c_str());
+ #endif
+ }
+
+ // raises a signal that GDB will catch; when done with the breakpoint,
+ // type "signal 0" in GDB to continue
+ void
+ Call::MagicSimBreak(Wavefront *w)
+ {
+ std::string res_str;
+ // print out state for this wavefront and then break
+ res_str = csprintf("Breakpoint encountered for wavefront %i\n",
+ w->wfSlotId);
+
+ res_str += csprintf(" Kern ID: %i\n", w->kern_id);
+ res_str += csprintf(" Simd ID: %i\n", w->simdId);
+ res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
+ res_str += csprintf(" Exec mask: ");
+
+ for (int i = VSZ - 1; i >= 0; --i) {
+ if (w->execMask(i))
+ res_str += "1";
+ else
+ res_str += "0";
+
+ if ((i & 7) == 7)
+ res_str += " ";
+ }
+
+ res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong());
+
+ res_str += "\nHelpful debugging hints:\n";
+ res_str += " Check out w->s_reg / w->d_reg for register state\n";
+
+ res_str += "\n\n";
+ DPRINTFN(res_str.c_str());
+ fflush(stdout);
+
+ raise(SIGTRAP);
+ }
+
+ void
+ Call::MagicPrefixSum(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+ int res = 0;
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+ dest.set<int>(w, lane, res);
+ res += src_val1;
+ }
+ }
+ }
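+
+ // Worked example (hypothetical lane values, illustration only): if the
+ // active lanes hold src1 = {3, 1, 4}, each lane receives the exclusive
+ // prefix sum of the lanes before it, i.e. dest = {0, 3, 4}.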
+
+ void
+ Call::MagicReduction(Wavefront *w)
+ {
+ // reduction magic instruction
+ // The reduction instruction takes one input from each active
+ // lane in the WF (up to VSZ inputs) and sums them, then returns
+ // the sum to every active lane in the WF.
+ const VectorMask &mask = w->get_pred();
+ int res = 0;
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+ res += src_val1;
+ }
+ }
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ dest.set<int>(w, lane, res);
+ }
+ }
+ }
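+
+ // Worked example (hypothetical lane values): with active lanes holding
+ // src1 = {3, 1, 4}, the loop computes res = 8 and every active lane's
+ // dest receives 8.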
+
+ void
+ Call::MagicMaskLower(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+ int res = 0;
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+
+ if (src_val1) {
+ if (lane < (VSZ/2)) {
+ res = res | ((uint32_t)(1) << lane);
+ }
+ }
+ }
+ }
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ dest.set<int>(w, lane, res);
+ }
+ }
+ }
+
+ void
+ Call::MagicMaskUpper(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+ int res = 0;
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+
+ if (src_val1) {
+ if (lane >= (VSZ/2)) {
+ res = res | ((uint32_t)(1) << (lane - (VSZ/2)));
+ }
+ }
+ }
+ }
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ dest.set<int>(w, lane, res);
+ }
+ }
+ }
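+
+ // Illustration (hypothetical predicate, assuming VSZ == 64): MaskLower
+ // packs lanes [0, 31] with non-zero src1 into result bits [0, 31], and
+ // MaskUpper packs lanes [32, 63] into bits [0, 31] the same way; e.g.,
+ // with only lanes 1 and 33 non-zero, both return 0x2.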
+
+ void
+ Call::MagicJoinWFBar(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+ int max_cnt = 0;
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ w->bar_cnt[lane]++;
+
+ if (w->bar_cnt[lane] > max_cnt) {
+ max_cnt = w->bar_cnt[lane];
+ }
+ }
+ }
+
+ if (max_cnt > w->max_bar_cnt) {
+ w->max_bar_cnt = max_cnt;
+ }
+ }
+
+ void
+ Call::MagicWaitWFBar(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+ int max_cnt = 0;
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ w->bar_cnt[lane]--;
+ }
+
+ if (w->bar_cnt[lane] > max_cnt) {
+ max_cnt = w->bar_cnt[lane];
+ }
+ }
+
+ if (max_cnt < w->max_bar_cnt) {
+ w->max_bar_cnt = max_cnt;
+ }
+
+ w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
+ w->instructionBuffer.end());
+ if (w->pendingFetch)
+ w->dropFetch = true;
+ }
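+
+ // Note on pairing: MagicJoinWFBar increments each active lane's barrier
+ // count and MagicWaitWFBar decrements it; WaitWFBar also flushes the
+ // instruction buffer past the current instruction and drops any pending
+ // fetch, forcing the following instructions to be refetched.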
+
+ void
+ Call::MagicPanic(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+ panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
+ src_val1, lane);
+ }
+ }
+ }
+
+ void
+ Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
+ {
+ // the 64-bit address is assembled from two 32-bit operands:
+ // src1 holds the upper 32 bits, src2 the lower 32 bits
+ for (int lane = 0; lane < VSZ; ++lane) {
+ int src_val1 = src1.get<int>(w, lane, 1);
+ int src_val2 = src1.get<int>(w, lane, 2);
+ Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
+
+ m->addr[lane] = addr;
+ }
+ }
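+
+ // Worked example (hypothetical operand values): a lane reading
+ // src_val1 = 0x1 and src_val2 = 0x2000 is assigned the address
+ // (0x1 << 32) | 0x2000 = 0x100002000.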
+
+ void
+ Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
+ {
+ GPUDynInstPtr m = gpuDynInst;
+
+ calcAddr(w, m);
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
+ }
+
+ m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
+ Brig::BRIG_ATOMIC_ADD);
+ m->m_type = U32::memType;
+ m->v_type = U32::vgprType;
+
+ m->exec_mask = w->execMask();
+ m->statusBitVector = 0;
+ m->equiv = 0; // atomics don't have an equivalence class operand
+ m->n_reg = 1;
+ m->memoryOrder = Enums::MEMORY_ORDER_NONE;
+ m->scope = Enums::MEMORY_SCOPE_NONE;
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
+ m->s_type = SEG_GLOBAL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(64));
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
+
+ void
+ Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
+ {
+ GPUDynInstPtr m = gpuDynInst;
+ calcAddr(w, m);
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
+ }
+
+ m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
+ Brig::BRIG_ATOMIC_ADD);
+ m->m_type = U32::memType;
+ m->v_type = U32::vgprType;
+
+ m->exec_mask = w->execMask();
+ m->statusBitVector = 0;
+ m->equiv = 0; // atomics don't have an equivalence class operand
+ m->n_reg = 1;
+ m->memoryOrder = Enums::MEMORY_ORDER_NONE;
+ m->scope = Enums::MEMORY_SCOPE_NONE;
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
+ m->s_type = SEG_GLOBAL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(64));
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
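+
+ // Note: as written, both no-return atomic add variants are issued to
+ // the global segment (SEG_GLOBAL, GLBMEM_PIPE) and are accounted as
+ // both an outstanding read and an outstanding write.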
+
+ void
+ Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
+ {
+ GPUDynInstPtr m = gpuDynInst;
+ // calculate the address
+ calcAddr(w, m);
+
+ m->m_op = Enums::MO_LD;
+ m->m_type = U32::memType; //MemDataType::memType;
+ m->v_type = U32::vgprType; //DestDataType::vgprType;
+
+ m->exec_mask = w->execMask();
+ m->statusBitVector = 0;
+ m->equiv = 0;
+ m->n_reg = 1;
+ m->memoryOrder = Enums::MEMORY_ORDER_NONE;
+ m->scope = Enums::MEMORY_SCOPE_NONE;
+
+ // FIXME
+ //m->dst_reg = this->dest.regIndex();
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
+ m->s_type = SEG_GLOBAL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
+
+ void
+ Call::MagicXactCasLd(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+ int src_val1 = 0;
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ src_val1 = src1.get<int>(w, lane, 1);
+ break;
+ }
+ }
+
+ if (!w->computeUnit->xactCasLoadMap.count(src_val1)) {
+ w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue();
+ w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear();
+ }
+
+ w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
+ .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
+ }
+
+ void
+ Call::MagicMostSigThread(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+ unsigned mst = true;
+
+ for (int lane = VSZ - 1; lane >= 0; --lane) {
+ if (mask[lane]) {
+ dest.set<int>(w, lane, mst);
+ mst = false;
+ }
+ }
+ }
+
+ void
+ Call::MagicMostSigBroadcast(Wavefront *w)
+ {
+ const VectorMask &mask = w->get_pred();
+ int res = 0;
+ bool got_res = false;
+
+ for (int lane = VSZ - 1; lane >= 0; --lane) {
+ if (mask[lane]) {
+ if (!got_res) {
+ res = src1.get<int>(w, lane, 1);
+ got_res = true;
+ }
+ dest.set<int>(w, lane, res);
+ }
+ }
+ }
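+
+ // Illustration (hypothetical predicate): MagicMostSigThread writes 1 to
+ // the highest-numbered active lane and 0 to the remaining active lanes,
+ // while MagicMostSigBroadcast copies that lane's src1 value to every
+ // active lane's dest.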
+
+} // namespace HsailISA
diff --git a/src/arch/hsail/operand.cc b/src/arch/hsail/operand.cc
new file mode 100644
index 000000000..d0e6c5541
--- /dev/null
+++ b/src/arch/hsail/operand.cc
@@ -0,0 +1,449 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#include "arch/hsail/operand.hh"
+
+using namespace Brig;
+
+bool
+BaseRegOperand::init(unsigned opOffset, const BrigObject *obj,
+ unsigned &maxRegIdx, char _regFileChar)
+{
+ regFileChar = _regFileChar;
+ const BrigOperand *brigOp = obj->getOperand(opOffset);
+
+ if (brigOp->kind != BRIG_KIND_OPERAND_REGISTER)
+ return false;
+
+ const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)brigOp;
+
+ regIdx = brigRegOp->regNum;
+
+ DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx,
+ brigRegOp->regKind);
+
+ maxRegIdx = std::max(maxRegIdx, regIdx);
+
+ return true;
+}
+
+void
+ListOperand::init(unsigned opOffset, const BrigObject *obj)
+{
+ const BrigOperand *brigOp = (const BrigOperand*)obj->getOperand(opOffset);
+
+ switch (brigOp->kind) {
+ case BRIG_KIND_OPERAND_CODE_LIST:
+ {
+ const BrigOperandCodeList *opList =
+ (const BrigOperandCodeList*)brigOp;
+
+ const Brig::BrigData *oprnd_data =
+ obj->getBrigBaseData(opList->elements);
+
+ // Note: for calls, the dest operand list may be empty (zero
+ // elements).
+ elementCount = oprnd_data->byteCount / 4;
+
+ DPRINTF(GPUReg, "Operand Code List: # elements: %d\n",
+ elementCount);
+
+ for (int i = 0; i < elementCount; ++i) {
+ unsigned *data_offset =
+ (unsigned*)obj->getData(opList->elements + 4 * (i + 1));
+
+ const BrigDirectiveVariable *p =
+ (const BrigDirectiveVariable*)obj->
+ getCodeSectionEntry(*data_offset);
+
+ StorageElement *se = obj->currentCode->storageMap->
+ findSymbol(BRIG_SEGMENT_ARG, p);
+
+ assert(se);
+ callArgs.push_back(se);
+ }
+ }
+ break;
+ default:
+ fatal("ListOperand: bad operand kind %d\n", brigOp->kind);
+ }
+}
+
+std::string
+ListOperand::disassemble()
+{
+ std::string res_str("");
+
+ for (auto it : callArgs) {
+ res_str += csprintf("%s ", it->name.c_str());
+ }
+
+ return res_str;
+}
+
+void
+FunctionRefOperand::init(unsigned opOffset, const BrigObject *obj)
+{
+ const BrigOperand *baseOp = obj->getOperand(opOffset);
+
+ if (baseOp->kind != BRIG_KIND_OPERAND_CODE_REF) {
+ fatal("FunctionRefOperand: bad operand kind %d\n", baseOp->kind);
+ }
+
+ const BrigOperandCodeRef *brigOp = (const BrigOperandCodeRef*)baseOp;
+
+ const BrigDirectiveExecutable *p =
+ (const BrigDirectiveExecutable*)obj->getCodeSectionEntry(brigOp->ref);
+
+ func_name = obj->getString(p->name);
+}
+
+std::string
+FunctionRefOperand::disassemble()
+{
+ DPRINTF(GPUReg, "Operand Func-ref name: %s\n", func_name);
+
+ return csprintf("%s", func_name);
+}
+
+bool
+BaseRegOperand::init_from_vect(unsigned opOffset, const BrigObject *obj,
+ int at, unsigned &maxRegIdx, char _regFileChar)
+{
+ regFileChar = _regFileChar;
+ const BrigOperand *brigOp = obj->getOperand(opOffset);
+
+ if (brigOp->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
+ return false;
+
+ const Brig::BrigOperandOperandList *brigRegVecOp =
+ (const Brig::BrigOperandOperandList*)brigOp;
+
+ unsigned *data_offset =
+ (unsigned*)obj->getData(brigRegVecOp->elements + 4 * (at + 1));
+
+ const BrigOperand *p =
+ (const BrigOperand*)obj->getOperand(*data_offset);
+ if (p->kind != BRIG_KIND_OPERAND_REGISTER) {
+ return false;
+ }
+
+ const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)p;
+
+ regIdx = brigRegOp->regNum;
+
+ DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx,
+ brigRegOp->regKind);
+
+ maxRegIdx = std::max(maxRegIdx, regIdx);
+
+ return true;
+}
+
+void
+BaseRegOperand::initWithStrOffset(unsigned strOffset, const BrigObject *obj,
+ unsigned &maxRegIdx, char _regFileChar)
+{
+ const char *name = obj->getString(strOffset);
+ char *endptr;
+
+ // validate the "$<regFileChar>" prefix before parsing the index
+ if (name[0] != '$' || name[1] != _regFileChar) {
+ fatal("register operand parse error on \"%s\"\n", name);
+ }
+
+ regIdx = strtoul(name + 2, &endptr, 10);
+
+ maxRegIdx = std::max(maxRegIdx, regIdx);
+}
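+
+// Parsing sketch (hypothetical input): given the string "$s3" and
+// _regFileChar == 's', the "$s" prefix is validated and regIdx becomes 3.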
+
+unsigned SRegOperand::maxRegIdx;
+unsigned DRegOperand::maxRegIdx;
+unsigned CRegOperand::maxRegIdx;
+
+std::string
+SRegOperand::disassemble()
+{
+ return csprintf("$s%d", regIdx);
+}
+
+std::string
+DRegOperand::disassemble()
+{
+ return csprintf("$d%d", regIdx);
+}
+
+std::string
+CRegOperand::disassemble()
+{
+ return csprintf("$c%d", regIdx);
+}
+
+BrigRegOperandInfo
+findRegDataType(unsigned opOffset, const BrigObject *obj)
+{
+ const BrigOperand *baseOp = obj->getOperand(opOffset);
+
+ switch (baseOp->kind) {
+ case BRIG_KIND_OPERAND_REGISTER:
+ {
+ const BrigOperandRegister *op = (BrigOperandRegister*)baseOp;
+
+ return BrigRegOperandInfo((BrigKind16_t)baseOp->kind,
+ (BrigRegisterKind)op->regKind);
+ }
+ break;
+
+ case BRIG_KIND_OPERAND_OPERAND_LIST:
+ {
+ const BrigOperandOperandList *op =
+ (BrigOperandOperandList*)baseOp;
+ const BrigData *data_p = (BrigData*)obj->getData(op->elements);
+
+ int num_operands = 0;
+ BrigRegisterKind reg_kind = (BrigRegisterKind)0;
+ for (int offset = 0; offset < data_p->byteCount; offset += 4) {
+ const BrigOperand *op_p = (const BrigOperand *)
+ obj->getOperand(((int *)data_p->bytes)[offset/4]);
+
+ if (op_p->kind == BRIG_KIND_OPERAND_REGISTER) {
+ const BrigOperandRegister *brigRegOp =
+ (const BrigOperandRegister*)op_p;
+ reg_kind = (BrigRegisterKind)brigRegOp->regKind;
+ } else if (op_p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) {
+ uint16_t num_bytes =
+ ((Brig::BrigOperandConstantBytes*)op_p)->base.byteCount
+ - sizeof(BrigBase);
+ if (num_bytes == sizeof(uint32_t)) {
+ reg_kind = BRIG_REGISTER_KIND_SINGLE;
+ } else if (num_bytes == sizeof(uint64_t)) {
+ reg_kind = BRIG_REGISTER_KIND_DOUBLE;
+ } else {
+ fatal("OperandList: bad operand size %d\n", num_bytes);
+ }
+ } else {
+ fatal("OperandList: bad operand kind %d\n", op_p->kind);
+ }
+
+ num_operands++;
+ }
+ assert(baseOp->kind == BRIG_KIND_OPERAND_OPERAND_LIST);
+
+ return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind);
+ }
+ break;
+
+ case BRIG_KIND_OPERAND_ADDRESS:
+ {
+ const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
+
+ if (!op->reg) {
+ BrigType type = BRIG_TYPE_NONE;
+
+ if (op->symbol) {
+ const BrigDirective *dir = (BrigDirective*)
+ obj->getCodeSectionEntry(op->symbol);
+
+ assert(dir->kind == BRIG_KIND_DIRECTIVE_VARIABLE);
+
+ const BrigDirectiveVariable *sym =
+ (const BrigDirectiveVariable*)dir;
+
+ type = (BrigType)sym->type;
+ }
+ return BrigRegOperandInfo(BRIG_KIND_OPERAND_ADDRESS,
+ (BrigType)type);
+ } else {
+ const BrigOperand *reg = obj->getOperand(op->reg);
+ const BrigOperandRegister *rop = (BrigOperandRegister*)reg;
+
+ return BrigRegOperandInfo(BRIG_KIND_OPERAND_REGISTER,
+ (BrigRegisterKind)rop->regKind);
+ }
+ }
+ break;
+
+ default:
+ fatal("AddrOperand: bad operand kind %d\n", baseOp->kind);
+ break;
+ }
+}
+
+void
+AddrOperandBase::parseAddr(const BrigOperandAddress *op, const BrigObject *obj)
+{
+ assert(op->base.kind == BRIG_KIND_OPERAND_ADDRESS);
+
+ const BrigDirective *d =
+ (BrigDirective*)obj->getCodeSectionEntry(op->symbol);
+
+ assert(d->kind == BRIG_KIND_DIRECTIVE_VARIABLE);
+ const BrigDirectiveVariable *sym = (BrigDirectiveVariable*)d;
+ name = obj->getString(sym->name);
+
+ if (sym->segment != BRIG_SEGMENT_ARG) {
+ storageElement =
+ obj->currentCode->storageMap->findSymbol(sym->segment, name);
+ assert(storageElement);
+ offset = 0;
+ } else {
+ // sym->name does not work for BRIG_SEGMENT_ARG for the following case:
+ //
+ // void foo(int a);
+ // void bar(double a);
+ //
+ // foo(...) --> arg_u32 %param_p0;
+ // st_arg_u32 $s0, [%param_p0];
+ // call &foo (%param_p0);
+ // bar(...) --> arg_f64 %param_p0;
+ // st_arg_u64 $d0, [%param_p0];
+ // call &bar (%param_p0);
+ //
+ // Both functions use the same variable name (param_p0)!!!
+ //
+ // Maybe this is a bug in the compiler (I don't know).
+ //
+ // Solution:
+ // Use directive pointer (BrigDirectiveVariable) to differentiate 2
+ // versions of param_p0.
+ //
+ // Note this solution is kind of stupid, because we are pulling stuff
+ // out of the brig binary via the directive pointer and putting it into
+ // the symbol table, but now we are indexing the symbol table by the
+ // brig directive pointer! It makes the symbol table sort of pointless.
+ // But I don't want to mess with the rest of the infrastructure, so
+ // let's go with this for now.
+ //
+ // When we update the compiler again, we should see if this problem goes
+ // away. If so, we can fold some of this functionality into the code for
+ // kernel arguments. If not, maybe we can index the symbol name on a
+ // hash of the variable AND function name.
+ storageElement = obj->currentCode->
+ storageMap->findSymbol((Brig::BrigSegment)sym->segment, sym);
+
+ assert(storageElement);
+ }
+}
+
+uint64_t
+AddrOperandBase::calcUniformBase()
+{
+ // start with offset, will be 0 if not specified
+ uint64_t address = offset;
+
+ // add in symbol value if specified
+ if (storageElement) {
+ address += storageElement->offset;
+ }
+
+ return address;
+}
+
+std::string
+AddrOperandBase::disassemble(std::string reg_disassembly)
+{
+ std::string disasm;
+
+ if (offset || reg_disassembly != "") {
+ disasm += "[";
+
+ if (reg_disassembly != "") {
+ disasm += reg_disassembly;
+
+ if (offset > 0) {
+ disasm += "+";
+ }
+ }
+
+ if (offset) {
+ disasm += csprintf("%d", offset);
+ }
+
+ disasm += "]";
+ } else if (name) {
+ disasm += csprintf("[%s]", name);
+ }
+
+ return disasm;
+}
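+
+// Output sketch (hypothetical operands): a register-based address with
+// reg_disassembly "$s1" and offset 16 disassembles as "[$s1+16]"; a
+// symbol-only address with no offset disassembles as "[name]".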
+
+void
+NoRegAddrOperand::init(unsigned opOffset, const BrigObject *obj)
+{
+ const BrigOperand *baseOp = obj->getOperand(opOffset);
+
+ if (baseOp->kind == BRIG_KIND_OPERAND_ADDRESS) {
+ BrigOperandAddress *addrOp = (BrigOperandAddress*)baseOp;
+ parseAddr(addrOp, obj);
+ offset = (uint64_t(addrOp->offset.hi) << 32) |
+ uint64_t(addrOp->offset.lo);
+ } else {
+ fatal("NoRegAddrOperand: bad operand kind %d\n", baseOp->kind);
+ }
+}
+
+std::string
+NoRegAddrOperand::disassemble()
+{
+ return AddrOperandBase::disassemble(std::string(""));
+}
+
+void
+LabelOperand::init(unsigned opOffset, const BrigObject *obj)
+{
+ const BrigOperandCodeRef *op =
+ (const BrigOperandCodeRef*)obj->getOperand(opOffset);
+
+ assert(op->base.kind == BRIG_KIND_OPERAND_CODE_REF);
+
+ const BrigDirective *dir =
+ (const BrigDirective*)obj->getCodeSectionEntry(op->ref);
+
+ assert(dir->kind == BRIG_KIND_DIRECTIVE_LABEL);
+ label = obj->currentCode->refLabel((BrigDirectiveLabel*)dir, obj);
+}
+
+uint32_t
+LabelOperand::getTarget(Wavefront *w, int lane)
+{
+ return label->get();
+}
+
+std::string
+LabelOperand::disassemble()
+{
+ return label->name;
+}
diff --git a/src/arch/hsail/operand.hh b/src/arch/hsail/operand.hh
new file mode 100644
index 000000000..e3d275b10
--- /dev/null
+++ b/src/arch/hsail/operand.hh
@@ -0,0 +1,768 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __ARCH_HSAIL_OPERAND_HH__
+#define __ARCH_HSAIL_OPERAND_HH__
+
+/**
+ * @file operand.hh
+ *
+ * Defines classes encapsulating HSAIL instruction operands.
+ */
+
+#include <string>
+
+#include "arch/hsail/Brig.h"
+#include "base/trace.hh"
+#include "base/types.hh"
+#include "debug/GPUReg.hh"
+#include "enums/RegisterType.hh"
+#include "gpu-compute/brig_object.hh"
+#include "gpu-compute/compute_unit.hh"
+#include "gpu-compute/hsail_code.hh"
+#include "gpu-compute/shader.hh"
+#include "gpu-compute/vector_register_file.hh"
+#include "gpu-compute/wavefront.hh"
+
+class Label;
+class StorageElement;
+
+class BaseOperand
+{
+ public:
+ Enums::RegisterType registerType;
+ uint32_t regOperandSize;
+ BaseOperand() { registerType = Enums::RT_NONE; regOperandSize = 0; }
+ bool isVectorRegister() { return registerType == Enums::RT_VECTOR; }
+ bool isScalarRegister() { return registerType == Enums::RT_SCALAR; }
+ bool isCondRegister() { return registerType == Enums::RT_CONDITION; }
+ unsigned int regIndex() { return 0; }
+ uint32_t opSize() { return regOperandSize; }
+ virtual ~BaseOperand() { }
+};
+
+class BrigRegOperandInfo
+{
+ public:
+ Brig::BrigKind16_t kind;
+ Brig::BrigType type;
+ Brig::BrigRegisterKind regKind;
+
+ BrigRegOperandInfo(Brig::BrigKind16_t _kind,
+ Brig::BrigRegisterKind _regKind)
+ : kind(_kind), regKind(_regKind)
+ {
+ }
+
+ BrigRegOperandInfo(Brig::BrigKind16_t _kind, Brig::BrigType _type)
+ : kind(_kind), type(_type)
+ {
+ }
+
+ BrigRegOperandInfo() : kind(Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES),
+ type(Brig::BRIG_TYPE_NONE)
+ {
+ }
+};
+
+BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj);
+
+class BaseRegOperand : public BaseOperand
+{
+ public:
+ unsigned regIdx;
+ char regFileChar;
+
+ bool init(unsigned opOffset, const BrigObject *obj,
+ unsigned &maxRegIdx, char _regFileChar);
+
+ bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at,
+ unsigned &maxRegIdx, char _regFileChar);
+
+ void initWithStrOffset(unsigned strOffset, const BrigObject *obj,
+ unsigned &maxRegIdx, char _regFileChar);
+ unsigned int regIndex() { return regIdx; }
+};
+
+class SRegOperand : public BaseRegOperand
+{
+ public:
+ static unsigned maxRegIdx;
+
+ bool
+ init(unsigned opOffset, const BrigObject *obj)
+ {
+ regOperandSize = sizeof(uint32_t);
+ registerType = Enums::RT_VECTOR;
+
+ return BaseRegOperand::init(opOffset, obj, maxRegIdx, 's');
+ }
+
+ bool
+ init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
+ {
+ regOperandSize = sizeof(uint32_t);
+ registerType = Enums::RT_VECTOR;
+
+ return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
+ 's');
+ }
+
+ void
+ initWithStrOffset(unsigned strOffset, const BrigObject *obj)
+ {
+ regOperandSize = sizeof(uint32_t);
+ registerType = Enums::RT_VECTOR;
+
+ return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
+ 's');
+ }
+
+ template<typename OperandType>
+ OperandType
+ get(Wavefront *w, int lane)
+ {
+ assert(sizeof(OperandType) <= sizeof(uint32_t));
+ assert(regIdx < w->maxSpVgprs);
+ // if OperandType is smaller than 32-bit, we truncate the value
+ OperandType ret;
+ uint32_t vgprIdx;
+
+ switch (sizeof(OperandType)) {
+ case 1: // 1 byte operand
+ vgprIdx = w->remap(regIdx, 1, 1);
+ ret = (w->computeUnit->vrf[w->simdId]->
+ read<uint32_t>(vgprIdx, lane)) & 0xff;
+ break;
+ case 2: // 2 byte operand
+ vgprIdx = w->remap(regIdx, 2, 1);
+ ret = (w->computeUnit->vrf[w->simdId]->
+ read<uint32_t>(vgprIdx, lane)) & 0xffff;
+ break;
+ case 4: // 4 byte operand
+ vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
+ ret = w->computeUnit->vrf[w->simdId]->
+ read<OperandType>(vgprIdx, lane);
+ break;
+ default:
+ panic("Bad OperandType\n");
+ break;
+ }
+
+ return (OperandType)ret;
+ }
+
+ // special get method for compatibility with LabelOperand
+ uint32_t
+ getTarget(Wavefront *w, int lane)
+ {
+ return get<uint32_t>(w, lane);
+ }
+
+ template<typename OperandType>
+ void set(Wavefront *w, int lane, OperandType &val);
+ std::string disassemble();
+};
+
+template<typename OperandType>
+void
+SRegOperand::set(Wavefront *w, int lane, OperandType &val)
+{
+ DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
+ w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);
+
+ assert(sizeof(OperandType) == sizeof(uint32_t));
+ assert(regIdx < w->maxSpVgprs);
+ uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
+ w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx, val, lane);
+}
+
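+// Specialization note: writing a 64-bit value into a 32-bit $s register
+// keeps only the low 32 bits; the write<uint32_t> below truncates val.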
+template<>
+inline void
+SRegOperand::set(Wavefront *w, int lane, uint64_t &val)
+{
+ DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
+ w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);
+
+ assert(regIdx < w->maxSpVgprs);
+ uint32_t vgprIdx = w->remap(regIdx, sizeof(uint32_t), 1);
+ w->computeUnit->vrf[w->simdId]->write<uint32_t>(vgprIdx, val, lane);
+}
+
+class DRegOperand : public BaseRegOperand
+{
+ public:
+ static unsigned maxRegIdx;
+
+ bool
+ init(unsigned opOffset, const BrigObject *obj)
+ {
+ regOperandSize = sizeof(uint64_t);
+ registerType = Enums::RT_VECTOR;
+
+ return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'd');
+ }
+
+ bool
+ init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
+ {
+ regOperandSize = sizeof(uint64_t);
+ registerType = Enums::RT_VECTOR;
+
+ return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
+ 'd');
+ }
+
+ void
+ initWithStrOffset(unsigned strOffset, const BrigObject *obj)
+ {
+ regOperandSize = sizeof(uint64_t);
+ registerType = Enums::RT_VECTOR;
+
+ return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
+ 'd');
+ }
+
+ template<typename OperandType>
+ OperandType
+ get(Wavefront *w, int lane)
+ {
+ assert(sizeof(OperandType) <= sizeof(uint64_t));
+ // TODO: this check is valid only for HSAIL
+ assert(regIdx < w->maxDpVgprs);
+ uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
+
+ return w->computeUnit->vrf[w->simdId]->read<OperandType>(vgprIdx,lane);
+ }
+
+ template<typename OperandType>
+ void
+ set(Wavefront *w, int lane, OperandType &val)
+ {
+ DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $d%d <- %d\n",
+ w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
+ val);
+
+ assert(sizeof(OperandType) <= sizeof(uint64_t));
+ // TODO: this check is valid only for HSAIL
+ assert(regIdx < w->maxDpVgprs);
+ uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
+ w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
+ }
+
+ std::string disassemble();
+};
+
+class CRegOperand : public BaseRegOperand
+{
+ public:
+ static unsigned maxRegIdx;
+
+ bool
+ init(unsigned opOffset, const BrigObject *obj)
+ {
+ regOperandSize = sizeof(uint8_t);
+ registerType = Enums::RT_CONDITION;
+
+ return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'c');
+ }
+
+ bool
+ init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
+ {
+ regOperandSize = sizeof(uint8_t);
+ registerType = Enums::RT_CONDITION;
+
+ return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
+ 'c');
+ }
+
+ void
+ initWithStrOffset(unsigned strOffset, const BrigObject *obj)
+ {
+ regOperandSize = sizeof(uint8_t);
+ registerType = Enums::RT_CONDITION;
+
+ return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
+ 'c');
+ }
+
+ template<typename OperandType>
+ OperandType
+ get(Wavefront *w, int lane)
+ {
+ assert(regIdx < w->condRegState->numRegs());
+
+ return w->condRegState->read<OperandType>((int)regIdx, lane);
+ }
+
+ template<typename OperandType>
+ void
+ set(Wavefront *w, int lane, OperandType &val)
+ {
+ DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $c%d <- %d\n",
+ w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
+ val);
+
+ assert(regIdx < w->condRegState->numRegs());
+ w->condRegState->write<OperandType>(regIdx, lane, val);
+ }
+
+ std::string disassemble();
+};
+
+template<typename T>
+class ImmOperand : public BaseOperand
+{
+ public:
+ T bits;
+
+ bool init(unsigned opOffset, const BrigObject *obj);
+ bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
+ std::string disassemble();
+
+ template<typename OperandType>
+ OperandType
+ get()
+ {
+ assert(sizeof(OperandType) <= sizeof(T));
+
+ return *(OperandType*)&bits;
+ }
+
+ // This version of get() takes a WF* and a lane id for
+ // compatibility with the register-based get() methods.
+ template<typename OperandType>
+ OperandType
+ get(Wavefront *w, int lane)
+ {
+ return get<OperandType>();
+ }
+};
+
+template<typename T>
+bool
+ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj)
+{
+ const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
+
+ switch (brigOp->kind) {
+ // this is an immediate operand
+ case Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES:
+ {
+ DPRINTF(GPUReg, "sizeof(T): %lu, byteCount: %d\n", sizeof(T),
+ brigOp->byteCount);
+
+ auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp;
+
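+ // cbptr->bytes refers to a BrigData entry; skip its 4-byte
+ // byteCount field to reach the raw constant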
+ bits = *((T*)(obj->getData(cbptr->bytes + 4)));
+
+ return true;
+ }
+ break;
+
+ case Brig::BRIG_KIND_OPERAND_WAVESIZE:
+ bits = VSZ;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+template <typename T>
+bool
+ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
+{
+ const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
+
+ if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+ return false;
+ }
+
+ const Brig::BrigOperandOperandList *brigVecOp =
+ (const Brig::BrigOperandOperandList *)brigOp;
+
+ unsigned *data_offset =
+ (unsigned *)obj->getData(brigVecOp->elements + 4 * (at + 1));
+
+ const Brig::BrigOperand *p =
+ (const Brig::BrigOperand *)obj->getOperand(*data_offset);
+
+ if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
+ return false;
+ }
+
+ return init(*data_offset, obj);
+}
+
+template<typename T>
+std::string
+ImmOperand<T>::disassemble()
+{
+ return csprintf("0x%08x", bits);
+}
+
+template<typename RegOperand, typename T>
+class RegOrImmOperand : public BaseOperand
+{
+ private:
+ bool is_imm;
+
+ public:
+ void setImm(const bool value) { is_imm = value; }
+
+ ImmOperand<T> imm_op;
+ RegOperand reg_op;
+
+ RegOrImmOperand() { is_imm = false; }
+ void init(unsigned opOffset, const BrigObject *obj);
+ void init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
+ std::string disassemble();
+
+ template<typename OperandType>
+ OperandType
+ get(Wavefront *w, int lane)
+ {
+ return is_imm ? imm_op.template get<OperandType>() :
+ reg_op.template get<OperandType>(w, lane);
+ }
+
+ uint32_t
+ opSize()
+ {
+ if (!is_imm) {
+ return reg_op.opSize();
+ }
+
+ return 0;
+ }
+
+ bool
+ isVectorRegister()
+ {
+ if (!is_imm) {
+ return reg_op.registerType == Enums::RT_VECTOR;
+ }
+ return false;
+ }
+
+ bool
+ isCondRegister()
+ {
+ if (!is_imm) {
+ return reg_op.registerType == Enums::RT_CONDITION;
+ }
+
+ return false;
+ }
+
+ bool
+ isScalarRegister()
+ {
+ if (!is_imm) {
+ return reg_op.registerType == Enums::RT_SCALAR;
+ }
+
+ return false;
+ }
+
+ unsigned int
+ regIndex()
+ {
+ if (!is_imm) {
+ return reg_op.regIndex();
+ }
+ return 0;
+ }
+};
+
+template<typename RegOperand, typename T>
+void
+RegOrImmOperand<RegOperand, T>::init(unsigned opOffset, const BrigObject *obj)
+{
+ is_imm = false;
+
+ if (reg_op.init(opOffset, obj)) {
+ return;
+ }
+
+ if (imm_op.init(opOffset, obj)) {
+ is_imm = true;
+ return;
+ }
+
+ fatal("RegOrImmOperand::init(): bad operand kind %d\n",
+ obj->getOperand(opOffset)->kind);
+}
+
+template<typename RegOperand, typename T>
+void
+RegOrImmOperand<RegOperand, T>::init_from_vect(unsigned opOffset,
+ const BrigObject *obj, int at)
+{
+ if (reg_op.init_from_vect(opOffset, obj, at)) {
+ is_imm = false;
+
+ return;
+ }
+
+ if (imm_op.init_from_vect(opOffset, obj, at)) {
+ is_imm = true;
+
+ return;
+ }
+
+ fatal("RegOrImmOperand::init(): bad operand kind %d\n",
+ obj->getOperand(opOffset)->kind);
+}
+
+template<typename RegOperand, typename T>
+std::string
+RegOrImmOperand<RegOperand, T>::disassemble()
+{
+ return is_imm ? imm_op.disassemble() : reg_op.disassemble();
+}
+
+typedef RegOrImmOperand<SRegOperand, uint32_t> SRegOrImmOperand;
+typedef RegOrImmOperand<DRegOperand, uint64_t> DRegOrImmOperand;
+typedef RegOrImmOperand<CRegOperand, bool> CRegOrImmOperand;
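+
+// Usage sketch (hypothetical field name): an instruction class might declare
+// "SRegOrImmOperand src1;" and call src1.get<uint32_t>(w, lane); the wrapper
+// reads the vector register file when the BRIG operand decoded as a register
+// and returns the stored constant when it decoded as an immediate.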
+
+class AddrOperandBase : public BaseOperand
+{
+ protected:
+ // helper function for init()
+ void parseAddr(const Brig::BrigOperandAddress *op, const BrigObject *obj);
+
+ // helper function for disassemble()
+ std::string disassemble(std::string reg_disassembly);
+ uint64_t calcUniformBase();
+
+ public:
+ virtual void calcVector(Wavefront *w, uint64_t *addrVec) = 0;
+ virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0;
+
+ uint64_t offset;
+ const char *name = nullptr;
+ StorageElement *storageElement;
+};
+
+template<typename RegOperandType>
+class RegAddrOperand : public AddrOperandBase
+{
+ public:
+ RegOperandType reg;
+ void init(unsigned opOffset, const BrigObject *obj);
+ uint64_t calcUniform();
+ void calcVector(Wavefront *w, uint64_t *addrVec);
+ uint64_t calcLane(Wavefront *w, int lane=0);
+ uint32_t opSize() { return reg.opSize(); }
+ bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; }
+ bool isCondRegister() { return reg.registerType == Enums::RT_CONDITION; }
+ bool isScalarRegister() { return reg.registerType == Enums::RT_SCALAR; }
+ unsigned int regIndex() { return reg.regIndex(); }
+ std::string disassemble();
+};
+
+template<typename RegOperandType>
+void
+RegAddrOperand<RegOperandType>::init(unsigned opOffset, const BrigObject *obj)
+{
+ using namespace Brig;
+
+ const BrigOperand *baseOp = obj->getOperand(opOffset);
+
+ switch (baseOp->kind) {
+ case BRIG_KIND_OPERAND_ADDRESS:
+ {
+ const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
+ storageElement = nullptr;
+
+ offset = (uint64_t(op->offset.hi) << 32) | uint64_t(op->offset.lo);
+ reg.init(op->reg, obj);
+
+ if (reg.regFileChar == 's') {
+ reg.regOperandSize = sizeof(uint32_t);
+ registerType = Enums::RT_VECTOR;
+ }
+ else if (reg.regFileChar == 'd') {
+ reg.regOperandSize = sizeof(uint64_t);
+ registerType = Enums::RT_VECTOR;
+ }
+ }
+ break;
+
+ default:
+ fatal("RegAddrOperand: bad operand kind %d\n", baseOp->kind);
+ break;
+ }
+}
+
+template<typename RegOperandType>
+uint64_t
+RegAddrOperand<RegOperandType>::calcUniform()
+{
+ fatal("can't do calcUniform() on register-based address\n");
+
+ return 0;
+}
+
+template<typename RegOperandType>
+void
+RegAddrOperand<RegOperandType>::calcVector(Wavefront *w, uint64_t *addrVec)
+{
+ Addr address = calcUniformBase();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (w->execMask(lane)) {
+ if (reg.regFileChar == 's') {
+ addrVec[lane] = address + reg.template get<uint32_t>(w, lane);
+ } else {
+ addrVec[lane] = address + reg.template get<Addr>(w, lane);
+ }
+ }
+ }
+}
+
+template<typename RegOperandType>
+uint64_t
+RegAddrOperand<RegOperandType>::calcLane(Wavefront *w, int lane)
+{
+ Addr address = calcUniformBase();
+
+ return address + reg.template get<Addr>(w, lane);
+}
+
+template<typename RegOperandType>
+std::string
+RegAddrOperand<RegOperandType>::disassemble()
+{
+ return AddrOperandBase::disassemble(reg.disassemble());
+}
+
+typedef RegAddrOperand<SRegOperand> SRegAddrOperand;
+typedef RegAddrOperand<DRegOperand> DRegAddrOperand;
+
+class NoRegAddrOperand : public AddrOperandBase
+{
+ public:
+ void init(unsigned opOffset, const BrigObject *obj);
+ uint64_t calcUniform();
+ void calcVector(Wavefront *w, uint64_t *addrVec);
+ uint64_t calcLane(Wavefront *w, int lane=0);
+ std::string disassemble();
+};
+
+inline uint64_t
+NoRegAddrOperand::calcUniform()
+{
+ return AddrOperandBase::calcUniformBase();
+}
+
+inline uint64_t
+NoRegAddrOperand::calcLane(Wavefront *w, int lane)
+{
+ return calcUniform();
+}
+
+inline void
+NoRegAddrOperand::calcVector(Wavefront *w, uint64_t *addrVec)
+{
+ uint64_t address = calcUniformBase();
+
+ for (int lane = 0; lane < VSZ; ++lane)
+ addrVec[lane] = address;
+}
+
+class LabelOperand : public BaseOperand
+{
+ public:
+ Label *label;
+
+ void init(unsigned opOffset, const BrigObject *obj);
+ std::string disassemble();
+
+ // special get method for compatibility with SRegOperand
+ uint32_t getTarget(Wavefront *w, int lane);
+};
+
+class ListOperand : public BaseOperand
+{
+ public:
+ int elementCount;
+ std::vector<StorageElement*> callArgs;
+
+ int
+ getSrcOperand(int idx)
+ {
+ DPRINTF(GPUReg, "getSrcOperand, idx: %d, sz_args: %d\n", idx,
+ callArgs.size());
+
+ return callArgs.at(idx)->offset;
+ }
+
+ void init(unsigned opOffset, const BrigObject *obj);
+
+ std::string disassemble();
+
+ template<typename OperandType>
+ OperandType
+ get(Wavefront *w, int lane, int arg_idx)
+ {
+ return w->readCallArgMem<OperandType>(lane, getSrcOperand(arg_idx));
+ }
+
+ template<typename OperandType>
+ void
+ set(Wavefront *w, int lane, OperandType val)
+ {
+ w->writeCallArgMem<OperandType>(lane, getSrcOperand(0), val);
+ DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: arg[%d] <- %d\n",
+ w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane,
+ getSrcOperand(0), val);
+ }
+};
+
+class FunctionRefOperand : public BaseOperand
+{
+ public:
+ const char *func_name;
+
+ void init(unsigned opOffset, const BrigObject *obj);
+ std::string disassemble();
+};
+
+#endif // __ARCH_HSAIL_OPERAND_HH__