author    | Tony Gutierrez <anthony.gutierrez@amd.com> | 2016-01-19 14:28:22 -0500
committer | Tony Gutierrez <anthony.gutierrez@amd.com> | 2016-01-19 14:28:22 -0500
commit    | 1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree      | 867510a147cd095f19499d26b7c02d27de4cae9d /src/arch
parent    | 28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
download  | gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/arch')
-rw-r--r-- | src/arch/SConscript                      |    8
-rw-r--r-- | src/arch/hsail/Brig.h                    |   67
-rw-r--r-- | src/arch/hsail/Brig_new.hpp              | 1587
-rw-r--r-- | src/arch/hsail/SConscript                |   54
-rw-r--r-- | src/arch/hsail/SConsopts                 |   40
-rwxr-xr-x | src/arch/hsail/gen.py                    |  806
-rw-r--r-- | src/arch/hsail/generic_types.cc          |   47
-rw-r--r-- | src/arch/hsail/generic_types.hh          |   16
-rw-r--r-- | src/arch/hsail/gpu_decoder.hh            |   77
-rw-r--r-- | src/arch/hsail/gpu_types.hh              |   69
-rw-r--r-- | src/arch/hsail/insts/branch.cc           |   86
-rw-r--r-- | src/arch/hsail/insts/branch.hh           |  442
-rw-r--r-- | src/arch/hsail/insts/decl.hh             | 1106
-rw-r--r-- | src/arch/hsail/insts/gpu_static_inst.cc  |   64
-rw-r--r-- | src/arch/hsail/insts/gpu_static_inst.hh  |   65
-rw-r--r-- | src/arch/hsail/insts/main.cc             |  208
-rw-r--r-- | src/arch/hsail/insts/mem.cc              |  139
-rw-r--r-- | src/arch/hsail/insts/mem.hh              | 1629
-rw-r--r-- | src/arch/hsail/insts/mem_impl.hh         |  660
-rw-r--r-- | src/arch/hsail/insts/pseudo_inst.cc      |  787
-rw-r--r-- | src/arch/hsail/operand.cc                |  449
-rw-r--r-- | src/arch/hsail/operand.hh                |  768
22 files changed, 9174 insertions, 0 deletions
diff --git a/src/arch/SConscript b/src/arch/SConscript
index e0d6845f5..b022cb01f 100644
--- a/src/arch/SConscript
+++ b/src/arch/SConscript
@@ -68,6 +68,14 @@ isa_switch_hdrs = Split('''
 # Set up this directory to support switching headers
 make_switching_dir('arch', isa_switch_hdrs, env)
 
+if env['BUILD_GPU']:
+    gpu_isa_switch_hdrs = Split('''
+        gpu_decoder.hh
+        gpu_types.hh
+        ''')
+
+    make_gpu_switching_dir('arch', gpu_isa_switch_hdrs, env)
+
 #################################################################
 #
 # Include architecture-specific files.

diff --git a/src/arch/hsail/Brig.h b/src/arch/hsail/Brig.h
new file mode 100644
index 000000000..b260157ab
--- /dev/null
+++ b/src/arch/hsail/Brig.h
@@ -0,0 +1,67 @@
+// University of Illinois/NCSA
+// Open Source License
+//
+// Copyright (c) 2013, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Developed by:
+//
+//     HSA Team
+//
+//     Advanced Micro Devices, Inc
+//
+//     www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal with
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+//
+//     * Redistributions of source code must retain the above copyright notice,
+//       this list of conditions and the following disclaimers.
+//
+//     * Redistributions in binary form must reproduce the above copyright notice,
+//       this list of conditions and the following disclaimers in the
+//       documentation and/or other materials provided with the distribution.
+//
+//     * Neither the names of the LLVM Team, University of Illinois at
+//       Urbana-Champaign, nor the names of its contributors may be used to
+//       endorse or promote products derived from this Software without specific
+//       prior written permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+// SOFTWARE.
+#ifndef INTERNAL_BRIG_H
+#define INTERNAL_BRIG_H
+
+#include <stdint.h>
+
+namespace Brig {
+#include "Brig_new.hpp"
+
+// These typedefs provide some backward compatibility with earlier versions
+// of Brig.h, reducing the number of code changes. The distinct names also
+// increase legibility by showing the code's intent.
+typedef BrigBase BrigDirective;
+typedef BrigBase BrigOperand;
+
+enum BrigMemoryFenceSegments { // for internal use only
+    //.mnemo={ s/^BRIG_MEMORY_FENCE_SEGMENT_//;lc }
+    //.mnemo_token=_EMMemoryFenceSegments
+    //.mnemo_context=EInstModifierInstFenceContext
+    BRIG_MEMORY_FENCE_SEGMENT_GLOBAL = 0,
+    BRIG_MEMORY_FENCE_SEGMENT_GROUP = 1,
+    BRIG_MEMORY_FENCE_SEGMENT_IMAGE = 2,
+    BRIG_MEMORY_FENCE_SEGMENT_LAST = 3 //.skip
+};
+
+}
+
+#endif // defined(INTERNAL_BRIG_H)

diff --git a/src/arch/hsail/Brig_new.hpp b/src/arch/hsail/Brig_new.hpp
new file mode 100644
index 000000000..60e6f4dea
--- /dev/null
+++ b/src/arch/hsail/Brig_new.hpp
@@ -0,0 +1,1587 @@
+// University of Illinois/NCSA
+// Open Source License
+//
+// Copyright (c) 2013-2015, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Developed by:
+//
+//     HSA Team
+//
+//     Advanced Micro Devices, Inc
+//
+//     www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal with
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+//
+//     * Redistributions of source code must retain the above copyright notice,
+//       this list of conditions and the following disclaimers.
+//
+//     * Redistributions in binary form must reproduce the above copyright notice,
+//       this list of conditions and the following disclaimers in the
+//       documentation and/or other materials provided with the distribution.
+//
+//     * Neither the names of the LLVM Team, University of Illinois at
+//       Urbana-Champaign, nor the names of its contributors may be used to
+//       endorse or promote products derived from this Software without specific
+//       prior written permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+// SOFTWARE.
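Before the Brig_new.hpp body, a brief aside on the wrapper just added above: every BRIG directive, instruction, and operand entry begins with the BrigBase header (declared further down in Brig_new.hpp), so client code can walk a binary BRIG section generically. The following minimal sketch is not part of the commit; the function name, the raw-pointer interface, and the include path are assumptions for illustration only.

```cpp
// Minimal sketch, assuming a pointer to the first entry of a BRIG code
// section (i.e., the BrigSectionHeader has already been skipped) and that
// Brig.h is reachable on the include path.
#include <cstddef>
#include <cstdint>

#include "arch/hsail/Brig.h"

size_t
countInsts(const uint8_t *entries, size_t size)
{
    size_t n = 0;
    size_t offset = 0;

    while (offset < size) {
        auto *e = reinterpret_cast<const Brig::BrigBase*>(entries + offset);

        // kind values in [BRIG_KIND_INST_BEGIN, BRIG_KIND_INST_END) denote
        // instructions (BrigInstBase and its subtypes).
        if (e->kind >= Brig::BRIG_KIND_INST_BEGIN &&
            e->kind < Brig::BRIG_KIND_INST_END) {
            ++n;
        }

        // byteCount gives the full size of this entry, so entries of
        // unknown kind can be skipped without decoding them.
        offset += e->byteCount;
    }

    return n;
}
```

The decoder generated by gen.py later in this patch consumes the same BrigInstBase entries, switching on ib->opcode.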
+ +//.ignore{ + +#ifndef INCLUDED_BRIG_H +#define INCLUDED_BRIG_H + +#include <stdint.h> + +enum BrigAuxDefs { + MAX_OPERANDS_NUM = 6 +}; + +//} + +typedef uint32_t BrigVersion32_t; + +enum BrigVersion { + + //.nowrap + //.nodump + //.nollvm + + BRIG_VERSION_HSAIL_MAJOR = 1, + BRIG_VERSION_HSAIL_MINOR = 0, + BRIG_VERSION_BRIG_MAJOR = 1, + BRIG_VERSION_BRIG_MINOR = 0 +}; + +typedef uint8_t BrigAlignment8_t; //.defValue=BRIG_ALIGNMENT_NONE + +typedef uint8_t BrigAllocation8_t; //.defValue=BRIG_ALLOCATION_NONE + +typedef uint8_t BrigAluModifier8_t; + +typedef uint8_t BrigAtomicOperation8_t; + +typedef uint32_t BrigCodeOffset32_t; //.defValue=0 //.wtype=ItemRef<Code> + +typedef uint8_t BrigCompareOperation8_t; + +typedef uint16_t BrigControlDirective16_t; + +typedef uint32_t BrigDataOffset32_t; + +typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t; //.wtype=ListRef<Code> //.defValue=0 + +typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t; //.wtype=ListRef<Operand> //.defValue=0 + +typedef BrigDataOffset32_t BrigDataOffsetString32_t; //.wtype=StrRef //.defValue=0 + +typedef uint8_t BrigExecutableModifier8_t; + +typedef uint8_t BrigImageChannelOrder8_t; //.defValue=BRIG_CHANNEL_ORDER_UNKNOWN + +typedef uint8_t BrigImageChannelType8_t; //.defValue=BRIG_CHANNEL_TYPE_UNKNOWN + +typedef uint8_t BrigImageGeometry8_t; //.defValue=BRIG_GEOMETRY_UNKNOWN + +typedef uint8_t BrigImageQuery8_t; + +typedef uint16_t BrigKind16_t; + +typedef uint8_t BrigLinkage8_t; //.defValue=BRIG_LINKAGE_NONE + +typedef uint8_t BrigMachineModel8_t; //.defValue=BRIG_MACHINE_LARGE + +typedef uint8_t BrigMemoryModifier8_t; + +typedef uint8_t BrigMemoryOrder8_t; //.defValue=BRIG_MEMORY_ORDER_RELAXED + +typedef uint8_t BrigMemoryScope8_t; //.defValue=BRIG_MEMORY_SCOPE_SYSTEM + +typedef uint16_t BrigOpcode16_t; + +typedef uint32_t BrigOperandOffset32_t; //.defValue=0 //.wtype=ItemRef<Operand> + +typedef uint8_t BrigPack8_t; //.defValue=BRIG_PACK_NONE + +typedef uint8_t BrigProfile8_t; //.defValue=BRIG_PROFILE_FULL + +typedef uint16_t BrigRegisterKind16_t; + +typedef uint8_t BrigRound8_t; //.defValue=BRIG_ROUND_NONE + +typedef uint8_t BrigSamplerAddressing8_t; //.defValue=BRIG_ADDRESSING_CLAMP_TO_EDGE + +typedef uint8_t BrigSamplerCoordNormalization8_t; + +typedef uint8_t BrigSamplerFilter8_t; + +typedef uint8_t BrigSamplerQuery8_t; + +typedef uint32_t BrigSectionIndex32_t; + +typedef uint8_t BrigSegCvtModifier8_t; + +typedef uint8_t BrigSegment8_t; //.defValue=BRIG_SEGMENT_NONE + +typedef uint32_t BrigStringOffset32_t; //.defValue=0 //.wtype=StrRef + +typedef uint16_t BrigType16_t; + +typedef uint8_t BrigVariableModifier8_t; + +typedef uint8_t BrigWidth8_t; + +typedef uint32_t BrigExceptions32_t; + +enum BrigKind { + + //.nollvm + // + //.wname={ s/^BRIG_KIND//; MACRO2Name($_) } + //.mnemo=$wname{ $wname } + // + //.sizeof=$wname{ "sizeof(".$structs->{"Brig".$wname}->{rawbrig}.")" } + //.sizeof_switch //.sizeof_proto="int size_of_brig_record(unsigned arg)" //.sizeof_default="return -1" + // + //.isBodyOnly={ "false" } + //.isBodyOnly_switch //.isBodyOnly_proto="bool isBodyOnly(Directive d)" //.isBodyOnly_arg="d.kind()" + //.isBodyOnly_default="assert(false); return false" + // + //.isToplevelOnly={ "false" } + //.isToplevelOnly_switch //.isToplevelOnly_proto="bool isToplevelOnly(Directive d)" //.isToplevelOnly_arg="d.kind()" + //.isToplevelOnly_default="assert(false); return false" + + BRIG_KIND_NONE = 0x0000, //.skip + + BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, //.skip + BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000, 
//.isBodyOnly=true + BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_COMMENT = 0x1002, + BRIG_KIND_DIRECTIVE_CONTROL = 0x1003, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005, + BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_KERNEL = 0x1008, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_LABEL = 0x1009, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_LOC = 0x100a, + BRIG_KIND_DIRECTIVE_MODULE = 0x100b, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c, + BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e, + BRIG_KIND_DIRECTIVE_END = 0x100f, //.skip + + BRIG_KIND_INST_BEGIN = 0x2000, //.skip + BRIG_KIND_INST_ADDR = 0x2000, + BRIG_KIND_INST_ATOMIC = 0x2001, + BRIG_KIND_INST_BASIC = 0x2002, + BRIG_KIND_INST_BR = 0x2003, + BRIG_KIND_INST_CMP = 0x2004, + BRIG_KIND_INST_CVT = 0x2005, + BRIG_KIND_INST_IMAGE = 0x2006, + BRIG_KIND_INST_LANE = 0x2007, + BRIG_KIND_INST_MEM = 0x2008, + BRIG_KIND_INST_MEM_FENCE = 0x2009, + BRIG_KIND_INST_MOD = 0x200a, + BRIG_KIND_INST_QUERY_IMAGE = 0x200b, + BRIG_KIND_INST_QUERY_SAMPLER = 0x200c, + BRIG_KIND_INST_QUEUE = 0x200d, + BRIG_KIND_INST_SEG = 0x200e, + BRIG_KIND_INST_SEG_CVT = 0x200f, + BRIG_KIND_INST_SIGNAL = 0x2010, + BRIG_KIND_INST_SOURCE_TYPE = 0x2011, + BRIG_KIND_INST_END = 0x2012, //.skip + + BRIG_KIND_OPERAND_BEGIN = 0x3000, //.skip + BRIG_KIND_OPERAND_ADDRESS = 0x3000, + BRIG_KIND_OPERAND_ALIGN = 0x3001, + BRIG_KIND_OPERAND_CODE_LIST = 0x3002, + BRIG_KIND_OPERAND_CODE_REF = 0x3003, + BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004, + BRIG_KIND_OPERAND_RESERVED = 0x3005, //.skip + BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006, + BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007, + BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008, + BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009, + BRIG_KIND_OPERAND_REGISTER = 0x300a, + BRIG_KIND_OPERAND_STRING = 0x300b, + BRIG_KIND_OPERAND_WAVESIZE = 0x300c, + BRIG_KIND_OPERAND_END = 0x300d //.skip +}; + +enum BrigAlignment { + + //.mnemo={ s/^BRIG_ALIGNMENT_//; lc } + //.mnemo_proto="const char* align2str(unsigned arg)" + // + //.bytes={ /(\d+)/ ? $1 : undef } + //.bytes_switch //.bytes_proto="unsigned align2num(unsigned arg)" //.bytes_default="assert(false); return -1" + // + //.rbytes=$bytes{ $bytes } + //.rbytes_switch //.rbytes_reverse //.rbytes_proto="BrigAlignment num2align(uint64_t arg)" + //.rbytes_default="return BRIG_ALIGNMENT_LAST" + // + //.print=$bytes{ $bytes>1 ? 
"_align($bytes)" : "" } + + BRIG_ALIGNMENT_NONE = 0, //.no_mnemo + BRIG_ALIGNMENT_1 = 1, //.mnemo="" + BRIG_ALIGNMENT_2 = 2, + BRIG_ALIGNMENT_4 = 3, + BRIG_ALIGNMENT_8 = 4, + BRIG_ALIGNMENT_16 = 5, + BRIG_ALIGNMENT_32 = 6, + BRIG_ALIGNMENT_64 = 7, + BRIG_ALIGNMENT_128 = 8, + BRIG_ALIGNMENT_256 = 9, + + BRIG_ALIGNMENT_LAST, //.skip + BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1 //.skip +}; + +enum BrigAllocation { + + //.mnemo={ s/^BRIG_ALLOCATION_//;lc } + //.mnemo_token=EAllocKind + + BRIG_ALLOCATION_NONE = 0, //.mnemo="" + BRIG_ALLOCATION_PROGRAM = 1, + BRIG_ALLOCATION_AGENT = 2, + BRIG_ALLOCATION_AUTOMATIC = 3 +}; + +enum BrigAluModifierMask { + BRIG_ALU_FTZ = 1 +}; + +enum BrigAtomicOperation { + + //.tdcaption="Atomic Operations" + // + //.mnemo={ s/^BRIG_ATOMIC_//;lc } + //.mnemo_token=_EMAtomicOp + //.mnemo_context=EInstModifierInstAtomicContext + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_ATOMIC_ADD = 0, + BRIG_ATOMIC_AND = 1, + BRIG_ATOMIC_CAS = 2, + BRIG_ATOMIC_EXCH = 3, + BRIG_ATOMIC_LD = 4, + BRIG_ATOMIC_MAX = 5, + BRIG_ATOMIC_MIN = 6, + BRIG_ATOMIC_OR = 7, + BRIG_ATOMIC_ST = 8, + BRIG_ATOMIC_SUB = 9, + BRIG_ATOMIC_WRAPDEC = 10, + BRIG_ATOMIC_WRAPINC = 11, + BRIG_ATOMIC_XOR = 12, + BRIG_ATOMIC_WAIT_EQ = 13, + BRIG_ATOMIC_WAIT_NE = 14, + BRIG_ATOMIC_WAIT_LT = 15, + BRIG_ATOMIC_WAIT_GTE = 16, + BRIG_ATOMIC_WAITTIMEOUT_EQ = 17, + BRIG_ATOMIC_WAITTIMEOUT_NE = 18, + BRIG_ATOMIC_WAITTIMEOUT_LT = 19, + BRIG_ATOMIC_WAITTIMEOUT_GTE = 20 +}; + +enum BrigCompareOperation { + + //.tdcaption="Comparison Operators" + // + //.mnemo={ s/^BRIG_COMPARE_//;lc } + //.mnemo_token=_EMCompare + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_COMPARE_EQ = 0, + BRIG_COMPARE_NE = 1, + BRIG_COMPARE_LT = 2, + BRIG_COMPARE_LE = 3, + BRIG_COMPARE_GT = 4, + BRIG_COMPARE_GE = 5, + BRIG_COMPARE_EQU = 6, + BRIG_COMPARE_NEU = 7, + BRIG_COMPARE_LTU = 8, + BRIG_COMPARE_LEU = 9, + BRIG_COMPARE_GTU = 10, + BRIG_COMPARE_GEU = 11, + BRIG_COMPARE_NUM = 12, + BRIG_COMPARE_NAN = 13, + BRIG_COMPARE_SEQ = 14, + BRIG_COMPARE_SNE = 15, + BRIG_COMPARE_SLT = 16, + BRIG_COMPARE_SLE = 17, + BRIG_COMPARE_SGT = 18, + BRIG_COMPARE_SGE = 19, + BRIG_COMPARE_SGEU = 20, + BRIG_COMPARE_SEQU = 21, + BRIG_COMPARE_SNEU = 22, + BRIG_COMPARE_SLTU = 23, + BRIG_COMPARE_SLEU = 24, + BRIG_COMPARE_SNUM = 25, + BRIG_COMPARE_SNAN = 26, + BRIG_COMPARE_SGTU = 27 +}; + +enum BrigControlDirective { + + //.mnemo={ s/^BRIG_CONTROL_//;lc } + //.mnemo_token=EControl + // + //.print=$mnemo{ $mnemo } + + BRIG_CONTROL_NONE = 0, //.skip + BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1, + BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2, + BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3, + BRIG_CONTROL_MAXFLATGRIDSIZE = 4, + BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5, + BRIG_CONTROL_REQUIREDDIM = 6, + BRIG_CONTROL_REQUIREDGRIDSIZE = 7, + BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8, + BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9 +}; + +enum BrigExecutableModifierMask { + //.nodump + BRIG_EXECUTABLE_DEFINITION = 1 +}; + +enum BrigImageChannelOrder { + + //.mnemo={ s/^BRIG_CHANNEL_ORDER_?//;lc } + //.mnemo_token=EImageOrder + //.mnemo_context=EImageOrderContext + // + //.print=$mnemo{ $mnemo } + + BRIG_CHANNEL_ORDER_A = 0, + BRIG_CHANNEL_ORDER_R = 1, + BRIG_CHANNEL_ORDER_RX = 2, + BRIG_CHANNEL_ORDER_RG = 3, + BRIG_CHANNEL_ORDER_RGX = 4, + BRIG_CHANNEL_ORDER_RA = 5, + BRIG_CHANNEL_ORDER_RGB = 6, + BRIG_CHANNEL_ORDER_RGBX = 7, + BRIG_CHANNEL_ORDER_RGBA = 8, + BRIG_CHANNEL_ORDER_BGRA = 9, + BRIG_CHANNEL_ORDER_ARGB = 10, + BRIG_CHANNEL_ORDER_ABGR = 11, + BRIG_CHANNEL_ORDER_SRGB = 12, + BRIG_CHANNEL_ORDER_SRGBX 
= 13, + BRIG_CHANNEL_ORDER_SRGBA = 14, + BRIG_CHANNEL_ORDER_SBGRA = 15, + BRIG_CHANNEL_ORDER_INTENSITY = 16, + BRIG_CHANNEL_ORDER_LUMINANCE = 17, + BRIG_CHANNEL_ORDER_DEPTH = 18, + BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19, + + // used internally + BRIG_CHANNEL_ORDER_UNKNOWN, //.mnemo="" // used when no order is specified + + BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 //.skip + +}; + +enum BrigImageChannelType { + + //.mnemo={ s/^BRIG_CHANNEL_TYPE_//;lc } + //.mnemo_token=EImageFormat + // + //.print=$mnemo{ $mnemo } + + BRIG_CHANNEL_TYPE_SNORM_INT8 = 0, + BRIG_CHANNEL_TYPE_SNORM_INT16 = 1, + BRIG_CHANNEL_TYPE_UNORM_INT8 = 2, + BRIG_CHANNEL_TYPE_UNORM_INT16 = 3, + BRIG_CHANNEL_TYPE_UNORM_INT24 = 4, + BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5, + BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6, + BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7, + BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8, + BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9, + BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10, + BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11, + BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12, + BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13, + BRIG_CHANNEL_TYPE_HALF_FLOAT = 14, + BRIG_CHANNEL_TYPE_FLOAT = 15, + + // used internally + BRIG_CHANNEL_TYPE_UNKNOWN, //.mnemo="" + + BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigImageGeometry { + + //.tdcaption="Geometry" + // + //.mnemo={ s/^BRIG_GEOMETRY_//;lc } + //.mnemo_token=EImageGeometry + // + //.dim={/_([0-9]+D)(A)?/ ? $1+(defined $2?1:0) : undef} + //.dim_switch //.dim_proto="unsigned getBrigGeometryDim(unsigned geo)" //.dim_arg="geo" + //.dim_default="assert(0); return 0" + // + //.depth={/DEPTH$/?"true":"false"} + //.depth_switch //.depth_proto="bool isBrigGeometryDepth(unsigned geo)" //.depth_arg="geo" + //.depth_default="return false" + + BRIG_GEOMETRY_1D = 0, + BRIG_GEOMETRY_2D = 1, + BRIG_GEOMETRY_3D = 2, + BRIG_GEOMETRY_1DA = 3, + BRIG_GEOMETRY_2DA = 4, + BRIG_GEOMETRY_1DB = 5, + BRIG_GEOMETRY_2DDEPTH = 6, + BRIG_GEOMETRY_2DADEPTH = 7, + + // used internally + BRIG_GEOMETRY_UNKNOWN, //.mnemo="" + + BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigImageQuery { + + //.mnemo={ s/^BRIG_IMAGE_QUERY_//;lc } + // + //.print=$mnemo{ $mnemo } + + BRIG_IMAGE_QUERY_WIDTH = 0, + BRIG_IMAGE_QUERY_HEIGHT = 1, + BRIG_IMAGE_QUERY_DEPTH = 2, + BRIG_IMAGE_QUERY_ARRAY = 3, + BRIG_IMAGE_QUERY_CHANNELORDER = 4, + BRIG_IMAGE_QUERY_CHANNELTYPE = 5, + BRIG_IMAGE_QUERY_NUMMIPLEVELS = 6 +}; + +enum BrigLinkage { + + //.mnemo={ s/^BRIG_LINKAGE_//;s/NONE//;lc } + + BRIG_LINKAGE_NONE = 0, + BRIG_LINKAGE_PROGRAM = 1, + BRIG_LINKAGE_MODULE = 2, + BRIG_LINKAGE_FUNCTION = 3, + BRIG_LINKAGE_ARG = 4 +}; + +enum BrigMachineModel { + + //.mnemo={ s/^BRIG_MACHINE_//; '$'.lc } + //.mnemo_token=ETargetMachine + // + //.print=$mnemo{ $mnemo } + + BRIG_MACHINE_SMALL = 0, + BRIG_MACHINE_LARGE = 1, + + BRIG_MACHINE_UNDEF = 2 //.skip +}; + +enum BrigMemoryModifierMask { //.tddef=0 + BRIG_MEMORY_CONST = 1 +}; + +enum BrigMemoryOrder { + + //.mnemo={ s/^BRIG_MEMORY_ORDER_//; lc } + //.mnemo_token=_EMMemoryOrder + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_MEMORY_ORDER_NONE = 0, //.mnemo="" + BRIG_MEMORY_ORDER_RELAXED = 1, //.mnemo=rlx + BRIG_MEMORY_ORDER_SC_ACQUIRE = 2, //.mnemo=scacq + BRIG_MEMORY_ORDER_SC_RELEASE = 3, //.mnemo=screl + BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4, //.mnemo=scar + + BRIG_MEMORY_ORDER_LAST = 5 //.skip +}; + +enum BrigMemoryScope { + + //.mnemo={ s/^BRIG_MEMORY_SCOPE_//; lc } + //.mnemo_token=_EMMemoryScope + // + //.print=$mnemo{ $mnemo } + + BRIG_MEMORY_SCOPE_NONE = 0, //.mnemo="" + 
BRIG_MEMORY_SCOPE_WORKITEM = 1, //.mnemo="" + BRIG_MEMORY_SCOPE_WAVEFRONT = 2, //.mnemo=wave + BRIG_MEMORY_SCOPE_WORKGROUP = 3, //.mnemo=wg + BRIG_MEMORY_SCOPE_AGENT = 4, //.mnemo=agent + BRIG_MEMORY_SCOPE_SYSTEM = 5, //.mnemo=system + + BRIG_MEMORY_SCOPE_LAST = 6 //.skip +}; + +enum BrigOpcode { + + //.tdcaption="Instruction Opcodes" + // + //.k={ "BASIC" } + //.pscode=$k{ MACRO2Name("_".$k) } + //.opcodeparser=$pscode{ return $pscode && "parseMnemo$pscode" } + //.opcodeparser_incfile=ParserUtilities + //.opcodeparser_switch //.opcodeparser_proto="OpcodeParser getOpcodeParser(BrigOpcode16_t arg)" //.opcodeparser_default="return parseMnemoBasic" + // + //.psopnd={undef} + //.opndparser=$psopnd{ return $psopnd && "&Parser::parse$psopnd" } + //.opndparser_incfile=ParserUtilities + //.opndparser_switch //.opndparser_proto="Parser::OperandParser Parser::getOperandParser(BrigOpcode16_t arg)" //.opndparser_default="return &Parser::parseOperands" + // + //.mnemo={ s/^BRIG_OPCODE_//; s/GCN([^_])/GCN_$1/; lc } + //.mnemo_scanner=Instructions //.mnemo_token=EInstruction + //.mnemo_context=EDefaultContext + // + //.has_memory_order={undef} + //.semsupport=$has_memory_order{ return $has_memory_order && "true" } + // + //.hasType=$k{ return ($k and $k eq "BASIC_NO_TYPE") ? "false" : undef; } + //.hasType_switch //.hasType_proto="bool instHasType(BrigOpcode16_t arg)" //.hasType_default="return true" + // + //.opcodevis=$pscode{ s/^BRIG_OPCODE_//; sprintf("%-47s(","vis.visitOpcode_".$_) . ($pscode =~m/^(BasicOrMod|Nop)$/? "inst" : "HSAIL_ASM::Inst". ($pscode=~m/BasicNoType/? "Basic":$pscode) ."(inst)").")" } + //.opcodevis_switch //.opcodevis_proto="template <typename RetType, typename Visitor> RetType visitOpcode_gen(HSAIL_ASM::Inst inst, Visitor& vis)" + //.opcodevis_arg="inst.opcode()" //.opcodevis_default="return RetType()" + //.opcodevis_incfile=ItemUtils + // + //.ftz=$k{ return ($k eq "BASIC_OR_MOD" or $k eq "CMP" or $k eq "CVT") ? 
"true" : undef } + //.ftz_incfile=ItemUtils //.ftz_switch //.ftz_proto="inline bool instSupportsFtz(BrigOpcode16_t arg)" //.ftz_default="return false" + // + //.vecOpndIndex={undef} + //.vecOpndIndex_switch //.vecOpndIndex_proto="int vecOpndIndex(BrigOpcode16_t arg)" //.vecOpndIndex_default="return -1" + //.vecOpndIndex_incfile=ParserUtilities + // + //.numdst={undef} + //.numdst_switch //.numdst_proto="int instNumDstOperands(BrigOpcode16_t arg)" //.numdst_default="return 1" + // + //.print=$mnemo{ $mnemo } + + BRIG_OPCODE_NOP = 0, //.k=NOP //.hasType=false + BRIG_OPCODE_ABS = 1, //.k=BASIC_OR_MOD + BRIG_OPCODE_ADD = 2, //.k=BASIC_OR_MOD + BRIG_OPCODE_BORROW = 3, + BRIG_OPCODE_CARRY = 4, + BRIG_OPCODE_CEIL = 5, //.k=BASIC_OR_MOD + BRIG_OPCODE_COPYSIGN = 6, //.k=BASIC_OR_MOD + BRIG_OPCODE_DIV = 7, //.k=BASIC_OR_MOD + BRIG_OPCODE_FLOOR = 8, //.k=BASIC_OR_MOD + BRIG_OPCODE_FMA = 9, //.k=BASIC_OR_MOD + BRIG_OPCODE_FRACT = 10, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAD = 11, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAX = 12, //.k=BASIC_OR_MOD + BRIG_OPCODE_MIN = 13, //.k=BASIC_OR_MOD + BRIG_OPCODE_MUL = 14, //.k=BASIC_OR_MOD + BRIG_OPCODE_MULHI = 15, //.k=BASIC_OR_MOD + BRIG_OPCODE_NEG = 16, //.k=BASIC_OR_MOD + BRIG_OPCODE_REM = 17, + BRIG_OPCODE_RINT = 18, //.k=BASIC_OR_MOD + BRIG_OPCODE_SQRT = 19, //.k=BASIC_OR_MOD + BRIG_OPCODE_SUB = 20, //.k=BASIC_OR_MOD + BRIG_OPCODE_TRUNC = 21, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAD24 = 22, + BRIG_OPCODE_MAD24HI = 23, + BRIG_OPCODE_MUL24 = 24, + BRIG_OPCODE_MUL24HI = 25, + BRIG_OPCODE_SHL = 26, + BRIG_OPCODE_SHR = 27, + BRIG_OPCODE_AND = 28, + BRIG_OPCODE_NOT = 29, + BRIG_OPCODE_OR = 30, + BRIG_OPCODE_POPCOUNT = 31, //.k=SOURCE_TYPE + BRIG_OPCODE_XOR = 32, + BRIG_OPCODE_BITEXTRACT = 33, + BRIG_OPCODE_BITINSERT = 34, + BRIG_OPCODE_BITMASK = 35, + BRIG_OPCODE_BITREV = 36, + BRIG_OPCODE_BITSELECT = 37, + BRIG_OPCODE_FIRSTBIT = 38, //.k=SOURCE_TYPE + BRIG_OPCODE_LASTBIT = 39, //.k=SOURCE_TYPE + BRIG_OPCODE_COMBINE = 40, //.k=SOURCE_TYPE //.vecOpndIndex=1 + BRIG_OPCODE_EXPAND = 41, //.k=SOURCE_TYPE //.vecOpndIndex=0 + BRIG_OPCODE_LDA = 42, //.k=ADDR + BRIG_OPCODE_MOV = 43, + BRIG_OPCODE_SHUFFLE = 44, + BRIG_OPCODE_UNPACKHI = 45, + BRIG_OPCODE_UNPACKLO = 46, + BRIG_OPCODE_PACK = 47, //.k=SOURCE_TYPE + BRIG_OPCODE_UNPACK = 48, //.k=SOURCE_TYPE + BRIG_OPCODE_CMOV = 49, + BRIG_OPCODE_CLASS = 50, //.k=SOURCE_TYPE + BRIG_OPCODE_NCOS = 51, + BRIG_OPCODE_NEXP2 = 52, + BRIG_OPCODE_NFMA = 53, + BRIG_OPCODE_NLOG2 = 54, + BRIG_OPCODE_NRCP = 55, + BRIG_OPCODE_NRSQRT = 56, + BRIG_OPCODE_NSIN = 57, + BRIG_OPCODE_NSQRT = 58, + BRIG_OPCODE_BITALIGN = 59, + BRIG_OPCODE_BYTEALIGN = 60, + BRIG_OPCODE_PACKCVT = 61, //.k=SOURCE_TYPE + BRIG_OPCODE_UNPACKCVT = 62, //.k=SOURCE_TYPE + BRIG_OPCODE_LERP = 63, + BRIG_OPCODE_SAD = 64, //.k=SOURCE_TYPE + BRIG_OPCODE_SADHI = 65, //.k=SOURCE_TYPE + BRIG_OPCODE_SEGMENTP = 66, //.k=SEG_CVT + BRIG_OPCODE_FTOS = 67, //.k=SEG_CVT + BRIG_OPCODE_STOF = 68, //.k=SEG_CVT + BRIG_OPCODE_CMP = 69, //.k=CMP + BRIG_OPCODE_CVT = 70, //.k=CVT + BRIG_OPCODE_LD = 71, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_ST = 72, //.k=MEM //.has_memory_order //.vecOpndIndex=0 //.numdst=0 + BRIG_OPCODE_ATOMIC = 73, //.k=ATOMIC + BRIG_OPCODE_ATOMICNORET = 74, //.k=ATOMIC //.numdst=0 + BRIG_OPCODE_SIGNAL = 75, //.k=SIGNAL + BRIG_OPCODE_SIGNALNORET = 76, //.k=SIGNAL //.numdst=0 + BRIG_OPCODE_MEMFENCE = 77, //.k=MEM_FENCE //.numdst=0 + BRIG_OPCODE_RDIMAGE = 78, //.k=IMAGE //.vecOpndIndex=0 + BRIG_OPCODE_LDIMAGE = 79, //.k=IMAGE //.vecOpndIndex=0 + BRIG_OPCODE_STIMAGE = 80, 
//.k=IMAGE //.vecOpndIndex=0 //.numdst=0 + BRIG_OPCODE_IMAGEFENCE = 81, //.k=BASIC_NO_TYPE + BRIG_OPCODE_QUERYIMAGE = 82, //.k=QUERY_IMAGE + BRIG_OPCODE_QUERYSAMPLER = 83, //.k=QUERY_SAMPLER + BRIG_OPCODE_CBR = 84, //.k=BR //.numdst=0 + BRIG_OPCODE_BR = 85, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_SBR = 86, //.k=BR //.numdst=0 //.psopnd=SbrOperands + BRIG_OPCODE_BARRIER = 87, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_WAVEBARRIER = 88, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_ARRIVEFBAR = 89, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_INITFBAR = 90, //.k=BASIC_NO_TYPE //.numdst=0 //.hasType=false + BRIG_OPCODE_JOINFBAR = 91, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_LEAVEFBAR = 92, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_RELEASEFBAR = 93, //.k=BASIC_NO_TYPE //.numdst=0 + BRIG_OPCODE_WAITFBAR = 94, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_LDF = 95, + BRIG_OPCODE_ACTIVELANECOUNT = 96, //.k=LANE + BRIG_OPCODE_ACTIVELANEID = 97, //.k=LANE + BRIG_OPCODE_ACTIVELANEMASK = 98, //.k=LANE //.vecOpndIndex=0 + BRIG_OPCODE_ACTIVELANEPERMUTE = 99, //.k=LANE + BRIG_OPCODE_CALL = 100, //.k=BR //.psopnd=CallOperands //.numdst=0 //.hasType=false + BRIG_OPCODE_SCALL = 101, //.k=BR //.psopnd=CallOperands //.numdst=0 + BRIG_OPCODE_ICALL = 102, //.k=BR //.psopnd=CallOperands //.numdst=0 + BRIG_OPCODE_RET = 103, //.k=BASIC_NO_TYPE + BRIG_OPCODE_ALLOCA = 104, //.k=MEM + BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105, + BRIG_OPCODE_CURRENTWORKITEMFLATID = 106, + BRIG_OPCODE_DIM = 107, + BRIG_OPCODE_GRIDGROUPS = 108, + BRIG_OPCODE_GRIDSIZE = 109, + BRIG_OPCODE_PACKETCOMPLETIONSIG = 110, + BRIG_OPCODE_PACKETID = 111, + BRIG_OPCODE_WORKGROUPID = 112, + BRIG_OPCODE_WORKGROUPSIZE = 113, + BRIG_OPCODE_WORKITEMABSID = 114, + BRIG_OPCODE_WORKITEMFLATABSID = 115, + BRIG_OPCODE_WORKITEMFLATID = 116, + BRIG_OPCODE_WORKITEMID = 117, + BRIG_OPCODE_CLEARDETECTEXCEPT = 118, //.numdst=0 + BRIG_OPCODE_GETDETECTEXCEPT = 119, + BRIG_OPCODE_SETDETECTEXCEPT = 120, //.numdst=0 + BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121, //.k=QUEUE + BRIG_OPCODE_CASQUEUEWRITEINDEX = 122, //.k=QUEUE + BRIG_OPCODE_LDQUEUEREADINDEX = 123, //.k=QUEUE + BRIG_OPCODE_LDQUEUEWRITEINDEX = 124, //.k=QUEUE + BRIG_OPCODE_STQUEUEREADINDEX = 125, //.k=QUEUE //.numdst=0 + BRIG_OPCODE_STQUEUEWRITEINDEX = 126, //.k=QUEUE //.numdst=0 + BRIG_OPCODE_CLOCK = 127, + BRIG_OPCODE_CUID = 128, + BRIG_OPCODE_DEBUGTRAP = 129, //.numdst=0 + BRIG_OPCODE_GROUPBASEPTR = 130, + BRIG_OPCODE_KERNARGBASEPTR = 131, + BRIG_OPCODE_LANEID = 132, + BRIG_OPCODE_MAXCUID = 133, + BRIG_OPCODE_MAXWAVEID = 134, + BRIG_OPCODE_NULLPTR = 135, //.k=SEG + BRIG_OPCODE_WAVEID = 136, + BRIG_OPCODE_FIRST_USER_DEFINED = 32768, //.skip + + BRIG_OPCODE_GCNMADU = (1u << 15) | 0, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNMADS = (1u << 15) | 1, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2, + BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3, + BRIG_OPCODE_GCNMED3 = (1u << 15) | 4, + BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNBFM = (1u << 15) | 9, + BRIG_OPCODE_GCNLD = (1u << 15) | 10, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_GCNST = (1u << 15) | 11, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12, //.k=ATOMIC + BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13, //.k=ATOMIC 
//.mnemo=gcn_atomicNoRet + BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14, + BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15, + BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16, //.k=BASIC_NO_TYPE //.mnemo=gcn_region_alloc + BRIG_OPCODE_GCNMSAD = (1u << 15) | 17, + BRIG_OPCODE_GCNQSAD = (1u << 15) | 18, + BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19, + BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNSADW = (1u << 15) | 21, + BRIG_OPCODE_GCNSADD = (1u << 15) | 22, + BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23, //.k=ADDR //.mnemo=gcn_atomic_consume + BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24, //.k=ADDR //.mnemo=gcn_atomic_append + BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25, //.mnemo=gcn_b4xchg + BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26, //.mnemo=gcn_b32xchg + BRIG_OPCODE_GCNMAX = (1u << 15) | 27, + BRIG_OPCODE_GCNMIN = (1u << 15) | 28, + BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30, + + BRIG_OPCODE_AMDRDIMAGELOD = (1u << 15) | 31, //.k=IMAGE //.mnemo=amd_rdimagelod //.vecOpndIndex=0 + BRIG_OPCODE_AMDRDIMAGEGRAD = (1u << 15) | 32, //.k=IMAGE //.mnemo=amd_rdimagegrad //.vecOpndIndex=0 + BRIG_OPCODE_AMDLDIMAGEMIP = (1u << 15) | 33, //.k=IMAGE //.mnemo=amd_ldimagemip //.vecOpndIndex=0 + BRIG_OPCODE_AMDSTIMAGEMIP = (1u << 15) | 34, //.k=IMAGE //.mnemo=amd_stimagemip //.vecOpndIndex=0 //.numdst=0 + BRIG_OPCODE_AMDQUERYIMAGE = (1u << 15) | 35 //.k=QUERY_IMAGE //.mnemo=amd_queryimage +}; + +enum BrigPack { + + //.tdcaption="Packing" + // + //.mnemo={ s/^BRIG_PACK_//;s/SAT$/_sat/;lc } + //.mnemo_token=_EMPacking + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_PACK_NONE = 0, //.mnemo="" + BRIG_PACK_PP = 1, + BRIG_PACK_PS = 2, + BRIG_PACK_SP = 3, + BRIG_PACK_SS = 4, + BRIG_PACK_S = 5, + BRIG_PACK_P = 6, + BRIG_PACK_PPSAT = 7, + BRIG_PACK_PSSAT = 8, + BRIG_PACK_SPSAT = 9, + BRIG_PACK_SSSAT = 10, + BRIG_PACK_SSAT = 11, + BRIG_PACK_PSAT = 12 +}; + +enum BrigProfile { + + //.mnemo={ s/^BRIG_PROFILE_//;'$'.lc } + //.mnemo_token=ETargetProfile + // + //.print=$mnemo{ $mnemo } + + BRIG_PROFILE_BASE = 0, + BRIG_PROFILE_FULL = 1, + + BRIG_PROFILE_UNDEF = 2 //.skip +}; + +enum BrigRegisterKind { + + //.mnemo={ s/^BRIG_REGISTER_KIND_//;'$'.lc(substr($_,0,1)) } + // + //.bits={ } + //.bits_switch //.bits_proto="unsigned getRegBits(BrigRegisterKind16_t arg)" //.bits_default="return (unsigned)-1" + // + //.nollvm + + BRIG_REGISTER_KIND_CONTROL = 0, //.bits=1 + BRIG_REGISTER_KIND_SINGLE = 1, //.bits=32 + BRIG_REGISTER_KIND_DOUBLE = 2, //.bits=64 + BRIG_REGISTER_KIND_QUAD = 3 //.bits=128 +}; + +enum BrigRound { + + //.mnemo={} + //.mnemo_fn=round2str //.mnemo_token=_EMRound + // + //.sat={/_SAT$/? "true" : "false"} + //.sat_switch //.sat_proto="bool isSatRounding(unsigned rounding)" //.sat_arg="rounding" + //.sat_default="return false" + // + //.sig={/_SIGNALING_/? "true" : "false"} + //.sig_switch //.sig_proto="bool isSignalingRounding(unsigned rounding)" //.sig_arg="rounding" + //.sig_default="return false" + // + //.int={/_INTEGER_/? "true" : "false"} + //.int_switch //.int_proto="bool isIntRounding(unsigned rounding)" //.int_arg="rounding" + //.int_default="return false" + // + //.flt={/_FLOAT_/? 
"true" : "false"} + //.flt_switch //.flt_proto="bool isFloatRounding(unsigned rounding)" //.flt_arg="rounding" + //.flt_default="return false" + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_ROUND_NONE = 0, //.no_mnemo + BRIG_ROUND_FLOAT_DEFAULT = 1, //.no_mnemo + BRIG_ROUND_FLOAT_NEAR_EVEN = 2, //.mnemo=near + BRIG_ROUND_FLOAT_ZERO = 3, //.mnemo=zero + BRIG_ROUND_FLOAT_PLUS_INFINITY = 4, //.mnemo=up + BRIG_ROUND_FLOAT_MINUS_INFINITY = 5, //.mnemo=down + BRIG_ROUND_INTEGER_NEAR_EVEN = 6, //.mnemo=neari + BRIG_ROUND_INTEGER_ZERO = 7, //.mnemo=zeroi + BRIG_ROUND_INTEGER_PLUS_INFINITY = 8, //.mnemo=upi + BRIG_ROUND_INTEGER_MINUS_INFINITY = 9, //.mnemo=downi + BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10, //.mnemo=neari_sat + BRIG_ROUND_INTEGER_ZERO_SAT = 11, //.mnemo=zeroi_sat + BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12, //.mnemo=upi_sat + BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13, //.mnemo=downi_sat + BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14, //.mnemo=sneari + BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15, //.mnemo=szeroi + BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16, //.mnemo=supi + BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17, //.mnemo=sdowni + BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18, //.mnemo=sneari_sat + BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19, //.mnemo=szeroi_sat + BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20, //.mnemo=supi_sat + BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21 //.mnemo=sdowni_sat +}; + +enum BrigSamplerAddressing { + + //.mnemo={ s/^BRIG_ADDRESSING_//;lc } + //.mnemo_token=ESamplerAddressingMode + + BRIG_ADDRESSING_UNDEFINED = 0, + BRIG_ADDRESSING_CLAMP_TO_EDGE = 1, + BRIG_ADDRESSING_CLAMP_TO_BORDER = 2, + BRIG_ADDRESSING_REPEAT = 3, + BRIG_ADDRESSING_MIRRORED_REPEAT = 4, + + BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigSamplerCoordNormalization { + + //.mnemo={ s/^BRIG_COORD_//;lc } + //.mnemo_token=ESamplerCoord + // + //.print=$mnemo{ $mnemo } + + BRIG_COORD_UNNORMALIZED = 0, + BRIG_COORD_NORMALIZED = 1 +}; + +enum BrigSamplerFilter { + + //.mnemo={ s/^BRIG_FILTER_//;lc } + // + //.print=$mnemo{ $mnemo } + + BRIG_FILTER_NEAREST = 0, + BRIG_FILTER_LINEAR = 1, + + BRIG_FILTER_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigSamplerQuery { + + //.mnemo={ s/^BRIG_SAMPLER_QUERY_//;lc } + //.mnemo_token=_EMSamplerQuery + // + //.print=$mnemo{ $mnemo } + + BRIG_SAMPLER_QUERY_ADDRESSING = 0, + BRIG_SAMPLER_QUERY_COORD = 1, + BRIG_SAMPLER_QUERY_FILTER = 2 +}; + +enum BrigSectionIndex { + + //.nollvm + // + //.mnemo={ s/^BRIG_SECTION_INDEX_/HSA_/;lc } + + BRIG_SECTION_INDEX_DATA = 0, + BRIG_SECTION_INDEX_CODE = 1, + BRIG_SECTION_INDEX_OPERAND = 2, + BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3, + + // used internally + BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED //.skip +}; + +enum BrigSegCvtModifierMask { + BRIG_SEG_CVT_NONULL = 1 //.mnemo="nonull" //.print="_nonull" +}; + +enum BrigSegment { + + //.mnemo={ s/^BRIG_SEGMENT_//;lc} + //.mnemo_token=_EMSegment + //.mnemo_context=EInstModifierContext + // + //.print=$mnemo{ $mnemo ? 
"_$mnemo" : "" } + + BRIG_SEGMENT_NONE = 0, //.mnemo="" + BRIG_SEGMENT_FLAT = 1, //.mnemo="" + BRIG_SEGMENT_GLOBAL = 2, + BRIG_SEGMENT_READONLY = 3, + BRIG_SEGMENT_KERNARG = 4, + BRIG_SEGMENT_GROUP = 5, + BRIG_SEGMENT_PRIVATE = 6, + BRIG_SEGMENT_SPILL = 7, + BRIG_SEGMENT_ARG = 8, + + BRIG_SEGMENT_FIRST_USER_DEFINED = 128, //.skip + + BRIG_SEGMENT_AMD_GCN = 9, //.mnemo="region" +}; + +enum BrigPackedTypeBits { + + //.nodump + // + //.nollvm + + BRIG_TYPE_BASE_SIZE = 5, + BRIG_TYPE_PACK_SIZE = 2, + BRIG_TYPE_ARRAY_SIZE = 1, + + BRIG_TYPE_BASE_SHIFT = 0, + BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE, + BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE, + + BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT, + BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT, + + BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT, + + BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT +}; + +enum BrigType { + + //.numBits={ /ARRAY$/ ? undef : /([0-9]+)X([0-9]+)/ ? $1*$2 : /([0-9]+)/ ? $1 : undef } + //.numBits_switch //.numBits_proto="unsigned getBrigTypeNumBits(unsigned arg)" //.numBits_default="assert(0); return 0" + //.numBytes=$numBits{ $numBits > 1 ? $numBits/8 : undef } + //.numBytes_switch //.numBytes_proto="unsigned getBrigTypeNumBytes(unsigned arg)" //.numBytes_default="assert(0); return 0" + // + //.mnemo={ s/^BRIG_TYPE_//;lc } + //.mnemo_token=_EMType + // + //.array={/ARRAY$/?"true":"false"} + //.array_switch //.array_proto="bool isArrayType(unsigned type)" //.array_arg="type" + //.array_default="return false" + // + //.a2e={/(.*)_ARRAY$/? $1 : "BRIG_TYPE_NONE"} + //.a2e_switch //.a2e_proto="unsigned arrayType2elementType(unsigned type)" //.a2e_arg="type" + //.a2e_default="return BRIG_TYPE_NONE" + // + //.e2a={/_ARRAY$/? "BRIG_TYPE_NONE" : /_NONE$/ ? "BRIG_TYPE_NONE" : /_B1$/ ? "BRIG_TYPE_NONE" : $_ . "_ARRAY"} + //.e2a_switch //.e2a_proto="unsigned elementType2arrayType(unsigned type)" //.e2a_arg="type" + //.e2a_default="return BRIG_TYPE_NONE" + // + //.t2s={s/^BRIG_TYPE_//;lc s/_ARRAY$/[]/;lc} + //.t2s_switch //.t2s_proto="const char* type2name(unsigned type)" //.t2s_arg="type" + //.t2s_default="return NULL" + // + //.dispatch_switch //.dispatch_incfile=TemplateUtilities + //.dispatch_proto="template<typename RetType, typename Visitor>\nRetType dispatchByType_gen(unsigned type, Visitor& v)" + //.dispatch={ /ARRAY$/ ? "v.visitNone(type)" : /^BRIG_TYPE_([BUSF]|SIG)[0-9]+/ ? 
"v.template visit< BrigTypeTraits<$_> >()" : "v.visitNone(type)" } + //.dispatch_arg="type" //.dispatch_default="return v.visitNone(type)" + // + //- .tdname=BrigType + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_TYPE_NONE = 0, //.mnemo="" //.print="" + BRIG_TYPE_U8 = 1, //.ctype=uint8_t + BRIG_TYPE_U16 = 2, //.ctype=uint16_t + BRIG_TYPE_U32 = 3, //.ctype=uint32_t + BRIG_TYPE_U64 = 4, //.ctype=uint64_t + BRIG_TYPE_S8 = 5, //.ctype=int8_t + BRIG_TYPE_S16 = 6, //.ctype=int16_t + BRIG_TYPE_S32 = 7, //.ctype=int32_t + BRIG_TYPE_S64 = 8, //.ctype=int64_t + BRIG_TYPE_F16 = 9, //.ctype=f16_t + BRIG_TYPE_F32 = 10, //.ctype=float + BRIG_TYPE_F64 = 11, //.ctype=double + BRIG_TYPE_B1 = 12, //.ctype=bool //.numBytes=1 + BRIG_TYPE_B8 = 13, //.ctype=uint8_t + BRIG_TYPE_B16 = 14, //.ctype=uint16_t + BRIG_TYPE_B32 = 15, //.ctype=uint32_t + BRIG_TYPE_B64 = 16, //.ctype=uint64_t + BRIG_TYPE_B128 = 17, //.ctype=b128_t + BRIG_TYPE_SAMP = 18, //.mnemo=samp //.numBits=64 + BRIG_TYPE_ROIMG = 19, //.mnemo=roimg //.numBits=64 + BRIG_TYPE_WOIMG = 20, //.mnemo=woimg //.numBits=64 + BRIG_TYPE_RWIMG = 21, //.mnemo=rwimg //.numBits=64 + BRIG_TYPE_SIG32 = 22, //.mnemo=sig32 //.numBits=64 + BRIG_TYPE_SIG64 = 23, //.mnemo=sig64 //.numBits=64 + + BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32, //.ctype=uint8_t + BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64, //.ctype=uint8_t + BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128, //.ctype=uint8_t + BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32, //.ctype=uint16_t + BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64, //.ctype=uint16_t + BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, //.ctype=uint16_t + BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64, //.ctype=uint32_t + BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, //.ctype=uint32_t + BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, //.ctype=uint64_t + BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32, //.ctype=int8_t + BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64, //.ctype=int8_t + BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128, //.ctype=int8_t + BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32, //.ctype=int16_t + BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64, //.ctype=int16_t + BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, //.ctype=int16_t + BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64, //.ctype=int32_t + BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, //.ctype=int32_t + BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, //.ctype=int64_t + BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32, //.ctype=f16_t + BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64, //.ctype=f16_t + BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, //.ctype=f16_t + BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64, //.ctype=float + BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, //.ctype=float + BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, //.ctype=double + + BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 
| BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + + // Used internally + BRIG_TYPE_INVALID = (unsigned) -1 //.skip +}; + +enum BrigVariableModifierMask { + + //.nodump + + BRIG_VARIABLE_DEFINITION = 1, + BRIG_VARIABLE_CONST = 2 +}; + +enum BrigWidth { + + //.tddef=1 + // + //.print={ s/^BRIG_WIDTH_//; "_width($_)" } + + BRIG_WIDTH_NONE = 0, + BRIG_WIDTH_1 = 1, + 
BRIG_WIDTH_2 = 2, + BRIG_WIDTH_4 = 3, + BRIG_WIDTH_8 = 4, + BRIG_WIDTH_16 = 5, + BRIG_WIDTH_32 = 6, + BRIG_WIDTH_64 = 7, + BRIG_WIDTH_128 = 8, + BRIG_WIDTH_256 = 9, + BRIG_WIDTH_512 = 10, + BRIG_WIDTH_1024 = 11, + BRIG_WIDTH_2048 = 12, + BRIG_WIDTH_4096 = 13, + BRIG_WIDTH_8192 = 14, + BRIG_WIDTH_16384 = 15, + BRIG_WIDTH_32768 = 16, + BRIG_WIDTH_65536 = 17, + BRIG_WIDTH_131072 = 18, + BRIG_WIDTH_262144 = 19, + BRIG_WIDTH_524288 = 20, + BRIG_WIDTH_1048576 = 21, + BRIG_WIDTH_2097152 = 22, + BRIG_WIDTH_4194304 = 23, + BRIG_WIDTH_8388608 = 24, + BRIG_WIDTH_16777216 = 25, + BRIG_WIDTH_33554432 = 26, + BRIG_WIDTH_67108864 = 27, + BRIG_WIDTH_134217728 = 28, + BRIG_WIDTH_268435456 = 29, + BRIG_WIDTH_536870912 = 30, + BRIG_WIDTH_1073741824 = 31, + BRIG_WIDTH_2147483648 = 32, + BRIG_WIDTH_WAVESIZE = 33, + BRIG_WIDTH_ALL = 34, + + BRIG_WIDTH_LAST //.skip +}; + +struct BrigUInt64 { //.isroot //.standalone + uint32_t lo; //.defValue=0 + uint32_t hi; //.defValue=0 + + //+hcode KLASS& operator=(uint64_t rhs); + //+hcode operator uint64_t(); + //+implcode inline KLASS& KLASS::operator=(uint64_t rhs) { lo() = (uint32_t)rhs; hi() = (uint32_t)(rhs >> 32); return *this; } + //+implcode inline KLASS::operator uint64_t() { return ((uint64_t)hi()) << 32 | lo(); } +}; + +struct BrigAluModifier { //.isroot //.standalone + BrigAluModifier8_t allBits; //.defValue=0 + //^^ bool ftz; //.wtype=BitValRef<0> +}; + +struct BrigBase { //.nowrap + uint16_t byteCount; + BrigKind16_t kind; +}; + +//.alias Code:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_CODE }; +//.alias Directive:Code { //.generic }; +//.alias Operand:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_OPERAND }; + +struct BrigData { + //.nowrap + uint32_t byteCount; + uint8_t bytes[1]; +}; + +struct BrigExecutableModifier { //.isroot //.standalone + BrigExecutableModifier8_t allBits; //.defValue=0 + //^^ bool isDefinition; //.wtype=BitValRef<0> +}; + +struct BrigMemoryModifier { //.isroot //.standalone + BrigMemoryModifier8_t allBits; //.defValue=0 + //^^ bool isConst; //.wtype=BitValRef<0> +}; + +struct BrigSegCvtModifier { //.isroot //.standalone + BrigSegCvtModifier8_t allBits; //.defValue=0 + //^^ bool isNoNull; //.wtype=BitValRef<0> +}; + +struct BrigVariableModifier { //.isroot //.standalone + BrigVariableModifier8_t allBits; //.defValue=0 + + //^^ bool isDefinition; //.wtype=BitValRef<0> + //^^ bool isConst; //.wtype=BitValRef<1> +}; + +struct BrigDirectiveArgBlockEnd { + BrigBase base; +}; + +struct BrigDirectiveArgBlockStart { + BrigBase base; +}; + +struct BrigDirectiveComment { + BrigBase base; + BrigDataOffsetString32_t name; +}; + +struct BrigDirectiveControl { + BrigBase base; + BrigControlDirective16_t control; + uint16_t reserved; //.defValue=0 + BrigDataOffsetOperandList32_t operands; +}; + +struct BrigDirectiveExecutable { //.generic + BrigBase base; + BrigDataOffsetString32_t name; + uint16_t outArgCount; //.defValue=0 + uint16_t inArgCount; //.defValue=0 + BrigCodeOffset32_t firstInArg; + BrigCodeOffset32_t firstCodeBlockEntry; + BrigCodeOffset32_t nextModuleEntry; + BrigExecutableModifier modifier; //.acc=subItem<ExecutableModifier> //.wtype=ExecutableModifier + BrigLinkage8_t linkage; + uint16_t reserved; //.defValue=0 +}; + +//.alias DirectiveKernel:DirectiveExecutable { }; +//.alias DirectiveFunction:DirectiveExecutable { }; +//.alias DirectiveSignature:DirectiveExecutable { }; +//.alias DirectiveIndirectFunction:DirectiveExecutable { }; + +struct BrigDirectiveExtension { + BrigBase base; + BrigDataOffsetString32_t 
name; +}; + +struct BrigDirectiveFbarrier { + BrigBase base; + BrigDataOffsetString32_t name; + BrigVariableModifier modifier; //.acc=subItem<VariableModifier> //.wtype=VariableModifier + BrigLinkage8_t linkage; + uint16_t reserved; //.defValue=0 +}; + +struct BrigDirectiveLabel { + BrigBase base; + BrigDataOffsetString32_t name; +}; + +struct BrigDirectiveLoc { + BrigBase base; + BrigDataOffsetString32_t filename; + uint32_t line; + uint32_t column; //.defValue=1 +}; + +struct BrigDirectiveNone { //.enum=BRIG_KIND_NONE + BrigBase base; +}; + +struct BrigDirectivePragma { + BrigBase base; + BrigDataOffsetOperandList32_t operands; +}; + +struct BrigDirectiveVariable { + BrigBase base; + BrigDataOffsetString32_t name; + BrigOperandOffset32_t init; + BrigType16_t type; + + //+hcode bool isArray(); + //+implcode inline bool KLASS::isArray() { return isArrayType(type()); } + + //+hcode unsigned elementType(); + //+implcode inline unsigned KLASS::elementType() { return isArray()? arrayType2elementType(type()) : type(); } + + BrigSegment8_t segment; + BrigAlignment8_t align; + BrigUInt64 dim; //.acc=subItem<UInt64> //.wtype=UInt64 + BrigVariableModifier modifier; //.acc=subItem<VariableModifier> //.wtype=VariableModifier + BrigLinkage8_t linkage; + BrigAllocation8_t allocation; + uint8_t reserved; //.defValue=0 +}; + +struct BrigDirectiveModule { + BrigBase base; + BrigDataOffsetString32_t name; + BrigVersion32_t hsailMajor; //.wtype=ValRef<uint32_t> + BrigVersion32_t hsailMinor; //.wtype=ValRef<uint32_t> + BrigProfile8_t profile; + BrigMachineModel8_t machineModel; + BrigRound8_t defaultFloatRound; + uint8_t reserved; //.defValue=0 +}; + +struct BrigInstBase { //.wname=Inst //.generic //.parent=BrigCode + BrigBase base; + BrigOpcode16_t opcode; + BrigType16_t type; + BrigDataOffsetOperandList32_t operands; + + //+hcode Operand operand(int index); + //+implcode inline Operand KLASS::operand(int index) { return operands()[index]; } +}; + +struct BrigInstAddr { + BrigInstBase base; + BrigSegment8_t segment; + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigInstAtomic { + BrigInstBase base; + BrigSegment8_t segment; + BrigMemoryOrder8_t memoryOrder; + BrigMemoryScope8_t memoryScope; + BrigAtomicOperation8_t atomicOperation; + uint8_t equivClass; + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigInstBasic { + BrigInstBase base; +}; + +struct BrigInstBr { + BrigInstBase base; + BrigWidth8_t width; + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigInstCmp { + BrigInstBase base; + BrigType16_t sourceType; + BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier + BrigCompareOperation8_t compare; + BrigPack8_t pack; + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigInstCvt { + BrigInstBase base; + BrigType16_t sourceType; + BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier + BrigRound8_t round; +}; + +struct BrigInstImage { + BrigInstBase base; + BrigType16_t imageType; + BrigType16_t coordType; + BrigImageGeometry8_t geometry; + uint8_t equivClass; + uint16_t reserved; //.defValue=0 +}; + +struct BrigInstLane { + BrigInstBase base; + BrigType16_t sourceType; + BrigWidth8_t width; + uint8_t reserved; //.defValue=0 +}; + +struct BrigInstMem { + BrigInstBase base; + BrigSegment8_t segment; + BrigAlignment8_t align; + uint8_t equivClass; + BrigWidth8_t width; + BrigMemoryModifier modifier; //.acc=subItem<MemoryModifier> //.wtype=MemoryModifier + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigInstMemFence { + BrigInstBase base; + 
BrigMemoryOrder8_t memoryOrder; + BrigMemoryScope8_t globalSegmentMemoryScope; + BrigMemoryScope8_t groupSegmentMemoryScope; + BrigMemoryScope8_t imageSegmentMemoryScope; +}; + +struct BrigInstMod { + BrigInstBase base; + BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier + BrigRound8_t round; + BrigPack8_t pack; + uint8_t reserved; //.defValue=0 +}; + +struct BrigInstQueryImage { + BrigInstBase base; + BrigType16_t imageType; + BrigImageGeometry8_t geometry; + BrigImageQuery8_t imageQuery; +}; + +struct BrigInstQuerySampler { + BrigInstBase base; + BrigSamplerQuery8_t samplerQuery; + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigInstQueue { + BrigInstBase base; + BrigSegment8_t segment; + BrigMemoryOrder8_t memoryOrder; + uint16_t reserved; //.defValue=0 +}; + +struct BrigInstSeg { + BrigInstBase base; + BrigSegment8_t segment; + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigInstSegCvt { + BrigInstBase base; + BrigType16_t sourceType; + BrigSegment8_t segment; + BrigSegCvtModifier modifier; //.acc=subItem<SegCvtModifier> //.wtype=SegCvtModifier +}; + +struct BrigInstSignal { + BrigInstBase base; + BrigType16_t signalType; + BrigMemoryOrder8_t memoryOrder; + BrigAtomicOperation8_t signalOperation; +}; + +struct BrigInstSourceType { + BrigInstBase base; + BrigType16_t sourceType; + uint16_t reserved; //.defValue=0 +}; + +struct BrigOperandAddress { + BrigBase base; + BrigCodeOffset32_t symbol; //.wtype=ItemRef<DirectiveVariable> + BrigOperandOffset32_t reg; //.wtype=ItemRef<OperandRegister> + BrigUInt64 offset; //.acc=subItem<UInt64> //.wtype=UInt64 +}; + +struct BrigOperandAlign { + BrigBase base; + BrigAlignment8_t align; + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigOperandCodeList { + BrigBase base; + BrigDataOffsetCodeList32_t elements; + + //+hcode unsigned elementCount(); + //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } + //+hcode Code elements(int index); + //+implcode inline Code KLASS::elements(int index) { return elements()[index]; } +}; + +struct BrigOperandCodeRef { + BrigBase base; + BrigCodeOffset32_t ref; +}; + +struct BrigOperandConstantBytes { + BrigBase base; + BrigType16_t type; //.defValue=0 + uint16_t reserved; //.defValue=0 + BrigDataOffsetString32_t bytes; +}; + +struct BrigOperandConstantOperandList { + BrigBase base; + BrigType16_t type; + uint16_t reserved; //.defValue=0 + BrigDataOffsetOperandList32_t elements; + + //+hcode unsigned elementCount(); + //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } + //+hcode Operand elements(int index); + //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; } +}; + +struct BrigOperandConstantImage { + BrigBase base; + BrigType16_t type; + BrigImageGeometry8_t geometry; + BrigImageChannelOrder8_t channelOrder; + BrigImageChannelType8_t channelType; + uint8_t reserved[3]; //.defValue=0 + BrigUInt64 width; //.acc=subItem<UInt64> //.wtype=UInt64 + BrigUInt64 height; //.acc=subItem<UInt64> //.wtype=UInt64 + BrigUInt64 depth; //.acc=subItem<UInt64> //.wtype=UInt64 + BrigUInt64 array; //.acc=subItem<UInt64> //.wtype=UInt64 +}; + +struct BrigOperandOperandList { + BrigBase base; + BrigDataOffsetOperandList32_t elements; + + //+hcode unsigned elementCount(); + //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } + //+hcode Operand elements(int index); + //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; } +}; + +struct BrigOperandRegister { + 
BrigBase base; + BrigRegisterKind16_t regKind; + uint16_t regNum; +}; + +struct BrigOperandConstantSampler { + BrigBase base; + BrigType16_t type; + BrigSamplerCoordNormalization8_t coord; + BrigSamplerFilter8_t filter; + BrigSamplerAddressing8_t addressing; + uint8_t reserved[3]; //.defValue=0 +}; + +struct BrigOperandString { + BrigBase base; + BrigDataOffsetString32_t string; +}; + +struct BrigOperandWavesize { + BrigBase base; +}; + +//.ignore{ + +enum BrigExceptionsMask { + BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0, + BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1, + BRIG_EXCEPTIONS_OVERFLOW = 1 << 2, + BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3, + BRIG_EXCEPTIONS_INEXACT = 1 << 4, + + BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16 +}; + +struct BrigSectionHeader { + uint64_t byteCount; + uint32_t headerByteCount; + uint32_t nameLength; + uint8_t name[1]; +}; + +#define MODULE_IDENTIFICATION_LENGTH (8) + +struct BrigModuleHeader { + char identification[MODULE_IDENTIFICATION_LENGTH]; + BrigVersion32_t brigMajor; + BrigVersion32_t brigMinor; + uint64_t byteCount; + uint8_t hash[64]; + uint32_t reserved; + uint32_t sectionCount; + uint64_t sectionIndex; +}; + +typedef BrigModuleHeader* BrigModule_t; + +#endif // defined(INCLUDED_BRIG_H) +//} diff --git a/src/arch/hsail/SConscript b/src/arch/hsail/SConscript new file mode 100644 index 000000000..3455823a6 --- /dev/null +++ b/src/arch/hsail/SConscript @@ -0,0 +1,54 @@ +# -*- mode:python -*- + +# Copyright (c) 2015 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+# +# Author: Anthony Gutierrez +# + +Import('*') + +if not env['BUILD_GPU']: + Return() + +if env['TARGET_GPU_ISA'] == 'hsail': + env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'], + 'gen.py', '$SOURCE $TARGETS') + + Source('generic_types.cc') + Source('gpu_decoder.cc') + Source('insts/branch.cc') + Source('insts/gen_exec.cc') + Source('insts/gpu_static_inst.cc') + Source('insts/main.cc') + Source('insts/pseudo_inst.cc') + Source('insts/mem.cc') + Source('operand.cc') diff --git a/src/arch/hsail/SConsopts b/src/arch/hsail/SConsopts new file mode 100644 index 000000000..641963c82 --- /dev/null +++ b/src/arch/hsail/SConsopts @@ -0,0 +1,40 @@ +# -*- mode:python -*- + +# +# Copyright (c) 2015 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Author: Anthony Gutierrez +# + +Import('*') + +all_gpu_isa_list.append('hsail') diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py new file mode 100755 index 000000000..f2996019b --- /dev/null +++ b/src/arch/hsail/gen.py @@ -0,0 +1,806 @@ +#! /usr/bin/python + +# +# Copyright (c) 2015 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: Steve Reinhardt
+#
+
+import sys, re
+
+from m5.util import code_formatter
+
+if len(sys.argv) != 4:
+    print "Error: need 3 args (file names)"
+    sys.exit(1)
+
+header_code = code_formatter()
+decoder_code = code_formatter()
+exec_code = code_formatter()
+
+###############
+#
+# Generate file prologs (includes etc.)
+#
+###############
+
+header_code('''
+#include "arch/hsail/insts/decl.hh"
+#include "base/bitfield.hh"
+#include "gpu-compute/hsail_code.hh"
+#include "gpu-compute/wavefront.hh"
+
+namespace HsailISA
+{
+''')
+header_code.indent()
+
+decoder_code('''
+#include "arch/hsail/gpu_decoder.hh"
+#include "arch/hsail/insts/branch.hh"
+#include "arch/hsail/insts/decl.hh"
+#include "arch/hsail/insts/gen_decl.hh"
+#include "arch/hsail/insts/mem.hh"
+#include "arch/hsail/insts/mem_impl.hh"
+#include "gpu-compute/brig_object.hh"
+
+namespace HsailISA
+{
+    std::vector<GPUStaticInst*> Decoder::decodedInsts;
+
+    GPUStaticInst*
+    Decoder::decode(MachInst machInst)
+    {
+        using namespace Brig;
+
+        const BrigInstBase *ib = machInst.brigInstBase;
+        const BrigObject *obj = machInst.brigObj;
+
+        switch(ib->opcode) {
+''')
+decoder_code.indent()
+decoder_code.indent()
+
+exec_code('''
+#include "arch/hsail/insts/gen_decl.hh"
+#include "base/intmath.hh"
+
+namespace HsailISA
+{
+''')
+exec_code.indent()
+
+###############
+#
+# Define code templates for class declarations (for header file)
+#
+###############
+
+# Basic header template for an instruction with no template parameters.
+header_template_nodt = '''
+class $class_name : public $base_class
+{
+  public:
+    typedef $base_class Base;
+
+    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
+       : Base(ib, obj, "$opcode")
+    {
+    }
+
+    void execute(GPUDynInstPtr gpuDynInst);
+};
+
+'''
+
+# Basic header template for an instruction with a single DataType
+# template parameter.
+header_template_1dt = '''
+template<typename DataType>
+class $class_name : public $base_class<DataType>
+{
+  public:
+    typedef $base_class<DataType> Base;
+    typedef typename DataType::CType CType;
+
+    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
+       : Base(ib, obj, "$opcode")
+    {
+    }
+
+    void execute(GPUDynInstPtr gpuDynInst);
+};
+
+'''
+
+header_template_1dt_noexec = '''
+template<typename DataType>
+class $class_name : public $base_class<DataType>
+{
+  public:
+    typedef $base_class<DataType> Base;
+    typedef typename DataType::CType CType;
+
+    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
+       : Base(ib, obj, "$opcode")
+    {
+    }
+};
+
+'''
+
+# Same as header_template_1dt, except the base class has a second
+# template parameter NumSrcOperands to allow a variable number of
+# source operands.
Note that since this is implemented with an array, +# it only works for instructions where all sources are of the same +# type (like most arithmetics). +header_template_1dt_varsrcs = ''' +template<typename DataType> +class $class_name : public $base_class<DataType, $num_srcs> +{ + public: + typedef $base_class<DataType, $num_srcs> Base; + typedef typename DataType::CType CType; + + $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) + : Base(ib, obj, "$opcode") + { + } + + void execute(GPUDynInstPtr gpuDynInst); +}; + +''' + +# Header template for instruction with two DataType template +# parameters, one for the dest and one for the source. This is used +# by compare and convert. +header_template_2dt = ''' +template<typename DestDataType, class SrcDataType> +class $class_name : public $base_class<DestDataType, SrcDataType> +{ + public: + typedef $base_class<DestDataType, SrcDataType> Base; + typedef typename DestDataType::CType DestCType; + typedef typename SrcDataType::CType SrcCType; + + $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) + : Base(ib, obj, "$opcode") + { + } + + void execute(GPUDynInstPtr gpuDynInst); +}; + +''' + +header_templates = { + 'ArithInst': header_template_1dt_varsrcs, + 'CmovInst': header_template_1dt, + 'ClassInst': header_template_1dt, + 'ShiftInst': header_template_1dt, + 'ExtractInsertInst': header_template_1dt, + 'CmpInst': header_template_2dt, + 'CvtInst': header_template_2dt, + 'LdInst': '', + 'StInst': '', + 'SpecialInstNoSrc': header_template_nodt, + 'SpecialInst1Src': header_template_nodt, + 'SpecialInstNoSrcNoDest': '', +} + +############### +# +# Define code templates for exec functions +# +############### + +# exec function body +exec_template_nodt_nosrc = ''' +void +$class_name::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + typedef Base::DestCType DestCType; + + const VectorMask &mask = w->get_pred(); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + DestCType dest_val = $expr; + this->dest.set(w, lane, dest_val); + } + } +} + +''' + +exec_template_nodt_1src = ''' +void +$class_name::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + typedef Base::DestCType DestCType; + typedef Base::SrcCType SrcCType; + + const VectorMask &mask = w->get_pred(); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + SrcCType src_val0 = this->src0.get<SrcCType>(w, lane); + DestCType dest_val = $expr; + + this->dest.set(w, lane, dest_val); + } + } +} + +''' + +exec_template_1dt_varsrcs = ''' +template<typename DataType> +void +$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + const VectorMask &mask = w->get_pred(); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + CType dest_val; + if ($dest_is_src_flag) { + dest_val = this->dest.template get<CType>(w, lane); + } + + CType src_val[$num_srcs]; + + for (int i = 0; i < $num_srcs; ++i) { + src_val[i] = this->src[i].template get<CType>(w, lane); + } + + dest_val = (CType)($expr); + + this->dest.set(w, lane, dest_val); + } + } +} + +''' + +exec_template_1dt_3srcs = ''' +template<typename DataType> +void +$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + typedef typename Base::Src0CType Src0T; + typedef typename Base::Src1CType Src1T; + typedef typename Base::Src2CType Src2T; + + const VectorMask &mask = w->get_pred(); + + for (int lane = 0; lane < VSZ; ++lane) { + if 
(mask[lane]) { + CType dest_val; + + if ($dest_is_src_flag) { + dest_val = this->dest.template get<CType>(w, lane); + } + + Src0T src_val0 = this->src0.template get<Src0T>(w, lane); + Src1T src_val1 = this->src1.template get<Src1T>(w, lane); + Src2T src_val2 = this->src2.template get<Src2T>(w, lane); + + dest_val = $expr; + + this->dest.set(w, lane, dest_val); + } + } +} + +''' + +exec_template_1dt_2src_1dest = ''' +template<typename DataType> +void +$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + typedef typename Base::DestCType DestT; + typedef CType Src0T; + typedef typename Base::Src1CType Src1T; + + const VectorMask &mask = w->get_pred(); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + DestT dest_val; + if ($dest_is_src_flag) { + dest_val = this->dest.template get<DestT>(w, lane); + } + Src0T src_val0 = this->src0.template get<Src0T>(w, lane); + Src1T src_val1 = this->src1.template get<Src1T>(w, lane); + + dest_val = $expr; + + this->dest.set(w, lane, dest_val); + } + } +} + +''' + +exec_template_shift = ''' +template<typename DataType> +void +$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + const VectorMask &mask = w->get_pred(); + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + CType dest_val; + + if ($dest_is_src_flag) { + dest_val = this->dest.template get<CType>(w, lane); + } + + CType src_val0 = this->src0.template get<CType>(w, lane); + uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane); + + dest_val = $expr; + + this->dest.set(w, lane, dest_val); + } + } +} + +''' + +exec_template_2dt = ''' +template<typename DestDataType, class SrcDataType> +void +$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + const VectorMask &mask = w->get_pred(); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + DestCType dest_val; + SrcCType src_val[$num_srcs]; + + for (int i = 0; i < $num_srcs; ++i) { + src_val[i] = this->src[i].template get<SrcCType>(w, lane); + } + + dest_val = $expr; + + this->dest.set(w, lane, dest_val); + } + } +} + +''' + +exec_templates = { + 'ArithInst': exec_template_1dt_varsrcs, + 'CmovInst': exec_template_1dt_3srcs, + 'ExtractInsertInst': exec_template_1dt_3srcs, + 'ClassInst': exec_template_1dt_2src_1dest, + 'CmpInst': exec_template_2dt, + 'CvtInst': exec_template_2dt, + 'LdInst': '', + 'StInst': '', + 'SpecialInstNoSrc': exec_template_nodt_nosrc, + 'SpecialInst1Src': exec_template_nodt_1src, + 'SpecialInstNoSrcNoDest': '', +} + +############### +# +# Define code templates for the decoder cases +# +############### + +# decode template for nodt-opcode case +decode_nodt_template = ''' + case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);''' + +decode_case_prolog_class_inst = ''' + case BRIG_OPCODE_$brig_opcode_upper: + { + //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]); + BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType; + //switch (baseOp->kind) { + // case BRIG_OPERAND_REG: + // type = ((const BrigOperandReg*)baseOp)->type; + // break; + // case BRIG_OPERAND_IMMED: + // type = ((const BrigOperandImmed*)baseOp)->type; + // break; + // default: + // fatal("CLASS unrecognized kind of operand %d\\n", + // baseOp->kind); + //} + switch (type) {''' + +# common prolog for 1dt- or 2dt-opcode case: switch on data type +decode_case_prolog = ''' + case BRIG_OPCODE_$brig_opcode_upper: + { + 
switch (ib->type) {'''
+
+# single-level decode case entry (for 1dt opcodes)
+decode_case_entry = \
+'        case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
+
+decode_store_prolog = \
+'        case BRIG_TYPE_$type_name: {'
+
+decode_store_case_epilog = '''
+        }'''
+
+decode_store_case_entry = \
+'            return $constructor(ib, obj);'
+
+# common epilog for type switch
+decode_case_epilog = '''
+        default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
+                       ib->type);
+        }
+    }
+    break;'''
+
+# Additional templates for nested decode on a second type field (for
+# compare and convert).  These are used in place of the
+# decode_case_entry template to create a second-level switch on the
+# second type field inside each case of the first-level type switch.
+# Because the name and location of the second type can vary, the Brig
+# instruction type must be provided in $brig_type, and the name of the
+# second type field must be provided in $type_field.
+decode_case2_prolog = '''
+        case BRIG_TYPE_$type_name:
+            switch (((Brig$brig_type*)ib)->$type2_field) {'''
+
+decode_case2_entry = \
+'            case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
+
+decode_case2_epilog = '''
+            default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
+                           ((Brig$brig_type*)ib)->$type2_field);
+            }
+            break;'''
+
+# Figure out how many source operands an expr needs by looking for the
+# highest-numbered srcN value referenced.  Since sources are numbered
+# starting at 0, the return value is N+1.
+def num_src_operands(expr):
+    if expr.find('src2') != -1:
+        return 3
+    elif expr.find('src1') != -1:
+        return 2
+    elif expr.find('src0') != -1:
+        return 1
+    else:
+        return 0
+
+###############
+#
+# Define final code generation methods
+#
+# The gen() and gen_special() methods below are the interface for
+# generating actual instructions.
+#
+###############
+
+# Generate class declaration, exec function, and decode switch case
+# for a brig_opcode with a single-level type switch.  The 'types'
+# parameter is a list or tuple of types for which the instruction
+# should be instantiated.
+def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
+        type2_info=None, constructor_prefix='new ', is_store=False):
+    brig_opcode_upper = brig_opcode.upper()
+    class_name = brig_opcode
+    opcode = class_name.lower()
+
+    if base_class == 'ArithInst':
+        # note that expr must be provided with ArithInst so we can
+        # derive num_srcs for the template
+        assert expr
+
+    if expr:
+        # Derive several bits of info from expr.  If expr is not used,
+        # this info will be irrelevant.
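+        # For example (derived from the rules below): Mad is generated
+        # with expr = 'src0 * src1 + src2', so num_srcs == 3,
+        # dest_is_src is False, and the ArithInst rewrite produces
+        # 'src_val[0] * src_val[1] + src_val[2]'.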
+        num_srcs = num_src_operands(expr)
+        # if the RHS expression includes 'dest', then we're doing an RMW
+        # on the reg and we need to treat it like a source
+        dest_is_src = expr.find('dest') != -1
+        dest_is_src_flag = str(dest_is_src).lower() # for C++
+        if base_class in ['ShiftInst']:
+            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
+        elif base_class in ['ArithInst', 'CmpInst', 'CvtInst']:
+            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
+        else:
+            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
+        expr = re.sub(r'\bdest\b', r'dest_val', expr)
+
+    # Strip template arguments off of base class before looking up
+    # appropriate templates
+    base_class_base = re.sub(r'<.*>$', '', base_class)
+    header_code(header_templates[base_class_base])
+
+    if base_class.startswith('SpecialInst'):
+        exec_code(exec_templates[base_class_base])
+    elif base_class.startswith('ShiftInst'):
+        header_code(exec_template_shift)
+    else:
+        header_code(exec_templates[base_class_base])
+
+    if not types or isinstance(types, str):
+        # Just a single type
+        constructor = constructor_prefix + class_name
+        decoder_code(decode_nodt_template)
+    else:
+        # multiple types, need at least one level of decode
+        if brig_opcode == 'Class':
+            decoder_code(decode_case_prolog_class_inst)
+        else:
+            decoder_code(decode_case_prolog)
+        if not type2_info:
+            if is_store == False:
+                # single list of types, do basic one-level decode
+                for type_name in types:
+                    full_class_name = '%s<%s>' % (class_name, type_name.upper())
+                    constructor = constructor_prefix + full_class_name
+                    decoder_code(decode_case_entry)
+            else:
+                # single list of types, do basic one-level decode
+                for type_name in types:
+                    decoder_code(decode_store_prolog)
+                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
+                    src_size = 32
+                    type_type = type_name[0]
+                    full_class_name = '%s<%s,%s>' % (class_name, \
+                                                     type_name.upper(), \
+                                                     '%s%d' % \
+                                                     (type_type.upper(), \
+                                                      type_size))
+                    constructor = constructor_prefix + full_class_name
+                    decoder_code(decode_store_case_entry)
+                    decoder_code(decode_store_case_epilog)
+        else:
+            # need secondary type switch (convert, compare)
+            # unpack extra info on second switch
+            (type2_field, types2) = type2_info
+            brig_type = 'Inst%s' % brig_opcode
+            for type_name in types:
+                decoder_code(decode_case2_prolog)
+                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
+                for type2_name in types2:
+                    full_class_name = fmt % type2_name.upper()
+                    constructor = constructor_prefix + full_class_name
+                    decoder_code(decode_case2_entry)
+
+                decoder_code(decode_case2_epilog)
+
+        decoder_code(decode_case_epilog)
+
+###############
+#
+# Generate instructions
+#
+###############
+
+# handy abbreviations for common sets of types
+
+# arithmetic ops are typically defined only on 32- and 64-bit sizes
+arith_int_types = ('S32', 'U32', 'S64', 'U64')
+arith_float_types = ('F32', 'F64')
+arith_types = arith_int_types + arith_float_types
+
+bit_types = ('B1', 'B32', 'B64')
+
+all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
+
+# I think you might be able to do 'f16' memory ops too, but we'll
+# ignore them for now.
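+# For reference, mem_types expands to ('S8', 'U8', 'S16', 'U16', 'S32',
+# 'U32', 'S64', 'U64', 'F32', 'F64'); mem_atom_types replaces the two
+# float types with ('B32', 'B64').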
+mem_types = all_int_types + arith_float_types +mem_atom_types = all_int_types + ('B32', 'B64') + +##### Arithmetic & logical operations +gen('Add', arith_types, 'src0 + src1') +gen('Sub', arith_types, 'src0 - src1') +gen('Mul', arith_types, 'src0 * src1') +gen('Div', arith_types, 'src0 / src1') +gen('Min', arith_types, 'std::min(src0, src1)') +gen('Max', arith_types, 'std::max(src0, src1)') +gen('Gcnmin', arith_types, 'std::min(src0, src1)') + +gen('CopySign', arith_float_types, + 'src1 < 0 ? -std::abs(src0) : std::abs(src0)') +gen('Sqrt', arith_float_types, 'sqrt(src0)') +gen('Floor', arith_float_types, 'floor(src0)') + +# "fast" sqrt... same as slow for us +gen('Nsqrt', arith_float_types, 'sqrt(src0)') +gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)') +gen('Nrcp', arith_float_types, '1.0/src0') +gen('Fract', arith_float_types, + '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)') + +gen('Ncos', arith_float_types, 'cos(src0)'); +gen('Nsin', arith_float_types, 'sin(src0)'); + +gen('And', bit_types, 'src0 & src1') +gen('Or', bit_types, 'src0 | src1') +gen('Xor', bit_types, 'src0 ^ src1') + +gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)') +gen('Firstbit',bit_types, 'firstbit(src0)') +gen('Popcount', ('B32', 'B64'), '__builtin_popcount(src0)') + +gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst') +gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst') + +# gen('Mul_hi', types=('s32','u32', '??')) +# gen('Mul24', types=('s32','u32', '??')) +gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)') + +gen('Abs', arith_types, 'std::abs(src0)') +gen('Neg', arith_types, '-src0') + +gen('Mov', bit_types, 'src0') +gen('Not', bit_types, 'heynot(src0)') + +# mad and fma differ only in rounding behavior, which we don't emulate +# also there's an integer form of mad, but not of fma +gen('Mad', arith_types, 'src0 * src1 + src2') +gen('Fma', arith_float_types, 'src0 * src1 + src2') + +#native floating point operations +gen('Nfma', arith_float_types, 'src0 * src1 + src2') + +gen('Cmov', bit_types, 'src0 ? 
src1 : src2', 'CmovInst')
+gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
+gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
+
+# see base/bitfield.hh
+gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
+    'ExtractInsertInst')
+
+gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
+    'ExtractInsertInst')
+
+##### Compare
+gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
+    'CmpInst', ('sourceType', arith_types + bit_types))
+gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
+
+##### Conversion
+
+# Conversion operations are only defined on B1, not B32 or B64
+cvt_types = ('B1',) + mem_types
+
+gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
+
+
+##### Load & Store
+gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
+gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
+gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
+    is_store=True)
+gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
+gen('AtomicNoRet', mem_atom_types, base_class='StInst',
+    constructor_prefix='decode')
+
+gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
+gen('Br', base_class = 'LdInst', constructor_prefix='decode')
+
+##### Special operations
+def gen_special(brig_opcode, expr, dest_type='U32'):
+    num_srcs = num_src_operands(expr)
+    if num_srcs == 0:
+        base_class = 'SpecialInstNoSrc<%s>' % dest_type
+    elif num_srcs == 1:
+        base_class = 'SpecialInst1Src<%s>' % dest_type
+    else:
+        assert False
+
+    gen(brig_opcode, None, expr, base_class)
+
+gen_special('WorkItemId', 'w->workitemid[src0][lane]')
+gen_special('WorkItemAbsId',
+    'w->workitemid[src0][lane] + (w->workgroupid[src0] * w->workgroupsz[src0])')
+gen_special('WorkGroupId', 'w->workgroupid[src0]')
+gen_special('WorkGroupSize', 'w->workgroupsz[src0]')
+gen_special('CurrentWorkGroupSize', 'w->workgroupsz[src0]')
+gen_special('GridSize', 'w->gridsz[src0]')
+gen_special('GridGroups',
+    'divCeil(w->gridsz[src0],w->workgroupsz[src0])')
+gen_special('LaneId', 'lane')
+gen_special('WaveId', 'w->dynwaveid')
+gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
+
+# gen_special('CU'', ')
+
+gen('Ret', base_class='SpecialInstNoSrcNoDest')
+gen('Barrier', base_class='SpecialInstNoSrcNoDest')
+gen('MemFence', base_class='SpecialInstNoSrcNoDest')
+
+# Map magic instructions to the BrigSyscall opcode
+# Magic instructions are defined in magic.hh
+#
+# In the future, real HSA kernel system calls can be implemented and coexist
+# with magic instructions.
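+# As with Ret/Barrier/MemFence above, only the decode case for 'Call' is
+# generated here; the class and its execute() are written by hand (see
+# insts/decl.hh and insts/pseudo_inst.cc).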
+gen('Call', base_class='SpecialInstNoSrcNoDest') + +############### +# +# Generate file epilogs +# +############### +header_code.dedent() +header_code(''' +} // namespace HsailISA +''') + +# close off main decode switch +decoder_code.dedent() +decoder_code.dedent() +decoder_code(''' + default: fatal("unrecognized Brig opcode %d\\n", ib->opcode); + } // end switch(ib->opcode) + } // end decode() +} // namespace HsailISA +''') + +exec_code.dedent() +exec_code(''' +} // namespace HsailISA +''') + +############### +# +# Output accumulated code to files +# +############### +header_code.write(sys.argv[1]) +decoder_code.write(sys.argv[2]) +exec_code.write(sys.argv[3]) diff --git a/src/arch/hsail/generic_types.cc b/src/arch/hsail/generic_types.cc new file mode 100644 index 000000000..0cd55d1d5 --- /dev/null +++ b/src/arch/hsail/generic_types.cc @@ -0,0 +1,47 @@ +#include "arch/hsail/generic_types.hh" +#include "base/misc.hh" + +using namespace Brig; + +namespace HsailISA +{ + Enums::GenericMemoryOrder + getGenericMemoryOrder(BrigMemoryOrder brig_memory_order) + { + switch(brig_memory_order) { + case BRIG_MEMORY_ORDER_NONE: + return Enums::MEMORY_ORDER_NONE; + case BRIG_MEMORY_ORDER_RELAXED: + return Enums::MEMORY_ORDER_RELAXED; + case BRIG_MEMORY_ORDER_SC_ACQUIRE: + return Enums::MEMORY_ORDER_SC_ACQUIRE; + case BRIG_MEMORY_ORDER_SC_RELEASE: + return Enums::MEMORY_ORDER_SC_RELEASE; + case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: + return Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE; + default: + fatal("HsailISA::MemInst::getGenericMemoryOrder -> ", + "bad BrigMemoryOrder\n"); + } + } + + Enums::GenericMemoryScope + getGenericMemoryScope(BrigMemoryScope brig_memory_scope) + { + switch(brig_memory_scope) { + case BRIG_MEMORY_SCOPE_NONE: + return Enums::MEMORY_SCOPE_NONE; + case BRIG_MEMORY_SCOPE_WORKITEM: + return Enums::MEMORY_SCOPE_WORKITEM; + case BRIG_MEMORY_SCOPE_WORKGROUP: + return Enums::MEMORY_SCOPE_WORKGROUP; + case BRIG_MEMORY_SCOPE_AGENT: + return Enums::MEMORY_SCOPE_DEVICE; + case BRIG_MEMORY_SCOPE_SYSTEM: + return Enums::MEMORY_SCOPE_SYSTEM; + default: + fatal("HsailISA::MemInst::getGenericMemoryScope -> ", + "bad BrigMemoryScope\n"); + } + } +} // namespace HsailISA diff --git a/src/arch/hsail/generic_types.hh b/src/arch/hsail/generic_types.hh new file mode 100644 index 000000000..50e430bef --- /dev/null +++ b/src/arch/hsail/generic_types.hh @@ -0,0 +1,16 @@ +#ifndef __ARCH_HSAIL_GENERIC_TYPES_HH__ +#define __ARCH_HSAIL_GENERIC_TYPES_HH__ + +#include "arch/hsail/Brig.h" +#include "enums/GenericMemoryOrder.hh" +#include "enums/GenericMemoryScope.hh" + +namespace HsailISA +{ + Enums::GenericMemoryOrder + getGenericMemoryOrder(Brig::BrigMemoryOrder brig_memory_order); + Enums::GenericMemoryScope + getGenericMemoryScope(Brig::BrigMemoryScope brig_memory_scope); +} // namespace HsailISA + +#endif // __ARCH_HSAIL_GENERIC_TYPES_HH__ diff --git a/src/arch/hsail/gpu_decoder.hh b/src/arch/hsail/gpu_decoder.hh new file mode 100644 index 000000000..98a689664 --- /dev/null +++ b/src/arch/hsail/gpu_decoder.hh @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Anthony Gutierrez + */ + +#ifndef __ARCH_HSAIL_GPU_DECODER_HH__ +#define __ARCH_HSAIL_GPU_DECODER_HH__ + +#include <vector> + +#include "arch/hsail/gpu_types.hh" + +class BrigObject; +class GPUStaticInst; + +namespace Brig +{ + class BrigInstBase; +} + +namespace HsailISA +{ + class Decoder + { + public: + GPUStaticInst* decode(MachInst machInst); + + GPUStaticInst* + decode(RawMachInst inst) + { + return inst < decodedInsts.size() ? decodedInsts.at(inst) : nullptr; + } + + RawMachInst + saveInst(GPUStaticInst *decodedInst) + { + decodedInsts.push_back(decodedInst); + + return decodedInsts.size() - 1; + } + + private: + static std::vector<GPUStaticInst*> decodedInsts; + }; +} // namespace HsailISA + +#endif // __ARCH_HSAIL_GPU_DECODER_HH__ diff --git a/src/arch/hsail/gpu_types.hh b/src/arch/hsail/gpu_types.hh new file mode 100644 index 000000000..4b3a66a9a --- /dev/null +++ b/src/arch/hsail/gpu_types.hh @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Anthony Gutierrez
+ */
+
+#ifndef __ARCH_HSAIL_GPU_TYPES_HH__
+#define __ARCH_HSAIL_GPU_TYPES_HH__
+
+#include <cstdint>
+
+namespace Brig
+{
+    class BrigInstBase;
+}
+
+class BrigObject;
+
+namespace HsailISA
+{
+    // A raw machine instruction represents the raw bits that
+    // our model uses to represent an actual instruction. In
+    // the case of HSAIL this is just an index into a list of
+    // instruction objects.
+    typedef uint64_t RawMachInst;
+
+    // The MachInst is a representation of an instruction
+    // that has more information than just the machine code.
+    // For HSAIL the actual machine code is a BrigInstBase
+    // and the BrigObject contains more pertinent
+    // information related to operands, etc.
+
+    struct MachInst
+    {
+        const Brig::BrigInstBase *brigInstBase;
+        const BrigObject *brigObj;
+    };
+}
+
+#endif // __ARCH_HSAIL_GPU_TYPES_HH__
diff --git a/src/arch/hsail/insts/branch.cc b/src/arch/hsail/insts/branch.cc
new file mode 100644
index 000000000..d65279cc8
--- /dev/null
+++ b/src/arch/hsail/insts/branch.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * + * Author: Anthony Gutierrez + */ + +#include "arch/hsail/insts/branch.hh" + +#include "gpu-compute/hsail_code.hh" + +namespace HsailISA +{ + GPUStaticInst* + decodeBrn(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + // Detect direct vs indirect branch by seeing whether we have a + // register operand. + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + const Brig::BrigOperand *reg = obj->getOperand(op_offs); + + if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) { + return new BrnIndirectInst(ib, obj); + } else { + return new BrnDirectInst(ib, obj); + } + } + + GPUStaticInst* + decodeCbr(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + // Detect direct vs indirect branch by seeing whether we have a + // second register operand (after the condition). + unsigned op_offs = obj->getOperandPtr(ib->operands, 1); + const Brig::BrigOperand *reg = obj->getOperand(op_offs); + + if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) { + return new CbrIndirectInst(ib, obj); + } else { + return new CbrDirectInst(ib, obj); + } + } + + GPUStaticInst* + decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + // Detect direct vs indirect branch by seeing whether we have a + // second register operand (after the condition). + unsigned op_offs = obj->getOperandPtr(ib->operands, 1); + const Brig::BrigOperand *reg = obj->getOperand(op_offs); + + if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) { + return new BrIndirectInst(ib, obj); + } else { + return new BrDirectInst(ib, obj); + } + } +} // namespace HsailISA diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh new file mode 100644 index 000000000..54ad9a042 --- /dev/null +++ b/src/arch/hsail/insts/branch.hh @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Author: Steve Reinhardt + */ + +#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__ +#define __ARCH_HSAIL_INSTS_BRANCH_HH__ + +#include "arch/hsail/insts/gpu_static_inst.hh" +#include "arch/hsail/operand.hh" +#include "gpu-compute/gpu_dyn_inst.hh" +#include "gpu-compute/wavefront.hh" + +namespace HsailISA +{ + + // The main difference between a direct branch and an indirect branch + // is whether the target is a register or a label, so we can share a + // lot of code if we template the base implementation on that type. + template<typename TargetType> + class BrnInstBase : public HsailGPUStaticInst + { + public: + void generateDisassembly(); + + Brig::BrigWidth8_t width; + TargetType target; + + BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) + : HsailGPUStaticInst(obj, "brn") + { + o_type = Enums::OT_BRANCH; + width = ((Brig::BrigInstBr*)ib)->width; + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + target.init(op_offs, obj); + o_type = Enums::OT_BRANCH; + } + + uint32_t getTargetPc() override { return target.getTarget(0, 0); } + + bool unconditionalJumpInstruction() override { return true; } + bool isVectorRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isVectorRegister(); + } + bool isCondRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isCondRegister(); + } + bool isScalarRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isScalarRegister(); + } + + bool isSrcOperand(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return true; + } + + bool isDstOperand(int operandIndex) { + return false; + } + + int getOperandSize(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.opSize(); + } + + int getRegisterIndex(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.regIndex(); + } + + int getNumOperands() { + return 1; + } + + void execute(GPUDynInstPtr gpuDynInst); + }; + + template<typename TargetType> + void + BrnInstBase<TargetType>::generateDisassembly() + { + std::string widthClause; + + if (width != 1) { + widthClause = csprintf("_width(%d)", width); + } + + disassembly = csprintf("%s%s %s", opcode, widthClause, + target.disassemble()); + } + + template<typename TargetType> + void + BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + if (getTargetPc() == w->rpc()) { + w->popFromReconvergenceStack(); + } else { + // Rpc and execution mask remain the same + w->pc(getTargetPc()); + } + w->discardFetch(); + } + + class BrnDirectInst : public BrnInstBase<LabelOperand> + { + public: + BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : BrnInstBase<LabelOperand>(ib, obj) + { + } + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return 0; } + }; + + class BrnIndirectInst : public BrnInstBase<SRegOperand> + { + public: + BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : BrnInstBase<SRegOperand>(ib, obj) + { + } + int numSrcRegOperands() { return target.isVectorRegister(); } + int numDstRegOperands() { return 0; } + }; + + GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib, + const BrigObject *obj); + + template<typename TargetType> + class CbrInstBase : public HsailGPUStaticInst + { + public: + void generateDisassembly(); + + Brig::BrigWidth8_t width; + 
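+        // Condition register and branch target.  In the BRIG encoding
+        // the condition is operand 0 and the target is operand 1; see
+        // the constructor below.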
CRegOperand cond; + TargetType target; + + CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) + : HsailGPUStaticInst(obj, "cbr") + { + o_type = Enums::OT_BRANCH; + width = ((Brig::BrigInstBr *)ib)->width; + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + cond.init(op_offs, obj); + op_offs = obj->getOperandPtr(ib->operands, 1); + target.init(op_offs, obj); + o_type = Enums::OT_BRANCH; + } + + uint32_t getTargetPc() override { return target.getTarget(0, 0); } + + void execute(GPUDynInstPtr gpuDynInst); + // Assumption: Target is operand 0, Condition Register is operand 1 + bool isVectorRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + if (!operandIndex) + return target.isVectorRegister(); + else + return false; + } + bool isCondRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + if (!operandIndex) + return target.isCondRegister(); + else + return true; + } + bool isScalarRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return target.isScalarRegister(); + else + return false; + } + bool isSrcOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex == 0) + return true; + return false; + } + // both Condition Register and Target are source operands + bool isDstOperand(int operandIndex) { + return false; + } + int getOperandSize(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + if (!operandIndex) + return target.opSize(); + else + return 1; + } + int getRegisterIndex(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + if (!operandIndex) + return target.regIndex(); + else + return -1; + } + + // Operands = Target, Condition Register + int getNumOperands() { + return 2; + } + }; + + template<typename TargetType> + void + CbrInstBase<TargetType>::generateDisassembly() + { + std::string widthClause; + + if (width != 1) { + widthClause = csprintf("_width(%d)", width); + } + + disassembly = csprintf("%s%s %s,%s", opcode, widthClause, + cond.disassemble(), target.disassemble()); + } + + template<typename TargetType> + void + CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + const uint32_t curr_pc = w->pc(); + const uint32_t curr_rpc = w->rpc(); + const VectorMask curr_mask = w->execMask(); + + /** + * TODO: can we move this pop outside the instruction, and + * into the wavefront? 
+ */ + w->popFromReconvergenceStack(); + + // immediate post-dominator instruction + const uint32_t rpc = static_cast<uint32_t>(ipdInstNum()); + if (curr_rpc != rpc) { + w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask); + } + + // taken branch + const uint32_t true_pc = getTargetPc(); + VectorMask true_mask; + for (unsigned int lane = 0; lane < VSZ; ++lane) { + true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane]; + } + + // not taken branch + const uint32_t false_pc = curr_pc + 1; + assert(true_pc != false_pc); + if (false_pc != rpc && true_mask.count() < curr_mask.count()) { + VectorMask false_mask = curr_mask & ~true_mask; + w->pushToReconvergenceStack(false_pc, rpc, false_mask); + } + + if (true_pc != rpc && true_mask.count()) { + w->pushToReconvergenceStack(true_pc, rpc, true_mask); + } + assert(w->pc() != curr_pc); + w->discardFetch(); + } + + + class CbrDirectInst : public CbrInstBase<LabelOperand> + { + public: + CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : CbrInstBase<LabelOperand>(ib, obj) + { + } + // the source operand of a conditional branch is a Condition + // Register which is not stored in the VRF + // so we do not count it as a source-register operand + // even though, formally, it is one. + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return 0; } + }; + + class CbrIndirectInst : public CbrInstBase<SRegOperand> + { + public: + CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : CbrInstBase<SRegOperand>(ib, obj) + { + } + // one source operand of the conditional indirect branch is a Condition + // register which is not stored in the VRF so we do not count it + // as a source-register operand even though, formally, it is one. + int numSrcRegOperands() { return target.isVectorRegister(); } + int numDstRegOperands() { return 0; } + }; + + GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib, + const BrigObject *obj); + + template<typename TargetType> + class BrInstBase : public HsailGPUStaticInst + { + public: + void generateDisassembly(); + + ImmOperand<uint32_t> width; + TargetType target; + + BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) + : HsailGPUStaticInst(obj, "br") + { + o_type = Enums::OT_BRANCH; + width.init(((Brig::BrigInstBr *)ib)->width, obj); + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + target.init(op_offs, obj); + o_type = Enums::OT_BRANCH; + } + + uint32_t getTargetPc() override { return target.getTarget(0, 0); } + + bool unconditionalJumpInstruction() override { return true; } + + void execute(GPUDynInstPtr gpuDynInst); + bool isVectorRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isVectorRegister(); + } + bool isCondRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isCondRegister(); + } + bool isScalarRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isScalarRegister(); + } + bool isSrcOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return true; + } + bool isDstOperand(int operandIndex) { return false; } + int getOperandSize(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.opSize(); + } + int getRegisterIndex(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.regIndex(); + } + int getNumOperands() { return 1; } + }; + + 
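+    // For reference: the pop/push discipline CbrInstBase::execute()
+    // applies to the reconvergence stack, restated as a minimal,
+    // self-contained sketch.  Everything in it is illustrative -- the
+    // 4-lane mask, StackEntry, and the hard-coded post-dominator are
+    // stand-ins, not the simulator's API.
+#if 0
+#include <bitset>
+#include <cstdint>
+#include <stack>
+
+struct StackEntry { uint32_t pc, rpc; std::bitset<4> mask; };
+
+// 'taken' must already be ANDed with the wavefront's active mask.
+void
+cbrSketch(std::stack<StackEntry> &rstack, uint32_t curr_pc,
+          uint32_t target_pc, std::bitset<4> taken)
+{
+    StackEntry top = rstack.top();
+    rstack.pop();               // a branch always pops its own entry first
+
+    uint32_t rpc = curr_pc + 2; // immediate post-dominator (hard-coded)
+    if (top.rpc != rpc)         // entering a new divergence region
+        rstack.push({rpc, top.rpc, top.mask});
+
+    // Push the fall-through path before the taken path so that the
+    // taken path is popped (i.e., executed) first.
+    std::bitset<4> not_taken = top.mask & ~taken;
+    if (curr_pc + 1 != rpc && not_taken.any())
+        rstack.push({curr_pc + 1, rpc, not_taken});
+    if (target_pc != rpc && taken.any())
+        rstack.push({target_pc, rpc, taken});
+}
+#endif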
template<typename TargetType> + void + BrInstBase<TargetType>::generateDisassembly() + { + std::string widthClause; + + if (width.bits != 1) { + widthClause = csprintf("_width(%d)", width.bits); + } + + disassembly = csprintf("%s%s %s", opcode, widthClause, + target.disassemble()); + } + + template<typename TargetType> + void + BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + if (getTargetPc() == w->rpc()) { + w->popFromReconvergenceStack(); + } else { + // Rpc and execution mask remain the same + w->pc(getTargetPc()); + } + w->discardFetch(); + } + + class BrDirectInst : public BrInstBase<LabelOperand> + { + public: + BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : BrInstBase<LabelOperand>(ib, obj) + { + } + + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return 0; } + }; + + class BrIndirectInst : public BrInstBase<SRegOperand> + { + public: + BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : BrInstBase<SRegOperand>(ib, obj) + { + } + int numSrcRegOperands() { return target.isVectorRegister(); } + int numDstRegOperands() { return 0; } + }; + + GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib, + const BrigObject *obj); +} // namespace HsailISA + +#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__ diff --git a/src/arch/hsail/insts/decl.hh b/src/arch/hsail/insts/decl.hh new file mode 100644 index 000000000..e2da501b9 --- /dev/null +++ b/src/arch/hsail/insts/decl.hh @@ -0,0 +1,1106 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
+#define __ARCH_HSAIL_INSTS_DECL_HH__
+
+#include <cmath>
+
+#include "arch/hsail/generic_types.hh"
+#include "arch/hsail/insts/gpu_static_inst.hh"
+#include "arch/hsail/operand.hh"
+#include "debug/HSAIL.hh"
+#include "enums/OpType.hh"
+#include "gpu-compute/gpu_dyn_inst.hh"
+#include "gpu-compute/shader.hh"
+
+namespace HsailISA
+{
+    template<typename _DestOperand, typename _SrcOperand>
+    class HsailOperandType
+    {
+      public:
+        typedef _DestOperand DestOperand;
+        typedef _SrcOperand SrcOperand;
+    };
+
+    typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
+    typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
+    typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
+
+    // The IsBits parameter serves only to disambiguate the B* types from
+    // the U* types, which otherwise would be identical (and
+    // indistinguishable).
+    template<typename _OperandType, typename _CType, Enums::MemType _memType,
+             vgpr_type _vgprType, int IsBits=0>
+    class HsailDataType
+    {
+      public:
+        typedef _OperandType OperandType;
+        typedef _CType CType;
+        static const Enums::MemType memType = _memType;
+        static const vgpr_type vgprType = _vgprType;
+        static const char *label;
+    };
+
+    typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
+    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
+
+    typedef HsailDataType<SRegOperandType, uint16_t,
+                          Enums::M_U16, VT_32, 1> B16;
+
+    typedef HsailDataType<SRegOperandType, uint32_t,
+                          Enums::M_U32, VT_32, 1> B32;
+
+    typedef HsailDataType<DRegOperandType, uint64_t,
+                          Enums::M_U64, VT_64, 1> B64;
+
+    typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
+    typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
+    typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
+    typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
+
+    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
+    typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
+    typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
+    typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
+
+    typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
+    typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
+
+    template<typename DestOperandType, typename SrcOperandType,
+             int NumSrcOperands>
+    class CommonInstBase : public HsailGPUStaticInst
+    {
+      protected:
+        typename DestOperandType::DestOperand dest;
+        typename SrcOperandType::SrcOperand src[NumSrcOperands];
+
+        void
+        generateDisassembly()
+        {
+            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
+                                   dest.disassemble());
+
+            for (int i = 0; i < NumSrcOperands; ++i) {
+                disassembly += ",";
+                disassembly += src[i].disassemble();
+            }
+        }
+
+        virtual std::string opcode_suffix() = 0;
+
+      public:
+        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+                       const char *opcode)
+            : HsailGPUStaticInst(obj, opcode)
+        {
+            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+
+            dest.init(op_offs, obj);
+
+            for (int i = 0; i < NumSrcOperands; ++i) {
+                op_offs = obj->getOperandPtr(ib->operands, i + 1);
+                src[i].init(op_offs, obj);
+            }
+        }
+
+        bool isVectorRegister(int operandIndex) {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if (operandIndex < NumSrcOperands)
+                return
src[operandIndex].isVectorRegister(); + else + return dest.isVectorRegister(); + } + bool isCondRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return src[operandIndex].isCondRegister(); + else + return dest.isCondRegister(); + } + bool isScalarRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return src[operandIndex].isScalarRegister(); + else + return dest.isScalarRegister(); + } + bool isSrcOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return true; + return false; + } + + bool isDstOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex >= NumSrcOperands) + return true; + return false; + } + int getOperandSize(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return src[operandIndex].opSize(); + else + return dest.opSize(); + } + int getRegisterIndex(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + + if (operandIndex < NumSrcOperands) + return src[operandIndex].regIndex(); + else + return dest.regIndex(); + } + int numSrcRegOperands() { + int operands = 0; + for (int i = 0; i < NumSrcOperands; i++) { + if (src[i].isVectorRegister() == true) { + operands++; + } + } + return operands; + } + int numDstRegOperands() { return dest.isVectorRegister(); } + int getNumOperands() { return NumSrcOperands + 1; } + }; + + template<typename DataType, int NumSrcOperands> + class ArithInst : public CommonInstBase<typename DataType::OperandType, + typename DataType::OperandType, + NumSrcOperands> + { + public: + std::string opcode_suffix() { return csprintf("_%s", DataType::label); } + + ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *opcode) + : CommonInstBase<typename DataType::OperandType, + typename DataType::OperandType, + NumSrcOperands>(ib, obj, opcode) + { + } + }; + + template<typename DestOperandType, typename Src0OperandType, + typename Src1OperandType, typename Src2OperandType> + class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst + { + protected: + typename DestOperandType::DestOperand dest; + typename Src0OperandType::SrcOperand src0; + typename Src1OperandType::SrcOperand src1; + typename Src2OperandType::SrcOperand src2; + + void + generateDisassembly() + { + disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(), + src0.disassemble(), src1.disassemble(), + src2.disassemble()); + } + + public: + ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib, + const BrigObject *obj, + const char *opcode) + : HsailGPUStaticInst(obj, opcode) + { + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + dest.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 1); + src0.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 2); + src1.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 3); + src2.init(op_offs, obj); + } + + bool isVectorRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.isVectorRegister(); + else if (operandIndex == 1) + return src1.isVectorRegister(); + else if (operandIndex == 2) + return src2.isVectorRegister(); + else + return dest.isVectorRegister(); + } + bool 
isCondRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.isCondRegister(); + else if (operandIndex == 1) + return src1.isCondRegister(); + else if (operandIndex == 2) + return src2.isCondRegister(); + else + return dest.isCondRegister(); + } + bool isScalarRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.isScalarRegister(); + else if (operandIndex == 1) + return src1.isScalarRegister(); + else if (operandIndex == 2) + return src2.isScalarRegister(); + else + return dest.isScalarRegister(); + } + bool isSrcOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < 3) + return true; + else + return false; + } + bool isDstOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex >= 3) + return true; + else + return false; + } + int getOperandSize(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.opSize(); + else if (operandIndex == 1) + return src1.opSize(); + else if (operandIndex == 2) + return src2.opSize(); + else + return dest.opSize(); + } + int getRegisterIndex(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.regIndex(); + else if (operandIndex == 1) + return src1.regIndex(); + else if (operandIndex == 2) + return src2.regIndex(); + else + return dest.regIndex(); + } + + int numSrcRegOperands() { + int operands = 0; + if (src0.isVectorRegister() == true) { + operands++; + } + if (src1.isVectorRegister() == true) { + operands++; + } + if (src2.isVectorRegister() == true) { + operands++; + } + return operands; + } + int numDstRegOperands() { return dest.isVectorRegister(); } + int getNumOperands() { return 4; } + }; + + template<typename DestDataType, typename Src0DataType, + typename Src1DataType, typename Src2DataType> + class ThreeNonUniformSourceInst : + public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType, + typename Src0DataType::OperandType, + typename Src1DataType::OperandType, + typename Src2DataType::OperandType> + { + public: + typedef typename DestDataType::CType DestCType; + typedef typename Src0DataType::CType Src0CType; + typedef typename Src1DataType::CType Src1CType; + typedef typename Src2DataType::CType Src2CType; + + ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib, + const BrigObject *obj, const char *opcode) + : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType, + typename Src0DataType::OperandType, + typename Src1DataType::OperandType, + typename Src2DataType::OperandType>(ib, + obj, opcode) + { + } + }; + + template<typename DataType> + class CmovInst : public ThreeNonUniformSourceInst<DataType, B1, + DataType, DataType> + { + public: + CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *opcode) + : ThreeNonUniformSourceInst<DataType, B1, DataType, + DataType>(ib, obj, opcode) + { + } + }; + + template<typename DataType> + class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType, + DataType, U32, + U32> + { + public: + ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *opcode) + : ThreeNonUniformSourceInst<DataType, DataType, U32, + U32>(ib, obj, opcode) + { + } + }; + + template<typename DestOperandType, typename Src0OperandType, + 
typename Src1OperandType> + class TwoNonUniformSourceInstBase : public HsailGPUStaticInst + { + protected: + typename DestOperandType::DestOperand dest; + typename Src0OperandType::SrcOperand src0; + typename Src1OperandType::SrcOperand src1; + + void + generateDisassembly() + { + disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(), + src0.disassemble(), src1.disassemble()); + } + + + public: + TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib, + const BrigObject *obj, const char *opcode) + : HsailGPUStaticInst(obj, opcode) + { + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + dest.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 1); + src0.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 2); + src1.init(op_offs, obj); + } + bool isVectorRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.isVectorRegister(); + else if (operandIndex == 1) + return src1.isVectorRegister(); + else + return dest.isVectorRegister(); + } + bool isCondRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.isCondRegister(); + else if (operandIndex == 1) + return src1.isCondRegister(); + else + return dest.isCondRegister(); + } + bool isScalarRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.isScalarRegister(); + else if (operandIndex == 1) + return src1.isScalarRegister(); + else + return dest.isScalarRegister(); + } + bool isSrcOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < 2) + return true; + else + return false; + } + bool isDstOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex >= 2) + return true; + else + return false; + } + int getOperandSize(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.opSize(); + else if (operandIndex == 1) + return src1.opSize(); + else + return dest.opSize(); + } + int getRegisterIndex(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return src0.regIndex(); + else if (operandIndex == 1) + return src1.regIndex(); + else + return dest.regIndex(); + } + + int numSrcRegOperands() { + int operands = 0; + if (src0.isVectorRegister() == true) { + operands++; + } + if (src1.isVectorRegister() == true) { + operands++; + } + return operands; + } + int numDstRegOperands() { return dest.isVectorRegister(); } + int getNumOperands() { return 3; } + }; + + template<typename DestDataType, typename Src0DataType, + typename Src1DataType> + class TwoNonUniformSourceInst : + public TwoNonUniformSourceInstBase<typename DestDataType::OperandType, + typename Src0DataType::OperandType, + typename Src1DataType::OperandType> + { + public: + typedef typename DestDataType::CType DestCType; + typedef typename Src0DataType::CType Src0CType; + typedef typename Src1DataType::CType Src1CType; + + TwoNonUniformSourceInst(const Brig::BrigInstBase *ib, + const BrigObject *obj, const char *opcode) + : TwoNonUniformSourceInstBase<typename DestDataType::OperandType, + typename Src0DataType::OperandType, + typename Src1DataType::OperandType>(ib, + obj, opcode) + { + } + }; + + // helper function for ClassInst + template<typename T> + bool + fpclassify(T src0, 
uint32_t src1)
+    {
+        // src1 is the HSAIL "class" condition bit mask: bits 0-1 select
+        // (signaling/quiet) NaN, bits 2-5 select negative
+        // infinity/normal/subnormal/zero, and bits 6-9 select the
+        // corresponding positive classes
+        int fpclass = std::fpclassify(src0);
+
+        if ((src1 & 0x3) && (fpclass == FP_NAN)) {
+            return true;
+        }
+
+        // dispatch on the sign bit; a relational test against -0.0 would
+        // send +0.0 down the negative branch, since +0.0 <= -0.0
+        if (std::signbit(src0)) {
+            if ((src1 & 0x4) && fpclass == FP_INFINITE)
+                return true;
+            if ((src1 & 0x8) && fpclass == FP_NORMAL)
+                return true;
+            if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
+                return true;
+            if ((src1 & 0x20) && fpclass == FP_ZERO)
+                return true;
+        } else {
+            if ((src1 & 0x40) && fpclass == FP_ZERO)
+                return true;
+            if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
+                return true;
+            if ((src1 & 0x100) && fpclass == FP_NORMAL)
+                return true;
+            if ((src1 & 0x200) && fpclass == FP_INFINITE)
+                return true;
+        }
+        return false;
+    }
+
+    template<typename DataType>
+    class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
+    {
+      public:
+        ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+                  const char *opcode)
+            : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
+        {
+        }
+    };
+
+    template<typename DataType>
+    class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
+    {
+      public:
+        ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+                  const char *opcode)
+            : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
+        {
+        }
+    };
+
+    // helper function for CmpInst
+    template<typename T>
+    bool
+    compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
+    {
+        using namespace Brig;
+
+        switch (cmpOp) {
+          case BRIG_COMPARE_EQ:
+          case BRIG_COMPARE_EQU:
+          case BRIG_COMPARE_SEQ:
+          case BRIG_COMPARE_SEQU:
+            return (src0 == src1);
+
+          case BRIG_COMPARE_NE:
+          case BRIG_COMPARE_NEU:
+          case BRIG_COMPARE_SNE:
+          case BRIG_COMPARE_SNEU:
+            return (src0 != src1);
+
+          case BRIG_COMPARE_LT:
+          case BRIG_COMPARE_LTU:
+          case BRIG_COMPARE_SLT:
+          case BRIG_COMPARE_SLTU:
+            return (src0 < src1);
+
+          case BRIG_COMPARE_LE:
+          case BRIG_COMPARE_LEU:
+          case BRIG_COMPARE_SLE:
+          case BRIG_COMPARE_SLEU:
+            return (src0 <= src1);
+
+          case BRIG_COMPARE_GT:
+          case BRIG_COMPARE_GTU:
+          case BRIG_COMPARE_SGT:
+          case BRIG_COMPARE_SGTU:
+            return (src0 > src1);
+
+          case BRIG_COMPARE_GE:
+          case BRIG_COMPARE_GEU:
+          case BRIG_COMPARE_SGE:
+          case BRIG_COMPARE_SGEU:
+            return (src0 >= src1);
+
+          case BRIG_COMPARE_NUM:
+          case BRIG_COMPARE_SNUM:
+            // num is the ordered predicate: neither operand may be NaN
+            return (src0 == src0) && (src1 == src1);
+
+          case BRIG_COMPARE_NAN:
+          case BRIG_COMPARE_SNAN:
+            return (src0 != src0) || (src1 != src1);
+
+          default:
+            fatal("Bad cmpOp value %d\n", (int)cmpOp);
+        }
+    }
+
+    template<typename T>
+    int32_t
+    firstbit(T src0)
+    {
+        if (!src0)
+            return -1;
+
+        // handle positive and negative numbers
+        T tmp = (src0 < 0) ? (~src0) : (src0);
+
+        // the starting pos is the MSB
+        int pos = 8 * sizeof(T) - 1;
+        int cnt = 0;
+
+        // search for the first bit set to 1; use a 64-bit mask so the
+        // shift stays defined when T is a 64-bit type
+        while (!(tmp & (1ULL << pos))) {
+            ++cnt;
+            --pos;
+        }
+        return cnt;
+    }
+
+    const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
+
+    template<typename DestOperandType, typename SrcOperandType>
+    class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
+                                              2>
+    {
+      protected:
+        Brig::BrigCompareOperation cmpOp;
+
+      public:
+        CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+                    const char *_opcode)
+            : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
+                                                                 _opcode)
+        {
+            assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
+            Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
+            cmpOp = (Brig::BrigCompareOperation)i->compare;
+        }
+    };
+
+    template<typename DestDataType, typename SrcDataType>
+    class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
+                                       typename SrcDataType::OperandType>
+    {
+      public:
+        std::string
+        opcode_suffix()
+        {
+            return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
+                            DestDataType::label, SrcDataType::label);
+        }
+
+        CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+                const char *_opcode)
+            : CmpInstBase<typename DestDataType::OperandType,
+                          typename SrcDataType::OperandType>(ib, obj, _opcode)
+        {
+        }
+    };
+
+    template<typename DestDataType, typename SrcDataType>
+    class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
+                                          typename SrcDataType::OperandType, 1>
+    {
+      public:
+        std::string opcode_suffix()
+        {
+            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
+        }
+
+        CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+                const char *_opcode)
+            : CommonInstBase<typename DestDataType::OperandType,
+                             typename SrcDataType::OperandType,
+                             1>(ib, obj, _opcode)
+        {
+        }
+    };
+
+    class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
+    {
+      public:
+        SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
+                               const BrigObject *obj, const char *_opcode)
+            : HsailGPUStaticInst(obj, _opcode)
+        {
+        }
+
+        bool isVectorRegister(int operandIndex) { return false; }
+        bool isCondRegister(int operandIndex) { return false; }
+        bool isScalarRegister(int operandIndex) { return false; }
+        bool isSrcOperand(int operandIndex) { return false; }
+        bool isDstOperand(int operandIndex) { return false; }
+        int getOperandSize(int operandIndex) { return 0; }
+        int getRegisterIndex(int operandIndex) { return -1; }
+
+        int numSrcRegOperands() { return 0; }
+        int numDstRegOperands() { return 0; }
+        int getNumOperands() { return 0; }
+    };
+
+    template<typename DestOperandType>
+    class SpecialInstNoSrcBase : public HsailGPUStaticInst
+    {
+      protected:
+        typename DestOperandType::DestOperand dest;
+
+        void generateDisassembly()
+        {
+            disassembly = csprintf("%s %s", opcode, dest.disassemble());
+        }
+
+      public:
+        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
+                             const BrigObject *obj, const char *_opcode)
+            : HsailGPUStaticInst(obj, _opcode)
+        {
+            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+            dest.init(op_offs, obj);
+        }
+
+        bool isVectorRegister(int operandIndex) {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            return dest.isVectorRegister();
+        }
+        bool isCondRegister(int operandIndex) {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            return dest.isCondRegister();
+        }
+        bool isScalarRegister(int operandIndex) {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            return dest.isScalarRegister();
+        }
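+        // the lone operand of a no-source special instruction is its
+        // destination, so once the bounds assert passes, every query here
+        // resolves against dest: index 0 is always a dst operand and
+        // never a src operand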
bool isSrcOperand(int operandIndex) { return false; } + bool isDstOperand(int operandIndex) { return true; } + int getOperandSize(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return dest.opSize(); + } + int getRegisterIndex(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return dest.regIndex(); + } + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return dest.isVectorRegister(); } + int getNumOperands() { return 1; } + }; + + template<typename DestDataType> + class SpecialInstNoSrc : + public SpecialInstNoSrcBase<typename DestDataType::OperandType> + { + public: + typedef typename DestDataType::CType DestCType; + + SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj, + _opcode) + { + } + }; + + template<typename DestOperandType> + class SpecialInst1SrcBase : public HsailGPUStaticInst + { + protected: + typedef int SrcCType; // used in execute() template + + typename DestOperandType::DestOperand dest; + ImmOperand<SrcCType> src0; + + void + generateDisassembly() + { + disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(), + src0.disassemble()); + } + + public: + SpecialInst1SrcBase(const Brig::BrigInstBase *ib, + const BrigObject *obj, const char *_opcode) + : HsailGPUStaticInst(obj, _opcode) + { + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + dest.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 1); + src0.init(op_offs, obj); + } + bool isVectorRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return dest.isVectorRegister(); + } + bool isCondRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return dest.isCondRegister(); + } + bool isScalarRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return dest.isScalarRegister(); + } + bool isSrcOperand(int operandIndex) { return false; } + bool isDstOperand(int operandIndex) { return true; } + int getOperandSize(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return dest.opSize(); + } + int getRegisterIndex(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return dest.regIndex(); + } + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return dest.isVectorRegister(); } + int getNumOperands() { return 1; } + }; + + template<typename DestDataType> + class SpecialInst1Src : + public SpecialInst1SrcBase<typename DestDataType::OperandType> + { + public: + typedef typename DestDataType::CType DestCType; + + SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj, + _opcode) + { + } + }; + + class Ret : public SpecialInstNoSrcNoDest + { + public: + typedef SpecialInstNoSrcNoDest Base; + + Ret(const Brig::BrigInstBase *ib, const BrigObject *obj) + : Base(ib, obj, "ret") + { + o_type = Enums::OT_RET; + } + + void execute(GPUDynInstPtr gpuDynInst); + }; + + class Barrier : public SpecialInstNoSrcNoDest + { + public: + typedef SpecialInstNoSrcNoDest Base; + uint8_t width; + + Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj) + : Base(ib, obj, "barrier") + { + o_type = Enums::OT_BARRIER; + assert(ib->base.kind == Brig::BRIG_KIND_INST_BR); + width = 
(uint8_t)((Brig::BrigInstBr*)ib)->width; + } + + void execute(GPUDynInstPtr gpuDynInst); + }; + + class MemFence : public SpecialInstNoSrcNoDest + { + public: + typedef SpecialInstNoSrcNoDest Base; + + Brig::BrigMemoryOrder memFenceMemOrder; + Brig::BrigMemoryScope memFenceScopeSegGroup; + Brig::BrigMemoryScope memFenceScopeSegGlobal; + Brig::BrigMemoryScope memFenceScopeSegImage; + + MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj) + : Base(ib, obj, "memfence") + { + assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE); + + memFenceScopeSegGlobal = (Brig::BrigMemoryScope) + ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope; + + memFenceScopeSegGroup = (Brig::BrigMemoryScope) + ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope; + + memFenceScopeSegImage = (Brig::BrigMemoryScope) + ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope; + + memFenceMemOrder = (Brig::BrigMemoryOrder) + ((Brig::BrigInstMemFence*)ib)->memoryOrder; + + // set o_type based on scopes + if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE && + memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { + o_type = Enums::OT_BOTH_MEMFENCE; + } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) { + o_type = Enums::OT_GLOBAL_MEMFENCE; + } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { + o_type = Enums::OT_SHARED_MEMFENCE; + } else { + fatal("MemFence constructor: bad scope specifiers\n"); + } + } + + void + initiateAcc(GPUDynInstPtr gpuDynInst) + { + Wavefront *wave = gpuDynInst->wavefront(); + wave->computeUnit->injectGlobalMemFence(gpuDynInst); + } + + void + execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + // 2 cases: + // * memfence to a sequentially consistent memory (e.g., LDS). + // These can be handled as no-ops. + // * memfence to a relaxed consistency cache (e.g., Hermes, Viper, + // etc.). We send a packet, tagged with the memory order and + // scope, and let the GPU coalescer handle it. 
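+            //
+            // note: at issue this fence was charged to the wavefront's
+            // pending-request bookkeeping, which is presumably why the
+            // global path below releases both the read and write in-pipe
+            // counters and lets outstanding_reqs track the fence until
+            // its response returns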
+ + if (o_type == Enums::OT_GLOBAL_MEMFENCE || + o_type == Enums::OT_BOTH_MEMFENCE) { + gpuDynInst->simdId = w->simdId; + gpuDynInst->wfSlotId = w->wfSlotId; + gpuDynInst->wfDynId = w->wfDynId; + gpuDynInst->kern_id = w->kern_id; + gpuDynInst->cu_id = w->computeUnit->cu_id; + + gpuDynInst->memoryOrder = + getGenericMemoryOrder(memFenceMemOrder); + gpuDynInst->scope = + getGenericMemoryScope(memFenceScopeSegGlobal); + gpuDynInst->useContinuation = false; + GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); + gmp->getGMReqFIFO().push(gpuDynInst); + + w->wr_gm_reqs_in_pipe--; + w->rd_gm_reqs_in_pipe--; + w->mem_reqs_in_pipe--; + w->outstanding_reqs++; + } else if (o_type == Enums::OT_SHARED_MEMFENCE) { + // no-op + } else { + fatal("MemFence execute: bad o_type\n"); + } + } + }; + + class Call : public HsailGPUStaticInst + { + public: + // private helper functions + void calcAddr(Wavefront* w, GPUDynInstPtr m); + + void + generateDisassembly() + { + if (dest.disassemble() == "") { + disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(), + src1.disassemble()); + } else { + disassembly = csprintf("%s %s (%s) (%s)", opcode, + src0.disassemble(), dest.disassemble(), + src1.disassemble()); + } + } + + bool + isPseudoOp() + { + std::string func_name = src0.disassemble(); + if (func_name.find("__gem5_hsail_op") != std::string::npos) { + return true; + } + return false; + } + + // member variables + ListOperand dest; + FunctionRefOperand src0; + ListOperand src1; + HsailCode *func_ptr; + + // exec function for pseudo instructions mapped on top of call opcode + void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst); + + // user-defined pseudo instructions + void MagicPrintLane(Wavefront *w); + void MagicPrintLane64(Wavefront *w); + void MagicPrintWF32(Wavefront *w); + void MagicPrintWF64(Wavefront *w); + void MagicPrintWFFloat(Wavefront *w); + void MagicSimBreak(Wavefront *w); + void MagicPrefixSum(Wavefront *w); + void MagicReduction(Wavefront *w); + void MagicMaskLower(Wavefront *w); + void MagicMaskUpper(Wavefront *w); + void MagicJoinWFBar(Wavefront *w); + void MagicWaitWFBar(Wavefront *w); + void MagicPanic(Wavefront *w); + + void MagicAtomicNRAddGlobalU32Reg(Wavefront *w, + GPUDynInstPtr gpuDynInst); + + void MagicAtomicNRAddGroupU32Reg(Wavefront *w, + GPUDynInstPtr gpuDynInst); + + void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst); + + void MagicXactCasLd(Wavefront *w); + void MagicMostSigThread(Wavefront *w); + void MagicMostSigBroadcast(Wavefront *w); + + void MagicPrintWF32ID(Wavefront *w); + void MagicPrintWFID64(Wavefront *w); + + Call(const Brig::BrigInstBase *ib, const BrigObject *obj) + : HsailGPUStaticInst(obj, "call") + { + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + dest.init(op_offs, obj); + op_offs = obj->getOperandPtr(ib->operands, 1); + src0.init(op_offs, obj); + + func_ptr = nullptr; + std::string func_name = src0.disassemble(); + if (!isPseudoOp()) { + func_ptr = dynamic_cast<HsailCode*>(obj-> + getFunction(func_name)); + + if (!func_ptr) + fatal("call::exec cannot find function: %s\n", func_name); + } + + op_offs = obj->getOperandPtr(ib->operands, 2); + src1.init(op_offs, obj); + } + + bool isVectorRegister(int operandIndex) { return false; } + bool isCondRegister(int operandIndex) { return false; } + bool isScalarRegister(int operandIndex) { return false; } + bool isSrcOperand(int operandIndex) { return false; } + bool isDstOperand(int operandIndex) { return false; } + int getOperandSize(int operandIndex) { return 0; } 
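+        // calls whose target name contains "__gem5_hsail_op" are treated
+        // as simulator pseudo instructions: func_ptr stays null for them
+        // and execute() below routes them to the Magic* handlers via
+        // execPseudoInst(). A (hypothetical) kernel-side call such as
+        //     call &__gem5_hsail_op_prefix_sum (...);
+        // would therefore never be looked up as real HSAIL code.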
+ int getRegisterIndex(int operandIndex) { return -1; } + + void + execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + std::string func_name = src0.disassemble(); + if (isPseudoOp()) { + execPseudoInst(w, gpuDynInst); + } else { + fatal("Native HSAIL functions are not yet implemented: %s\n", + func_name); + } + } + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return 0; } + int getNumOperands() { return 2; } + }; + + template<typename T> T heynot(T arg) { return ~arg; } + template<> inline bool heynot<bool>(bool arg) { return !arg; } +} // namespace HsailISA + +#endif // __ARCH_HSAIL_INSTS_DECL_HH__ diff --git a/src/arch/hsail/insts/gpu_static_inst.cc b/src/arch/hsail/insts/gpu_static_inst.cc new file mode 100644 index 000000000..bbaeb13e6 --- /dev/null +++ b/src/arch/hsail/insts/gpu_static_inst.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Anthony Gutierrez + */ + +#include "arch/hsail/insts/gpu_static_inst.hh" + +#include "gpu-compute/brig_object.hh" + +namespace HsailISA +{ + HsailGPUStaticInst::HsailGPUStaticInst(const BrigObject *obj, + const std::string &opcode) + : GPUStaticInst(opcode), hsailCode(obj->currentCode) + { + } + + void + HsailGPUStaticInst::generateDisassembly() + { + disassembly = opcode; + } + + const std::string& + HsailGPUStaticInst::disassemble() + { + if (disassembly.empty()) { + generateDisassembly(); + assert(!disassembly.empty()); + } + + return disassembly; + } +} // namespace HsailISA diff --git a/src/arch/hsail/insts/gpu_static_inst.hh b/src/arch/hsail/insts/gpu_static_inst.hh new file mode 100644 index 000000000..29aab1f70 --- /dev/null +++ b/src/arch/hsail/insts/gpu_static_inst.hh @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Anthony Gutierrez + */ + +#ifndef __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__ +#define __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__ + +/* + * @file gpu_static_inst.hh + * + * Defines the base class representing HSAIL GPU static instructions. + */ + +#include "gpu-compute/gpu_static_inst.hh" + +class BrigObject; +class HsailCode; + +namespace HsailISA +{ + class HsailGPUStaticInst : public GPUStaticInst + { + public: + HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode); + void generateDisassembly(); + const std::string &disassemble(); + uint32_t instSize() { return 4; } + + protected: + HsailCode *hsailCode; + }; +} // namespace HsailISA + +#endif // __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__ diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc new file mode 100644 index 000000000..4e70bf46a --- /dev/null +++ b/src/arch/hsail/insts/main.cc @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Steve Reinhardt + */ + +#include "arch/hsail/insts/decl.hh" +#include "debug/GPUExec.hh" +#include "gpu-compute/dispatcher.hh" +#include "gpu-compute/simple_pool_manager.hh" + +namespace HsailISA +{ + template<> const char *B1::label = "b1"; + template<> const char *B8::label = "b8"; + template<> const char *B16::label = "b16"; + template<> const char *B32::label = "b32"; + template<> const char *B64::label = "b64"; + + template<> const char *S8::label = "s8"; + template<> const char *S16::label = "s16"; + template<> const char *S32::label = "s32"; + template<> const char *S64::label = "s64"; + + template<> const char *U8::label = "u8"; + template<> const char *U16::label = "u16"; + template<> const char *U32::label = "u32"; + template<> const char *U64::label = "u64"; + + template<> const char *F32::label = "f32"; + template<> const char *F64::label = "f64"; + + const char* + cmpOpToString(Brig::BrigCompareOperation cmpOp) + { + using namespace Brig; + + switch (cmpOp) { + case BRIG_COMPARE_EQ: + return "eq"; + case BRIG_COMPARE_NE: + return "ne"; + case BRIG_COMPARE_LT: + return "lt"; + case BRIG_COMPARE_LE: + return "le"; + case BRIG_COMPARE_GT: + return "gt"; + case BRIG_COMPARE_GE: + return "ge"; + case BRIG_COMPARE_EQU: + return "equ"; + case BRIG_COMPARE_NEU: + return "neu"; + case BRIG_COMPARE_LTU: + return "ltu"; + case BRIG_COMPARE_LEU: + return "leu"; + case BRIG_COMPARE_GTU: + return "gtu"; + case BRIG_COMPARE_GEU: + return "geu"; + case BRIG_COMPARE_NUM: + return "num"; + case BRIG_COMPARE_NAN: + return "nan"; + case BRIG_COMPARE_SEQ: + return "seq"; + case BRIG_COMPARE_SNE: + return "sne"; + case BRIG_COMPARE_SLT: + return "slt"; + case BRIG_COMPARE_SLE: + return "sle"; + case BRIG_COMPARE_SGT: + return "sgt"; + case BRIG_COMPARE_SGE: + return "sge"; + case BRIG_COMPARE_SGEU: + return "sgeu"; + case BRIG_COMPARE_SEQU: + return "sequ"; + case BRIG_COMPARE_SNEU: + return "sneu"; + case BRIG_COMPARE_SLTU: + return "sltu"; + case BRIG_COMPARE_SLEU: + return "sleu"; + case BRIG_COMPARE_SNUM: + return "snum"; + case BRIG_COMPARE_SNAN: + return "snan"; + case BRIG_COMPARE_SGTU: + return "sgtu"; + default: + return "unknown"; + } + } + + void + Ret::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + const VectorMask &mask = w->get_pred(); + + // mask off completed work-items + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + w->init_mask[lane] = 0; + } + + } + + // delete extra instructions fetched for completed work-items + w->instructionBuffer.erase(w->instructionBuffer.begin() + 1, + w->instructionBuffer.end()); + if (w->pendingFetch) { + w->dropFetch = true; + } + + // if all work-items have completed, then wave-front is done + 
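+        // two outcomes from here: if this wavefront is the last of its
+        // work-group (the LDS reference count drops to zero), inject a
+        // system-scope release fence to publish the kernel's results;
+        // otherwise nudge the dispatcher so a pending kernel can claim
+        // the freed resources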
if (w->init_mask.none()) { + w->status = Wavefront::S_STOPPED; + + int32_t refCount = w->computeUnit->getLds(). + decreaseRefCounter(w->dispatchid, w->wg_id); + + DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n", + w->computeUnit->cu_id, w->wg_id, refCount); + + // free the vector registers of the completed wavefront + w->computeUnit->vectorRegsReserved[w->simdId] -= + w->reservedVectorRegs; + + assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0); + + uint32_t endIndex = (w->startVgprIndex + + w->reservedVectorRegs - 1) % + w->computeUnit->vrf[w->simdId]->numRegs(); + + w->computeUnit->vrf[w->simdId]->manager-> + freeRegion(w->startVgprIndex, endIndex); + + w->reservedVectorRegs = 0; + w->startVgprIndex = 0; + w->computeUnit->completedWfs++; + + DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n", + w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId); + + if (!refCount) { + // Notify Memory System of Kernel Completion + // Kernel End = isKernel + isRelease + w->status = Wavefront::S_RETURNING; + GPUDynInstPtr local_mempacket = gpuDynInst; + local_mempacket->memoryOrder = Enums::MEMORY_ORDER_SC_RELEASE; + local_mempacket->scope = Enums::MEMORY_SCOPE_SYSTEM; + local_mempacket->useContinuation = false; + local_mempacket->simdId = w->simdId; + local_mempacket->wfSlotId = w->wfSlotId; + local_mempacket->wfDynId = w->wfDynId; + w->computeUnit->injectGlobalMemFence(local_mempacket, true); + } else { + w->computeUnit->shader->dispatcher->scheduleDispatch(); + } + } + } + + void + Barrier::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + assert(w->barrier_cnt == w->old_barrier_cnt); + w->barrier_cnt = w->old_barrier_cnt + 1; + w->stalledAtBarrier = true; + } +} // namespace HsailISA diff --git a/src/arch/hsail/insts/mem.cc b/src/arch/hsail/insts/mem.cc new file mode 100644 index 000000000..97d4c902b --- /dev/null +++ b/src/arch/hsail/insts/mem.cc @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Steve Reinhardt + */ + +#include "arch/hsail/insts/mem.hh" + +#include "arch/hsail/Brig.h" +#include "enums/OpType.hh" + +using namespace Brig; + +namespace HsailISA +{ + const char* atomicOpToString(BrigAtomicOperation brigOp); + + Enums::MemOpType + brigAtomicToMemOpType(BrigOpcode brigOpCode, BrigAtomicOperation brigOp) + { + if (brigOpCode == Brig::BRIG_OPCODE_ATOMIC) { + switch (brigOp) { + case BRIG_ATOMIC_AND: + return Enums::MO_AAND; + case BRIG_ATOMIC_OR: + return Enums::MO_AOR; + case BRIG_ATOMIC_XOR: + return Enums::MO_AXOR; + case BRIG_ATOMIC_CAS: + return Enums::MO_ACAS; + case BRIG_ATOMIC_EXCH: + return Enums::MO_AEXCH; + case BRIG_ATOMIC_ADD: + return Enums::MO_AADD; + case BRIG_ATOMIC_WRAPINC: + return Enums::MO_AINC; + case BRIG_ATOMIC_WRAPDEC: + return Enums::MO_ADEC; + case BRIG_ATOMIC_MIN: + return Enums::MO_AMIN; + case BRIG_ATOMIC_MAX: + return Enums::MO_AMAX; + case BRIG_ATOMIC_SUB: + return Enums::MO_ASUB; + default: + fatal("Bad BrigAtomicOperation code %d\n", brigOp); + } + } else if (brigOpCode == Brig::BRIG_OPCODE_ATOMICNORET) { + switch (brigOp) { + case BRIG_ATOMIC_AND: + return Enums::MO_ANRAND; + case BRIG_ATOMIC_OR: + return Enums::MO_ANROR; + case BRIG_ATOMIC_XOR: + return Enums::MO_ANRXOR; + case BRIG_ATOMIC_CAS: + return Enums::MO_ANRCAS; + case BRIG_ATOMIC_EXCH: + return Enums::MO_ANREXCH; + case BRIG_ATOMIC_ADD: + return Enums::MO_ANRADD; + case BRIG_ATOMIC_WRAPINC: + return Enums::MO_ANRINC; + case BRIG_ATOMIC_WRAPDEC: + return Enums::MO_ANRDEC; + case BRIG_ATOMIC_MIN: + return Enums::MO_ANRMIN; + case BRIG_ATOMIC_MAX: + return Enums::MO_ANRMAX; + case BRIG_ATOMIC_SUB: + return Enums::MO_ANRSUB; + default: + fatal("Bad BrigAtomicOperation code %d\n", brigOp); + } + } else { + fatal("Bad BrigAtomicOpcode %d\n", brigOpCode); + } + } + + const char* + atomicOpToString(BrigAtomicOperation brigOp) + { + switch (brigOp) { + case BRIG_ATOMIC_AND: + return "and"; + case BRIG_ATOMIC_OR: + return "or"; + case BRIG_ATOMIC_XOR: + return "xor"; + case BRIG_ATOMIC_CAS: + return "cas"; + case BRIG_ATOMIC_EXCH: + return "exch"; + case BRIG_ATOMIC_ADD: + return "add"; + case BRIG_ATOMIC_WRAPINC: + return "inc"; + case BRIG_ATOMIC_WRAPDEC: + return "dec"; + case BRIG_ATOMIC_MIN: + return "min"; + case BRIG_ATOMIC_MAX: + return "max"; + case BRIG_ATOMIC_SUB: + return "sub"; + default: + return "unknown"; + } + } +} // namespace HsailISA diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh new file mode 100644 index 000000000..d3ce76dee --- /dev/null +++ b/src/arch/hsail/insts/mem.hh @@ -0,0 +1,1629 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Steve Reinhardt + */ + +#ifndef __ARCH_HSAIL_INSTS_MEM_HH__ +#define __ARCH_HSAIL_INSTS_MEM_HH__ + +#include "arch/hsail/insts/decl.hh" +#include "arch/hsail/insts/gpu_static_inst.hh" +#include "arch/hsail/operand.hh" + +namespace HsailISA +{ + class MemInst + { + public: + MemInst() : size(0), addr_operand(nullptr) { } + + MemInst(Enums::MemType m_type) + { + if (m_type == Enums::M_U64 || + m_type == Enums::M_S64 || + m_type == Enums::M_F64) { + size = 8; + } else if (m_type == Enums::M_U32 || + m_type == Enums::M_S32 || + m_type == Enums::M_F32) { + size = 4; + } else if (m_type == Enums::M_U16 || + m_type == Enums::M_S16 || + m_type == Enums::M_F16) { + size = 2; + } else { + size = 1; + } + + addr_operand = nullptr; + } + + void + init_addr(AddrOperandBase *_addr_operand) + { + addr_operand = _addr_operand; + } + + private: + int size; + AddrOperandBase *addr_operand; + + public: + int getMemOperandSize() { return size; } + AddrOperandBase *getAddressOperand() { return addr_operand; } + }; + + template<typename DestOperandType, typename AddrOperandType> + class LdaInstBase : public HsailGPUStaticInst + { + public: + typename DestOperandType::DestOperand dest; + AddrOperandType addr; + + LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : HsailGPUStaticInst(obj, _opcode) + { + using namespace Brig; + + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + dest.init(op_offs, obj); + op_offs = obj->getOperandPtr(ib->operands, 1); + addr.init(op_offs, obj); + } + + int numSrcRegOperands() { return(this->addr.isVectorRegister()); } + int numDstRegOperands() { return dest.isVectorRegister(); } + bool isVectorRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? dest.isVectorRegister() : + this->addr.isVectorRegister()); + } + bool isCondRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? 
dest.isCondRegister() : + this->addr.isCondRegister()); + } + bool isScalarRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? dest.isScalarRegister() : + this->addr.isScalarRegister()); + } + bool isSrcOperand(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex > 0) + return(this->addr.isVectorRegister()); + return false; + } + bool isDstOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return(operandIndex == 0); + } + int getOperandSize(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? dest.opSize() : + this->addr.opSize()); + } + int getRegisterIndex(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? dest.regIndex() : + this->addr.regIndex()); + } + int getNumOperands() + { + if (this->addr.isVectorRegister()) + return 2; + return 1; + } + }; + + template<typename DestDataType, typename AddrOperandType> + class LdaInst : + public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>, + public MemInst + { + public: + void generateDisassembly(); + + LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : LdaInstBase<typename DestDataType::OperandType, + AddrOperandType>(ib, obj, _opcode) + { + init_addr(&this->addr); + } + + void execute(GPUDynInstPtr gpuDynInst); + }; + + template<typename DataType> + GPUStaticInst* + decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + unsigned op_offs = obj->getOperandPtr(ib->operands, 1); + BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj); + + if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { + return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas"); + } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) { + // V2/V4 not allowed + switch (regDataType.regKind) { + case Brig::BRIG_REGISTER_KIND_SINGLE: + return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas"); + case Brig::BRIG_REGISTER_KIND_DOUBLE: + return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas"); + default: + fatal("Bad ldas register operand type %d\n", regDataType.type); + } + } else { + fatal("Bad ldas register operand kind %d\n", regDataType.kind); + } + } + + template<typename MemOperandType, typename DestOperandType, + typename AddrOperandType> + class LdInstBase : public HsailGPUStaticInst + { + public: + Brig::BrigWidth8_t width; + typename DestOperandType::DestOperand dest; + AddrOperandType addr; + + Brig::BrigSegment segment; + Brig::BrigMemoryOrder memoryOrder; + Brig::BrigMemoryScope memoryScope; + unsigned int equivClass; + bool isArgLoad() + { + return segment == Brig::BRIG_SEGMENT_KERNARG || + segment == Brig::BRIG_SEGMENT_ARG; + } + void + initLd(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + { + using namespace Brig; + + const BrigInstMem *ldst = (const BrigInstMem*)ib; + + segment = (BrigSegment)ldst->segment; + memoryOrder = BRIG_MEMORY_ORDER_NONE; + memoryScope = BRIG_MEMORY_SCOPE_NONE; + equivClass = ldst->equivClass; + + switch (segment) { + case BRIG_SEGMENT_GLOBAL: + o_type = Enums::OT_GLOBAL_READ; + break; + + case BRIG_SEGMENT_GROUP: + o_type = Enums::OT_SHARED_READ; + break; + + case BRIG_SEGMENT_PRIVATE: + o_type = Enums::OT_PRIVATE_READ; + break; + + case BRIG_SEGMENT_READONLY: + o_type = 
Enums::OT_READONLY_READ; + break; + + case BRIG_SEGMENT_SPILL: + o_type = Enums::OT_SPILL_READ; + break; + + case BRIG_SEGMENT_FLAT: + o_type = Enums::OT_FLAT_READ; + break; + + case BRIG_SEGMENT_KERNARG: + o_type = Enums::OT_KERN_READ; + break; + + case BRIG_SEGMENT_ARG: + o_type = Enums::OT_ARG; + break; + + default: + panic("Ld: segment %d not supported\n", segment); + } + + width = ldst->width; + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); + if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) + dest.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 1); + addr.init(op_offs, obj); + } + + void + initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + { + using namespace Brig; + + const BrigInstAtomic *at = (const BrigInstAtomic*)ib; + + segment = (BrigSegment)at->segment; + memoryOrder = (BrigMemoryOrder)at->memoryOrder; + memoryScope = (BrigMemoryScope)at->memoryScope; + equivClass = 0; + + switch (segment) { + case BRIG_SEGMENT_GLOBAL: + o_type = Enums::OT_GLOBAL_READ; + break; + + case BRIG_SEGMENT_GROUP: + o_type = Enums::OT_SHARED_READ; + break; + + case BRIG_SEGMENT_PRIVATE: + o_type = Enums::OT_PRIVATE_READ; + break; + + case BRIG_SEGMENT_READONLY: + o_type = Enums::OT_READONLY_READ; + break; + + case BRIG_SEGMENT_SPILL: + o_type = Enums::OT_SPILL_READ; + break; + + case BRIG_SEGMENT_FLAT: + o_type = Enums::OT_FLAT_READ; + break; + + case BRIG_SEGMENT_KERNARG: + o_type = Enums::OT_KERN_READ; + break; + + case BRIG_SEGMENT_ARG: + o_type = Enums::OT_ARG; + break; + + default: + panic("Ld: segment %d not supported\n", segment); + } + + width = BRIG_WIDTH_1; + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); + + if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) + dest.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands,1); + addr.init(op_offs, obj); + } + + LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : HsailGPUStaticInst(obj, _opcode) + { + using namespace Brig; + + if (ib->opcode == BRIG_OPCODE_LD) { + initLd(ib, obj, _opcode); + } else { + initAtomicLd(ib, obj, _opcode); + } + } + + int numSrcRegOperands() { return(this->addr.isVectorRegister()); } + int numDstRegOperands() { return dest.isVectorRegister(); } + int getNumOperands() + { + if (this->addr.isVectorRegister()) + return 2; + else + return 1; + } + bool isVectorRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? dest.isVectorRegister() : + this->addr.isVectorRegister()); + } + bool isCondRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? dest.isCondRegister() : + this->addr.isCondRegister()); + } + bool isScalarRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? 
dest.isScalarRegister() : + this->addr.isScalarRegister()); + } + bool isSrcOperand(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex > 0) + return(this->addr.isVectorRegister()); + return false; + } + bool isDstOperand(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return(operandIndex == 0); + } + int getOperandSize(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? dest.opSize() : + this->addr.opSize()); + } + int getRegisterIndex(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return((operandIndex == 0) ? dest.regIndex() : + this->addr.regIndex()); + } + }; + + template<typename MemDataType, typename DestDataType, + typename AddrOperandType> + class LdInst : + public LdInstBase<typename MemDataType::CType, + typename DestDataType::OperandType, AddrOperandType>, + public MemInst + { + typename DestDataType::OperandType::DestOperand dest_vect[4]; + uint16_t num_dest_operands; + void generateDisassembly(); + + public: + LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : LdInstBase<typename MemDataType::CType, + typename DestDataType::OperandType, + AddrOperandType>(ib, obj, _opcode), + MemInst(MemDataType::memType) + { + init_addr(&this->addr); + + unsigned op_offs = obj->getOperandPtr(ib->operands,0); + const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); + + if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { + const Brig::BrigOperandOperandList *brigRegVecOp = + (const Brig::BrigOperandOperandList*)brigOp; + + num_dest_operands = + *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4; + + assert(num_dest_operands <= 4); + } else { + num_dest_operands = 1; + } + + if (num_dest_operands > 1) { + assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); + + for (int i = 0; i < num_dest_operands; ++i) { + dest_vect[i].init_from_vect(op_offs, obj, i); + } + } + } + + void + initiateAcc(GPUDynInstPtr gpuDynInst) override + { + typedef typename MemDataType::CType c0; + + gpuDynInst->statusBitVector = gpuDynInst->exec_mask; + + if (num_dest_operands > 1) { + for (int i = 0; i < VSZ; ++i) + if (gpuDynInst->exec_mask[i]) + gpuDynInst->statusVector.push_back(num_dest_operands); + else + gpuDynInst->statusVector.push_back(0); + } + + for (int k = 0; k < num_dest_operands; ++k) { + + c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ]; + + for (int i = 0; i < VSZ; ++i) { + if (gpuDynInst->exec_mask[i]) { + Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); + + if (isLocalMem()) { + // load from shared memory + *d = gpuDynInst->wavefront()->ldsChunk-> + read<c0>(vaddr); + } else { + Request *req = new Request(0, vaddr, sizeof(c0), 0, + gpuDynInst->computeUnit()->masterId(), + 0, gpuDynInst->wfDynId, i); + + gpuDynInst->setRequestFlags(req); + PacketPtr pkt = new Packet(req, MemCmd::ReadReq); + pkt->dataStatic(d); + + if (gpuDynInst->computeUnit()->shader-> + separate_acquire_release && + gpuDynInst->memoryOrder == + Enums::MEMORY_ORDER_SC_ACQUIRE) { + // if this load has acquire semantics, + // set the response continuation function + // to perform an Acquire request + gpuDynInst->execContinuation = + &GPUStaticInst::execLdAcq; + + gpuDynInst->useContinuation = true; + } else { + // the request will be finished when + // the load completes + gpuDynInst->useContinuation = false; + } + // translation is performed in sendRequest() + 
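+                            // one ReadReq packet goes out per active lane
+                            // and per destination register; d_data holds
+                            // num_dest_operands consecutive VSZ-element
+                            // arrays, and d walks the k-th of them lane
+                            // by lane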
+                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
+                                                                   i, pkt);
+                        }
+                    }
+                    ++d;
+                }
+            }
+
+            gpuDynInst->updateStats();
+        }
+
+      private:
+        void
+        execLdAcq(GPUDynInstPtr gpuDynInst) override
+        {
+            // after the load has completed, and if the load has acquire
+            // semantics, issue an acquire request.
+            if (!isLocalMem()) {
+                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+                    && gpuDynInst->memoryOrder ==
+                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
+                    gpuDynInst->statusBitVector = VectorMask(1);
+                    gpuDynInst->useContinuation = false;
+                    // create request
+                    Request *req = new Request(0, 0, 0, 0,
+                        gpuDynInst->computeUnit()->masterId(),
+                        0, gpuDynInst->wfDynId, -1);
+                    req->setFlags(Request::ACQUIRE);
+                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
+                                                                    false, req);
+                }
+            }
+        }
+
+      public:
+        bool
+        isLocalMem() const override
+        {
+            return this->segment == Brig::BRIG_SEGMENT_GROUP;
+        }
+
+        bool isVectorRegister(int operandIndex)
+        {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if ((num_dest_operands != getNumOperands()) &&
+                (operandIndex == (getNumOperands()-1)))
+                return(this->addr.isVectorRegister());
+            if (num_dest_operands > 1) {
+                return dest_vect[operandIndex].isVectorRegister();
+            }
+            else if (num_dest_operands == 1) {
+                return LdInstBase<typename MemDataType::CType,
+                       typename DestDataType::OperandType,
+                       AddrOperandType>::dest.isVectorRegister();
+            }
+            return false;
+        }
+        bool isCondRegister(int operandIndex)
+        {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if ((num_dest_operands != getNumOperands()) &&
+                (operandIndex == (getNumOperands()-1)))
+                return(this->addr.isCondRegister());
+            if (num_dest_operands > 1)
+                return dest_vect[operandIndex].isCondRegister();
+            else if (num_dest_operands == 1)
+                return LdInstBase<typename MemDataType::CType,
+                       typename DestDataType::OperandType,
+                       AddrOperandType>::dest.isCondRegister();
+            return false;
+        }
+        bool isScalarRegister(int operandIndex)
+        {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if ((num_dest_operands != getNumOperands()) &&
+                (operandIndex == (getNumOperands()-1)))
+                return(this->addr.isScalarRegister());
+            if (num_dest_operands > 1)
+                return dest_vect[operandIndex].isScalarRegister();
+            else if (num_dest_operands == 1)
+                return LdInstBase<typename MemDataType::CType,
+                       typename DestDataType::OperandType,
+                       AddrOperandType>::dest.isScalarRegister();
+            return false;
+        }
+        bool isSrcOperand(int operandIndex)
+        {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if ((num_dest_operands != getNumOperands()) &&
+                (operandIndex == (getNumOperands()-1)))
+                return(this->addr.isVectorRegister());
+            return false;
+        }
+        bool isDstOperand(int operandIndex)
+        {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if ((num_dest_operands != getNumOperands()) &&
+                (operandIndex == (getNumOperands()-1)))
+                return false;
+            return true;
+        }
+        int getOperandSize(int operandIndex)
+        {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if ((num_dest_operands != getNumOperands()) &&
+                (operandIndex == (getNumOperands()-1)))
+                return(this->addr.opSize());
+            if (num_dest_operands > 1)
+                return(dest_vect[operandIndex].opSize());
+            else if (num_dest_operands == 1)
+                return(LdInstBase<typename MemDataType::CType,
+                       typename DestDataType::OperandType,
+                       AddrOperandType>::dest.opSize());
+            return 0;
+        }
+        int getRegisterIndex(int operandIndex)
+        {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if
((num_dest_operands != getNumOperands()) && + (operandIndex == (getNumOperands()-1))) + return(this->addr.regIndex()); + if (num_dest_operands > 1) + return(dest_vect[operandIndex].regIndex()); + else if (num_dest_operands == 1) + return(LdInstBase<typename MemDataType::CType, + typename DestDataType::OperandType, + AddrOperandType>::dest.regIndex()); + return -1; + } + int getNumOperands() + { + if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) + return(num_dest_operands+1); + else + return(num_dest_operands); + } + void execute(GPUDynInstPtr gpuDynInst); + }; + + template<typename MemDT, typename DestDT> + GPUStaticInst* + decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + unsigned op_offs = obj->getOperandPtr(ib->operands,1); + BrigRegOperandInfo tmp = findRegDataType(op_offs, obj); + + if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { + return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld"); + } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER || + tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { + switch (tmp.regKind) { + case Brig::BRIG_REGISTER_KIND_SINGLE: + return new LdInst<MemDT, DestDT, + SRegAddrOperand>(ib, obj, "ld"); + case Brig::BRIG_REGISTER_KIND_DOUBLE: + return new LdInst<MemDT, DestDT, + DRegAddrOperand>(ib, obj, "ld"); + default: + fatal("Bad ld register operand type %d\n", tmp.regKind); + } + } else { + fatal("Bad ld register operand kind %d\n", tmp.kind); + } + } + + template<typename MemDT> + GPUStaticInst* + decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + unsigned op_offs = obj->getOperandPtr(ib->operands,0); + BrigRegOperandInfo dest = findRegDataType(op_offs, obj); + + assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER || + dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); + switch(dest.regKind) { + case Brig::BRIG_REGISTER_KIND_SINGLE: + switch (ib->type) { + case Brig::BRIG_TYPE_B8: + case Brig::BRIG_TYPE_B16: + case Brig::BRIG_TYPE_B32: + return decodeLd2<MemDT, B32>(ib, obj); + case Brig::BRIG_TYPE_U8: + case Brig::BRIG_TYPE_U16: + case Brig::BRIG_TYPE_U32: + return decodeLd2<MemDT, U32>(ib, obj); + case Brig::BRIG_TYPE_S8: + case Brig::BRIG_TYPE_S16: + case Brig::BRIG_TYPE_S32: + return decodeLd2<MemDT, S32>(ib, obj); + case Brig::BRIG_TYPE_F16: + case Brig::BRIG_TYPE_F32: + return decodeLd2<MemDT, U32>(ib, obj); + default: + fatal("Bad ld register operand type %d, %d\n", + dest.regKind, ib->type); + }; + case Brig::BRIG_REGISTER_KIND_DOUBLE: + switch (ib->type) { + case Brig::BRIG_TYPE_B64: + return decodeLd2<MemDT, B64>(ib, obj); + case Brig::BRIG_TYPE_U64: + return decodeLd2<MemDT, U64>(ib, obj); + case Brig::BRIG_TYPE_S64: + return decodeLd2<MemDT, S64>(ib, obj); + case Brig::BRIG_TYPE_F64: + return decodeLd2<MemDT, U64>(ib, obj); + default: + fatal("Bad ld register operand type %d, %d\n", + dest.regKind, ib->type); + }; + default: + fatal("Bad ld register operand type %d, %d\n", dest.regKind, + ib->type); + } + } + + template<typename MemDataType, typename SrcOperandType, + typename AddrOperandType> + class StInstBase : public HsailGPUStaticInst + { + public: + typename SrcOperandType::SrcOperand src; + AddrOperandType addr; + + Brig::BrigSegment segment; + Brig::BrigMemoryScope memoryScope; + Brig::BrigMemoryOrder memoryOrder; + unsigned int equivClass; + + void + initSt(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + { + using namespace Brig; + + const BrigInstMem *ldst = (const BrigInstMem*)ib; + + segment = (BrigSegment)ldst->segment; + 
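+            // a plain st carries no memory ordering or scope of its own,
+            // so both default to NONE here; the atomic path,
+            // initAtomicSt() below, decodes real values from the
+            // BrigInstAtomic fields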
memoryOrder = BRIG_MEMORY_ORDER_NONE; + memoryScope = BRIG_MEMORY_SCOPE_NONE; + equivClass = ldst->equivClass; + + switch (segment) { + case BRIG_SEGMENT_GLOBAL: + o_type = Enums::OT_GLOBAL_WRITE; + break; + + case BRIG_SEGMENT_GROUP: + o_type = Enums::OT_SHARED_WRITE; + break; + + case BRIG_SEGMENT_PRIVATE: + o_type = Enums::OT_PRIVATE_WRITE; + break; + + case BRIG_SEGMENT_READONLY: + o_type = Enums::OT_READONLY_WRITE; + break; + + case BRIG_SEGMENT_SPILL: + o_type = Enums::OT_SPILL_WRITE; + break; + + case BRIG_SEGMENT_FLAT: + o_type = Enums::OT_FLAT_WRITE; + break; + + case BRIG_SEGMENT_ARG: + o_type = Enums::OT_ARG; + break; + + default: + panic("St: segment %d not supported\n", segment); + } + + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + const BrigOperand *baseOp = obj->getOperand(op_offs); + + if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) || + (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) { + src.init(op_offs, obj); + } + + op_offs = obj->getOperandPtr(ib->operands, 1); + addr.init(op_offs, obj); + } + + void + initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + { + using namespace Brig; + + const BrigInstAtomic *at = (const BrigInstAtomic*)ib; + + segment = (BrigSegment)at->segment; + memoryScope = (BrigMemoryScope)at->memoryScope; + memoryOrder = (BrigMemoryOrder)at->memoryOrder; + equivClass = 0; + + switch (segment) { + case BRIG_SEGMENT_GLOBAL: + o_type = Enums::OT_GLOBAL_WRITE; + break; + + case BRIG_SEGMENT_GROUP: + o_type = Enums::OT_SHARED_WRITE; + break; + + case BRIG_SEGMENT_PRIVATE: + o_type = Enums::OT_PRIVATE_WRITE; + break; + + case BRIG_SEGMENT_READONLY: + o_type = Enums::OT_READONLY_WRITE; + break; + + case BRIG_SEGMENT_SPILL: + o_type = Enums::OT_SPILL_WRITE; + break; + + case BRIG_SEGMENT_FLAT: + o_type = Enums::OT_FLAT_WRITE; + break; + + case BRIG_SEGMENT_ARG: + o_type = Enums::OT_ARG; + break; + + default: + panic("St: segment %d not supported\n", segment); + } + + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + addr.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 1); + src.init(op_offs, obj); + } + + StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : HsailGPUStaticInst(obj, _opcode) + { + using namespace Brig; + + if (ib->opcode == BRIG_OPCODE_ST) { + initSt(ib, obj, _opcode); + } else { + initAtomicSt(ib, obj, _opcode); + } + } + + int numDstRegOperands() { return 0; } + int numSrcRegOperands() + { + return src.isVectorRegister() + this->addr.isVectorRegister(); + } + int getNumOperands() + { + if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) + return 2; + else + return 1; + } + bool isVectorRegister(int operandIndex) + { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return !operandIndex ? src.isVectorRegister() : + this->addr.isVectorRegister(); + } + bool isCondRegister(int operandIndex) + { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return !operandIndex ? src.isCondRegister() : + this->addr.isCondRegister(); + } + bool isScalarRegister(int operandIndex) + { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return !operandIndex ? 
src.isScalarRegister() : + this->addr.isScalarRegister(); + } + bool isSrcOperand(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return true; + } + bool isDstOperand(int operandIndex) { return false; } + int getOperandSize(int operandIndex) + { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return !operandIndex ? src.opSize() : this->addr.opSize(); + } + int getRegisterIndex(int operandIndex) + { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return !operandIndex ? src.regIndex() : this->addr.regIndex(); + } + }; + + + template<typename MemDataType, typename SrcDataType, + typename AddrOperandType> + class StInst : + public StInstBase<MemDataType, typename SrcDataType::OperandType, + AddrOperandType>, + public MemInst + { + public: + typename SrcDataType::OperandType::SrcOperand src_vect[4]; + uint16_t num_src_operands; + void generateDisassembly(); + + StInst(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode, int srcIdx) + : StInstBase<MemDataType, typename SrcDataType::OperandType, + AddrOperandType>(ib, obj, _opcode), + MemInst(SrcDataType::memType) + { + init_addr(&this->addr); + + BrigRegOperandInfo rinfo; + unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx); + const Brig::BrigOperand *baseOp = obj->getOperand(op_offs); + + if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) { + const Brig::BrigOperandConstantBytes *op = + (Brig::BrigOperandConstantBytes*)baseOp; + + rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind, + Brig::BRIG_TYPE_NONE); + } else { + rinfo = findRegDataType(op_offs, obj); + } + + if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { + const Brig::BrigOperandOperandList *brigRegVecOp = + (const Brig::BrigOperandOperandList*)baseOp; + + num_src_operands = + *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4; + + assert(num_src_operands <= 4); + } else { + num_src_operands = 1; + } + + if (num_src_operands > 1) { + assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); + + for (int i = 0; i < num_src_operands; ++i) { + src_vect[i].init_from_vect(op_offs, obj, i); + } + } + } + + void + initiateAcc(GPUDynInstPtr gpuDynInst) override + { + // before performing a store, check if this store has + // release semantics, and if so issue a release first + if (!isLocalMem()) { + if (gpuDynInst->computeUnit()->shader->separate_acquire_release + && gpuDynInst->memoryOrder == + Enums::MEMORY_ORDER_SC_RELEASE) { + + gpuDynInst->statusBitVector = VectorMask(1); + gpuDynInst->execContinuation = &GPUStaticInst::execSt; + gpuDynInst->useContinuation = true; + // create request + Request *req = new Request(0, 0, 0, 0, + gpuDynInst->computeUnit()->masterId(), + 0, gpuDynInst->wfDynId, -1); + req->setFlags(Request::RELEASE); + gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); + + return; + } + } + + // if there is no release semantic, perform stores immediately + execSt(gpuDynInst); + } + + bool + isLocalMem() const override + { + return this->segment == Brig::BRIG_SEGMENT_GROUP; + } + + private: + // execSt may be called through a continuation + // if the store had release semantics. 
see comment for + // execSt in gpu_static_inst.hh + void + execSt(GPUDynInstPtr gpuDynInst) override + { + typedef typename MemDataType::CType c0; + + gpuDynInst->statusBitVector = gpuDynInst->exec_mask; + + if (num_src_operands > 1) { + for (int i = 0; i < VSZ; ++i) + if (gpuDynInst->exec_mask[i]) + gpuDynInst->statusVector.push_back(num_src_operands); + else + gpuDynInst->statusVector.push_back(0); + } + + for (int k = 0; k < num_src_operands; ++k) { + c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ]; + + for (int i = 0; i < VSZ; ++i) { + if (gpuDynInst->exec_mask[i]) { + Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); + + if (isLocalMem()) { + //store to shared memory + gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr, + *d); + } else { + Request *req = + new Request(0, vaddr, sizeof(c0), 0, + gpuDynInst->computeUnit()->masterId(), + 0, gpuDynInst->wfDynId, i); + + gpuDynInst->setRequestFlags(req); + PacketPtr pkt = new Packet(req, MemCmd::WriteReq); + pkt->dataStatic<c0>(d); + + // translation is performed in sendRequest() + // the request will be finished when the store completes + gpuDynInst->useContinuation = false; + gpuDynInst->computeUnit()->sendRequest(gpuDynInst, + i, pkt); + + } + } + ++d; + } + } + + gpuDynInst->updateStats(); + } + + public: + bool isVectorRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex == num_src_operands) + return this->addr.isVectorRegister(); + if (num_src_operands > 1) + return src_vect[operandIndex].isVectorRegister(); + else if (num_src_operands == 1) + return StInstBase<MemDataType, + typename SrcDataType::OperandType, + AddrOperandType>::src.isVectorRegister(); + return false; + } + bool isCondRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex == num_src_operands) + return this->addr.isCondRegister(); + if (num_src_operands > 1) + return src_vect[operandIndex].isCondRegister(); + else if (num_src_operands == 1) + return StInstBase<MemDataType, + typename SrcDataType::OperandType, + AddrOperandType>::src.isCondRegister(); + return false; + } + bool isScalarRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex == num_src_operands) + return this->addr.isScalarRegister(); + if (num_src_operands > 1) + return src_vect[operandIndex].isScalarRegister(); + else if (num_src_operands == 1) + return StInstBase<MemDataType, + typename SrcDataType::OperandType, + AddrOperandType>::src.isScalarRegister(); + return false; + } + bool isSrcOperand(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return true; + } + bool isDstOperand(int operandIndex) { return false; } + int getOperandSize(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex == num_src_operands) + return this->addr.opSize(); + if (num_src_operands > 1) + return src_vect[operandIndex].opSize(); + else if (num_src_operands == 1) + return StInstBase<MemDataType, + typename SrcDataType::OperandType, + AddrOperandType>::src.opSize(); + return 0; + } + int getRegisterIndex(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex == num_src_operands) + return this->addr.regIndex(); + if (num_src_operands > 1) + return src_vect[operandIndex].regIndex(); + else if (num_src_operands == 1) + return StInstBase<MemDataType, + typename SrcDataType::OperandType, + 
AddrOperandType>::src.regIndex(); + return -1; + } + int getNumOperands() + { + if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) + return num_src_operands + 1; + else + return num_src_operands; + } + void execute(GPUDynInstPtr gpuDynInst); + }; + + template<typename DataType, typename SrcDataType> + GPUStaticInst* + decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + int srcIdx = 0; + int destIdx = 1; + if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC || + ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) { + srcIdx = 1; + destIdx = 0; + } + unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx); + + BrigRegOperandInfo tmp = findRegDataType(op_offs, obj); + + if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { + return new StInst<DataType, SrcDataType, + NoRegAddrOperand>(ib, obj, "st", srcIdx); + } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) { + // V2/V4 not allowed + switch (tmp.regKind) { + case Brig::BRIG_REGISTER_KIND_SINGLE: + return new StInst<DataType, SrcDataType, + SRegAddrOperand>(ib, obj, "st", srcIdx); + case Brig::BRIG_REGISTER_KIND_DOUBLE: + return new StInst<DataType, SrcDataType, + DRegAddrOperand>(ib, obj, "st", srcIdx); + default: + fatal("Bad st register operand type %d\n", tmp.type); + } + } else { + fatal("Bad st register operand kind %d\n", tmp.kind); + } + } + + Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode, + Brig::BrigAtomicOperation brigOp); + + template<typename OperandType, typename AddrOperandType, int NumSrcOperands, + bool HasDst> + class AtomicInstBase : public HsailGPUStaticInst + { + public: + typename OperandType::DestOperand dest; + typename OperandType::SrcOperand src[NumSrcOperands]; + AddrOperandType addr; + + Brig::BrigSegment segment; + Brig::BrigMemoryOrder memoryOrder; + Brig::BrigAtomicOperation atomicOperation; + Brig::BrigMemoryScope memoryScope; + Brig::BrigOpcode opcode; + Enums::MemOpType opType; + + AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : HsailGPUStaticInst(obj, _opcode) + { + using namespace Brig; + + const BrigInstAtomic *at = (const BrigInstAtomic*)ib; + + segment = (BrigSegment)at->segment; + memoryScope = (BrigMemoryScope)at->memoryScope; + memoryOrder = (BrigMemoryOrder)at->memoryOrder; + atomicOperation = (BrigAtomicOperation)at->atomicOperation; + opcode = (BrigOpcode)ib->opcode; + opType = brigAtomicToMemOpType(opcode, atomicOperation); + + switch (segment) { + case BRIG_SEGMENT_GLOBAL: + o_type = Enums::OT_GLOBAL_ATOMIC; + break; + + case BRIG_SEGMENT_GROUP: + o_type = Enums::OT_SHARED_ATOMIC; + break; + + case BRIG_SEGMENT_FLAT: + o_type = Enums::OT_FLAT_ATOMIC; + break; + + default: + panic("Atomic: segment %d not supported\n", segment); + } + + if (HasDst) { + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + dest.init(op_offs, obj); + + op_offs = obj->getOperandPtr(ib->operands, 1); + addr.init(op_offs, obj); + + for (int i = 0; i < NumSrcOperands; ++i) { + op_offs = obj->getOperandPtr(ib->operands, i + 2); + src[i].init(op_offs, obj); + } + } else { + + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + addr.init(op_offs, obj); + + for (int i = 0; i < NumSrcOperands; ++i) { + op_offs = obj->getOperandPtr(ib->operands, i + 1); + src[i].init(op_offs, obj); + } + } + } + + int numSrcRegOperands() + { + int operands = 0; + for (int i = 0; i < NumSrcOperands; i++) { + if (src[i].isVectorRegister() == true) { + operands++; + } + } + if (addr.isVectorRegister()) + operands++; + return operands; + } + int 
numDstRegOperands() { return dest.isVectorRegister(); } + int getNumOperands() + { + if (addr.isVectorRegister()) + return(NumSrcOperands + 2); + return(NumSrcOperands + 1); + } + bool isVectorRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return src[operandIndex].isVectorRegister(); + else if (operandIndex == NumSrcOperands) + return(addr.isVectorRegister()); + else + return dest.isVectorRegister(); + } + bool isCondRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return src[operandIndex].isCondRegister(); + else if (operandIndex == NumSrcOperands) + return(addr.isCondRegister()); + else + return dest.isCondRegister(); + } + bool isScalarRegister(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return src[operandIndex].isScalarRegister(); + else if (operandIndex == NumSrcOperands) + return(addr.isScalarRegister()); + else + return dest.isScalarRegister(); + } + bool isSrcOperand(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return true; + else if (operandIndex == NumSrcOperands) + return(addr.isVectorRegister()); + else + return false; + } + bool isDstOperand(int operandIndex) + { + if (operandIndex <= NumSrcOperands) + return false; + else + return true; + } + int getOperandSize(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return(src[operandIndex].opSize()); + else if (operandIndex == NumSrcOperands) + return(addr.opSize()); + else + return(dest.opSize()); + } + int getRegisterIndex(int operandIndex) + { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex < NumSrcOperands) + return(src[operandIndex].regIndex()); + else if (operandIndex == NumSrcOperands) + return(addr.regIndex()); + else + return(dest.regIndex()); + return -1; + } + }; + + template<typename MemDataType, typename AddrOperandType, int NumSrcOperands, + bool HasDst> + class AtomicInst : + public AtomicInstBase<typename MemDataType::OperandType, + AddrOperandType, NumSrcOperands, HasDst>, + public MemInst + { + public: + void generateDisassembly(); + + AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj, + const char *_opcode) + : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType, + NumSrcOperands, HasDst> + (ib, obj, _opcode), + MemInst(MemDataType::memType) + { + init_addr(&this->addr); + } + + void + initiateAcc(GPUDynInstPtr gpuDynInst) override + { + // before doing the RMW, check if this atomic has + // release semantics, and if so issue a release first + if (!isLocalMem()) { + if (gpuDynInst->computeUnit()->shader->separate_acquire_release + && (gpuDynInst->memoryOrder == + Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder == + Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) { + + gpuDynInst->statusBitVector = VectorMask(1); + + gpuDynInst->execContinuation = &GPUStaticInst::execAtomic; + gpuDynInst->useContinuation = true; + + // create request + Request *req = new Request(0, 0, 0, 0, + gpuDynInst->computeUnit()->masterId(), + 0, gpuDynInst->wfDynId, -1); + req->setFlags(Request::RELEASE); + gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); + + return; + } + } + + // if there is no release semantic, execute the 
RMW immediately + execAtomic(gpuDynInst); + + } + + void execute(GPUDynInstPtr gpuDynInst); + + bool + isLocalMem() const override + { + return this->segment == Brig::BRIG_SEGMENT_GROUP; + } + + private: + // execAtomic may be called through a continuation + // if the RMW had release semantics. see comment for + // execContinuation in gpu_dyn_inst.hh + void + execAtomic(GPUDynInstPtr gpuDynInst) override + { + gpuDynInst->statusBitVector = gpuDynInst->exec_mask; + + typedef typename MemDataType::CType c0; + + c0 *d = &((c0*) gpuDynInst->d_data)[0]; + c0 *e = &((c0*) gpuDynInst->a_data)[0]; + c0 *f = &((c0*) gpuDynInst->x_data)[0]; + + for (int i = 0; i < VSZ; ++i) { + if (gpuDynInst->exec_mask[i]) { + Addr vaddr = gpuDynInst->addr[i]; + + if (isLocalMem()) { + Wavefront *wavefront = gpuDynInst->wavefront(); + *d = wavefront->ldsChunk->read<c0>(vaddr); + + switch (this->opType) { + case Enums::MO_AADD: + case Enums::MO_ANRADD: + wavefront->ldsChunk->write<c0>(vaddr, + wavefront->ldsChunk->read<c0>(vaddr) + (*e)); + break; + case Enums::MO_ASUB: + case Enums::MO_ANRSUB: + wavefront->ldsChunk->write<c0>(vaddr, + wavefront->ldsChunk->read<c0>(vaddr) - (*e)); + break; + case Enums::MO_AMAX: + case Enums::MO_ANRMAX: + wavefront->ldsChunk->write<c0>(vaddr, + std::max(wavefront->ldsChunk->read<c0>(vaddr), + (*e))); + break; + case Enums::MO_AMIN: + case Enums::MO_ANRMIN: + wavefront->ldsChunk->write<c0>(vaddr, + std::min(wavefront->ldsChunk->read<c0>(vaddr), + (*e))); + break; + case Enums::MO_AAND: + case Enums::MO_ANRAND: + wavefront->ldsChunk->write<c0>(vaddr, + wavefront->ldsChunk->read<c0>(vaddr) & (*e)); + break; + case Enums::MO_AOR: + case Enums::MO_ANROR: + wavefront->ldsChunk->write<c0>(vaddr, + wavefront->ldsChunk->read<c0>(vaddr) | (*e)); + break; + case Enums::MO_AXOR: + case Enums::MO_ANRXOR: + wavefront->ldsChunk->write<c0>(vaddr, + wavefront->ldsChunk->read<c0>(vaddr) ^ (*e)); + break; + case Enums::MO_AINC: + case Enums::MO_ANRINC: + wavefront->ldsChunk->write<c0>(vaddr, + wavefront->ldsChunk->read<c0>(vaddr) + 1); + break; + case Enums::MO_ADEC: + case Enums::MO_ANRDEC: + wavefront->ldsChunk->write<c0>(vaddr, + wavefront->ldsChunk->read<c0>(vaddr) - 1); + break; + case Enums::MO_AEXCH: + case Enums::MO_ANREXCH: + wavefront->ldsChunk->write<c0>(vaddr, (*e)); + break; + case Enums::MO_ACAS: + case Enums::MO_ANRCAS: + wavefront->ldsChunk->write<c0>(vaddr, + (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ? 
+ (*f) : wavefront->ldsChunk->read<c0>(vaddr)); + break; + default: + fatal("Unrecognized or invalid HSAIL atomic op " + "type.\n"); + break; + } + } else { + Request *req = + new Request(0, vaddr, sizeof(c0), 0, + gpuDynInst->computeUnit()->masterId(), + 0, gpuDynInst->wfDynId, i, + gpuDynInst->makeAtomicOpFunctor<c0>(e, + f, this->opType)); + + gpuDynInst->setRequestFlags(req); + PacketPtr pkt = new Packet(req, MemCmd::SwapReq); + pkt->dataStatic(d); + + if (gpuDynInst->computeUnit()->shader-> + separate_acquire_release && + (gpuDynInst->memoryOrder == + Enums::MEMORY_ORDER_SC_ACQUIRE)) { + // if this atomic has acquire semantics, + // schedule the continuation to perform an + // acquire after the RMW completes + gpuDynInst->execContinuation = + &GPUStaticInst::execAtomicAcq; + + gpuDynInst->useContinuation = true; + } else { + // the request will be finished when the RMW completes + gpuDynInst->useContinuation = false; + } + // translation is performed in sendRequest() + gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i, + pkt); + } + } + + ++d; + ++e; + ++f; + } + + gpuDynInst->updateStats(); + } + + // execAtomicAcq will always be called through a continuation. + // see comment for execContinuation in gpu_dyn_inst.hh + void + execAtomicAcq(GPUDynInstPtr gpuDynInst) override + { + // after performing the RMW, check to see if this instruction + // has acquire semantics, and if so, issue an acquire + if (!isLocalMem()) { + if (gpuDynInst->computeUnit()->shader->separate_acquire_release + && gpuDynInst->memoryOrder == + Enums::MEMORY_ORDER_SC_ACQUIRE) { + gpuDynInst->statusBitVector = VectorMask(1); + + // the request will be finished when + // the acquire completes + gpuDynInst->useContinuation = false; + // create request + Request *req = new Request(0, 0, 0, 0, + gpuDynInst->computeUnit()->masterId(), + 0, gpuDynInst->wfDynId, -1); + req->setFlags(Request::ACQUIRE); + gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); + } + } + } + }; + + template<typename DataType, typename AddrOperandType, int NumSrcOperands> + GPUStaticInst* + constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib; + + if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) { + return decodeLd<DataType>(ib, obj); + } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) { + switch (ib->type) { + case Brig::BRIG_TYPE_B8: + return decodeSt<S8,S8>(ib, obj); + case Brig::BRIG_TYPE_B16: + return decodeSt<S8,S16>(ib, obj); + case Brig::BRIG_TYPE_B32: + return decodeSt<S8,S32>(ib, obj); + case Brig::BRIG_TYPE_B64: + return decodeSt<S8,S64>(ib, obj); + default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type); + } + } else { + if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) + return new AtomicInst<DataType, AddrOperandType, + NumSrcOperands, false>(ib, obj, "atomicnoret"); + else + return new AtomicInst<DataType, AddrOperandType, + NumSrcOperands, true>(ib, obj, "atomic"); + } + } + + template<typename DataType, int NumSrcOperands> + GPUStaticInst* + decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + unsigned addrIndex = (Brig::BrigOpcode)ib->opcode == + Brig::BRIG_OPCODE_ATOMICNORET ? 
0 : 1; + + unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex); + + BrigRegOperandInfo tmp = findRegDataType(op_offs, obj); + + if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { + return constructAtomic<DataType, NoRegAddrOperand, + NumSrcOperands>(ib, obj); + } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) { + // V2/V4 not allowed + switch (tmp.regKind) { + case Brig::BRIG_REGISTER_KIND_SINGLE: + return constructAtomic<DataType, SRegAddrOperand, + NumSrcOperands>(ib, obj); + case Brig::BRIG_REGISTER_KIND_DOUBLE: + return constructAtomic<DataType, DRegAddrOperand, + NumSrcOperands>(ib, obj); + default: + fatal("Bad atomic register operand type %d\n", tmp.type); + } + } else { + fatal("Bad atomic register operand kind %d\n", tmp.kind); + } + } + + + template<typename DataType> + GPUStaticInst* + decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib; + + if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) { + return decodeAtomicHelper<DataType, 2>(ib, obj); + } else { + return decodeAtomicHelper<DataType, 1>(ib, obj); + } + } + + template<typename DataType> + GPUStaticInst* + decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj) + { + const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib; + if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) { + return decodeAtomicHelper<DataType, 2>(ib, obj); + } else { + return decodeAtomicHelper<DataType, 1>(ib, obj); + } + } +} // namespace HsailISA + +#endif // __ARCH_HSAIL_INSTS_MEM_HH__ diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh new file mode 100644 index 000000000..94f0cd6aa --- /dev/null +++ b/src/arch/hsail/insts/mem_impl.hh @@ -0,0 +1,660 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Author: Steve Reinhardt */ + +#include "arch/hsail/generic_types.hh" +#include "gpu-compute/hsail_code.hh" + +// defined in code.cc, but not worth sucking in all of code.h for this +// at this point +extern const char *segmentNames[]; + +namespace HsailISA +{ + template<typename DestDataType, typename AddrRegOperandType> + void + LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly() + { + this->disassembly = csprintf("%s_%s %s,%s", this->opcode, + DestDataType::label, + this->dest.disassemble(), + this->addr.disassemble()); + } + + template<typename DestDataType, typename AddrRegOperandType> + void + LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + typedef typename DestDataType::CType CType M5_VAR_USED; + const VectorMask &mask = w->get_pred(); + uint64_t addr_vec[VSZ]; + this->addr.calcVector(w, addr_vec); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + this->dest.set(w, lane, addr_vec[lane]); + } + } + } + + template<typename MemDataType, typename DestDataType, + typename AddrRegOperandType> + void + LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly() + { + switch (num_dest_operands) { + case 1: + this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, + segmentNames[this->segment], + MemDataType::label, + this->dest.disassemble(), + this->addr.disassemble()); + break; + case 2: + this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, + segmentNames[this->segment], + MemDataType::label, + this->dest_vect[0].disassemble(), + this->dest_vect[1].disassemble(), + this->addr.disassemble()); + break; + case 4: + this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", + this->opcode, + segmentNames[this->segment], + MemDataType::label, + this->dest_vect[0].disassemble(), + this->dest_vect[1].disassemble(), + this->dest_vect[2].disassemble(), + this->dest_vect[3].disassemble(), + this->addr.disassemble()); + break; + default: + fatal("Bad ld register dest operand, num vector operands: %d \n", + num_dest_operands); + break; + } + } + + static Addr + calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i) + { + // what is the size of the object we are accessing?? + // NOTE: the compiler doesn't generate enough information + // to do this yet... have to just line up all the private + // work-item spaces back to back for now + /* + StorageElement* se = + i->parent->findSymbol(Brig::BrigPrivateSpace, addr); + assert(se); + + return w->wfSlotId * w->privSizePerItem * VSZ + + se->offset * VSZ + + lane * se->size; + */ + + // addressing strategy: interleave the private spaces of + // work-items in a wave-front on 8 byte granularity. + // this won't be perfect coalescing like the spill space + // strategy, but it's better than nothing. The spill space + // strategy won't work with private because the same address + // may be accessed by different sized loads/stores. + + // Note: I'm assuming that the largest load/store to private + // is 8 bytes. If it is larger, the stride will have to increase. + + Addr addr_div8 = addr / 8; + Addr addr_mod8 = addr % 8; + + Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase; + + // e.g., assuming VSZ == 64: addr == 20 in lane 3 gives addr_div8 == 2 + // and addr_mod8 == 4, so ret == 2*8*64 + 3*8 + 4 + privBase + // == privBase + 1052. + + assert(ret < w->privBase + (w->privSizePerItem * VSZ)); + + return ret; + } + + template<typename MemDataType, typename DestDataType, + typename AddrRegOperandType> + void + LdInst<MemDataType, DestDataType, + AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + typedef typename MemDataType::CType MemCType; + const VectorMask &mask = w->get_pred(); + + // Kernarg references are handled uniquely for now (no Memory Request + // is used), so special-case them up front. Someday we should + // make this more realistic, at which point we should get rid of this + // block and fold this case into the switch below. + if (this->segment == Brig::BRIG_SEGMENT_KERNARG) { + MemCType val; + + // I assume no vector ld for kernargs + assert(num_dest_operands == 1); + + // assuming for the moment that we'll never do register + // offsets into kernarg space... just to make life simpler + uint64_t address = this->addr.calcUniform(); + + val = *(MemCType*)&w->kernelArgs[address]; + + DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + this->dest.set(w, lane, val); + } + } + + return; + } else if (this->segment == Brig::BRIG_SEGMENT_ARG) { + uint64_t address = this->addr.calcUniform(); + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + MemCType val = w->readCallArgMem<MemCType>(lane, address); + + DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address, + (unsigned long long)val); + + this->dest.set(w, lane, val); + } + } + + return; + } + + GPUDynInstPtr m = gpuDynInst; + + this->addr.calcVector(w, m->addr); + + m->m_op = Enums::MO_LD; + m->m_type = MemDataType::memType; + m->v_type = DestDataType::vgprType; + + m->exec_mask = w->execMask(); + m->statusBitVector = 0; + m->equiv = this->equivClass; + m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); + + m->scope = getGenericMemoryScope(this->memoryScope); + + if (num_dest_operands == 1) { + m->dst_reg = this->dest.regIndex(); + m->n_reg = 1; + } else { + m->n_reg = num_dest_operands; + for (int i = 0; i < num_dest_operands; ++i) { + m->dst_reg_vec[i] = this->dest_vect[i].regIndex(); + } + } + + m->simdId = w->simdId; + m->wfSlotId = w->wfSlotId; + m->wfDynId = w->wfDynId; + m->kern_id = w->kern_id; + m->cu_id = w->computeUnit->cu_id; + m->latency.init(&w->computeUnit->shader->tick_cnt); + + switch (this->segment) { + case Brig::BRIG_SEGMENT_GLOBAL: + m->s_type = SEG_GLOBAL; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(1)); + + // this is a complete hack to get around a compiler bug + // (the compiler currently generates global access for private + // addresses (starting from 0). We need to add the private offset) + for (int lane = 0; lane < VSZ; ++lane) { + if (m->addr[lane] < w->privSizePerItem) { + if (mask[lane]) { + // what is the size of the object we are accessing? + // find base for this wavefront + + // calcPrivAddr will fail if accesses are unaligned + assert(!((sizeof(MemCType) - 1) & m->addr[lane])); + + Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, + this); + + m->addr[lane] = privAddr; + } + } + } + + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_rd_gm++; + w->rd_gm_reqs_in_pipe--; + break; + + case Brig::BRIG_SEGMENT_SPILL: + assert(num_dest_operands == 1); + m->s_type = SEG_SPILL; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(1)); + { + for (int lane = 0; lane < VSZ; ++lane) { + // note: this calculation will NOT WORK if the compiler + // ever generates loads/stores to the same address with + // different widths (e.g., a ld_u32 addr and a ld_u16 addr) + if (mask[lane]) { + assert(m->addr[lane] < w->spillSizePerItem); + + m->addr[lane] = m->addr[lane] * w->spillWidth + + lane * sizeof(MemCType) + w->spillBase; + + w->last_addr[lane] = m->addr[lane]; + } + } + } + + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_rd_gm++; + w->rd_gm_reqs_in_pipe--; + break; + + case Brig::BRIG_SEGMENT_GROUP: + m->s_type = SEG_SHARED; + m->pipeId = LDSMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(24)); + w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); + w->outstanding_reqs_rd_lm++; + w->rd_lm_reqs_in_pipe--; + break; + + case Brig::BRIG_SEGMENT_READONLY: + m->s_type = SEG_READONLY; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(1)); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + assert(m->addr[lane] + sizeof(MemCType) <= w->roSize); + m->addr[lane] += w->roBase; + } + } + + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_rd_gm++; + w->rd_gm_reqs_in_pipe--; + break; + + case Brig::BRIG_SEGMENT_PRIVATE: + m->s_type = SEG_PRIVATE; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(1)); + { + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + assert(m->addr[lane] < w->privSizePerItem); + + m->addr[lane] = m->addr[lane] + + lane * sizeof(MemCType) + w->privBase; + } + } + } + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_rd_gm++; + w->rd_gm_reqs_in_pipe--; + break; + + default: + fatal("Load to unsupported segment %d %llx\n", this->segment, + m->addr[0]); + } + + w->outstanding_reqs++; + w->mem_reqs_in_pipe--; + } + + template<typename OperationType, typename SrcDataType, + typename AddrRegOperandType> + void + StInst<OperationType, SrcDataType, + AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + typedef typename OperationType::CType CType; + + const VectorMask &mask = w->get_pred(); + + // arg references are handled uniquely for now (no Memory Request + // is used), so special-case them up front. Someday we should + // make this more realistic, at which point we should get rid of this + // block and fold this case into the switch below. 
+ if (this->segment == Brig::BRIG_SEGMENT_ARG) { + uint64_t address = this->addr.calcUniform(); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + CType data = this->src.template get<CType>(w, lane); + DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data); + w->writeCallArgMem<CType>(lane, address, data); + } + } + + return; + } + + GPUDynInstPtr m = gpuDynInst; + + m->exec_mask = w->execMask(); + + this->addr.calcVector(w, m->addr); + + if (num_src_operands == 1) { + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + ((CType*)m->d_data)[lane] = + this->src.template get<CType>(w, lane); + } + } + } else { + for (int k= 0; k < num_src_operands; ++k) { + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + ((CType*)m->d_data)[k * VSZ + lane] = + this->src_vect[k].template get<CType>(w, lane); + } + } + } + } + + m->m_op = Enums::MO_ST; + m->m_type = OperationType::memType; + m->v_type = OperationType::vgprType; + + m->statusBitVector = 0; + m->equiv = this->equivClass; + + if (num_src_operands == 1) { + m->n_reg = 1; + } else { + m->n_reg = num_src_operands; + } + + m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); + + m->scope = getGenericMemoryScope(this->memoryScope); + + m->simdId = w->simdId; + m->wfSlotId = w->wfSlotId; + m->wfDynId = w->wfDynId; + m->kern_id = w->kern_id; + m->cu_id = w->computeUnit->cu_id; + m->latency.init(&w->computeUnit->shader->tick_cnt); + + switch (this->segment) { + case Brig::BRIG_SEGMENT_GLOBAL: + m->s_type = SEG_GLOBAL; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(1)); + + // this is a complete hack to get around a compiler bug + // (the compiler currently generates global access for private + // addresses (starting from 0). We need to add the private offset) + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + if (m->addr[lane] < w->privSizePerItem) { + + // calcPrivAddr will fail if accesses are unaligned + assert(!((sizeof(CType)-1) & m->addr[lane])); + + Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, + this); + + m->addr[lane] = privAddr; + } + } + } + + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_wr_gm++; + w->wr_gm_reqs_in_pipe--; + break; + + case Brig::BRIG_SEGMENT_SPILL: + assert(num_src_operands == 1); + m->s_type = SEG_SPILL; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(1)); + { + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + assert(m->addr[lane] < w->spillSizePerItem); + + m->addr[lane] = m->addr[lane] * w->spillWidth + + lane * sizeof(CType) + w->spillBase; + } + } + } + + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_wr_gm++; + w->wr_gm_reqs_in_pipe--; + break; + + case Brig::BRIG_SEGMENT_GROUP: + m->s_type = SEG_SHARED; + m->pipeId = LDSMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(24)); + w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); + w->outstanding_reqs_wr_lm++; + w->wr_lm_reqs_in_pipe--; + break; + + case Brig::BRIG_SEGMENT_PRIVATE: + m->s_type = SEG_PRIVATE; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(1)); + { + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + assert(m->addr[lane] < w->privSizePerItem); + m->addr[lane] = m->addr[lane] + lane * + sizeof(CType)+w->privBase; + } + } + } + + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_wr_gm++; + w->wr_gm_reqs_in_pipe--; + break; + + default: + fatal("Store to unsupported segment %d\n", 
this->segment); + } + + w->outstanding_reqs++; + w->mem_reqs_in_pipe--; + } + + template<typename OperationType, typename SrcDataType, + typename AddrRegOperandType> + void + StInst<OperationType, SrcDataType, + AddrRegOperandType>::generateDisassembly() + { + switch (num_src_operands) { + case 1: + this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, + segmentNames[this->segment], + OperationType::label, + this->src.disassemble(), + this->addr.disassemble()); + break; + case 2: + this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, + segmentNames[this->segment], + OperationType::label, + this->src_vect[0].disassemble(), + this->src_vect[1].disassemble(), + this->addr.disassemble()); + break; + case 4: + this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", + this->opcode, + segmentNames[this->segment], + OperationType::label, + this->src_vect[0].disassemble(), + this->src_vect[1].disassemble(), + this->src_vect[2].disassemble(), + this->src_vect[3].disassemble(), + this->addr.disassemble()); + break; + default: fatal("Bad st register src operand, num vector operands: " + "%d \n", num_src_operands); + break; + } + } + + template<typename DataType, typename AddrRegOperandType, int NumSrcOperands, + bool HasDst> + void + AtomicInst<DataType, AddrRegOperandType, NumSrcOperands, + HasDst>::execute(GPUDynInstPtr gpuDynInst) + { + typedef typename DataType::CType CType; + + Wavefront *w = gpuDynInst->wavefront(); + + GPUDynInstPtr m = gpuDynInst; + + this->addr.calcVector(w, m->addr); + + for (int lane = 0; lane < VSZ; ++lane) { + ((CType *)m->a_data)[lane] = + this->src[0].template get<CType>(w, lane); + } + + // load second source operand for CAS + if (NumSrcOperands > 1) { + for (int lane = 0; lane < VSZ; ++lane) { + ((CType*)m->x_data)[lane] = + this->src[1].template get<CType>(w, lane); + } + } + + assert(NumSrcOperands <= 2); + + m->m_op = this->opType; + m->m_type = DataType::memType; + m->v_type = DataType::vgprType; + + m->exec_mask = w->execMask(); + m->statusBitVector = 0; + m->equiv = 0; // atomics don't have an equivalence class operand + m->n_reg = 1; + m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); + + m->scope = getGenericMemoryScope(this->memoryScope); + + if (HasDst) { + m->dst_reg = this->dest.regIndex(); + } + + m->simdId = w->simdId; + m->wfSlotId = w->wfSlotId; + m->wfDynId = w->wfDynId; + m->kern_id = w->kern_id; + m->cu_id = w->computeUnit->cu_id; + m->latency.init(&w->computeUnit->shader->tick_cnt); + + switch (this->segment) { + case Brig::BRIG_SEGMENT_GLOBAL: + m->s_type = SEG_GLOBAL; + m->latency.set(w->computeUnit->shader->ticks(64)); + m->pipeId = GLBMEM_PIPE; + + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_wr_gm++; + w->wr_gm_reqs_in_pipe--; + w->outstanding_reqs_rd_gm++; + w->rd_gm_reqs_in_pipe--; + break; + + case Brig::BRIG_SEGMENT_GROUP: + m->s_type = SEG_SHARED; + m->pipeId = LDSMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(24)); + w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); + w->outstanding_reqs_wr_lm++; + w->wr_lm_reqs_in_pipe--; + w->outstanding_reqs_rd_lm++; + w->rd_lm_reqs_in_pipe--; + break; + + default: + fatal("Atomic op to unsupported segment %d\n", + this->segment); + } + + w->outstanding_reqs++; + w->mem_reqs_in_pipe--; + } + + const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp); + + template<typename DataType, typename AddrRegOperandType, int NumSrcOperands, + bool HasDst> + void + AtomicInst<DataType, AddrRegOperandType, NumSrcOperands, + 
HasDst>::generateDisassembly() + { + if (HasDst) { + this->disassembly = + csprintf("%s_%s_%s_%s %s,%s", this->opcode, + atomicOpToString(this->atomicOperation), + segmentNames[this->segment], + DataType::label, this->dest.disassemble(), + this->addr.disassemble()); + } else { + this->disassembly = + csprintf("%s_%s_%s_%s %s", this->opcode, + atomicOpToString(this->atomicOperation), + segmentNames[this->segment], + DataType::label, this->addr.disassemble()); + } + + for (int i = 0; i < NumSrcOperands; ++i) { + this->disassembly += ","; + this->disassembly += this->src[i].disassemble(); + } + } +} // namespace HsailISA diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc new file mode 100644 index 000000000..9506a80ab --- /dev/null +++ b/src/arch/hsail/insts/pseudo_inst.cc @@ -0,0 +1,787 @@ +/* + * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Marc Orr + */ + +#include <csignal> + +#include "arch/hsail/insts/decl.hh" +#include "arch/hsail/insts/mem.hh" + +namespace HsailISA +{ + // Pseudo (or magic) instructions are overloaded on the hsail call + // instruction, because of its flexible parameter signature. + + // To add a new magic instruction: + // 1. Add an entry to the enum. + // 2. Implement it in the switch statement below (Call::execPseudoInst). + // 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h, + // so it's easy to call from an OpenCL kernel. 
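+ // For illustration only (hypothetical helper; the name and signature + // are assumed, not part of this patch): such a utility boils down to + // an HSAIL call whose first parameter selects the opcode, e.g. + // void magic_print_wf_32(int val, int hex) + // { __gem5_magic_call(MAGIC_PRINT_WF_32, val, hex); } + // execPseudoInst() below then reads the opcode from element 0 of src1 + // and any payload from elements 1..3.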
+ + // This enum should be identical to the enum in + // hsa/hsail-gpu-compute/util/magicinst.h + enum + { + MAGIC_PRINT_WF_32 = 0, + MAGIC_PRINT_WF_64, + MAGIC_PRINT_LANE, + MAGIC_PRINT_LANE_64, + MAGIC_PRINT_WF_FLOAT, + MAGIC_SIM_BREAK, + MAGIC_PREF_SUM, + MAGIC_REDUCTION, + MAGIC_MASKLANE_LOWER, + MAGIC_MASKLANE_UPPER, + MAGIC_JOIN_WF_BAR, + MAGIC_WAIT_WF_BAR, + MAGIC_PANIC, + MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG, + MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG, + MAGIC_LOAD_GLOBAL_U32_REG, + MAGIC_XACT_CAS_LD, + MAGIC_MOST_SIG_THD, + MAGIC_MOST_SIG_BROADCAST, + MAGIC_PRINT_WFID_32, + MAGIC_PRINT_WFID_64 + }; + + void + Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst) + { + const VectorMask &mask = w->get_pred(); + + int op = 0; + bool got_op = false; + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + int src_val0 = src1.get<int>(w, lane, 0); + if (got_op) { + if (src_val0 != op) { + fatal("Multiple magic instructions per PC not " + "supported\n"); + } + } else { + op = src_val0; + got_op = true; + } + } + } + + switch(op) { + case MAGIC_PRINT_WF_32: + MagicPrintWF32(w); + break; + case MAGIC_PRINT_WF_64: + MagicPrintWF64(w); + break; + case MAGIC_PRINT_LANE: + MagicPrintLane(w); + break; + case MAGIC_PRINT_LANE_64: + MagicPrintLane64(w); + break; + case MAGIC_PRINT_WF_FLOAT: + MagicPrintWFFloat(w); + break; + case MAGIC_SIM_BREAK: + MagicSimBreak(w); + break; + case MAGIC_PREF_SUM: + MagicPrefixSum(w); + break; + case MAGIC_REDUCTION: + MagicReduction(w); + break; + case MAGIC_MASKLANE_LOWER: + MagicMaskLower(w); + break; + case MAGIC_MASKLANE_UPPER: + MagicMaskUpper(w); + break; + case MAGIC_JOIN_WF_BAR: + MagicJoinWFBar(w); + break; + case MAGIC_WAIT_WF_BAR: + MagicWaitWFBar(w); + break; + case MAGIC_PANIC: + MagicPanic(w); + break; + + // atomic instructions + case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG: + MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst); + break; + + case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG: + MagicAtomicNRAddGroupU32Reg(w, gpuDynInst); + break; + + case MAGIC_LOAD_GLOBAL_U32_REG: + MagicLoadGlobalU32Reg(w, gpuDynInst); + break; + + case MAGIC_XACT_CAS_LD: + MagicXactCasLd(w); + break; + + case MAGIC_MOST_SIG_THD: + MagicMostSigThread(w); + break; + + case MAGIC_MOST_SIG_BROADCAST: + MagicMostSigBroadcast(w); + break; + + case MAGIC_PRINT_WFID_32: + MagicPrintWF32ID(w); + break; + + case MAGIC_PRINT_WFID_64: + MagicPrintWFID64(w); + break; + + default: fatal("unrecognized magic instruction: %d\n", op); + } + } + + void + Call::MagicPrintLane(Wavefront *w) + { + #if TRACING_ON + const VectorMask &mask = w->get_pred(); + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + int src_val1 = src1.get<int>(w, lane, 1); + int src_val2 = src1.get<int>(w, lane, 2); + if (src_val2) { + DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n", + disassemble(), w->computeUnit->cu_id, w->simdId, + w->wfSlotId, lane, src_val1); + } else { + DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n", + disassemble(), w->computeUnit->cu_id, w->simdId, + w->wfSlotId, lane, src_val1); + } + } + } + #endif + } + + void + Call::MagicPrintLane64(Wavefront *w) + { + #if TRACING_ON + const VectorMask &mask = w->get_pred(); + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + int64_t src_val1 = src1.get<int64_t>(w, lane, 1); + int src_val2 = src1.get<int>(w, lane, 2); + if (src_val2) { + DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n", + disassemble(), w->computeUnit->cu_id, w->simdId, + w->wfSlotId, lane, src_val1); + } else { + DPRINTFN("krl_prt (%s): 
CU%d, WF[%d][%d], lane %d: %d\n", + disassemble(), w->computeUnit->cu_id, w->simdId, + w->wfSlotId, lane, src_val1); + } + } + } + #endif + } + + void + Call::MagicPrintWF32(Wavefront *w) + { + #if TRACING_ON + const VectorMask &mask = w->get_pred(); + std::string res_str; + res_str = csprintf("krl_prt (%s)\n", disassemble()); + + for (int lane = 0; lane < VSZ; ++lane) { + if (!(lane & 7)) { + res_str += csprintf("DB%03d: ", (int)w->wfDynId); + } + + if (mask[lane]) { + int src_val1 = src1.get<int>(w, lane, 1); + int src_val2 = src1.get<int>(w, lane, 2); + + if (src_val2) { + res_str += csprintf("%08x", src_val1); + } else { + res_str += csprintf("%08d", src_val1); + } + } else { + res_str += csprintf("xxxxxxxx"); + } + + if ((lane & 7) == 7) { + res_str += csprintf("\n"); + } else { + res_str += csprintf(" "); + } + } + + res_str += "\n\n"; + DPRINTFN(res_str.c_str()); + #endif + } + + void + Call::MagicPrintWF32ID(Wavefront *w) + { + #if TRACING_ON + const VectorMask &mask = w->get_pred(); + std::string res_str; + int src_val3 = -1; + res_str = csprintf("krl_prt (%s)\n", disassemble()); + + for (int lane = 0; lane < VSZ; ++lane) { + if (!(lane & 7)) { + res_str += csprintf("DB%03d: ", (int)w->wfDynId); + } + + if (mask[lane]) { + int src_val1 = src1.get<int>(w, lane, 1); + int src_val2 = src1.get<int>(w, lane, 2); + src_val3 = src1.get<int>(w, lane, 3); + + if (src_val2) { + res_str += csprintf("%08x", src_val1); + } else { + res_str += csprintf("%08d", src_val1); + } + } else { + res_str += csprintf("xxxxxxxx"); + } + + if ((lane & 7) == 7) { + res_str += csprintf("\n"); + } else { + res_str += csprintf(" "); + } + } + + res_str += "\n\n"; + if (w->wfDynId == src_val3) { + DPRINTFN(res_str.c_str()); + } + #endif + } + + void + Call::MagicPrintWF64(Wavefront *w) + { + #if TRACING_ON + const VectorMask &mask = w->get_pred(); + std::string res_str; + res_str = csprintf("krl_prt (%s)\n", disassemble()); + + for (int lane = 0; lane < VSZ; ++lane) { + if (!(lane & 3)) { + res_str += csprintf("DB%03d: ", (int)w->wfDynId); + } + + if (mask[lane]) { + int64_t src_val1 = src1.get<int64_t>(w, lane, 1); + int src_val2 = src1.get<int>(w, lane, 2); + + if (src_val2) { + res_str += csprintf("%016x", src_val1); + } else { + res_str += csprintf("%016d", src_val1); + } + } else { + res_str += csprintf("xxxxxxxxxxxxxxxx"); + } + + if ((lane & 3) == 3) { + res_str += csprintf("\n"); + } else { + res_str += csprintf(" "); + } + } + + res_str += "\n\n"; + DPRINTFN(res_str.c_str()); + #endif + } + + void + Call::MagicPrintWFID64(Wavefront *w) + { + #if TRACING_ON + const VectorMask &mask = w->get_pred(); + std::string res_str; + int src_val3 = -1; + res_str = csprintf("krl_prt (%s)\n", disassemble()); + + for (int lane = 0; lane < VSZ; ++lane) { + if (!(lane & 3)) { + res_str += csprintf("DB%03d: ", (int)w->wfDynId); + } + + if (mask[lane]) { + int64_t src_val1 = src1.get<int64_t>(w, lane, 1); + int src_val2 = src1.get<int>(w, lane, 2); + src_val3 = src1.get<int>(w, lane, 3); + + if (src_val2) { + res_str += csprintf("%016x", src_val1); + } else { + res_str += csprintf("%016d", src_val1); + } + } else { + res_str += csprintf("xxxxxxxxxxxxxxxx"); + } + + if ((lane & 3) == 3) { + res_str += csprintf("\n"); + } else { + res_str += csprintf(" "); + } + } + + res_str += "\n\n"; + if (w->wfDynId == src_val3) { + DPRINTFN(res_str.c_str()); + } + #endif + } + + void + Call::MagicPrintWFFloat(Wavefront *w) + { + #if TRACING_ON + const VectorMask &mask = w->get_pred(); + std::string res_str; + res_str = 
csprintf("krl_prt (%s)\n", disassemble()); + + for (int lane = 0; lane < VSZ; ++lane) { + if (!(lane & 7)) { + res_str += csprintf("DB%03d: ", (int)w->wfDynId); + } + + if (mask[lane]) { + float src_val1 = src1.get<float>(w, lane, 1); + res_str += csprintf("%08f", src_val1); + } else { + res_str += csprintf("xxxxxxxx"); + } + + if ((lane & 7) == 7) { + res_str += csprintf("\n"); + } else { + res_str += csprintf(" "); + } + } + + res_str += "\n\n"; + DPRINTFN(res_str.c_str()); + #endif + } + + // raises a signal that GDB will catch + // when done with the break, type "signal 0" in gdb to continue + void + Call::MagicSimBreak(Wavefront *w) + { + std::string res_str; + // print out state for this wavefront and then break + res_str = csprintf("Breakpoint encountered for wavefront %i\n", + w->wfSlotId); + + res_str += csprintf(" Kern ID: %i\n", w->kern_id); + res_str += csprintf(" Phase ID: %i\n", w->simdId); + res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id); + res_str += csprintf(" Exec mask: "); + + for (int i = VSZ - 1; i >= 0; --i) { + if (w->execMask(i)) + res_str += "1"; + else + res_str += "0"; + + if ((i & 7) == 7) + res_str += " "; + } + + res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong()); + + res_str += "\nHelpful debugging hints:\n"; + res_str += " Check out w->s_reg / w->d_reg for register state\n"; + + res_str += "\n\n"; + DPRINTFN(res_str.c_str()); + fflush(stdout); + + raise(SIGTRAP); + } + + void + Call::MagicPrefixSum(Wavefront *w) + { + const VectorMask &mask = w->get_pred(); + int res = 0; + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + int src_val1 = src1.get<int>(w, lane, 1); + dest.set<int>(w, lane, res); + res += src_val1; + } + } + } + + void + Call::MagicReduction(Wavefront *w) + { + // reduction magic instruction + // The reduction instruction takes up to 64 inputs (one from + // each thread in a WF) and sums them. It returns the sum to + // each thread in the WF. 
+ const VectorMask &mask = w->get_pred(); + int res = 0; + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + int src_val1 = src1.get<int>(w, lane, 1); + res += src_val1; + } + } + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + dest.set<int>(w, lane, res); + } + } + } + + void + Call::MagicMaskLower(Wavefront *w) + { + const VectorMask &mask = w->get_pred(); + int res = 0; + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + int src_val1 = src1.get<int>(w, lane, 1); + + if (src_val1) { + if (lane < (VSZ/2)) { + res = res | ((uint32_t)(1) << lane); + } + } + } + } + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + dest.set<int>(w, lane, res); + } + } + } + + void + Call::MagicMaskUpper(Wavefront *w) + { + const VectorMask &mask = w->get_pred(); + int res = 0; + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + int src_val1 = src1.get<int>(w, lane, 1); + + if (src_val1) { + if (lane >= (VSZ/2)) { + res = res | ((uint32_t)(1) << (lane - (VSZ/2))); + } + } + } + } + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + dest.set<int>(w, lane, res); + } + } + } + + void + Call::MagicJoinWFBar(Wavefront *w) + { + const VectorMask &mask = w->get_pred(); + int max_cnt = 0; + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + w->bar_cnt[lane]++; + + if (w->bar_cnt[lane] > max_cnt) { + max_cnt = w->bar_cnt[lane]; + } + } + } + + if (max_cnt > w->max_bar_cnt) { + w->max_bar_cnt = max_cnt; + } + } + + void + Call::MagicWaitWFBar(Wavefront *w) + { + const VectorMask &mask = w->get_pred(); + int max_cnt = 0; + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + w->bar_cnt[lane]--; + } + + if (w->bar_cnt[lane] > max_cnt) { + max_cnt = w->bar_cnt[lane]; + } + } + + if (max_cnt < w->max_bar_cnt) { + w->max_bar_cnt = max_cnt; + } + + w->instructionBuffer.erase(w->instructionBuffer.begin() + 1, + w->instructionBuffer.end()); + if (w->pendingFetch) + w->dropFetch = true; + } + + void + Call::MagicPanic(Wavefront *w) + { + const VectorMask &mask = w->get_pred(); + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + int src_val1 = src1.get<int>(w, lane, 1); + panic("OpenCL Code failed assertion #%d. 
Triggered by lane %s", + src_val1, lane); + } + } + } + + void + Call::calcAddr(Wavefront *w, GPUDynInstPtr m) + { + // the address is in src1 | src2 + for (int lane = 0; lane < VSZ; ++lane) { + int src_val1 = src1.get<int>(w, lane, 1); + int src_val2 = src1.get<int>(w, lane, 2); + Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2); + + m->addr[lane] = addr; + } + + } + + void + Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) + { + GPUDynInstPtr m = gpuDynInst; + + calcAddr(w, m); + + for (int lane = 0; lane < VSZ; ++lane) { + ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3); + } + + m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET, + Brig::BRIG_ATOMIC_ADD); + m->m_type = U32::memType; + m->v_type = U32::vgprType; + + m->exec_mask = w->execMask(); + m->statusBitVector = 0; + m->equiv = 0; // atomics don't have an equivalence class operand + m->n_reg = 1; + m->memoryOrder = Enums::MEMORY_ORDER_NONE; + m->scope = Enums::MEMORY_SCOPE_NONE; + + m->simdId = w->simdId; + m->wfSlotId = w->wfSlotId; + m->wfDynId = w->wfDynId; + m->latency.init(&w->computeUnit->shader->tick_cnt); + + m->s_type = SEG_GLOBAL; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(64)); + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_wr_gm++; + w->wr_gm_reqs_in_pipe--; + w->outstanding_reqs_rd_gm++; + w->rd_gm_reqs_in_pipe--; + w->outstanding_reqs++; + w->mem_reqs_in_pipe--; + } + + void + Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) + { + GPUDynInstPtr m = gpuDynInst; + calcAddr(w, m); + + for (int lane = 0; lane < VSZ; ++lane) { + ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1); + } + + m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET, + Brig::BRIG_ATOMIC_ADD); + m->m_type = U32::memType; + m->v_type = U32::vgprType; + + m->exec_mask = w->execMask(); + m->statusBitVector = 0; + m->equiv = 0; // atomics don't have an equivalence class operand + m->n_reg = 1; + m->memoryOrder = Enums::MEMORY_ORDER_NONE; + m->scope = Enums::MEMORY_SCOPE_NONE; + + m->simdId = w->simdId; + m->wfSlotId = w->wfSlotId; + m->wfDynId = w->wfDynId; + m->latency.init(&w->computeUnit->shader->tick_cnt); + + m->s_type = SEG_GLOBAL; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(64)); + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_wr_gm++; + w->wr_gm_reqs_in_pipe--; + w->outstanding_reqs_rd_gm++; + w->rd_gm_reqs_in_pipe--; + w->outstanding_reqs++; + w->mem_reqs_in_pipe--; + } + + void + Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) + { + GPUDynInstPtr m = gpuDynInst; + // calculate the address + calcAddr(w, m); + + m->m_op = Enums::MO_LD; + m->m_type = U32::memType; //MemDataType::memType; + m->v_type = U32::vgprType; //DestDataType::vgprType; + + m->exec_mask = w->execMask(); + m->statusBitVector = 0; + m->equiv = 0; + m->n_reg = 1; + m->memoryOrder = Enums::MEMORY_ORDER_NONE; + m->scope = Enums::MEMORY_SCOPE_NONE; + + // FIXME + //m->dst_reg = this->dest.regIndex(); + + m->simdId = w->simdId; + m->wfSlotId = w->wfSlotId; + m->wfDynId = w->wfDynId; + m->latency.init(&w->computeUnit->shader->tick_cnt); + + m->s_type = SEG_GLOBAL; + m->pipeId = GLBMEM_PIPE; + m->latency.set(w->computeUnit->shader->ticks(1)); + w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); + w->outstanding_reqs_rd_gm++; + w->rd_gm_reqs_in_pipe--; + w->outstanding_reqs++; + w->mem_reqs_in_pipe--; + } + + void + Call::MagicXactCasLd(Wavefront *w) 
+ { + const VectorMask &mask = w->get_pred(); + int src_val1 = 0; + + for (int lane = 0; lane < VSZ; ++lane) { + if (mask[lane]) { + src_val1 = src1.get<int>(w, lane, 1); + break; + } + } + + if (!w->computeUnit->xactCasLoadMap.count(src_val1)) { + w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue(); + w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear(); + } + + w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue + .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId)); + } + + void + Call::MagicMostSigThread(Wavefront *w) + { + const VectorMask &mask = w->get_pred(); + unsigned mst = true; + + for (int lane = VSZ - 1; lane >= 0; --lane) { + if (mask[lane]) { + dest.set<int>(w, lane, mst); + mst = false; + } + } + } + + void + Call::MagicMostSigBroadcast(Wavefront *w) + { + const VectorMask &mask = w->get_pred(); + int res = 0; + bool got_res = false; + + for (int lane = VSZ - 1; lane >= 0; --lane) { + if (mask[lane]) { + if (!got_res) { + res = src1.get<int>(w, lane, 1); + got_res = true; + } + dest.set<int>(w, lane, res); + } + } + } + +} // namespace HsailISA diff --git a/src/arch/hsail/operand.cc b/src/arch/hsail/operand.cc new file mode 100644 index 000000000..d0e6c5541 --- /dev/null +++ b/src/arch/hsail/operand.cc @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Author: Steve Reinhardt
+ */
+
+#include "arch/hsail/operand.hh"
+
+using namespace Brig;
+
+bool
+BaseRegOperand::init(unsigned opOffset, const BrigObject *obj,
+                     unsigned &maxRegIdx, char _regFileChar)
+{
+    regFileChar = _regFileChar;
+    const BrigOperand *brigOp = obj->getOperand(opOffset);
+
+    if (brigOp->kind != BRIG_KIND_OPERAND_REGISTER)
+        return false;
+
+    const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)brigOp;
+
+    regIdx = brigRegOp->regNum;
+
+    DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx,
+            brigRegOp->regKind);
+
+    maxRegIdx = std::max(maxRegIdx, regIdx);
+
+    return true;
+}
+
+void
+ListOperand::init(unsigned opOffset, const BrigObject *obj)
+{
+    const BrigOperand *brigOp = (const BrigOperand*)obj->getOperand(opOffset);
+
+    switch (brigOp->kind) {
+      case BRIG_KIND_OPERAND_CODE_LIST:
+        {
+            const BrigOperandCodeList *opList =
+                (const BrigOperandCodeList*)brigOp;
+
+            const Brig::BrigData *oprnd_data =
+                obj->getBrigBaseData(opList->elements);
+
+            // Note: for calls, the dest list of operands may be empty
+            // (size 0).
+            elementCount = oprnd_data->byteCount / 4;
+
+            DPRINTF(GPUReg, "Operand Code List: # elements: %d\n",
+                    elementCount);
+
+            for (int i = 0; i < elementCount; ++i) {
+                unsigned *data_offset =
+                    (unsigned*)obj->getData(opList->elements + 4 * (i + 1));
+
+                const BrigDirectiveVariable *p =
+                    (const BrigDirectiveVariable*)obj->
+                    getCodeSectionEntry(*data_offset);
+
+                StorageElement *se = obj->currentCode->storageMap->
+                    findSymbol(BRIG_SEGMENT_ARG, p);
+
+                assert(se);
+                callArgs.push_back(se);
+            }
+        }
+        break;
+      default:
+        fatal("ListOperand: bad operand kind %d\n", brigOp->kind);
+    }
+}
+
+std::string
+ListOperand::disassemble()
+{
+    std::string res_str("");
+
+    for (auto it : callArgs) {
+        res_str += csprintf("%s ", it->name.c_str());
+    }
+
+    return res_str;
+}
+
+void
+FunctionRefOperand::init(unsigned opOffset, const BrigObject *obj)
+{
+    const BrigOperand *baseOp = obj->getOperand(opOffset);
+
+    if (baseOp->kind != BRIG_KIND_OPERAND_CODE_REF) {
+        fatal("FunctionRefOperand: bad operand kind %d\n", baseOp->kind);
+    }
+
+    const BrigOperandCodeRef *brigOp = (const BrigOperandCodeRef*)baseOp;
+
+    const BrigDirectiveExecutable *p =
+        (const BrigDirectiveExecutable*)obj->getCodeSectionEntry(brigOp->ref);
+
+    func_name = obj->getString(p->name);
+}
+
+std::string
+FunctionRefOperand::disassemble()
+{
+    DPRINTF(GPUReg, "Operand Func-ref name: %s\n", func_name);
+
+    return csprintf("%s", func_name);
+}
+
+bool
+BaseRegOperand::init_from_vect(unsigned opOffset, const BrigObject *obj,
+                               int at, unsigned &maxRegIdx, char _regFileChar)
+{
+    regFileChar = _regFileChar;
+    const BrigOperand *brigOp = obj->getOperand(opOffset);
+
+    if (brigOp->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
+        return false;
+
+    const Brig::BrigOperandOperandList *brigRegVecOp =
+        (const Brig::BrigOperandOperandList*)brigOp;
+
+    unsigned *data_offset =
+        (unsigned*)obj->getData(brigRegVecOp->elements + 4 * (at + 1));
+
+    const BrigOperand *p =
+        (const BrigOperand*)obj->getOperand(*data_offset);
+    if (p->kind != BRIG_KIND_OPERAND_REGISTER) {
+        return false;
+    }
+
+    const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)p;
+
+    regIdx = brigRegOp->regNum;
+
+    DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx,
+            brigRegOp->regKind);
+
+    maxRegIdx = std::max(maxRegIdx, regIdx);
+
+    return true;
+}
+
+void
+BaseRegOperand::initWithStrOffset(unsigned strOffset, const BrigObject *obj,
+                                  unsigned &maxRegIdx, char _regFileChar)
+{
+    const char *name =
obj->getString(strOffset); + char *endptr; + regIdx = strtoul(name + 2, &endptr, 10); + + if (name[0] != '$' || name[1] != _regFileChar) { + fatal("register operand parse error on \"%s\"\n", name); + } + + maxRegIdx = std::max(maxRegIdx, regIdx); +} + +unsigned SRegOperand::maxRegIdx; +unsigned DRegOperand::maxRegIdx; +unsigned CRegOperand::maxRegIdx; + +std::string +SRegOperand::disassemble() +{ + return csprintf("$s%d", regIdx); +} + +std::string +DRegOperand::disassemble() +{ + return csprintf("$d%d", regIdx); +} + +std::string +CRegOperand::disassemble() +{ + return csprintf("$c%d", regIdx); +} + +BrigRegOperandInfo +findRegDataType(unsigned opOffset, const BrigObject *obj) +{ + const BrigOperand *baseOp = obj->getOperand(opOffset); + + switch (baseOp->kind) { + case BRIG_KIND_OPERAND_REGISTER: + { + const BrigOperandRegister *op = (BrigOperandRegister*)baseOp; + + return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, + (BrigRegisterKind)op->regKind); + } + break; + + case BRIG_KIND_OPERAND_OPERAND_LIST: + { + const BrigOperandOperandList *op = + (BrigOperandOperandList*)baseOp; + const BrigData *data_p = (BrigData*)obj->getData(op->elements); + + + int num_operands = 0; + BrigRegisterKind reg_kind = (BrigRegisterKind)0; + for (int offset = 0; offset < data_p->byteCount; offset += 4) { + const BrigOperand *op_p = (const BrigOperand *) + obj->getOperand(((int *)data_p->bytes)[offset/4]); + + if (op_p->kind == BRIG_KIND_OPERAND_REGISTER) { + const BrigOperandRegister *brigRegOp = + (const BrigOperandRegister*)op_p; + reg_kind = (BrigRegisterKind)brigRegOp->regKind; + } else if (op_p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) { + uint16_t num_bytes = + ((Brig::BrigOperandConstantBytes*)op_p)->base.byteCount + - sizeof(BrigBase); + if (num_bytes == sizeof(uint32_t)) { + reg_kind = BRIG_REGISTER_KIND_SINGLE; + } else if (num_bytes == sizeof(uint64_t)) { + reg_kind = BRIG_REGISTER_KIND_DOUBLE; + } else { + fatal("OperandList: bad operand size %d\n", num_bytes); + } + } else { + fatal("OperandList: bad operand kind %d\n", op_p->kind); + } + + num_operands++; + } + assert(baseOp->kind == BRIG_KIND_OPERAND_OPERAND_LIST); + + return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind); + } + break; + + case BRIG_KIND_OPERAND_ADDRESS: + { + const BrigOperandAddress *op = (BrigOperandAddress*)baseOp; + + if (!op->reg) { + BrigType type = BRIG_TYPE_NONE; + + if (op->symbol) { + const BrigDirective *dir = (BrigDirective*) + obj->getCodeSectionEntry(op->symbol); + + assert(dir->kind == BRIG_KIND_DIRECTIVE_VARIABLE); + + const BrigDirectiveVariable *sym = + (const BrigDirectiveVariable*)dir; + + type = (BrigType)sym->type; + } + return BrigRegOperandInfo(BRIG_KIND_OPERAND_ADDRESS, + (BrigType)type); + } else { + const BrigOperandAddress *b = (const BrigOperandAddress*)baseOp; + const BrigOperand *reg = obj->getOperand(b->reg); + const BrigOperandRegister *rop = (BrigOperandRegister*)reg; + + return BrigRegOperandInfo(BRIG_KIND_OPERAND_REGISTER, + (BrigRegisterKind)rop->regKind); + } + } + break; + + default: + fatal("AddrOperand: bad operand kind %d\n", baseOp->kind); + break; + } +} + +void +AddrOperandBase::parseAddr(const BrigOperandAddress *op, const BrigObject *obj) +{ + assert(op->base.kind == BRIG_KIND_OPERAND_ADDRESS); + + const BrigDirective *d = + (BrigDirective*)obj->getCodeSectionEntry(op->symbol); + + assert(d->kind == BRIG_KIND_DIRECTIVE_VARIABLE); + const BrigDirectiveVariable *sym = (BrigDirectiveVariable*)d; + name = obj->getString(sym->name); + + if (sym->segment != 
BRIG_SEGMENT_ARG) {
+        storageElement =
+            obj->currentCode->storageMap->findSymbol(sym->segment, name);
+        assert(storageElement);
+        offset = 0;
+    } else {
+        // Using sym->name does not work for BRIG_SEGMENT_ARG in the
+        // following case:
+        //
+        //     void foo(int a);
+        //     void bar(double a);
+        //
+        //     foo(...) --> arg_u32 %param_p0;
+        //                  st_arg_u32 $s0, [%param_p0];
+        //                  call &foo (%param_p0);
+        //     bar(...) --> arg_f64 %param_p0;
+        //                  st_arg_u64 $d0, [%param_p0];
+        //                  call &bar (%param_p0);
+        //
+        // Both functions use the same variable name (param_p0), and this
+        // may be a compiler bug.
+        //
+        // Solution: use the directive pointer (BrigDirectiveVariable) to
+        // differentiate the two versions of param_p0.
+        //
+        // Note this solution is awkward: we pull the directive pointer
+        // out of the brig binary and put it into the symbol table, but
+        // then index the symbol table by that same directive pointer,
+        // which makes the symbol table largely redundant. It avoids
+        // disturbing the rest of the infrastructure, though, so we go
+        // with it for now.
+        //
+        // When we update the compiler again, we should see if this
+        // problem goes away. If so, we can fold some of this
+        // functionality into the code for kernel arguments. If not,
+        // maybe we can index the symbol name on a hash of the variable
+        // and function names.
+        storageElement = obj->currentCode->
+            storageMap->findSymbol((Brig::BrigSegment)sym->segment, sym);
+
+        assert(storageElement);
+    }
+}
+
+uint64_t
+AddrOperandBase::calcUniformBase()
+{
+    // start with offset, will be 0 if not specified
+    uint64_t address = offset;
+
+    // add in symbol value if specified
+    if (storageElement) {
+        address += storageElement->offset;
+    }
+
+    return address;
+}
+
+std::string
+AddrOperandBase::disassemble(std::string reg_disassembly)
+{
+    std::string disasm;
+
+    if (offset || reg_disassembly != "") {
+        disasm += "[";
+
+        if (reg_disassembly != "") {
+            disasm += reg_disassembly;
+
+            if (offset > 0) {
+                disasm += "+";
+            }
+        }
+
+        if (offset) {
+            disasm += csprintf("%d", offset);
+        }
+
+        disasm += "]";
+    } else if (name) {
+        disasm += csprintf("[%s]", name);
+    }
+
+    return disasm;
+}
+
+void
+NoRegAddrOperand::init(unsigned opOffset, const BrigObject *obj)
+{
+    const BrigOperand *baseOp = obj->getOperand(opOffset);
+
+    if (baseOp->kind == BRIG_KIND_OPERAND_ADDRESS) {
+        BrigOperandAddress *addrOp = (BrigOperandAddress*)baseOp;
+        parseAddr(addrOp, obj);
+        offset = (uint64_t(addrOp->offset.hi) << 32) |
+                  uint64_t(addrOp->offset.lo);
+    } else {
+        fatal("NoRegAddrOperand: bad operand kind %d\n", baseOp->kind);
+    }
+}
+
+std::string
+NoRegAddrOperand::disassemble()
+{
+    return AddrOperandBase::disassemble(std::string(""));
+}
+
+void
+LabelOperand::init(unsigned opOffset, const BrigObject *obj)
+{
+    const BrigOperandCodeRef *op =
+        (const BrigOperandCodeRef*)obj->getOperand(opOffset);
+
+    assert(op->base.kind == BRIG_KIND_OPERAND_CODE_REF);
+
+    const BrigDirective *dir =
+        (const BrigDirective*)obj->getCodeSectionEntry(op->ref);
+
+    assert(dir->kind == BRIG_KIND_DIRECTIVE_LABEL);
+    label = obj->currentCode->refLabel((BrigDirectiveLabel*)dir, obj);
+}
+
+uint32_t
+LabelOperand::getTarget(Wavefront *w, int lane)
+{
+    return label->get();
+}
+
+std::string
+LabelOperand::disassemble()
+{
+    return label->name;
+}
diff --git a/src/arch/hsail/operand.hh b/src/arch/hsail/operand.hh
new file mode 100644
index 000000000..e3d275b10
--- /dev/null
+++ b/src/arch/hsail/operand.hh
@@ -0,0 +1,768 @@
+/*
+ *
Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Steve Reinhardt + */ + +#ifndef __ARCH_HSAIL_OPERAND_HH__ +#define __ARCH_HSAIL_OPERAND_HH__ + +/** + * @file operand.hh + * + * Defines classes encapsulating HSAIL instruction operands. 
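+ *
+ * Broadly, the classes below fall into a few families: per-lane
+ * register operands ($s, $d, and $c register files), immediate
+ * operands, register-or-immediate wrappers, address operands (with
+ * and without a base register), and the label, list, and
+ * function-reference operands used by branches and calls.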
+ */ + +#include <string> + +#include "arch/hsail/Brig.h" +#include "base/trace.hh" +#include "base/types.hh" +#include "debug/GPUReg.hh" +#include "enums/RegisterType.hh" +#include "gpu-compute/brig_object.hh" +#include "gpu-compute/compute_unit.hh" +#include "gpu-compute/hsail_code.hh" +#include "gpu-compute/shader.hh" +#include "gpu-compute/vector_register_file.hh" +#include "gpu-compute/wavefront.hh" + +class Label; +class StorageElement; + +class BaseOperand +{ + public: + Enums::RegisterType registerType; + uint32_t regOperandSize; + BaseOperand() { registerType = Enums::RT_NONE; regOperandSize = 0; } + bool isVectorRegister() { return registerType == Enums::RT_VECTOR; } + bool isScalarRegister() { return registerType == Enums::RT_SCALAR; } + bool isCondRegister() { return registerType == Enums::RT_CONDITION; } + unsigned int regIndex() { return 0; } + uint32_t opSize() { return regOperandSize; } + virtual ~BaseOperand() { } +}; + +class BrigRegOperandInfo +{ + public: + Brig::BrigKind16_t kind; + Brig::BrigType type; + Brig::BrigRegisterKind regKind; + + BrigRegOperandInfo(Brig::BrigKind16_t _kind, + Brig::BrigRegisterKind _regKind) + : kind(_kind), regKind(_regKind) + { + } + + BrigRegOperandInfo(Brig::BrigKind16_t _kind, Brig::BrigType _type) + : kind(_kind), type(_type) + { + } + + BrigRegOperandInfo() : kind(Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES), + type(Brig::BRIG_TYPE_NONE) + { + } +}; + +BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj); + +class BaseRegOperand : public BaseOperand +{ + public: + unsigned regIdx; + char regFileChar; + + bool init(unsigned opOffset, const BrigObject *obj, + unsigned &maxRegIdx, char _regFileChar); + + bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at, + unsigned &maxRegIdx, char _regFileChar); + + void initWithStrOffset(unsigned strOffset, const BrigObject *obj, + unsigned &maxRegIdx, char _regFileChar); + unsigned int regIndex() { return regIdx; } +}; + +class SRegOperand : public BaseRegOperand +{ + public: + static unsigned maxRegIdx; + + bool + init(unsigned opOffset, const BrigObject *obj) + { + regOperandSize = sizeof(uint32_t); + registerType = Enums::RT_VECTOR; + + return BaseRegOperand::init(opOffset, obj, maxRegIdx, 's'); + } + + bool + init_from_vect(unsigned opOffset, const BrigObject *obj, int at) + { + regOperandSize = sizeof(uint32_t); + registerType = Enums::RT_VECTOR; + + return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx, + 's'); + } + + void + initWithStrOffset(unsigned strOffset, const BrigObject *obj) + { + regOperandSize = sizeof(uint32_t); + registerType = Enums::RT_VECTOR; + + return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx, + 's'); + } + + template<typename OperandType> + OperandType + get(Wavefront *w, int lane) + { + assert(sizeof(OperandType) <= sizeof(uint32_t)); + assert(regIdx < w->maxSpVgprs); + // if OperandType is smaller than 32-bit, we truncate the value + OperandType ret; + uint32_t vgprIdx; + + switch (sizeof(OperandType)) { + case 1: // 1 byte operand + vgprIdx = w->remap(regIdx, 1, 1); + ret = (w->computeUnit->vrf[w->simdId]-> + read<uint32_t>(vgprIdx, lane)) & 0xff; + break; + case 2: // 2 byte operand + vgprIdx = w->remap(regIdx, 2, 1); + ret = (w->computeUnit->vrf[w->simdId]-> + read<uint32_t>(vgprIdx, lane)) & 0xffff; + break; + case 4: // 4 byte operand + vgprIdx = w->remap(regIdx,sizeof(OperandType), 1); + ret = w->computeUnit->vrf[w->simdId]-> + read<OperandType>(vgprIdx, lane); + break; + default: + panic("Bad 
OperandType\n"); + break; + } + + return (OperandType)ret; + } + + // special get method for compatibility with LabelOperand + uint32_t + getTarget(Wavefront *w, int lane) + { + return get<uint32_t>(w, lane); + } + + template<typename OperandType> + void set(Wavefront *w, int lane, OperandType &val); + std::string disassemble(); +}; + +template<typename OperandType> +void +SRegOperand::set(Wavefront *w, int lane, OperandType &val) +{ + DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n", + w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val); + + assert(sizeof(OperandType) == sizeof(uint32_t)); + assert(regIdx < w->maxSpVgprs); + uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1); + w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane); +} + +template<> +inline void +SRegOperand::set(Wavefront *w, int lane, uint64_t &val) +{ + DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n", + w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val); + + assert(regIdx < w->maxSpVgprs); + uint32_t vgprIdx = w->remap(regIdx, sizeof(uint32_t), 1); + w->computeUnit->vrf[w->simdId]->write<uint32_t>(vgprIdx, val, lane); +} + +class DRegOperand : public BaseRegOperand +{ + public: + static unsigned maxRegIdx; + + bool + init(unsigned opOffset, const BrigObject *obj) + { + regOperandSize = sizeof(uint64_t); + registerType = Enums::RT_VECTOR; + + return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'd'); + } + + bool + init_from_vect(unsigned opOffset, const BrigObject *obj, int at) + { + regOperandSize = sizeof(uint64_t); + registerType = Enums::RT_VECTOR; + + return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx, + 'd'); + } + + void + initWithStrOffset(unsigned strOffset, const BrigObject *obj) + { + regOperandSize = sizeof(uint64_t); + registerType = Enums::RT_VECTOR; + + return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx, + 'd'); + } + + template<typename OperandType> + OperandType + get(Wavefront *w, int lane) + { + assert(sizeof(OperandType) <= sizeof(uint64_t)); + // TODO: this check is valid only for HSAIL + assert(regIdx < w->maxDpVgprs); + uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1); + + return w->computeUnit->vrf[w->simdId]->read<OperandType>(vgprIdx,lane); + } + + template<typename OperandType> + void + set(Wavefront *w, int lane, OperandType &val) + { + DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $d%d <- %d\n", + w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, + val); + + assert(sizeof(OperandType) <= sizeof(uint64_t)); + // TODO: this check is valid only for HSAIL + assert(regIdx < w->maxDpVgprs); + uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1); + w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane); + } + + std::string disassemble(); +}; + +class CRegOperand : public BaseRegOperand +{ + public: + static unsigned maxRegIdx; + + bool + init(unsigned opOffset, const BrigObject *obj) + { + regOperandSize = sizeof(uint8_t); + registerType = Enums::RT_CONDITION; + + return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'c'); + } + + bool + init_from_vect(unsigned opOffset, const BrigObject *obj, int at) + { + regOperandSize = sizeof(uint8_t); + registerType = Enums::RT_CONDITION; + + return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx, + 'c'); + } + + void + initWithStrOffset(unsigned strOffset, const BrigObject *obj) + { + regOperandSize = sizeof(uint8_t); + registerType = Enums::RT_CONDITION; + + return 
BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx, + 'c'); + } + + template<typename OperandType> + OperandType + get(Wavefront *w, int lane) + { + assert(regIdx < w->condRegState->numRegs()); + + return w->condRegState->read<OperandType>((int)regIdx, lane); + } + + template<typename OperandType> + void + set(Wavefront *w, int lane, OperandType &val) + { + DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $c%d <- %d\n", + w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, + val); + + assert(regIdx < w->condRegState->numRegs()); + w->condRegState->write<OperandType>(regIdx,lane,val); + } + + std::string disassemble(); +}; + +template<typename T> +class ImmOperand : public BaseOperand +{ + public: + T bits; + + bool init(unsigned opOffset, const BrigObject *obj); + bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at); + std::string disassemble(); + + template<typename OperandType> + OperandType + get() + { + assert(sizeof(OperandType) <= sizeof(T)); + + return *(OperandType*)&bits; + } + + // This version of get() takes a WF* and a lane id for + // compatibility with the register-based get() methods. + template<typename OperandType> + OperandType + get(Wavefront *w, int lane) + { + return get<OperandType>(); + } +}; + +template<typename T> +bool +ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj) +{ + const Brig::BrigOperand *brigOp = obj->getOperand(opOffset); + + switch (brigOp->kind) { + // this is immediate operand + case Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES: + { + DPRINTF(GPUReg, "sizeof(T): %lu, byteCount: %d\n", sizeof(T), + brigOp->byteCount); + + auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp; + + bits = *((T*)(obj->getData(cbptr->bytes + 4))); + + return true; + } + break; + + case Brig::BRIG_KIND_OPERAND_WAVESIZE: + bits = VSZ; + return true; + + default: + return false; + } +} + +template <typename T> +bool +ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at) +{ + const Brig::BrigOperand *brigOp = obj->getOperand(opOffset); + + if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { + return false; + } + + + const Brig::BrigOperandOperandList *brigVecOp = + (const Brig::BrigOperandOperandList *)brigOp; + + unsigned *data_offset = + (unsigned *)obj->getData(brigVecOp->elements + 4 * (at + 1)); + + const Brig::BrigOperand *p = + (const Brig::BrigOperand *)obj->getOperand(*data_offset); + + if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) { + return false; + } + + return init(*data_offset, obj); +} +template<typename T> +std::string +ImmOperand<T>::disassemble() +{ + return csprintf("0x%08x", bits); +} + +template<typename RegOperand, typename T> +class RegOrImmOperand : public BaseOperand +{ + private: + bool is_imm; + + public: + void setImm(const bool value) { is_imm = value; } + + ImmOperand<T> imm_op; + RegOperand reg_op; + + RegOrImmOperand() { is_imm = false; } + void init(unsigned opOffset, const BrigObject *obj); + void init_from_vect(unsigned opOffset, const BrigObject *obj, int at); + std::string disassemble(); + + template<typename OperandType> + OperandType + get(Wavefront *w, int lane) + { + return is_imm ? 
imm_op.template get<OperandType>() :
+                        reg_op.template get<OperandType>(w, lane);
+    }
+
+    uint32_t
+    opSize()
+    {
+        if (!is_imm) {
+            return reg_op.opSize();
+        }
+
+        return 0;
+    }
+
+    bool
+    isVectorRegister()
+    {
+        if (!is_imm) {
+            return reg_op.registerType == Enums::RT_VECTOR;
+        }
+        return false;
+    }
+
+    bool
+    isCondRegister()
+    {
+        if (!is_imm) {
+            return reg_op.registerType == Enums::RT_CONDITION;
+        }
+
+        return false;
+    }
+
+    bool
+    isScalarRegister()
+    {
+        if (!is_imm) {
+            return reg_op.registerType == Enums::RT_SCALAR;
+        }
+
+        return false;
+    }
+
+    unsigned int
+    regIndex()
+    {
+        if (!is_imm) {
+            return reg_op.regIndex();
+        }
+        return 0;
+    }
+};
+
+template<typename RegOperand, typename T>
+void
+RegOrImmOperand<RegOperand, T>::init(unsigned opOffset, const BrigObject *obj)
+{
+    is_imm = false;
+
+    if (reg_op.init(opOffset, obj)) {
+        return;
+    }
+
+    if (imm_op.init(opOffset, obj)) {
+        is_imm = true;
+        return;
+    }
+
+    fatal("RegOrImmOperand::init(): bad operand kind %d\n",
+          obj->getOperand(opOffset)->kind);
+}
+
+template<typename RegOperand, typename T>
+void
+RegOrImmOperand<RegOperand, T>::init_from_vect(unsigned opOffset,
+                                               const BrigObject *obj, int at)
+{
+    if (reg_op.init_from_vect(opOffset, obj, at)) {
+        is_imm = false;
+
+        return;
+    }
+
+    if (imm_op.init_from_vect(opOffset, obj, at)) {
+        is_imm = true;
+
+        return;
+    }
+
+    fatal("RegOrImmOperand::init_from_vect(): bad operand kind %d\n",
+          obj->getOperand(opOffset)->kind);
+}
+
+template<typename RegOperand, typename T>
+std::string
+RegOrImmOperand<RegOperand, T>::disassemble()
+{
+    return is_imm ? imm_op.disassemble() : reg_op.disassemble();
+}
+
+typedef RegOrImmOperand<SRegOperand, uint32_t> SRegOrImmOperand;
+typedef RegOrImmOperand<DRegOperand, uint64_t> DRegOrImmOperand;
+typedef RegOrImmOperand<CRegOperand, bool> CRegOrImmOperand;
+
+class AddrOperandBase : public BaseOperand
+{
+  protected:
+    // helper function for init()
+    void parseAddr(const Brig::BrigOperandAddress *op, const BrigObject *obj);
+
+    // helper function for disassemble()
+    std::string disassemble(std::string reg_disassembly);
+    uint64_t calcUniformBase();
+
+  public:
+    virtual void calcVector(Wavefront *w, uint64_t *addrVec) = 0;
+    virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0;
+
+    uint64_t offset;
+    const char *name = nullptr;
+    StorageElement *storageElement;
+};
+
+template<typename RegOperandType>
+class RegAddrOperand : public AddrOperandBase
+{
+  public:
+    RegOperandType reg;
+    void init(unsigned opOffset, const BrigObject *obj);
+    uint64_t calcUniform();
+    void calcVector(Wavefront *w, uint64_t *addrVec);
+    uint64_t calcLane(Wavefront *w, int lane=0);
+    uint32_t opSize() { return reg.opSize(); }
+    bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; }
+    bool isCondRegister() { return reg.registerType == Enums::RT_CONDITION; }
+    bool isScalarRegister() { return reg.registerType == Enums::RT_SCALAR; }
+    unsigned int regIndex() { return reg.regIndex(); }
+    std::string disassemble();
+};
+
+template<typename RegOperandType>
+void
+RegAddrOperand<RegOperandType>::init(unsigned opOffset, const BrigObject *obj)
+{
+    using namespace Brig;
+
+    const BrigOperand *baseOp = obj->getOperand(opOffset);
+
+    switch (baseOp->kind) {
+      case BRIG_KIND_OPERAND_ADDRESS:
+        {
+            const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
+            storageElement = nullptr;
+
+            offset = (uint64_t(op->offset.hi) << 32) |
+                      uint64_t(op->offset.lo);
+            reg.init(op->reg, obj);
+
+            if (reg.regFileChar == 's') {
+                reg.regOperandSize = sizeof(uint32_t);
+                registerType = Enums::RT_VECTOR;
+            } else if (reg.regFileChar == 'd') {
+                reg.regOperandSize = sizeof(uint64_t);
+                registerType = Enums::RT_VECTOR;
+            }
+        }
+        break;
+
+      default:
+        fatal("RegAddrOperand: bad operand kind %d\n", baseOp->kind);
+        break;
+    }
+}
+
+template<typename RegOperandType>
+uint64_t
+RegAddrOperand<RegOperandType>::calcUniform()
+{
+    fatal("can't do calcUniform() on register-based address\n");
+
+    return 0;
+}
+
+template<typename RegOperandType>
+void
+RegAddrOperand<RegOperandType>::calcVector(Wavefront *w, uint64_t *addrVec)
+{
+    Addr address = calcUniformBase();
+
+    for (int lane = 0; lane < VSZ; ++lane) {
+        if (w->execMask(lane)) {
+            if (reg.regFileChar == 's') {
+                addrVec[lane] = address + reg.template get<uint32_t>(w, lane);
+            } else {
+                addrVec[lane] = address + reg.template get<Addr>(w, lane);
+            }
+        }
+    }
+}
+
+template<typename RegOperandType>
+uint64_t
+RegAddrOperand<RegOperandType>::calcLane(Wavefront *w, int lane)
+{
+    Addr address = calcUniformBase();
+
+    return address + reg.template get<Addr>(w, lane);
+}
+
+template<typename RegOperandType>
+std::string
+RegAddrOperand<RegOperandType>::disassemble()
+{
+    return AddrOperandBase::disassemble(reg.disassemble());
+}
+
+typedef RegAddrOperand<SRegOperand> SRegAddrOperand;
+typedef RegAddrOperand<DRegOperand> DRegAddrOperand;
+
+class NoRegAddrOperand : public AddrOperandBase
+{
+  public:
+    void init(unsigned opOffset, const BrigObject *obj);
+    uint64_t calcUniform();
+    void calcVector(Wavefront *w, uint64_t *addrVec);
+    uint64_t calcLane(Wavefront *w, int lane=0);
+    std::string disassemble();
+};
+
+inline uint64_t
+NoRegAddrOperand::calcUniform()
+{
+    return AddrOperandBase::calcUniformBase();
+}
+
+inline uint64_t
+NoRegAddrOperand::calcLane(Wavefront *w, int lane)
+{
+    return calcUniform();
+}
+
+inline void
+NoRegAddrOperand::calcVector(Wavefront *w, uint64_t *addrVec)
+{
+    uint64_t address = calcUniformBase();
+
+    for (int lane = 0; lane < VSZ; ++lane)
+        addrVec[lane] = address;
+}
+
+class LabelOperand : public BaseOperand
+{
+  public:
+    Label *label;
+
+    void init(unsigned opOffset, const BrigObject *obj);
+    std::string disassemble();
+
+    // special get method for compatibility with SRegOperand
+    uint32_t getTarget(Wavefront *w, int lane);
+};
+
+class ListOperand : public BaseOperand
+{
+  public:
+    int elementCount;
+    std::vector<StorageElement*> callArgs;
+
+    int
+    getSrcOperand(int idx)
+    {
+        DPRINTF(GPUReg, "getSrcOperand, idx: %d, sz_args: %d\n", idx,
+                callArgs.size());
+
+        return callArgs.at(idx)->offset;
+    }
+
+    void init(unsigned opOffset, const BrigObject *obj);
+
+    std::string disassemble();
+
+    template<typename OperandType>
+    OperandType
+    get(Wavefront *w, int lane, int arg_idx)
+    {
+        return w->readCallArgMem<OperandType>(lane, getSrcOperand(arg_idx));
+    }
+
+    template<typename OperandType>
+    void
+    set(Wavefront *w, int lane, OperandType val)
+    {
+        w->writeCallArgMem<OperandType>(lane, getSrcOperand(0), val);
+        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: arg[%d] <- %d\n",
+                w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane,
+                getSrcOperand(0), val);
+    }
+};
+
+class FunctionRefOperand : public BaseOperand
+{
+  public:
+    const char *func_name;
+
+    void init(unsigned opOffset, const BrigObject *obj);
+    std::string disassemble();
+};
+
+#endif // __ARCH_HSAIL_OPERAND_HH__
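For orientation, the per-lane access pattern these operand classes support
can be sketched as follows (the function and operand names
hypotheticalAddU32, dest, src0, and src1 are illustrative, not part of the
commit; VSZ, execMask(), get<>(), and set<>() are the interfaces defined
above):

    // Per-lane u32 add over a wavefront: read two sources (register or
    // immediate), write the sum to a vector register, and skip lanes
    // whose execute mask bit is clear.
    void
    hypotheticalAddU32(Wavefront *w, SRegOperand &dest,
                       SRegOrImmOperand &src0, SRegOrImmOperand &src1)
    {
        for (int lane = 0; lane < VSZ; ++lane) {
            if (w->execMask(lane)) {
                uint32_t a = src0.get<uint32_t>(w, lane);
                uint32_t b = src1.get<uint32_t>(w, lane);
                uint32_t sum = a + b;
                // SRegOperand::set() takes its value argument by reference
                dest.set<uint32_t>(w, lane, sum);
            }
        }
    }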