#! /usr/bin/python # # Copyright (c) 2015 Advanced Micro Devices, Inc. # All rights reserved. # # For use for simulation and test purposes only # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # Author: Steve Reinhardt # from __future__ import print_function import sys, re from m5.util import code_formatter if len(sys.argv) != 4: print("Error: need 3 args (file names)") sys.exit(0) header_code = code_formatter() decoder_code = code_formatter() exec_code = code_formatter() ############### # # Generate file prologs (includes etc.) # ############### header_code(''' #include "arch/hsail/insts/decl.hh" #include "base/bitfield.hh" #include "gpu-compute/hsail_code.hh" #include "gpu-compute/wavefront.hh" namespace HsailISA { ''') header_code.indent() decoder_code(''' #include "arch/hsail/gpu_decoder.hh" #include "arch/hsail/insts/branch.hh" #include "arch/hsail/insts/decl.hh" #include "arch/hsail/insts/gen_decl.hh" #include "arch/hsail/insts/mem.hh" #include "arch/hsail/insts/mem_impl.hh" #include "gpu-compute/brig_object.hh" namespace HsailISA { std::vector Decoder::decodedInsts; GPUStaticInst* Decoder::decode(MachInst machInst) { using namespace Brig; const BrigInstBase *ib = machInst.brigInstBase; const BrigObject *obj = machInst.brigObj; switch(ib->opcode) { ''') decoder_code.indent() decoder_code.indent() exec_code(''' #include "arch/hsail/insts/gen_decl.hh" #include "base/intmath.hh" namespace HsailISA { ''') exec_code.indent() ############### # # Define code templates for class declarations (for header file) # ############### # Basic header template for an instruction stub. header_template_stub = ''' class $class_name : public $base_class { public: typedef $base_class Base; $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "$opcode") { } void execute(GPUDynInstPtr gpuDynInst); }; ''' # Basic header template for an instruction with no template parameters. header_template_nodt = ''' class $class_name : public $base_class { public: typedef $base_class Base; $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "$opcode") { } void execute(GPUDynInstPtr gpuDynInst); }; ''' # Basic header template for an instruction with a single DataType # template parameter. header_template_1dt = ''' template class $class_name : public $base_class { public: typedef $base_class Base; typedef typename DataType::CType CType; $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "$opcode") { } void execute(GPUDynInstPtr gpuDynInst); }; ''' header_template_1dt_noexec = ''' template class $class_name : public $base_class { public: typedef $base_class Base; typedef typename DataType::CType CType; $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "$opcode") { } }; ''' # Same as header_template_1dt, except the base class has a second # template parameter NumSrcOperands to allow a variable number of # source operands. Note that since this is implemented with an array, # it only works for instructions where all sources are of the same # type (like most arithmetics). header_template_1dt_varsrcs = ''' template class $class_name : public $base_class { public: typedef $base_class Base; typedef typename DataType::CType CType; $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "$opcode") { } void execute(GPUDynInstPtr gpuDynInst); }; ''' # Header template for instruction with two DataType template # parameters, one for the dest and one for the source. This is used # by compare and convert. header_template_2dt = ''' template class $class_name : public $base_class { public: typedef $base_class Base; typedef typename DestDataType::CType DestCType; typedef typename SrcDataType::CType SrcCType; $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "$opcode") { } void execute(GPUDynInstPtr gpuDynInst); }; ''' header_templates = { 'ArithInst': header_template_1dt_varsrcs, 'CmovInst': header_template_1dt, 'ClassInst': header_template_1dt, 'ShiftInst': header_template_1dt, 'ExtractInsertInst': header_template_1dt, 'CmpInst': header_template_2dt, 'CvtInst': header_template_2dt, 'PopcountInst': header_template_2dt, 'LdInst': '', 'StInst': '', 'SpecialInstNoSrc': header_template_nodt, 'SpecialInst1Src': header_template_nodt, 'SpecialInstNoSrcNoDest': '', 'Stub': header_template_stub, } ############### # # Define code templates for exec functions # ############### # exec function body exec_template_stub = ''' void $class_name::execute(GPUDynInstPtr gpuDynInst) { fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble()); } ''' exec_template_nodt_nosrc = ''' void $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); typedef Base::DestCType DestCType; const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { DestCType dest_val = $expr; this->dest.set(w, lane, dest_val); } } } ''' exec_template_nodt_1src = ''' void $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); typedef Base::DestCType DestCType; typedef Base::SrcCType SrcCType; const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { SrcCType src_val0 = this->src0.get(w, lane); DestCType dest_val = $expr; this->dest.set(w, lane, dest_val); } } } ''' exec_template_1dt_varsrcs = ''' template void $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; if ($dest_is_src_flag) { dest_val = this->dest.template get(w, lane); } CType src_val[$num_srcs]; for (int i = 0; i < $num_srcs; ++i) { src_val[i] = this->src[i].template get(w, lane); } dest_val = (CType)($expr); this->dest.set(w, lane, dest_val); } } } ''' exec_template_1dt_3srcs = ''' template void $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); typedef typename Base::Src0CType Src0T; typedef typename Base::Src1CType Src1T; typedef typename Base::Src2CType Src2T; const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; if ($dest_is_src_flag) { dest_val = this->dest.template get(w, lane); } Src0T src_val0 = this->src0.template get(w, lane); Src1T src_val1 = this->src1.template get(w, lane); Src2T src_val2 = this->src2.template get(w, lane); dest_val = $expr; this->dest.set(w, lane, dest_val); } } } ''' exec_template_1dt_2src_1dest = ''' template void $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); typedef typename Base::DestCType DestT; typedef CType Src0T; typedef typename Base::Src1CType Src1T; const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { DestT dest_val; if ($dest_is_src_flag) { dest_val = this->dest.template get(w, lane); } Src0T src_val0 = this->src0.template get(w, lane); Src1T src_val1 = this->src1.template get(w, lane); dest_val = $expr; this->dest.set(w, lane, dest_val); } } } ''' exec_template_shift = ''' template void $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; if ($dest_is_src_flag) { dest_val = this->dest.template get(w, lane); } CType src_val0 = this->src0.template get(w, lane); uint32_t src_val1 = this->src1.template get(w, lane); dest_val = $expr; this->dest.set(w, lane, dest_val); } } } ''' exec_template_2dt = ''' template void $class_name::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { DestCType dest_val; SrcCType src_val[$num_srcs]; for (int i = 0; i < $num_srcs; ++i) { src_val[i] = this->src[i].template get(w, lane); } dest_val = $expr; this->dest.set(w, lane, dest_val); } } } ''' exec_templates = { 'ArithInst': exec_template_1dt_varsrcs, 'CmovInst': exec_template_1dt_3srcs, 'ExtractInsertInst': exec_template_1dt_3srcs, 'ClassInst': exec_template_1dt_2src_1dest, 'CmpInst': exec_template_2dt, 'CvtInst': exec_template_2dt, 'PopcountInst': exec_template_2dt, 'LdInst': '', 'StInst': '', 'SpecialInstNoSrc': exec_template_nodt_nosrc, 'SpecialInst1Src': exec_template_nodt_1src, 'SpecialInstNoSrcNoDest': '', 'Stub': exec_template_stub, } ############### # # Define code templates for the decoder cases # ############### # decode template for nodt-opcode case decode_nodt_template = ''' case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);''' decode_case_prolog_class_inst = ''' case BRIG_OPCODE_$brig_opcode_upper: { //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]); BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType; //switch (baseOp->kind) { // case BRIG_OPERAND_REG: // type = ((const BrigOperandReg*)baseOp)->type; // break; // case BRIG_OPERAND_IMMED: // type = ((const BrigOperandImmed*)baseOp)->type; // break; // default: // fatal("CLASS unrecognized kind of operand %d\\n", // baseOp->kind); //} switch (type) {''' # common prolog for 1dt- or 2dt-opcode case: switch on data type decode_case_prolog = ''' case BRIG_OPCODE_$brig_opcode_upper: { switch (ib->type) {''' # single-level decode case entry (for 1dt opcodes) decode_case_entry = \ ' case BRIG_TYPE_$type_name: return $constructor(ib, obj);' decode_store_prolog = \ ' case BRIG_TYPE_$type_name: {' decode_store_case_epilog = ''' }''' decode_store_case_entry = \ ' return $constructor(ib, obj);' # common epilog for type switch decode_case_epilog = ''' default: fatal("$brig_opcode_upper: unrecognized type %d\\n", ib->type); } } break;''' # Additional templates for nested decode on a second type field (for # compare and convert). These are used in place of the # decode_case_entry template to create a second-level switch on on the # second type field inside each case of the first-level type switch. # Because the name and location of the second type can vary, the Brig # instruction type must be provided in $brig_type, and the name of the # second type field must be provided in $type_field. decode_case2_prolog = ''' case BRIG_TYPE_$type_name: switch (((Brig$brig_type*)ib)->$type2_field) {''' decode_case2_entry = \ ' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);' decode_case2_epilog = ''' default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n", ((Brig$brig_type*)ib)->$type2_field); } break;''' # Figure out how many source operands an expr needs by looking for the # highest-numbered srcN value referenced. Since sources are numbered # starting at 0, the return value is N+1. def num_src_operands(expr): if expr.find('src2') != -1: return 3 elif expr.find('src1') != -1: return 2 elif expr.find('src0') != -1: return 1 else: return 0 ############### # # Define final code generation methods # # The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for # generating actual instructions. # ############### # Generate class declaration, exec function, and decode switch case # for an brig_opcode with a single-level type switch. The 'types' # parameter is a list or tuple of types for which the instruction # should be instantiated. def gen(brig_opcode, types=None, expr=None, base_class='ArithInst', type2_info=None, constructor_prefix='new ', is_store=False): brig_opcode_upper = brig_opcode.upper() class_name = brig_opcode opcode = class_name.lower() if base_class == 'ArithInst': # note that expr must be provided with ArithInst so we can # derive num_srcs for the template assert expr if expr: # Derive several bits of info from expr. If expr is not used, # this info will be irrelevant. num_srcs = num_src_operands(expr) # if the RHS expression includes 'dest', then we're doing an RMW # on the reg and we need to treat it like a source dest_is_src = expr.find('dest') != -1 dest_is_src_flag = str(dest_is_src).lower() # for C++ if base_class in ['ShiftInst']: expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr) elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']: expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr) else: expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr) expr = re.sub(r'\bdest\b', r'dest_val', expr) # Strip template arguments off of base class before looking up # appropriate templates base_class_base = re.sub(r'<.*>$', '', base_class) header_code(header_templates[base_class_base]) if base_class.startswith('SpecialInst') or base_class.startswith('Stub'): exec_code(exec_templates[base_class_base]) elif base_class.startswith('ShiftInst'): header_code(exec_template_shift) else: header_code(exec_templates[base_class_base]) if not types or isinstance(types, str): # Just a single type constructor = constructor_prefix + class_name decoder_code(decode_nodt_template) else: # multiple types, need at least one level of decode if brig_opcode == 'Class': decoder_code(decode_case_prolog_class_inst) else: decoder_code(decode_case_prolog) if not type2_info: if not is_store: # single list of types, to basic one-level decode for type_name in types: full_class_name = '%s<%s>' % (class_name, type_name.upper()) constructor = constructor_prefix + full_class_name decoder_code(decode_case_entry) else: # single list of types, to basic one-level decode for type_name in types: decoder_code(decode_store_prolog) type_size = int(re.findall(r'[0-9]+', type_name)[0]) src_size = 32 type_type = type_name[0] full_class_name = '%s<%s,%s>' % (class_name, \ type_name.upper(), \ '%s%d' % \ (type_type.upper(), \ type_size)) constructor = constructor_prefix + full_class_name decoder_code(decode_store_case_entry) decoder_code(decode_store_case_epilog) else: # need secondary type switch (convert, compare) # unpack extra info on second switch (type2_field, types2) = type2_info brig_type = 'Inst%s' % brig_opcode for type_name in types: decoder_code(decode_case2_prolog) fmt = '%s<%s,%%s>' % (class_name, type_name.upper()) for type2_name in types2: full_class_name = fmt % type2_name.upper() constructor = constructor_prefix + full_class_name decoder_code(decode_case2_entry) decoder_code(decode_case2_epilog) decoder_code(decode_case_epilog) ############### # # Generate instructions # ############### # handy abbreviations for common sets of types # arithmetic ops are typically defined only on 32- and 64-bit sizes arith_int_types = ('S32', 'U32', 'S64', 'U64') arith_float_types = ('F32', 'F64') arith_types = arith_int_types + arith_float_types bit_types = ('B1', 'B32', 'B64') all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types # I think you might be able to do 'f16' memory ops too, but we'll # ignore them for now. mem_types = all_int_types + arith_float_types mem_atom_types = all_int_types + ('B32', 'B64') ##### Arithmetic & logical operations gen('Add', arith_types, 'src0 + src1') gen('Sub', arith_types, 'src0 - src1') gen('Mul', arith_types, 'src0 * src1') gen('Div', arith_types, 'src0 / src1') gen('Min', arith_types, 'std::min(src0, src1)') gen('Max', arith_types, 'std::max(src0, src1)') gen('Gcnmin', arith_types, 'std::min(src0, src1)') gen('CopySign', arith_float_types, 'src1 < 0 ? -std::abs(src0) : std::abs(src0)') gen('Sqrt', arith_float_types, 'sqrt(src0)') gen('Floor', arith_float_types, 'floor(src0)') # "fast" sqrt... same as slow for us gen('Nsqrt', arith_float_types, 'sqrt(src0)') gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)') gen('Nrcp', arith_float_types, '1.0/src0') gen('Fract', arith_float_types, '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)') gen('Ncos', arith_float_types, 'cos(src0)'); gen('Nsin', arith_float_types, 'sin(src0)'); gen('And', bit_types, 'src0 & src1') gen('Or', bit_types, 'src0 | src1') gen('Xor', bit_types, 'src0 ^ src1') gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)') gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst', \ ('sourceType', ('B32', 'B64'))) gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst') gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst') # gen('Mul_hi', types=('s32','u32', '??')) # gen('Mul24', types=('s32','u32', '??')) gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)') gen('Abs', arith_types, 'std::abs(src0)') gen('Neg', arith_types, '-src0') gen('Mov', bit_types + arith_types, 'src0') gen('Not', bit_types, 'heynot(src0)') # mad and fma differ only in rounding behavior, which we don't emulate # also there's an integer form of mad, but not of fma gen('Mad', arith_types, 'src0 * src1 + src2') gen('Fma', arith_float_types, 'src0 * src1 + src2') #native floating point operations gen('Nfma', arith_float_types, 'src0 * src1 + src2') gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst') gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))') gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))') # see base/bitfield.hh gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)', 'ExtractInsertInst') gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)', 'ExtractInsertInst') ##### Compare gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)', 'CmpInst', ('sourceType', arith_types + bit_types)) gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst') ##### Conversion # Conversion operations are only defined on B1, not B32 or B64 cvt_types = ('B1',) + mem_types gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types)) ##### Load & Store gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode') gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode') gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode', is_store=True) gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode') gen('AtomicNoRet', mem_atom_types, base_class='StInst', constructor_prefix='decode') gen('Cbr', base_class = 'LdInst', constructor_prefix='decode') gen('Br', base_class = 'LdInst', constructor_prefix='decode') ##### Special operations def gen_special(brig_opcode, expr, dest_type='U32'): num_srcs = num_src_operands(expr) if num_srcs == 0: base_class = 'SpecialInstNoSrc<%s>' % dest_type elif num_srcs == 1: base_class = 'SpecialInst1Src<%s>' % dest_type else: assert false gen(brig_opcode, None, expr, base_class) gen_special('WorkItemId', 'w->workItemId[src0][lane]') gen_special('WorkItemAbsId', 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])') gen_special('WorkGroupId', 'w->workGroupId[src0]') gen_special('WorkGroupSize', 'w->workGroupSz[src0]') gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]') gen_special('GridSize', 'w->gridSz[src0]') gen_special('GridGroups', 'divCeil(w->gridSz[src0],w->workGroupSz[src0])') gen_special('LaneId', 'lane') gen_special('WaveId', 'w->wfId') gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64') # gen_special('CU'', ') gen('Ret', base_class='SpecialInstNoSrcNoDest') gen('Barrier', base_class='SpecialInstNoSrcNoDest') gen('MemFence', base_class='SpecialInstNoSrcNoDest') # Map magic instructions to the BrigSyscall opcode # Magic instructions are defined in magic.hh # # In the future, real HSA kernel system calls can be implemented and coexist # with magic instructions. gen('Call', base_class='SpecialInstNoSrcNoDest') # Stubs for unimplemented instructions: # These may need to be implemented at some point in the future, but # for now we just match the instructions with their operands. # # By defining stubs for these instructions, we can work with # applications that have them in dead/unused code paths. # # Needed for rocm-hcc compilations for HSA backends since # builtins-hsail library is `cat`d onto the generated kernels. # The builtins-hsail library consists of handcoded hsail functions # that __might__ be needed by the rocm-hcc compiler in certain binaries. gen('Bitmask', base_class='Stub') gen('Bitrev', base_class='Stub') gen('Firstbit', base_class='Stub') gen('Lastbit', base_class='Stub') gen('Unpacklo', base_class='Stub') gen('Unpackhi', base_class='Stub') gen('Pack', base_class='Stub') gen('Unpack', base_class='Stub') gen('Lerp', base_class='Stub') gen('Packcvt', base_class='Stub') gen('Unpackcvt', base_class='Stub') gen('Sad', base_class='Stub') gen('Sadhi', base_class='Stub') gen('Activelanecount', base_class='Stub') gen('Activelaneid', base_class='Stub') gen('Activelanemask', base_class='Stub') gen('Activelanepermute', base_class='Stub') gen('Groupbaseptr', base_class='Stub') gen('Signalnoret', base_class='Stub') ############### # # Generate file epilogs # ############### header_code(''' template<> inline void Abs::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; CType src_val; src_val = this->src[0].template get(w, lane); dest_val = (CType)(src_val); this->dest.set(w, lane, dest_val); } } } template<> inline void Abs::execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); const VectorMask &mask = w->getPred(); for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; CType src_val; src_val = this->src[0].template get(w, lane); dest_val = (CType)(src_val); this->dest.set(w, lane, dest_val); } } } ''') header_code.dedent() header_code(''' } // namespace HsailISA ''') # close off main decode switch decoder_code.dedent() decoder_code.dedent() decoder_code(''' default: fatal("unrecognized Brig opcode %d\\n", ib->opcode); } // end switch(ib->opcode) } // end decode() } // namespace HsailISA ''') exec_code.dedent() exec_code(''' } // namespace HsailISA ''') ############### # # Output accumulated code to files # ############### header_code.write(sys.argv[1]) decoder_code.write(sys.argv[2]) exec_code.write(sys.argv[3])