diff options
author | Tony Gutierrez <anthony.gutierrez@amd.com> | 2016-01-19 14:28:22 -0500 |
---|---|---|
committer | Tony Gutierrez <anthony.gutierrez@amd.com> | 2016-01-19 14:28:22 -0500 |
commit | 1a7d3f9fcb76a68540dd948f91413533a383bfde (patch) | |
tree | 867510a147cd095f19499d26b7c02d27de4cae9d /src/arch/hsail/gen.py | |
parent | 28e353e0403ea379d244a418e8dc8ee0b48187cf (diff) | |
download | gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz |
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/arch/hsail/gen.py')
-rwxr-xr-x | src/arch/hsail/gen.py | 806 |
1 files changed, 806 insertions, 0 deletions
diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py new file mode 100755 index 000000000..f2996019b --- /dev/null +++ b/src/arch/hsail/gen.py @@ -0,0 +1,806 @@ +#! /usr/bin/python + +# +# Copyright (c) 2015 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
#
# Author: Steve Reinhardt
#

import re
import sys

from m5.util import code_formatter

# Usage: gen.py <header file> <decoder file> <exec file>
# The three arguments are the output paths that the accumulated header,
# decoder and exec code are written to at the end of this script.
if len(sys.argv) != 4:
    # sys.exit() with a string argument prints it to stderr and exits
    # with status 1, so a bad invocation is reported as a failure to
    # the calling build step instead of silently "succeeding".
    sys.exit("Error: need 3 args (file names)")

# One code_formatter accumulates the text of each generated file.
header_code = code_formatter()
decoder_code = code_formatter()
exec_code = code_formatter()

###############
#
# Generate file prologs (includes etc.)
#
###############

header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{
''')
header_code.indent()

# The decoder prolog opens Decoder::decode() and its switch on the Brig
# opcode; the matching close is emitted by the file epilog.
decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"

namespace HsailISA
{
    std::vector<GPUStaticInst*> Decoder::decodedInsts;

    GPUStaticInst*
    Decoder::decode(MachInst machInst)
    {
        using namespace Brig;

        const BrigInstBase *ib = machInst.brigInstBase;
        const BrigObject *obj = machInst.brigObj;

        switch(ib->opcode) {
''')
decoder_code.indent()
decoder_code.indent()

exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"

namespace HsailISA
{
''')
exec_code.indent()

###############
#
# Define code templates for class declarations (for header file)
#
###############

# Basic header template for an instruction with no template parameters.
header_template_nodt = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Basic header template for an instruction with a single DataType
# template parameter.
header_template_1dt = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Variant of header_template_1dt that declares no execute() method.
# NOTE(review): not referenced by the header_templates table in this
# file -- confirm whether it is still needed.
header_template_1dt_noexec = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }
};

'''

# Same as header_template_1dt, except the base class has a second
# template parameter NumSrcOperands to allow a variable number of
# source operands.  Note that since this is implemented with an array,
# it only works for instructions where all sources are of the same
# type (like most arithmetics).
header_template_1dt_varsrcs = '''
template<typename DataType>
class $class_name : public $base_class<DataType, $num_srcs>
{
  public:
    typedef $base_class<DataType, $num_srcs> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Header template for instruction with two DataType template
# parameters, one for the dest and one for the source.  This is used
# by compare and convert.
header_template_2dt = '''
template<typename DestDataType, class SrcDataType>
class $class_name : public $base_class<DestDataType, SrcDataType>
{
  public:
    typedef $base_class<DestDataType, SrcDataType> Base;
    typedef typename DestDataType::CType DestCType;
    typedef typename SrcDataType::CType SrcCType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Maps a base class name (with any template arguments stripped) to the
# class-declaration template used for it.  An empty string means no
# declaration text is supplied for that base class.
header_templates = {
    'ArithInst': header_template_1dt_varsrcs,
    'CmovInst': header_template_1dt,
    'ClassInst': header_template_1dt,
    'ShiftInst': header_template_1dt,
    'ExtractInsertInst': header_template_1dt,
    'CmpInst': header_template_2dt,
    'CvtInst': header_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': header_template_nodt,
    'SpecialInst1Src': header_template_nodt,
    'SpecialInstNoSrcNoDest': '',
}

###############
#
# Define code templates for exec functions
#
###############

# exec function body
# Each template loops over the VSZ lanes and, for lanes enabled in the
# wavefront's predicate mask, evaluates $expr and writes it to dest.
exec_template_nodt_nosrc = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;

    const VectorMask &mask = w->get_pred();

    for (int lane = 0; lane < VSZ; ++lane) {
        if (mask[lane]) {
            DestCType dest_val = $expr;
            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

exec_template_nodt_1src = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;
    typedef Base::SrcCType SrcCType;

    const VectorMask &mask = w->get_pred();

    for (int lane = 0; lane < VSZ; ++lane) {
        if (mask[lane]) {
            SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
            DestCType dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Sources are read into the src_val[] array, so $expr must refer to
# them as src_val[N].  The current dest value is only read first when
# $dest_is_src_flag is true.
exec_template_1dt_varsrcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->get_pred();

    for (int lane = 0; lane < VSZ; ++lane) {
        if (mask[lane]) {
            CType dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<CType>(w, lane);
            }

            dest_val = (CType)($expr);

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Three individually typed sources, named src_val0..src_val2 in $expr.
exec_template_1dt_3srcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::Src0CType Src0T;
    typedef typename Base::Src1CType Src1T;
    typedef typename Base::Src2CType Src2T;

    const VectorMask &mask = w->get_pred();

    for (int lane = 0; lane < VSZ; ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
            Src2T src_val2 = this->src2.template get<Src2T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Two sources (src_val0, src_val1) whose types may differ from the
# destination type.
exec_template_1dt_2src_1dest = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::DestCType DestT;
    typedef CType Src0T;
    typedef typename Base::Src1CType Src1T;

    const VectorMask &mask = w->get_pred();

    for (int lane = 0; lane < VSZ; ++lane) {
        if (mask[lane]) {
            DestT dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<DestT>(w, lane);
            }
            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Shift template: src_val0 has the instruction's CType while the shift
# amount src_val1 is always read as uint32_t.  This template is not in
# the exec_templates table; gen() selects it directly for ShiftInst.
exec_template_shift = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->get_pred();
    for (int lane = 0; lane < VSZ; ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val0 = this->src0.template get<CType>(w, lane);
            uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Separate dest and source data types; sources are read into the
# src_val[] array, so $expr must refer to them as src_val[N].
exec_template_2dt = '''
template<typename DestDataType, class SrcDataType>
void
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->get_pred();

    for (int lane = 0; lane < VSZ; ++lane) {
        if (mask[lane]) {
            DestCType dest_val;
            SrcCType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<SrcCType>(w, lane);
            }

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Maps a base class name (with any template arguments stripped) to the
# execute() template used for it.  An empty string means no execute()
# text is supplied for that base class.  Note there is no 'ShiftInst'
# entry (see exec_template_shift).
exec_templates = {
    'ArithInst': exec_template_1dt_varsrcs,
    'CmovInst': exec_template_1dt_3srcs,
    'ExtractInsertInst': exec_template_1dt_3srcs,
    'ClassInst': exec_template_1dt_2src_1dest,
    'CmpInst': exec_template_2dt,
    'CvtInst': exec_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': exec_template_nodt_nosrc,
    'SpecialInst1Src': exec_template_nodt_1src,
    'SpecialInstNoSrcNoDest': '',
}

###############
#
# Define code templates for the decoder cases
#
###############

# decode template for nodt-opcode case
decode_nodt_template = '''
  case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''

# prolog for the Class opcode: switches on the instruction's sourceType
# field rather than ib->type
decode_case_prolog_class_inst = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
        BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
        //switch (baseOp->kind) {
        //    case BRIG_OPERAND_REG:
        //        type = ((const BrigOperandReg*)baseOp)->type;
        //        break;
        //    case BRIG_OPERAND_IMMED:
        //        type = ((const BrigOperandImmed*)baseOp)->type;
        //        break;
        //    default:
        //        fatal("CLASS unrecognized kind of operand %d\\n",
        //               baseOp->kind);
        //}
        switch (type) {'''

# common prolog for 1dt- or 2dt-opcode case: switch on data type
decode_case_prolog = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        switch (ib->type) {'''

# single-level decode case entry (for 1dt opcodes)
decode_case_entry = \
'      case BRIG_TYPE_$type_name: return $constructor(ib, obj);'

# store variant of the single-level case entry: the case body is
# wrapped in its own braces (prolog / entry / epilog)
decode_store_prolog = \
'      case BRIG_TYPE_$type_name: {'

decode_store_case_epilog = '''
    }'''

decode_store_case_entry = \
'          return $constructor(ib, obj);'

# common epilog for type switch
decode_case_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
                         ib->type);
        }
    }
    break;'''

# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
decode_case2_prolog = '''
  case BRIG_TYPE_$type_name:
    switch (((Brig$brig_type*)ib)->$type2_field) {'''

decode_case2_entry = \
'      case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'

decode_case2_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
                         ((Brig$brig_type*)ib)->$type2_field);
        }
        break;'''

# Figure out how many source operands an expr needs by looking for the
# highest-numbered srcN value referenced.  Since sources are numbered
# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return how many source operands *expr* references (0 to 3).

    This is a plain substring search for 'src2', 'src1' and 'src0', in
    that order, so only the highest-numbered source matters.
    """
    for index in (2, 1, 0):
        if 'src%d' % index in expr:
            return index + 1
    return 0

###############
#
# Define final code generation methods
#
# The gen() and gen_special() methods are the interface for generating
# actual instructions.
#
###############

# Generate class declaration, exec function, and decode switch case
# for a brig_opcode with a single-level type switch.  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the declaration, execute() body and decode case for one opcode.

    brig_opcode        -- opcode name; used as the C++ class name and,
                          upper-cased, in the BRIG_OPCODE_* case label.
    types              -- tuple/list of type names to switch on in the
                          decoder; None (or a single string) produces a
                          plain, untyped decode case.
    expr               -- C++ expression over src0..src2 and dest that
                          computes the result; required for ArithInst.
    base_class         -- base class name (optionally with template
                          arguments) used to select the templates.
    type2_info         -- optional (field_name, types) pair requesting a
                          nested switch on a second type field.
    constructor_prefix -- text placed before the class name in the
                          decoder's return statement.
    is_store           -- if true, use the store-style decode entries
                          with two template arguments.

    Raises ValueError if base_class is 'ArithInst' and expr is missing.
    """
    # NOTE: the templates are passed to the code_formatter objects
    # without any substitution arguments, so the $name placeholders are
    # presumably resolved from this function's local variables.  The
    # locals referenced by the templates (brig_opcode_upper, class_name,
    # opcode, base_class, num_srcs, dest_is_src_flag, expr, constructor,
    # type_name, type2_name, type2_field, brig_type) must therefore keep
    # exactly these names even though they look unused here.
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst' and not expr:
        # expr must be provided with ArithInst so we can derive
        # num_srcs for the template
        raise ValueError("gen(%r): ArithInst requires an expr" % brig_opcode)

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst'):
            # these templates read their sources into an array
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            # everything else uses individually named source values
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    # Only the SpecialInst classes have their execute() body emitted to
    # the exec file; all other execute() bodies are emitted to the
    # header alongside the class declaration.
    if base_class.startswith('SpecialInst'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, do basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name,
                                                  type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, do basic one-level decode; the
                # second template argument is rebuilt from the type's
                # leading letter and its first run of digits
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s%d>' % (class_name,
                                                       type_name.upper(),
                                                       type_type.upper(),
                                                       type_size)
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)

###############
#
# Generate instructions
#
###############

# handy abbreviations for common sets of types

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

bit_types = ('B1', 'B32', 'B64')

all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')

##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)')
gen('Firstbit',bit_types, 'firstbit(src0)')
# Use the 'll' builtin: __builtin_popcount() takes an unsigned int and
# would drop the upper 32 bits of a B64 operand.  The result is the
# same for B32 operands.
gen('Popcount', ('B32', 'B64'), '__builtin_popcountll(src0)')

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')

##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')

##### Conversion

# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))


##### Load & Store
gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
gen('Br', base_class = 'LdInst', constructor_prefix='decode')

##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a SpecialInst opcode taking zero or one source operand.

    The base class is chosen from the number of sources that expr
    references and is parameterized on dest_type.

    Raises ValueError if expr references more than one source operand.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # an explicit raise (rather than an assert) also fires under -O
        raise ValueError("gen_special(%r): at most one source operand is "
                         "supported, expr uses %d"
                         % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)

gen_special('WorkItemId', 'w->workitemid[src0][lane]')
gen_special('WorkItemAbsId',
    'w->workitemid[src0][lane] + (w->workgroupid[src0] * w->workgroupsz[src0])')
gen_special('WorkGroupId', 'w->workgroupid[src0]')
gen_special('WorkGroupSize', 'w->workgroupsz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workgroupsz[src0]')
gen_special('GridSize', 'w->gridsz[src0]')
gen_special('GridGroups',
    'divCeil(w->gridsz[src0],w->workgroupsz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->dynwaveid')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')

# gen_special('CU'', ')

gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')

# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')

###############
#
# Generate file epilogs
#
###############
header_code.dedent()
header_code('''
} // namespace HsailISA
''')

# close off main decode switch
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
          default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
        } // end switch(ib->opcode)
    } // end decode()
} // namespace HsailISA
''')

exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')

###############
#
# Output accumulated code to files
#
###############
header_code.write(sys.argv[1])
decoder_code.write(sys.argv[2])
exec_code.write(sys.argv[3])