From 1a7d3f9fcb76a68540dd948f91413533a383bfde Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Tue, 19 Jan 2016 14:28:22 -0500 Subject: gpu-compute: AMD's baseline GPU model --- src/arch/hsail/insts/branch.hh | 442 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 442 insertions(+) create mode 100644 src/arch/hsail/insts/branch.hh (limited to 'src/arch/hsail/insts/branch.hh') diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh new file mode 100644 index 000000000..54ad9a042 --- /dev/null +++ b/src/arch/hsail/insts/branch.hh @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Steve Reinhardt + */ + +#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__ +#define __ARCH_HSAIL_INSTS_BRANCH_HH__ + +#include "arch/hsail/insts/gpu_static_inst.hh" +#include "arch/hsail/operand.hh" +#include "gpu-compute/gpu_dyn_inst.hh" +#include "gpu-compute/wavefront.hh" + +namespace HsailISA +{ + + // The main difference between a direct branch and an indirect branch + // is whether the target is a register or a label, so we can share a + // lot of code if we template the base implementation on that type. + template + class BrnInstBase : public HsailGPUStaticInst + { + public: + void generateDisassembly(); + + Brig::BrigWidth8_t width; + TargetType target; + + BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) + : HsailGPUStaticInst(obj, "brn") + { + o_type = Enums::OT_BRANCH; + width = ((Brig::BrigInstBr*)ib)->width; + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + target.init(op_offs, obj); + o_type = Enums::OT_BRANCH; + } + + uint32_t getTargetPc() override { return target.getTarget(0, 0); } + + bool unconditionalJumpInstruction() override { return true; } + bool isVectorRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isVectorRegister(); + } + bool isCondRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isCondRegister(); + } + bool isScalarRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isScalarRegister(); + } + + bool isSrcOperand(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return true; + } + + bool isDstOperand(int operandIndex) { + return false; + } + + int getOperandSize(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.opSize(); + } + + int getRegisterIndex(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.regIndex(); + } + + int getNumOperands() { + return 1; + } + + void execute(GPUDynInstPtr gpuDynInst); + }; + + template + void + BrnInstBase::generateDisassembly() + { + std::string widthClause; + + if (width != 1) { + widthClause = csprintf("_width(%d)", width); + } + + disassembly = csprintf("%s%s %s", opcode, widthClause, + target.disassemble()); + } + + template + void + BrnInstBase::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + if (getTargetPc() == w->rpc()) { + w->popFromReconvergenceStack(); + } else { + // Rpc and execution mask remain the same + w->pc(getTargetPc()); + } + w->discardFetch(); + } + + class BrnDirectInst : public BrnInstBase + { + public: + BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : BrnInstBase(ib, obj) + { + } + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return 0; } + }; + + class BrnIndirectInst : public BrnInstBase + { + public: + BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : BrnInstBase(ib, obj) + { + } + int numSrcRegOperands() { return target.isVectorRegister(); } + int numDstRegOperands() { return 0; } + }; + + GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib, + const BrigObject *obj); + + template + class CbrInstBase : public HsailGPUStaticInst + { + public: + void generateDisassembly(); + + Brig::BrigWidth8_t width; + CRegOperand cond; + TargetType target; + + CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) + : HsailGPUStaticInst(obj, "cbr") + { + o_type = Enums::OT_BRANCH; + width = ((Brig::BrigInstBr *)ib)->width; + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + cond.init(op_offs, obj); + op_offs = obj->getOperandPtr(ib->operands, 1); + target.init(op_offs, obj); + o_type = Enums::OT_BRANCH; + } + + uint32_t getTargetPc() override { return target.getTarget(0, 0); } + + void execute(GPUDynInstPtr gpuDynInst); + // Assumption: Target is operand 0, Condition Register is operand 1 + bool isVectorRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + if (!operandIndex) + return target.isVectorRegister(); + else + return false; + } + bool isCondRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + if (!operandIndex) + return target.isCondRegister(); + else + return true; + } + bool isScalarRegister(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (!operandIndex) + return target.isScalarRegister(); + else + return false; + } + bool isSrcOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + if (operandIndex == 0) + return true; + return false; + } + // both Condition Register and Target are source operands + bool isDstOperand(int operandIndex) { + return false; + } + int getOperandSize(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + if (!operandIndex) + return target.opSize(); + else + return 1; + } + int getRegisterIndex(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + if (!operandIndex) + return target.regIndex(); + else + return -1; + } + + // Operands = Target, Condition Register + int getNumOperands() { + return 2; + } + }; + + template + void + CbrInstBase::generateDisassembly() + { + std::string widthClause; + + if (width != 1) { + widthClause = csprintf("_width(%d)", width); + } + + disassembly = csprintf("%s%s %s,%s", opcode, widthClause, + cond.disassemble(), target.disassemble()); + } + + template + void + CbrInstBase::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + const uint32_t curr_pc = w->pc(); + const uint32_t curr_rpc = w->rpc(); + const VectorMask curr_mask = w->execMask(); + + /** + * TODO: can we move this pop outside the instruction, and + * into the wavefront? + */ + w->popFromReconvergenceStack(); + + // immediate post-dominator instruction + const uint32_t rpc = static_cast(ipdInstNum()); + if (curr_rpc != rpc) { + w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask); + } + + // taken branch + const uint32_t true_pc = getTargetPc(); + VectorMask true_mask; + for (unsigned int lane = 0; lane < VSZ; ++lane) { + true_mask[lane] = cond.get(w, lane) & curr_mask[lane]; + } + + // not taken branch + const uint32_t false_pc = curr_pc + 1; + assert(true_pc != false_pc); + if (false_pc != rpc && true_mask.count() < curr_mask.count()) { + VectorMask false_mask = curr_mask & ~true_mask; + w->pushToReconvergenceStack(false_pc, rpc, false_mask); + } + + if (true_pc != rpc && true_mask.count()) { + w->pushToReconvergenceStack(true_pc, rpc, true_mask); + } + assert(w->pc() != curr_pc); + w->discardFetch(); + } + + + class CbrDirectInst : public CbrInstBase + { + public: + CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : CbrInstBase(ib, obj) + { + } + // the source operand of a conditional branch is a Condition + // Register which is not stored in the VRF + // so we do not count it as a source-register operand + // even though, formally, it is one. + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return 0; } + }; + + class CbrIndirectInst : public CbrInstBase + { + public: + CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : CbrInstBase(ib, obj) + { + } + // one source operand of the conditional indirect branch is a Condition + // register which is not stored in the VRF so we do not count it + // as a source-register operand even though, formally, it is one. + int numSrcRegOperands() { return target.isVectorRegister(); } + int numDstRegOperands() { return 0; } + }; + + GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib, + const BrigObject *obj); + + template + class BrInstBase : public HsailGPUStaticInst + { + public: + void generateDisassembly(); + + ImmOperand width; + TargetType target; + + BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) + : HsailGPUStaticInst(obj, "br") + { + o_type = Enums::OT_BRANCH; + width.init(((Brig::BrigInstBr *)ib)->width, obj); + unsigned op_offs = obj->getOperandPtr(ib->operands, 0); + target.init(op_offs, obj); + o_type = Enums::OT_BRANCH; + } + + uint32_t getTargetPc() override { return target.getTarget(0, 0); } + + bool unconditionalJumpInstruction() override { return true; } + + void execute(GPUDynInstPtr gpuDynInst); + bool isVectorRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isVectorRegister(); + } + bool isCondRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isCondRegister(); + } + bool isScalarRegister(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.isScalarRegister(); + } + bool isSrcOperand(int operandIndex) { + assert((operandIndex >= 0) && (operandIndex < getNumOperands())); + return true; + } + bool isDstOperand(int operandIndex) { return false; } + int getOperandSize(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.opSize(); + } + int getRegisterIndex(int operandIndex) { + assert(operandIndex >= 0 && operandIndex < getNumOperands()); + return target.regIndex(); + } + int getNumOperands() { return 1; } + }; + + template + void + BrInstBase::generateDisassembly() + { + std::string widthClause; + + if (width.bits != 1) { + widthClause = csprintf("_width(%d)", width.bits); + } + + disassembly = csprintf("%s%s %s", opcode, widthClause, + target.disassemble()); + } + + template + void + BrInstBase::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *w = gpuDynInst->wavefront(); + + if (getTargetPc() == w->rpc()) { + w->popFromReconvergenceStack(); + } else { + // Rpc and execution mask remain the same + w->pc(getTargetPc()); + } + w->discardFetch(); + } + + class BrDirectInst : public BrInstBase + { + public: + BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : BrInstBase(ib, obj) + { + } + + int numSrcRegOperands() { return 0; } + int numDstRegOperands() { return 0; } + }; + + class BrIndirectInst : public BrInstBase + { + public: + BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) + : BrInstBase(ib, obj) + { + } + int numSrcRegOperands() { return target.isVectorRegister(); } + int numDstRegOperands() { return 0; } + }; + + GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib, + const BrigObject *obj); +} // namespace HsailISA + +#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__ -- cgit v1.2.3