summaryrefslogtreecommitdiff
path: root/src/arch/hsail/insts/branch.hh
diff options
context:
space:
mode:
Diffstat (limited to 'src/arch/hsail/insts/branch.hh')
-rw-r--r--src/arch/hsail/insts/branch.hh442
1 files changed, 442 insertions, 0 deletions
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh
new file mode 100644
index 000000000..54ad9a042
--- /dev/null
+++ b/src/arch/hsail/insts/branch.hh
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
+#define __ARCH_HSAIL_INSTS_BRANCH_HH__
+
+#include "arch/hsail/insts/gpu_static_inst.hh"
+#include "arch/hsail/operand.hh"
+#include "gpu-compute/gpu_dyn_inst.hh"
+#include "gpu-compute/wavefront.hh"
+
+namespace HsailISA
+{
+
+ // The main difference between a direct branch and an indirect branch
+ // is whether the target is a register or a label, so we can share a
+ // lot of code if we template the base implementation on that type.
+ template<typename TargetType>
+ class BrnInstBase : public HsailGPUStaticInst
+ {
+ public:
+ void generateDisassembly();
+
+ Brig::BrigWidth8_t width;
+ TargetType target;
+
+ BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : HsailGPUStaticInst(obj, "brn")
+ {
+ o_type = Enums::OT_BRANCH;
+ width = ((Brig::BrigInstBr*)ib)->width;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ target.init(op_offs, obj);
+ o_type = Enums::OT_BRANCH;
+ }
+
+ uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+
+ bool unconditionalJumpInstruction() override { return true; }
+ bool isVectorRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isScalarRegister();
+ }
+
+ bool isSrcOperand(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return true;
+ }
+
+ bool isDstOperand(int operandIndex) {
+ return false;
+ }
+
+ int getOperandSize(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.opSize();
+ }
+
+ int getRegisterIndex(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.regIndex();
+ }
+
+ int getNumOperands() {
+ return 1;
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ template<typename TargetType>
+ void
+ BrnInstBase<TargetType>::generateDisassembly()
+ {
+ std::string widthClause;
+
+ if (width != 1) {
+ widthClause = csprintf("_width(%d)", width);
+ }
+
+ disassembly = csprintf("%s%s %s", opcode, widthClause,
+ target.disassemble());
+ }
+
+ template<typename TargetType>
+ void
+ BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ if (getTargetPc() == w->rpc()) {
+ w->popFromReconvergenceStack();
+ } else {
+ // Rpc and execution mask remain the same
+ w->pc(getTargetPc());
+ }
+ w->discardFetch();
+ }
+
+ class BrnDirectInst : public BrnInstBase<LabelOperand>
+ {
+ public:
+ BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : BrnInstBase<LabelOperand>(ib, obj)
+ {
+ }
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return 0; }
+ };
+
+ class BrnIndirectInst : public BrnInstBase<SRegOperand>
+ {
+ public:
+ BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : BrnInstBase<SRegOperand>(ib, obj)
+ {
+ }
+ int numSrcRegOperands() { return target.isVectorRegister(); }
+ int numDstRegOperands() { return 0; }
+ };
+
+ GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
+ const BrigObject *obj);
+
+ template<typename TargetType>
+ class CbrInstBase : public HsailGPUStaticInst
+ {
+ public:
+ void generateDisassembly();
+
+ Brig::BrigWidth8_t width;
+ CRegOperand cond;
+ TargetType target;
+
+ CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : HsailGPUStaticInst(obj, "cbr")
+ {
+ o_type = Enums::OT_BRANCH;
+ width = ((Brig::BrigInstBr *)ib)->width;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ cond.init(op_offs, obj);
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ target.init(op_offs, obj);
+ o_type = Enums::OT_BRANCH;
+ }
+
+ uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ // Assumption: Target is operand 0, Condition Register is operand 1
+ bool isVectorRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ if (!operandIndex)
+ return target.isVectorRegister();
+ else
+ return false;
+ }
+ bool isCondRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ if (!operandIndex)
+ return target.isCondRegister();
+ else
+ return true;
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (!operandIndex)
+ return target.isScalarRegister();
+ else
+ return false;
+ }
+ bool isSrcOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == 0)
+ return true;
+ return false;
+ }
+ // both Condition Register and Target are source operands
+ bool isDstOperand(int operandIndex) {
+ return false;
+ }
+ int getOperandSize(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ if (!operandIndex)
+ return target.opSize();
+ else
+ return 1;
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ if (!operandIndex)
+ return target.regIndex();
+ else
+ return -1;
+ }
+
+ // Operands = Target, Condition Register
+ int getNumOperands() {
+ return 2;
+ }
+ };
+
+ template<typename TargetType>
+ void
+ CbrInstBase<TargetType>::generateDisassembly()
+ {
+ std::string widthClause;
+
+ if (width != 1) {
+ widthClause = csprintf("_width(%d)", width);
+ }
+
+ disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
+ cond.disassemble(), target.disassemble());
+ }
+
+ template<typename TargetType>
+ void
+ CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ const uint32_t curr_pc = w->pc();
+ const uint32_t curr_rpc = w->rpc();
+ const VectorMask curr_mask = w->execMask();
+
+ /**
+ * TODO: can we move this pop outside the instruction, and
+ * into the wavefront?
+ */
+ w->popFromReconvergenceStack();
+
+ // immediate post-dominator instruction
+ const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
+ if (curr_rpc != rpc) {
+ w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
+ }
+
+ // taken branch
+ const uint32_t true_pc = getTargetPc();
+ VectorMask true_mask;
+ for (unsigned int lane = 0; lane < VSZ; ++lane) {
+ true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
+ }
+
+ // not taken branch
+ const uint32_t false_pc = curr_pc + 1;
+ assert(true_pc != false_pc);
+ if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
+ VectorMask false_mask = curr_mask & ~true_mask;
+ w->pushToReconvergenceStack(false_pc, rpc, false_mask);
+ }
+
+ if (true_pc != rpc && true_mask.count()) {
+ w->pushToReconvergenceStack(true_pc, rpc, true_mask);
+ }
+ assert(w->pc() != curr_pc);
+ w->discardFetch();
+ }
+
+
+ class CbrDirectInst : public CbrInstBase<LabelOperand>
+ {
+ public:
+ CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : CbrInstBase<LabelOperand>(ib, obj)
+ {
+ }
+ // the source operand of a conditional branch is a Condition
+ // Register which is not stored in the VRF
+ // so we do not count it as a source-register operand
+ // even though, formally, it is one.
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return 0; }
+ };
+
+ class CbrIndirectInst : public CbrInstBase<SRegOperand>
+ {
+ public:
+ CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : CbrInstBase<SRegOperand>(ib, obj)
+ {
+ }
+ // one source operand of the conditional indirect branch is a Condition
+ // register which is not stored in the VRF so we do not count it
+ // as a source-register operand even though, formally, it is one.
+ int numSrcRegOperands() { return target.isVectorRegister(); }
+ int numDstRegOperands() { return 0; }
+ };
+
+ GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
+ const BrigObject *obj);
+
+ template<typename TargetType>
+ class BrInstBase : public HsailGPUStaticInst
+ {
+ public:
+ void generateDisassembly();
+
+ ImmOperand<uint32_t> width;
+ TargetType target;
+
+ BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : HsailGPUStaticInst(obj, "br")
+ {
+ o_type = Enums::OT_BRANCH;
+ width.init(((Brig::BrigInstBr *)ib)->width, obj);
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ target.init(op_offs, obj);
+ o_type = Enums::OT_BRANCH;
+ }
+
+ uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+
+ bool unconditionalJumpInstruction() override { return true; }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ bool isVectorRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return true;
+ }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.opSize();
+ }
+ int getRegisterIndex(int operandIndex) {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return target.regIndex();
+ }
+ int getNumOperands() { return 1; }
+ };
+
+ template<typename TargetType>
+ void
+ BrInstBase<TargetType>::generateDisassembly()
+ {
+ std::string widthClause;
+
+ if (width.bits != 1) {
+ widthClause = csprintf("_width(%d)", width.bits);
+ }
+
+ disassembly = csprintf("%s%s %s", opcode, widthClause,
+ target.disassemble());
+ }
+
+ template<typename TargetType>
+ void
+ BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ if (getTargetPc() == w->rpc()) {
+ w->popFromReconvergenceStack();
+ } else {
+ // Rpc and execution mask remain the same
+ w->pc(getTargetPc());
+ }
+ w->discardFetch();
+ }
+
+ class BrDirectInst : public BrInstBase<LabelOperand>
+ {
+ public:
+ BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : BrInstBase<LabelOperand>(ib, obj)
+ {
+ }
+
+ int numSrcRegOperands() { return 0; }
+ int numDstRegOperands() { return 0; }
+ };
+
+ class BrIndirectInst : public BrInstBase<SRegOperand>
+ {
+ public:
+ BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ : BrInstBase<SRegOperand>(ib, obj)
+ {
+ }
+ int numSrcRegOperands() { return target.isVectorRegister(); }
+ int numDstRegOperands() { return 0; }
+ };
+
+ GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
+ const BrigObject *obj);
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__