From 1a7d3f9fcb76a68540dd948f91413533a383bfde Mon Sep 17 00:00:00 2001
From: Tony Gutierrez <anthony.gutierrez@amd.com>
Date: Tue, 19 Jan 2016 14:28:22 -0500
Subject: gpu-compute: AMD's baseline GPU model

---
 src/arch/hsail/insts/branch.hh | 442 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 442 insertions(+)
 create mode 100644 src/arch/hsail/insts/branch.hh

(limited to 'src/arch/hsail/insts/branch.hh')
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh
new file mode 100644
index 000000000..54ad9a042
--- /dev/null
+++ b/src/arch/hsail/insts/branch.hh
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
+#define __ARCH_HSAIL_INSTS_BRANCH_HH__
+
+#include "arch/hsail/insts/gpu_static_inst.hh"
+#include "arch/hsail/operand.hh"
+#include "gpu-compute/gpu_dyn_inst.hh"
+#include "gpu-compute/wavefront.hh"
+
+namespace HsailISA
+{
+
+    // The main difference between a direct branch and an indirect branch
+    // is whether the target is a register or a label, so we can share a
+    // lot of code if we template the base implementation on that type.
+    //
+    // BrnInstBase implements the "brn" opcode. It exposes one operand, the
+    // target; register-class, size and index queries are forwarded to it.
+    template<typename TargetType>
+    class BrnInstBase : public HsailGPUStaticInst
+    {
+      public:
+        void generateDisassembly();
+
+        Brig::BrigWidth8_t width;  // width field read from the BRIG inst
+        TargetType target;         // branch destination operand
+
+        // Takes the width from ib (viewed as a BrigInstBr) and sets up
+        // the target from operand slot 0 of ib->operands.
+        BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+           : HsailGPUStaticInst(obj, "brn")
+        {
+            o_type = Enums::OT_BRANCH;
+            width = ((const Brig::BrigInstBr*)ib)->width;
+            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+            target.init(op_offs, obj);
+        }
+
+        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+        bool unconditionalJumpInstruction() override { return true; }
+
+        // Register-class queries are answered by the target operand.
+        bool isVectorRegister(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.isVectorRegister();
+        }
+        bool isCondRegister(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.isCondRegister();
+        }
+        bool isScalarRegister(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.isScalarRegister();
+        }
+
+        // The single operand is reported as a source and never as a dest.
+        bool isSrcOperand(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return true;
+        }
+        bool isDstOperand(int operandIndex) { return false; }
+
+        int getOperandSize(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.opSize();
+        }
+        int getRegisterIndex(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.regIndex();
+        }
+        int getNumOperands() { return 1; }
+
+        // Defined out of line below the class.
+        void execute(GPUDynInstPtr gpuDynInst);
+    };
+
+    // Sets disassembly to "<opcode><width clause> <target>", where the
+    // width clause is "_width(N)" and is omitted when width equals 1.
+    template<typename TargetType>
+    void
+    BrnInstBase<TargetType>::generateDisassembly()
+    {
+        std::string width_suffix;
+        if (width != 1)
+            width_suffix = csprintf("_width(%d)", width);
+
+        disassembly = csprintf("%s%s %s", opcode, width_suffix,
+                               target.disassemble());
+    }
+
+    // Steers the wavefront to the branch target. A target equal to the
+    // wavefront's rpc pops the reconvergence stack instead of setting pc.
+    template<typename TargetType>
+    void
+    BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        if (getTargetPc() != wf->rpc()) {
+            wf->pc(getTargetPc());  // rpc and exec mask stay as they are
+        } else {
+            wf->popFromReconvergenceStack();
+        }
+        wf->discardFetch();
+    }
+
+    // brn whose target is a label; reports no register operands.
+    class BrnDirectInst : public BrnInstBase<LabelOperand>
+    {
+      public:
+        BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+            : BrnInstBase<LabelOperand>(ib, obj) {}
+
+        int numDstRegOperands() { return 0; }
+        int numSrcRegOperands() { return 0; }
+    };
+
+    // brn whose target is an SRegOperand; the source-register count is
+    // whatever target.isVectorRegister() returns, converted to int.
+    class BrnIndirectInst : public BrnInstBase<SRegOperand>
+    {
+      public:
+        BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+            : BrnInstBase<SRegOperand>(ib, obj) {}
+        int numDstRegOperands() { return 0; }
+        int numSrcRegOperands() { return target.isVectorRegister(); }
+    };
+
+    GPUStaticInst*
+    decodeBrn(const Brig::BrigInstBase *ib, const BrigObject *obj);
+
+    // Base for the conditional branch ("cbr") instructions, templated on
+    // the target operand type.
+    //
+    // Operand numbering differs between the two interfaces used here:
+    // the constructor reads the condition from BRIG operand slot 0 and
+    // the target from slot 1, whereas the operandIndex-based queries
+    // treat index 0 as the target and index 1 as the condition register.
+    template<typename TargetType>
+    class CbrInstBase : public HsailGPUStaticInst
+    {
+      public:
+        void generateDisassembly();
+
+        Brig::BrigWidth8_t width;  // width field read from the BRIG inst
+        CRegOperand cond;          // per-lane branch condition
+        TargetType target;         // branch destination operand
+
+        CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+           : HsailGPUStaticInst(obj, "cbr")
+        {
+            o_type = Enums::OT_BRANCH;
+            width = ((const Brig::BrigInstBr *)ib)->width;
+            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+            cond.init(op_offs, obj);
+            op_offs = obj->getOperandPtr(ib->operands, 1);
+            target.init(op_offs, obj);
+        }
+
+        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+
+        void execute(GPUDynInstPtr gpuDynInst);
+
+        // Assumption: Target is operand 0, Condition Register is operand 1
+        bool isVectorRegister(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            if (operandIndex == 0)
+                return target.isVectorRegister();
+            return false;
+        }
+        bool isCondRegister(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            if (operandIndex == 0)
+                return target.isCondRegister();
+            return true;
+        }
+        bool isScalarRegister(int operandIndex) {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            if (operandIndex == 0)
+                return target.isScalarRegister();
+            return false;
+        }
+
+        // NOTE(review): only the target (index 0) is reported as a source
+        // even though execute() also reads the condition register. The
+        // derived classes exclude it on purpose too -- confirm intent.
+        bool isSrcOperand(int operandIndex) {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            return operandIndex == 0;
+        }
+        bool isDstOperand(int operandIndex) { return false; }
+
+        int getOperandSize(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            if (operandIndex == 0)
+                return target.opSize();
+            return 1;
+        }
+        int getRegisterIndex(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            if (operandIndex == 0)
+                return target.regIndex();
+            return -1;
+        }
+
+        // Operands = Target, Condition Register
+        int getNumOperands() { return 2; }
+    };
+
+    // Sets disassembly to "<opcode><width clause> <cond>,<target>", where
+    // the width clause is "_width(N)" and is omitted when width equals 1.
+    template<typename TargetType>
+    void
+    CbrInstBase<TargetType>::generateDisassembly()
+    {
+        std::string width_suffix;
+        if (width != 1)
+            width_suffix = csprintf("_width(%d)", width);
+
+        disassembly = csprintf("%s%s %s,%s", opcode, width_suffix,
+                               cond.disassemble(), target.disassemble());
+    }
+
+    // Splits the wavefront's execution between the taken and not-taken
+    // paths of the branch by rewriting its reconvergence stack.
+    template<typename TargetType>
+    void
+    CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        // snapshot the state on entry, before the stack is modified
+        const uint32_t entry_pc = wf->pc();
+        const uint32_t entry_rpc = wf->rpc();
+        const VectorMask entry_mask = wf->execMask();
+
+        // TODO: can we move this pop outside the instruction, and
+        // into the wavefront?
+        wf->popFromReconvergenceStack();
+
+        // reconvergence pc: the immediate post-dominator instruction
+        const uint32_t ipd_pc = static_cast<uint32_t>(ipdInstNum());
+        if (entry_rpc != ipd_pc)
+            wf->pushToReconvergenceStack(ipd_pc, entry_rpc, entry_mask);
+
+        // taken branch: lanes of the entry mask whose condition is true
+        const uint32_t taken_pc = getTargetPc();
+        VectorMask taken_mask;
+        for (unsigned int lane = 0; lane < VSZ; ++lane)
+            taken_mask[lane] = cond.get<bool>(wf, lane) & entry_mask[lane];
+
+        // not taken branch: remaining entry-mask lanes continue at pc + 1
+        const uint32_t next_pc = entry_pc + 1;
+        assert(taken_pc != next_pc);
+        if (next_pc != ipd_pc && taken_mask.count() < entry_mask.count()) {
+            VectorMask next_mask = entry_mask & ~taken_mask;
+            wf->pushToReconvergenceStack(next_pc, ipd_pc, next_mask);
+        }
+
+        // the taken path is pushed last
+        if (taken_pc != ipd_pc && taken_mask.count())
+            wf->pushToReconvergenceStack(taken_pc, ipd_pc, taken_mask);
+
+        assert(wf->pc() != entry_pc);
+        wf->discardFetch();
+    }
+
+
+    // cbr whose target is a label; reports no source or destination
+    // register operands.
+    class CbrDirectInst : public CbrInstBase<LabelOperand>
+    {
+      public:
+        CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+            : CbrInstBase<LabelOperand>(ib, obj) {}
+
+        // The source operand of a conditional branch is a Condition
+        // Register, which is not stored in the VRF, so it is not counted
+        // as a source-register operand even though, formally, it is one.
+        int numSrcRegOperands() { return 0; }
+        int numDstRegOperands() { return 0; }
+    };
+
+    // cbr whose target is an SRegOperand.
+    class CbrIndirectInst : public CbrInstBase<SRegOperand>
+    {
+      public:
+        CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+            : CbrInstBase<SRegOperand>(ib, obj) {}
+        // One source operand of the conditional indirect branch is a
+        // Condition Register, which is not stored in the VRF, so it is not
+        // counted as a source-register operand even though, formally, it
+        // is one; only the target contributes to the returned count.
+        int numSrcRegOperands() { return target.isVectorRegister(); }
+        int numDstRegOperands() { return 0; }
+    };
+
+    GPUStaticInst*
+    decodeCbr(const Brig::BrigInstBase *ib, const BrigObject *obj);
+
+    // Base for the "br" opcode: one operand (the target), ImmOperand width.
+    template<typename TargetType>
+    class BrInstBase : public HsailGPUStaticInst
+    {
+      public:
+        void generateDisassembly();
+
+        ImmOperand<uint32_t> width;
+        TargetType target;
+
+        BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
+           : HsailGPUStaticInst(obj, "br")
+        {
+            o_type = Enums::OT_BRANCH;
+            // NOTE(review): the BRIG width value is passed where the other
+            // operands pass an operand offset -- confirm this is intended.
+            width.init(((const Brig::BrigInstBr *)ib)->width, obj);
+            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+            target.init(op_offs, obj);
+        }
+
+        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
+        bool unconditionalJumpInstruction() override { return true; }
+        void execute(GPUDynInstPtr gpuDynInst);
+        bool isVectorRegister(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.isVectorRegister();
+        }
+        bool isCondRegister(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.isCondRegister();
+        }
+        bool isScalarRegister(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.isScalarRegister();
+        }
+        bool isSrcOperand(int operandIndex) {
+            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+            return true;
+        }
+        bool isDstOperand(int operandIndex) { return false; }
+        int getOperandSize(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.opSize();
+        }
+        int getRegisterIndex(int operandIndex) {
+            assert(operandIndex >= 0 && operandIndex < getNumOperands());
+            return target.regIndex();
+        }
+        int getNumOperands() { return 1; }
+    };
+
+    // Sets disassembly to "<opcode><width clause> <target>", where the
+    // width clause is "_width(N)" and is omitted when width.bits equals 1.
+    template<typename TargetType>
+    void
+    BrInstBase<TargetType>::generateDisassembly()
+    {
+        std::string width_suffix;
+        if (width.bits != 1)
+            width_suffix = csprintf("_width(%d)", width.bits);
+
+        disassembly = csprintf("%s%s %s", opcode, width_suffix,
+                               target.disassemble());
+    }
+
+    // Steers the wavefront to the branch target. A target equal to the
+    // wavefront's rpc pops the reconvergence stack instead of setting pc.
+    template<typename TargetType>
+    void
+    BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        if (getTargetPc() != wf->rpc()) {
+            wf->pc(getTargetPc());  // rpc and exec mask stay as they are
+        } else {
+            wf->popFromReconvergenceStack();
+        }
+        wf->discardFetch();
+    }
+
+    // br whose target is a label; reports no source or destination
+    // register operands.
+    class BrDirectInst : public BrInstBase<LabelOperand>
+    {
+      public:
+        BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+            : BrInstBase<LabelOperand>(ib, obj) {}
+
+        int numDstRegOperands() { return 0; }
+        int numSrcRegOperands() { return 0; }
+    };
+
+    // br whose target is an SRegOperand; the source-register count is
+    // whatever target.isVectorRegister() returns, converted to int.
+    class BrIndirectInst : public BrInstBase<SRegOperand>
+    {
+      public:
+        BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
+            : BrInstBase<SRegOperand>(ib, obj) {}
+        int numDstRegOperands() { return 0; }
+        int numSrcRegOperands() { return target.isVectorRegister(); }
+    };
+
+    GPUStaticInst*
+    decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj);
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
-- 
cgit v1.2.3