summaryrefslogtreecommitdiff
path: root/src/arch/hsail/insts/mem.hh
diff options
context:
space:
mode:
Diffstat (limited to 'src/arch/hsail/insts/mem.hh')
-rw-r--r--src/arch/hsail/insts/mem.hh1629
1 files changed, 1629 insertions, 0 deletions
diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh
new file mode 100644
index 000000000..d3ce76dee
--- /dev/null
+++ b/src/arch/hsail/insts/mem.hh
@@ -0,0 +1,1629 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
+#define __ARCH_HSAIL_INSTS_MEM_HH__
+
+#include "arch/hsail/insts/decl.hh"
+#include "arch/hsail/insts/gpu_static_inst.hh"
+#include "arch/hsail/operand.hh"
+
+namespace HsailISA
+{
+ class MemInst
+ {
+ public:
+ MemInst() : size(0), addr_operand(nullptr) { }
+
+ MemInst(Enums::MemType m_type)
+ {
+ if (m_type == Enums::M_U64 ||
+ m_type == Enums::M_S64 ||
+ m_type == Enums::M_F64) {
+ size = 8;
+ } else if (m_type == Enums::M_U32 ||
+ m_type == Enums::M_S32 ||
+ m_type == Enums::M_F32) {
+ size = 4;
+ } else if (m_type == Enums::M_U16 ||
+ m_type == Enums::M_S16 ||
+ m_type == Enums::M_F16) {
+ size = 2;
+ } else {
+ size = 1;
+ }
+
+ addr_operand = nullptr;
+ }
+
+ void
+ init_addr(AddrOperandBase *_addr_operand)
+ {
+ addr_operand = _addr_operand;
+ }
+
+ private:
+ int size;
+ AddrOperandBase *addr_operand;
+
+ public:
+ int getMemOperandSize() { return size; }
+ AddrOperandBase *getAddressOperand() { return addr_operand; }
+ };
+
+ template<typename DestOperandType, typename AddrOperandType>
+ class LdaInstBase : public HsailGPUStaticInst
+ {
+ public:
+ typename DestOperandType::DestOperand dest;
+ AddrOperandType addr;
+
+ LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ using namespace Brig;
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ }
+
+ int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isVectorRegister() :
+ this->addr.isVectorRegister());
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isCondRegister() :
+ this->addr.isCondRegister());
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isScalarRegister() :
+ this->addr.isScalarRegister());
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex > 0)
+ return(this->addr.isVectorRegister());
+ return false;
+ }
+ bool isDstOperand(int operandIndex) {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return(operandIndex == 0);
+ }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.opSize() :
+ this->addr.opSize());
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.regIndex() :
+ this->addr.regIndex());
+ }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister())
+ return 2;
+ return 1;
+ }
+ };
+
+ template<typename DestDataType, typename AddrOperandType>
+ class LdaInst :
+ public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
+ public MemInst
+ {
+ public:
+ void generateDisassembly();
+
+ LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : LdaInstBase<typename DestDataType::OperandType,
+ AddrOperandType>(ib, obj, _opcode)
+ {
+ init_addr(&this->addr);
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ template<typename DataType>
+ GPUStaticInst*
+ decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
+ BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
+
+ if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
+ return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
+ } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ // V2/V4 not allowed
+ switch (regDataType.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
+ default:
+ fatal("Bad ldas register operand type %d\n", regDataType.type);
+ }
+ } else {
+ fatal("Bad ldas register operand kind %d\n", regDataType.kind);
+ }
+ }
+
+ template<typename MemOperandType, typename DestOperandType,
+ typename AddrOperandType>
+ class LdInstBase : public HsailGPUStaticInst
+ {
+ public:
+ Brig::BrigWidth8_t width;
+ typename DestOperandType::DestOperand dest;
+ AddrOperandType addr;
+
+ Brig::BrigSegment segment;
+ Brig::BrigMemoryOrder memoryOrder;
+ Brig::BrigMemoryScope memoryScope;
+ unsigned int equivClass;
+ bool isArgLoad()
+ {
+ return segment == Brig::BRIG_SEGMENT_KERNARG ||
+ segment == Brig::BRIG_SEGMENT_ARG;
+ }
+ void
+ initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstMem *ldst = (const BrigInstMem*)ib;
+
+ segment = (BrigSegment)ldst->segment;
+ memoryOrder = BRIG_MEMORY_ORDER_NONE;
+ memoryScope = BRIG_MEMORY_SCOPE_NONE;
+ equivClass = ldst->equivClass;
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_READ;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_READ;
+ break;
+
+ case BRIG_SEGMENT_PRIVATE:
+ o_type = Enums::OT_PRIVATE_READ;
+ break;
+
+ case BRIG_SEGMENT_READONLY:
+ o_type = Enums::OT_READONLY_READ;
+ break;
+
+ case BRIG_SEGMENT_SPILL:
+ o_type = Enums::OT_SPILL_READ;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_READ;
+ break;
+
+ case BRIG_SEGMENT_KERNARG:
+ o_type = Enums::OT_KERN_READ;
+ break;
+
+ case BRIG_SEGMENT_ARG:
+ o_type = Enums::OT_ARG;
+ break;
+
+ default:
+ panic("Ld: segment %d not supported\n", segment);
+ }
+
+ width = ldst->width;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+ if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
+ dest.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ }
+
+ void
+ initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
+
+ segment = (BrigSegment)at->segment;
+ memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+ memoryScope = (BrigMemoryScope)at->memoryScope;
+ equivClass = 0;
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_READ;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_READ;
+ break;
+
+ case BRIG_SEGMENT_PRIVATE:
+ o_type = Enums::OT_PRIVATE_READ;
+ break;
+
+ case BRIG_SEGMENT_READONLY:
+ o_type = Enums::OT_READONLY_READ;
+ break;
+
+ case BRIG_SEGMENT_SPILL:
+ o_type = Enums::OT_SPILL_READ;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_READ;
+ break;
+
+ case BRIG_SEGMENT_KERNARG:
+ o_type = Enums::OT_KERN_READ;
+ break;
+
+ case BRIG_SEGMENT_ARG:
+ o_type = Enums::OT_ARG;
+ break;
+
+ default:
+ panic("Ld: segment %d not supported\n", segment);
+ }
+
+ width = BRIG_WIDTH_1;
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+
+ if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
+ dest.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands,1);
+ addr.init(op_offs, obj);
+ }
+
+ LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ using namespace Brig;
+
+ if (ib->opcode == BRIG_OPCODE_LD) {
+ initLd(ib, obj, _opcode);
+ } else {
+ initAtomicLd(ib, obj, _opcode);
+ }
+ }
+
+ int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister())
+ return 2;
+ else
+ return 1;
+ }
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isVectorRegister() :
+ this->addr.isVectorRegister());
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isCondRegister() :
+ this->addr.isCondRegister());
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.isScalarRegister() :
+ this->addr.isScalarRegister());
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex > 0)
+ return(this->addr.isVectorRegister());
+ return false;
+ }
+ bool isDstOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return(operandIndex == 0);
+ }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.opSize() :
+ this->addr.opSize());
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return((operandIndex == 0) ? dest.regIndex() :
+ this->addr.regIndex());
+ }
+ };
+
+ template<typename MemDataType, typename DestDataType,
+ typename AddrOperandType>
+ class LdInst :
+ public LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType, AddrOperandType>,
+ public MemInst
+ {
+ typename DestDataType::OperandType::DestOperand dest_vect[4];
+ uint16_t num_dest_operands;
+ void generateDisassembly();
+
+ public:
+ LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>(ib, obj, _opcode),
+ MemInst(MemDataType::memType)
+ {
+ init_addr(&this->addr);
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands,0);
+ const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+
+ if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+ const Brig::BrigOperandOperandList *brigRegVecOp =
+ (const Brig::BrigOperandOperandList*)brigOp;
+
+ num_dest_operands =
+ *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
+
+ assert(num_dest_operands <= 4);
+ } else {
+ num_dest_operands = 1;
+ }
+
+ if (num_dest_operands > 1) {
+ assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
+
+ for (int i = 0; i < num_dest_operands; ++i) {
+ dest_vect[i].init_from_vect(op_offs, obj, i);
+ }
+ }
+ }
+
+ void
+ initiateAcc(GPUDynInstPtr gpuDynInst) override
+ {
+ typedef typename MemDataType::CType c0;
+
+ gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
+
+ if (num_dest_operands > 1) {
+ for (int i = 0; i < VSZ; ++i)
+ if (gpuDynInst->exec_mask[i])
+ gpuDynInst->statusVector.push_back(num_dest_operands);
+ else
+ gpuDynInst->statusVector.push_back(0);
+ }
+
+ for (int k = 0; k < num_dest_operands; ++k) {
+
+ c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+
+ for (int i = 0; i < VSZ; ++i) {
+ if (gpuDynInst->exec_mask[i]) {
+ Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
+
+ if (isLocalMem()) {
+ // load from shared memory
+ *d = gpuDynInst->wavefront()->ldsChunk->
+ read<c0>(vaddr);
+ } else {
+ Request *req = new Request(0, vaddr, sizeof(c0), 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, i);
+
+ gpuDynInst->setRequestFlags(req);
+ PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
+ pkt->dataStatic(d);
+
+ if (gpuDynInst->computeUnit()->shader->
+ separate_acquire_release &&
+ gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ // if this load has acquire semantics,
+ // set the response continuation function
+ // to perform an Acquire request
+ gpuDynInst->execContinuation =
+ &GPUStaticInst::execLdAcq;
+
+ gpuDynInst->useContinuation = true;
+ } else {
+ // the request will be finished when
+ // the load completes
+ gpuDynInst->useContinuation = false;
+ }
+ // translation is performed in sendRequest()
+ gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
+ i, pkt);
+ }
+ }
+ ++d;
+ }
+ }
+
+ gpuDynInst->updateStats();
+ }
+
+ private:
+ void
+ execLdAcq(GPUDynInstPtr gpuDynInst) override
+ {
+ // after the load has complete and if the load has acquire
+ // semantics, issue an acquire request.
+ if (!isLocalMem()) {
+ if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+ && gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ gpuDynInst->statusBitVector = VectorMask(1);
+ gpuDynInst->useContinuation = false;
+ // create request
+ Request *req = new Request(0, 0, 0, 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, -1);
+ req->setFlags(Request::ACQUIRE);
+ gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
+ }
+ }
+ }
+
+ public:
+ bool
+ isLocalMem() const override
+ {
+ return this->segment == Brig::BRIG_SEGMENT_GROUP;
+ }
+
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.isVectorRegister());
+ if (num_dest_operands > 1) {
+ return dest_vect[operandIndex].isVectorRegister();
+ }
+ else if (num_dest_operands == 1) {
+ return LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.isVectorRegister();
+ }
+ return false;
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.isCondRegister());
+ if (num_dest_operands > 1)
+ return dest_vect[operandIndex].isCondRegister();
+ else if (num_dest_operands == 1)
+ return LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.isCondRegister();
+ return false;
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.isScalarRegister());
+ if (num_dest_operands > 1)
+ return dest_vect[operandIndex].isScalarRegister();
+ else if (num_dest_operands == 1)
+ return LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.isScalarRegister();
+ return false;
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.isVectorRegister());
+ return false;
+ }
+ bool isDstOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return false;
+ return true;
+ }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.opSize());
+ if (num_dest_operands > 1)
+ return(dest_vect[operandIndex].opSize());
+ else if (num_dest_operands == 1)
+ return(LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.opSize());
+ return 0;
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if ((num_dest_operands != getNumOperands()) &&
+ (operandIndex == (getNumOperands()-1)))
+ return(this->addr.regIndex());
+ if (num_dest_operands > 1)
+ return(dest_vect[operandIndex].regIndex());
+ else if (num_dest_operands == 1)
+ return(LdInstBase<typename MemDataType::CType,
+ typename DestDataType::OperandType,
+ AddrOperandType>::dest.regIndex());
+ return -1;
+ }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
+ return(num_dest_operands+1);
+ else
+ return(num_dest_operands);
+ }
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ template<typename MemDT, typename DestDT>
+ GPUStaticInst*
+ decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands,1);
+ BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
+
+ if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
+ return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
+ } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
+ tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+ switch (tmp.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ return new LdInst<MemDT, DestDT,
+ SRegAddrOperand>(ib, obj, "ld");
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ return new LdInst<MemDT, DestDT,
+ DRegAddrOperand>(ib, obj, "ld");
+ default:
+ fatal("Bad ld register operand type %d\n", tmp.regKind);
+ }
+ } else {
+ fatal("Bad ld register operand kind %d\n", tmp.kind);
+ }
+ }
+
+ template<typename MemDT>
+ GPUStaticInst*
+ decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ unsigned op_offs = obj->getOperandPtr(ib->operands,0);
+ BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
+
+ assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
+ dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
+ switch(dest.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ switch (ib->type) {
+ case Brig::BRIG_TYPE_B8:
+ case Brig::BRIG_TYPE_B16:
+ case Brig::BRIG_TYPE_B32:
+ return decodeLd2<MemDT, B32>(ib, obj);
+ case Brig::BRIG_TYPE_U8:
+ case Brig::BRIG_TYPE_U16:
+ case Brig::BRIG_TYPE_U32:
+ return decodeLd2<MemDT, U32>(ib, obj);
+ case Brig::BRIG_TYPE_S8:
+ case Brig::BRIG_TYPE_S16:
+ case Brig::BRIG_TYPE_S32:
+ return decodeLd2<MemDT, S32>(ib, obj);
+ case Brig::BRIG_TYPE_F16:
+ case Brig::BRIG_TYPE_F32:
+ return decodeLd2<MemDT, U32>(ib, obj);
+ default:
+ fatal("Bad ld register operand type %d, %d\n",
+ dest.regKind, ib->type);
+ };
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ switch (ib->type) {
+ case Brig::BRIG_TYPE_B64:
+ return decodeLd2<MemDT, B64>(ib, obj);
+ case Brig::BRIG_TYPE_U64:
+ return decodeLd2<MemDT, U64>(ib, obj);
+ case Brig::BRIG_TYPE_S64:
+ return decodeLd2<MemDT, S64>(ib, obj);
+ case Brig::BRIG_TYPE_F64:
+ return decodeLd2<MemDT, U64>(ib, obj);
+ default:
+ fatal("Bad ld register operand type %d, %d\n",
+ dest.regKind, ib->type);
+ };
+ default:
+ fatal("Bad ld register operand type %d, %d\n", dest.regKind,
+ ib->type);
+ }
+ }
+
+ template<typename MemDataType, typename SrcOperandType,
+ typename AddrOperandType>
+ class StInstBase : public HsailGPUStaticInst
+ {
+ public:
+ typename SrcOperandType::SrcOperand src;
+ AddrOperandType addr;
+
+ Brig::BrigSegment segment;
+ Brig::BrigMemoryScope memoryScope;
+ Brig::BrigMemoryOrder memoryOrder;
+ unsigned int equivClass;
+
+ void
+ initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstMem *ldst = (const BrigInstMem*)ib;
+
+ segment = (BrigSegment)ldst->segment;
+ memoryOrder = BRIG_MEMORY_ORDER_NONE;
+ memoryScope = BRIG_MEMORY_SCOPE_NONE;
+ equivClass = ldst->equivClass;
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_WRITE;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_WRITE;
+ break;
+
+ case BRIG_SEGMENT_PRIVATE:
+ o_type = Enums::OT_PRIVATE_WRITE;
+ break;
+
+ case BRIG_SEGMENT_READONLY:
+ o_type = Enums::OT_READONLY_WRITE;
+ break;
+
+ case BRIG_SEGMENT_SPILL:
+ o_type = Enums::OT_SPILL_WRITE;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_WRITE;
+ break;
+
+ case BRIG_SEGMENT_ARG:
+ o_type = Enums::OT_ARG;
+ break;
+
+ default:
+ panic("St: segment %d not supported\n", segment);
+ }
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ const BrigOperand *baseOp = obj->getOperand(op_offs);
+
+ if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
+ (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
+ src.init(op_offs, obj);
+ }
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+ }
+
+ void
+ initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
+
+ segment = (BrigSegment)at->segment;
+ memoryScope = (BrigMemoryScope)at->memoryScope;
+ memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+ equivClass = 0;
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_WRITE;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_WRITE;
+ break;
+
+ case BRIG_SEGMENT_PRIVATE:
+ o_type = Enums::OT_PRIVATE_WRITE;
+ break;
+
+ case BRIG_SEGMENT_READONLY:
+ o_type = Enums::OT_READONLY_WRITE;
+ break;
+
+ case BRIG_SEGMENT_SPILL:
+ o_type = Enums::OT_SPILL_WRITE;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_WRITE;
+ break;
+
+ case BRIG_SEGMENT_ARG:
+ o_type = Enums::OT_ARG;
+ break;
+
+ default:
+ panic("St: segment %d not supported\n", segment);
+ }
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ addr.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ src.init(op_offs, obj);
+ }
+
+ StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ using namespace Brig;
+
+ if (ib->opcode == BRIG_OPCODE_ST) {
+ initSt(ib, obj, _opcode);
+ } else {
+ initAtomicSt(ib, obj, _opcode);
+ }
+ }
+
+ int numDstRegOperands() { return 0; }
+ int numSrcRegOperands()
+ {
+ return src.isVectorRegister() + this->addr.isVectorRegister();
+ }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
+ return 2;
+ else
+ return 1;
+ }
+ bool isVectorRegister(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.isVectorRegister() :
+ this->addr.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.isCondRegister() :
+ this->addr.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.isScalarRegister() :
+ this->addr.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return true;
+ }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.opSize() : this->addr.opSize();
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert(operandIndex >= 0 && operandIndex < getNumOperands());
+ return !operandIndex ? src.regIndex() : this->addr.regIndex();
+ }
+ };
+
+
+ template<typename MemDataType, typename SrcDataType,
+ typename AddrOperandType>
+ class StInst :
+ public StInstBase<MemDataType, typename SrcDataType::OperandType,
+ AddrOperandType>,
+ public MemInst
+ {
+ public:
+ typename SrcDataType::OperandType::SrcOperand src_vect[4];
+ uint16_t num_src_operands;
+ void generateDisassembly();
+
+ StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode, int srcIdx)
+ : StInstBase<MemDataType, typename SrcDataType::OperandType,
+ AddrOperandType>(ib, obj, _opcode),
+ MemInst(SrcDataType::memType)
+ {
+ init_addr(&this->addr);
+
+ BrigRegOperandInfo rinfo;
+ unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
+ const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
+
+ if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
+ const Brig::BrigOperandConstantBytes *op =
+ (Brig::BrigOperandConstantBytes*)baseOp;
+
+ rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
+ Brig::BRIG_TYPE_NONE);
+ } else {
+ rinfo = findRegDataType(op_offs, obj);
+ }
+
+ if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+ const Brig::BrigOperandOperandList *brigRegVecOp =
+ (const Brig::BrigOperandOperandList*)baseOp;
+
+ num_src_operands =
+ *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
+
+ assert(num_src_operands <= 4);
+ } else {
+ num_src_operands = 1;
+ }
+
+ if (num_src_operands > 1) {
+ assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
+
+ for (int i = 0; i < num_src_operands; ++i) {
+ src_vect[i].init_from_vect(op_offs, obj, i);
+ }
+ }
+ }
+
+ void
+ initiateAcc(GPUDynInstPtr gpuDynInst) override
+ {
+ // before performing a store, check if this store has
+ // release semantics, and if so issue a release first
+ if (!isLocalMem()) {
+ if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+ && gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_RELEASE) {
+
+ gpuDynInst->statusBitVector = VectorMask(1);
+ gpuDynInst->execContinuation = &GPUStaticInst::execSt;
+ gpuDynInst->useContinuation = true;
+ // create request
+ Request *req = new Request(0, 0, 0, 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, -1);
+ req->setFlags(Request::RELEASE);
+ gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
+
+ return;
+ }
+ }
+
+ // if there is no release semantic, perform stores immediately
+ execSt(gpuDynInst);
+ }
+
+ bool
+ isLocalMem() const override
+ {
+ return this->segment == Brig::BRIG_SEGMENT_GROUP;
+ }
+
+ private:
+ // execSt may be called through a continuation
+ // if the store had release semantics. see comment for
+ // execSt in gpu_static_inst.hh
+ void
+ execSt(GPUDynInstPtr gpuDynInst) override
+ {
+ typedef typename MemDataType::CType c0;
+
+ gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
+
+ if (num_src_operands > 1) {
+ for (int i = 0; i < VSZ; ++i)
+ if (gpuDynInst->exec_mask[i])
+ gpuDynInst->statusVector.push_back(num_src_operands);
+ else
+ gpuDynInst->statusVector.push_back(0);
+ }
+
+ for (int k = 0; k < num_src_operands; ++k) {
+ c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+
+ for (int i = 0; i < VSZ; ++i) {
+ if (gpuDynInst->exec_mask[i]) {
+ Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
+
+ if (isLocalMem()) {
+ //store to shared memory
+ gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
+ *d);
+ } else {
+ Request *req =
+ new Request(0, vaddr, sizeof(c0), 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, i);
+
+ gpuDynInst->setRequestFlags(req);
+ PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
+ pkt->dataStatic<c0>(d);
+
+ // translation is performed in sendRequest()
+ // the request will be finished when the store completes
+ gpuDynInst->useContinuation = false;
+ gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
+ i, pkt);
+
+ }
+ }
+ ++d;
+ }
+ }
+
+ gpuDynInst->updateStats();
+ }
+
+ public:
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.isVectorRegister();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].isVectorRegister();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.isVectorRegister();
+ return false;
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.isCondRegister();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].isCondRegister();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.isCondRegister();
+ return false;
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.isScalarRegister();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].isScalarRegister();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.isScalarRegister();
+ return false;
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ return true;
+ }
+ bool isDstOperand(int operandIndex) { return false; }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.opSize();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].opSize();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.opSize();
+ return 0;
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex == num_src_operands)
+ return this->addr.regIndex();
+ if (num_src_operands > 1)
+ return src_vect[operandIndex].regIndex();
+ else if (num_src_operands == 1)
+ return StInstBase<MemDataType,
+ typename SrcDataType::OperandType,
+ AddrOperandType>::src.regIndex();
+ return -1;
+ }
+ int getNumOperands()
+ {
+ if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
+ return num_src_operands + 1;
+ else
+ return num_src_operands;
+ }
+ void execute(GPUDynInstPtr gpuDynInst);
+ };
+
+ template<typename DataType, typename SrcDataType>
+ GPUStaticInst*
+ decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ int srcIdx = 0;
+ int destIdx = 1;
+ if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
+ ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
+ srcIdx = 1;
+ destIdx = 0;
+ }
+ unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
+
+ BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
+
+ if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
+ return new StInst<DataType, SrcDataType,
+ NoRegAddrOperand>(ib, obj, "st", srcIdx);
+ } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ // V2/V4 not allowed
+ switch (tmp.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ return new StInst<DataType, SrcDataType,
+ SRegAddrOperand>(ib, obj, "st", srcIdx);
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ return new StInst<DataType, SrcDataType,
+ DRegAddrOperand>(ib, obj, "st", srcIdx);
+ default:
+ fatal("Bad st register operand type %d\n", tmp.type);
+ }
+ } else {
+ fatal("Bad st register operand kind %d\n", tmp.kind);
+ }
+ }
+
+ Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
+ Brig::BrigAtomicOperation brigOp);
+
+ template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
+ bool HasDst>
+ class AtomicInstBase : public HsailGPUStaticInst
+ {
+ public:
+ typename OperandType::DestOperand dest;
+ typename OperandType::SrcOperand src[NumSrcOperands];
+ AddrOperandType addr;
+
+ Brig::BrigSegment segment;
+ Brig::BrigMemoryOrder memoryOrder;
+ Brig::BrigAtomicOperation atomicOperation;
+ Brig::BrigMemoryScope memoryScope;
+ Brig::BrigOpcode opcode;
+ Enums::MemOpType opType;
+
+ AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : HsailGPUStaticInst(obj, _opcode)
+ {
+ using namespace Brig;
+
+ const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
+
+ segment = (BrigSegment)at->segment;
+ memoryScope = (BrigMemoryScope)at->memoryScope;
+ memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+ atomicOperation = (BrigAtomicOperation)at->atomicOperation;
+ opcode = (BrigOpcode)ib->opcode;
+ opType = brigAtomicToMemOpType(opcode, atomicOperation);
+
+ switch (segment) {
+ case BRIG_SEGMENT_GLOBAL:
+ o_type = Enums::OT_GLOBAL_ATOMIC;
+ break;
+
+ case BRIG_SEGMENT_GROUP:
+ o_type = Enums::OT_SHARED_ATOMIC;
+ break;
+
+ case BRIG_SEGMENT_FLAT:
+ o_type = Enums::OT_FLAT_ATOMIC;
+ break;
+
+ default:
+ panic("Atomic: segment %d not supported\n", segment);
+ }
+
+ if (HasDst) {
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ dest.init(op_offs, obj);
+
+ op_offs = obj->getOperandPtr(ib->operands, 1);
+ addr.init(op_offs, obj);
+
+ for (int i = 0; i < NumSrcOperands; ++i) {
+ op_offs = obj->getOperandPtr(ib->operands, i + 2);
+ src[i].init(op_offs, obj);
+ }
+ } else {
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+ addr.init(op_offs, obj);
+
+ for (int i = 0; i < NumSrcOperands; ++i) {
+ op_offs = obj->getOperandPtr(ib->operands, i + 1);
+ src[i].init(op_offs, obj);
+ }
+ }
+ }
+
+ int numSrcRegOperands()
+ {
+ int operands = 0;
+ for (int i = 0; i < NumSrcOperands; i++) {
+ if (src[i].isVectorRegister() == true) {
+ operands++;
+ }
+ }
+ if (addr.isVectorRegister())
+ operands++;
+ return operands;
+ }
+ int numDstRegOperands() { return dest.isVectorRegister(); }
+ int getNumOperands()
+ {
+ if (addr.isVectorRegister())
+ return(NumSrcOperands + 2);
+ return(NumSrcOperands + 1);
+ }
+ bool isVectorRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isVectorRegister();
+ else if (operandIndex == NumSrcOperands)
+ return(addr.isVectorRegister());
+ else
+ return dest.isVectorRegister();
+ }
+ bool isCondRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isCondRegister();
+ else if (operandIndex == NumSrcOperands)
+ return(addr.isCondRegister());
+ else
+ return dest.isCondRegister();
+ }
+ bool isScalarRegister(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return src[operandIndex].isScalarRegister();
+ else if (operandIndex == NumSrcOperands)
+ return(addr.isScalarRegister());
+ else
+ return dest.isScalarRegister();
+ }
+ bool isSrcOperand(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return true;
+ else if (operandIndex == NumSrcOperands)
+ return(addr.isVectorRegister());
+ else
+ return false;
+ }
+ bool isDstOperand(int operandIndex)
+ {
+ if (operandIndex <= NumSrcOperands)
+ return false;
+ else
+ return true;
+ }
+ int getOperandSize(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return(src[operandIndex].opSize());
+ else if (operandIndex == NumSrcOperands)
+ return(addr.opSize());
+ else
+ return(dest.opSize());
+ }
+ int getRegisterIndex(int operandIndex)
+ {
+ assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
+ if (operandIndex < NumSrcOperands)
+ return(src[operandIndex].regIndex());
+ else if (operandIndex == NumSrcOperands)
+ return(addr.regIndex());
+ else
+ return(dest.regIndex());
+ return -1;
+ }
+ };
+
+ template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
+ bool HasDst>
+ class AtomicInst :
+ public AtomicInstBase<typename MemDataType::OperandType,
+ AddrOperandType, NumSrcOperands, HasDst>,
+ public MemInst
+ {
+ public:
+ void generateDisassembly();
+
+ AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
+ const char *_opcode)
+ : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
+ NumSrcOperands, HasDst>
+ (ib, obj, _opcode),
+ MemInst(MemDataType::memType)
+ {
+ init_addr(&this->addr);
+ }
+
+ void
+ initiateAcc(GPUDynInstPtr gpuDynInst) override
+ {
+ // before doing the RMW, check if this atomic has
+ // release semantics, and if so issue a release first
+ if (!isLocalMem()) {
+ if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+ && (gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
+
+ gpuDynInst->statusBitVector = VectorMask(1);
+
+ gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
+ gpuDynInst->useContinuation = true;
+
+ // create request
+ Request *req = new Request(0, 0, 0, 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, -1);
+ req->setFlags(Request::RELEASE);
+ gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
+
+ return;
+ }
+ }
+
+ // if there is no release semantic, execute the RMW immediately
+ execAtomic(gpuDynInst);
+
+ }
+
+ void execute(GPUDynInstPtr gpuDynInst);
+
+ bool
+ isLocalMem() const override
+ {
+ return this->segment == Brig::BRIG_SEGMENT_GROUP;
+ }
+
+ private:
+ // execAtomic may be called through a continuation
+ // if the RMW had release semantics. see comment for
+ // execContinuation in gpu_dyn_inst.hh
+ void
+ execAtomic(GPUDynInstPtr gpuDynInst) override
+ {
+ gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
+
+ typedef typename MemDataType::CType c0;
+
+ c0 *d = &((c0*) gpuDynInst->d_data)[0];
+ c0 *e = &((c0*) gpuDynInst->a_data)[0];
+ c0 *f = &((c0*) gpuDynInst->x_data)[0];
+
+ for (int i = 0; i < VSZ; ++i) {
+ if (gpuDynInst->exec_mask[i]) {
+ Addr vaddr = gpuDynInst->addr[i];
+
+ if (isLocalMem()) {
+ Wavefront *wavefront = gpuDynInst->wavefront();
+ *d = wavefront->ldsChunk->read<c0>(vaddr);
+
+ switch (this->opType) {
+ case Enums::MO_AADD:
+ case Enums::MO_ANRADD:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) + (*e));
+ break;
+ case Enums::MO_ASUB:
+ case Enums::MO_ANRSUB:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) - (*e));
+ break;
+ case Enums::MO_AMAX:
+ case Enums::MO_ANRMAX:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ std::max(wavefront->ldsChunk->read<c0>(vaddr),
+ (*e)));
+ break;
+ case Enums::MO_AMIN:
+ case Enums::MO_ANRMIN:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ std::min(wavefront->ldsChunk->read<c0>(vaddr),
+ (*e)));
+ break;
+ case Enums::MO_AAND:
+ case Enums::MO_ANRAND:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) & (*e));
+ break;
+ case Enums::MO_AOR:
+ case Enums::MO_ANROR:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) | (*e));
+ break;
+ case Enums::MO_AXOR:
+ case Enums::MO_ANRXOR:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
+ break;
+ case Enums::MO_AINC:
+ case Enums::MO_ANRINC:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) + 1);
+ break;
+ case Enums::MO_ADEC:
+ case Enums::MO_ANRDEC:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ wavefront->ldsChunk->read<c0>(vaddr) - 1);
+ break;
+ case Enums::MO_AEXCH:
+ case Enums::MO_ANREXCH:
+ wavefront->ldsChunk->write<c0>(vaddr, (*e));
+ break;
+ case Enums::MO_ACAS:
+ case Enums::MO_ANRCAS:
+ wavefront->ldsChunk->write<c0>(vaddr,
+ (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
+ (*f) : wavefront->ldsChunk->read<c0>(vaddr));
+ break;
+ default:
+ fatal("Unrecognized or invalid HSAIL atomic op "
+ "type.\n");
+ break;
+ }
+ } else {
+ Request *req =
+ new Request(0, vaddr, sizeof(c0), 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, i,
+ gpuDynInst->makeAtomicOpFunctor<c0>(e,
+ f, this->opType));
+
+ gpuDynInst->setRequestFlags(req);
+ PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
+ pkt->dataStatic(d);
+
+ if (gpuDynInst->computeUnit()->shader->
+ separate_acquire_release &&
+ (gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE)) {
+ // if this atomic has acquire semantics,
+ // schedule the continuation to perform an
+ // acquire after the RMW completes
+ gpuDynInst->execContinuation =
+ &GPUStaticInst::execAtomicAcq;
+
+ gpuDynInst->useContinuation = true;
+ } else {
+ // the request will be finished when the RMW completes
+ gpuDynInst->useContinuation = false;
+ }
+ // translation is performed in sendRequest()
+ gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
+ pkt);
+ }
+ }
+
+ ++d;
+ ++e;
+ ++f;
+ }
+
+ gpuDynInst->updateStats();
+ }
+
+ // execAtomicACq will always be called through a continuation.
+ // see comment for execContinuation in gpu_dyn_inst.hh
+ void
+ execAtomicAcq(GPUDynInstPtr gpuDynInst) override
+ {
+ // after performing the RMW, check to see if this instruction
+ // has acquire semantics, and if so, issue an acquire
+ if (!isLocalMem()) {
+ if (gpuDynInst->computeUnit()->shader->separate_acquire_release
+ && gpuDynInst->memoryOrder ==
+ Enums::MEMORY_ORDER_SC_ACQUIRE) {
+ gpuDynInst->statusBitVector = VectorMask(1);
+
+ // the request will be finished when
+ // the acquire completes
+ gpuDynInst->useContinuation = false;
+ // create request
+ Request *req = new Request(0, 0, 0, 0,
+ gpuDynInst->computeUnit()->masterId(),
+ 0, gpuDynInst->wfDynId, -1);
+ req->setFlags(Request::ACQUIRE);
+ gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
+ }
+ }
+ }
+ };
+
+ template<typename DataType, typename AddrOperandType, int NumSrcOperands>
+ GPUStaticInst*
+ constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
+
+ if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
+ return decodeLd<DataType>(ib, obj);
+ } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
+ switch (ib->type) {
+ case Brig::BRIG_TYPE_B8:
+ return decodeSt<S8,S8>(ib, obj);
+ case Brig::BRIG_TYPE_B16:
+ return decodeSt<S8,S16>(ib, obj);
+ case Brig::BRIG_TYPE_B32:
+ return decodeSt<S8,S32>(ib, obj);
+ case Brig::BRIG_TYPE_B64:
+ return decodeSt<S8,S64>(ib, obj);
+ default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
+ }
+ } else {
+ if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
+ return new AtomicInst<DataType, AddrOperandType,
+ NumSrcOperands, false>(ib, obj, "atomicnoret");
+ else
+ return new AtomicInst<DataType, AddrOperandType,
+ NumSrcOperands, true>(ib, obj, "atomic");
+ }
+ }
+
+ template<typename DataType, int NumSrcOperands>
+ GPUStaticInst*
+ decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
+ Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
+
+ unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
+
+ BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
+
+ if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
+ return constructAtomic<DataType, NoRegAddrOperand,
+ NumSrcOperands>(ib, obj);
+ } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
+ // V2/V4 not allowed
+ switch (tmp.regKind) {
+ case Brig::BRIG_REGISTER_KIND_SINGLE:
+ return constructAtomic<DataType, SRegAddrOperand,
+ NumSrcOperands>(ib, obj);
+ case Brig::BRIG_REGISTER_KIND_DOUBLE:
+ return constructAtomic<DataType, DRegAddrOperand,
+ NumSrcOperands>(ib, obj);
+ default:
+ fatal("Bad atomic register operand type %d\n", tmp.type);
+ }
+ } else {
+ fatal("Bad atomic register operand kind %d\n", tmp.kind);
+ }
+ }
+
+
+ template<typename DataType>
+ GPUStaticInst*
+ decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
+
+ if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
+ return decodeAtomicHelper<DataType, 2>(ib, obj);
+ } else {
+ return decodeAtomicHelper<DataType, 1>(ib, obj);
+ }
+ }
+
+ template<typename DataType>
+ GPUStaticInst*
+ decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
+ {
+ const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
+ if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
+ return decodeAtomicHelper<DataType, 2>(ib, obj);
+ } else {
+ return decodeAtomicHelper<DataType, 1>(ib, obj);
+ }
+ }
+} // namespace HsailISA
+
+#endif // __ARCH_HSAIL_INSTS_MEM_HH__