summaryrefslogtreecommitdiff
path: root/src/arch/hsail/insts/mem_impl.hh
diff options
context:
space:
mode:
authorTony Gutierrez <anthony.gutierrez@amd.com>2016-01-19 14:28:22 -0500
committerTony Gutierrez <anthony.gutierrez@amd.com>2016-01-19 14:28:22 -0500
commit1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree867510a147cd095f19499d26b7c02d27de4cae9d /src/arch/hsail/insts/mem_impl.hh
parent28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
downloadgem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/arch/hsail/insts/mem_impl.hh')
-rw-r--r--src/arch/hsail/insts/mem_impl.hh660
1 files changed, 660 insertions, 0 deletions
diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh
new file mode 100644
index 000000000..94f0cd6aa
--- /dev/null
+++ b/src/arch/hsail/insts/mem_impl.hh
@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#include "arch/hsail/generic_types.hh"
+#include "gpu-compute/hsail_code.hh"
+
+// defined in code.cc, but not worth sucking in all of code.h for this
+// at this point
+extern const char *segmentNames[];
+
+namespace HsailISA
+{
+ template<typename DestDataType, typename AddrRegOperandType>
+ void
+ LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
+ {
+ this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
+ DestDataType::label,
+ this->dest.disassemble(),
+ this->addr.disassemble());
+ }
+
+ template<typename DestDataType, typename AddrRegOperandType>
+ void
+ LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef typename DestDataType::CType CType M5_VAR_USED;
+ const VectorMask &mask = w->get_pred();
+ uint64_t addr_vec[VSZ];
+ this->addr.calcVector(w, addr_vec);
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ this->dest.set(w, lane, addr_vec[lane]);
+ }
+ }
+ }
+
+ template<typename MemDataType, typename DestDataType,
+ typename AddrRegOperandType>
+ void
+ LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
+ {
+ switch (num_dest_operands) {
+ case 1:
+ this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
+ segmentNames[this->segment],
+ MemDataType::label,
+ this->dest.disassemble(),
+ this->addr.disassemble());
+ break;
+ case 2:
+ this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
+ segmentNames[this->segment],
+ MemDataType::label,
+ this->dest_vect[0].disassemble(),
+ this->dest_vect[1].disassemble(),
+ this->addr.disassemble());
+ break;
+ case 4:
+ this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
+ this->opcode,
+ segmentNames[this->segment],
+ MemDataType::label,
+ this->dest_vect[0].disassemble(),
+ this->dest_vect[1].disassemble(),
+ this->dest_vect[2].disassemble(),
+ this->dest_vect[3].disassemble(),
+ this->addr.disassemble());
+ break;
+ default:
+ fatal("Bad ld register dest operand, num vector operands: %d \n",
+ num_dest_operands);
+ break;
+ }
+ }
+
+ static Addr
+ calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
+ {
+ // what is the size of the object we are accessing??
+ // NOTE: the compiler doesn't generate enough information
+ // to do this yet..have to just line up all the private
+ // work-item spaces back to back for now
+ /*
+ StorageElement* se =
+ i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
+ assert(se);
+
+ return w->wfSlotId * w->privSizePerItem * VSZ +
+ se->offset * VSZ +
+ lane * se->size;
+ */
+
+ // addressing strategy: interleave the private spaces of
+ // work-items in a wave-front on 8 byte granularity.
+ // this won't be perfect coalescing like the spill space
+ // strategy, but it's better than nothing. The spill space
+ // strategy won't work with private because the same address
+ // may be accessed by different sized loads/stores.
+
+ // Note: I'm assuming that the largest load/store to private
+ // is 8 bytes. If it is larger, the stride will have to increase
+
+ Addr addr_div8 = addr / 8;
+ Addr addr_mod8 = addr % 8;
+
+ Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase;
+
+ assert(ret < w->privBase + (w->privSizePerItem * VSZ));
+
+ return ret;
+ }
+
+ template<typename MemDataType, typename DestDataType,
+ typename AddrRegOperandType>
+ void
+ LdInst<MemDataType, DestDataType,
+ AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef typename MemDataType::CType MemCType;
+ const VectorMask &mask = w->get_pred();
+
+ // Kernarg references are handled uniquely for now (no Memory Request
+ // is used), so special-case them up front. Someday we should
+ // make this more realistic, at which we should get rid of this
+ // block and fold this case into the switch below.
+ if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
+ MemCType val;
+
+ // I assume no vector ld for kernargs
+ assert(num_dest_operands == 1);
+
+ // assuming for the moment that we'll never do register
+ // offsets into kernarg space... just to make life simpler
+ uint64_t address = this->addr.calcUniform();
+
+ val = *(MemCType*)&w->kernelArgs[address];
+
+ DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ this->dest.set(w, lane, val);
+ }
+ }
+
+ return;
+ } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
+ uint64_t address = this->addr.calcUniform();
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ MemCType val = w->readCallArgMem<MemCType>(lane, address);
+
+ DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
+ (unsigned long long)val);
+
+ this->dest.set(w, lane, val);
+ }
+ }
+
+ return;
+ }
+
+ GPUDynInstPtr m = gpuDynInst;
+
+ this->addr.calcVector(w, m->addr);
+
+ m->m_op = Enums::MO_LD;
+ m->m_type = MemDataType::memType;
+ m->v_type = DestDataType::vgprType;
+
+ m->exec_mask = w->execMask();
+ m->statusBitVector = 0;
+ m->equiv = this->equivClass;
+ m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
+
+ m->scope = getGenericMemoryScope(this->memoryScope);
+
+ if (num_dest_operands == 1) {
+ m->dst_reg = this->dest.regIndex();
+ m->n_reg = 1;
+ } else {
+ m->n_reg = num_dest_operands;
+ for (int i = 0; i < num_dest_operands; ++i) {
+ m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
+ }
+ }
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->kern_id = w->kern_id;
+ m->cu_id = w->computeUnit->cu_id;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
+ switch (this->segment) {
+ case Brig::BRIG_SEGMENT_GLOBAL:
+ m->s_type = SEG_GLOBAL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+
+ // this is a complete hack to get around a compiler bug
+ // (the compiler currently generates global access for private
+ // addresses (starting from 0). We need to add the private offset)
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (m->addr[lane] < w->privSizePerItem) {
+ if (mask[lane]) {
+ // what is the size of the object we are accessing?
+ // find base for for this wavefront
+
+ // calcPrivAddr will fail if accesses are unaligned
+ assert(!((sizeof(MemCType) - 1) & m->addr[lane]));
+
+ Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
+ this);
+
+ m->addr[lane] = privAddr;
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_SPILL:
+ assert(num_dest_operands == 1);
+ m->s_type = SEG_SPILL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ // note: this calculation will NOT WORK if the compiler
+ // ever generates loads/stores to the same address with
+ // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
+ if (mask[lane]) {
+ assert(m->addr[lane] < w->spillSizePerItem);
+
+ m->addr[lane] = m->addr[lane] * w->spillWidth +
+ lane * sizeof(MemCType) + w->spillBase;
+
+ w->last_addr[lane] = m->addr[lane];
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_GROUP:
+ m->s_type = SEG_SHARED;
+ m->pipeId = LDSMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(24));
+ w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
+ w->outstanding_reqs_rd_lm++;
+ w->rd_lm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_READONLY:
+ m->s_type = SEG_READONLY;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
+ m->addr[lane] += w->roBase;
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_PRIVATE:
+ m->s_type = SEG_PRIVATE;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ assert(m->addr[lane] < w->privSizePerItem);
+
+ m->addr[lane] = m->addr[lane] +
+ lane * sizeof(MemCType) + w->privBase;
+ }
+ }
+ }
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ default:
+ fatal("Load to unsupported segment %d %llxe\n", this->segment,
+ m->addr[0]);
+ }
+
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
+
+ template<typename OperationType, typename SrcDataType,
+ typename AddrRegOperandType>
+ void
+ StInst<OperationType, SrcDataType,
+ AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *w = gpuDynInst->wavefront();
+
+ typedef typename OperationType::CType CType;
+
+ const VectorMask &mask = w->get_pred();
+
+ // arg references are handled uniquely for now (no Memory Request
+ // is used), so special-case them up front. Someday we should
+ // make this more realistic, at which we should get rid of this
+ // block and fold this case into the switch below.
+ if (this->segment == Brig::BRIG_SEGMENT_ARG) {
+ uint64_t address = this->addr.calcUniform();
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ CType data = this->src.template get<CType>(w, lane);
+ DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
+ w->writeCallArgMem<CType>(lane, address, data);
+ }
+ }
+
+ return;
+ }
+
+ GPUDynInstPtr m = gpuDynInst;
+
+ m->exec_mask = w->execMask();
+
+ this->addr.calcVector(w, m->addr);
+
+ if (num_src_operands == 1) {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ ((CType*)m->d_data)[lane] =
+ this->src.template get<CType>(w, lane);
+ }
+ }
+ } else {
+ for (int k= 0; k < num_src_operands; ++k) {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ ((CType*)m->d_data)[k * VSZ + lane] =
+ this->src_vect[k].template get<CType>(w, lane);
+ }
+ }
+ }
+ }
+
+ m->m_op = Enums::MO_ST;
+ m->m_type = OperationType::memType;
+ m->v_type = OperationType::vgprType;
+
+ m->statusBitVector = 0;
+ m->equiv = this->equivClass;
+
+ if (num_src_operands == 1) {
+ m->n_reg = 1;
+ } else {
+ m->n_reg = num_src_operands;
+ }
+
+ m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
+
+ m->scope = getGenericMemoryScope(this->memoryScope);
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->kern_id = w->kern_id;
+ m->cu_id = w->computeUnit->cu_id;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
+ switch (this->segment) {
+ case Brig::BRIG_SEGMENT_GLOBAL:
+ m->s_type = SEG_GLOBAL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+
+ // this is a complete hack to get around a compiler bug
+ // (the compiler currently generates global access for private
+ // addresses (starting from 0). We need to add the private offset)
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ if (m->addr[lane] < w->privSizePerItem) {
+
+ // calcPrivAddr will fail if accesses are unaligned
+ assert(!((sizeof(CType)-1) & m->addr[lane]));
+
+ Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
+ this);
+
+ m->addr[lane] = privAddr;
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_SPILL:
+ assert(num_src_operands == 1);
+ m->s_type = SEG_SPILL;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ assert(m->addr[lane] < w->spillSizePerItem);
+
+ m->addr[lane] = m->addr[lane] * w->spillWidth +
+ lane * sizeof(CType) + w->spillBase;
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_GROUP:
+ m->s_type = SEG_SHARED;
+ m->pipeId = LDSMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(24));
+ w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
+ w->outstanding_reqs_wr_lm++;
+ w->wr_lm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_PRIVATE:
+ m->s_type = SEG_PRIVATE;
+ m->pipeId = GLBMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(1));
+ {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ if (mask[lane]) {
+ assert(m->addr[lane] < w->privSizePerItem);
+ m->addr[lane] = m->addr[lane] + lane *
+ sizeof(CType)+w->privBase;
+ }
+ }
+ }
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ break;
+
+ default:
+ fatal("Store to unsupported segment %d\n", this->segment);
+ }
+
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
+
+ template<typename OperationType, typename SrcDataType,
+ typename AddrRegOperandType>
+ void
+ StInst<OperationType, SrcDataType,
+ AddrRegOperandType>::generateDisassembly()
+ {
+ switch (num_src_operands) {
+ case 1:
+ this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
+ segmentNames[this->segment],
+ OperationType::label,
+ this->src.disassemble(),
+ this->addr.disassemble());
+ break;
+ case 2:
+ this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
+ segmentNames[this->segment],
+ OperationType::label,
+ this->src_vect[0].disassemble(),
+ this->src_vect[1].disassemble(),
+ this->addr.disassemble());
+ break;
+ case 4:
+ this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
+ this->opcode,
+ segmentNames[this->segment],
+ OperationType::label,
+ this->src_vect[0].disassemble(),
+ this->src_vect[1].disassemble(),
+ this->src_vect[2].disassemble(),
+ this->src_vect[3].disassemble(),
+ this->addr.disassemble());
+ break;
+ default: fatal("Bad ld register src operand, num vector operands: "
+ "%d \n", num_src_operands);
+ break;
+ }
+ }
+
+ template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
+ bool HasDst>
+ void
+ AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
+ HasDst>::execute(GPUDynInstPtr gpuDynInst)
+ {
+ typedef typename DataType::CType CType;
+
+ Wavefront *w = gpuDynInst->wavefront();
+
+ GPUDynInstPtr m = gpuDynInst;
+
+ this->addr.calcVector(w, m->addr);
+
+ for (int lane = 0; lane < VSZ; ++lane) {
+ ((CType *)m->a_data)[lane] =
+ this->src[0].template get<CType>(w, lane);
+ }
+
+ // load second source operand for CAS
+ if (NumSrcOperands > 1) {
+ for (int lane = 0; lane < VSZ; ++lane) {
+ ((CType*)m->x_data)[lane] =
+ this->src[1].template get<CType>(w, lane);
+ }
+ }
+
+ assert(NumSrcOperands <= 2);
+
+ m->m_op = this->opType;
+ m->m_type = DataType::memType;
+ m->v_type = DataType::vgprType;
+
+ m->exec_mask = w->execMask();
+ m->statusBitVector = 0;
+ m->equiv = 0; // atomics don't have an equivalence class operand
+ m->n_reg = 1;
+ m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
+
+ m->scope = getGenericMemoryScope(this->memoryScope);
+
+ if (HasDst) {
+ m->dst_reg = this->dest.regIndex();
+ }
+
+ m->simdId = w->simdId;
+ m->wfSlotId = w->wfSlotId;
+ m->wfDynId = w->wfDynId;
+ m->kern_id = w->kern_id;
+ m->cu_id = w->computeUnit->cu_id;
+ m->latency.init(&w->computeUnit->shader->tick_cnt);
+
+ switch (this->segment) {
+ case Brig::BRIG_SEGMENT_GLOBAL:
+ m->s_type = SEG_GLOBAL;
+ m->latency.set(w->computeUnit->shader->ticks(64));
+ m->pipeId = GLBMEM_PIPE;
+
+ w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
+ w->outstanding_reqs_wr_gm++;
+ w->wr_gm_reqs_in_pipe--;
+ w->outstanding_reqs_rd_gm++;
+ w->rd_gm_reqs_in_pipe--;
+ break;
+
+ case Brig::BRIG_SEGMENT_GROUP:
+ m->s_type = SEG_SHARED;
+ m->pipeId = LDSMEM_PIPE;
+ m->latency.set(w->computeUnit->shader->ticks(24));
+ w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
+ w->outstanding_reqs_wr_lm++;
+ w->wr_lm_reqs_in_pipe--;
+ w->outstanding_reqs_rd_lm++;
+ w->rd_lm_reqs_in_pipe--;
+ break;
+
+ default:
+ fatal("Atomic op to unsupported segment %d\n",
+ this->segment);
+ }
+
+ w->outstanding_reqs++;
+ w->mem_reqs_in_pipe--;
+ }
+
+ const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
+
+ template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
+ bool HasDst>
+ void
+ AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
+ HasDst>::generateDisassembly()
+ {
+ if (HasDst) {
+ this->disassembly =
+ csprintf("%s_%s_%s_%s %s,%s", this->opcode,
+ atomicOpToString(this->atomicOperation),
+ segmentNames[this->segment],
+ DataType::label, this->dest.disassemble(),
+ this->addr.disassemble());
+ } else {
+ this->disassembly =
+ csprintf("%s_%s_%s_%s %s", this->opcode,
+ atomicOpToString(this->atomicOperation),
+ segmentNames[this->segment],
+ DataType::label, this->addr.disassemble());
+ }
+
+ for (int i = 0; i < NumSrcOperands; ++i) {
+ this->disassembly += ",";
+ this->disassembly += this->src[i].disassemble();
+ }
+ }
+} // namespace HsailISA