Diffstat (limited to 'src/gpu-compute/gpu_dyn_inst.hh')
-rw-r--r--  src/gpu-compute/gpu_dyn_inst.hh  464
1 file changed, 464 insertions, 0 deletions
diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh
new file mode 100644
index 000000000..e44d8f80d
--- /dev/null
+++ b/src/gpu-compute/gpu_dyn_inst.hh
@@ -0,0 +1,464 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Anthony Gutierrez
+ */
+
+#ifndef __GPU_DYN_INST_HH__
+#define __GPU_DYN_INST_HH__
+
+#include <cstdint>
+#include <functional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "enums/GenericMemoryOrder.hh"
+#include "enums/GenericMemoryScope.hh"
+#include "enums/MemOpType.hh"
+#include "enums/MemType.hh"
+#include "enums/OpType.hh"
+#include "enums/StorageClassType.hh"
+#include "gpu-compute/compute_unit.hh"
+#include "gpu-compute/gpu_exec_context.hh"
+
+class GPUStaticInst;
+
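+// The functors below encapsulate the update performed by each GPU atomic
+// operation. Each functor captures its operand(s) at construction time;
+// execute() then applies the operation in place to the memory value it
+// is handed.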
+template<typename T>
+class AtomicOpAnd : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T a;
+
+ AtomicOpAnd(T _a) : a(_a) { }
+ void execute(T *b) { *b &= a; }
+};
+
+template<typename T>
+class AtomicOpOr : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T a;
+ AtomicOpOr(T _a) : a(_a) { }
+ void execute(T *b) { *b |= a; }
+};
+
+template<typename T>
+class AtomicOpXor : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T a;
+ AtomicOpXor(T _a) : a(_a) {}
+ void execute(T *b) { *b ^= a; }
+};
+
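+// Compare-and-swap: writes the swap value s to *b only when *b matches
+// the compare value c; every attempt and each failure is counted in the
+// owning ComputeUnit's statistics.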
+template<typename T>
+class AtomicOpCAS : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T c;
+ T s;
+
+ ComputeUnit *computeUnit;
+
+ AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
+ : c(_c), s(_s), computeUnit(compute_unit) { }
+
+ void
+ execute(T *b)
+ {
+ computeUnit->numCASOps++;
+
+ if (*b == c) {
+ *b = s;
+ } else {
+ computeUnit->numFailedCASOps++;
+ }
+
+ if (computeUnit->xact_cas_mode) {
+ computeUnit->xactCasLoadMap.clear();
+ }
+ }
+};
+
+template<typename T>
+class AtomicOpExch : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T a;
+ AtomicOpExch(T _a) : a(_a) { }
+ void execute(T *b) { *b = a; }
+};
+
+template<typename T>
+class AtomicOpAdd : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T a;
+ AtomicOpAdd(T _a) : a(_a) { }
+ void execute(T *b) { *b += a; }
+};
+
+template<typename T>
+class AtomicOpSub : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T a;
+ AtomicOpSub(T _a) : a(_a) { }
+ void execute(T *b) { *b -= a; }
+};
+
+template<typename T>
+class AtomicOpInc : public TypedAtomicOpFunctor<T>
+{
+ public:
+ AtomicOpInc() { }
+ void execute(T *b) { *b += 1; }
+};
+
+template<typename T>
+class AtomicOpDec : public TypedAtomicOpFunctor<T>
+{
+ public:
+ AtomicOpDec() {}
+ void execute(T *b) { *b -= 1; }
+};
+
+template<typename T>
+class AtomicOpMax : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T a;
+ AtomicOpMax(T _a) : a(_a) { }
+
+ void
+ execute(T *b)
+ {
+ if (a > *b)
+ *b = a;
+ }
+};
+
+template<typename T>
+class AtomicOpMin : public TypedAtomicOpFunctor<T>
+{
+ public:
+ T a;
+ AtomicOpMin(T _a) : a(_a) {}
+
+ void
+ execute(T *b)
+ {
+ if (a < *b)
+ *b = a;
+ }
+};
+
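+// Range tests over Enums::MemOpType: MO_A(a) matches the atomic (return)
+// ops, MO_ANR(a) the atomic no-return variants, and MO_H(a) the MO_H*
+// group.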
+#define MO_A(a) ((a)>=Enums::MO_AAND && (a)<=Enums::MO_AMIN)
+#define MO_ANR(a) ((a)>=Enums::MO_ANRAND && (a)<=Enums::MO_ANRMIN)
+#define MO_H(a) ((a)>=Enums::MO_HAND && (a)<=Enums::MO_HMIN)
+
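+// Width of the destination vector register(s) written by a memory
+// operation's return value.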
+typedef enum
+{
+ VT_32,
+ VT_64,
+} vgpr_type;
+
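+// Memory segment targeted by an access; mapped to Request memory-space
+// flags in GPUDynInst::setRequestFlags().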
+typedef enum
+{
+ SEG_PRIVATE,
+ SEG_SPILL,
+ SEG_GLOBAL,
+ SEG_SHARED,
+ SEG_READONLY,
+ SEG_FLAT
+} seg_type;
+
+class GPUDynInst : public GPUExecContext
+{
+ public:
+ GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *_staticInst,
+ uint64_t instSeqNum);
+
+ void execute();
+ int numSrcRegOperands();
+ int numDstRegOperands();
+ int getNumOperands();
+ bool isVectorRegister(int operandIdx);
+ bool isScalarRegister(int operandIdx);
+ int getRegisterIndex(int operandIdx);
+ int getOperandSize(int operandIdx);
+ bool isDstOperand(int operandIdx);
+ bool isSrcOperand(int operandIdx);
+ bool isArgLoad();
+
+ const std::string &disassemble() const;
+
+ uint64_t seqNum() const;
+
+ Enums::OpType opType();
+ Enums::StorageClassType executedAs();
+
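+ // Per-lane state for the memory operation: VSZ is the wavefront size,
+ // so each of the arrays below has one slot per lane.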
+ // The address of the memory operation
+ Addr addr[VSZ];
+ Addr pAddr;
+
+ // The data to be written
+ uint8_t d_data[VSZ * 16];
+ // Additional data (for atomics)
+ uint8_t a_data[VSZ * 8];
+ // Additional data (for atomics)
+ uint8_t x_data[VSZ * 8];
+ // The execution mask
+ VectorMask exec_mask;
+
+ // The memory type (M_U32, M_S32, ...)
+ Enums::MemType m_type;
+ // The memory operation (MO_LD, MO_ST, ...)
+ Enums::MemOpType m_op;
+ Enums::GenericMemoryOrder memoryOrder;
+
+ // Scope of the request
+ Enums::GenericMemoryScope scope;
+ // The memory segment (SEG_SHARED, SEG_GLOBAL, ...)
+ seg_type s_type;
+ // The equivalency class
+ int equiv;
+ // The return VGPR type (VT_32 or VT_64)
+ vgpr_type v_type;
+ // Number of VGPRs accessed (1, 2, or 4)
+ int n_reg;
+ // The return VGPR index
+ int dst_reg;
+ // There can be at most 4 dest regs
+ int dst_reg_vec[4];
+ // SIMD unit to which the WF of this memory instruction is mapped
+ int simdId;
+ // Unique id of the WF to which this memory instruction belongs
+ int wfDynId;
+ // The kernel id of the requesting wf
+ int kern_id;
+ // The CU id of the requesting wf
+ int cu_id;
+ // HW slot inside a SIMD unit to which the WF is mapped
+ int wfSlotId;
+ // Id of the execution pipeline to which this memory instruction was
+ // scheduled
+ int pipeId;
+ // The execution time of this operation
+ Tick time;
+ // The latency of this operation
+ WaitClass latency;
+ // A list of bank conflicts for the 4 cycles.
+ uint32_t bc[4];
+
+ // A pointer to ROM
+ uint8_t *rom;
+ // The size of the READONLY segment
+ int sz_rom;
+
+ // Initiate the specified memory operation, by creating a
+ // memory request and sending it off to the memory system.
+ void initiateAcc(GPUDynInstPtr gpuDynInst);
+
+ void updateStats();
+
+ GPUStaticInst* staticInstruction() { return staticInst; }
+
+ // Is the instruction a scalar or vector op?
+ bool scalarOp() const;
+
+ /*
+ * Loads/stores/atomics may have acquire/release semantics associated
+ * with them. Some protocols want to see the acquire/release as separate
+ * requests from the load/store/atomic. We implement that separation
+ * using continuations (i.e., a function pointer with an object associated
+ * with it). When, for example, the front-end generates a store with
+ * release semantics, we will first issue a normal store and set the
+ * continuation in the GPUDynInst to a function that generates a
+ * release request. That continuation will be called when the normal
+ * store completes (in ComputeUnit::DataPort::recvTimingResponse). The
+ * continuation will be called in the context of the same GPUDynInst
+ * that generated the initial store.
+ */
+ std::function<void(GPUStaticInst*, GPUDynInstPtr)> execContinuation;
+
+ // when true, call execContinuation when response arrives
+ bool useContinuation;
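+
+ // A hypothetical sketch of how a store-release might arm the
+ // continuation (illustration only, not part of this interface):
+ //   gpuDynInst->useContinuation = true;
+ //   gpuDynInst->execContinuation =
+ //       [](GPUStaticInst *si, GPUDynInstPtr di) {
+ //           // issue the release request for di here
+ //       };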
+
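+ // Build the AtomicOpFunctor matching a memory op type. reg0 supplies
+ // the operand (the compare value for CAS); reg1 is read only for CAS,
+ // where it supplies the swap value. The caller owns the returned
+ // functor. A hypothetical use (illustration only):
+ //   AtomicOpFunctor *amo =
+ //       gpuDynInst->makeAtomicOpFunctor<uint32_t>(&val0, &val1, m_op);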
+ template<typename c0> AtomicOpFunctor*
+ makeAtomicOpFunctor(c0 *reg0, c0 *reg1, Enums::MemOpType op)
+ {
+ using namespace Enums;
+
+ switch (op) {
+ case MO_AAND:
+ case MO_ANRAND:
+ return new AtomicOpAnd<c0>(*reg0);
+ case MO_AOR:
+ case MO_ANROR:
+ return new AtomicOpOr<c0>(*reg0);
+ case MO_AXOR:
+ case MO_ANRXOR:
+ return new AtomicOpXor<c0>(*reg0);
+ case MO_ACAS:
+ case MO_ANRCAS:
+ return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
+ case MO_AEXCH:
+ case MO_ANREXCH:
+ return new AtomicOpExch<c0>(*reg0);
+ case MO_AADD:
+ case MO_ANRADD:
+ return new AtomicOpAdd<c0>(*reg0);
+ case MO_ASUB:
+ case MO_ANRSUB:
+ return new AtomicOpSub<c0>(*reg0);
+ case MO_AINC:
+ case MO_ANRINC:
+ return new AtomicOpInc<c0>();
+ case MO_ADEC:
+ case MO_ANRDEC:
+ return new AtomicOpDec<c0>();
+ case MO_AMAX:
+ case MO_ANRMAX:
+ return new AtomicOpMax<c0>(*reg0);
+ case MO_AMIN:
+ case MO_ANRMIN:
+ return new AtomicOpMin<c0>(*reg0);
+ default:
+ panic("Unrecognized atomic operation");
+ }
+ }
+
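+ // Translate this instruction's segment, scope, memory order, and
+ // atomic type into the corresponding Request flags. A hypothetical
+ // caller sketch (illustration only, constructor args omitted):
+ //   Request *req = new Request(/* ... */);
+ //   gpuDynInst->setRequestFlags(req);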
+ void
+ setRequestFlags(Request *req, bool setMemOrder=true)
+ {
+ // currently these are the easy scopes to deduce
+ switch (s_type) {
+ case SEG_PRIVATE:
+ req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
+ break;
+ case SEG_SPILL:
+ req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
+ break;
+ case SEG_GLOBAL:
+ req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
+ break;
+ case SEG_READONLY:
+ req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
+ break;
+ case SEG_SHARED:
+ req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
+ break;
+ case SEG_FLAT:
+ // TODO: translate to correct scope
+ assert(false);
+ default:
+ panic("Bad segment type");
+ break;
+ }
+
+ switch (scope) {
+ case Enums::MEMORY_SCOPE_NONE:
+ case Enums::MEMORY_SCOPE_WORKITEM:
+ break;
+ case Enums::MEMORY_SCOPE_WAVEFRONT:
+ req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
+ Request::WAVEFRONT_SCOPE);
+ break;
+ case Enums::MEMORY_SCOPE_WORKGROUP:
+ req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
+ Request::WORKGROUP_SCOPE);
+ break;
+ case Enums::MEMORY_SCOPE_DEVICE:
+ req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
+ Request::DEVICE_SCOPE);
+ break;
+ case Enums::MEMORY_SCOPE_SYSTEM:
+ req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
+ Request::SYSTEM_SCOPE);
+ break;
+ default:
+ panic("Bad scope type");
+ break;
+ }
+
+ if (setMemOrder) {
+ // set acquire and release flags
+ switch (memoryOrder) {
+ case Enums::MEMORY_ORDER_SC_ACQUIRE:
+ req->setFlags(Request::ACQUIRE);
+ break;
+ case Enums::MEMORY_ORDER_SC_RELEASE:
+ req->setFlags(Request::RELEASE);
+ break;
+ case Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+ req->setFlags(Request::ACQUIRE | Request::RELEASE);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // set atomic type
+ // currently, the instruction generator only produces atomic-return ops,
+ // but a magic instruction can produce atomic no-return ops
+ if (m_op == Enums::MO_AADD || m_op == Enums::MO_ASUB ||
+ m_op == Enums::MO_AAND || m_op == Enums::MO_AOR ||
+ m_op == Enums::MO_AXOR || m_op == Enums::MO_AMAX ||
+ m_op == Enums::MO_AMIN || m_op == Enums::MO_AINC ||
+ m_op == Enums::MO_ADEC || m_op == Enums::MO_AEXCH ||
+ m_op == Enums::MO_ACAS) {
+ req->setFlags(Request::ATOMIC_RETURN_OP);
+ } else if (m_op == Enums::MO_ANRADD || m_op == Enums::MO_ANRSUB ||
+ m_op == Enums::MO_ANRAND || m_op == Enums::MO_ANROR ||
+ m_op == Enums::MO_ANRXOR || m_op == Enums::MO_ANRMAX ||
+ m_op == Enums::MO_ANRMIN || m_op == Enums::MO_ANRINC ||
+ m_op == Enums::MO_ANRDEC || m_op == Enums::MO_ANREXCH ||
+ m_op == Enums::MO_ANRCAS) {
+ req->setFlags(Request::ATOMIC_NO_RETURN_OP);
+ }
+ }
+
+ // Maps the address satisfied by a returned packet to the lanes that
+ // requested it
+ typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
+ StatusVector memStatusVector;
+
+ // Track the status of memory requests per lane, one bit per lane
+ VectorMask statusBitVector;
+ // for ld_v# or st_v#
+ std::vector<int> statusVector;
+ std::vector<int> tlbHitLevel;
+
+ private:
+ GPUStaticInst *staticInst;
+ uint64_t _seqNum;
+};
+
+#endif // __GPU_DYN_INST_HH__