summaryrefslogtreecommitdiff
path: root/src/gpu-compute
diff options
context:
space:
mode:
authorTony Gutierrez <anthony.gutierrez@amd.com>2016-10-26 22:47:38 -0400
committerTony Gutierrez <anthony.gutierrez@amd.com>2016-10-26 22:47:38 -0400
commitd327cdba078e0956596513b518731e9ec730723f (patch)
tree0ad01f9fa3061d6846c019bd075103f55ad74689 /src/gpu-compute
parent98d8a7051d8caa9b5aebebe5bf16f9d731c34c0e (diff)
downloadgem5-d327cdba078e0956596513b518731e9ec730723f.tar.xz
gpu-compute: add gpu_isa.hh to switch hdrs, add GPUISA to WF
the GPUISA class is meant to encapsulate any ISA-specific behavior - special register accesses, isa-specific WF/kernel state, etc. - in a generic enough way so that it may be used in ISA-agnostic code. gpu-compute: use the GPUISA object to advance the PC the GPU model treats the PC as a pointer to individual instruction objects - which are stored in a contiguous array - and not a byte address to be fetched from the real memory system. this is ok for HSAIL because all instructions are considered by the model to be the same size. in machine ISA, however, instructions may be 32b or 64b, and branches are calculated by advancing the PC by the number of words (4 byte chunks) it needs to advance in the real instruction stream. because of this there is a mismatch between the PC we use to index into the instruction array, and the actual byte address PC the ISA expects. here we move the PC advance calculation to the ISA so that differences in the instruction sizes may be accounted for in a generic way.
Diffstat (limited to 'src/gpu-compute')
-rw-r--r--src/gpu-compute/fetch_unit.cc25
-rw-r--r--src/gpu-compute/gpu_exec_context.cc15
-rw-r--r--src/gpu-compute/gpu_exec_context.hh7
-rw-r--r--src/gpu-compute/wavefront.cc4
-rw-r--r--src/gpu-compute/wavefront.hh9
5 files changed, 56 insertions, 4 deletions
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index 9104c400e..2ea7f1f9d 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -114,7 +114,18 @@ void
FetchUnit::initiateFetch(Wavefront *wavefront)
{
// calculate the virtual address to fetch from the SQC
- Addr vaddr = wavefront->pc() + wavefront->instructionBuffer.size();
+ Addr vaddr = wavefront->pc();
+
+ /**
+ * the instruction buffer holds one instruction per entry, regardless
+ * of the underlying instruction's size. the PC, however, addresses
+ * instrutions on a 32b granularity so we must account for that here.
+ */
+ for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) {
+ int current_inst_size =
+ wavefront->instructionBuffer.at(i)->staticInstruction()->instSize();
+ vaddr += current_inst_size / sizeof(uint32_t);
+ }
vaddr = wavefront->basePtr + vaddr * sizeof(GPUStaticInst*);
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
@@ -267,6 +278,18 @@ FetchUnit::processFetchReturn(PacketPtr pkt)
GPUStaticInst *inst_ptr = decoder.decode(inst_index_ptr[i]);
assert(inst_ptr);
+
+ if (inst_ptr->instSize() == 8) {
+ /**
+ * this instruction occupies 2 consecutive
+ * entries in the instruction array, the
+ * second of which contains a nullptr. so if
+ * this inst is 8 bytes we advance two entries
+ * instead of 1
+ */
+ ++i;
+ }
+
DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: added %s\n",
computeUnit->cu_id, wavefront->simdId,
wavefront->wfSlotId, inst_ptr->disassemble());
diff --git a/src/gpu-compute/gpu_exec_context.cc b/src/gpu-compute/gpu_exec_context.cc
index 4af69c41e..ca694187c 100644
--- a/src/gpu-compute/gpu_exec_context.cc
+++ b/src/gpu-compute/gpu_exec_context.cc
@@ -34,9 +34,10 @@
*/
#include "gpu-compute/gpu_exec_context.hh"
+#include "gpu-compute/wavefront.hh"
GPUExecContext::GPUExecContext(ComputeUnit *_cu, Wavefront *_wf)
- : cu(_cu), wf(_wf)
+ : cu(_cu), wf(_wf), gpuISA(_wf->gpuISA())
{
}
@@ -51,3 +52,15 @@ GPUExecContext::wavefront()
{
return wf;
}
+
+TheGpuISA::MiscReg
+GPUExecContext::readMiscReg(int opIdx) const
+{
+ return gpuISA.readMiscReg(opIdx);
+}
+
+void
+GPUExecContext::writeMiscReg(int opIdx, TheGpuISA::MiscReg operandVal)
+{
+ gpuISA.writeMiscReg(opIdx, operandVal);
+}
diff --git a/src/gpu-compute/gpu_exec_context.hh b/src/gpu-compute/gpu_exec_context.hh
index a3deb9b8f..f7c021c0d 100644
--- a/src/gpu-compute/gpu_exec_context.hh
+++ b/src/gpu-compute/gpu_exec_context.hh
@@ -36,6 +36,9 @@
#ifndef __GPU_EXEC_CONTEXT_HH__
#define __GPU_EXEC_CONTEXT_HH__
+#include "arch/gpu_isa.hh"
+#include "config/the_gpu_isa.hh"
+
class ComputeUnit;
class Wavefront;
@@ -46,9 +49,13 @@ class GPUExecContext
Wavefront* wavefront();
ComputeUnit* computeUnit();
+ TheGpuISA::MiscReg readMiscReg(int opIdx) const;
+ void writeMiscReg(int opIdx, TheGpuISA::MiscReg operandVal);
+
protected:
ComputeUnit *cu;
Wavefront *wf;
+ TheGpuISA::GPUISA &gpuISA;
};
#endif // __GPU_EXEC_CONTEXT_HH__
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index 99ac24900..cce76044f 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -49,7 +49,7 @@ WavefrontParams::create()
}
Wavefront::Wavefront(const Params *p)
- : SimObject(p), callArgMem(nullptr)
+ : SimObject(p), callArgMem(nullptr), _gpuISA(*this)
{
lastTrace = 0;
simdId = p->simdId;
@@ -670,7 +670,7 @@ Wavefront::exec()
computeUnit->lastExecCycle[simdId]);
computeUnit->lastExecCycle[simdId] = computeUnit->totalCycles.value();
if (pc() == old_pc) {
- uint32_t new_pc = old_pc + 1;
+ uint32_t new_pc = _gpuISA.advancePC(old_pc, ii);
// PC not modified by instruction, proceed to next or pop frame
pc(new_pc);
if (new_pc == rpc()) {
diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh
index 0df8a6c82..659132941 100644
--- a/src/gpu-compute/wavefront.hh
+++ b/src/gpu-compute/wavefront.hh
@@ -42,8 +42,10 @@
#include <stack>
#include <vector>
+#include "arch/gpu_isa.hh"
#include "base/misc.hh"
#include "base/types.hh"
+#include "config/the_gpu_isa.hh"
#include "gpu-compute/condition_register_state.hh"
#include "gpu-compute/lds_state.hh"
#include "gpu-compute/misc.hh"
@@ -372,7 +374,14 @@ class Wavefront : public SimObject
*/
void setContext(const void *in);
+ TheGpuISA::GPUISA&
+ gpuISA()
+ {
+ return _gpuISA;
+ }
+
private:
+ TheGpuISA::GPUISA _gpuISA;
/**
* Stack containing Control Flow Graph nodes (i.e., kernel instructions)
* to be visited by the wavefront, and the associated execution masks. The