summaryrefslogtreecommitdiff
path: root/src/gpu-compute
diff options
context:
space:
mode:
authorAlexandru Dutu <alexandru.dutu@amd.com>2016-09-16 12:32:36 -0400
committerAlexandru Dutu <alexandru.dutu@amd.com>2016-09-16 12:32:36 -0400
commitbd65ec07441cbb19fc2d3570da32a0205dad52c2 (patch)
tree16c56549f71ae3d9e221bb96dc9a2d4abd818701 /src/gpu-compute
parente9b14d51113def040893d711cc0d5a37cf5235ae (diff)
downloadgem5-bd65ec07441cbb19fc2d3570da32a0205dad52c2.tar.xz
gpu-compute: Adding context serialization methods to Wavefront
This patch adds methods to serialize the context of a particular wavefront to the simulated system memory. Context serialization is used when a wavefront is preempeted (i.e. context switch).
Diffstat (limited to 'src/gpu-compute')
-rw-r--r--src/gpu-compute/wavefront.cc125
-rw-r--r--src/gpu-compute/wavefront.hh12
2 files changed, 137 insertions, 0 deletions
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index c2356b47f..42739a7b0 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -941,3 +941,128 @@ Wavefront::getStaticContextSize() const
sizeof(privBase) + sizeof(spillBase) + sizeof(ldsChunk) +
computeUnit->wfSize() * sizeof(ReconvergenceStackEntry);
}
+
+void
+Wavefront::getContext(const void *out)
+{
+ uint8_t *iter = (uint8_t *)out;
+ for (int i = 0; i < barCnt.size(); i++) {
+ *(int *)iter = barCnt[i]; iter += sizeof(barCnt[i]);
+ }
+ *(int *)iter = wfId; iter += sizeof(wfId);
+ *(int *)iter = maxBarCnt; iter += sizeof(maxBarCnt);
+ *(int *)iter = oldBarrierCnt; iter += sizeof(oldBarrierCnt);
+ *(int *)iter = barrierCnt; iter += sizeof(barrierCnt);
+ *(int *)iter = computeUnit->cu_id; iter += sizeof(computeUnit->cu_id);
+ *(uint32_t *)iter = wgId; iter += sizeof(wgId);
+ *(uint32_t *)iter = barrierId; iter += sizeof(barrierId);
+ *(uint64_t *)iter = initMask.to_ullong(); iter += sizeof(initMask.to_ullong());
+ *(Addr *)iter = privBase; iter += sizeof(privBase);
+ *(Addr *)iter = spillBase; iter += sizeof(spillBase);
+
+ int stackSize = reconvergenceStack.size();
+ ReconvergenceStackEntry empty = {std::numeric_limits<uint32_t>::max(),
+ std::numeric_limits<uint32_t>::max(),
+ std::numeric_limits<uint64_t>::max()};
+ for (int i = 0; i < workItemId[0].size(); i++) {
+ if (i < stackSize) {
+ *(ReconvergenceStackEntry *)iter = *reconvergenceStack.back();
+ iter += sizeof(ReconvergenceStackEntry);
+ reconvergenceStack.pop_back();
+ } else {
+ *(ReconvergenceStackEntry *)iter = empty;
+ iter += sizeof(ReconvergenceStackEntry);
+ }
+ }
+
+ int wf_size = computeUnit->wfSize();
+ for (int i = 0; i < maxSpVgprs; i++) {
+ uint32_t vgprIdx = remap(i, sizeof(uint32_t), 1);
+ for (int lane = 0; lane < wf_size; lane++) {
+ uint32_t regVal = computeUnit->vrf[simdId]->
+ read<uint32_t>(vgprIdx,lane);
+ *(uint32_t *)iter = regVal; iter += sizeof(regVal);
+ }
+ }
+
+ for (int i = 0; i < maxDpVgprs; i++) {
+ uint32_t vgprIdx = remap(i, sizeof(uint64_t), 1);
+ for (int lane = 0; lane < wf_size; lane++) {
+ uint64_t regVal = computeUnit->vrf[simdId]->
+ read<uint64_t>(vgprIdx,lane);
+ *(uint64_t *)iter = regVal; iter += sizeof(regVal);
+ }
+ }
+
+ for (int i = 0; i < condRegState->numRegs(); i++) {
+ for (int lane = 0; lane < wf_size; lane++) {
+ uint64_t regVal = condRegState->read<uint64_t>(i, lane);
+ *(uint64_t *)iter = regVal; iter += sizeof(regVal);
+ }
+ }
+
+ /* saving LDS content */
+ if (ldsChunk)
+ for (int i = 0; i < ldsChunk->size(); i++) {
+ char val = ldsChunk->read<char>(i);
+ *(char *) iter = val; iter += sizeof(val);
+ }
+}
+
+void
+Wavefront::setContext(const void *in)
+{
+ uint8_t *iter = (uint8_t *)in;
+ for (int i = 0; i < barCnt.size(); i++) {
+ barCnt[i] = *(int *)iter; iter += sizeof(barCnt[i]);
+ }
+ wfId = *(int *)iter; iter += sizeof(wfId);
+ maxBarCnt = *(int *)iter; iter += sizeof(maxBarCnt);
+ oldBarrierCnt = *(int *)iter; iter += sizeof(oldBarrierCnt);
+ barrierCnt = *(int *)iter; iter += sizeof(barrierCnt);
+ computeUnit->cu_id = *(int *)iter; iter += sizeof(computeUnit->cu_id);
+ wgId = *(uint32_t *)iter; iter += sizeof(wgId);
+ barrierId = *(uint32_t *)iter; iter += sizeof(barrierId);
+ initMask = VectorMask(*(uint64_t *)iter); iter += sizeof(initMask);
+ privBase = *(Addr *)iter; iter += sizeof(privBase);
+ spillBase = *(Addr *)iter; iter += sizeof(spillBase);
+
+ for (int i = 0; i < workItemId[0].size(); i++) {
+ ReconvergenceStackEntry newEntry = *(ReconvergenceStackEntry *)iter;
+ iter += sizeof(ReconvergenceStackEntry);
+ if (newEntry.pc != std::numeric_limits<uint32_t>::max()) {
+ pushToReconvergenceStack(newEntry.pc, newEntry.rpc,
+ newEntry.execMask);
+ }
+ }
+ int wf_size = computeUnit->wfSize();
+
+ for (int i = 0; i < maxSpVgprs; i++) {
+ uint32_t vgprIdx = remap(i, sizeof(uint32_t), 1);
+ for (int lane = 0; lane < wf_size; lane++) {
+ uint32_t regVal = *(uint32_t *)iter; iter += sizeof(regVal);
+ computeUnit->vrf[simdId]->write<uint32_t>(vgprIdx, regVal, lane);
+ }
+ }
+
+ for (int i = 0; i < maxDpVgprs; i++) {
+ uint32_t vgprIdx = remap(i, sizeof(uint64_t), 1);
+ for (int lane = 0; lane < wf_size; lane++) {
+ uint64_t regVal = *(uint64_t *)iter; iter += sizeof(regVal);
+ computeUnit->vrf[simdId]->write<uint64_t>(vgprIdx, regVal, lane);
+ }
+ }
+
+ for (int i = 0; i < condRegState->numRegs(); i++) {
+ for (int lane = 0; lane < wf_size; lane++) {
+ uint64_t regVal = *(uint64_t *)iter; iter += sizeof(regVal);
+ condRegState->write<uint64_t>(i, lane, regVal);
+ }
+ }
+ /** Restoring LDS contents */
+ if (ldsChunk)
+ for (int i = 0; i < ldsChunk->size(); i++) {
+ char val = *(char *) iter; iter += sizeof(val);
+ ldsChunk->write<char>(i, val);
+ }
+}
diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh
index 8caeda91e..ef8c80989 100644
--- a/src/gpu-compute/wavefront.hh
+++ b/src/gpu-compute/wavefront.hh
@@ -354,6 +354,18 @@ class Wavefront : public SimObject
*/
uint32_t getStaticContextSize() const;
+ /**
+ * Returns the hardware context as a stream of bytes
+ * This method is designed for HSAIL execution
+ */
+ void getContext(const void *out);
+
+ /**
+ * Sets the hardware context fromt a stream of bytes
+ * This method is designed for HSAIL execution
+ */
+ void setContext(const void *in);
+
private:
/**
* Stack containing Control Flow Graph nodes (i.e., kernel instructions)