gpu-compute: parametrize Wavefront size

Eliminate the VSZ constant that defined the Wavefront size (in number of work items) and replace it with a parameter in the GPU.py configuration script. Change all data structures that depend on the Wavefront size to be dynamically sized. Legal values of the Wavefront size are currently 16, 32, and 64; the value is checked at initialization time.
author: jkalamat <john.kalamatianos@amd.com> 2016-06-09 11:24:55 -0400
committer: jkalamat <john.kalamatianos@amd.com> 2016-06-09 11:24:55 -0400
commit: 3724fb15faafaaca54cc7a500df9c1490a387049 (patch)
tree: bbd671b68ba971087a1cd45b208947c09a622d38 /src/gpu-compute/wavefront.hh
parent: e5b7b6780f9748b6f13ef91e3e22d53ebdf47968 (diff)
download: gem5-3724fb15faafaaca54cc7a500df9c1490a387049.tar.xz
1 files changed, 13 insertions, 13 deletions
diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh
index 0abab8e83..5a5386a3d 100644
--- a/src/gpu-compute/wavefront.hh
+++ b/src/gpu-compute/wavefront.hh
@@ -83,6 +83,7 @@ class CallArgMem
   public:
     // pointer to buffer for storing function arguments
     uint8_t *mem;
+    int wfSize;
     // size of function args
     int funcArgsSizePerItem;
 
@@ -90,13 +91,13 @@ class CallArgMem
     int
     getLaneOffset(int lane, int addr)
     {
-        return addr * VSZ + sizeof(CType) * lane;
+        return addr * wfSize + sizeof(CType) * lane;
     }
 
-    CallArgMem(int func_args_size_per_item)
-      : funcArgsSizePerItem(func_args_size_per_item)
+    CallArgMem(int func_args_size_per_item, int wf_size)
+        : wfSize(wf_size), funcArgsSizePerItem(func_args_size_per_item)
     {
-        mem = (uint8_t*)malloc(funcArgsSizePerItem * VSZ);
+        mem = (uint8_t*)malloc(funcArgsSizePerItem * wfSize);
     }
 
     ~CallArgMem()
@@ -192,9 +193,9 @@ class Wavefront : public SimObject
     bool isOldestInstALU();
     bool isOldestInstBarrier();
     // used for passing spill address to DDInstGPU
-    uint64_t last_addr[VSZ];
-    uint32_t workitemid[3][VSZ];
-    uint32_t workitemFlatId[VSZ];
+    std::vector<Addr> last_addr;
+    std::vector<uint32_t> workitemid[3];
+    std::vector<uint32_t> workitemFlatId;
     uint32_t workgroupid[3];
     uint32_t workgroupsz[3];
     uint32_t gridsz[3];
@@ -230,14 +231,14 @@ class Wavefront : public SimObject
     uint32_t startVgprIndex;
 
     // Old value of destination gpr (for trace)
-    uint32_t old_vgpr[VSZ];
+    std::vector<uint32_t> old_vgpr;
     // Id of destination gpr (for trace)
     uint32_t old_vgpr_id;
     // Tick count of last old_vgpr copy
     uint64_t old_vgpr_tcnt;
 
     // Old value of destination gpr (for trace)
-    uint64_t old_dgpr[VSZ];
+    std::vector<uint64_t> old_dgpr;
     // Id of destination gpr (for trace)
     uint32_t old_dgpr_id;
     // Tick count of last old_vgpr copy
@@ -247,7 +248,7 @@ class Wavefront : public SimObject
     VectorMask init_mask;
 
     // number of barriers this WF has joined
-    int bar_cnt[VSZ];
+    std::vector<int> bar_cnt;
     int max_bar_cnt;
     // Flag to stall a wave on barrier
     bool stalledAtBarrier;
@@ -296,9 +297,9 @@ class Wavefront : public SimObject
     // argument memory for hsail call instruction
     CallArgMem *callArgMem;
     void
-    initCallArgMem(int func_args_size_per_item)
+    initCallArgMem(int func_args_size_per_item, int wf_size)
     {
-        callArgMem = new CallArgMem(func_args_size_per_item);
+        callArgMem = new CallArgMem(func_args_size_per_item, wf_size);
     }
 
     template<typename CType>
@@ -327,7 +328,6 @@ class Wavefront : public SimObject
     }
 
     void start(uint64_t _wfDynId, uint64_t _base_ptr);
-
     void exec();
     void updateResources();
     int ready(itype_e type);
author	jkalamat <john.kalamatianos@amd.com>	2016-06-09 11:24:55 -0400
committer	jkalamat <john.kalamatianos@amd.com>	2016-06-09 11:24:55 -0400
commit	3724fb15faafaaca54cc7a500df9c1490a387049 (patch)
tree	bbd671b68ba971087a1cd45b208947c09a622d38 /src/gpu-compute/wavefront.hh
parent	e5b7b6780f9748b6f13ef91e3e22d53ebdf47968 (diff)
download	gem5-3724fb15faafaaca54cc7a500df9c1490a387049.tar.xz