summaryrefslogtreecommitdiff
path: root/src/gpu-compute/wavefront.hh
diff options
context:
space:
mode:
authorAlexandru Dutu <alexandru.dutu@amd.com>2016-10-04 13:03:52 -0400
committerAlexandru Dutu <alexandru.dutu@amd.com>2016-10-04 13:03:52 -0400
commitc8cf71f1a046b4758e20d6398a654777c3fb5193 (patch)
treecd1d32884dbedee58802d73b87a3185422ead4f4 /src/gpu-compute/wavefront.hh
parentb4b50f823029920542c3bb22f6aae38867b9f674 (diff)
downloadgem5-c8cf71f1a046b4758e20d6398a654777c3fb5193.tar.xz
gpu-compute: Added method to compute the actual workgroup size
This patch adds a method to the Wavefront class to compute the actual workgroup size. This can be different from the maximum workgroup size specified when launching the kernel through the NDRange object. The current solution is still not optimal, as we are computing these sizes for each wavefront, and the dispatcher also needs this information but can't actually call Wavefront::computeActualWgSz before the wavefronts are created. A long-term solution would be to have a Workgroup class that deals with all these details.
Diffstat (limited to 'src/gpu-compute/wavefront.hh')
-rw-r--r--src/gpu-compute/wavefront.hh6
1 files changed, 6 insertions, 0 deletions
diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh
index ef8c80989..0df8a6c82 100644
--- a/src/gpu-compute/wavefront.hh
+++ b/src/gpu-compute/wavefront.hh
@@ -47,6 +47,7 @@
#include "gpu-compute/condition_register_state.hh"
#include "gpu-compute/lds_state.hh"
#include "gpu-compute/misc.hh"
+#include "gpu-compute/ndrange.hh"
#include "params/Wavefront.hh"
#include "sim/sim_object.hh"
@@ -189,11 +190,16 @@ class Wavefront : public SimObject
std::vector<Addr> lastAddr;
std::vector<uint32_t> workItemId[3];
std::vector<uint32_t> workItemFlatId;
+ /* kernel launch parameters */
uint32_t workGroupId[3];
uint32_t workGroupSz[3];
uint32_t gridSz[3];
uint32_t wgId;
uint32_t wgSz;
+ /* the actual WG size can differ from the maximum size */
+ uint32_t actualWgSz[3];
+ uint32_t actualWgSzTotal;
+ void computeActualWgSz(NDRange *ndr);
// wavefront id within a workgroup
uint32_t wfId;
uint32_t maxDynWaveId;