gpu-compute: parametrize Wavefront size

Eliminate the VSZ constant that defined the Wavefront size (in number of work items) and replace it with a parameter in the GPU.py configuration script. Change all data structures that depend on the Wavefront size to be dynamically sized. Legal Wavefront sizes are 16, 32, and 64 for now; the value is checked at initialization time.
author: jkalamat <john.kalamatianos@amd.com> 2016-06-09 11:24:55 -0400
committer: jkalamat <john.kalamatianos@amd.com> 2016-06-09 11:24:55 -0400
commit: 3724fb15faafaaca54cc7a500df9c1490a387049 (patch)
tree: bbd671b68ba971087a1cd45b208947c09a622d38 /src/arch/hsail/insts
parent: e5b7b6780f9748b6f13ef91e3e22d53ebdf47968 (diff)
download: gem5-3724fb15faafaaca54cc7a500df9c1490a387049.tar.xz
5 files changed, 65 insertions, 58 deletions
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh
index f4b00fc8d..45cd876ad 100644
--- a/src/arch/hsail/insts/branch.hh
+++ b/src/arch/hsail/insts/branch.hh
@@ -279,7 +279,7 @@ namespace HsailISA
         // taken branch
         const uint32_t true_pc = getTargetPc();
         VectorMask true_mask;
-        for (unsigned int lane = 0; lane < VSZ; ++lane) {
+        for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
         }
 
diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc
index 4e70bf46a..004054524 100644
--- a/src/arch/hsail/insts/main.cc
+++ b/src/arch/hsail/insts/main.cc
@@ -134,7 +134,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
 
         // mask off completed work-items
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 w->init_mask[lane] = 0;
             }
diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh
index f2792cd49..1db98d212 100644
--- a/src/arch/hsail/insts/mem.hh
+++ b/src/arch/hsail/insts/mem.hh
@@ -457,7 +457,7 @@ namespace HsailISA
             gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
 
             if (num_dest_operands > 1) {
-                for (int i = 0; i < VSZ; ++i)
+                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
                     if (gpuDynInst->exec_mask[i])
                         gpuDynInst->statusVector.push_back(num_dest_operands);
                     else
@@ -466,9 +466,10 @@ namespace HsailISA
 
             for (int k = 0; k < num_dest_operands; ++k) {
 
-                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+                c0 *d = &((c0*)gpuDynInst->d_data)
+                    [k * gpuDynInst->computeUnit()->wfSize()];
 
-                for (int i = 0; i < VSZ; ++i) {
+                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                     if (gpuDynInst->exec_mask[i]) {
                         Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
 
@@ -1004,7 +1005,7 @@ namespace HsailISA
             gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
 
             if (num_src_operands > 1) {
-                for (int i = 0; i < VSZ; ++i)
+                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
                     if (gpuDynInst->exec_mask[i])
                         gpuDynInst->statusVector.push_back(num_src_operands);
                     else
@@ -1012,9 +1013,10 @@ namespace HsailISA
             }
 
             for (int k = 0; k < num_src_operands; ++k) {
-                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+                c0 *d = &((c0*)gpuDynInst->d_data)
+                    [k * gpuDynInst->computeUnit()->wfSize()];
 
-                for (int i = 0; i < VSZ; ++i) {
+                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                     if (gpuDynInst->exec_mask[i]) {
                         Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
 
@@ -1402,7 +1404,7 @@ namespace HsailISA
             c0 *e = &((c0*) gpuDynInst->a_data)[0];
             c0 *f = &((c0*) gpuDynInst->x_data)[0];
 
-            for (int i = 0; i < VSZ; ++i) {
+            for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                 if (gpuDynInst->exec_mask[i]) {
                     Addr vaddr = gpuDynInst->addr[i];
 
diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh
index 94f0cd6aa..8329c6e8a 100644
--- a/src/arch/hsail/insts/mem_impl.hh
+++ b/src/arch/hsail/insts/mem_impl.hh
@@ -60,14 +60,16 @@ namespace HsailISA
 
         typedef typename DestDataType::CType CType M5_VAR_USED;
         const VectorMask &mask = w->get_pred();
-        uint64_t addr_vec[VSZ];
+        std::vector<Addr> addr_vec;
+        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
         this->addr.calcVector(w, addr_vec);
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 this->dest.set(w, lane, addr_vec[lane]);
             }
         }
+        addr_vec.clear();
     }
 
     template<typename MemDataType, typename DestDataType,
@@ -121,8 +123,8 @@ namespace HsailISA
             i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
         assert(se);
 
-        return w->wfSlotId * w->privSizePerItem * VSZ +
-            se->offset * VSZ +
+        return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
+            se->offset * w->computeUnit->wfSize() +
             lane * se->size;
         */
 
@@ -139,9 +141,11 @@ namespace HsailISA
         Addr addr_div8 = addr / 8;
         Addr addr_mod8 = addr % 8;
 
-        Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase;
+        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
+            addr_mod8 + w->privBase;
 
-        assert(ret < w->privBase + (w->privSizePerItem * VSZ));
+        assert(ret < w->privBase +
+               (w->privSizePerItem * w->computeUnit->wfSize()));
 
         return ret;
     }
@@ -175,7 +179,7 @@ namespace HsailISA
 
             DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);
 
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     this->dest.set(w, lane, val);
                 }
@@ -184,7 +188,7 @@ namespace HsailISA
             return;
         } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
             uint64_t address = this->addr.calcUniform();
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     MemCType val = w->readCallArgMem<MemCType>(lane, address);
 
@@ -239,7 +243,7 @@ namespace HsailISA
             // this is a complete hack to get around a compiler bug
             // (the compiler currently generates global access for private
             //  addresses (starting from 0). We need to add the private offset)
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (m->addr[lane] < w->privSizePerItem) {
                     if (mask[lane]) {
                         // what is the size of the object we are accessing?
@@ -267,7 +271,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
             {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     //  note: this calculation will NOT WORK if the compiler
                     //  ever generates loads/stores to the same address with
                     //  different widths (e.g., a ld_u32 addr and a ld_u16 addr)
@@ -301,7 +305,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
 
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                     m->addr[lane] += w->roBase;
@@ -318,7 +322,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
             {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     if (mask[lane]) {
                         assert(m->addr[lane] < w->privSizePerItem);
 
@@ -360,7 +364,7 @@ namespace HsailISA
         if (this->segment == Brig::BRIG_SEGMENT_ARG) {
             uint64_t address = this->addr.calcUniform();
 
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     CType data = this->src.template get<CType>(w, lane);
                     DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
@@ -378,7 +382,7 @@ namespace HsailISA
         this->addr.calcVector(w, m->addr);
 
         if (num_src_operands == 1) {
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     ((CType*)m->d_data)[lane] =
                         this->src.template get<CType>(w, lane);
@@ -386,9 +390,9 @@ namespace HsailISA
             }
         } else {
             for (int k= 0; k < num_src_operands; ++k) {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     if (mask[lane]) {
-                        ((CType*)m->d_data)[k * VSZ + lane] =
+                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                             this->src_vect[k].template get<CType>(w, lane);
                     }
                 }
@@ -428,7 +432,7 @@ namespace HsailISA
             // this is a complete hack to get around a compiler bug
             // (the compiler currently generates global access for private
             //  addresses (starting from 0). We need to add the private offset)
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     if (m->addr[lane] < w->privSizePerItem) {
 
@@ -454,7 +458,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
             {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     if (mask[lane]) {
                         assert(m->addr[lane] < w->spillSizePerItem);
 
@@ -483,7 +487,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
             {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     if (mask[lane]) {
                         assert(m->addr[lane] < w->privSizePerItem);
                         m->addr[lane] = m->addr[lane] + lane *
@@ -558,14 +562,14 @@ namespace HsailISA
 
         this->addr.calcVector(w, m->addr);
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             ((CType *)m->a_data)[lane] =
                 this->src[0].template get<CType>(w, lane);
         }
 
         // load second source operand for CAS
         if (NumSrcOperands > 1) {
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 ((CType*)m->x_data)[lane] =
                     this->src[1].template get<CType>(w, lane);
             }
diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc
index 9506a80ab..56ca8047c 100644
--- a/src/arch/hsail/insts/pseudo_inst.cc
+++ b/src/arch/hsail/insts/pseudo_inst.cc
@@ -84,7 +84,7 @@ namespace HsailISA
         int op = 0;
         bool got_op = false;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val0 = src1.get<int>(w, lane, 0);
                 if (got_op) {
@@ -182,7 +182,7 @@ namespace HsailISA
     {
     #if TRACING_ON
         const VectorMask &mask = w->get_pred();
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
                 int src_val2 = src1.get<int>(w, lane, 2);
@@ -205,7 +205,7 @@ namespace HsailISA
     {
     #if TRACING_ON
         const VectorMask &mask = w->get_pred();
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
                 int src_val2 = src1.get<int>(w, lane, 2);
@@ -231,7 +231,7 @@ namespace HsailISA
         std::string res_str;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 7)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -270,7 +270,7 @@ namespace HsailISA
         int src_val3 = -1;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 7)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -311,7 +311,7 @@ namespace HsailISA
         std::string res_str;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 3)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -350,7 +350,7 @@ namespace HsailISA
         int src_val3 = -1;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 3)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -391,7 +391,7 @@ namespace HsailISA
         std::string res_str;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 7)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -430,7 +430,7 @@ namespace HsailISA
         res_str += csprintf("  Executing on CU #%i\n", w->computeUnit->cu_id);
         res_str += csprintf("  Exec mask: ");
 
-        for (int i = VSZ - 1; i >= 0; --i) {
+        for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
             if (w->execMask(i))
                 res_str += "1";
             else
@@ -458,7 +458,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int res = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
                 dest.set<int>(w, lane, res);
@@ -477,14 +477,14 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int res = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
                 res += src_val1;
             }
         }
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 dest.set<int>(w, lane, res);
             }
@@ -497,19 +497,19 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int res = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
 
                 if (src_val1) {
-                    if (lane < (VSZ/2)) {
+                    if (lane < (w->computeUnit->wfSize()/2)) {
                         res = res | ((uint32_t)(1) << lane);
                     }
                 }
             }
         }
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 dest.set<int>(w, lane, res);
             }
@@ -521,19 +521,20 @@ namespace HsailISA
     {
         const VectorMask &mask = w->get_pred();
         int res = 0;
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
 
                 if (src_val1) {
-                    if (lane >= (VSZ/2)) {
-                        res = res | ((uint32_t)(1) << (lane - (VSZ/2)));
+                    if (lane >= (w->computeUnit->wfSize()/2)) {
+                        res = res | ((uint32_t)(1) <<
+                                     (lane - (w->computeUnit->wfSize()/2)));
                     }
                 }
             }
         }
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 dest.set<int>(w, lane, res);
             }
@@ -546,7 +547,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int max_cnt = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 w->bar_cnt[lane]++;
 
@@ -567,7 +568,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int max_cnt = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 w->bar_cnt[lane]--;
             }
@@ -592,7 +593,7 @@ namespace HsailISA
     {
         const VectorMask &mask = w->get_pred();
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
                 panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
@@ -605,7 +606,7 @@ namespace HsailISA
     Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
     {
         // the address is in src1 | src2
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             int src_val1 = src1.get<int>(w, lane, 1);
             int src_val2 = src1.get<int>(w, lane, 2);
             Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
@@ -622,7 +623,7 @@ namespace HsailISA
 
         calcAddr(w, m);
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
         }
 
@@ -661,7 +662,7 @@ namespace HsailISA
         GPUDynInstPtr m = gpuDynInst;
         calcAddr(w, m);
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
         }
 
@@ -736,7 +737,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int src_val1 = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 src_val1 = src1.get<int>(w, lane, 1);
                 break;
@@ -758,7 +759,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         unsigned mst = true;
 
-        for (int lane = VSZ - 1; lane >= 0; --lane) {
+        for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
             if (mask[lane]) {
                 dest.set<int>(w, lane, mst);
                 mst = false;
@@ -773,7 +774,7 @@ namespace HsailISA
         int res = 0;
         bool got_res = false;
 
-        for (int lane = VSZ - 1; lane >= 0; --lane) {
+        for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
             if (mask[lane]) {
                 if (!got_res) {
                     res = src1.get<int>(w, lane, 1);
author	jkalamat <john.kalamatianos@amd.com>	2016-06-09 11:24:55 -0400
committer	jkalamat <john.kalamatianos@amd.com>	2016-06-09 11:24:55 -0400
commit	3724fb15faafaaca54cc7a500df9c1490a387049 (patch)
tree	bbd671b68ba971087a1cd45b208947c09a622d38 /src/arch/hsail/insts
parent	e5b7b6780f9748b6f13ef91e3e22d53ebdf47968 (diff)
download	gem5-3724fb15faafaaca54cc7a500df9c1490a387049.tar.xz