7 files changed, 103 insertions, 78 deletions
diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py
index bb369fd10..f77680541 100755
--- a/src/arch/hsail/gen.py
+++ b/src/arch/hsail/gen.py
@@ -235,7 +235,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst)
 
     const VectorMask &mask = w->get_pred();
 
-    for (int lane = 0; lane < VSZ; ++lane) {
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
         if (mask[lane]) {
             DestCType dest_val = $expr;
             this->dest.set(w, lane, dest_val);
@@ -256,7 +256,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst)
 
     const VectorMask &mask = w->get_pred();
 
-    for (int lane = 0; lane < VSZ; ++lane) {
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
         if (mask[lane]) {
             SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
             DestCType dest_val = $expr;
@@ -277,7 +277,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
 
     const VectorMask &mask = w->get_pred();
 
-    for (int lane = 0; lane < VSZ; ++lane) {
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
         if (mask[lane]) {
             CType dest_val;
             if ($dest_is_src_flag) {
@@ -312,7 +312,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
 
     const VectorMask &mask = w->get_pred();
 
-    for (int lane = 0; lane < VSZ; ++lane) {
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
         if (mask[lane]) {
             CType dest_val;
 
@@ -346,7 +346,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
 
     const VectorMask &mask = w->get_pred();
 
-    for (int lane = 0; lane < VSZ; ++lane) {
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
         if (mask[lane]) {
             DestT dest_val;
             if ($dest_is_src_flag) {
@@ -372,7 +372,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
     Wavefront *w = gpuDynInst->wavefront();
 
     const VectorMask &mask = w->get_pred();
-    for (int lane = 0; lane < VSZ; ++lane) {
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
         if (mask[lane]) {
             CType dest_val;
 
@@ -401,7 +401,7 @@ $class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
 
     const VectorMask &mask = w->get_pred();
 
-    for (int lane = 0; lane < VSZ; ++lane) {
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
         if (mask[lane]) {
             DestCType dest_val;
             SrcCType src_val[$num_srcs];
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh
index f4b00fc8d..45cd876ad 100644
--- a/src/arch/hsail/insts/branch.hh
+++ b/src/arch/hsail/insts/branch.hh
@@ -279,7 +279,7 @@ namespace HsailISA
         // taken branch
         const uint32_t true_pc = getTargetPc();
         VectorMask true_mask;
-        for (unsigned int lane = 0; lane < VSZ; ++lane) {
+        for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
         }
 
diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc
index 4e70bf46a..004054524 100644
--- a/src/arch/hsail/insts/main.cc
+++ b/src/arch/hsail/insts/main.cc
@@ -134,7 +134,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
 
         // mask off completed work-items
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 w->init_mask[lane] = 0;
             }
diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh
index f2792cd49..1db98d212 100644
--- a/src/arch/hsail/insts/mem.hh
+++ b/src/arch/hsail/insts/mem.hh
@@ -457,7 +457,7 @@ namespace HsailISA
             gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
 
             if (num_dest_operands > 1) {
-                for (int i = 0; i < VSZ; ++i)
+                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
                     if (gpuDynInst->exec_mask[i])
                         gpuDynInst->statusVector.push_back(num_dest_operands);
                     else
@@ -466,9 +466,10 @@ namespace HsailISA
 
             for (int k = 0; k < num_dest_operands; ++k) {
 
-                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+                c0 *d = &((c0*)gpuDynInst->d_data)
+                    [k * gpuDynInst->computeUnit()->wfSize()];
 
-                for (int i = 0; i < VSZ; ++i) {
+                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                     if (gpuDynInst->exec_mask[i]) {
                         Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
 
@@ -1004,7 +1005,7 @@ namespace HsailISA
             gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
 
             if (num_src_operands > 1) {
-                for (int i = 0; i < VSZ; ++i)
+                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
                     if (gpuDynInst->exec_mask[i])
                         gpuDynInst->statusVector.push_back(num_src_operands);
                     else
@@ -1012,9 +1013,10 @@ namespace HsailISA
             }
 
             for (int k = 0; k < num_src_operands; ++k) {
-                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+                c0 *d = &((c0*)gpuDynInst->d_data)
+                    [k * gpuDynInst->computeUnit()->wfSize()];
 
-                for (int i = 0; i < VSZ; ++i) {
+                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                     if (gpuDynInst->exec_mask[i]) {
                         Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
 
@@ -1402,7 +1404,7 @@ namespace HsailISA
             c0 *e = &((c0*) gpuDynInst->a_data)[0];
             c0 *f = &((c0*) gpuDynInst->x_data)[0];
 
-            for (int i = 0; i < VSZ; ++i) {
+            for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                 if (gpuDynInst->exec_mask[i]) {
                     Addr vaddr = gpuDynInst->addr[i];
 
diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh
index 94f0cd6aa..8329c6e8a 100644
--- a/src/arch/hsail/insts/mem_impl.hh
+++ b/src/arch/hsail/insts/mem_impl.hh
@@ -60,14 +60,16 @@ namespace HsailISA
 
         typedef typename DestDataType::CType CType M5_VAR_USED;
         const VectorMask &mask = w->get_pred();
-        uint64_t addr_vec[VSZ];
+        std::vector<Addr> addr_vec;
+        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
         this->addr.calcVector(w, addr_vec);
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 this->dest.set(w, lane, addr_vec[lane]);
             }
         }
+        addr_vec.clear();
     }
 
     template<typename MemDataType, typename DestDataType,
@@ -121,8 +123,8 @@ namespace HsailISA
             i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
         assert(se);
 
-        return w->wfSlotId * w->privSizePerItem * VSZ +
-            se->offset * VSZ +
+        return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
+            se->offset * w->computeUnit->wfSize() +
             lane * se->size;
         */
 
@@ -139,9 +141,11 @@ namespace HsailISA
         Addr addr_div8 = addr / 8;
         Addr addr_mod8 = addr % 8;
 
-        Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase;
+        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
+            addr_mod8 + w->privBase;
 
-        assert(ret < w->privBase + (w->privSizePerItem * VSZ));
+        assert(ret < w->privBase +
+               (w->privSizePerItem * w->computeUnit->wfSize()));
 
         return ret;
     }
@@ -175,7 +179,7 @@ namespace HsailISA
 
             DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);
 
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     this->dest.set(w, lane, val);
                 }
@@ -184,7 +188,7 @@ namespace HsailISA
             return;
         } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
             uint64_t address = this->addr.calcUniform();
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     MemCType val = w->readCallArgMem<MemCType>(lane, address);
 
@@ -239,7 +243,7 @@ namespace HsailISA
             // this is a complete hack to get around a compiler bug
             // (the compiler currently generates global access for private
             //  addresses (starting from 0). We need to add the private offset)
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (m->addr[lane] < w->privSizePerItem) {
                     if (mask[lane]) {
                         // what is the size of the object we are accessing?
@@ -267,7 +271,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
             {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     //  note: this calculation will NOT WORK if the compiler
                     //  ever generates loads/stores to the same address with
                     //  different widths (e.g., a ld_u32 addr and a ld_u16 addr)
@@ -301,7 +305,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
 
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                     m->addr[lane] += w->roBase;
@@ -318,7 +322,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
             {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     if (mask[lane]) {
                         assert(m->addr[lane] < w->privSizePerItem);
 
@@ -360,7 +364,7 @@ namespace HsailISA
         if (this->segment == Brig::BRIG_SEGMENT_ARG) {
             uint64_t address = this->addr.calcUniform();
 
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     CType data = this->src.template get<CType>(w, lane);
                     DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
@@ -378,7 +382,7 @@ namespace HsailISA
         this->addr.calcVector(w, m->addr);
 
         if (num_src_operands == 1) {
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     ((CType*)m->d_data)[lane] =
                         this->src.template get<CType>(w, lane);
@@ -386,9 +390,9 @@ namespace HsailISA
             }
         } else {
             for (int k= 0; k < num_src_operands; ++k) {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     if (mask[lane]) {
-                        ((CType*)m->d_data)[k * VSZ + lane] =
+                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                             this->src_vect[k].template get<CType>(w, lane);
                     }
                 }
@@ -428,7 +432,7 @@ namespace HsailISA
             // this is a complete hack to get around a compiler bug
             // (the compiler currently generates global access for private
             //  addresses (starting from 0). We need to add the private offset)
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 if (mask[lane]) {
                     if (m->addr[lane] < w->privSizePerItem) {
 
@@ -454,7 +458,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
             {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     if (mask[lane]) {
                         assert(m->addr[lane] < w->spillSizePerItem);
 
@@ -483,7 +487,7 @@ namespace HsailISA
             m->pipeId = GLBMEM_PIPE;
             m->latency.set(w->computeUnit->shader->ticks(1));
             {
-                for (int lane = 0; lane < VSZ; ++lane) {
+                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                     if (mask[lane]) {
                         assert(m->addr[lane] < w->privSizePerItem);
                         m->addr[lane] = m->addr[lane] + lane *
@@ -558,14 +562,14 @@ namespace HsailISA
 
         this->addr.calcVector(w, m->addr);
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             ((CType *)m->a_data)[lane] =
                 this->src[0].template get<CType>(w, lane);
         }
 
         // load second source operand for CAS
         if (NumSrcOperands > 1) {
-            for (int lane = 0; lane < VSZ; ++lane) {
+            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                 ((CType*)m->x_data)[lane] =
                     this->src[1].template get<CType>(w, lane);
             }
diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc
index 9506a80ab..56ca8047c 100644
--- a/src/arch/hsail/insts/pseudo_inst.cc
+++ b/src/arch/hsail/insts/pseudo_inst.cc
@@ -84,7 +84,7 @@ namespace HsailISA
         int op = 0;
         bool got_op = false;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val0 = src1.get<int>(w, lane, 0);
                 if (got_op) {
@@ -182,7 +182,7 @@ namespace HsailISA
     {
     #if TRACING_ON
         const VectorMask &mask = w->get_pred();
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
                 int src_val2 = src1.get<int>(w, lane, 2);
@@ -205,7 +205,7 @@ namespace HsailISA
     {
     #if TRACING_ON
         const VectorMask &mask = w->get_pred();
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
                 int src_val2 = src1.get<int>(w, lane, 2);
@@ -231,7 +231,7 @@ namespace HsailISA
         std::string res_str;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 7)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -270,7 +270,7 @@ namespace HsailISA
         int src_val3 = -1;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 7)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -311,7 +311,7 @@ namespace HsailISA
         std::string res_str;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 3)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -350,7 +350,7 @@ namespace HsailISA
         int src_val3 = -1;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 3)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -391,7 +391,7 @@ namespace HsailISA
         std::string res_str;
         res_str = csprintf("krl_prt (%s)\n", disassemble());
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (!(lane & 7)) {
                 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
             }
@@ -430,7 +430,7 @@ namespace HsailISA
         res_str += csprintf("  Executing on CU #%i\n", w->computeUnit->cu_id);
         res_str += csprintf("  Exec mask: ");
 
-        for (int i = VSZ - 1; i >= 0; --i) {
+        for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
             if (w->execMask(i))
                 res_str += "1";
             else
@@ -458,7 +458,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int res = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
                 dest.set<int>(w, lane, res);
@@ -477,14 +477,14 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int res = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
                 res += src_val1;
             }
         }
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 dest.set<int>(w, lane, res);
             }
@@ -497,19 +497,19 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int res = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
 
                 if (src_val1) {
-                    if (lane < (VSZ/2)) {
+                    if (lane < (w->computeUnit->wfSize()/2)) {
                         res = res | ((uint32_t)(1) << lane);
                     }
                 }
             }
         }
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 dest.set<int>(w, lane, res);
             }
@@ -521,19 +521,20 @@ namespace HsailISA
     {
         const VectorMask &mask = w->get_pred();
         int res = 0;
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
 
                 if (src_val1) {
-                    if (lane >= (VSZ/2)) {
-                        res = res | ((uint32_t)(1) << (lane - (VSZ/2)));
+                    if (lane >= (w->computeUnit->wfSize()/2)) {
+                        res = res | ((uint32_t)(1) <<
+                                     (lane - (w->computeUnit->wfSize()/2)));
                     }
                 }
             }
         }
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 dest.set<int>(w, lane, res);
             }
@@ -546,7 +547,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int max_cnt = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 w->bar_cnt[lane]++;
 
@@ -567,7 +568,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int max_cnt = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 w->bar_cnt[lane]--;
             }
@@ -592,7 +593,7 @@ namespace HsailISA
     {
         const VectorMask &mask = w->get_pred();
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 int src_val1 = src1.get<int>(w, lane, 1);
                 panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
@@ -605,7 +606,7 @@ namespace HsailISA
     Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
     {
         // the address is in src1 | src2
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             int src_val1 = src1.get<int>(w, lane, 1);
             int src_val2 = src1.get<int>(w, lane, 2);
             Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
@@ -622,7 +623,7 @@ namespace HsailISA
 
         calcAddr(w, m);
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
         }
 
@@ -661,7 +662,7 @@ namespace HsailISA
         GPUDynInstPtr m = gpuDynInst;
         calcAddr(w, m);
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
         }
 
@@ -736,7 +737,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         int src_val1 = 0;
 
-        for (int lane = 0; lane < VSZ; ++lane) {
+        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
             if (mask[lane]) {
                 src_val1 = src1.get<int>(w, lane, 1);
                 break;
@@ -758,7 +759,7 @@ namespace HsailISA
         const VectorMask &mask = w->get_pred();
         unsigned mst = true;
 
-        for (int lane = VSZ - 1; lane >= 0; --lane) {
+        for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
             if (mask[lane]) {
                 dest.set<int>(w, lane, mst);
                 mst = false;
@@ -773,7 +774,7 @@ namespace HsailISA
         int res = 0;
         bool got_res = false;
 
-        for (int lane = VSZ - 1; lane >= 0; --lane) {
+        for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
             if (mask[lane]) {
                 if (!got_res) {
                     res = src1.get<int>(w, lane, 1);
diff --git a/src/arch/hsail/operand.hh b/src/arch/hsail/operand.hh
index e3d275b10..4d981ee00 100644
--- a/src/arch/hsail/operand.hh
+++ b/src/arch/hsail/operand.hh
@@ -42,6 +42,7 @@
  *  Defines classes encapsulating HSAIL instruction operands.
  */
 
+#include <limits>
 #include <string>
 
 #include "arch/hsail/Brig.h"
@@ -346,6 +347,8 @@ class CRegOperand : public BaseRegOperand
 template<typename T>
 class ImmOperand : public BaseOperand
 {
+  private:
+    uint16_t kind;
   public:
     T bits;
 
@@ -355,11 +358,21 @@ class ImmOperand : public BaseOperand
 
     template<typename OperandType>
     OperandType
-    get()
+    get(Wavefront *w)
     {
         assert(sizeof(OperandType) <= sizeof(T));
+        panic_if(w == nullptr, "WF pointer needs to be set");
+
+        switch (kind) {
+          // immediate operand is WF size
+          case Brig::BRIG_KIND_OPERAND_WAVESIZE:
+            return (OperandType)w->computeUnit->wfSize();
+            break;
 
-        return *(OperandType*)&bits;
+          default:
+            return *(OperandType*)&bits;
+            break;
+        }
     }
 
     // This version of get() takes a WF* and a lane id for
@@ -368,7 +381,7 @@ class ImmOperand : public BaseOperand
     OperandType
     get(Wavefront *w, int lane)
     {
-        return get<OperandType>();
+        return get<OperandType>(w);
     }
 };
 
@@ -388,16 +401,18 @@ ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj)
             auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp;
 
             bits = *((T*)(obj->getData(cbptr->bytes + 4)));
-
+            kind = brigOp->kind;
             return true;
         }
         break;
 
       case Brig::BRIG_KIND_OPERAND_WAVESIZE:
-        bits = VSZ;
+        kind = brigOp->kind;
+        bits = std::numeric_limits<unsigned long long>::digits;
         return true;
 
       default:
+        kind = Brig::BRIG_KIND_NONE;
         return false;
     }
 }
@@ -409,6 +424,7 @@ ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
     const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
 
     if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+        kind = Brig::BRIG_KIND_NONE;
         return false;
     }
 
@@ -423,6 +439,7 @@ ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
         (const Brig::BrigOperand *)obj->getOperand(*data_offset);
 
     if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
+        kind = Brig::BRIG_KIND_NONE;
         return false;
     }
 
@@ -456,7 +473,7 @@ class RegOrImmOperand : public BaseOperand
     OperandType
     get(Wavefront *w, int lane)
     {
-        return is_imm ?  imm_op.template get<OperandType>() :
+        return is_imm ?  imm_op.template get<OperandType>(w) :
                          reg_op.template get<OperandType>(w, lane);
     }
 
@@ -571,7 +588,7 @@ class AddrOperandBase : public BaseOperand
     uint64_t calcUniformBase();
 
   public:
-    virtual void calcVector(Wavefront *w, uint64_t *addrVec) = 0;
+    virtual void calcVector(Wavefront *w, std::vector<Addr> &addrVec) = 0;
     virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0;
 
     uint64_t offset;
@@ -586,7 +603,7 @@ class RegAddrOperand : public AddrOperandBase
     RegOperandType reg;
     void init(unsigned opOffset, const BrigObject *obj);
     uint64_t calcUniform();
-    void calcVector(Wavefront *w, uint64_t *addrVec);
+    void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
     uint64_t calcLane(Wavefront *w, int lane=0);
     uint32_t opSize() { return reg.opSize(); }
     bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; }
@@ -641,11 +658,12 @@ RegAddrOperand<RegOperandType>::calcUniform()
 
 template<typename RegOperandType>
 void
-RegAddrOperand<RegOperandType>::calcVector(Wavefront *w, uint64_t *addrVec)
+RegAddrOperand<RegOperandType>::calcVector(Wavefront *w,
+                                           std::vector<Addr> &addrVec)
 {
     Addr address = calcUniformBase();
 
-    for (int lane = 0; lane < VSZ; ++lane) {
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
         if (w->execMask(lane)) {
             if (reg.regFileChar == 's') {
                 addrVec[lane] = address + reg.template get<uint32_t>(w, lane);
@@ -680,7 +698,7 @@ class NoRegAddrOperand : public AddrOperandBase
   public:
     void init(unsigned opOffset, const BrigObject *obj);
     uint64_t calcUniform();
-    void calcVector(Wavefront *w, uint64_t *addrVec);
+    void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
     uint64_t calcLane(Wavefront *w, int lane=0);
     std::string disassemble();
 };
@@ -698,11 +716,11 @@ NoRegAddrOperand::calcLane(Wavefront *w, int lane)
 }
 
 inline void
-NoRegAddrOperand::calcVector(Wavefront *w, uint64_t *addrVec)
+NoRegAddrOperand::calcVector(Wavefront *w, std::vector<Addr> &addrVec)
 {
     uint64_t address = calcUniformBase();
 
-    for (int lane = 0; lane < VSZ; ++lane)
+    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane)
         addrVec[lane] = address;
 }