diff options
author | jkalamat <john.kalamatianos@amd.com> | 2016-06-09 11:24:55 -0400 |
---|---|---|
committer | jkalamat <john.kalamatianos@amd.com> | 2016-06-09 11:24:55 -0400 |
commit | 3724fb15faafaaca54cc7a500df9c1490a387049 (patch) | |
tree | bbd671b68ba971087a1cd45b208947c09a622d38 /src/arch | |
parent | e5b7b6780f9748b6f13ef91e3e22d53ebdf47968 (diff) | |
download | gem5-3724fb15faafaaca54cc7a500df9c1490a387049.tar.xz |
gpu-compute: parametrize Wavefront size
Eliminate the VSZ constant that defined the Wavefront size (in numbers of work
items); replaced it with a parameter in the GPU.py configuration script.
Changed all data structures dependent on the Wavefront size to be dynamically
sized. Legal values of Wavefront size are 16, 32, 64 for now and checked at
initialization time.
Diffstat (limited to 'src/arch')
-rwxr-xr-x | src/arch/hsail/gen.py | 14 | ||||
-rw-r--r-- | src/arch/hsail/insts/branch.hh | 2 | ||||
-rw-r--r-- | src/arch/hsail/insts/main.cc | 2 | ||||
-rw-r--r-- | src/arch/hsail/insts/mem.hh | 16 | ||||
-rw-r--r-- | src/arch/hsail/insts/mem_impl.hh | 46 | ||||
-rw-r--r-- | src/arch/hsail/insts/pseudo_inst.cc | 57 | ||||
-rw-r--r-- | src/arch/hsail/operand.hh | 44 |
7 files changed, 103 insertions, 78 deletions
diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py index bb369fd10..f77680541 100755 --- a/src/arch/hsail/gen.py +++ b/src/arch/hsail/gen.py @@ -235,7 +235,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { DestCType dest_val = $expr; this->dest.set(w, lane, dest_val); @@ -256,7 +256,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst) const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { SrcCType src_val0 = this->src0.get<SrcCType>(w, lane); DestCType dest_val = $expr; @@ -277,7 +277,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; if ($dest_is_src_flag) { @@ -312,7 +312,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; @@ -346,7 +346,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { DestT dest_val; if ($dest_is_src_flag) { @@ -372,7 +372,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) Wavefront *w = gpuDynInst->wavefront(); const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType dest_val; @@ -401,7 +401,7 @@ $class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst) const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { DestCType dest_val; SrcCType src_val[$num_srcs]; diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh index f4b00fc8d..45cd876ad 100644 --- a/src/arch/hsail/insts/branch.hh +++ b/src/arch/hsail/insts/branch.hh @@ -279,7 +279,7 @@ namespace HsailISA // taken branch const uint32_t true_pc = getTargetPc(); VectorMask true_mask; - for (unsigned int lane = 0; lane < VSZ; ++lane) { + for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane]; } diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc index 4e70bf46a..004054524 100644 --- a/src/arch/hsail/insts/main.cc +++ b/src/arch/hsail/insts/main.cc @@ -134,7 +134,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); // mask off completed work-items - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { w->init_mask[lane] = 0; } diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh index f2792cd49..1db98d212 100644 --- a/src/arch/hsail/insts/mem.hh +++ b/src/arch/hsail/insts/mem.hh @@ -457,7 +457,7 @@ namespace HsailISA gpuDynInst->statusBitVector = gpuDynInst->exec_mask; if (num_dest_operands > 1) { - for (int i = 0; i < VSZ; ++i) + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) if (gpuDynInst->exec_mask[i]) gpuDynInst->statusVector.push_back(num_dest_operands); else @@ -466,9 +466,10 @@ namespace HsailISA for (int k = 0; k < num_dest_operands; ++k) { - c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ]; + c0 *d = &((c0*)gpuDynInst->d_data) + [k * gpuDynInst->computeUnit()->wfSize()]; - for (int i = 0; i < VSZ; ++i) { + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { if (gpuDynInst->exec_mask[i]) { Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); @@ -1004,7 +1005,7 @@ namespace HsailISA gpuDynInst->statusBitVector = gpuDynInst->exec_mask; if (num_src_operands > 1) { - for (int i = 0; i < VSZ; ++i) + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) if (gpuDynInst->exec_mask[i]) gpuDynInst->statusVector.push_back(num_src_operands); else @@ -1012,9 +1013,10 @@ namespace HsailISA } for (int k = 0; k < num_src_operands; ++k) { - c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ]; + c0 *d = &((c0*)gpuDynInst->d_data) + [k * gpuDynInst->computeUnit()->wfSize()]; - for (int i = 0; i < VSZ; ++i) { + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { if (gpuDynInst->exec_mask[i]) { Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); @@ -1402,7 +1404,7 @@ namespace HsailISA c0 *e = &((c0*) gpuDynInst->a_data)[0]; c0 *f = &((c0*) gpuDynInst->x_data)[0]; - for (int i = 0; i < VSZ; ++i) { + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { if (gpuDynInst->exec_mask[i]) { Addr vaddr = gpuDynInst->addr[i]; diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh index 94f0cd6aa..8329c6e8a 100644 --- a/src/arch/hsail/insts/mem_impl.hh +++ b/src/arch/hsail/insts/mem_impl.hh @@ -60,14 +60,16 @@ namespace HsailISA typedef typename DestDataType::CType CType M5_VAR_USED; const VectorMask &mask = w->get_pred(); - uint64_t addr_vec[VSZ]; + std::vector<Addr> addr_vec; + addr_vec.resize(w->computeUnit->wfSize(), (Addr)0); this->addr.calcVector(w, addr_vec); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { this->dest.set(w, lane, addr_vec[lane]); } } + addr_vec.clear(); } template<typename MemDataType, typename DestDataType, @@ -121,8 +123,8 @@ namespace HsailISA i->parent->findSymbol(Brig::BrigPrivateSpace, addr); assert(se); - return w->wfSlotId * w->privSizePerItem * VSZ + - se->offset * VSZ + + return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() + + se->offset * w->computeUnit->wfSize() + lane * se->size; */ @@ -139,9 +141,11 @@ namespace HsailISA Addr addr_div8 = addr / 8; Addr addr_mod8 = addr % 8; - Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase; + Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 + + addr_mod8 + w->privBase; - assert(ret < w->privBase + (w->privSizePerItem * VSZ)); + assert(ret < w->privBase + + (w->privSizePerItem * w->computeUnit->wfSize())); return ret; } @@ -175,7 +179,7 @@ namespace HsailISA DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { this->dest.set(w, lane, val); } @@ -184,7 +188,7 @@ namespace HsailISA return; } else if (this->segment == Brig::BRIG_SEGMENT_ARG) { uint64_t address = this->addr.calcUniform(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { MemCType val = w->readCallArgMem<MemCType>(lane, address); @@ -239,7 +243,7 @@ namespace HsailISA // this is a complete hack to get around a compiler bug // (the compiler currently generates global access for private // addresses (starting from 0). We need to add the private offset) - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (m->addr[lane] < w->privSizePerItem) { if (mask[lane]) { // what is the size of the object we are accessing? @@ -267,7 +271,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { // note: this calculation will NOT WORK if the compiler // ever generates loads/stores to the same address with // different widths (e.g., a ld_u32 addr and a ld_u16 addr) @@ -301,7 +305,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { assert(m->addr[lane] + sizeof(MemCType) <= w->roSize); m->addr[lane] += w->roBase; @@ -318,7 +322,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { assert(m->addr[lane] < w->privSizePerItem); @@ -360,7 +364,7 @@ namespace HsailISA if (this->segment == Brig::BRIG_SEGMENT_ARG) { uint64_t address = this->addr.calcUniform(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType data = this->src.template get<CType>(w, lane); DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data); @@ -378,7 +382,7 @@ namespace HsailISA this->addr.calcVector(w, m->addr); if (num_src_operands == 1) { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { ((CType*)m->d_data)[lane] = this->src.template get<CType>(w, lane); @@ -386,9 +390,9 @@ namespace HsailISA } } else { for (int k= 0; k < num_src_operands; ++k) { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { - ((CType*)m->d_data)[k * VSZ + lane] = + ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] = this->src_vect[k].template get<CType>(w, lane); } } @@ -428,7 +432,7 @@ namespace HsailISA // this is a complete hack to get around a compiler bug // (the compiler currently generates global access for private // addresses (starting from 0). We need to add the private offset) - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { if (m->addr[lane] < w->privSizePerItem) { @@ -454,7 +458,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { assert(m->addr[lane] < w->spillSizePerItem); @@ -483,7 +487,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { assert(m->addr[lane] < w->privSizePerItem); m->addr[lane] = m->addr[lane] + lane * @@ -558,14 +562,14 @@ namespace HsailISA this->addr.calcVector(w, m->addr); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { ((CType *)m->a_data)[lane] = this->src[0].template get<CType>(w, lane); } // load second source operand for CAS if (NumSrcOperands > 1) { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { ((CType*)m->x_data)[lane] = this->src[1].template get<CType>(w, lane); } diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc index 9506a80ab..56ca8047c 100644 --- a/src/arch/hsail/insts/pseudo_inst.cc +++ b/src/arch/hsail/insts/pseudo_inst.cc @@ -84,7 +84,7 @@ namespace HsailISA int op = 0; bool got_op = false; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val0 = src1.get<int>(w, lane, 0); if (got_op) { @@ -182,7 +182,7 @@ namespace HsailISA { #if TRACING_ON const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); int src_val2 = src1.get<int>(w, lane, 2); @@ -205,7 +205,7 @@ namespace HsailISA { #if TRACING_ON const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int64_t src_val1 = src1.get<int64_t>(w, lane, 1); int src_val2 = src1.get<int>(w, lane, 2); @@ -231,7 +231,7 @@ namespace HsailISA std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 7)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -270,7 +270,7 @@ namespace HsailISA int src_val3 = -1; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 7)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -311,7 +311,7 @@ namespace HsailISA std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 3)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -350,7 +350,7 @@ namespace HsailISA int src_val3 = -1; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 3)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -391,7 +391,7 @@ namespace HsailISA std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 7)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -430,7 +430,7 @@ namespace HsailISA res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id); res_str += csprintf(" Exec mask: "); - for (int i = VSZ - 1; i >= 0; --i) { + for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) { if (w->execMask(i)) res_str += "1"; else @@ -458,7 +458,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int res = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); dest.set<int>(w, lane, res); @@ -477,14 +477,14 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int res = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); res += src_val1; } } - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { dest.set<int>(w, lane, res); } @@ -497,19 +497,19 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int res = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); if (src_val1) { - if (lane < (VSZ/2)) { + if (lane < (w->computeUnit->wfSize()/2)) { res = res | ((uint32_t)(1) << lane); } } } } - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { dest.set<int>(w, lane, res); } @@ -521,19 +521,20 @@ namespace HsailISA { const VectorMask &mask = w->get_pred(); int res = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); if (src_val1) { - if (lane >= (VSZ/2)) { - res = res | ((uint32_t)(1) << (lane - (VSZ/2))); + if (lane >= (w->computeUnit->wfSize()/2)) { + res = res | ((uint32_t)(1) << + (lane - (w->computeUnit->wfSize()/2))); } } } } - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { dest.set<int>(w, lane, res); } @@ -546,7 +547,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int max_cnt = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { w->bar_cnt[lane]++; @@ -567,7 +568,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int max_cnt = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { w->bar_cnt[lane]--; } @@ -592,7 +593,7 @@ namespace HsailISA { const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); panic("OpenCL Code failed assertion #%d. Triggered by lane %s", @@ -605,7 +606,7 @@ namespace HsailISA Call::calcAddr(Wavefront *w, GPUDynInstPtr m) { // the address is in src1 | src2 - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { int src_val1 = src1.get<int>(w, lane, 1); int src_val2 = src1.get<int>(w, lane, 2); Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2); @@ -622,7 +623,7 @@ namespace HsailISA calcAddr(w, m); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3); } @@ -661,7 +662,7 @@ namespace HsailISA GPUDynInstPtr m = gpuDynInst; calcAddr(w, m); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1); } @@ -736,7 +737,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int src_val1 = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { src_val1 = src1.get<int>(w, lane, 1); break; @@ -758,7 +759,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); unsigned mst = true; - for (int lane = VSZ - 1; lane >= 0; --lane) { + for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { if (mask[lane]) { dest.set<int>(w, lane, mst); mst = false; @@ -773,7 +774,7 @@ namespace HsailISA int res = 0; bool got_res = false; - for (int lane = VSZ - 1; lane >= 0; --lane) { + for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { if (mask[lane]) { if (!got_res) { res = src1.get<int>(w, lane, 1); diff --git a/src/arch/hsail/operand.hh b/src/arch/hsail/operand.hh index e3d275b10..4d981ee00 100644 --- a/src/arch/hsail/operand.hh +++ b/src/arch/hsail/operand.hh @@ -42,6 +42,7 @@ * Defines classes encapsulating HSAIL instruction operands. */ +#include <limits> #include <string> #include "arch/hsail/Brig.h" @@ -346,6 +347,8 @@ class CRegOperand : public BaseRegOperand template<typename T> class ImmOperand : public BaseOperand { + private: + uint16_t kind; public: T bits; @@ -355,11 +358,21 @@ class ImmOperand : public BaseOperand template<typename OperandType> OperandType - get() + get(Wavefront *w) { assert(sizeof(OperandType) <= sizeof(T)); + panic_if(w == nullptr, "WF pointer needs to be set"); + + switch (kind) { + // immediate operand is WF size + case Brig::BRIG_KIND_OPERAND_WAVESIZE: + return (OperandType)w->computeUnit->wfSize(); + break; - return *(OperandType*)&bits; + default: + return *(OperandType*)&bits; + break; + } } // This version of get() takes a WF* and a lane id for @@ -368,7 +381,7 @@ class ImmOperand : public BaseOperand OperandType get(Wavefront *w, int lane) { - return get<OperandType>(); + return get<OperandType>(w); } }; @@ -388,16 +401,18 @@ ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj) auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp; bits = *((T*)(obj->getData(cbptr->bytes + 4))); - + kind = brigOp->kind; return true; } break; case Brig::BRIG_KIND_OPERAND_WAVESIZE: - bits = VSZ; + kind = brigOp->kind; + bits = std::numeric_limits<unsigned long long>::digits; return true; default: + kind = Brig::BRIG_KIND_NONE; return false; } } @@ -409,6 +424,7 @@ ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at) const Brig::BrigOperand *brigOp = obj->getOperand(opOffset); if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { + kind = Brig::BRIG_KIND_NONE; return false; } @@ -423,6 +439,7 @@ ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at) (const Brig::BrigOperand *)obj->getOperand(*data_offset); if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) { + kind = Brig::BRIG_KIND_NONE; return false; } @@ -456,7 +473,7 @@ class RegOrImmOperand : public BaseOperand OperandType get(Wavefront *w, int lane) { - return is_imm ? imm_op.template get<OperandType>() : + return is_imm ? imm_op.template get<OperandType>(w) : reg_op.template get<OperandType>(w, lane); } @@ -571,7 +588,7 @@ class AddrOperandBase : public BaseOperand uint64_t calcUniformBase(); public: - virtual void calcVector(Wavefront *w, uint64_t *addrVec) = 0; + virtual void calcVector(Wavefront *w, std::vector<Addr> &addrVec) = 0; virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0; uint64_t offset; @@ -586,7 +603,7 @@ class RegAddrOperand : public AddrOperandBase RegOperandType reg; void init(unsigned opOffset, const BrigObject *obj); uint64_t calcUniform(); - void calcVector(Wavefront *w, uint64_t *addrVec); + void calcVector(Wavefront *w, std::vector<Addr> &addrVec); uint64_t calcLane(Wavefront *w, int lane=0); uint32_t opSize() { return reg.opSize(); } bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; } @@ -641,11 +658,12 @@ RegAddrOperand<RegOperandType>::calcUniform() template<typename RegOperandType> void -RegAddrOperand<RegOperandType>::calcVector(Wavefront *w, uint64_t *addrVec) +RegAddrOperand<RegOperandType>::calcVector(Wavefront *w, + std::vector<Addr> &addrVec) { Addr address = calcUniformBase(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (w->execMask(lane)) { if (reg.regFileChar == 's') { addrVec[lane] = address + reg.template get<uint32_t>(w, lane); @@ -680,7 +698,7 @@ class NoRegAddrOperand : public AddrOperandBase public: void init(unsigned opOffset, const BrigObject *obj); uint64_t calcUniform(); - void calcVector(Wavefront *w, uint64_t *addrVec); + void calcVector(Wavefront *w, std::vector<Addr> &addrVec); uint64_t calcLane(Wavefront *w, int lane=0); std::string disassemble(); }; @@ -698,11 +716,11 @@ NoRegAddrOperand::calcLane(Wavefront *w, int lane) } inline void -NoRegAddrOperand::calcVector(Wavefront *w, uint64_t *addrVec) +NoRegAddrOperand::calcVector(Wavefront *w, std::vector<Addr> &addrVec) { uint64_t address = calcUniformBase(); - for (int lane = 0; lane < VSZ; ++lane) + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) addrVec[lane] = address; } |