diff options
Diffstat (limited to 'src/arch/hsail/insts')
-rw-r--r-- | src/arch/hsail/insts/branch.hh | 2 | ||||
-rw-r--r-- | src/arch/hsail/insts/main.cc | 2 | ||||
-rw-r--r-- | src/arch/hsail/insts/mem.hh | 16 | ||||
-rw-r--r-- | src/arch/hsail/insts/mem_impl.hh | 46 | ||||
-rw-r--r-- | src/arch/hsail/insts/pseudo_inst.cc | 57 |
5 files changed, 65 insertions, 58 deletions
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh index f4b00fc8d..45cd876ad 100644 --- a/src/arch/hsail/insts/branch.hh +++ b/src/arch/hsail/insts/branch.hh @@ -279,7 +279,7 @@ namespace HsailISA // taken branch const uint32_t true_pc = getTargetPc(); VectorMask true_mask; - for (unsigned int lane = 0; lane < VSZ; ++lane) { + for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane]; } diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc index 4e70bf46a..004054524 100644 --- a/src/arch/hsail/insts/main.cc +++ b/src/arch/hsail/insts/main.cc @@ -134,7 +134,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); // mask off completed work-items - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { w->init_mask[lane] = 0; } diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh index f2792cd49..1db98d212 100644 --- a/src/arch/hsail/insts/mem.hh +++ b/src/arch/hsail/insts/mem.hh @@ -457,7 +457,7 @@ namespace HsailISA gpuDynInst->statusBitVector = gpuDynInst->exec_mask; if (num_dest_operands > 1) { - for (int i = 0; i < VSZ; ++i) + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) if (gpuDynInst->exec_mask[i]) gpuDynInst->statusVector.push_back(num_dest_operands); else @@ -466,9 +466,10 @@ namespace HsailISA for (int k = 0; k < num_dest_operands; ++k) { - c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ]; + c0 *d = &((c0*)gpuDynInst->d_data) + [k * gpuDynInst->computeUnit()->wfSize()]; - for (int i = 0; i < VSZ; ++i) { + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { if (gpuDynInst->exec_mask[i]) { Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); @@ -1004,7 +1005,7 @@ namespace HsailISA gpuDynInst->statusBitVector = gpuDynInst->exec_mask; if (num_src_operands > 1) { - for (int i = 0; i < VSZ; ++i) + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) if (gpuDynInst->exec_mask[i]) gpuDynInst->statusVector.push_back(num_src_operands); else @@ -1012,9 +1013,10 @@ namespace HsailISA } for (int k = 0; k < num_src_operands; ++k) { - c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ]; + c0 *d = &((c0*)gpuDynInst->d_data) + [k * gpuDynInst->computeUnit()->wfSize()]; - for (int i = 0; i < VSZ; ++i) { + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { if (gpuDynInst->exec_mask[i]) { Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); @@ -1402,7 +1404,7 @@ namespace HsailISA c0 *e = &((c0*) gpuDynInst->a_data)[0]; c0 *f = &((c0*) gpuDynInst->x_data)[0]; - for (int i = 0; i < VSZ; ++i) { + for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { if (gpuDynInst->exec_mask[i]) { Addr vaddr = gpuDynInst->addr[i]; diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh index 94f0cd6aa..8329c6e8a 100644 --- a/src/arch/hsail/insts/mem_impl.hh +++ b/src/arch/hsail/insts/mem_impl.hh @@ -60,14 +60,16 @@ namespace HsailISA typedef typename DestDataType::CType CType M5_VAR_USED; const VectorMask &mask = w->get_pred(); - uint64_t addr_vec[VSZ]; + std::vector<Addr> addr_vec; + addr_vec.resize(w->computeUnit->wfSize(), (Addr)0); this->addr.calcVector(w, addr_vec); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { this->dest.set(w, lane, addr_vec[lane]); } } + addr_vec.clear(); } template<typename MemDataType, typename DestDataType, @@ -121,8 +123,8 @@ namespace HsailISA i->parent->findSymbol(Brig::BrigPrivateSpace, addr); assert(se); - return w->wfSlotId * w->privSizePerItem * VSZ + - se->offset * VSZ + + return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() + + se->offset * w->computeUnit->wfSize() + lane * se->size; */ @@ -139,9 +141,11 @@ namespace HsailISA Addr addr_div8 = addr / 8; Addr addr_mod8 = addr % 8; - Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase; + Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 + + addr_mod8 + w->privBase; - assert(ret < w->privBase + (w->privSizePerItem * VSZ)); + assert(ret < w->privBase + + (w->privSizePerItem * w->computeUnit->wfSize())); return ret; } @@ -175,7 +179,7 @@ namespace HsailISA DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { this->dest.set(w, lane, val); } @@ -184,7 +188,7 @@ namespace HsailISA return; } else if (this->segment == Brig::BRIG_SEGMENT_ARG) { uint64_t address = this->addr.calcUniform(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { MemCType val = w->readCallArgMem<MemCType>(lane, address); @@ -239,7 +243,7 @@ namespace HsailISA // this is a complete hack to get around a compiler bug // (the compiler currently generates global access for private // addresses (starting from 0). We need to add the private offset) - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (m->addr[lane] < w->privSizePerItem) { if (mask[lane]) { // what is the size of the object we are accessing? @@ -267,7 +271,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { // note: this calculation will NOT WORK if the compiler // ever generates loads/stores to the same address with // different widths (e.g., a ld_u32 addr and a ld_u16 addr) @@ -301,7 +305,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { assert(m->addr[lane] + sizeof(MemCType) <= w->roSize); m->addr[lane] += w->roBase; @@ -318,7 +322,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { assert(m->addr[lane] < w->privSizePerItem); @@ -360,7 +364,7 @@ namespace HsailISA if (this->segment == Brig::BRIG_SEGMENT_ARG) { uint64_t address = this->addr.calcUniform(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { CType data = this->src.template get<CType>(w, lane); DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data); @@ -378,7 +382,7 @@ namespace HsailISA this->addr.calcVector(w, m->addr); if (num_src_operands == 1) { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { ((CType*)m->d_data)[lane] = this->src.template get<CType>(w, lane); @@ -386,9 +390,9 @@ namespace HsailISA } } else { for (int k= 0; k < num_src_operands; ++k) { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { - ((CType*)m->d_data)[k * VSZ + lane] = + ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] = this->src_vect[k].template get<CType>(w, lane); } } @@ -428,7 +432,7 @@ namespace HsailISA // this is a complete hack to get around a compiler bug // (the compiler currently generates global access for private // addresses (starting from 0). We need to add the private offset) - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { if (m->addr[lane] < w->privSizePerItem) { @@ -454,7 +458,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { assert(m->addr[lane] < w->spillSizePerItem); @@ -483,7 +487,7 @@ namespace HsailISA m->pipeId = GLBMEM_PIPE; m->latency.set(w->computeUnit->shader->ticks(1)); { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { assert(m->addr[lane] < w->privSizePerItem); m->addr[lane] = m->addr[lane] + lane * @@ -558,14 +562,14 @@ namespace HsailISA this->addr.calcVector(w, m->addr); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { ((CType *)m->a_data)[lane] = this->src[0].template get<CType>(w, lane); } // load second source operand for CAS if (NumSrcOperands > 1) { - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { ((CType*)m->x_data)[lane] = this->src[1].template get<CType>(w, lane); } diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc index 9506a80ab..56ca8047c 100644 --- a/src/arch/hsail/insts/pseudo_inst.cc +++ b/src/arch/hsail/insts/pseudo_inst.cc @@ -84,7 +84,7 @@ namespace HsailISA int op = 0; bool got_op = false; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val0 = src1.get<int>(w, lane, 0); if (got_op) { @@ -182,7 +182,7 @@ namespace HsailISA { #if TRACING_ON const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); int src_val2 = src1.get<int>(w, lane, 2); @@ -205,7 +205,7 @@ namespace HsailISA { #if TRACING_ON const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int64_t src_val1 = src1.get<int64_t>(w, lane, 1); int src_val2 = src1.get<int>(w, lane, 2); @@ -231,7 +231,7 @@ namespace HsailISA std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 7)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -270,7 +270,7 @@ namespace HsailISA int src_val3 = -1; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 7)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -311,7 +311,7 @@ namespace HsailISA std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 3)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -350,7 +350,7 @@ namespace HsailISA int src_val3 = -1; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 3)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -391,7 +391,7 @@ namespace HsailISA std::string res_str; res_str = csprintf("krl_prt (%s)\n", disassemble()); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (!(lane & 7)) { res_str += csprintf("DB%03d: ", (int)w->wfDynId); } @@ -430,7 +430,7 @@ namespace HsailISA res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id); res_str += csprintf(" Exec mask: "); - for (int i = VSZ - 1; i >= 0; --i) { + for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) { if (w->execMask(i)) res_str += "1"; else @@ -458,7 +458,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int res = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); dest.set<int>(w, lane, res); @@ -477,14 +477,14 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int res = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); res += src_val1; } } - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { dest.set<int>(w, lane, res); } @@ -497,19 +497,19 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int res = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); if (src_val1) { - if (lane < (VSZ/2)) { + if (lane < (w->computeUnit->wfSize()/2)) { res = res | ((uint32_t)(1) << lane); } } } } - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { dest.set<int>(w, lane, res); } @@ -521,19 +521,20 @@ namespace HsailISA { const VectorMask &mask = w->get_pred(); int res = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); if (src_val1) { - if (lane >= (VSZ/2)) { - res = res | ((uint32_t)(1) << (lane - (VSZ/2))); + if (lane >= (w->computeUnit->wfSize()/2)) { + res = res | ((uint32_t)(1) << + (lane - (w->computeUnit->wfSize()/2))); } } } } - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { dest.set<int>(w, lane, res); } @@ -546,7 +547,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int max_cnt = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { w->bar_cnt[lane]++; @@ -567,7 +568,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int max_cnt = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { w->bar_cnt[lane]--; } @@ -592,7 +593,7 @@ namespace HsailISA { const VectorMask &mask = w->get_pred(); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { int src_val1 = src1.get<int>(w, lane, 1); panic("OpenCL Code failed assertion #%d. Triggered by lane %s", @@ -605,7 +606,7 @@ namespace HsailISA Call::calcAddr(Wavefront *w, GPUDynInstPtr m) { // the address is in src1 | src2 - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { int src_val1 = src1.get<int>(w, lane, 1); int src_val2 = src1.get<int>(w, lane, 2); Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2); @@ -622,7 +623,7 @@ namespace HsailISA calcAddr(w, m); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3); } @@ -661,7 +662,7 @@ namespace HsailISA GPUDynInstPtr m = gpuDynInst; calcAddr(w, m); - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1); } @@ -736,7 +737,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); int src_val1 = 0; - for (int lane = 0; lane < VSZ; ++lane) { + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { if (mask[lane]) { src_val1 = src1.get<int>(w, lane, 1); break; @@ -758,7 +759,7 @@ namespace HsailISA const VectorMask &mask = w->get_pred(); unsigned mst = true; - for (int lane = VSZ - 1; lane >= 0; --lane) { + for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { if (mask[lane]) { dest.set<int>(w, lane, mst); mst = false; @@ -773,7 +774,7 @@ namespace HsailISA int res = 0; bool got_res = false; - for (int lane = VSZ - 1; lane >= 0; --lane) { + for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { if (mask[lane]) { if (!got_res) { res = src1.get<int>(w, lane, 1); |