summaryrefslogtreecommitdiff
path: root/src/arch/hsail
diff options
context:
space:
mode:
Diffstat (limited to 'src/arch/hsail')
-rwxr-xr-xsrc/arch/hsail/gen.py14
-rw-r--r--src/arch/hsail/insts/branch.hh2
-rw-r--r--src/arch/hsail/insts/main.cc2
-rw-r--r--src/arch/hsail/insts/mem.hh16
-rw-r--r--src/arch/hsail/insts/mem_impl.hh46
-rw-r--r--src/arch/hsail/insts/pseudo_inst.cc57
-rw-r--r--src/arch/hsail/operand.hh44
7 files changed, 103 insertions, 78 deletions
diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py
index bb369fd10..f77680541 100755
--- a/src/arch/hsail/gen.py
+++ b/src/arch/hsail/gen.py
@@ -235,7 +235,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst)
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestCType dest_val = $expr;
this->dest.set(w, lane, dest_val);
@@ -256,7 +256,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst)
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
DestCType dest_val = $expr;
@@ -277,7 +277,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
if ($dest_is_src_flag) {
@@ -312,7 +312,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
@@ -346,7 +346,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestT dest_val;
if ($dest_is_src_flag) {
@@ -372,7 +372,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
@@ -401,7 +401,7 @@ $class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestCType dest_val;
SrcCType src_val[$num_srcs];
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh
index f4b00fc8d..45cd876ad 100644
--- a/src/arch/hsail/insts/branch.hh
+++ b/src/arch/hsail/insts/branch.hh
@@ -279,7 +279,7 @@ namespace HsailISA
// taken branch
const uint32_t true_pc = getTargetPc();
VectorMask true_mask;
- for (unsigned int lane = 0; lane < VSZ; ++lane) {
+ for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
}
diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc
index 4e70bf46a..004054524 100644
--- a/src/arch/hsail/insts/main.cc
+++ b/src/arch/hsail/insts/main.cc
@@ -134,7 +134,7 @@ namespace HsailISA
const VectorMask &mask = w->get_pred();
// mask off completed work-items
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
w->init_mask[lane] = 0;
}
diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh
index f2792cd49..1db98d212 100644
--- a/src/arch/hsail/insts/mem.hh
+++ b/src/arch/hsail/insts/mem.hh
@@ -457,7 +457,7 @@ namespace HsailISA
gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
if (num_dest_operands > 1) {
- for (int i = 0; i < VSZ; ++i)
+ for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
if (gpuDynInst->exec_mask[i])
gpuDynInst->statusVector.push_back(num_dest_operands);
else
@@ -466,9 +466,10 @@ namespace HsailISA
for (int k = 0; k < num_dest_operands; ++k) {
- c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+ c0 *d = &((c0*)gpuDynInst->d_data)
+ [k * gpuDynInst->computeUnit()->wfSize()];
- for (int i = 0; i < VSZ; ++i) {
+ for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
@@ -1004,7 +1005,7 @@ namespace HsailISA
gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
if (num_src_operands > 1) {
- for (int i = 0; i < VSZ; ++i)
+ for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
if (gpuDynInst->exec_mask[i])
gpuDynInst->statusVector.push_back(num_src_operands);
else
@@ -1012,9 +1013,10 @@ namespace HsailISA
}
for (int k = 0; k < num_src_operands; ++k) {
- c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];
+ c0 *d = &((c0*)gpuDynInst->d_data)
+ [k * gpuDynInst->computeUnit()->wfSize()];
- for (int i = 0; i < VSZ; ++i) {
+ for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
@@ -1402,7 +1404,7 @@ namespace HsailISA
c0 *e = &((c0*) gpuDynInst->a_data)[0];
c0 *f = &((c0*) gpuDynInst->x_data)[0];
- for (int i = 0; i < VSZ; ++i) {
+ for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
if (gpuDynInst->exec_mask[i]) {
Addr vaddr = gpuDynInst->addr[i];
diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh
index 94f0cd6aa..8329c6e8a 100644
--- a/src/arch/hsail/insts/mem_impl.hh
+++ b/src/arch/hsail/insts/mem_impl.hh
@@ -60,14 +60,16 @@ namespace HsailISA
typedef typename DestDataType::CType CType M5_VAR_USED;
const VectorMask &mask = w->get_pred();
- uint64_t addr_vec[VSZ];
+ std::vector<Addr> addr_vec;
+ addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
this->addr.calcVector(w, addr_vec);
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
this->dest.set(w, lane, addr_vec[lane]);
}
}
+ addr_vec.clear();
}
template<typename MemDataType, typename DestDataType,
@@ -121,8 +123,8 @@ namespace HsailISA
i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
assert(se);
- return w->wfSlotId * w->privSizePerItem * VSZ +
- se->offset * VSZ +
+ return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
+ se->offset * w->computeUnit->wfSize() +
lane * se->size;
*/
@@ -139,9 +141,11 @@ namespace HsailISA
Addr addr_div8 = addr / 8;
Addr addr_mod8 = addr % 8;
- Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase;
+ Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
+ addr_mod8 + w->privBase;
- assert(ret < w->privBase + (w->privSizePerItem * VSZ));
+ assert(ret < w->privBase +
+ (w->privSizePerItem * w->computeUnit->wfSize()));
return ret;
}
@@ -175,7 +179,7 @@ namespace HsailISA
DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
this->dest.set(w, lane, val);
}
@@ -184,7 +188,7 @@ namespace HsailISA
return;
} else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
uint64_t address = this->addr.calcUniform();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
MemCType val = w->readCallArgMem<MemCType>(lane, address);
@@ -239,7 +243,7 @@ namespace HsailISA
// this is a complete hack to get around a compiler bug
// (the compiler currently generates global access for private
// addresses (starting from 0). We need to add the private offset)
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (m->addr[lane] < w->privSizePerItem) {
if (mask[lane]) {
// what is the size of the object we are accessing?
@@ -267,7 +271,7 @@ namespace HsailISA
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
// note: this calculation will NOT WORK if the compiler
// ever generates loads/stores to the same address with
// different widths (e.g., a ld_u32 addr and a ld_u16 addr)
@@ -301,7 +305,7 @@ namespace HsailISA
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
m->addr[lane] += w->roBase;
@@ -318,7 +322,7 @@ namespace HsailISA
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
assert(m->addr[lane] < w->privSizePerItem);
@@ -360,7 +364,7 @@ namespace HsailISA
if (this->segment == Brig::BRIG_SEGMENT_ARG) {
uint64_t address = this->addr.calcUniform();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType data = this->src.template get<CType>(w, lane);
DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
@@ -378,7 +382,7 @@ namespace HsailISA
this->addr.calcVector(w, m->addr);
if (num_src_operands == 1) {
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
((CType*)m->d_data)[lane] =
this->src.template get<CType>(w, lane);
@@ -386,9 +390,9 @@ namespace HsailISA
}
} else {
for (int k= 0; k < num_src_operands; ++k) {
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
- ((CType*)m->d_data)[k * VSZ + lane] =
+ ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
this->src_vect[k].template get<CType>(w, lane);
}
}
@@ -428,7 +432,7 @@ namespace HsailISA
// this is a complete hack to get around a compiler bug
// (the compiler currently generates global access for private
// addresses (starting from 0). We need to add the private offset)
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
if (m->addr[lane] < w->privSizePerItem) {
@@ -454,7 +458,7 @@ namespace HsailISA
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
assert(m->addr[lane] < w->spillSizePerItem);
@@ -483,7 +487,7 @@ namespace HsailISA
m->pipeId = GLBMEM_PIPE;
m->latency.set(w->computeUnit->shader->ticks(1));
{
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
assert(m->addr[lane] < w->privSizePerItem);
m->addr[lane] = m->addr[lane] + lane *
@@ -558,14 +562,14 @@ namespace HsailISA
this->addr.calcVector(w, m->addr);
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
((CType *)m->a_data)[lane] =
this->src[0].template get<CType>(w, lane);
}
// load second source operand for CAS
if (NumSrcOperands > 1) {
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
((CType*)m->x_data)[lane] =
this->src[1].template get<CType>(w, lane);
}
diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc
index 9506a80ab..56ca8047c 100644
--- a/src/arch/hsail/insts/pseudo_inst.cc
+++ b/src/arch/hsail/insts/pseudo_inst.cc
@@ -84,7 +84,7 @@ namespace HsailISA
int op = 0;
bool got_op = false;
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
int src_val0 = src1.get<int>(w, lane, 0);
if (got_op) {
@@ -182,7 +182,7 @@ namespace HsailISA
{
#if TRACING_ON
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
int src_val1 = src1.get<int>(w, lane, 1);
int src_val2 = src1.get<int>(w, lane, 2);
@@ -205,7 +205,7 @@ namespace HsailISA
{
#if TRACING_ON
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
int src_val2 = src1.get<int>(w, lane, 2);
@@ -231,7 +231,7 @@ namespace HsailISA
std::string res_str;
res_str = csprintf("krl_prt (%s)\n", disassemble());
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (!(lane & 7)) {
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
}
@@ -270,7 +270,7 @@ namespace HsailISA
int src_val3 = -1;
res_str = csprintf("krl_prt (%s)\n", disassemble());
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (!(lane & 7)) {
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
}
@@ -311,7 +311,7 @@ namespace HsailISA
std::string res_str;
res_str = csprintf("krl_prt (%s)\n", disassemble());
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (!(lane & 3)) {
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
}
@@ -350,7 +350,7 @@ namespace HsailISA
int src_val3 = -1;
res_str = csprintf("krl_prt (%s)\n", disassemble());
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (!(lane & 3)) {
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
}
@@ -391,7 +391,7 @@ namespace HsailISA
std::string res_str;
res_str = csprintf("krl_prt (%s)\n", disassemble());
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (!(lane & 7)) {
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
}
@@ -430,7 +430,7 @@ namespace HsailISA
res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
res_str += csprintf(" Exec mask: ");
- for (int i = VSZ - 1; i >= 0; --i) {
+ for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
if (w->execMask(i))
res_str += "1";
else
@@ -458,7 +458,7 @@ namespace HsailISA
const VectorMask &mask = w->get_pred();
int res = 0;
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
int src_val1 = src1.get<int>(w, lane, 1);
dest.set<int>(w, lane, res);
@@ -477,14 +477,14 @@ namespace HsailISA
const VectorMask &mask = w->get_pred();
int res = 0;
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
int src_val1 = src1.get<int>(w, lane, 1);
res += src_val1;
}
}
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
dest.set<int>(w, lane, res);
}
@@ -497,19 +497,19 @@ namespace HsailISA
const VectorMask &mask = w->get_pred();
int res = 0;
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
int src_val1 = src1.get<int>(w, lane, 1);
if (src_val1) {
- if (lane < (VSZ/2)) {
+ if (lane < (w->computeUnit->wfSize()/2)) {
res = res | ((uint32_t)(1) << lane);
}
}
}
}
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
dest.set<int>(w, lane, res);
}
@@ -521,19 +521,20 @@ namespace HsailISA
{
const VectorMask &mask = w->get_pred();
int res = 0;
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
int src_val1 = src1.get<int>(w, lane, 1);
if (src_val1) {
- if (lane >= (VSZ/2)) {
- res = res | ((uint32_t)(1) << (lane - (VSZ/2)));
+ if (lane >= (w->computeUnit->wfSize()/2)) {
+ res = res | ((uint32_t)(1) <<
+ (lane - (w->computeUnit->wfSize()/2)));
}
}
}
}
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
dest.set<int>(w, lane, res);
}
@@ -546,7 +547,7 @@ namespace HsailISA
const VectorMask &mask = w->get_pred();
int max_cnt = 0;
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
w->bar_cnt[lane]++;
@@ -567,7 +568,7 @@ namespace HsailISA
const VectorMask &mask = w->get_pred();
int max_cnt = 0;
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
w->bar_cnt[lane]--;
}
@@ -592,7 +593,7 @@ namespace HsailISA
{
const VectorMask &mask = w->get_pred();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
int src_val1 = src1.get<int>(w, lane, 1);
panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
@@ -605,7 +606,7 @@ namespace HsailISA
Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
{
// the address is in src1 | src2
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
int src_val1 = src1.get<int>(w, lane, 1);
int src_val2 = src1.get<int>(w, lane, 2);
Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
@@ -622,7 +623,7 @@ namespace HsailISA
calcAddr(w, m);
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
}
@@ -661,7 +662,7 @@ namespace HsailISA
GPUDynInstPtr m = gpuDynInst;
calcAddr(w, m);
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
}
@@ -736,7 +737,7 @@ namespace HsailISA
const VectorMask &mask = w->get_pred();
int src_val1 = 0;
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
src_val1 = src1.get<int>(w, lane, 1);
break;
@@ -758,7 +759,7 @@ namespace HsailISA
const VectorMask &mask = w->get_pred();
unsigned mst = true;
- for (int lane = VSZ - 1; lane >= 0; --lane) {
+ for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
if (mask[lane]) {
dest.set<int>(w, lane, mst);
mst = false;
@@ -773,7 +774,7 @@ namespace HsailISA
int res = 0;
bool got_res = false;
- for (int lane = VSZ - 1; lane >= 0; --lane) {
+ for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
if (mask[lane]) {
if (!got_res) {
res = src1.get<int>(w, lane, 1);
diff --git a/src/arch/hsail/operand.hh b/src/arch/hsail/operand.hh
index e3d275b10..4d981ee00 100644
--- a/src/arch/hsail/operand.hh
+++ b/src/arch/hsail/operand.hh
@@ -42,6 +42,7 @@
* Defines classes encapsulating HSAIL instruction operands.
*/
+#include <limits>
#include <string>
#include "arch/hsail/Brig.h"
@@ -346,6 +347,8 @@ class CRegOperand : public BaseRegOperand
template<typename T>
class ImmOperand : public BaseOperand
{
+ private:
+ uint16_t kind;
public:
T bits;
@@ -355,11 +358,21 @@ class ImmOperand : public BaseOperand
template<typename OperandType>
OperandType
- get()
+ get(Wavefront *w)
{
assert(sizeof(OperandType) <= sizeof(T));
+ panic_if(w == nullptr, "WF pointer needs to be set");
+
+ switch (kind) {
+ // immediate operand is WF size
+ case Brig::BRIG_KIND_OPERAND_WAVESIZE:
+ return (OperandType)w->computeUnit->wfSize();
+ break;
- return *(OperandType*)&bits;
+ default:
+ return *(OperandType*)&bits;
+ break;
+ }
}
// This version of get() takes a WF* and a lane id for
@@ -368,7 +381,7 @@ class ImmOperand : public BaseOperand
OperandType
get(Wavefront *w, int lane)
{
- return get<OperandType>();
+ return get<OperandType>(w);
}
};
@@ -388,16 +401,18 @@ ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj)
auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp;
bits = *((T*)(obj->getData(cbptr->bytes + 4)));
-
+ kind = brigOp->kind;
return true;
}
break;
case Brig::BRIG_KIND_OPERAND_WAVESIZE:
- bits = VSZ;
+ kind = brigOp->kind;
+ bits = std::numeric_limits<unsigned long long>::digits;
return true;
default:
+ kind = Brig::BRIG_KIND_NONE;
return false;
}
}
@@ -409,6 +424,7 @@ ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
+ kind = Brig::BRIG_KIND_NONE;
return false;
}
@@ -423,6 +439,7 @@ ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
(const Brig::BrigOperand *)obj->getOperand(*data_offset);
if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
+ kind = Brig::BRIG_KIND_NONE;
return false;
}
@@ -456,7 +473,7 @@ class RegOrImmOperand : public BaseOperand
OperandType
get(Wavefront *w, int lane)
{
- return is_imm ? imm_op.template get<OperandType>() :
+ return is_imm ? imm_op.template get<OperandType>(w) :
reg_op.template get<OperandType>(w, lane);
}
@@ -571,7 +588,7 @@ class AddrOperandBase : public BaseOperand
uint64_t calcUniformBase();
public:
- virtual void calcVector(Wavefront *w, uint64_t *addrVec) = 0;
+ virtual void calcVector(Wavefront *w, std::vector<Addr> &addrVec) = 0;
virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0;
uint64_t offset;
@@ -586,7 +603,7 @@ class RegAddrOperand : public AddrOperandBase
RegOperandType reg;
void init(unsigned opOffset, const BrigObject *obj);
uint64_t calcUniform();
- void calcVector(Wavefront *w, uint64_t *addrVec);
+ void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
uint64_t calcLane(Wavefront *w, int lane=0);
uint32_t opSize() { return reg.opSize(); }
bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; }
@@ -641,11 +658,12 @@ RegAddrOperand<RegOperandType>::calcUniform()
template<typename RegOperandType>
void
-RegAddrOperand<RegOperandType>::calcVector(Wavefront *w, uint64_t *addrVec)
+RegAddrOperand<RegOperandType>::calcVector(Wavefront *w,
+ std::vector<Addr> &addrVec)
{
Addr address = calcUniformBase();
- for (int lane = 0; lane < VSZ; ++lane) {
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (w->execMask(lane)) {
if (reg.regFileChar == 's') {
addrVec[lane] = address + reg.template get<uint32_t>(w, lane);
@@ -680,7 +698,7 @@ class NoRegAddrOperand : public AddrOperandBase
public:
void init(unsigned opOffset, const BrigObject *obj);
uint64_t calcUniform();
- void calcVector(Wavefront *w, uint64_t *addrVec);
+ void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
uint64_t calcLane(Wavefront *w, int lane=0);
std::string disassemble();
};
@@ -698,11 +716,11 @@ NoRegAddrOperand::calcLane(Wavefront *w, int lane)
}
inline void
-NoRegAddrOperand::calcVector(Wavefront *w, uint64_t *addrVec)
+NoRegAddrOperand::calcVector(Wavefront *w, std::vector<Addr> &addrVec)
{
uint64_t address = calcUniformBase();
- for (int lane = 0; lane < VSZ; ++lane)
+ for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane)
addrVec[lane] = address;
}