summaryrefslogtreecommitdiff
path: root/src/gpu-compute
diff options
context:
space:
mode:
author Tony Gutierrez <anthony.gutierrez@amd.com> 2016-10-26 22:47:43 -0400
committer Tony Gutierrez <anthony.gutierrez@amd.com> 2016-10-26 22:47:43 -0400
commit 844fb845a51b15f13c7c744e0d5fdf5567c3da98 (patch)
tree 407cd19c909cdf3cd4da7947ad86dfbd4470ef68 /src/gpu-compute
parent d327cdba078e0956596513b518731e9ec730723f (diff)
download gem5-844fb845a51b15f13c7c744e0d5fdf5567c3da98.tar.xz
gpu-compute, hsail: make the PC a byte address, not an instruction index
Currently the PC is incremented at instruction granularity, i.e., it is treated as an instruction index rather than as an instruction's byte address. Machine ISA instructions assume the PC is a byte address and increment it accordingly. Here we make the GPU model and the HSAIL instructions treat the PC as a byte address as well.
Diffstat (limited to 'src/gpu-compute')
-rw-r--r-- src/gpu-compute/cl_driver.cc 5
-rw-r--r-- src/gpu-compute/fetch_unit.cc 5
-rw-r--r-- src/gpu-compute/gpu_static_inst.cc 2
-rw-r--r-- src/gpu-compute/gpu_static_inst.hh 8
-rw-r--r-- src/gpu-compute/hsail_code.cc 17
-rw-r--r-- src/gpu-compute/kernel_cfg.cc 29
-rw-r--r-- src/gpu-compute/kernel_cfg.hh 2
7 files changed, 39 insertions, 29 deletions
diff --git a/src/gpu-compute/cl_driver.cc b/src/gpu-compute/cl_driver.cc
index d3950ec04..d6d1b1334 100644
--- a/src/gpu-compute/cl_driver.cc
+++ b/src/gpu-compute/cl_driver.cc
@@ -79,7 +79,7 @@ ClDriver::ClDriver(ClDriverParams *p)
kernelInfo[i].code_offs = code_offs;
name_offs += k->name().size() + 1;
- code_offs += k->numInsts() * sizeof(GPUStaticInst*);
+ code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
}
}
@@ -130,7 +130,8 @@ ClDriver::ioctl(LiveProcess *process, ThreadContext *tc, unsigned req)
HsaCode *k = kernels[i];
// add one for terminating '\0'
sizes->string_table_size += k->name().size() + 1;
- sizes->code_size += k->numInsts() * sizeof(GPUStaticInst*);
+ sizes->code_size +=
+ k->numInsts() * sizeof(TheGpuISA::RawMachInst);
}
sizes.copyOut(tc->getMemProxy());
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index 2ea7f1f9d..1b19a3223 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -122,11 +122,10 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
* instrutions on a 32b granularity so we must account for that here.
*/
for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) {
- int current_inst_size =
+ vaddr +=
wavefront->instructionBuffer.at(i)->staticInstruction()->instSize();
- vaddr += current_inst_size / sizeof(uint32_t);
}
- vaddr = wavefront->basePtr + vaddr * sizeof(GPUStaticInst*);
+ vaddr = wavefront->basePtr + vaddr;
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
diff --git a/src/gpu-compute/gpu_static_inst.cc b/src/gpu-compute/gpu_static_inst.cc
index 0f74bd532..c375bf248 100644
--- a/src/gpu-compute/gpu_static_inst.cc
+++ b/src/gpu-compute/gpu_static_inst.cc
@@ -37,7 +37,7 @@
GPUStaticInst::GPUStaticInst(const std::string &opcode)
: executed_as(Enums::SC_NONE), opcode(opcode),
- _instNum(0)
+ _instNum(0), _instAddr(0)
{
setFlag(NoOrder);
}
diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh
index a73ec12e3..2fa1e0ca5 100644
--- a/src/gpu-compute/gpu_static_inst.hh
+++ b/src/gpu-compute/gpu_static_inst.hh
@@ -61,6 +61,9 @@ class GPUStaticInst : public GPUStaticInstFlags
{
public:
GPUStaticInst(const std::string &opcode);
+ void instAddr(int inst_addr) { _instAddr = inst_addr; }
+ int instAddr() const { return _instAddr; }
+ int nextInstAddr() const { return _instAddr + instSize(); }
void instNum(int num) { _instNum = num; }
@@ -190,7 +193,7 @@ class GPUStaticInst : public GPUStaticInstFlags
bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
bool isSystemCoherent() const { return _flags[SystemCoherent]; }
- virtual uint32_t instSize() = 0;
+ virtual int instSize() const = 0;
// only used for memory instructions
virtual void
@@ -243,6 +246,7 @@ class GPUStaticInst : public GPUStaticInstFlags
const std::string opcode;
std::string disassembly;
int _instNum;
+ int _instAddr;
/**
* Identifier of the immediate post-dominator instruction.
*/
@@ -286,7 +290,7 @@ class KernelLaunchStaticInst : public GPUStaticInst
int numDstRegOperands() { return 0; }
int numSrcRegOperands() { return 0; }
bool isValid() const { return true; }
- uint32_t instSize() { return 0; }
+ int instSize() const override { return 0; }
};
#endif // __GPU_STATIC_INST_HH__
diff --git a/src/gpu-compute/hsail_code.cc b/src/gpu-compute/hsail_code.cc
index b0ddf0161..59faa67e9 100644
--- a/src/gpu-compute/hsail_code.cc
+++ b/src/gpu-compute/hsail_code.cc
@@ -84,6 +84,11 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
const BrigBase *endPtr =
obj->getCodeSectionEntry(code_dir->nextModuleEntry);
+ // the instruction's byte address (relative to the base addr
+ // of the code section)
+ int inst_addr = 0;
+ // the index that points to the instruction in the instruction
+ // array
int inst_idx = 0;
std::vector<GPUStaticInst*> instructions;
int funcarg_size_scope = 0;
@@ -121,7 +126,7 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
"kind_label, label is: %s \n",
obj->getString(lbl->name));
- labelMap.addLabel(lbl, inst_idx, obj);
+ labelMap.addLabel(lbl, inst_addr, obj);
}
break;
@@ -175,14 +180,16 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
if (iptr) {
DPRINTF(HSAILObject, "Initializing code, processing inst "
- "#%d idx %d: OPCODE=%d\n",
- inst_idx, _insts.size(), instPtr->opcode);
+ "byte addr #%d idx %d: OPCODE=%d\n", inst_addr,
+ inst_idx, instPtr->opcode);
- TheGpuISA::RawMachInst inst_num = decoder.saveInst(iptr);
+ TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr);
iptr->instNum(inst_idx);
- _insts.push_back(inst_num);
+ iptr->instAddr(inst_addr);
+ _insts.push_back(raw_inst);
instructions.push_back(iptr);
}
+ inst_addr += sizeof(TheGpuISA::RawMachInst);
++inst_idx;
} else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
entryPtr->kind < BRIG_KIND_OPERAND_END) {
diff --git a/src/gpu-compute/kernel_cfg.cc b/src/gpu-compute/kernel_cfg.cc
index ac6a81b16..de518ec84 100644
--- a/src/gpu-compute/kernel_cfg.cc
+++ b/src/gpu-compute/kernel_cfg.cc
@@ -63,11 +63,11 @@ ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts) :
}
BasicBlock*
-ControlFlowInfo::basicBlock(int inst_num) const {
+ControlFlowInfo::basicBlock(int inst_addr) const {
for (auto& block: basicBlocks) {
- int first_block_id = block->firstInstruction->instNum();
- if (inst_num >= first_block_id &&
- inst_num < first_block_id + block->size) {
+ int first_block_addr = block->firstInstruction->instAddr();
+ if (inst_addr >= first_block_addr && inst_addr <
+ first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) {
return block.get();
}
}
@@ -102,24 +102,23 @@ ControlFlowInfo::createBasicBlocks()
std::set<int> leaders;
// first instruction is a leader
leaders.insert(0);
- for (int i = 1; i < instructions.size(); i++) {
- GPUStaticInst* instruction = instructions[i];
+ for (const auto &instruction : instructions) {
if (instruction->isBranch()) {
const int target_pc = instruction->getTargetPc();
leaders.insert(target_pc);
- leaders.insert(i + 1);
+ leaders.insert(instruction->nextInstAddr());
}
}
size_t block_size = 0;
- for (int i = 0; i < instructions.size(); i++) {
- if (leaders.find(i) != leaders.end()) {
+ for (const auto &instruction : instructions) {
+ if (leaders.find(instruction->instAddr()) != leaders.end()) {
uint32_t id = basicBlocks.size();
if (id > 0) {
basicBlocks.back()->size = block_size;
}
block_size = 0;
- basicBlocks.emplace_back(new BasicBlock(id, instructions[i]));
+ basicBlocks.emplace_back(new BasicBlock(id, instruction));
}
block_size++;
}
@@ -149,7 +148,7 @@ ControlFlowInfo::connectBasicBlocks()
// Unconditional jump instructions have a unique successor
if (!last->isUnconditionalJump()) {
- BasicBlock* next_bb = basicBlock(last->instNum() + 1);
+ BasicBlock* next_bb = basicBlock(last->nextInstAddr());
bb->successorIds.insert(next_bb->id);
}
}
@@ -236,9 +235,9 @@ ControlFlowInfo::findImmediatePostDominators()
BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
if (!ipd_block->isExit()) {
GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
- last_instruction->ipdInstNum(ipd_first_inst->instNum());
+ last_instruction->ipdInstNum(ipd_first_inst->instAddr());
} else {
- last_instruction->ipdInstNum(last_instruction->instNum() + 1);
+ last_instruction->ipdInstNum(last_instruction->nextInstAddr());
}
}
}
@@ -271,8 +270,8 @@ void
ControlFlowInfo::printBasicBlocks() const
{
for (GPUStaticInst* inst : instructions) {
- int inst_num = inst->instNum();
- std::cout << inst_num << " [" << basicBlock(inst_num)->id
+ int inst_addr = inst->instAddr();
+ std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
<< "]: " << inst->disassemble();
if (inst->isBranch()) {
std::cout << ", PC = " << inst->getTargetPc();
diff --git a/src/gpu-compute/kernel_cfg.hh b/src/gpu-compute/kernel_cfg.hh
index 74ea861d8..d4959c857 100644
--- a/src/gpu-compute/kernel_cfg.hh
+++ b/src/gpu-compute/kernel_cfg.hh
@@ -106,7 +106,7 @@ private:
GPUStaticInst* lastInstruction(const BasicBlock* block) const;
- BasicBlock* basicBlock(int inst_num) const;
+ BasicBlock* basicBlock(int inst_addr) const;
BasicBlock* postDominator(const BasicBlock* block) const;