From 844fb845a51b15f13c7c744e0d5fdf5567c3da98 Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Wed, 26 Oct 2016 22:47:43 -0400 Subject: gpu-compute, hsail: make the PC a byte address, not an instruction index Currently the PC is incremented at instruction granularity (as an instruction index) rather than as an instruction's byte address. Machine ISA instructions assume the PC is a byte address that is incremented accordingly. Here we make the GPU model and the HSAIL instructions treat the PC as a byte address as well. --- src/arch/hsail/gpu_isa.hh | 3 ++- src/arch/hsail/gpu_types.hh | 2 +- src/arch/hsail/insts/branch.hh | 4 ++-- src/arch/hsail/insts/gpu_static_inst.hh | 3 ++- src/gpu-compute/cl_driver.cc | 5 +++-- src/gpu-compute/fetch_unit.cc | 5 ++--- src/gpu-compute/gpu_static_inst.cc | 2 +- src/gpu-compute/gpu_static_inst.hh | 8 ++++++-- src/gpu-compute/hsail_code.cc | 17 ++++++++++++----- src/gpu-compute/kernel_cfg.cc | 29 ++++++++++++++--------------- src/gpu-compute/kernel_cfg.hh | 2 +- 11 files changed, 46 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/arch/hsail/gpu_isa.hh b/src/arch/hsail/gpu_isa.hh index dbd816d91..caee776f1 100644 --- a/src/arch/hsail/gpu_isa.hh +++ b/src/arch/hsail/gpu_isa.hh @@ -38,6 +38,7 @@ #include +#include "arch/hsail/gpu_types.hh" #include "base/misc.hh" #include "gpu-compute/misc.hh" @@ -71,7 +72,7 @@ namespace HsailISA uint32_t advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst) { - return old_pc + 1; + return old_pc + sizeof(RawMachInst); } private: diff --git a/src/arch/hsail/gpu_types.hh b/src/arch/hsail/gpu_types.hh index 4b3a66a9a..7b6689d67 100644 --- a/src/arch/hsail/gpu_types.hh +++ b/src/arch/hsail/gpu_types.hh @@ -51,7 +51,7 @@ namespace HsailISA // our model uses to represent an actual instruction. In // the case of HSAIL this is just an index into a list of // instruction objects. 
- typedef uint64_t RawMachInst; + typedef uint32_t RawMachInst; // The MachInst is a representation of an instruction // that has more information than just the machine code. diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh index 3a520b216..6df6f766a 100644 --- a/src/arch/hsail/insts/branch.hh +++ b/src/arch/hsail/insts/branch.hh @@ -257,7 +257,7 @@ namespace HsailISA { Wavefront *w = gpuDynInst->wavefront(); - const uint32_t curr_pc = w->pc(); + const uint32_t curr_pc M5_VAR_USED = w->pc(); const uint32_t curr_rpc = w->rpc(); const VectorMask curr_mask = w->execMask(); @@ -281,7 +281,7 @@ namespace HsailISA } // not taken branch - const uint32_t false_pc = curr_pc + 1; + const uint32_t false_pc = nextInstAddr(); assert(true_pc != false_pc); if (false_pc != rpc && true_mask.count() < curr_mask.count()) { VectorMask false_mask = curr_mask & ~true_mask; diff --git a/src/arch/hsail/insts/gpu_static_inst.hh b/src/arch/hsail/insts/gpu_static_inst.hh index 5dcfe78d5..bb40411ed 100644 --- a/src/arch/hsail/insts/gpu_static_inst.hh +++ b/src/arch/hsail/insts/gpu_static_inst.hh @@ -42,6 +42,7 @@ * Defines the base class representing HSAIL GPU static instructions. 
*/ +#include "arch/hsail/gpu_types.hh" #include "gpu-compute/gpu_static_inst.hh" class BrigObject; @@ -54,7 +55,7 @@ namespace HsailISA public: HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode); void generateDisassembly(); - uint32_t instSize() { return 4; } + int instSize() const override { return sizeof(RawMachInst); } bool isValid() const override { return true; } protected: diff --git a/src/gpu-compute/cl_driver.cc b/src/gpu-compute/cl_driver.cc index d3950ec04..d6d1b1334 100644 --- a/src/gpu-compute/cl_driver.cc +++ b/src/gpu-compute/cl_driver.cc @@ -79,7 +79,7 @@ ClDriver::ClDriver(ClDriverParams *p) kernelInfo[i].code_offs = code_offs; name_offs += k->name().size() + 1; - code_offs += k->numInsts() * sizeof(GPUStaticInst*); + code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst); } } @@ -130,7 +130,8 @@ ClDriver::ioctl(LiveProcess *process, ThreadContext *tc, unsigned req) HsaCode *k = kernels[i]; // add one for terminating '\0' sizes->string_table_size += k->name().size() + 1; - sizes->code_size += k->numInsts() * sizeof(GPUStaticInst*); + sizes->code_size += + k->numInsts() * sizeof(TheGpuISA::RawMachInst); } sizes.copyOut(tc->getMemProxy()); diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc index 2ea7f1f9d..1b19a3223 100644 --- a/src/gpu-compute/fetch_unit.cc +++ b/src/gpu-compute/fetch_unit.cc @@ -122,11 +122,10 @@ FetchUnit::initiateFetch(Wavefront *wavefront) * instrutions on a 32b granularity so we must account for that here. 
*/ for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) { - int current_inst_size = + vaddr += wavefront->instructionBuffer.at(i)->staticInstruction()->instSize(); - vaddr += current_inst_size / sizeof(uint32_t); } - vaddr = wavefront->basePtr + vaddr * sizeof(GPUStaticInst*); + vaddr = wavefront->basePtr + vaddr; DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n", computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr); diff --git a/src/gpu-compute/gpu_static_inst.cc b/src/gpu-compute/gpu_static_inst.cc index 0f74bd532..c375bf248 100644 --- a/src/gpu-compute/gpu_static_inst.cc +++ b/src/gpu-compute/gpu_static_inst.cc @@ -37,7 +37,7 @@ GPUStaticInst::GPUStaticInst(const std::string &opcode) : executed_as(Enums::SC_NONE), opcode(opcode), - _instNum(0) + _instNum(0), _instAddr(0) { setFlag(NoOrder); } diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh index a73ec12e3..2fa1e0ca5 100644 --- a/src/gpu-compute/gpu_static_inst.hh +++ b/src/gpu-compute/gpu_static_inst.hh @@ -61,6 +61,9 @@ class GPUStaticInst : public GPUStaticInstFlags { public: GPUStaticInst(const std::string &opcode); + void instAddr(int inst_addr) { _instAddr = inst_addr; } + int instAddr() const { return _instAddr; } + int nextInstAddr() const { return _instAddr + instSize(); } void instNum(int num) { _instNum = num; } @@ -190,7 +193,7 @@ class GPUStaticInst : public GPUStaticInstFlags bool isGloballyCoherent() const { return _flags[GloballyCoherent]; } bool isSystemCoherent() const { return _flags[SystemCoherent]; } - virtual uint32_t instSize() = 0; + virtual int instSize() const = 0; // only used for memory instructions virtual void @@ -243,6 +246,7 @@ class GPUStaticInst : public GPUStaticInstFlags const std::string opcode; std::string disassembly; int _instNum; + int _instAddr; /** * Identifier of the immediate post-dominator instruction. 
*/ @@ -286,7 +290,7 @@ class KernelLaunchStaticInst : public GPUStaticInst int numDstRegOperands() { return 0; } int numSrcRegOperands() { return 0; } bool isValid() const { return true; } - uint32_t instSize() { return 0; } + int instSize() const override { return 0; } }; #endif // __GPU_STATIC_INST_HH__ diff --git a/src/gpu-compute/hsail_code.cc b/src/gpu-compute/hsail_code.cc index b0ddf0161..59faa67e9 100644 --- a/src/gpu-compute/hsail_code.cc +++ b/src/gpu-compute/hsail_code.cc @@ -84,6 +84,11 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj, const BrigBase *endPtr = obj->getCodeSectionEntry(code_dir->nextModuleEntry); + // the instruction's byte address (relative to the base addr + // of the code section) + int inst_addr = 0; + // the index that points to the instruction in the instruction + // array int inst_idx = 0; std::vector instructions; int funcarg_size_scope = 0; @@ -121,7 +126,7 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj, "kind_label, label is: %s \n", obj->getString(lbl->name)); - labelMap.addLabel(lbl, inst_idx, obj); + labelMap.addLabel(lbl, inst_addr, obj); } break; @@ -175,14 +180,16 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj, if (iptr) { DPRINTF(HSAILObject, "Initializing code, processing inst " - "#%d idx %d: OPCODE=%d\n", - inst_idx, _insts.size(), instPtr->opcode); + "byte addr #%d idx %d: OPCODE=%d\n", inst_addr, + inst_idx, instPtr->opcode); - TheGpuISA::RawMachInst inst_num = decoder.saveInst(iptr); + TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr); iptr->instNum(inst_idx); - _insts.push_back(inst_num); + iptr->instAddr(inst_addr); + _insts.push_back(raw_inst); instructions.push_back(iptr); } + inst_addr += sizeof(TheGpuISA::RawMachInst); ++inst_idx; } else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN && entryPtr->kind < BRIG_KIND_OPERAND_END) { diff --git a/src/gpu-compute/kernel_cfg.cc b/src/gpu-compute/kernel_cfg.cc 
index ac6a81b16..de518ec84 100644 --- a/src/gpu-compute/kernel_cfg.cc +++ b/src/gpu-compute/kernel_cfg.cc @@ -63,11 +63,11 @@ ControlFlowInfo::ControlFlowInfo(const std::vector& insts) : } BasicBlock* -ControlFlowInfo::basicBlock(int inst_num) const { +ControlFlowInfo::basicBlock(int inst_addr) const { for (auto& block: basicBlocks) { - int first_block_id = block->firstInstruction->instNum(); - if (inst_num >= first_block_id && - inst_num < first_block_id + block->size) { + int first_block_addr = block->firstInstruction->instAddr(); + if (inst_addr >= first_block_addr && inst_addr < + first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) { return block.get(); } } @@ -102,24 +102,23 @@ ControlFlowInfo::createBasicBlocks() std::set leaders; // first instruction is a leader leaders.insert(0); - for (int i = 1; i < instructions.size(); i++) { - GPUStaticInst* instruction = instructions[i]; + for (const auto &instruction : instructions) { if (instruction->isBranch()) { const int target_pc = instruction->getTargetPc(); leaders.insert(target_pc); - leaders.insert(i + 1); + leaders.insert(instruction->nextInstAddr()); } } size_t block_size = 0; - for (int i = 0; i < instructions.size(); i++) { - if (leaders.find(i) != leaders.end()) { + for (const auto &instruction : instructions) { + if (leaders.find(instruction->instAddr()) != leaders.end()) { uint32_t id = basicBlocks.size(); if (id > 0) { basicBlocks.back()->size = block_size; } block_size = 0; - basicBlocks.emplace_back(new BasicBlock(id, instructions[i])); + basicBlocks.emplace_back(new BasicBlock(id, instruction)); } block_size++; } @@ -149,7 +148,7 @@ ControlFlowInfo::connectBasicBlocks() // Unconditional jump instructions have a unique successor if (!last->isUnconditionalJump()) { - BasicBlock* next_bb = basicBlock(last->instNum() + 1); + BasicBlock* next_bb = basicBlock(last->nextInstAddr()); bb->successorIds.insert(next_bb->id); } } @@ -236,9 +235,9 @@ ControlFlowInfo::findImmediatePostDominators() 
BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get(); if (!ipd_block->isExit()) { GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction; - last_instruction->ipdInstNum(ipd_first_inst->instNum()); + last_instruction->ipdInstNum(ipd_first_inst->instAddr()); } else { - last_instruction->ipdInstNum(last_instruction->instNum() + 1); + last_instruction->ipdInstNum(last_instruction->nextInstAddr()); } } } @@ -271,8 +270,8 @@ void ControlFlowInfo::printBasicBlocks() const { for (GPUStaticInst* inst : instructions) { - int inst_num = inst->instNum(); - std::cout << inst_num << " [" << basicBlock(inst_num)->id + int inst_addr = inst->instAddr(); + std::cout << inst_addr << " [" << basicBlock(inst_addr)->id << "]: " << inst->disassemble(); if (inst->isBranch()) { std::cout << ", PC = " << inst->getTargetPc(); diff --git a/src/gpu-compute/kernel_cfg.hh b/src/gpu-compute/kernel_cfg.hh index 74ea861d8..d4959c857 100644 --- a/src/gpu-compute/kernel_cfg.hh +++ b/src/gpu-compute/kernel_cfg.hh @@ -106,7 +106,7 @@ private: GPUStaticInst* lastInstruction(const BasicBlock* block) const; - BasicBlock* basicBlock(int inst_num) const; + BasicBlock* basicBlock(int inst_addr) const; BasicBlock* postDominator(const BasicBlock* block) const; -- cgit v1.2.3