diff options
Diffstat (limited to 'src/gpu-compute/cl_driver.cc')
-rw-r--r-- | src/gpu-compute/cl_driver.cc | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/src/gpu-compute/cl_driver.cc b/src/gpu-compute/cl_driver.cc new file mode 100644 index 000000000..3b3291c03 --- /dev/null +++ b/src/gpu-compute/cl_driver.cc @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Anthony Gutierrez + */ + +#include "gpu-compute/cl_driver.hh" + +#include "base/intmath.hh" +#include "cpu/thread_context.hh" +#include "gpu-compute/dispatcher.hh" +#include "gpu-compute/hsa_code.hh" +#include "gpu-compute/hsa_kernel_info.hh" +#include "gpu-compute/hsa_object.hh" +#include "params/ClDriver.hh" +#include "sim/process.hh" +#include "sim/syscall_emul_buf.hh" + +ClDriver::ClDriver(ClDriverParams *p) + : EmulatedDriver(p), hsaCode(0) +{ + for (const auto &codeFile : p->codefile) + codeFiles.push_back(&codeFile); + + maxFuncArgsSize = 0; + + for (int i = 0; i < codeFiles.size(); ++i) { + HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]); + + for (int k = 0; k < obj->numKernels(); ++k) { + assert(obj->getKernel(k)); + kernels.push_back(obj->getKernel(k)); + kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData); + int kern_funcargs_size = kernels.back()->funcarg_size; + maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ? + kern_funcargs_size : maxFuncArgsSize; + } + } + + int name_offs = 0; + int code_offs = 0; + + for (int i = 0; i < kernels.size(); ++i) { + kernelInfo.push_back(HsaKernelInfo()); + HsaCode *k = kernels[i]; + + k->generateHsaKernelInfo(&kernelInfo[i]); + + kernelInfo[i].name_offs = name_offs; + kernelInfo[i].code_offs = code_offs; + + name_offs += k->name().size() + 1; + code_offs += k->numInsts() * sizeof(GPUStaticInst*); + } +} + +void +ClDriver::handshake(GpuDispatcher *_dispatcher) +{ + dispatcher = _dispatcher; + dispatcher->setFuncargsSize(maxFuncArgsSize); +} + +int +ClDriver::open(LiveProcess *p, ThreadContext *tc, int mode, int flags) +{ + int fd = p->allocFD(-1, filename, 0, 0, false); + FDEntry *fde = p->getFDEntry(fd); + fde->driver = this; + + return fd; +} + +int +ClDriver::ioctl(LiveProcess *process, ThreadContext *tc, unsigned req) +{ + int index = 2; + Addr buf_addr = process->getSyscallArg(tc, index); + + switch (req) { + case HSA_GET_SIZES: + { + TypedBufferArg<HsaDriverSizes> sizes(buf_addr); + sizes->num_kernels = kernels.size(); + sizes->string_table_size = 0; + sizes->code_size = 0; + sizes->readonly_size = 0; + + if (kernels.size() > 0) { + // all kernels will share the same read-only memory + sizes->readonly_size = + kernels[0]->getSize(HsaCode::MemorySegment::READONLY); + // check our assumption + for (int i = 1; i<kernels.size(); ++i) { + assert(sizes->readonly_size == + kernels[i]->getSize(HsaCode::MemorySegment::READONLY)); + } + } + + for (int i = 0; i < kernels.size(); ++i) { + HsaCode *k = kernels[i]; + // add one for terminating '\0' + sizes->string_table_size += k->name().size() + 1; + sizes->code_size += k->numInsts() * sizeof(GPUStaticInst*); + } + + sizes.copyOut(tc->getMemProxy()); + } + break; + + case HSA_GET_KINFO: + { + TypedBufferArg<HsaKernelInfo> + kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size()); + + for (int i = 0; i < kernels.size(); ++i) { + HsaKernelInfo *ki = &kinfo[i]; + ki->name_offs = kernelInfo[i].name_offs; + ki->code_offs = kernelInfo[i].code_offs; + ki->sRegCount = kernelInfo[i].sRegCount; + ki->dRegCount = kernelInfo[i].dRegCount; + ki->cRegCount = kernelInfo[i].cRegCount; + ki->static_lds_size = kernelInfo[i].static_lds_size; + ki->private_mem_size = kernelInfo[i].private_mem_size; + ki->spill_mem_size = kernelInfo[i].spill_mem_size; + } + + kinfo.copyOut(tc->getMemProxy()); + } + break; + + case HSA_GET_STRINGS: + { + int string_table_size = 0; + for (int i = 0; i < kernels.size(); ++i) { + HsaCode *k = kernels[i]; + string_table_size += k->name().size() + 1; + } + + BufferArg buf(buf_addr, string_table_size); + char *bufp = (char*)buf.bufferPtr(); + + for (int i = 0; i < kernels.size(); ++i) { + HsaCode *k = kernels[i]; + const char *n = k->name().c_str(); + + // idiomatic string copy + while ((*bufp++ = *n++)); + } + + assert(bufp - (char *)buf.bufferPtr() == string_table_size); + + buf.copyOut(tc->getMemProxy()); + } + break; + + case HSA_GET_READONLY_DATA: + { + // we can pick any kernel --- they share the same + // readonly segment (this assumption is checked in GET_SIZES) + uint64_t size = + kernels.back()->getSize(HsaCode::MemorySegment::READONLY); + BufferArg data(buf_addr, size); + char *datap = (char *)data.bufferPtr(); + memcpy(datap, + kernels.back()->readonly_data, + size); + data.copyOut(tc->getMemProxy()); + } + break; + + case HSA_GET_CODE: + { + // set hsaCode pointer + hsaCode = buf_addr; + int code_size = 0; + + for (int i = 0; i < kernels.size(); ++i) { + HsaCode *k = kernels[i]; + code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst); + } + + TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size); + TheGpuISA::RawMachInst *bufp = buf; + + int buf_idx = 0; + + for (int i = 0; i < kernels.size(); ++i) { + HsaCode *k = kernels[i]; + + for (int j = 0; j < k->numInsts(); ++j) { + bufp[buf_idx] = k->insts()->at(j); + ++buf_idx; + } + } + + buf.copyOut(tc->getMemProxy()); + } + break; + + case HSA_GET_CU_CNT: + { + BufferArg buf(buf_addr, sizeof(uint32_t)); + *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs(); + buf.copyOut(tc->getMemProxy()); + } + break; + + case HSA_GET_VSZ: + { + BufferArg buf(buf_addr, sizeof(uint32_t)); + *((uint32_t*)buf.bufferPtr()) = VSZ; + buf.copyOut(tc->getMemProxy()); + } + break; + + default: + fatal("ClDriver: bad ioctl %d\n", req); + } + + return 0; +} + +const char* +ClDriver::codeOffToKernelName(uint64_t code_ptr) +{ + assert(hsaCode); + uint32_t code_offs = code_ptr - hsaCode; + + for (int i = 0; i < kernels.size(); ++i) { + if (code_offs == kernelInfo[i].code_offs) { + return kernels[i]->name().c_str(); + } + } + + return nullptr; +} + +ClDriver* +ClDriverParams::create() +{ + return new ClDriver(this); +} |