/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Anthony Gutierrez
 */

#include "gpu-compute/gpu_dyn_inst.hh"

#include <string>
#include <utility>

#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

/**
 * Build a dynamic instruction that wraps the given static instruction.
 *
 * @param _cu         compute unit forwarded to the GPUExecContext base
 * @param _wf         wavefront forwarded to the GPUExecContext base
 * @param static_inst static instruction every accessor below delegates to
 * @param instSeqNum  sequence number reported by seqNum()
 *
 * The addr vector gets wfSize() zero entries, tlbHitLevel gets wfSize()
 * entries of -1, and the three byte buffers are allocated zero-filled:
 * d_data holds wfSize() * 16 bytes, a_data and x_data hold wfSize() * 8
 * bytes each. The buffers are released in the destructor.
 */
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
                       GPUStaticInst *static_inst, uint64_t instSeqNum)
    : GPUExecContext(_cu, _wf),
      addr(computeUnit()->wfSize(), static_cast<Addr>(0)),
      n_reg(0), useContinuation(false), statusBitVector(0),
      _staticInst(static_inst), _seqNum(instSeqNum)
{
    // Query the wavefront size once; it sizes everything below.
    const auto wf_size = computeUnit()->wfSize();

    tlbHitLevel.assign(wf_size, -1);

    // The trailing "()" value-initializes the arrays, i.e. every byte is
    // zero. This replaces the explicit zeroing loops.
    d_data = new uint8_t[wf_size * 16]();
    a_data = new uint8_t[wf_size * 8]();
    x_data = new uint8_t[wf_size * 8]();
}

/** Release the byte buffers allocated by the constructor. */
GPUDynInst::~GPUDynInst()
{
    delete[] d_data;
    delete[] a_data;
    delete[] x_data;
}

/** Forward execution to the underlying static instruction. */
void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
    _staticInst->execute(gpuDynInst);
}

/**
 * Operand queries. Each of the following simply forwards to the method of
 * the same name on the underlying static instruction.
 */
int
GPUDynInst::numSrcRegOperands()
{
    return _staticInst->numSrcRegOperands();
}

int
GPUDynInst::numDstRegOperands()
{
    return _staticInst->numDstRegOperands();
}

int
GPUDynInst::getNumOperands()
{
    return _staticInst->getNumOperands();
}

bool
GPUDynInst::isVectorRegister(int operandIdx)
{
    return _staticInst->isVectorRegister(operandIdx);
}

bool
GPUDynInst::isScalarRegister(int operandIdx)
{
    return _staticInst->isScalarRegister(operandIdx);
}

bool
GPUDynInst::isCondRegister(int operandIdx)
{
    return _staticInst->isCondRegister(operandIdx);
}

int
GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
{
    return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
}

int
GPUDynInst::getOperandSize(int operandIdx)
{
    return _staticInst->getOperandSize(operandIdx);
}

bool
GPUDynInst::isDstOperand(int operandIdx)
{
    return _staticInst->isDstOperand(operandIdx);
}

bool
GPUDynInst::isSrcOperand(int operandIdx)
{
    return _staticInst->isSrcOperand(operandIdx);
}

/** Return the disassembly string of the underlying static instruction. */
const std::string&
GPUDynInst::disassemble() const
{
    return _staticInst->disassemble();
}

/** Return the sequence number supplied at construction. */
uint64_t
GPUDynInst::seqNum() const
{
    return _seqNum;
}

/** Return the executed_as field of the underlying static instruction. */
Enums::StorageClassType
GPUDynInst::executedAs()
{
    return _staticInst->executed_as;
}

// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            cu->cu_id, simdId, wfSlotId, exec_mask);

    _staticInst->initiateAcc(gpuDynInst);
    // time is reset only after the static instruction has initiated the
    // access; keep this ordering.
    time = 0;
}

/** Forward access completion to the underlying static instruction. */
void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
    _staticInst->completeAcc(gpuDynInst);
}

/**
 * accessor methods for the attributes of
 * the underlying GPU static instruction
 */
bool
GPUDynInst::isALU() const
{
    return _staticInst->isALU();
}

bool
GPUDynInst::isBranch() const
{
    return _staticInst->isBranch();
}

bool
GPUDynInst::isNop() const
{
    return _staticInst->isNop();
}

bool
GPUDynInst::isReturn() const
{
    return _staticInst->isReturn();
}

bool
GPUDynInst::isUnconditionalJump() const
{
    return _staticInst->isUnconditionalJump();
}

bool
GPUDynInst::isSpecialOp() const
{
    return _staticInst->isSpecialOp();
}

bool
GPUDynInst::isWaitcnt() const
{
    return _staticInst->isWaitcnt();
}

bool
GPUDynInst::isBarrier() const
{
    return _staticInst->isBarrier();
}

bool
GPUDynInst::isMemFence() const
{
    return _staticInst->isMemFence();
}

bool
GPUDynInst::isMemRef() const
{
    return _staticInst->isMemRef();
}

bool
GPUDynInst::isFlat() const
{
    return _staticInst->isFlat();
}

bool
GPUDynInst::isLoad() const
{
    return _staticInst->isLoad();
}

bool
GPUDynInst::isStore() const
{
    return _staticInst->isStore();
}

bool
GPUDynInst::isAtomic() const
{
    return _staticInst->isAtomic();
}

bool
GPUDynInst::isAtomicNoRet() const
{
    return _staticInst->isAtomicNoRet();
}

bool
GPUDynInst::isAtomicRet() const
{
    return _staticInst->isAtomicRet();
}

bool
GPUDynInst::isScalar() const
{
    return _staticInst->isScalar();
}

bool
GPUDynInst::readsSCC() const
{
    return _staticInst->readsSCC();
}

bool
GPUDynInst::writesSCC() const
{
    return _staticInst->writesSCC();
}

bool
GPUDynInst::readsVCC() const
{
    return _staticInst->readsVCC();
}

bool
GPUDynInst::writesVCC() const
{
    return _staticInst->writesVCC();
}

bool
GPUDynInst::isAtomicAnd() const
{
    return _staticInst->isAtomicAnd();
}

bool
GPUDynInst::isAtomicOr() const
{
    return _staticInst->isAtomicOr();
}

bool
GPUDynInst::isAtomicXor() const
{
    return _staticInst->isAtomicXor();
}

bool
GPUDynInst::isAtomicCAS() const
{
    return _staticInst->isAtomicCAS();
}

bool
GPUDynInst::isAtomicExch() const
{
    return _staticInst->isAtomicExch();
}

bool
GPUDynInst::isAtomicAdd() const
{
    return _staticInst->isAtomicAdd();
}

bool
GPUDynInst::isAtomicSub() const
{
    return _staticInst->isAtomicSub();
}

bool
GPUDynInst::isAtomicInc() const
{
    return _staticInst->isAtomicInc();
}

bool
GPUDynInst::isAtomicDec() const
{
    return _staticInst->isAtomicDec();
}

bool
GPUDynInst::isAtomicMax() const
{
    return _staticInst->isAtomicMax();
}

bool
GPUDynInst::isAtomicMin() const
{
    return _staticInst->isAtomicMin();
}

bool
GPUDynInst::isArgLoad() const
{
    return _staticInst->isArgLoad();
}

bool
GPUDynInst::isGlobalMem() const
{
    return _staticInst->isGlobalMem();
}

bool
GPUDynInst::isLocalMem() const
{
    return _staticInst->isLocalMem();
}

bool
GPUDynInst::isArgSeg() const
{
    return _staticInst->isArgSeg();
}

bool
GPUDynInst::isGlobalSeg() const
{
    return _staticInst->isGlobalSeg();
}

bool
GPUDynInst::isGroupSeg() const
{
    return _staticInst->isGroupSeg();
}

bool
GPUDynInst::isKernArgSeg() const
{
    return _staticInst->isKernArgSeg();
}

bool
GPUDynInst::isPrivateSeg() const
{
    return _staticInst->isPrivateSeg();
}

bool
GPUDynInst::isReadOnlySeg() const
{
    return _staticInst->isReadOnlySeg();
}

bool
GPUDynInst::isSpillSeg() const
{
    return _staticInst->isSpillSeg();
}

bool
GPUDynInst::isWorkitemScope() const
{
    return _staticInst->isWorkitemScope();
}

bool
GPUDynInst::isWavefrontScope() const
{
    return _staticInst->isWavefrontScope();
}

bool
GPUDynInst::isWorkgroupScope() const
{
    return _staticInst->isWorkgroupScope();
}

bool
GPUDynInst::isDeviceScope() const
{
    return _staticInst->isDeviceScope();
}

bool
GPUDynInst::isSystemScope() const
{
    return _staticInst->isSystemScope();
}

bool
GPUDynInst::isNoScope() const
{
    return _staticInst->isNoScope();
}

bool
GPUDynInst::isRelaxedOrder() const
{
    return _staticInst->isRelaxedOrder();
}

bool
GPUDynInst::isAcquire() const
{
    return _staticInst->isAcquire();
}

bool
GPUDynInst::isRelease() const
{
    return _staticInst->isRelease();
}

bool
GPUDynInst::isAcquireRelease() const
{
    return _staticInst->isAcquireRelease();
}

bool
GPUDynInst::isNoOrder() const
{
    return _staticInst->isNoOrder();
}

bool
GPUDynInst::isGloballyCoherent() const
{
    return _staticInst->isGloballyCoherent();
}

bool
GPUDynInst::isSystemCoherent() const
{
    return _staticInst->isSystemCoherent();
}

/**
 * Update the compute unit's memory-instruction statistics for this
 * instruction.
 *
 * Local-memory instructions only bump cu->dynamicLMemInstrCnt. Otherwise
 * the number of entries in cu->pagesTouched is sampled into
 * cu->pageDivergenceDist, every entry is merged into cu->pageAccesses,
 * cu->pagesTouched is cleared, and cu->dynamicGMemInstrCnt is bumped.
 */
void
GPUDynInst::updateStats()
{
    if (_staticInst->isLocalMem()) {
        // access to LDS (shared) memory
        cu->dynamicLMemInstrCnt++;
    } else {
        // access to global memory

        // update PageDivergence histogram
        const int number_pages_touched = cu->pagesTouched.size();

        assert(number_pages_touched);
        cu->pageDivergenceDist.sample(number_pages_touched);

        // Iterate by const reference so entries are not copied.
        for (const auto &page : cu->pagesTouched) {
            // see if this page has been touched before. if not, this also
            // inserts the page into the table.
            // NOTE(review): relies on pageAccesses.insert() returning an
            // (iterator, inserted) pair as standard associative containers
            // do -- confirm against the pageDataStruct declaration.
            auto ret = cu->pageAccesses.insert(
                ComputeUnit::pageDataStruct::value_type(page.first,
                    std::make_pair(1, page.second)));

            // if yes, then update the stats
            if (!ret.second) {
                ret.first->second.first++;
                ret.first->second.second += page.second;
            }
        }

        cu->pagesTouched.clear();

        // total number of memory instructions (dynamic)
        // Atomics are counted as a single memory instruction.
        // this is # memory instructions per wavefronts, not per workitem
        cu->dynamicGMemInstrCnt++;
    }
}