/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#include "arch/hsail/insts/decl.hh"
#include "debug/GPUExec.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/simple_pool_manager.hh"

namespace HsailISA
{
    // Explicit specializations of the static 'label' member for each data
    // type, giving the short lower-case name of that type as a string.
    // NOTE(review): the types themselves are presumably declared in
    // arch/hsail/insts/decl.hh -- confirm against the header.

    // bit types
    template<> const char *B1::label = "b1";
    template<> const char *B8::label = "b8";
    template<> const char *B16::label = "b16";
    template<> const char *B32::label = "b32";
    template<> const char *B64::label = "b64";

    // 's'-prefixed types (presumably signed integers)
    template<> const char *S8::label = "s8";
    template<> const char *S16::label = "s16";
    template<> const char *S32::label = "s32";
    template<> const char *S64::label = "s64";

    // 'u'-prefixed types (presumably unsigned integers)
    template<> const char *U8::label = "u8";
    template<> const char *U16::label = "u16";
    template<> const char *U32::label = "u32";
    template<> const char *U64::label = "u64";

    // 'f'-prefixed types (presumably floating point)
    template<> const char *F32::label = "f32";
    template<> const char *F64::label = "f64";

    /**
     * Translate a BRIG compare operation into its lower-case mnemonic.
     *
     * @param cmpOp the compare operation to name
     * @return a pointer to a string literal holding the mnemonic, or
     *         "unknown" when cmpOp matches none of the cases below
     */
    const char*
    cmpOpToString(Brig::BrigCompareOperation cmpOp)
    {
        using namespace Brig;

        // value reported for any operation not handled by the switch
        const char *mnemonic = "unknown";

        switch (cmpOp) {
          // plain comparisons
          case BRIG_COMPARE_EQ:   mnemonic = "eq";   break;
          case BRIG_COMPARE_NE:   mnemonic = "ne";   break;
          case BRIG_COMPARE_LT:   mnemonic = "lt";   break;
          case BRIG_COMPARE_LE:   mnemonic = "le";   break;
          case BRIG_COMPARE_GT:   mnemonic = "gt";   break;
          case BRIG_COMPARE_GE:   mnemonic = "ge";   break;

          // 'u'-suffixed comparisons
          case BRIG_COMPARE_EQU:  mnemonic = "equ";  break;
          case BRIG_COMPARE_NEU:  mnemonic = "neu";  break;
          case BRIG_COMPARE_LTU:  mnemonic = "ltu";  break;
          case BRIG_COMPARE_LEU:  mnemonic = "leu";  break;
          case BRIG_COMPARE_GTU:  mnemonic = "gtu";  break;
          case BRIG_COMPARE_GEU:  mnemonic = "geu";  break;

          // num / nan tests
          case BRIG_COMPARE_NUM:  mnemonic = "num";  break;
          case BRIG_COMPARE_NAN:  mnemonic = "nan";  break;

          // 's'-prefixed comparisons
          case BRIG_COMPARE_SEQ:  mnemonic = "seq";  break;
          case BRIG_COMPARE_SNE:  mnemonic = "sne";  break;
          case BRIG_COMPARE_SLT:  mnemonic = "slt";  break;
          case BRIG_COMPARE_SLE:  mnemonic = "sle";  break;
          case BRIG_COMPARE_SGT:  mnemonic = "sgt";  break;
          case BRIG_COMPARE_SGE:  mnemonic = "sge";  break;

          // 's'-prefixed, 'u'-suffixed comparisons
          case BRIG_COMPARE_SEQU: mnemonic = "sequ"; break;
          case BRIG_COMPARE_SNEU: mnemonic = "sneu"; break;
          case BRIG_COMPARE_SLTU: mnemonic = "sltu"; break;
          case BRIG_COMPARE_SLEU: mnemonic = "sleu"; break;
          case BRIG_COMPARE_SGTU: mnemonic = "sgtu"; break;
          case BRIG_COMPARE_SGEU: mnemonic = "sgeu"; break;

          // 's'-prefixed num / nan tests
          case BRIG_COMPARE_SNUM: mnemonic = "snum"; break;
          case BRIG_COMPARE_SNAN: mnemonic = "snan"; break;

          default:
            break;
        }

        return mnemonic;
    }

    /**
     * Execute a return instruction for the wavefront that issued it.
     *
     * Every lane set in the wavefront's current predicate mask is cleared
     * from initMask, and all instructions buffered behind the first one are
     * discarded. Once no lane remains set in initMask the wavefront is
     * stopped, its LDS reference is dropped and its reserved vector
     * registers are freed. If the LDS reference count returned is zero a
     * global memory fence is injected; otherwise the dispatcher is asked to
     * schedule a dispatch.
     *
     * @param gpuDynInst the dynamic instruction being executed; it is also
     *                   the packet handed to the global memory fence
     */
    void
    Ret::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        auto cu = wf->computeUnit;

        // retire every work-item that is set in the current predicate mask
        const VectorMask &activeLanes = wf->getPred();

        for (int lane = 0; lane < cu->wfSize(); ++lane) {
            if (activeLanes[lane])
                wf->initMask[lane] = 0;
        }

        // keep only the first buffered instruction; everything fetched
        // after it was for work-items that have now completed
        auto &instBuf = wf->instructionBuffer;
        instBuf.erase(instBuf.begin() + 1, instBuf.end());

        if (wf->pendingFetch)
            wf->dropFetch = true;

        // the wavefront is only done once no work-item is left
        if (!wf->initMask.none())
            return;

        wf->status = Wavefront::S_STOPPED;

        const int32_t refCount =
            cu->getLds().decreaseRefCounter(wf->dispatchId, wf->wgId);

        DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
                cu->cu_id, wf->wgId, refCount);

        // give back the vector registers this wavefront had reserved
        cu->vectorRegsReserved[wf->simdId] -= wf->reservedVectorRegs;
        assert(cu->vectorRegsReserved[wf->simdId] >= 0);

        // last register of the region, wrapped around the register file size
        auto regFile = cu->vrf[wf->simdId];
        const uint32_t endIndex =
            (wf->startVgprIndex + wf->reservedVectorRegs - 1) %
            regFile->numRegs();

        regFile->manager->freeRegion(wf->startVgprIndex, endIndex);

        wf->reservedVectorRegs = 0;
        wf->startVgprIndex = 0;
        cu->completedWfs++;

        DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
                cu->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId);

        if (refCount) {
            // non-zero reference count: just ask the dispatcher to
            // schedule a dispatch
            cu->shader->dispatcher->scheduleDispatch();
            return;
        }

        // Reference count is zero.
        // Notify Memory System of Kernel Completion
        // Kernel End = isKernel + isRelease
        setFlag(SystemScope);
        setFlag(Release);
        setFlag(GlobalSegment);

        wf->status = Wavefront::S_RETURNING;

        // reuse this instruction as the fence packet, tagged with the
        // identifiers of the wavefront that is returning
        GPUDynInstPtr fencePkt = gpuDynInst;
        fencePkt->useContinuation = false;
        fencePkt->simdId = wf->simdId;
        fencePkt->wfSlotId = wf->wfSlotId;
        fencePkt->wfDynId = wf->wfDynId;

        // NOTE(review): the 'true' argument presumably marks this fence as
        // the kernel-end fence -- confirm against ComputeUnit
        cu->injectGlobalMemFence(fencePkt, true);
    }

    /**
     * Execute a barrier instruction for the issuing wavefront.
     *
     * Advances the wavefront's barrier count to one past oldBarrierCnt and
     * marks the wavefront as stalled at the barrier.
     *
     * @param gpuDynInst the dynamic instruction being executed
     */
    void
    Barrier::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // the two counters are expected to agree on entry
        assert(wf->barrierCnt == wf->oldBarrierCnt);

        wf->barrierCnt = wf->oldBarrierCnt + 1;
        wf->stalledAtBarrier = true;
    }
} // namespace HsailISA