/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andreas Sandberg
 */

#ifndef __CPU_KVM_BASE_HH__
#define __CPU_KVM_BASE_HH__

#include <pthread.h>

#include <csignal>
#include <memory>
#include <queue>

#include "base/statistics.hh"
#include "cpu/kvm/perfevent.hh"
#include "cpu/kvm/timer.hh"
#include "cpu/kvm/vm.hh"
#include "cpu/base.hh"
#include "cpu/simple_thread.hh"

/** Signal to use to trigger exits from KVM */
#define KVM_KICK_SIGNAL SIGRTMIN

// forward declarations
class ThreadContext;
struct BaseKvmCPUParams;

/**
 * Base class for KVM based CPU models
 *
 * All architecture specific KVM implementation should inherit from
 * this class. The most basic CPU models only need to override the
 * updateKvmState() and updateThreadContext() methods to implement
 * state synchronization between gem5 and KVM.
 *
 * The architecture specific implementation is also responsible for
 * delivering interrupts into the VM. This is typically done by
 * overriding tick() and checking the thread context before entering
 * into the VM. In order to deliver an interrupt, the implementation
 * then calls KvmVM::setIRQLine() or BaseKvmCPU::kvmInterrupt()
 * depending on the specifics of the underlying hardware/drivers.
 */
class BaseKvmCPU : public BaseCPU
{
  public:
    BaseKvmCPU(BaseKvmCPUParams *params);
    virtual ~BaseKvmCPU();

    void init() override;
    void startup() override;
    void regStats() override;

    void serializeThread(CheckpointOut &cp, ThreadID tid) const override;
    void unserializeThread(CheckpointIn &cp, ThreadID tid) override;

    DrainState drain() override;
    void drainResume() override;
    void notifyFork() override;

    void switchOut() override;
    void takeOverFrom(BaseCPU *cpu) override;

    void verifyMemoryMode() const override;

    MasterPort &getDataPort() override { return dataPort; }
    MasterPort &getInstPort() override { return instPort; }

    void wakeup(ThreadID tid = 0) override;
    void activateContext(ThreadID thread_num) override;
    void suspendContext(ThreadID thread_num) override;
    void deallocateContext(ThreadID thread_num);
    void haltContext(ThreadID thread_num) override;

    long getVCpuID() const { return vcpuID; }
    ThreadContext *getContext(int tn) override;

    Counter totalInsts() const override;
    Counter totalOps() const override;

    /**
     * Callback from KvmCPUPort to transition the CPU out of RunningMMIOPending
     * when all timing requests have completed.
     */
    void finishMMIOPending();

    /** Dump the internal state to the terminal. */
    virtual void dump() const;

    /**
     * Force an exit from KVM.
     *
     * Send a signal to the thread owning this vCPU to get it to exit
     * from KVM. Ignored if the vCPU is not executing.
     */
    void kick() const { pthread_kill(vcpuThread, KVM_KICK_SIGNAL); }

    /**
     * A cached copy of a thread's state in the form of a SimpleThread
     * object.
     *
     * Normally the actual thread state is stored in the KVM vCPU. If KVM has
     * been running this copy is will be out of date. If we recently handled
     * some events within gem5 that required state to be updated this could be
     * the most up-to-date copy. When getContext() or updateThreadContext() is
     * called this copy gets updated.  The method syncThreadContext can
     * be used within a KVM CPU to update the thread context if the
     * KVM state is dirty (i.e., the vCPU has been run since the last
     * update).
     */
    SimpleThread *thread;

    /** ThreadContext object, provides an interface for external
     * objects to modify this thread's state.
     */
    ThreadContext *tc;

    KvmVM &vm;

  protected:
    /**
     *
     * @dot
     *   digraph {
     *     Idle;
     *     Running;
     *     RunningService;
     *     RunningServiceCompletion;
     *     RunningMMIOPending;
     *
     *     Idle -> Idle;
     *     Idle -> Running [label="activateContext()", URL="\ref activateContext"];
     *     Running -> Running [label="tick()", URL="\ref tick"];
     *     Running -> RunningService [label="tick()", URL="\ref tick"];
     *     Running -> Idle [label="suspendContext()", URL="\ref suspendContext"];
     *     Running -> Idle [label="drain()", URL="\ref drain"];
     *     Idle -> Running [label="drainResume()", URL="\ref drainResume"];
     *     RunningService -> RunningServiceCompletion [label="handleKvmExit()", URL="\ref handleKvmExit"];
     *     RunningService -> RunningMMIOPending [label="handleKvmExit()", URL="\ref handleKvmExit"];
     *     RunningMMIOPending -> RunningServiceCompletion [label="finishMMIOPending()", URL="\ref finishMMIOPending"];
     *     RunningServiceCompletion -> Running [label="tick()", URL="\ref tick"];
     *     RunningServiceCompletion -> RunningService [label="tick()", URL="\ref tick"];
     *   }
     * @enddot
     */
    enum Status {
        /** Context not scheduled in KVM.
         *
         * The CPU generally enters this state when the guest execute
         * an instruction that halts the CPU (e.g., WFI on ARM or HLT
         * on X86) if KVM traps this instruction. Ticks are not
         * scheduled in this state.
         *
         * @see suspendContext()
         */
        Idle,
        /** Running normally.
         *
         * This is the normal run state of the CPU. KVM will be
         * entered next time tick() is called.
         */
        Running,
        /** Requiring service at the beginning of the next cycle.
         *
         * The virtual machine has exited and requires service, tick()
         * will call handleKvmExit() on the next cycle. The next state
         * after running service is determined in handleKvmExit() and
         * depends on what kind of service the guest requested:
         * <ul>
         *   <li>IO/MMIO (Atomic): RunningServiceCompletion
         *   <li>IO/MMIO (Timing): RunningMMIOPending
         *   <li>Halt: Idle
         *   <li>Others: Running
         * </ul>
         */
        RunningService,
        /** Timing MMIO request in flight or stalled.
         *
         *  The VM has requested IO/MMIO and we are in timing mode.  A timing
         *  request is either stalled (and will be retried with recvReqRetry())
         *  or it is in flight.  After the timing request is complete, the CPU
         *  will transition to the RunningServiceCompletion state.
         */
        RunningMMIOPending,
        /** Service completion in progress.
         *
         * The VM has requested service that requires KVM to be
         * entered once in order to get to a consistent state. This
         * happens in handleKvmExit() or one of its friends after IO
         * exits. After executing tick(), the CPU will transition into
         * the Running or RunningService state.
         */
        RunningServiceCompletion,
    };

    /** CPU run state */
    Status _status;

    /**
     * Execute the CPU until the next event in the main event queue or
     * until the guest needs service from gem5.
     */
    void tick();

    /**
     * Get the value of the hardware cycle counter in the guest.
     *
     * This method is supposed to return the total number of cycles
     * executed in hardware mode relative to some arbitrary point in
     * the past. It's mainly used when estimating the number of cycles
     * actually executed by the CPU in kvmRun(). The default behavior
     * of this method is to use the cycles performance counter, but
     * some architectures may want to use internal registers instead.
     *
     * @return Number of host cycles executed relative to an undefined
     * point in the past.
     */
    virtual uint64_t getHostCycles() const;

    /**
     * Request KVM to run the guest for a given number of ticks. The
     * method returns the approximate number of ticks executed.
     *
     * @note The returned number of ticks can be both larger or
     * smaller than the requested number of ticks. A smaller number
     * can, for example, occur when the guest executes MMIO. A larger
     * number is typically due to performance counter inaccuracies.
     *
     * @note This method is virtual in order to allow implementations
     * to check for architecture specific events (e.g., interrupts)
     * before entering the VM.
     *
     * @note It is the response of the caller (normally tick()) to
     * make sure that the KVM state is synchronized and that the TC is
     * invalidated after entering KVM.
     *
     * @note This method does not normally cause any state
     * transitions. However, if it may suspend the CPU by suspending
     * the thread, which leads to a transition to the Idle state. In
     * such a case, kvm <i>must not</i> be entered.
     *
     * @param ticks Number of ticks to execute, set to 0 to exit
     * immediately after finishing pending operations.
     * @return Number of ticks executed (see note)
     */
    virtual Tick kvmRun(Tick ticks);

    /**
     * Request the CPU to run until draining completes.
     *
     * This function normally calls kvmRun(0) to make KVM finish
     * pending MMIO operations. Architecures implementing
     * archIsDrained() must override this method.
     *
     * @see BaseKvmCPU::archIsDrained()
     *
     * @return Number of ticks executed
     */
    virtual Tick kvmRunDrain();

    /**
     * Get a pointer to the kvm_run structure containing all the input
     * and output parameters from kvmRun().
     */
    struct kvm_run *getKvmRunState() { return _kvmRun; };

    /**
     * Retrieve a pointer to guest data stored at the end of the
     * kvm_run structure. This is mainly used for PIO operations
     * (KVM_EXIT_IO).
     *
     * @param offset Offset as specified by the kvm_run structure
     * @return Pointer to guest data
     */
    uint8_t *getGuestData(uint64_t offset) const {
        return (uint8_t *)_kvmRun + offset;
    };

    /**
     * @addtogroup KvmInterrupts
     * @{
     */
    /**
     * Send a non-maskable interrupt to the guest
     *
     * @note The presence of this call depends on Kvm::capUserNMI().
     */
    void kvmNonMaskableInterrupt();

    /**
     * Send a normal interrupt to the guest
     *
     * @note Make sure that ready_for_interrupt_injection in kvm_run
     * is set prior to calling this function. If not, an interrupt
     * window must be requested by setting request_interrupt_window in
     * kvm_run to 1 and restarting the guest.
     *
     * @param interrupt Structure describing the interrupt to send
     */
    void kvmInterrupt(const struct kvm_interrupt &interrupt);

    /** @} */

    /** @{ */
    /**
     * Get/Set the register state of the guest vCPU
     *
     * KVM has two different interfaces for accessing the state of the
     * guest CPU. One interface updates 'normal' registers and one
     * updates 'special' registers. The distinction between special
     * and normal registers isn't very clear and is architecture
     * dependent.
     */
    void getRegisters(struct kvm_regs &regs) const;
    void setRegisters(const struct kvm_regs &regs);
    void getSpecialRegisters(struct kvm_sregs &regs) const;
    void setSpecialRegisters(const struct kvm_sregs &regs);
    /** @} */

    /** @{ */
    /**
     * Get/Set the guest FPU/vector state
     */
    void getFPUState(struct kvm_fpu &state) const;
    void setFPUState(const struct kvm_fpu &state);
    /** @} */

    /** @{ */
    /**
     * Get/Set single register using the KVM_(SET|GET)_ONE_REG API.
     *
     * @note The presence of this call depends on Kvm::capOneReg().
     */
    void setOneReg(uint64_t id, const void *addr);
    void setOneReg(uint64_t id, uint64_t value) { setOneReg(id, &value); }
    void setOneReg(uint64_t id, uint32_t value) { setOneReg(id, &value); }
    void getOneReg(uint64_t id, void *addr) const;
    uint64_t getOneRegU64(uint64_t id) const {
        uint64_t value;
        getOneReg(id, &value);
        return value;
    }
    uint32_t getOneRegU32(uint64_t id) const {
        uint32_t value;
        getOneReg(id, &value);
        return value;
    }
    /** @} */

    /**
     * Get and format one register for printout.
     *
     * This function call getOneReg() to retrieve the contents of one
     * register and automatically formats it for printing.
     *
     * @note The presence of this call depends on Kvm::capOneReg().
     */
    std::string getAndFormatOneReg(uint64_t id) const;

    /** @{ */
    /**
     * Update the KVM state from the current thread context
     *
     * The base CPU calls this method before starting the guest CPU
     * when the contextDirty flag is set. The architecture dependent
     * CPU implementation is expected to update all guest state
     * (registers, special registers, and FPU state).
     */
    virtual void updateKvmState() = 0;

    /**
     * Update the current thread context with the KVM state
     *
     * The base CPU after the guest updates any of the KVM state. In
     * practice, this happens after kvmRun is called. The architecture
     * dependent code is expected to read the state of the guest CPU
     * and update gem5's thread state.
     */
    virtual void updateThreadContext() = 0;

    /**
     * Update a thread context if the KVM state is dirty with respect
     * to the cached thread context.
     */
    void syncThreadContext();

    /**
     * Get a pointer to the event queue owning devices.
     *
     * Devices always live in a separate device event queue when
     * running in multi-core mode. We need to temporarily migrate to
     * this queue when accessing devices. By convention, devices and
     * the VM use the same event queue.
     */
    EventQueue *deviceEventQueue() { return vm.eventQueue(); }

    /**
     * Update the KVM if the thread context is dirty.
     */
    void syncKvmState();
    /** @} */

    /** @{ */
    /**
     * Main kvmRun exit handler, calls the relevant handleKvmExit*
     * depending on exit type.
     *
     * @return Number of ticks spent servicing the exit request
     */
    virtual Tick handleKvmExit();

    /**
     * The guest performed a legacy IO request (out/inp on x86)
     *
     * @return Number of ticks spent servicing the IO request
     */
    virtual Tick handleKvmExitIO();

    /**
     * The guest requested a monitor service using a hypercall
     *
     * @return Number of ticks spent servicing the hypercall
     */
    virtual Tick handleKvmExitHypercall();

    /**
     * The guest exited because an interrupt window was requested
     *
     * The guest exited because an interrupt window was requested
     * (request_interrupt_window in the kvm_run structure was set to 1
     * before calling kvmRun) and it is now ready to receive
     *
     * @return Number of ticks spent servicing the IRQ
     */
    virtual Tick handleKvmExitIRQWindowOpen();

    /**
     * An unknown architecture dependent error occurred when starting
     * the vCPU
     *
     * The kvm_run data structure contains the hardware error
     * code. The defaults behavior of this method just prints the HW
     * error code and panics. Architecture dependent implementations
     * may want to override this method to provide better,
     * hardware-aware, error messages.
     *
     * @return Number of ticks delay the next CPU tick
     */
    virtual Tick handleKvmExitUnknown();

    /**
     * An unhandled virtualization exception occured
     *
     * Some KVM virtualization drivers return unhandled exceptions to
     * the user-space monitor. This interface is currently only used
     * by the Intel VMX KVM driver.
     *
     * @return Number of ticks delay the next CPU tick
     */
    virtual Tick handleKvmExitException();

    /**
     * KVM failed to start the virtualized CPU
     *
     * The kvm_run data structure contains the hardware-specific error
     * code.
     *
     * @return Number of ticks delay the next CPU tick
     */
    virtual Tick handleKvmExitFailEntry();
    /** @} */

    /**
     * Is the architecture specific code in a state that prevents
     * draining?
     *
     * This method should return false if there are any pending events
     * in the guest vCPU that won't be carried over to the gem5 state
     * and thus will prevent correct checkpointing or CPU handover. It
     * might, for example, check for pending interrupts that have been
     * passed to the vCPU but not acknowledged by the OS. Architecures
     * implementing this method <i>must</i> override
     * kvmRunDrain().
     *
     * @see BaseKvmCPU::kvmRunDrain()
     *
     * @return true if the vCPU is drained, false otherwise.
     */
    virtual bool archIsDrained() const { return true; }

    /**
     * Inject a memory mapped IO request into gem5
     *
     * @param paddr Physical address
     * @param data Pointer to the source/destination buffer
     * @param size Memory access size
     * @param write True if write, False if read
     * @return Number of ticks spent servicing the memory access
     */
    Tick doMMIOAccess(Addr paddr, void *data, int size, bool write);

    /** @{ */
    /**
     * Set the signal mask used in kvmRun()
     *
     * This method allows the signal mask of the thread executing
     * kvmRun() to be overridden inside the actual system call. This
     * allows us to mask timer signals used to force KVM exits while
     * in gem5.
     *
     * The signal mask can be disabled by setting it to NULL.
     *
     * @param mask Signals to mask
     */
    void setSignalMask(const sigset_t *mask);
    /** @} */

    /**
     * @addtogroup KvmIoctl
     * @{
     */
    /**
     * vCPU ioctl interface.
     *
     * @param request KVM vCPU request
     * @param p1 Optional request parameter
     *
     * @return -1 on error (error number in errno), ioctl dependent
     * value otherwise.
     */
    int ioctl(int request, long p1) const;
    int ioctl(int request, void *p1) const {
        return ioctl(request, (long)p1);
    }
    int ioctl(int request) const {
        return ioctl(request, 0L);
    }
    /** @} */


    /**
     * KVM memory port.  Uses default MasterPort behavior and provides an
     * interface for KVM to transparently submit atomic or timing requests.
     */
    class KVMCpuPort : public MasterPort
    {

      public:
        KVMCpuPort(const std::string &_name, BaseKvmCPU *_cpu)
            : MasterPort(_name, _cpu), cpu(_cpu), activeMMIOReqs(0)
        { }
        /**
         * Interface to send Atomic or Timing IO request.  Assumes that the pkt
         * and corresponding req have been dynamically allocated and deletes
         * them both if the system is in atomic mode.
         */
        Tick submitIO(PacketPtr pkt);

        /** Returns next valid state after one or more IO accesses */
        Status nextIOState() const;

      protected:
        /** KVM cpu pointer for finishMMIOPending() callback */
        BaseKvmCPU *cpu;

        /** Pending MMIO packets */
        std::queue<PacketPtr> pendingMMIOPkts;

        /** Number of MMIO requests in flight */
        unsigned int activeMMIOReqs;

        bool recvTimingResp(PacketPtr pkt) override;

        void recvReqRetry() override;

    };

    /** Port for data requests */
    KVMCpuPort dataPort;

    /** Unused dummy port for the instruction interface */
    KVMCpuPort instPort;

    /**
     * Be conservative and always synchronize the thread context on
     * KVM entry/exit.
     */
    const bool alwaysSyncTC;

    /**
     * Is the gem5 context dirty? Set to true to force an update of
     * the KVM vCPU state upon the next call to kvmRun().
     */
    bool threadContextDirty;

    /**
     * Is the KVM state dirty? Set to true to force an update of
     * the KVM vCPU state upon the next call to kvmRun().
     */
    bool kvmStateDirty;

    /** KVM internal ID of the vCPU */
    const long vcpuID;

    /** ID of the vCPU thread */
    pthread_t vcpuThread;

  private:
    /**
     * Service MMIO requests in the mmioRing.
     *
     *
     * @return Number of ticks spent servicing the MMIO requests in
     * the MMIO ring buffer
     */
    Tick flushCoalescedMMIO();

    /**
     * Setup a signal handler to catch the timer signal used to
     * switch back to the monitor.
     */
    void setupSignalHandler();

    /**
     * Discard a (potentially) pending signal.
     *
     * @param signum Signal to discard
     * @return true if the signal was pending, false otherwise.
     */
    bool discardPendingSignal(int signum) const;

    /**
     * Thread-specific initialization.
     *
     * Some KVM-related initialization requires us to know the TID of
     * the thread that is going to execute our event queue. For
     * example, when setting up timers, we need to know the TID of the
     * thread executing in KVM in order to deliver the timer signal to
     * that thread. This method is called as the first event in this
     * SimObject's event queue.
     *
     * @see startup
     */
    void startupThread();

    /** Try to drain the CPU if a drain is pending */
    bool tryDrain();

    /** Execute the KVM_RUN ioctl */
    void ioctlRun();

    /** KVM vCPU file descriptor */
    int vcpuFD;
    /** Size of MMAPed kvm_run area */
    int vcpuMMapSize;
    /**
     * Pointer to the kvm_run structure used to communicate parameters
     * with KVM.
     *
     * @note This is the base pointer of the MMAPed KVM region. The
     * first page contains the kvm_run structure. Subsequent pages may
     * contain other data such as the MMIO ring buffer.
     */
    struct kvm_run *_kvmRun;
    /**
     * Coalesced MMIO ring buffer. NULL if coalesced MMIO is not
     * supported.
     */
    struct kvm_coalesced_mmio_ring *mmioRing;
    /** Cached page size of the host */
    const long pageSize;

    EventFunctionWrapper tickEvent;

    /**
     * Setup an instruction break if there is one pending.
     *
     * Check if there are pending instruction breaks in the CPU's
     * instruction event queue and schedule an instruction break using
     * PerfEvent.
     *
     * @note This method doesn't currently handle the main system
     * instruction event queue.
     */
    void setupInstStop();

    /** @{ */
    /** Setup hardware performance counters */
    void setupCounters();

    /**
     * Setup the guest instruction counter.
     *
     * Setup the guest instruction counter and optionally request a
     * signal every N instructions executed by the guest. This method
     * will re-attach the counter if the counter has already been
     * attached and its sampling settings have changed.
     *
     * @param period Signal period, set to 0 to disable signaling.
     */
    void setupInstCounter(uint64_t period = 0);

    /** Currently active instruction count breakpoint */
    uint64_t activeInstPeriod;

    /**
     * Guest cycle counter.
     *
     * This is the group leader of all performance counters measuring
     * the guest system. It can be used in conjunction with the
     * PerfKvmTimer (see perfControlledByTimer) to trigger exits from
     * KVM.
     */
    PerfKvmCounter hwCycles;

    /**
     * Guest instruction counter.
     *
     * This counter is typically only used to measure the number of
     * instructions executed by the guest. However, it can also be
     * used to trigger exits from KVM if the configuration script
     * requests an exit after a certain number of instructions.
     *
     * @see setupInstBreak
     * @see scheduleInstStop
     */
    PerfKvmCounter hwInstructions;

    /**
     * Does the runTimer control the performance counters?
     *
     * The run timer will automatically enable and disable performance
     * counters if a PerfEvent-based timer is used to control KVM
     * exits.
     */
    bool perfControlledByTimer;
    /** @} */

    /**
     * Timer used to force execution into the monitor after a
     * specified number of simulation tick equivalents have executed
     * in the guest. This counter generates the signal specified by
     * KVM_TIMER_SIGNAL.
     */
    std::unique_ptr<BaseKvmTimer> runTimer;

    /** Host factor as specified in the configuration */
    float hostFactor;

  public:
    /* @{ */
    Stats::Scalar numInsts;
    Stats::Scalar numVMExits;
    Stats::Scalar numVMHalfEntries;
    Stats::Scalar numExitSignal;
    Stats::Scalar numMMIO;
    Stats::Scalar numCoalescedMMIO;
    Stats::Scalar numIO;
    Stats::Scalar numHalt;
    Stats::Scalar numInterrupts;
    Stats::Scalar numHypercalls;
    /* @} */

    /** Number of instructions executed by the CPU */
    Counter ctrInsts;
};

#endif