diff options
Diffstat (limited to 'src/cpu/kvm/base.cc')
-rw-r--r-- | src/cpu/kvm/base.cc | 805 |
1 files changed, 805 insertions, 0 deletions
diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc new file mode 100644 index 000000000..04e35854a --- /dev/null +++ b/src/cpu/kvm/base.cc @@ -0,0 +1,805 @@ +/* + * Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andreas Sandberg + */ + +#include <linux/kvm.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <cerrno> +#include <csignal> +#include <ostream> + +#include "arch/utility.hh" +#include "cpu/kvm/base.hh" +#include "debug/Kvm.hh" +#include "debug/KvmIO.hh" +#include "debug/KvmRun.hh" +#include "params/BaseKvmCPU.hh" +#include "sim/process.hh" +#include "sim/system.hh" + +/* Used by some KVM macros */ +#define PAGE_SIZE pageSize + +volatile bool timerOverflowed = false; + +static void +onTimerOverflow(int signo, siginfo_t *si, void *data) +{ + timerOverflowed = true; +} + +BaseKvmCPU::BaseKvmCPU(BaseKvmCPUParams *params) + : BaseCPU(params), + vm(*params->kvmVM), + _status(Idle), + dataPort(name() + ".dcache_port", this), + instPort(name() + ".icache_port", this), + contextDirty(true), + vcpuID(vm.allocVCPUID()), vcpuFD(-1), vcpuMMapSize(0), + _kvmRun(NULL), mmioRing(NULL), + pageSize(sysconf(_SC_PAGE_SIZE)), + tickEvent(*this), + hostFactor(params->hostFactor) +{ + if (pageSize == -1) + panic("KVM: Failed to determine host page size (%i)\n", + errno); + + thread = new SimpleThread(this, 0, params->system, + params->itb, params->dtb, params->isa[0]); + thread->setStatus(ThreadContext::Halted); + tc = thread->getTC(); + threadContexts.push_back(tc); + + setupCounters(); + setupSignalHandler(); + + runTimer.reset(new PosixKvmTimer(KVM_TIMER_SIGNAL, CLOCK_MONOTONIC, + params->hostFactor, + params->clock)); +} + +BaseKvmCPU::~BaseKvmCPU() +{ + if (_kvmRun) + munmap(_kvmRun, vcpuMMapSize); + close(vcpuFD); +} + +void +BaseKvmCPU::init() +{ + BaseCPU::init(); + + if (numThreads != 1) + fatal("KVM: Multithreading not supported"); + + tc->initMemProxies(tc); + + // initialize CPU, including PC + if (FullSystem && !switchedOut()) + TheISA::initCPU(tc, tc->contextId()); + + mmio_req.setThreadContext(tc->contextId(), 0); +} + +void +BaseKvmCPU::startup() +{ + Kvm &kvm(vm.kvm); + + BaseCPU::startup(); + + assert(vcpuFD == -1); + + // Tell the VM that a CPU is about to start. + vm.cpuStartup(); + + // We can't initialize KVM CPUs in BaseKvmCPU::init() since we are + // not guaranteed that the parent KVM VM has initialized at that + // point. Initialize virtual CPUs here instead. + vcpuFD = vm.createVCPU(vcpuID); + + // Map the KVM run structure */ + vcpuMMapSize = kvm.getVCPUMMapSize(); + _kvmRun = (struct kvm_run *)mmap(0, vcpuMMapSize, + PROT_READ | PROT_WRITE, MAP_SHARED, + vcpuFD, 0); + if (_kvmRun == MAP_FAILED) + panic("KVM: Failed to map run data structure\n"); + + // Setup a pointer to the MMIO ring buffer if coalesced MMIO is + // available. The offset into the KVM's communication page is + // provided by the coalesced MMIO capability. + int mmioOffset(kvm.capCoalescedMMIO()); + if (mmioOffset) { + inform("KVM: Coalesced IO available\n"); + mmioRing = (struct kvm_coalesced_mmio_ring *)( + (char *)_kvmRun + (mmioOffset * pageSize)); + } else { + inform("KVM: Coalesced not supported by host OS\n"); + } +} + +void +BaseKvmCPU::regStats() +{ + using namespace Stats; + + BaseCPU::regStats(); + + numVMExits + .name(name() + ".numVMExits") + .desc("total number of KVM exits") + ; + + numMMIO + .name(name() + ".numMMIO") + .desc("number of VM exits due to memory mapped IO") + ; + + numCoalescedMMIO + .name(name() + ".numCoalescedMMIO") + .desc("number of coalesced memory mapped IO requests") + ; + + numIO + .name(name() + ".numIO") + .desc("number of VM exits due to legacy IO") + ; + + numHalt + .name(name() + ".numHalt") + .desc("number of VM exits due to wait for interrupt instructions") + ; + + numInterrupts + .name(name() + ".numInterrupts") + .desc("number of interrupts delivered") + ; + + numHypercalls + .name(name() + ".numHypercalls") + .desc("number of hypercalls") + ; +} + +void +BaseKvmCPU::serializeThread(std::ostream &os, ThreadID tid) +{ + assert(tid == 0); + assert(_status == Idle); + thread->serialize(os); +} + +void +BaseKvmCPU::unserializeThread(Checkpoint *cp, const std::string §ion, + ThreadID tid) +{ + assert(tid == 0); + assert(_status == Idle); + thread->unserialize(cp, section); + contextDirty = true; +} + +unsigned int +BaseKvmCPU::drain(DrainManager *dm) +{ + if (switchedOut()) + return 0; + + DPRINTF(Kvm, "drain\n"); + + // De-schedule the tick event so we don't insert any more MMIOs + // into the system while it is draining. + if (tickEvent.scheduled()) + deschedule(tickEvent); + + _status = Idle; + return 0; +} + +void +BaseKvmCPU::drainResume() +{ + assert(!tickEvent.scheduled()); + + // We might have been switched out. In that case, we don't need to + // do anything. + if (switchedOut()) + return; + + DPRINTF(Kvm, "drainResume\n"); + verifyMemoryMode(); + + // The tick event is de-scheduled as a part of the draining + // process. Re-schedule it if the thread context is active. + if (tc->status() == ThreadContext::Active) { + schedule(tickEvent, nextCycle()); + _status = Running; + } else { + _status = Idle; + } +} + +void +BaseKvmCPU::switchOut() +{ + BaseCPU::switchOut(); + + DPRINTF(Kvm, "switchOut\n"); + + // We should have drained prior to executing a switchOut, which + // means that the tick event shouldn't be scheduled and the CPU is + // idle. + assert(!tickEvent.scheduled()); + assert(_status == Idle); +} + +void +BaseKvmCPU::takeOverFrom(BaseCPU *cpu) +{ + DPRINTF(Kvm, "takeOverFrom\n"); + + BaseCPU::takeOverFrom(cpu); + + // We should have drained prior to executing a switchOut, which + // means that the tick event shouldn't be scheduled and the CPU is + // idle. + assert(!tickEvent.scheduled()); + assert(_status == Idle); + assert(threadContexts.size() == 1); + + // Force a gem5 -> KVM context synchronization + contextDirty = true; +} + +void +BaseKvmCPU::verifyMemoryMode() const +{ + if (!(system->isAtomicMode() && system->bypassCaches())) { + fatal("The KVM-based CPUs requires the memory system to be in the " + "'atomic_noncaching' mode.\n"); + } +} + +void +BaseKvmCPU::wakeup() +{ + DPRINTF(Kvm, "wakeup()\n"); + + if (thread->status() != ThreadContext::Suspended) + return; + + thread->activate(); +} + +void +BaseKvmCPU::activateContext(ThreadID thread_num, Cycles delay) +{ + DPRINTF(Kvm, "ActivateContext %d (%d cycles)\n", thread_num, delay); + + assert(thread_num == 0); + assert(thread); + + assert(_status == Idle); + assert(!tickEvent.scheduled()); + + numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend) + * hostFactor; + + schedule(tickEvent, clockEdge(delay)); + _status = Running; +} + + +void +BaseKvmCPU::suspendContext(ThreadID thread_num) +{ + DPRINTF(Kvm, "SuspendContext %d\n", thread_num); + + assert(thread_num == 0); + assert(thread); + + if (_status == Idle) + return; + + assert(_status == Running); + + // The tick event may no be scheduled if the quest has requested + // the monitor to wait for interrupts. The normal CPU models can + // get their tick events descheduled by quiesce instructions, but + // that can't happen here. + if (tickEvent.scheduled()) + deschedule(tickEvent); + + _status = Idle; +} + +void +BaseKvmCPU::deallocateContext(ThreadID thread_num) +{ + // for now, these are equivalent + suspendContext(thread_num); +} + +void +BaseKvmCPU::haltContext(ThreadID thread_num) +{ + // for now, these are equivalent + suspendContext(thread_num); +} + +Counter +BaseKvmCPU::totalInsts() const +{ + return hwInstructions.read(); +} + +Counter +BaseKvmCPU::totalOps() const +{ + hack_once("Pretending totalOps is equivalent to totalInsts()\n"); + return hwInstructions.read(); +} + +void +BaseKvmCPU::dump() +{ + inform("State dumping not implemented."); +} + +void +BaseKvmCPU::tick() +{ + assert(_status == Running); + + DPRINTF(KvmRun, "Entering KVM...\n"); + + if (contextDirty) { + contextDirty = false; + updateKvmState(); + } + + Tick ticksToExecute(mainEventQueue.nextTick() - curTick()); + Tick ticksExecuted(kvmRun(ticksToExecute)); + updateThreadContext(); + + Tick delay(ticksExecuted + handleKvmExit()); + + switch (_status) { + case Running: + schedule(tickEvent, clockEdge(ticksToCycles(delay))); + break; + + default: + /* The CPU is halted or waiting for an interrupt from a + * device. Don't start it. */ + break; + } +} + +Tick +BaseKvmCPU::kvmRun(Tick ticks) +{ + uint64_t baseCycles(hwCycles.read()); + uint64_t baseInstrs(hwInstructions.read()); + + if (ticks < runTimer->resolution()) { + DPRINTF(KvmRun, "KVM: Adjusting tick count (%i -> %i)\n", + ticks, runTimer->resolution()); + ticks = runTimer->resolution(); + } + + DPRINTF(KvmRun, "KVM: Executing for %i ticks\n", ticks); + timerOverflowed = false; + runTimer->arm(ticks); + startCounters(); + if (ioctl(KVM_RUN) == -1) { + if (errno != EINTR) + panic("KVM: Failed to start virtual CPU (errno: %i)\n", + errno); + } + stopCounters(); + runTimer->disarm(); + + uint64_t cyclesExecuted(hwCycles.read() - baseCycles); + Tick ticksExecuted(runTimer->ticksFromHostCycles(cyclesExecuted)); + + if (ticksExecuted < ticks && + timerOverflowed && + _kvmRun->exit_reason == KVM_EXIT_INTR) { + // TODO: We should probably do something clever here... + warn("KVM: Early timer event, requested %i ticks but got %i ticks.\n", + ticks, ticksExecuted); + } + + numCycles += cyclesExecuted * hostFactor; + ++numVMExits; + + DPRINTF(KvmRun, "KVM: Executed %i instructions in %i cycles (%i ticks, sim cycles: %i).\n", + hwInstructions.read() - baseInstrs, + cyclesExecuted, + ticksExecuted, + cyclesExecuted * hostFactor); + + return ticksExecuted + flushCoalescedMMIO(); +} + +void +BaseKvmCPU::kvmNonMaskableInterrupt() +{ + ++numInterrupts; + if (ioctl(KVM_NMI) == -1) + panic("KVM: Failed to deliver NMI to virtual CPU\n"); +} + +void +BaseKvmCPU::kvmInterrupt(const struct kvm_interrupt &interrupt) +{ + ++numInterrupts; + if (ioctl(KVM_INTERRUPT, (void *)&interrupt) == -1) + panic("KVM: Failed to deliver interrupt to virtual CPU\n"); +} + +void +BaseKvmCPU::getRegisters(struct kvm_regs ®s) const +{ + if (ioctl(KVM_GET_REGS, ®s) == -1) + panic("KVM: Failed to get guest registers\n"); +} + +void +BaseKvmCPU::setRegisters(const struct kvm_regs ®s) +{ + if (ioctl(KVM_SET_REGS, (void *)®s) == -1) + panic("KVM: Failed to set guest registers\n"); +} + +void +BaseKvmCPU::getSpecialRegisters(struct kvm_sregs ®s) const +{ + if (ioctl(KVM_GET_SREGS, ®s) == -1) + panic("KVM: Failed to get guest special registers\n"); +} + +void +BaseKvmCPU::setSpecialRegisters(const struct kvm_sregs ®s) +{ + if (ioctl(KVM_SET_SREGS, (void *)®s) == -1) + panic("KVM: Failed to set guest special registers\n"); +} + +void +BaseKvmCPU::getFPUState(struct kvm_fpu &state) const +{ + if (ioctl(KVM_GET_FPU, &state) == -1) + panic("KVM: Failed to get guest FPU state\n"); +} + +void +BaseKvmCPU::setFPUState(const struct kvm_fpu &state) +{ + if (ioctl(KVM_SET_FPU, (void *)&state) == -1) + panic("KVM: Failed to set guest FPU state\n"); +} + + +void +BaseKvmCPU::setOneReg(uint64_t id, const void *addr) +{ +#ifdef KVM_SET_ONE_REG + struct kvm_one_reg reg; + reg.id = id; + reg.addr = (uint64_t)addr; + + if (ioctl(KVM_SET_ONE_REG, ®) == -1) { + panic("KVM: Failed to set register (0x%x) value (errno: %i)\n", + id, errno); + } +#else + panic("KVM_SET_ONE_REG is unsupported on this platform.\n"); +#endif +} + +void +BaseKvmCPU::getOneReg(uint64_t id, void *addr) const +{ +#ifdef KVM_GET_ONE_REG + struct kvm_one_reg reg; + reg.id = id; + reg.addr = (uint64_t)addr; + + if (ioctl(KVM_GET_ONE_REG, ®) == -1) { + panic("KVM: Failed to get register (0x%x) value (errno: %i)\n", + id, errno); + } +#else + panic("KVM_GET_ONE_REG is unsupported on this platform.\n"); +#endif +} + +std::string +BaseKvmCPU::getAndFormatOneReg(uint64_t id) const +{ +#ifdef KVM_GET_ONE_REG + std::ostringstream ss; + + ss.setf(std::ios::hex, std::ios::basefield); + ss.setf(std::ios::showbase); +#define HANDLE_INTTYPE(len) \ + case KVM_REG_SIZE_U ## len: { \ + uint ## len ## _t value; \ + getOneReg(id, &value); \ + ss << value; \ + } break + +#define HANDLE_ARRAY(len) \ + case KVM_REG_SIZE_U ## len: { \ + uint8_t value[len / 8]; \ + getOneReg(id, value); \ + ss << "[" << value[0]; \ + for (int i = 1; i < len / 8; ++i) \ + ss << ", " << value[i]; \ + ss << "]"; \ + } break + + switch (id & KVM_REG_SIZE_MASK) { + HANDLE_INTTYPE(8); + HANDLE_INTTYPE(16); + HANDLE_INTTYPE(32); + HANDLE_INTTYPE(64); + HANDLE_ARRAY(128); + HANDLE_ARRAY(256); + HANDLE_ARRAY(512); + HANDLE_ARRAY(1024); + default: + ss << "??"; + } + +#undef HANDLE_INTTYPE +#undef HANDLE_ARRAY + + return ss.str(); +#else + panic("KVM_GET_ONE_REG is unsupported on this platform.\n"); +#endif +} + +Tick +BaseKvmCPU::handleKvmExit() +{ + DPRINTF(KvmRun, "handleKvmExit (exit_reason: %i)\n", _kvmRun->exit_reason); + + switch (_kvmRun->exit_reason) { + case KVM_EXIT_UNKNOWN: + return handleKvmExitUnknown(); + + case KVM_EXIT_EXCEPTION: + return handleKvmExitException(); + + case KVM_EXIT_IO: + ++numIO; + return handleKvmExitIO(); + + case KVM_EXIT_HYPERCALL: + ++numHypercalls; + return handleKvmExitHypercall(); + + case KVM_EXIT_HLT: + /* The guest has halted and is waiting for interrupts */ + DPRINTF(Kvm, "handleKvmExitHalt\n"); + ++numHalt; + + // Suspend the thread until the next interrupt arrives + thread->suspend(); + + // This is actually ignored since the thread is suspended. + return 0; + + case KVM_EXIT_MMIO: + /* Service memory mapped IO requests */ + DPRINTF(KvmIO, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n", + _kvmRun->mmio.is_write, + _kvmRun->mmio.phys_addr, _kvmRun->mmio.len); + + ++numMMIO; + return doMMIOAccess(_kvmRun->mmio.phys_addr, _kvmRun->mmio.data, + _kvmRun->mmio.len, _kvmRun->mmio.is_write); + + case KVM_EXIT_IRQ_WINDOW_OPEN: + return handleKvmExitIRQWindowOpen(); + + case KVM_EXIT_FAIL_ENTRY: + return handleKvmExitFailEntry(); + + case KVM_EXIT_INTR: + /* KVM was interrupted by a signal, restart it in the next + * tick. */ + return 0; + + case KVM_EXIT_INTERNAL_ERROR: + panic("KVM: Internal error (suberror: %u)\n", + _kvmRun->internal.suberror); + + default: + panic("KVM: Unexpected exit (exit_reason: %u)\n", _kvmRun->exit_reason); + } +} + +Tick +BaseKvmCPU::handleKvmExitIO() +{ + panic("KVM: Unhandled guest IO (dir: %i, size: %i, port: 0x%x, count: %i)\n", + _kvmRun->io.direction, _kvmRun->io.size, + _kvmRun->io.port, _kvmRun->io.count); +} + +Tick +BaseKvmCPU::handleKvmExitHypercall() +{ + panic("KVM: Unhandled hypercall\n"); +} + +Tick +BaseKvmCPU::handleKvmExitIRQWindowOpen() +{ + warn("KVM: Unhandled IRQ window.\n"); + return 0; +} + + +Tick +BaseKvmCPU::handleKvmExitUnknown() +{ + panic("KVM: Unknown error when starting vCPU (hw reason: 0x%llx)\n", + _kvmRun->hw.hardware_exit_reason); +} + +Tick +BaseKvmCPU::handleKvmExitException() +{ + panic("KVM: Got exception when starting vCPU " + "(exception: %u, error_code: %u)\n", + _kvmRun->ex.exception, _kvmRun->ex.error_code); +} + +Tick +BaseKvmCPU::handleKvmExitFailEntry() +{ + panic("KVM: Failed to enter virtualized mode (hw reason: 0x%llx)\n", + _kvmRun->fail_entry.hardware_entry_failure_reason); +} + +Tick +BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write) +{ + mmio_req.setPhys(paddr, size, + 0, /* flags */ + dataMasterId()); + + const MemCmd cmd(write ? MemCmd::WriteReq : MemCmd::ReadReq); + Packet pkt(&mmio_req, cmd); + pkt.dataStatic(data); + return dataPort.sendAtomic(&pkt); +} + +int +BaseKvmCPU::ioctl(int request, long p1) const +{ + if (vcpuFD == -1) + panic("KVM: CPU ioctl called before initialization\n"); + + return ::ioctl(vcpuFD, request, p1); +} + +Tick +BaseKvmCPU::flushCoalescedMMIO() +{ + if (!mmioRing) + return 0; + + DPRINTF(KvmIO, "KVM: Flushing the coalesced MMIO ring buffer\n"); + + // TODO: We might need to do synchronization when we start to + // support multiple CPUs + Tick ticks(0); + while (mmioRing->first != mmioRing->last) { + struct kvm_coalesced_mmio &ent( + mmioRing->coalesced_mmio[mmioRing->first]); + + DPRINTF(KvmIO, "KVM: Handling coalesced MMIO (addr: 0x%x, len: %u)\n", + ent.phys_addr, ent.len); + + ++numCoalescedMMIO; + ticks += doMMIOAccess(ent.phys_addr, ent.data, ent.len, true); + + mmioRing->first = (mmioRing->first + 1) % KVM_COALESCED_MMIO_MAX; + } + + return ticks; +} + +void +BaseKvmCPU::setupSignalHandler() +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = onTimerOverflow; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + if (sigaction(KVM_TIMER_SIGNAL, &sa, NULL) == -1) + panic("KVM: Failed to setup vCPU signal handler\n"); +} + +void +BaseKvmCPU::setupCounters() +{ + DPRINTF(Kvm, "Attaching cycle counter...\n"); + PerfKvmCounterConfig cfgCycles(PERF_TYPE_HARDWARE, + PERF_COUNT_HW_CPU_CYCLES); + cfgCycles.disabled(true) + .pinned(true); + hwCycles.attach(cfgCycles, + 0); // TID (0 => currentThread) + + DPRINTF(Kvm, "Attaching instruction counter...\n"); + PerfKvmCounterConfig cfgInstructions(PERF_TYPE_HARDWARE, + PERF_COUNT_HW_INSTRUCTIONS); + hwInstructions.attach(cfgInstructions, + 0, // TID (0 => currentThread) + hwCycles); +} + +void +BaseKvmCPU::startCounters() +{ + // We only need to start/stop the hwCycles counter since hwCycles + // and hwInstructions are a counter group with hwCycles as the + // group leader. + hwCycles.start(); +} + +void +BaseKvmCPU::stopCounters() +{ + hwCycles.stop(); +} |