/*
 * Copyright 2014 Google, Inc.
 * Copyright (c) 2012-2013,2015,2017-2018 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "cpu/simple/atomic.hh"

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/exetrace.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/full_system.hh"
#include "sim/system.hh"

using namespace std;
using namespace TheISA;

void
AtomicSimpleCPU::init()
{
    BaseSimpleCPU::init();

    int cid = threadContexts[0]->contextId();
    ifetch_req->setContext(cid);
    data_read_req->setContext(cid);
    data_write_req->setContext(cid);
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p),
      tickEvent([this]{ tick(); }, "AtomicSimpleCPU tick",
                false, Event::CPU_Tick_Pri),
      width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + ".icache_port", this),
      dcachePort(name() + ".dcache_port", this),
      dcache_access(false), dcache_latency(0),
      ppCommit(nullptr)
{
    _status = Idle;
    ifetch_req = std::make_shared<Request>();
    data_read_req = std::make_shared<Request>();
    data_write_req = std::make_shared<Request>();
}

AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

DrainState
AtomicSimpleCPU::drain()
{
    // Deschedule any power gating event (if any)
    deschedulePowerGatingEvent();

    if (switchedOut())
        return DrainState::Drained;

    if (!isDrained()) {
        DPRINTF(Drain, "Requesting drain.\n");
        return DrainState::Draining;
    } else {
        if (tickEvent.scheduled())
            deschedule(tickEvent);

        activeThreads.clear();
        DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
        return DrainState::Drained;
    }
}

void
AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n",
            pkt->getAddr(), pkt->cmdString());

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        if (tid != sender) {
            if (getCpuAddrMonitor(tid)->doMonitor(pkt)) {
                wakeup(tid);
            }

            TheISA::handleLockedSnoop(threadInfo[tid]->thread,
                                      pkt, dcachePort.cacheBlockMask);
        }
    }
}

void
AtomicSimpleCPU::drainResume()
{
    assert(!tickEvent.scheduled());
    if (switchedOut())
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    verifyMemoryMode();

    assert(!threadContexts.empty());

    _status = BaseSimpleCPU::Idle;

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        if (threadInfo[tid]->thread->status() == ThreadContext::Active) {
            threadInfo[tid]->notIdleFraction = 1;
            activeThreads.push_back(tid);
            _status = BaseSimpleCPU::Running;

            // Tick if any threads active
            if (!tickEvent.scheduled()) {
                schedule(tickEvent, nextCycle());
            }
        } else {
            threadInfo[tid]->notIdleFraction = 0;
        }
    }

    // Reschedule any power gating event (if any)
    schedulePowerGatingEvent();
}

bool
AtomicSimpleCPU::tryCompleteDrain()
{
    if (drainState() != DrainState::Draining)
        return false;

    DPRINTF(Drain, "tryCompleteDrain.\n");
    if (!isDrained())
        return false;

    DPRINTF(Drain, "CPU done draining, processing drain event\n");
    signalDrainDone();

    return true;
}

void
AtomicSimpleCPU::switchOut()
{
    BaseSimpleCPU::switchOut();

    assert(!tickEvent.scheduled());
    assert(_status == BaseSimpleCPU::Running || _status == Idle);
    assert(isDrained());
}

void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseSimpleCPU::takeOverFrom(oldCPU);

    // The tick event should have been descheduled by drain()
    assert(!tickEvent.scheduled());
}
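
// The atomic CPU requires the memory system to operate in atomic mode,
// which verifyMemoryMode() below enforces at startup and on resume. A
// minimal configuration sketch (assuming the standard gem5 Python config
// API; the object names are illustrative, not from this file):
//
//     system.cpu = AtomicSimpleCPU()
//     system.mem_mode = 'atomic'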
void
AtomicSimpleCPU::verifyMemoryMode() const
{
    if (!system->isAtomicMode()) {
        fatal("The atomic CPU requires the memory system to be in "
              "'atomic' mode.\n");
    }
}

void
AtomicSimpleCPU::activateContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);

    assert(thread_num < numThreads);

    threadInfo[thread_num]->notIdleFraction = 1;
    Cycles delta = ticksToCycles(threadInfo[thread_num]->thread->lastActivate -
                                 threadInfo[thread_num]->thread->lastSuspend);
    numCycles += delta;

    if (!tickEvent.scheduled()) {
        //Make sure ticks are still on multiples of cycles
        schedule(tickEvent, clockEdge(Cycles(0)));
    }
    _status = BaseSimpleCPU::Running;
    if (std::find(activeThreads.begin(), activeThreads.end(), thread_num)
        == activeThreads.end()) {
        activeThreads.push_back(thread_num);
    }

    BaseCPU::activateContext(thread_num);
}

void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num < numThreads);
    activeThreads.remove(thread_num);

    if (_status == Idle)
        return;

    assert(_status == BaseSimpleCPU::Running);

    threadInfo[thread_num]->notIdleFraction = 0;

    if (activeThreads.empty()) {
        _status = Idle;

        if (tickEvent.scheduled()) {
            deschedule(tickEvent);
        }
    }

    BaseCPU::suspendContext(thread_num);
}

Tick
AtomicSimpleCPU::sendPacket(MasterPort &port, const PacketPtr &pkt)
{
    return port.sendAtomic(pkt);
}

Tick
AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n",
            pkt->getAddr(), pkt->cmdString());

    // X86 ISA: Snooping an invalidation for monitor/mwait
    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }

    // If the snoop invalidates, release any associated locks. When run
    // without caches, invalidation packets will not be received; hence we
    // must check whether the incoming packet is a write and wake up the
    // processor accordingly.
    if (pkt->isInvalidate() || pkt->isWrite()) {
        DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (auto &t_info : cpu->threadInfo) {
            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
        }
    }

    return 0;
}

void
AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n",
            pkt->getAddr(), pkt->cmdString());

    // X86 ISA: Snooping an invalidation for monitor/mwait
    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }

    // If the snoop invalidates, release any associated locks.
    if (pkt->isInvalidate()) {
        DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (auto &t_info : cpu->threadInfo) {
            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
        }
    }
}
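
// readMem() performs the whole load inline: translate the virtual address
// atomically, send an atomic packet through the d-cache port (or the IPR
// handler for memory-mapped registers), and accumulate the returned
// latency in dcache_latency. An access that straddles a cache line
// boundary is split in two, with the loop's second iteration covering the
// second line.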
Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
                         Request::Flags flags)
{
    SimpleExecContext& t_info = *threadInfo[curThread];
    SimpleThread* thread = t_info.thread;

    // use the CPU's statically allocated read request and packet objects
    const RequestPtr &req = data_read_req;

    if (traceData)
        traceData->setMem(addr, size, flags);

    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    req->taskId(taskId());
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
                                                   BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt(req, Packet::makeReadCmd(req));
            pkt.dataStatic(data);

            if (req->isMmappedIpr()) {
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            } else {
                dcache_latency += sendPacket(dcachePort, &pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr) {
            if (req->isLockedRMW() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }

            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're reading into to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}

Fault
AtomicSimpleCPU::initiateMemRead(Addr addr, unsigned size,
                                 Request::Flags flags)
{
    panic("initiateMemRead() is for timing accesses, and should "
          "never be called on AtomicSimpleCPU.\n");
}
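
// writeMem() mirrors readMem(), with extra cases: an LL/SC store may be
// squashed by handleLockedWrite(), swap and conditional-swap requests
// return the old memory value through *res, and cache block cleaning
// requests arrive with no data and are given a zeroed source buffer.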
Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
                          Request::Flags flags, uint64_t *res)
{
    SimpleExecContext& t_info = *threadInfo[curThread];
    SimpleThread* thread = t_info.thread;
    static uint8_t zero_array[64] = {};

    if (data == NULL) {
        assert(size <= 64);
        assert(flags & Request::STORE_NO_DATA);
        // This must be a cache block cleaning request
        data = zero_array;
    }

    // use the CPU's statically allocated write request and packet objects
    const RequestPtr &req = data_write_req;

    if (traceData)
        traceData->setMem(addr, size, flags);

    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    req->taskId(taskId());
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
                                                   BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            bool do_access = true;  // flag to suppress cache access

            if (req->isLLSC()) {
                do_access = TheISA::handleLockedWrite(thread, req,
                                                      dcachePort.cacheBlockMask);
            } else if (req->isSwap()) {
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt(req, Packet::makeWriteCmd(req));
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    dcache_latency += sendPacket(dcachePort, &pkt);

                    // Notify other threads on this CPU of write
                    threadSnoop(&pkt, curThread);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr) {
            if (req->isLockedRMW() && fault == NoFault) {
                assert(locked);
                locked = false;
            }

            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're writing from to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}
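
// tick() is the atomic CPU's main loop: each invocation fetches, executes,
// and retires up to 'width' instructions back to back (or keeps going
// while a locked RMW is in flight), then reschedules itself. Cache
// latencies are not simulated per access; when stall simulation is
// enabled they are rounded up to whole clock periods and folded into the
// delay before the next tick.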
void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    // Change thread if multi-threaded
    swapActiveThread();

    // Set memory request ids to current thread
    if (numThreads > 1) {
        ContextID cid = threadContexts[curThread]->contextId();

        ifetch_req->setContext(cid);
        data_read_req->setContext(cid);
        data_write_req->setContext(cid);
    }

    SimpleExecContext& t_info = *threadInfo[curThread];
    SimpleThread* thread = t_info.thread;

    Tick latency = 0;

    for (int i = 0; i < width || locked; ++i) {
        numCycles++;
        updateCycleCounters(BaseCPU::CPU_STATE_ON);

        if (!curStaticInst || !curStaticInst->isDelayedCommit()) {
            checkForInterrupts();
            checkPcEventQueue();
        }

        // We must have just got suspended by a PC event
        if (_status == Idle) {
            tryCompleteDrain();
            return;
        }

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            ifetch_req->taskId(taskId());
            setupFetchRequest(ifetch_req);
            fault = thread->itb->translateAtomic(ifetch_req, thread->getTC(),
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if (decoder.needMoreBytes())
                //{
                icache_access = true;
                Packet ifetch_pkt = Packet(ifetch_req, MemCmd::ReadReq);
                ifetch_pkt.dataStatic(&inst);

                icache_latency = sendPacket(icachePort, &ifetch_pkt);

                assert(!ifetch_pkt.isError());

                // ifetch_req is initialized to read the instruction directly
                // into the CPU object's inst field.
                //}
            }

            preExecute();

            Tick stall_ticks = 0;
            if (curStaticInst) {
                fault = curStaticInst->execute(&t_info, traceData);

                // keep an instruction count
                if (fault == NoFault) {
                    countInst();
                    ppCommit->notify(std::make_pair(thread, curStaticInst));
                } else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                if (fault != NoFault &&
                    dynamic_pointer_cast<SyscallRetryFault>(fault)) {
                    // Retry execution of system calls after a delay.
                    // Prevents immediate re-execution, since the conditions
                    // which caused the retry are unlikely to change every
                    // tick.
                    stall_ticks += clockEdge(syscallRetryLatency) - curTick();
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                                  curStaticInst->isFirstMicroop()))
                instCnt++;

            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                // the atomic cpu does its accounting in ticks, so
                // keep counting in ticks but round to the clock
                // period
                latency += divCeil(stall_ticks, clockPeriod()) *
                           clockPeriod();
            }

        }
        if (fault != NoFault || !t_info.stayAtPC)
            advancePC(fault);
    }

    if (tryCompleteDrain())
        return;

    // instruction takes at least one cycle
    if (latency < clockPeriod())
        latency = clockPeriod();

    if (_status != Idle)
        reschedule(tickEvent, curTick() + latency, true);
}

void
AtomicSimpleCPU::regProbePoints()
{
    BaseCPU::regProbePoints();

    ppCommit = new ProbePointArg<pair<SimpleThread*, const StaticInstPtr>>
                                (getProbeManager(), "Commit");
}

void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}

////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    return new AtomicSimpleCPU(this);
}
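
// For reference, the "Commit" probe point registered in regProbePoints()
// can be observed from another SimObject via the probe framework. A rough
// sketch (MyProfiler and its members are hypothetical; see
// src/cpu/simple/probes for an in-tree listener):
//
//     void
//     MyProfiler::regProbeListeners()
//     {
//         typedef ProbeListenerArg<MyProfiler,
//                 std::pair<SimpleThread*, const StaticInstPtr>> Listener;
//         listeners.push_back(
//             new Listener(this, "Commit", &MyProfiler::profile));
//     }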