/* * Copyright (c) 2005 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __CPU_OOO_CPU_OOO_CPU_HH__ #define __CPU_OOO_CPU_OOO_CPU_HH__ #include "base/statistics.hh" #include "config/full_system.hh" #include "cpu/base.hh" #include "cpu/exec_context.hh" #include "encumbered/cpu/full/fu_pool.hh" #include "cpu/ooo_cpu/ea_list.hh" #include "cpu/pc_event.hh" #include "cpu/static_inst.hh" #include "mem/mem_interface.hh" #include "sim/eventq.hh" // forward declarations #if FULL_SYSTEM class Processor; class AlphaITB; class AlphaDTB; class PhysicalMemory; class RemoteGDB; class GDBListener; #else class Process; #endif // FULL_SYSTEM class Checkpoint; class MemInterface; namespace Trace { class InstRecord; } /** * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with * simple out-of-order capabilities added to it. It is still a 1 CPI machine * (?), but is capable of handling cache misses. Basically it models having * a ROB/IQ by only allowing a certain amount of instructions to execute while * the cache miss is outstanding. */ template class OoOCPU : public BaseCPU { private: typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; public: // main simulation loop (one cycle) void tick(); private: struct TickEvent : public Event { OoOCPU *cpu; int width; TickEvent(OoOCPU *c, int w); void process(); const char *description(); }; TickEvent tickEvent; /// Schedule tick event, regardless of its current state. void scheduleTickEvent(int delay) { if (tickEvent.squashed()) tickEvent.reschedule(curTick + delay); else if (!tickEvent.scheduled()) tickEvent.schedule(curTick + delay); } /// Unschedule tick event, regardless of its current state. void unscheduleTickEvent() { if (tickEvent.scheduled()) tickEvent.squash(); } private: Trace::InstRecord *traceData; template void trace_data(T data); public: // enum Status { Running, Idle, IcacheMiss, IcacheMissComplete, DcacheMissStall, SwitchedOut }; private: Status _status; public: void post_interrupt(int int_num, int index); void zero_fill_64(Addr addr) { static int warned = 0; if (!warned) { warn ("WH64 is not implemented"); warned = 1; } }; struct Params : public BaseCPU::Params { MemInterface *icache_interface; MemInterface *dcache_interface; int width; #if FULL_SYSTEM AlphaITB *itb; AlphaDTB *dtb; FunctionalMemory *mem; #else Process *process; #endif int issueWidth; }; OoOCPU(Params *params); virtual ~OoOCPU(); void init(); private: void copyFromXC(); public: // execution context ExecContext *xc; void switchOut(); void takeOverFrom(BaseCPU *oldCPU); #if FULL_SYSTEM Addr dbg_vtophys(Addr addr); bool interval_stats; #endif // L1 instruction cache MemInterface *icacheInterface; // L1 data cache MemInterface *dcacheInterface; FuncUnitPool *fuPool; // Refcounted pointer to the one memory request. MemReqPtr cacheMemReq; class ICacheCompletionEvent : public Event { private: OoOCPU *cpu; public: ICacheCompletionEvent(OoOCPU *_cpu); virtual void process(); virtual const char *description(); }; // Will need to create a cache completion event upon any memory miss. ICacheCompletionEvent iCacheCompletionEvent; class DCacheCompletionEvent; typedef typename std::list::iterator DCacheCompEventIt; class DCacheCompletionEvent : public Event { private: OoOCPU *cpu; DynInstPtr inst; DCacheCompEventIt dcceIt; public: DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst, DCacheCompEventIt &_dcceIt); virtual void process(); virtual const char *description(); }; friend class DCacheCompletionEvent; protected: std::list dCacheCompList; DCacheCompEventIt dcceIt; private: Status status() const { return _status; } virtual void activateContext(int thread_num, int delay); virtual void suspendContext(int thread_num); virtual void deallocateContext(int thread_num); virtual void haltContext(int thread_num); // statistics virtual void regStats(); virtual void resetStats(); // number of simulated instructions Counter numInst; Counter startNumInst; Stats::Scalar<> numInsts; virtual Counter totalInstructions() const { return numInst - startNumInst; } // number of simulated memory references Stats::Scalar<> numMemRefs; // number of simulated loads Counter numLoad; Counter startNumLoad; // number of idle cycles Stats::Average<> notIdleFraction; Stats::Formula idleFraction; // number of cycles stalled for I-cache misses Stats::Scalar<> icacheStallCycles; Counter lastIcacheStall; // number of cycles stalled for D-cache misses Stats::Scalar<> dcacheStallCycles; Counter lastDcacheStall; void processICacheCompletion(); public: virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); #if FULL_SYSTEM bool validInstAddr(Addr addr) { return true; } bool validDataAddr(Addr addr) { return true; } int getInstAsid() { return xc->regs.instAsid(); } int getDataAsid() { return xc->regs.dataAsid(); } Fault translateInstReq(MemReqPtr &req) { return itb->translate(req); } Fault translateDataReadReq(MemReqPtr &req) { return dtb->translate(req, false); } Fault translateDataWriteReq(MemReqPtr &req) { return dtb->translate(req, true); } #else bool validInstAddr(Addr addr) { return xc->validInstAddr(addr); } bool validDataAddr(Addr addr) { return xc->validDataAddr(addr); } int getInstAsid() { return xc->asid; } int getDataAsid() { return xc->asid; } Fault dummyTranslation(MemReqPtr &req) { #if 0 assert((req->vaddr >> 48 & 0xffff) == 0); #endif // put the asid in the upper 16 bits of the paddr req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; return NoFault; } Fault translateInstReq(MemReqPtr &req) { return dummyTranslation(req); } Fault translateDataReadReq(MemReqPtr &req) { return dummyTranslation(req); } Fault translateDataWriteReq(MemReqPtr &req) { return dummyTranslation(req); } #endif template Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst); template Fault write(T data, Addr addr, unsigned flags, uint64_t *res, DynInstPtr inst); void prefetch(Addr addr, unsigned flags) { // need to do this... } void writeHint(Addr addr, int size, unsigned flags) { // need to do this... } Fault copySrcTranslate(Addr src); Fault copy(Addr dest); private: bool executeInst(DynInstPtr &inst); void renameInst(DynInstPtr &inst); void addInst(DynInstPtr &inst); void commitHeadInst(); bool getOneInst(); Fault fetchCacheLine(); InstSeqNum getAndIncrementInstSeq(); bool ambigMemAddr; private: InstSeqNum globalSeqNum; DynInstPtr renameTable[TheISA::TotalNumRegs]; DynInstPtr commitTable[TheISA::TotalNumRegs]; // Might need a table of the shadow registers as well. #if FULL_SYSTEM DynInstPtr palShadowTable[TheISA::NumIntRegs]; #endif public: // The register accessor methods provide the index of the // instruction's operand (e.g., 0 or 1), not the architectural // register index, to simplify the implementation of register // renaming. We find the architectural register index by indexing // into the instruction's own operand index table. Note that a // raw pointer to the StaticInst is provided instead of a // ref-counted StaticInstPtr to redice overhead. This is fine as // long as these methods don't copy the pointer into any long-term // storage (which is pretty hard to imagine they would have reason // to do). // In the OoO case these shouldn't read from the XC but rather from the // rename table of DynInsts. Also these likely shouldn't be called very // often, other than when adding things into the xc during say a syscall. uint64_t readIntReg(StaticInst *si, int idx) { return xc->readIntReg(si->srcRegIdx(idx)); } FloatReg readFloatReg(StaticInst *si, int idx, width) { int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; return xc->readFloatReg(reg_idx, width); } FloatReg readFloatReg(StaticInst *si, int idx) { int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; return xc->readFloatReg(reg_idx); } FloatRegBits readFloatRegBits(StaticInst *si, int idx, int width) { int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; return xc->readFloatRegBits(reg_idx, width); } FloatRegBits readFloatRegBits(StaticInst *si, int idx) { int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; return xc->readFloatRegBits(reg_idx); } void setIntReg(StaticInst *si, int idx, uint64_t val) { xc->setIntReg(si->destRegIdx(idx), val); } void setFloatReg(StaticInst *si, int idx, FloatReg val, int width) { int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; xc->setFloatReg(reg_idx, val, width); } void setFloatReg(StaticInst *si, int idx, FloatReg val) { int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; xc->setFloatReg(reg_idx, val); } void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val, int width) { int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; xc->setFloatRegBits(reg_idx, val, width); } void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val) { int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; xc->setFloatRegBits(reg_idx, val); } uint64_t readPC() { return PC; } void setNextPC(Addr val) { nextPC = val; } private: Addr PC; Addr nextPC; unsigned issueWidth; bool fetchRedirExcp; bool fetchRedirBranch; /** Mask to get a cache block's address. */ Addr cacheBlkMask; unsigned cacheBlkSize; Addr cacheBlkPC; /** The cache line being fetched. */ uint8_t *cacheData; protected: bool cacheBlkValid; private: // Align an address (typically a PC) to the start of an I-cache block. // We fold in the PISA 64- to 32-bit conversion here as well. Addr icacheBlockAlignPC(Addr addr) { addr = TheISA::realPCToFetchPC(addr); return (addr & ~(cacheBlkMask)); } unsigned instSize; // ROB tracking stuff. DynInstPtr robHeadPtr; DynInstPtr robTailPtr; unsigned robSize; unsigned robInsts; // List of outstanding EA instructions. protected: EAList eaList; public: void branchToTarget(Addr val) { if (!fetchRedirExcp) { fetchRedirBranch = true; PC = val; } } // ISA stuff: uint64_t readUniq() { return xc->readUniq(); } void setUniq(uint64_t val) { xc->setUniq(val); } uint64_t readFpcr() { return xc->readFpcr(); } void setFpcr(uint64_t val) { xc->setFpcr(val); } #if FULL_SYSTEM uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); } Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); } Fault hwrei() { return xc->hwrei(); } int readIntrFlag() { return xc->readIntrFlag(); } void setIntrFlag(int val) { xc->setIntrFlag(val); } bool inPalMode() { return xc->inPalMode(); } void trap(Fault fault) { fault->invoke(xc); } bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); } #else void syscall() { xc->syscall(); } #endif ExecContext *xcBase() { return xc; } }; // precise architected memory state accessor macros template template Fault OoOCPU::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) { MemReqPtr readReq = new MemReq(); readReq->xc = xc; readReq->asid = 0; readReq->data = new uint8_t[64]; readReq->reset(addr, sizeof(T), flags); // translate to physical address - This might be an ISA impl call Fault fault = translateDataReadReq(readReq); // do functional access if (fault == NoFault) fault = xc->mem->read(readReq, data); #if 0 if (traceData) { traceData->setAddr(addr); if (fault == NoFault) traceData->setData(data); } #endif // if we have a cache, do cache access too if (fault == NoFault && dcacheInterface) { readReq->cmd = Read; readReq->completionEvent = NULL; readReq->time = curTick; /*MemAccessResult result = */dcacheInterface->access(readReq); if (dcacheInterface->doEvents()) { readReq->completionEvent = new DCacheCompletionEvent(this, inst, dcceIt); } } if (!dcacheInterface && (readReq->flags & UNCACHEABLE)) recordEvent("Uncached Read"); return fault; } template template Fault OoOCPU::write(T data, Addr addr, unsigned flags, uint64_t *res, DynInstPtr inst) { MemReqPtr writeReq = new MemReq(); writeReq->xc = xc; writeReq->asid = 0; writeReq->data = new uint8_t[64]; #if 0 if (traceData) { traceData->setAddr(addr); traceData->setData(data); } #endif writeReq->reset(addr, sizeof(T), flags); // translate to physical address Fault fault = translateDataWriteReq(writeReq); // do functional access if (fault == NoFault) fault = xc->write(writeReq, data); if (fault == NoFault && dcacheInterface) { writeReq->cmd = Write; memcpy(writeReq->data,(uint8_t *)&data,writeReq->size); writeReq->completionEvent = NULL; writeReq->time = curTick; /*MemAccessResult result = */dcacheInterface->access(writeReq); if (dcacheInterface->doEvents()) { writeReq->completionEvent = new DCacheCompletionEvent(this, inst, dcceIt); } } if (res && (fault == NoFault)) *res = writeReq->result; if (!dcacheInterface && (writeReq->flags & UNCACHEABLE)) recordEvent("Uncached Write"); return fault; } #endif // __CPU_OOO_CPU_OOO_CPU_HH__