path: root/src/cpu/ozone/cpu.hh
Diffstat (limited to 'src/cpu/ozone/cpu.hh')
-rw-r--r--  src/cpu/ozone/cpu.hh  638
1 files changed, 638 insertions, 0 deletions
diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh
new file mode 100644
index 000000000..fa849bb09
--- /dev/null
+++ b/src/cpu/ozone/cpu.hh
@@ -0,0 +1,638 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OOO_CPU_OOO_CPU_HH__
+#define __CPU_OOO_CPU_OOO_CPU_HH__
+
+#include "base/statistics.hh"
+#include "config/full_system.hh"
+#include "cpu/base.hh"
+#include "cpu/exec_context.hh"
+#include "encumbered/cpu/full/fu_pool.hh"
+#include "cpu/ooo_cpu/ea_list.hh"
+#include "cpu/pc_event.hh"
+#include "cpu/static_inst.hh"
+#include "mem/mem_interface.hh"
+#include "sim/eventq.hh"
+
+// forward declarations
+#if FULL_SYSTEM
+class Processor;
+class AlphaITB;
+class AlphaDTB;
+class PhysicalMemory;
+
+class RemoteGDB;
+class GDBListener;
+
+#else
+
+class Process;
+
+#endif // FULL_SYSTEM
+
+class Checkpoint;
+class MemInterface;
+
+namespace Trace {
+ class InstRecord;
+}
+
+/**
+ * Declaration of the Out-of-Order CPU class. It is essentially a SimpleCPU
+ * with simple out-of-order capabilities added to it. It is still roughly a
+ * 1 CPI machine, but it is capable of handling cache misses. It approximates
+ * a ROB/IQ by allowing only a limited number of instructions to execute
+ * while a cache miss is outstanding.
+ */
+
+template <class Impl>
+class OoOCPU : public BaseCPU
+{
+ private:
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ public:
+ // main simulation loop (one cycle)
+ void tick();
+
+ private:
+ struct TickEvent : public Event
+ {
+ OoOCPU *cpu;
+ int width;
+
+ TickEvent(OoOCPU *c, int w);
+ void process();
+ const char *description();
+ };
+
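+    /** Tick event used to schedule the CPU's per-cycle execution. */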
+ TickEvent tickEvent;
+
+ /// Schedule tick event, regardless of its current state.
+ void scheduleTickEvent(int delay)
+ {
+ if (tickEvent.squashed())
+ tickEvent.reschedule(curTick + delay);
+ else if (!tickEvent.scheduled())
+ tickEvent.schedule(curTick + delay);
+ }
+
+ /// Unschedule tick event, regardless of its current state.
+ void unscheduleTickEvent()
+ {
+ if (tickEvent.scheduled())
+ tickEvent.squash();
+ }
+
+ private:
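+    /** Trace record for the instruction currently being executed. */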
+ Trace::InstRecord *traceData;
+
+ template<typename T>
+ void trace_data(T data);
+
+ public:
+    /// Overall CPU status.
+ enum Status {
+ Running,
+ Idle,
+ IcacheMiss,
+ IcacheMissComplete,
+ DcacheMissStall,
+ SwitchedOut
+ };
+
+ private:
+ Status _status;
+
+ public:
+ void post_interrupt(int int_num, int index);
+
+    void zero_fill_64(Addr addr) {
+        static int warned = 0;
+        if (!warned) {
+            warn("WH64 is not implemented");
+            warned = 1;
+        }
+    }
+
+ struct Params : public BaseCPU::Params
+ {
+ MemInterface *icache_interface;
+ MemInterface *dcache_interface;
+ int width;
+#if FULL_SYSTEM
+ AlphaITB *itb;
+ AlphaDTB *dtb;
+ FunctionalMemory *mem;
+#else
+ Process *process;
+#endif
+ int issueWidth;
+ };
+
+ OoOCPU(Params *params);
+
+ virtual ~OoOCPU();
+
+ void init();
+
+ private:
+ void copyFromXC();
+
+ public:
+ // execution context
+ ExecContext *xc;
+
+ void switchOut();
+ void takeOverFrom(BaseCPU *oldCPU);
+
+#if FULL_SYSTEM
+ Addr dbg_vtophys(Addr addr);
+
+ bool interval_stats;
+#endif
+
+ // L1 instruction cache
+ MemInterface *icacheInterface;
+
+ // L1 data cache
+ MemInterface *dcacheInterface;
+
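+    /** Pool of functional units available to this CPU. */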
+ FuncUnitPool *fuPool;
+
+    // Ref-counted pointer to the single outstanding memory request.
+ MemReqPtr cacheMemReq;
+
+ class ICacheCompletionEvent : public Event
+ {
+ private:
+ OoOCPU *cpu;
+
+ public:
+ ICacheCompletionEvent(OoOCPU *_cpu);
+
+ virtual void process();
+ virtual const char *description();
+ };
+
+ // Will need to create a cache completion event upon any memory miss.
+ ICacheCompletionEvent iCacheCompletionEvent;
+
+ class DCacheCompletionEvent;
+
+ typedef typename
+ std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt;
+
+ class DCacheCompletionEvent : public Event
+ {
+ private:
+ OoOCPU *cpu;
+ DynInstPtr inst;
+ DCacheCompEventIt dcceIt;
+
+ public:
+ DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst,
+ DCacheCompEventIt &_dcceIt);
+
+ virtual void process();
+ virtual const char *description();
+ };
+
+ friend class DCacheCompletionEvent;
+
+ protected:
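+    /** List of outstanding D-cache completion events. */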
+ std::list<DCacheCompletionEvent> dCacheCompList;
+ DCacheCompEventIt dcceIt;
+
+ private:
+ Status status() const { return _status; }
+
+ virtual void activateContext(int thread_num, int delay);
+ virtual void suspendContext(int thread_num);
+ virtual void deallocateContext(int thread_num);
+ virtual void haltContext(int thread_num);
+
+ // statistics
+ virtual void regStats();
+ virtual void resetStats();
+
+ // number of simulated instructions
+ Counter numInst;
+ Counter startNumInst;
+ Stats::Scalar<> numInsts;
+
+ virtual Counter totalInstructions() const
+ {
+ return numInst - startNumInst;
+ }
+
+ // number of simulated memory references
+ Stats::Scalar<> numMemRefs;
+
+ // number of simulated loads
+ Counter numLoad;
+ Counter startNumLoad;
+
+    // fraction of simulation cycles spent idle
+ Stats::Average<> notIdleFraction;
+ Stats::Formula idleFraction;
+
+ // number of cycles stalled for I-cache misses
+ Stats::Scalar<> icacheStallCycles;
+ Counter lastIcacheStall;
+
+ // number of cycles stalled for D-cache misses
+ Stats::Scalar<> dcacheStallCycles;
+ Counter lastDcacheStall;
+
+ void processICacheCompletion();
+
+ public:
+
+ virtual void serialize(std::ostream &os);
+ virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+#if FULL_SYSTEM
+ bool validInstAddr(Addr addr) { return true; }
+ bool validDataAddr(Addr addr) { return true; }
+ int getInstAsid() { return xc->regs.instAsid(); }
+ int getDataAsid() { return xc->regs.dataAsid(); }
+
+ Fault translateInstReq(MemReqPtr &req)
+ {
+ return itb->translate(req);
+ }
+
+ Fault translateDataReadReq(MemReqPtr &req)
+ {
+ return dtb->translate(req, false);
+ }
+
+ Fault translateDataWriteReq(MemReqPtr &req)
+ {
+ return dtb->translate(req, true);
+ }
+
+#else
+ bool validInstAddr(Addr addr)
+ { return xc->validInstAddr(addr); }
+
+ bool validDataAddr(Addr addr)
+ { return xc->validDataAddr(addr); }
+
+ int getInstAsid() { return xc->asid; }
+ int getDataAsid() { return xc->asid; }
+
+ Fault dummyTranslation(MemReqPtr &req)
+ {
+#if 0
+ assert((req->vaddr >> 48 & 0xffff) == 0);
+#endif
+
+ // put the asid in the upper 16 bits of the paddr
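+        // e.g. assuming a 64-bit Addr, asid 1 and vaddr 0x1000 produce
+        // paddr 0x0001000000001000 (asid in bits 63:48).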
+        req->paddr = req->vaddr & ~((Addr)0xffff << (sizeof(Addr) * 8 - 16));
+        req->paddr = req->paddr | ((Addr)req->asid << (sizeof(Addr) * 8 - 16));
+ return NoFault;
+ }
+ Fault translateInstReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
+ Fault translateDataReadReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
+ Fault translateDataWriteReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
+
+#endif
+
+ template <class T>
+ Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst);
+
+ template <class T>
+ Fault write(T data, Addr addr, unsigned flags,
+ uint64_t *res, DynInstPtr inst);
+
+ void prefetch(Addr addr, unsigned flags)
+ {
+ // need to do this...
+ }
+
+ void writeHint(Addr addr, int size, unsigned flags)
+ {
+ // need to do this...
+ }
+
+ Fault copySrcTranslate(Addr src);
+
+ Fault copy(Addr dest);
+
+ private:
+ bool executeInst(DynInstPtr &inst);
+
+ void renameInst(DynInstPtr &inst);
+
+ void addInst(DynInstPtr &inst);
+
+ void commitHeadInst();
+
+ bool getOneInst();
+
+ Fault fetchCacheLine();
+
+ InstSeqNum getAndIncrementInstSeq();
+
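+    /** Flag for an ambiguous (not yet resolved) memory effective address. */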
+ bool ambigMemAddr;
+
+ private:
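+    /** Global instruction sequence number counter. */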
+ InstSeqNum globalSeqNum;
+
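+    /** Per-architectural-register tables pointing to the most recent renamed
+     *  and committed producers of each register.
+     */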
+ DynInstPtr renameTable[TheISA::TotalNumRegs];
+ DynInstPtr commitTable[TheISA::TotalNumRegs];
+
+ // Might need a table of the shadow registers as well.
+#if FULL_SYSTEM
+ DynInstPtr palShadowTable[TheISA::NumIntRegs];
+#endif
+
+ public:
+ // The register accessor methods provide the index of the
+ // instruction's operand (e.g., 0 or 1), not the architectural
+ // register index, to simplify the implementation of register
+ // renaming. We find the architectural register index by indexing
+ // into the instruction's own operand index table. Note that a
+ // raw pointer to the StaticInst is provided instead of a
+    // ref-counted StaticInstPtr to reduce overhead. This is fine as
+ // long as these methods don't copy the pointer into any long-term
+ // storage (which is pretty hard to imagine they would have reason
+ // to do).
+
+    // In the OoO case these shouldn't read from the XC but rather from the
+    // rename table of DynInsts. They also shouldn't be called very often,
+    // other than when copying state into the XC during, say, a syscall.
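+    // For example, if an instruction's first source operand is architectural
+    // register r3 (si->srcRegIdx(0) == 3), readIntReg(si, 0) returns the
+    // value of r3.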
+
+ uint64_t readIntReg(StaticInst *si, int idx)
+ {
+ return xc->readIntReg(si->srcRegIdx(idx));
+ }
+
+    FloatReg readFloatReg(StaticInst *si, int idx, int width)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return xc->readFloatReg(reg_idx, width);
+ }
+
+ FloatReg readFloatReg(StaticInst *si, int idx)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return xc->readFloatReg(reg_idx);
+ }
+
+ FloatRegBits readFloatRegBits(StaticInst *si, int idx, int width)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return xc->readFloatRegBits(reg_idx, width);
+ }
+
+ FloatRegBits readFloatRegBits(StaticInst *si, int idx)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return xc->readFloatRegBits(reg_idx);
+ }
+
+ void setIntReg(StaticInst *si, int idx, uint64_t val)
+ {
+ xc->setIntReg(si->destRegIdx(idx), val);
+ }
+
+ void setFloatReg(StaticInst *si, int idx, FloatReg val, int width)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ xc->setFloatReg(reg_idx, val, width);
+ }
+
+ void setFloatReg(StaticInst *si, int idx, FloatReg val)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ xc->setFloatReg(reg_idx, val);
+ }
+
+ void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val, int width)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ xc->setFloatRegBits(reg_idx, val, width);
+ }
+
+ void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ xc->setFloatRegBits(reg_idx, val);
+ }
+
+ uint64_t readPC() { return PC; }
+ void setNextPC(Addr val) { nextPC = val; }
+
+ private:
+ Addr PC;
+ Addr nextPC;
+
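+    /** Maximum number of instructions issued per cycle. */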
+ unsigned issueWidth;
+
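+    /** Fetch redirection flags for exceptions and taken branches. */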
+ bool fetchRedirExcp;
+ bool fetchRedirBranch;
+
+    /** Mask of the offset bits within a cache block (block size - 1). */
+ Addr cacheBlkMask;
+
+ unsigned cacheBlkSize;
+
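+    /** PC of the cache block currently being fetched. */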
+ Addr cacheBlkPC;
+
+ /** The cache line being fetched. */
+ uint8_t *cacheData;
+
+ protected:
+ bool cacheBlkValid;
+
+ private:
+
+ // Align an address (typically a PC) to the start of an I-cache block.
+ // We fold in the PISA 64- to 32-bit conversion here as well.
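+    // e.g. with 64-byte blocks (cacheBlkMask == 0x3f), an addr of 0x1044 is
+    // aligned down to 0x1040.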
+ Addr icacheBlockAlignPC(Addr addr)
+ {
+ addr = TheISA::realPCToFetchPC(addr);
+ return (addr & ~(cacheBlkMask));
+ }
+
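+    /** Size of a single instruction, in bytes. */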
+ unsigned instSize;
+
+ // ROB tracking stuff.
+ DynInstPtr robHeadPtr;
+ DynInstPtr robTailPtr;
+ unsigned robSize;
+ unsigned robInsts;
+
+ // List of outstanding EA instructions.
+ protected:
+ EAList eaList;
+
+ public:
+ void branchToTarget(Addr val)
+ {
+ if (!fetchRedirExcp) {
+ fetchRedirBranch = true;
+ PC = val;
+ }
+ }
+
+ // ISA stuff:
+ uint64_t readUniq() { return xc->readUniq(); }
+ void setUniq(uint64_t val) { xc->setUniq(val); }
+
+ uint64_t readFpcr() { return xc->readFpcr(); }
+ void setFpcr(uint64_t val) { xc->setFpcr(val); }
+
+#if FULL_SYSTEM
+ uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); }
+ Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); }
+ Fault hwrei() { return xc->hwrei(); }
+ int readIntrFlag() { return xc->readIntrFlag(); }
+ void setIntrFlag(int val) { xc->setIntrFlag(val); }
+ bool inPalMode() { return xc->inPalMode(); }
+ void trap(Fault fault) { fault->invoke(xc); }
+ bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); }
+#else
+ void syscall() { xc->syscall(); }
+#endif
+
+ ExecContext *xcBase() { return xc; }
+};
+
+
+// precise architected memory state accessor functions
+template <class Impl>
+template <class T>
+Fault
+OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst)
+{
+ MemReqPtr readReq = new MemReq();
+ readReq->xc = xc;
+ readReq->asid = 0;
+ readReq->data = new uint8_t[64];
+
+ readReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address - This might be an ISA impl call
+ Fault fault = translateDataReadReq(readReq);
+
+ // do functional access
+ if (fault == NoFault)
+ fault = xc->mem->read(readReq, data);
+#if 0
+ if (traceData) {
+ traceData->setAddr(addr);
+ if (fault == NoFault)
+ traceData->setData(data);
+ }
+#endif
+
+ // if we have a cache, do cache access too
+ if (fault == NoFault && dcacheInterface) {
+ readReq->cmd = Read;
+ readReq->completionEvent = NULL;
+ readReq->time = curTick;
+ /*MemAccessResult result = */dcacheInterface->access(readReq);
+
+ if (dcacheInterface->doEvents()) {
+ readReq->completionEvent = new DCacheCompletionEvent(this, inst,
+ dcceIt);
+ }
+ }
+
+ if (!dcacheInterface && (readReq->flags & UNCACHEABLE))
+ recordEvent("Uncached Read");
+
+ return fault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
+ uint64_t *res, DynInstPtr inst)
+{
+ MemReqPtr writeReq = new MemReq();
+ writeReq->xc = xc;
+ writeReq->asid = 0;
+ writeReq->data = new uint8_t[64];
+
+#if 0
+ if (traceData) {
+ traceData->setAddr(addr);
+ traceData->setData(data);
+ }
+#endif
+
+ writeReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+ Fault fault = translateDataWriteReq(writeReq);
+
+ // do functional access
+ if (fault == NoFault)
+ fault = xc->write(writeReq, data);
+
+ if (fault == NoFault && dcacheInterface) {
+ writeReq->cmd = Write;
+        memcpy(writeReq->data, (uint8_t *)&data, writeReq->size);
+ writeReq->completionEvent = NULL;
+ writeReq->time = curTick;
+ /*MemAccessResult result = */dcacheInterface->access(writeReq);
+
+ if (dcacheInterface->doEvents()) {
+ writeReq->completionEvent = new DCacheCompletionEvent(this, inst,
+ dcceIt);
+ }
+ }
+
+ if (res && (fault == NoFault))
+ *res = writeReq->result;
+
+ if (!dcacheInterface && (writeReq->flags & UNCACHEABLE))
+ recordEvent("Uncached Write");
+
+ return fault;
+}
+
+
+#endif // __CPU_OOO_CPU_OOO_CPU_HH__